aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs
diff options
context:
space:
mode:
authorbnagaev <bnagaev@yandex-team.ru>2022-02-10 16:47:04 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:47:04 +0300
commitc74559fb88da8adac0d9186cfa55a6b13c47695f (patch)
treeb83306b6e37edeea782e9eed673d89286c4fef35 /contrib/libs
parentd6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (diff)
downloadydb-c74559fb88da8adac0d9186cfa55a6b13c47695f.tar.gz
Restoring authorship annotation for <bnagaev@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs')
-rw-r--r--contrib/libs/hyperscan/CHANGELOG.md72
-rw-r--r--contrib/libs/hyperscan/COPYING52
-rw-r--r--contrib/libs/hyperscan/LICENSE236
-rw-r--r--contrib/libs/hyperscan/README.md82
-rw-r--r--contrib/libs/hyperscan/hs_version.h76
-rw-r--r--contrib/libs/hyperscan/include/boost-patched/graph/dominator_tree.hpp1002
-rw-r--r--contrib/libs/hyperscan/src/alloc.c206
-rw-r--r--contrib/libs/hyperscan/src/allocator.h132
-rw-r--r--contrib/libs/hyperscan/src/compiler/asserts.cpp568
-rw-r--r--contrib/libs/hyperscan/src/compiler/asserts.h96
-rw-r--r--contrib/libs/hyperscan/src/compiler/compiler.cpp748
-rw-r--r--contrib/libs/hyperscan/src/compiler/compiler.h246
-rw-r--r--contrib/libs/hyperscan/src/compiler/error.cpp186
-rw-r--r--contrib/libs/hyperscan/src/compiler/error.h110
-rw-r--r--contrib/libs/hyperscan/src/crc32.c1284
-rw-r--r--contrib/libs/hyperscan/src/crc32.h92
-rw-r--r--contrib/libs/hyperscan/src/database.c858
-rw-r--r--contrib/libs/hyperscan/src/database.h232
-rw-r--r--contrib/libs/hyperscan/src/fdr/engine_description.cpp98
-rw-r--r--contrib/libs/hyperscan/src/fdr/engine_description.h116
-rw-r--r--contrib/libs/hyperscan/src/fdr/fdr.c148
-rw-r--r--contrib/libs/hyperscan/src/fdr/fdr.h150
-rw-r--r--contrib/libs/hyperscan/src/fdr/fdr_compile.cpp718
-rw-r--r--contrib/libs/hyperscan/src/fdr/fdr_compile.h108
-rw-r--r--contrib/libs/hyperscan/src/fdr/fdr_compile_internal.h148
-rw-r--r--contrib/libs/hyperscan/src/fdr/fdr_compile_util.cpp130
-rw-r--r--contrib/libs/hyperscan/src/fdr/fdr_confirm.h164
-rw-r--r--contrib/libs/hyperscan/src/fdr/fdr_confirm_compile.cpp464
-rw-r--r--contrib/libs/hyperscan/src/fdr/fdr_confirm_runtime.h122
-rw-r--r--contrib/libs/hyperscan/src/fdr/fdr_engine_description.cpp406
-rw-r--r--contrib/libs/hyperscan/src/fdr/fdr_engine_description.h144
-rw-r--r--contrib/libs/hyperscan/src/fdr/fdr_internal.h184
-rw-r--r--contrib/libs/hyperscan/src/fdr/fdr_loadval.h106
-rw-r--r--contrib/libs/hyperscan/src/fdr/flood_compile.cpp414
-rw-r--r--contrib/libs/hyperscan/src/fdr/flood_runtime.h610
-rw-r--r--contrib/libs/hyperscan/src/fdr/teddy.c102
-rw-r--r--contrib/libs/hyperscan/src/fdr/teddy_compile.cpp574
-rw-r--r--contrib/libs/hyperscan/src/fdr/teddy_compile.h94
-rw-r--r--contrib/libs/hyperscan/src/fdr/teddy_engine_description.cpp384
-rw-r--r--contrib/libs/hyperscan/src/fdr/teddy_engine_description.h132
-rw-r--r--contrib/libs/hyperscan/src/fdr/teddy_internal.h82
-rw-r--r--contrib/libs/hyperscan/src/grey.cpp710
-rw-r--r--contrib/libs/hyperscan/src/grey.h380
-rw-r--r--contrib/libs/hyperscan/src/hs.cpp766
-rw-r--r--contrib/libs/hyperscan/src/hs.h88
-rw-r--r--contrib/libs/hyperscan/src/hs_common.h966
-rw-r--r--contrib/libs/hyperscan/src/hs_compile.h1632
-rw-r--r--contrib/libs/hyperscan/src/hs_internal.h154
-rw-r--r--contrib/libs/hyperscan/src/hs_runtime.h908
-rw-r--r--contrib/libs/hyperscan/src/hs_version.c68
-rw-r--r--contrib/libs/hyperscan/src/hwlm/hwlm.c404
-rw-r--r--contrib/libs/hyperscan/src/hwlm/hwlm.h248
-rw-r--r--contrib/libs/hyperscan/src/hwlm/hwlm_build.cpp382
-rw-r--r--contrib/libs/hyperscan/src/hwlm/hwlm_build.h154
-rw-r--r--contrib/libs/hyperscan/src/hwlm/hwlm_internal.h124
-rw-r--r--contrib/libs/hyperscan/src/hwlm/hwlm_literal.cpp210
-rw-r--r--contrib/libs/hyperscan/src/hwlm/hwlm_literal.h230
-rw-r--r--contrib/libs/hyperscan/src/hwlm/noodle_build.cpp162
-rw-r--r--contrib/libs/hyperscan/src/hwlm/noodle_build.h114
-rw-r--r--contrib/libs/hyperscan/src/hwlm/noodle_engine.c498
-rw-r--r--contrib/libs/hyperscan/src/hwlm/noodle_engine.h110
-rw-r--r--contrib/libs/hyperscan/src/hwlm/noodle_engine_avx2.c406
-rw-r--r--contrib/libs/hyperscan/src/hwlm/noodle_engine_sse.c350
-rw-r--r--contrib/libs/hyperscan/src/hwlm/noodle_internal.h78
-rw-r--r--contrib/libs/hyperscan/src/nfa/accel.c262
-rw-r--r--contrib/libs/hyperscan/src/nfa/accel.h220
-rw-r--r--contrib/libs/hyperscan/src/nfa/accelcompile.cpp348
-rw-r--r--contrib/libs/hyperscan/src/nfa/accelcompile.h108
-rw-r--r--contrib/libs/hyperscan/src/nfa/callback.h114
-rw-r--r--contrib/libs/hyperscan/src/nfa/castle.c1506
-rw-r--r--contrib/libs/hyperscan/src/nfa/castle.h94
-rw-r--r--contrib/libs/hyperscan/src/nfa/castle_internal.h172
-rw-r--r--contrib/libs/hyperscan/src/nfa/castlecompile.cpp1584
-rw-r--r--contrib/libs/hyperscan/src/nfa/castlecompile.h304
-rw-r--r--contrib/libs/hyperscan/src/nfa/dfa_min.cpp408
-rw-r--r--contrib/libs/hyperscan/src/nfa/dfa_min.h84
-rw-r--r--contrib/libs/hyperscan/src/nfa/gough.c2208
-rw-r--r--contrib/libs/hyperscan/src/nfa/gough.h154
-rw-r--r--contrib/libs/hyperscan/src/nfa/gough_internal.h268
-rw-r--r--contrib/libs/hyperscan/src/nfa/goughcompile.cpp2564
-rw-r--r--contrib/libs/hyperscan/src/nfa/goughcompile.h180
-rw-r--r--contrib/libs/hyperscan/src/nfa/goughcompile_accel.cpp562
-rw-r--r--contrib/libs/hyperscan/src/nfa/goughcompile_dump.h126
-rw-r--r--contrib/libs/hyperscan/src/nfa/goughcompile_internal.h428
-rw-r--r--contrib/libs/hyperscan/src/nfa/goughcompile_reg.cpp998
-rw-r--r--contrib/libs/hyperscan/src/nfa/lbr.c1030
-rw-r--r--contrib/libs/hyperscan/src/nfa/lbr.h288
-rw-r--r--contrib/libs/hyperscan/src/nfa/lbr_common_impl.h900
-rw-r--r--contrib/libs/hyperscan/src/nfa/lbr_internal.h164
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex.h154
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_accel.c254
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_accel.h134
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_common_impl.h552
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_compile.cpp3518
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_compile.h136
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_context.h150
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_exceptional.h568
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_internal.h352
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_limits.h68
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_native.c238
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_ring.h212
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_runtime.h328
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_runtime_impl.h1546
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_simd128.c114
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_simd256.c108
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_simd384.c108
-rw-r--r--contrib/libs/hyperscan/src/nfa/limex_state_impl.h226
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcclellan.c1630
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcclellan.h208
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h148
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcclellan_internal.h204
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp1656
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcclellancompile.h136
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcclellancompile_util.cpp424
-rw-r--r--contrib/libs/hyperscan/src/nfa/mcclellancompile_util.h102
-rw-r--r--contrib/libs/hyperscan/src/nfa/mpv.c2148
-rw-r--r--contrib/libs/hyperscan/src/nfa/mpv.h84
-rw-r--r--contrib/libs/hyperscan/src/nfa/mpv_internal.h374
-rw-r--r--contrib/libs/hyperscan/src/nfa/mpvcompile.cpp734
-rw-r--r--contrib/libs/hyperscan/src/nfa/mpvcompile.h116
-rw-r--r--contrib/libs/hyperscan/src/nfa/nfa_api.h488
-rw-r--r--contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c630
-rw-r--r--contrib/libs/hyperscan/src/nfa/nfa_api_queue.h576
-rw-r--r--contrib/libs/hyperscan/src/nfa/nfa_api_util.h164
-rw-r--r--contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp592
-rw-r--r--contrib/libs/hyperscan/src/nfa/nfa_build_util.h118
-rw-r--r--contrib/libs/hyperscan/src/nfa/nfa_internal.h374
-rw-r--r--contrib/libs/hyperscan/src/nfa/nfa_kind.h104
-rw-r--r--contrib/libs/hyperscan/src/nfa/nfa_rev_api.h314
-rw-r--r--contrib/libs/hyperscan/src/nfa/rdfa.h176
-rw-r--r--contrib/libs/hyperscan/src/nfa/rdfa_merge.cpp774
-rw-r--r--contrib/libs/hyperscan/src/nfa/rdfa_merge.h124
-rw-r--r--contrib/libs/hyperscan/src/nfa/repeat.c3136
-rw-r--r--contrib/libs/hyperscan/src/nfa/repeat.h722
-rw-r--r--contrib/libs/hyperscan/src/nfa/repeat_internal.h402
-rw-r--r--contrib/libs/hyperscan/src/nfa/repeatcompile.cpp740
-rw-r--r--contrib/libs/hyperscan/src/nfa/repeatcompile.h176
-rw-r--r--contrib/libs/hyperscan/src/nfa/shufti.c1054
-rw-r--r--contrib/libs/hyperscan/src/nfa/shufti.h122
-rw-r--r--contrib/libs/hyperscan/src/nfa/shufticompile.cpp298
-rw-r--r--contrib/libs/hyperscan/src/nfa/shufticompile.h126
-rw-r--r--contrib/libs/hyperscan/src/nfa/truffle.c396
-rw-r--r--contrib/libs/hyperscan/src/nfa/truffle.h102
-rw-r--r--contrib/libs/hyperscan/src/nfa/trufflecompile.cpp164
-rw-r--r--contrib/libs/hyperscan/src/nfa/trufflecompile.h80
-rw-r--r--contrib/libs/hyperscan/src/nfa/vermicelli.h612
-rw-r--r--contrib/libs/hyperscan/src/nfa/vermicelli_run.h180
-rw-r--r--contrib/libs/hyperscan/src/nfa/vermicelli_sse.h682
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng.cpp934
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng.h192
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.cpp134
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.h98
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.cpp1248
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.h90
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp1006
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_asserts.h88
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_builder.cpp496
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_builder.h184
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_calc_components.cpp698
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_calc_components.h96
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.cpp458
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.h90
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_depth.cpp628
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_depth.h146
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_dominators.cpp142
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_dominators.h90
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_dump.h328
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.cpp1014
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.h130
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_equivalence.cpp1076
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_equivalence.h94
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_execute.cpp636
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_execute.h134
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_expr_info.cpp236
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_expr_info.h88
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp1290
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_extparam.h86
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.cpp276
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.h92
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_haig.cpp1324
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_haig.h128
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_holder.cpp160
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_holder.h298
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_is_equal.cpp402
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_is_equal.h120
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp586
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_lbr.h114
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_limex.cpp904
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_limex.h238
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp962
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.h138
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp1316
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.h146
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_literal_component.cpp422
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_literal_component.h88
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.cpp476
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.h104
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.cpp984
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.h162
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_mcclellan_internal.h274
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.cpp1062
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.h148
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_netflow.cpp402
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_netflow.h98
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_prefilter.cpp652
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_prefilter.h90
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp802
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_prune.h150
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_puff.cpp1106
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_puff.h112
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_redundancy.cpp1742
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_redundancy.h108
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_region.cpp764
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_region.h412
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.cpp516
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.h98
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_repeat.cpp4638
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_repeat.h314
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_reports.cpp170
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_reports.h112
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_restructuring.cpp328
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_restructuring.h114
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_revacc.cpp592
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_revacc.h130
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_sep.cpp186
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_sep.h92
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.cpp508
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.h100
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_som.cpp5940
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_som.h144
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.cpp390
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.h94
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_som_util.cpp660
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_som_util.h160
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_split.cpp392
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_split.h128
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_squash.cpp1228
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_squash.h124
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_stop.cpp368
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_stop.h120
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.cpp870
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.h134
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_utf8.cpp558
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_utf8.h108
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_util.cpp996
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_util.h416
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_vacuous.cpp242
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_vacuous.h90
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_width.cpp432
-rw-r--r--contrib/libs/hyperscan/src/nfagraph/ng_width.h148
-rw-r--r--contrib/libs/hyperscan/src/parser/AsciiComponentClass.cpp320
-rw-r--r--contrib/libs/hyperscan/src/parser/AsciiComponentClass.h178
-rw-r--r--contrib/libs/hyperscan/src/parser/Component.cpp150
-rw-r--r--contrib/libs/hyperscan/src/parser/Component.h290
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentAlternation.cpp380
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentAlternation.h158
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentAssertion.cpp242
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentAssertion.h152
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentAtomicGroup.cpp184
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentAtomicGroup.h116
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentBackReference.cpp158
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentBackReference.h168
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentBoundary.cpp372
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentBoundary.h188
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentByte.cpp140
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentByte.h160
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentClass.cpp904
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentClass.h560
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentCondReference.cpp332
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentCondReference.h174
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentEUS.cpp150
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentEUS.h172
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentEmpty.cpp186
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentEmpty.h150
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentRepeat.cpp680
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentRepeat.h258
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentSequence.cpp752
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentSequence.h216
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentVisitor.cpp152
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentVisitor.h300
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentWordBoundary.cpp210
-rw-r--r--contrib/libs/hyperscan/src/parser/ComponentWordBoundary.h180
-rw-r--r--contrib/libs/hyperscan/src/parser/ConstComponentVisitor.cpp156
-rw-r--r--contrib/libs/hyperscan/src/parser/ConstComponentVisitor.h340
-rw-r--r--contrib/libs/hyperscan/src/parser/Parser.h148
-rw-r--r--contrib/libs/hyperscan/src/parser/Parser.rl63786
-rw-r--r--contrib/libs/hyperscan/src/parser/Utf8ComponentClass.cpp2346
-rw-r--r--contrib/libs/hyperscan/src/parser/Utf8ComponentClass.h232
-rw-r--r--contrib/libs/hyperscan/src/parser/buildstate.cpp1048
-rw-r--r--contrib/libs/hyperscan/src/parser/buildstate.h200
-rw-r--r--contrib/libs/hyperscan/src/parser/check_refs.cpp234
-rw-r--r--contrib/libs/hyperscan/src/parser/check_refs.h86
-rw-r--r--contrib/libs/hyperscan/src/parser/dump.h96
-rw-r--r--contrib/libs/hyperscan/src/parser/parse_error.cpp102
-rw-r--r--contrib/libs/hyperscan/src/parser/parse_error.h116
-rw-r--r--contrib/libs/hyperscan/src/parser/parser_util.cpp96
-rw-r--r--contrib/libs/hyperscan/src/parser/position.h214
-rw-r--r--contrib/libs/hyperscan/src/parser/position_dump.h126
-rw-r--r--contrib/libs/hyperscan/src/parser/position_info.h114
-rw-r--r--contrib/libs/hyperscan/src/parser/prefilter.cpp676
-rw-r--r--contrib/libs/hyperscan/src/parser/prefilter.h96
-rw-r--r--contrib/libs/hyperscan/src/parser/shortcut_literal.cpp386
-rw-r--r--contrib/libs/hyperscan/src/parser/shortcut_literal.h92
-rw-r--r--contrib/libs/hyperscan/src/parser/ucp_table.cpp238
-rw-r--r--contrib/libs/hyperscan/src/parser/ucp_table.h22086
-rw-r--r--contrib/libs/hyperscan/src/parser/unsupported.cpp172
-rw-r--r--contrib/libs/hyperscan/src/parser/unsupported.h94
-rw-r--r--contrib/libs/hyperscan/src/parser/utf8_validate.cpp322
-rw-r--r--contrib/libs/hyperscan/src/parser/utf8_validate.h76
-rw-r--r--contrib/libs/hyperscan/src/rose/block.c380
-rw-r--r--contrib/libs/hyperscan/src/rose/catchup.c1510
-rw-r--r--contrib/libs/hyperscan/src/rose/catchup.h234
-rw-r--r--contrib/libs/hyperscan/src/rose/counting_miracle.h514
-rw-r--r--contrib/libs/hyperscan/src/rose/infix.h274
-rw-r--r--contrib/libs/hyperscan/src/rose/init.c162
-rw-r--r--contrib/libs/hyperscan/src/rose/init.h88
-rw-r--r--contrib/libs/hyperscan/src/rose/match.c782
-rw-r--r--contrib/libs/hyperscan/src/rose/match.h450
-rw-r--r--contrib/libs/hyperscan/src/rose/miracle.h276
-rw-r--r--contrib/libs/hyperscan/src/rose/rose.h84
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build.h258
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_add.cpp3438
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_add_internal.h86
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_add_mask.cpp1462
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_anchored.cpp1510
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_anchored.h108
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_bytecode.cpp3010
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_compile.cpp2756
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_convert.cpp1508
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_convert.h82
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_dump.h104
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_impl.h948
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_infix.cpp612
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_infix.h104
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp1176
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_lookaround.h142
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_merge.cpp3616
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_merge.h134
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_misc.cpp1610
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.cpp2922
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.h74
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_scatter.cpp238
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_scatter.h110
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_util.h118
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_width.cpp494
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_width.h132
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_common.h86
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_graph.h400
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_in_dump.h98
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_in_graph.h342
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_in_util.cpp488
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_in_util.h102
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_internal.h930
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_types.h72
-rw-r--r--contrib/libs/hyperscan/src/rose/runtime.h238
-rw-r--r--contrib/libs/hyperscan/src/rose/stream.c994
-rw-r--r--contrib/libs/hyperscan/src/runtime.c1730
-rw-r--r--contrib/libs/hyperscan/src/scratch.c678
-rw-r--r--contrib/libs/hyperscan/src/scratch.h360
-rw-r--r--contrib/libs/hyperscan/src/smallwrite/smallwrite_build.cpp568
-rw-r--r--contrib/libs/hyperscan/src/smallwrite/smallwrite_build.h108
-rw-r--r--contrib/libs/hyperscan/src/smallwrite/smallwrite_internal.h106
-rw-r--r--contrib/libs/hyperscan/src/som/slot_manager.cpp490
-rw-r--r--contrib/libs/hyperscan/src/som/slot_manager.h212
-rw-r--r--contrib/libs/hyperscan/src/som/slot_manager_dump.h102
-rw-r--r--contrib/libs/hyperscan/src/som/slot_manager_internal.h198
-rw-r--r--contrib/libs/hyperscan/src/som/som.h80
-rw-r--r--contrib/libs/hyperscan/src/som/som_runtime.c978
-rw-r--r--contrib/libs/hyperscan/src/som/som_runtime.h122
-rw-r--r--contrib/libs/hyperscan/src/som/som_stream.c348
-rw-r--r--contrib/libs/hyperscan/src/som/som_stream.h96
-rw-r--r--contrib/libs/hyperscan/src/state.h138
-rw-r--r--contrib/libs/hyperscan/src/ue2common.h414
-rw-r--r--contrib/libs/hyperscan/src/util/alloc.cpp266
-rw-r--r--contrib/libs/hyperscan/src/util/alloc.h146
-rw-r--r--contrib/libs/hyperscan/src/util/bitfield.h760
-rw-r--r--contrib/libs/hyperscan/src/util/bitutils.h798
-rw-r--r--contrib/libs/hyperscan/src/util/boundary_reports.h96
-rw-r--r--contrib/libs/hyperscan/src/util/charreach.cpp296
-rw-r--r--contrib/libs/hyperscan/src/util/charreach.h356
-rw-r--r--contrib/libs/hyperscan/src/util/charreach_util.h110
-rw-r--r--contrib/libs/hyperscan/src/util/compare.h338
-rw-r--r--contrib/libs/hyperscan/src/util/compile_context.cpp92
-rw-r--r--contrib/libs/hyperscan/src/util/compile_context.h118
-rw-r--r--contrib/libs/hyperscan/src/util/compile_error.cpp118
-rw-r--r--contrib/libs/hyperscan/src/util/compile_error.h136
-rw-r--r--contrib/libs/hyperscan/src/util/container.h432
-rw-r--r--contrib/libs/hyperscan/src/util/cpuid_flags.c246
-rw-r--r--contrib/libs/hyperscan/src/util/cpuid_flags.h90
-rw-r--r--contrib/libs/hyperscan/src/util/depth.cpp182
-rw-r--r--contrib/libs/hyperscan/src/util/depth.h466
-rw-r--r--contrib/libs/hyperscan/src/util/determinise.h332
-rw-r--r--contrib/libs/hyperscan/src/util/dump_charclass.h118
-rw-r--r--contrib/libs/hyperscan/src/util/dump_mask.cpp126
-rw-r--r--contrib/libs/hyperscan/src/util/dump_mask.h110
-rw-r--r--contrib/libs/hyperscan/src/util/exhaust.h76
-rw-r--r--contrib/libs/hyperscan/src/util/fatbit.h162
-rw-r--r--contrib/libs/hyperscan/src/util/graph.h392
-rw-r--r--contrib/libs/hyperscan/src/util/graph_range.h220
-rw-r--r--contrib/libs/hyperscan/src/util/join.h68
-rw-r--r--contrib/libs/hyperscan/src/util/make_unique.h92
-rw-r--r--contrib/libs/hyperscan/src/util/masked_move.c174
-rw-r--r--contrib/libs/hyperscan/src/util/masked_move.h140
-rw-r--r--contrib/libs/hyperscan/src/util/multibit.c278
-rw-r--r--contrib/libs/hyperscan/src/util/multibit.h2818
-rw-r--r--contrib/libs/hyperscan/src/util/multibit_build.cpp584
-rw-r--r--contrib/libs/hyperscan/src/util/multibit_build.h108
-rw-r--r--contrib/libs/hyperscan/src/util/multibit_internal.h154
-rw-r--r--contrib/libs/hyperscan/src/util/order_check.h74
-rw-r--r--contrib/libs/hyperscan/src/util/pack_bits.h454
-rw-r--r--contrib/libs/hyperscan/src/util/partial_store.h326
-rw-r--r--contrib/libs/hyperscan/src/util/partitioned_set.h510
-rw-r--r--contrib/libs/hyperscan/src/util/popcount.h122
-rw-r--r--contrib/libs/hyperscan/src/util/pqueue.h218
-rw-r--r--contrib/libs/hyperscan/src/util/queue_index_factory.h98
-rw-r--r--contrib/libs/hyperscan/src/util/report.h424
-rw-r--r--contrib/libs/hyperscan/src/util/report_manager.cpp476
-rw-r--r--contrib/libs/hyperscan/src/util/report_manager.h286
-rw-r--r--contrib/libs/hyperscan/src/util/scatter.h110
-rw-r--r--contrib/libs/hyperscan/src/util/scatter_runtime.h148
-rw-r--r--contrib/libs/hyperscan/src/util/simd_types.h90
-rw-r--r--contrib/libs/hyperscan/src/util/simd_utils.h1692
-rw-r--r--contrib/libs/hyperscan/src/util/state_compress.c1182
-rw-r--r--contrib/libs/hyperscan/src/util/state_compress.h136
-rw-r--r--contrib/libs/hyperscan/src/util/target_info.cpp120
-rw-r--r--contrib/libs/hyperscan/src/util/target_info.h116
-rw-r--r--contrib/libs/hyperscan/src/util/ue2string.cpp678
-rw-r--r--contrib/libs/hyperscan/src/util/ue2string.h446
-rw-r--r--contrib/libs/hyperscan/src/util/unaligned.h196
-rw-r--r--contrib/libs/hyperscan/src/util/unicode_def.h170
-rw-r--r--contrib/libs/hyperscan/src/util/unicode_set.h282
-rw-r--r--contrib/libs/hyperscan/src/util/uniform_ops.h386
-rw-r--r--contrib/libs/hyperscan/src/util/verify_types.h102
-rw-r--r--contrib/libs/hyperscan/ya.make238
-rw-r--r--contrib/libs/pire/pire/extra/capture.h30
-rw-r--r--contrib/libs/pire/pire/extra/count.cpp6
-rw-r--r--contrib/libs/pire/pire/extra/count.h176
-rw-r--r--contrib/libs/pire/pire/fsm.cpp2
-rw-r--r--contrib/libs/pire/pire/fsm.h4
-rw-r--r--contrib/libs/pire/pire/platform.h8
-rw-r--r--contrib/libs/pire/pire/re_lexer.h8
-rw-r--r--contrib/libs/pire/pire/re_parser.y40
-rw-r--r--contrib/libs/pire/pire/run.h52
-rw-r--r--contrib/libs/pire/pire/scanner_io.cpp6
-rw-r--r--contrib/libs/pire/pire/scanners/common.h14
-rw-r--r--contrib/libs/pire/pire/scanners/loaded.h62
-rw-r--r--contrib/libs/pire/pire/scanners/multi.h58
-rw-r--r--contrib/libs/pire/pire/scanners/slow.h36
-rw-r--r--contrib/libs/ya.make6
-rw-r--r--contrib/libs/yaml-cpp/LICENSE38
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/anchor.h34
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/binary.h134
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/contrib/anchordict.h64
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/contrib/graphbuilder.h294
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/dll.h54
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/emitfromevents.h114
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/emitter.h502
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/emitterdef.h32
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/emittermanip.h274
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/emitterstyle.h32
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/eventhandler.h80
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/exceptions.h424
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/mark.h58
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/node/convert.h586
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/bool_type.h52
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/impl.h342
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/iterator.h96
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/iterator_fwd.h54
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/memory.h92
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node.h324
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node_data.h246
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node_iterator.h282
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node_ref.h190
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/node/emit.h48
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/node/impl.h890
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/node/iterator.h56
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/node/node.h288
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/node/parse.h58
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/node/ptr.h46
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/node/type.h32
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/noncopyable.h50
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/null.h48
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/ostream_wrapper.h144
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/parser.h88
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/stlemitter.h102
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/traits.h206
-rw-r--r--contrib/libs/yaml-cpp/include/yaml-cpp/yaml.h48
-rw-r--r--contrib/libs/yaml-cpp/src/binary.cpp186
-rw-r--r--contrib/libs/yaml-cpp/src/collectionstack.h78
-rw-r--r--contrib/libs/yaml-cpp/src/contrib/graphbuilder.cpp34
-rw-r--r--contrib/libs/yaml-cpp/src/contrib/graphbuilderadapter.cpp188
-rw-r--r--contrib/libs/yaml-cpp/src/contrib/graphbuilderadapter.h158
-rw-r--r--contrib/libs/yaml-cpp/src/convert.cpp150
-rw-r--r--contrib/libs/yaml-cpp/src/directives.cpp44
-rw-r--r--contrib/libs/yaml-cpp/src/directives.h58
-rw-r--r--contrib/libs/yaml-cpp/src/emit.cpp48
-rw-r--r--contrib/libs/yaml-cpp/src/emitfromevents.cpp236
-rw-r--r--contrib/libs/yaml-cpp/src/emitter.cpp1822
-rw-r--r--contrib/libs/yaml-cpp/src/emitterstate.cpp632
-rw-r--r--contrib/libs/yaml-cpp/src/emitterstate.h378
-rw-r--r--contrib/libs/yaml-cpp/src/emitterutils.cpp954
-rw-r--r--contrib/libs/yaml-cpp/src/emitterutils.h96
-rw-r--r--contrib/libs/yaml-cpp/src/exp.cpp272
-rw-r--r--contrib/libs/yaml-cpp/src/exp.h418
-rw-r--r--contrib/libs/yaml-cpp/src/indentation.h82
-rw-r--r--contrib/libs/yaml-cpp/src/memory.cpp52
-rw-r--r--contrib/libs/yaml-cpp/src/node.cpp24
-rw-r--r--contrib/libs/yaml-cpp/src/node_data.cpp588
-rw-r--r--contrib/libs/yaml-cpp/src/nodebuilder.cpp256
-rw-r--r--contrib/libs/yaml-cpp/src/nodebuilder.h140
-rw-r--r--contrib/libs/yaml-cpp/src/nodeevents.cpp202
-rw-r--r--contrib/libs/yaml-cpp/src/nodeevents.h128
-rw-r--r--contrib/libs/yaml-cpp/src/null.cpp10
-rw-r--r--contrib/libs/yaml-cpp/src/ostream_wrapper.cpp114
-rw-r--r--contrib/libs/yaml-cpp/src/parse.cpp126
-rw-r--r--contrib/libs/yaml-cpp/src/parser.cpp208
-rw-r--r--contrib/libs/yaml-cpp/src/ptr_vector.h72
-rw-r--r--contrib/libs/yaml-cpp/src/regex_yaml.cpp90
-rw-r--r--contrib/libs/yaml-cpp/src/regex_yaml.h160
-rw-r--r--contrib/libs/yaml-cpp/src/regeximpl.h372
-rw-r--r--contrib/libs/yaml-cpp/src/scanner.cpp614
-rw-r--r--contrib/libs/yaml-cpp/src/scanner.h266
-rw-r--r--contrib/libs/yaml-cpp/src/scanscalar.cpp390
-rw-r--r--contrib/libs/yaml-cpp/src/scanscalar.h118
-rw-r--r--contrib/libs/yaml-cpp/src/scantag.cpp162
-rw-r--r--contrib/libs/yaml-cpp/src/scantag.h38
-rw-r--r--contrib/libs/yaml-cpp/src/scantoken.cpp866
-rw-r--r--contrib/libs/yaml-cpp/src/setting.h170
-rw-r--r--contrib/libs/yaml-cpp/src/simplekey.cpp256
-rw-r--r--contrib/libs/yaml-cpp/src/singledocparser.cpp824
-rw-r--r--contrib/libs/yaml-cpp/src/singledocparser.h128
-rw-r--r--contrib/libs/yaml-cpp/src/stream.cpp896
-rw-r--r--contrib/libs/yaml-cpp/src/stream.h152
-rw-r--r--contrib/libs/yaml-cpp/src/streamcharsource.h96
-rw-r--r--contrib/libs/yaml-cpp/src/stringsource.h96
-rw-r--r--contrib/libs/yaml-cpp/src/tag.cpp98
-rw-r--r--contrib/libs/yaml-cpp/src/tag.h66
-rw-r--r--contrib/libs/yaml-cpp/src/token.h138
-rw-r--r--contrib/libs/yaml-cpp/ya.make78
539 files changed, 121245 insertions, 121245 deletions
diff --git a/contrib/libs/hyperscan/CHANGELOG.md b/contrib/libs/hyperscan/CHANGELOG.md
index 35452546a2..8de3a8d6c9 100644
--- a/contrib/libs/hyperscan/CHANGELOG.md
+++ b/contrib/libs/hyperscan/CHANGELOG.md
@@ -1,7 +1,7 @@
-# Hyperscan Change Log
-
-This is a list of notable changes to Hyperscan, in reverse chronological order.
-
+# Hyperscan Change Log
+
+This is a list of notable changes to Hyperscan, in reverse chronological order.
+
## [5.4.0] 2020-12-31
- Improvement on literal matcher "Fat Teddy" performance, including
support for Intel(R) AVX-512 Vector Byte Manipulation Instructions (Intel(R)
@@ -302,35 +302,35 @@ This is a list of notable changes to Hyperscan, in reverse chronological order.
when Hyperscan detects that a scratch region is already in use on entry to an
API function.
-## [4.1.0] 2015-12-18
-- Update version of PCRE used by testing tools as a syntax and semantic
- reference to PCRE 8.38.
-- Small updates to fix warnings identified by Coverity.
-- Clean up and unify exception handling behaviour across GPR and SIMD NFA
- models.
-- Fix bug in handling of bounded repeat triggers with large gaps between them
- for sparse repeat model.
-- Correctly reject POSIX collating elements (`[.ch.]`, `[=ch=]`) in the parser.
- These are not supported by Hyperscan.
-- Add support for quoted sequences (`\Q...\E`) inside character classes.
-- Simplify FDR literal matcher runtime by removing some static specialization.
-- Fix handling of the POSIX `[:graph:]`, `[:print:]` and `[:punct:]` character
- classes to match the behaviour of PCRE 8.38 in both standard operation and
- with the UCP flag set. (Note: some bugs were fixed in this area in PCRE
- 8.38.) Previously Hyperscan's behaviour was the same as versions of PCRE
- before 8.34.
-- Improve performance when compiling pattern sets that include a large number
- of similar bounded repeat constructs. (github issue #9)
-
-## [4.0.1] 2015-10-30
-- Minor cleanups to test code.
-- CMake and other build system improvements.
-- API update: allow `hs_reset_stream()` and `hs_reset_and_copy_stream()` to be
- supplied with a NULL scratch pointer if no matches are required. This is in
- line with the behaviour of `hs_close_stream()`.
-- Disallow bounded repeats with a very large minimum repeat but no maximum,
- i.e. {N,} for very large N.
-- Reduce compile memory usage in literal set explansion for some large cases.
-
-## [4.0.0] 2015-10-20
-- Original release of Hyperscan as open-source software.
+## [4.1.0] 2015-12-18
+- Update version of PCRE used by testing tools as a syntax and semantic
+ reference to PCRE 8.38.
+- Small updates to fix warnings identified by Coverity.
+- Clean up and unify exception handling behaviour across GPR and SIMD NFA
+ models.
+- Fix bug in handling of bounded repeat triggers with large gaps between them
+ for sparse repeat model.
+- Correctly reject POSIX collating elements (`[.ch.]`, `[=ch=]`) in the parser.
+ These are not supported by Hyperscan.
+- Add support for quoted sequences (`\Q...\E`) inside character classes.
+- Simplify FDR literal matcher runtime by removing some static specialization.
+- Fix handling of the POSIX `[:graph:]`, `[:print:]` and `[:punct:]` character
+ classes to match the behaviour of PCRE 8.38 in both standard operation and
+ with the UCP flag set. (Note: some bugs were fixed in this area in PCRE
+ 8.38.) Previously Hyperscan's behaviour was the same as versions of PCRE
+ before 8.34.
+- Improve performance when compiling pattern sets that include a large number
+ of similar bounded repeat constructs. (github issue #9)
+
+## [4.0.1] 2015-10-30
+- Minor cleanups to test code.
+- CMake and other build system improvements.
+- API update: allow `hs_reset_stream()` and `hs_reset_and_copy_stream()` to be
+ supplied with a NULL scratch pointer if no matches are required. This is in
+ line with the behaviour of `hs_close_stream()`.
+- Disallow bounded repeats with a very large minimum repeat but no maximum,
+ i.e. {N,} for very large N.
+- Reduce compile memory usage in literal set explansion for some large cases.
+
+## [4.0.0] 2015-10-20
+- Original release of Hyperscan as open-source software.
diff --git a/contrib/libs/hyperscan/COPYING b/contrib/libs/hyperscan/COPYING
index e578e611d3..ef9b24fb97 100644
--- a/contrib/libs/hyperscan/COPYING
+++ b/contrib/libs/hyperscan/COPYING
@@ -1,26 +1,26 @@
-Copyright (c) 2015, Intel Corporation
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its contributors
- may be used to endorse or promote products derived from this software
- without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
+Copyright (c) 2015, Intel Corporation
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
diff --git a/contrib/libs/hyperscan/LICENSE b/contrib/libs/hyperscan/LICENSE
index 3ccdaaec04..30c57a8013 100644
--- a/contrib/libs/hyperscan/LICENSE
+++ b/contrib/libs/hyperscan/LICENSE
@@ -1,118 +1,118 @@
-Hyperscan is licensed under the BSD License.
-
-Copyright (c) 2015, Intel Corporation
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its contributors
- may be used to endorse or promote products derived from this software
- without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
---------------------------------------------------------------------------------
-
-This product also contains code from third parties, under the following
-licenses:
-
-Intel's Slicing-by-8 CRC32 implementation
------------------------------------------
-
-Copyright (c) 2004-2006, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-Boost C++ Headers Library
--------------------------
-
-Boost Software License - Version 1.0 - August 17th, 2003
-
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-
-
-The Google C++ Testing Framework (Google Test)
-----------------------------------------------
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
- * Neither the name of Google Inc. nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
+Hyperscan is licensed under the BSD License.
+
+Copyright (c) 2015, Intel Corporation
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+--------------------------------------------------------------------------------
+
+This product also contains code from third parties, under the following
+licenses:
+
+Intel's Slicing-by-8 CRC32 implementation
+-----------------------------------------
+
+Copyright (c) 2004-2006, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+Boost C++ Headers Library
+-------------------------
+
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+
+
+The Google C++ Testing Framework (Google Test)
+----------------------------------------------
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
diff --git a/contrib/libs/hyperscan/README.md b/contrib/libs/hyperscan/README.md
index cc3ff7990e..9f4c03723c 100644
--- a/contrib/libs/hyperscan/README.md
+++ b/contrib/libs/hyperscan/README.md
@@ -1,43 +1,43 @@
-# Hyperscan
-
-Hyperscan is a high-performance multiple regex matching library. It follows the
-regular expression syntax of the commonly-used libpcre library, but is a
-standalone library with its own C API.
-
-Hyperscan uses hybrid automata techniques to allow simultaneous matching of
-large numbers (up to tens of thousands) of regular expressions and for the
-matching of regular expressions across streams of data.
-
-Hyperscan is typically used in a DPI library stack.
-
-# Documentation
-
-Information on building the Hyperscan library and using its API is available in
+# Hyperscan
+
+Hyperscan is a high-performance multiple regex matching library. It follows the
+regular expression syntax of the commonly-used libpcre library, but is a
+standalone library with its own C API.
+
+Hyperscan uses hybrid automata techniques to allow simultaneous matching of
+large numbers (up to tens of thousands) of regular expressions and for the
+matching of regular expressions across streams of data.
+
+Hyperscan is typically used in a DPI library stack.
+
+# Documentation
+
+Information on building the Hyperscan library and using its API is available in
the [Developer Reference Guide](http://intel.github.io/hyperscan/dev-reference/).
-
-# License
-
-Hyperscan is licensed under the BSD License. See the LICENSE file in the
-project repository.
-
-# Versioning
-
-The `master` branch on Github will always contain the most recent release of
-Hyperscan. Each version released to `master` goes through QA and testing before
-it is released; if you're a user, rather than a developer, this is the version
-you should be using.
-
-Further development towards the next release takes place on the `develop`
-branch.
-
-# Get Involved
-
+
+# License
+
+Hyperscan is licensed under the BSD License. See the LICENSE file in the
+project repository.
+
+# Versioning
+
+The `master` branch on Github will always contain the most recent release of
+Hyperscan. Each version released to `master` goes through QA and testing before
+it is released; if you're a user, rather than a developer, this is the version
+you should be using.
+
+Further development towards the next release takes place on the `develop`
+branch.
+
+# Get Involved
+
The official homepage for Hyperscan is at [www.hyperscan.io](https://www.hyperscan.io).
-
-If you have questions or comments, we encourage you to [join the mailing
-list](https://lists.01.org/mailman/listinfo/hyperscan). Bugs can be filed by
-sending email to the list, or by creating an issue on Github.
-
-If you wish to contact the Hyperscan team at Intel directly, without posting
-publicly to the mailing list, send email to
-[hyperscan@intel.com](mailto:hyperscan@intel.com).
+
+If you have questions or comments, we encourage you to [join the mailing
+list](https://lists.01.org/mailman/listinfo/hyperscan). Bugs can be filed by
+sending email to the list, or by creating an issue on Github.
+
+If you wish to contact the Hyperscan team at Intel directly, without posting
+publicly to the mailing list, send email to
+[hyperscan@intel.com](mailto:hyperscan@intel.com).
diff --git a/contrib/libs/hyperscan/hs_version.h b/contrib/libs/hyperscan/hs_version.h
index 3c9cc7252f..af41f33bbc 100644
--- a/contrib/libs/hyperscan/hs_version.h
+++ b/contrib/libs/hyperscan/hs_version.h
@@ -1,40 +1,40 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef HS_VERSION_H_C6428FAF8E3713
-#define HS_VERSION_H_C6428FAF8E3713
-
-/**
- * A version string to identify this release of Hyperscan.
- */
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HS_VERSION_H_C6428FAF8E3713
+#define HS_VERSION_H_C6428FAF8E3713
+
+/**
+ * A version string to identify this release of Hyperscan.
+ */
#define HS_VERSION_STRING "5.4.0 1980-01-01"
-
+
#define HS_VERSION_32BIT ((5 << 24) | (4 << 16) | (0 << 8) | 0)
-
-#endif /* HS_VERSION_H_C6428FAF8E3713 */
-
+
+#endif /* HS_VERSION_H_C6428FAF8E3713 */
+
diff --git a/contrib/libs/hyperscan/include/boost-patched/graph/dominator_tree.hpp b/contrib/libs/hyperscan/include/boost-patched/graph/dominator_tree.hpp
index c0e166e9e6..c8b15886b4 100644
--- a/contrib/libs/hyperscan/include/boost-patched/graph/dominator_tree.hpp
+++ b/contrib/libs/hyperscan/include/boost-patched/graph/dominator_tree.hpp
@@ -1,501 +1,501 @@
-//=======================================================================
-// Copyright (C) 2005-2009 Jongsoo Park <jongsoo.park -at- gmail.com>
-//
-// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
-//=======================================================================
-
-#ifndef BOOST_GRAPH_DOMINATOR_HPP
-#define BOOST_GRAPH_DOMINATOR_HPP
-
-#include <boost/config.hpp>
-#include <deque>
-#include <set>
-#include <boost/graph/depth_first_search.hpp>
-#include <boost/concept/assert.hpp>
-
-// Dominator tree computation
-
-// NOTE: This file contains modifications from the distributed Boost version to
-// correctly support supplying a vertex index map to the algorithm. To
-// differentiate it, it has been moved into the boost_ue2 namespace.
-
-namespace boost_ue2 {
-
- using namespace boost;
-
- namespace detail {
- /**
- * An extended time_stamper which also records vertices for each dfs number
- */
- template<class TimeMap, class VertexVector, class TimeT, class Tag>
- class time_stamper_with_vertex_vector
- : public base_visitor<
- time_stamper_with_vertex_vector<TimeMap, VertexVector, TimeT, Tag> >
- {
- public :
- typedef Tag event_filter;
- time_stamper_with_vertex_vector(TimeMap timeMap, VertexVector& v,
- TimeT& t)
- : timeStamper_(timeMap, t), v_(v) { }
-
- template<class Graph>
- void
- operator()(const typename property_traits<TimeMap>::key_type& v,
- const Graph& g)
- {
- timeStamper_(v, g);
- v_[timeStamper_.m_time] = v;
- }
-
- private :
- time_stamper<TimeMap, TimeT, Tag> timeStamper_;
- VertexVector& v_;
- };
-
- /**
- * A convenient way to create a time_stamper_with_vertex_vector
- */
- template<class TimeMap, class VertexVector, class TimeT, class Tag>
- time_stamper_with_vertex_vector<TimeMap, VertexVector, TimeT, Tag>
- stamp_times_with_vertex_vector(TimeMap timeMap, VertexVector& v, TimeT& t,
- Tag)
- {
- return time_stamper_with_vertex_vector<TimeMap, VertexVector, TimeT,
- Tag>(timeMap, v, t);
- }
-
- template<class Graph, class IndexMap, class TimeMap, class PredMap,
- class DomTreePredMap>
- class dominator_visitor
- {
- typedef typename graph_traits<Graph>::vertex_descriptor Vertex;
- typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
-
- public :
- /**
- * @param g [in] the target graph of the dominator tree
- * @param entry [in] the entry node of g
- * @param indexMap [in] the vertex index map for g
- * @param domTreePredMap [out] the immediate dominator map
- * (parent map in dominator tree)
- */
- dominator_visitor(const Graph& g, const Vertex& entry,
- const IndexMap& indexMap,
- DomTreePredMap domTreePredMap)
- : semi_(num_vertices(g)),
- ancestor_(num_vertices(g), graph_traits<Graph>::null_vertex()),
- samedom_(ancestor_),
- best_(semi_),
- semiMap_(make_iterator_property_map(semi_.begin(),
- indexMap)),
- ancestorMap_(make_iterator_property_map(ancestor_.begin(),
- indexMap)),
- bestMap_(make_iterator_property_map(best_.begin(),
- indexMap)),
- buckets_(num_vertices(g)),
- bucketMap_(make_iterator_property_map(buckets_.begin(),
- indexMap)),
- entry_(entry),
- domTreePredMap_(domTreePredMap),
- numOfVertices_(num_vertices(g)),
- samedomMap(make_iterator_property_map(samedom_.begin(),
- indexMap))
- {
- }
-
- void
- operator()(const Vertex& n, const TimeMap& dfnumMap,
- const PredMap& parentMap, const Graph& g)
- {
- if (n == entry_) return;
-
- const Vertex p(get(parentMap, n));
- Vertex s(p);
-
- // 1. Calculate the semidominator of n,
- // based on the semidominator thm.
- // * Semidominator thm. : To find the semidominator of a node n,
- // consider all predecessors v of n in the CFG (Control Flow Graph).
- // - If v is a proper ancestor of n in the spanning tree
- // (so dfnum(v) < dfnum(n)), then v is a candidate for semi(n)
- // - If v is a non-ancestor of n (so dfnum(v) > dfnum(n))
- // then for each u that is an ancestor of v (or u = v),
- // Let semi(u) be a candidate for semi(n)
- // of all these candidates, the one with lowest dfnum is
- // the semidominator of n.
-
- // For each predecessor of n
- typename graph_traits<Graph>::in_edge_iterator inItr, inEnd;
- for (boost::tie(inItr, inEnd) = in_edges(n, g); inItr != inEnd; ++inItr)
- {
- const Vertex v = source(*inItr, g);
- // To deal with unreachable nodes
- if (get(dfnumMap, v) < 0 || get(dfnumMap, v) >= numOfVertices_)
- continue;
-
- Vertex s2;
- if (get(dfnumMap, v) <= get(dfnumMap, n))
- s2 = v;
- else
- s2 = get(semiMap_, ancestor_with_lowest_semi_(v, dfnumMap));
-
- if (get(dfnumMap, s2) < get(dfnumMap, s))
- s = s2;
- }
- put(semiMap_, n, s);
-
- // 2. Calculation of n's dominator is deferred until
- // the path from s to n has been linked into the forest
- get(bucketMap_, s).push_back(n);
- get(ancestorMap_, n) = p;
- get(bestMap_, n) = n;
-
- // 3. Now that the path from p to v has been linked into
- // the spanning forest, these lines calculate the dominator of v,
- // based on the dominator thm., or else defer the calculation
- // until y's dominator is known
- // * Dominator thm. : On the spanning-tree path below semi(n) and
- // above or including n, let y be the node
- // with the smallest-numbered semidominator. Then,
- //
- // idom(n) = semi(n) if semi(y)=semi(n) or
- // idom(y) if semi(y) != semi(n)
- typename std::deque<Vertex>::iterator buckItr;
- for (buckItr = get(bucketMap_, p).begin();
- buckItr != get(bucketMap_, p).end();
- ++buckItr)
- {
- const Vertex v(*buckItr);
- const Vertex y(ancestor_with_lowest_semi_(v, dfnumMap));
- if (get(semiMap_, y) == get(semiMap_, v))
- put(domTreePredMap_, v, p);
- else
- put(samedomMap, v, y);
- }
-
- get(bucketMap_, p).clear();
- }
-
- protected :
-
- /**
- * Evaluate function in Tarjan's path compression
- */
- const Vertex
- ancestor_with_lowest_semi_(const Vertex& v, const TimeMap& dfnumMap)
- {
- const Vertex a(get(ancestorMap_, v));
-
- if (get(ancestorMap_, a) != graph_traits<Graph>::null_vertex())
- {
- const Vertex b(ancestor_with_lowest_semi_(a, dfnumMap));
-
- put(ancestorMap_, v, get(ancestorMap_, a));
-
- if (get(dfnumMap, get(semiMap_, b)) <
- get(dfnumMap, get(semiMap_, get(bestMap_, v))))
- put(bestMap_, v, b);
- }
-
- return get(bestMap_, v);
- }
-
- std::vector<Vertex> semi_, ancestor_, samedom_, best_;
- PredMap semiMap_, ancestorMap_, bestMap_;
- std::vector< std::deque<Vertex> > buckets_;
-
- iterator_property_map<typename std::vector<std::deque<Vertex> >::iterator,
- IndexMap> bucketMap_;
-
- const Vertex& entry_;
- DomTreePredMap domTreePredMap_;
- const VerticesSizeType numOfVertices_;
-
- public :
-
- PredMap samedomMap;
- };
-
- } // namespace detail
-
- /**
- * @brief Build dominator tree using Lengauer-Tarjan algorithm.
- * It takes O((V+E)log(V+E)) time.
- *
- * @pre dfnumMap, parentMap and verticesByDFNum have dfs results corresponding
- * indexMap.
- * If dfs has already run before,
- * this function would be good for saving computations.
- * @pre Unreachable nodes must be masked as
- * graph_traits<Graph>::null_vertex in parentMap.
- * @pre Unreachable nodes must be masked as
- * (std::numeric_limits<VerticesSizeType>::max)() in dfnumMap.
- *
- * @param domTreePredMap [out] : immediate dominator map (parent map
- * in dom. tree)
- *
- * @note reference Appel. p. 452~453. algorithm 19.9, 19.10.
- *
- * @todo : Optimization in Finding Dominators in Practice, Loukas Georgiadis
- */
- template<class Graph, class IndexMap, class TimeMap, class PredMap,
- class VertexVector, class DomTreePredMap>
- void
- lengauer_tarjan_dominator_tree_without_dfs
- (const Graph& g,
- const typename graph_traits<Graph>::vertex_descriptor& entry,
- const IndexMap& indexMap,
- TimeMap dfnumMap, PredMap parentMap, VertexVector& verticesByDFNum,
- DomTreePredMap domTreePredMap)
- {
- // Typedefs and concept check
- typedef typename graph_traits<Graph>::vertex_descriptor Vertex;
- typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
-
- BOOST_CONCEPT_ASSERT(( BidirectionalGraphConcept<Graph> ));
-
- const VerticesSizeType numOfVertices = num_vertices(g);
- if (numOfVertices == 0) return;
-
- // 1. Visit each vertex in reverse post order and calculate sdom.
- detail::dominator_visitor<Graph, IndexMap, TimeMap, PredMap, DomTreePredMap>
- visitor(g, entry, indexMap, domTreePredMap);
-
- VerticesSizeType i;
- for (i = 0; i < numOfVertices; ++i)
- {
- const Vertex u(verticesByDFNum[numOfVertices - 1 - i]);
- if (u != graph_traits<Graph>::null_vertex())
- visitor(u, dfnumMap, parentMap, g);
- }
-
- // 2. Now all the deferred dominator calculations,
- // based on the second clause of the dominator thm., are performed
- for (i = 0; i < numOfVertices; ++i)
- {
- const Vertex n(verticesByDFNum[i]);
-
- if (n == entry || n == graph_traits<Graph>::null_vertex())
- continue;
-
- Vertex u = get(visitor.samedomMap, n);
- if (u != graph_traits<Graph>::null_vertex())
- {
- put(domTreePredMap, n, get(domTreePredMap, u));
- }
- }
- }
-
- /**
- * Unlike lengauer_tarjan_dominator_tree_without_dfs,
- * dfs is run in this function and
- * the result is written to dfnumMap, parentMap, vertices.
- *
- * If the result of dfs required after this algorithm,
- * this function can eliminate the need of rerunning dfs.
- */
- template<class Graph, class IndexMap, class TimeMap, class PredMap,
- class VertexVector, class DomTreePredMap>
- void
- lengauer_tarjan_dominator_tree
- (const Graph& g,
- const typename graph_traits<Graph>::vertex_descriptor& entry,
- const IndexMap& indexMap,
- TimeMap dfnumMap, PredMap parentMap, VertexVector& verticesByDFNum,
- DomTreePredMap domTreePredMap)
- {
- // Typedefs and concept check
- typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
-
- BOOST_CONCEPT_ASSERT(( BidirectionalGraphConcept<Graph> ));
-
- // 1. Depth first visit
- const VerticesSizeType numOfVertices = num_vertices(g);
- if (numOfVertices == 0) return;
-
- VerticesSizeType time =
- (std::numeric_limits<VerticesSizeType>::max)();
- std::vector<default_color_type>
- colors(numOfVertices, color_traits<default_color_type>::white());
- depth_first_visit
- (g, entry,
- make_dfs_visitor
- (make_pair(record_predecessors(parentMap, on_tree_edge()),
- detail::stamp_times_with_vertex_vector
- (dfnumMap, verticesByDFNum, time, on_discover_vertex()))),
- make_iterator_property_map(colors.begin(), indexMap));
-
- // 2. Run main algorithm.
- lengauer_tarjan_dominator_tree_without_dfs(g, entry, indexMap, dfnumMap,
- parentMap, verticesByDFNum,
- domTreePredMap);
- }
-
- /**
- * Use vertex_index as IndexMap and make dfnumMap, parentMap, verticesByDFNum
- * internally.
- * If we don't need the result of dfs (dfnumMap, parentMap, verticesByDFNum),
- * this function would be more convenient one.
- */
- template<class Graph, class DomTreePredMap>
- void
- lengauer_tarjan_dominator_tree
- (const Graph& g,
- const typename graph_traits<Graph>::vertex_descriptor& entry,
- DomTreePredMap domTreePredMap)
- {
- // typedefs
- typedef typename graph_traits<Graph>::vertex_descriptor Vertex;
- typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
- typedef typename property_map<Graph, vertex_index_t>::const_type IndexMap;
- typedef
- iterator_property_map<typename std::vector<VerticesSizeType>::iterator,
- IndexMap> TimeMap;
- typedef
- iterator_property_map<typename std::vector<Vertex>::iterator, IndexMap>
- PredMap;
-
- // Make property maps
- const VerticesSizeType numOfVertices = num_vertices(g);
- if (numOfVertices == 0) return;
-
- const IndexMap indexMap = get(vertex_index, g);
-
- std::vector<VerticesSizeType> dfnum(numOfVertices, 0);
- TimeMap dfnumMap(make_iterator_property_map(dfnum.begin(), indexMap));
-
- std::vector<Vertex> parent(numOfVertices,
- graph_traits<Graph>::null_vertex());
- PredMap parentMap(make_iterator_property_map(parent.begin(), indexMap));
-
- std::vector<Vertex> verticesByDFNum(parent);
-
- // Run main algorithm
- lengauer_tarjan_dominator_tree(g, entry,
- indexMap, dfnumMap, parentMap,
- verticesByDFNum, domTreePredMap);
- }
-
- /**
- * Muchnick. p. 182, 184
- *
- * using iterative bit vector analysis
- */
- template<class Graph, class IndexMap, class DomTreePredMap>
- void
- iterative_bit_vector_dominator_tree
- (const Graph& g,
- const typename graph_traits<Graph>::vertex_descriptor& entry,
- const IndexMap& indexMap,
- DomTreePredMap domTreePredMap)
- {
- typedef typename graph_traits<Graph>::vertex_descriptor Vertex;
- typedef typename graph_traits<Graph>::vertex_iterator vertexItr;
- typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
- typedef
- iterator_property_map<typename std::vector< std::set<Vertex> >::iterator,
- IndexMap> vertexSetMap;
-
- BOOST_CONCEPT_ASSERT(( BidirectionalGraphConcept<Graph> ));
-
- // 1. Finding dominator
- // 1.1. Initialize
- const VerticesSizeType numOfVertices = num_vertices(g);
- if (numOfVertices == 0) return;
-
- vertexItr vi, viend;
- boost::tie(vi, viend) = vertices(g);
- const std::set<Vertex> N(vi, viend);
-
- bool change = true;
-
- std::vector< std::set<Vertex> > dom(numOfVertices, N);
- vertexSetMap domMap(make_iterator_property_map(dom.begin(), indexMap));
- get(domMap, entry).clear();
- get(domMap, entry).insert(entry);
-
- while (change)
- {
- change = false;
- for (boost::tie(vi, viend) = vertices(g); vi != viend; ++vi)
- {
- if (*vi == entry) continue;
-
- std::set<Vertex> T(N);
-
- typename graph_traits<Graph>::in_edge_iterator inItr, inEnd;
- for (boost::tie(inItr, inEnd) = in_edges(*vi, g); inItr != inEnd; ++inItr)
- {
- const Vertex p = source(*inItr, g);
-
- std::set<Vertex> tempSet;
- std::set_intersection(T.begin(), T.end(),
- get(domMap, p).begin(),
- get(domMap, p).end(),
- std::inserter(tempSet, tempSet.begin()));
- T.swap(tempSet);
- }
-
- T.insert(*vi);
- if (T != get(domMap, *vi))
- {
- change = true;
- get(domMap, *vi).swap(T);
- }
- } // end of for (boost::tie(vi, viend) = vertices(g)
- } // end of while(change)
-
- // 2. Build dominator tree
- for (boost::tie(vi, viend) = vertices(g); vi != viend; ++vi)
- get(domMap, *vi).erase(*vi);
-
- Graph domTree(numOfVertices);
-
- for (boost::tie(vi, viend) = vertices(g); vi != viend; ++vi)
- {
- if (*vi == entry) continue;
-
- // We have to iterate through copied dominator set
- const std::set<Vertex> tempSet(get(domMap, *vi));
- typename std::set<Vertex>::const_iterator s;
- for (s = tempSet.begin(); s != tempSet.end(); ++s)
- {
- typename std::set<Vertex>::iterator t;
- for (t = get(domMap, *vi).begin(); t != get(domMap, *vi).end(); )
- {
- typename std::set<Vertex>::iterator old_t = t;
- ++t; // Done early because t may become invalid
- if (*old_t == *s) continue;
- if (get(domMap, *s).find(*old_t) != get(domMap, *s).end())
- get(domMap, *vi).erase(old_t);
- }
- }
- }
-
- for (boost::tie(vi, viend) = vertices(g); vi != viend; ++vi)
- {
- if (*vi != entry && get(domMap, *vi).size() == 1)
- {
- Vertex temp = *get(domMap, *vi).begin();
- put(domTreePredMap, *vi, temp);
- }
- }
- }
-
- template<class Graph, class DomTreePredMap>
- void
- iterative_bit_vector_dominator_tree
- (const Graph& g,
- const typename graph_traits<Graph>::vertex_descriptor& entry,
- DomTreePredMap domTreePredMap)
- {
- typename property_map<Graph, vertex_index_t>::const_type
- indexMap = get(vertex_index, g);
-
- iterative_bit_vector_dominator_tree(g, entry, indexMap, domTreePredMap);
- }
-} // namespace boost
-
-#endif // BOOST_GRAPH_DOMINATOR_HPP
+//=======================================================================
+// Copyright (C) 2005-2009 Jongsoo Park <jongsoo.park -at- gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//=======================================================================
+
+#ifndef BOOST_GRAPH_DOMINATOR_HPP
+#define BOOST_GRAPH_DOMINATOR_HPP
+
+#include <boost/config.hpp>
+#include <deque>
+#include <set>
+#include <boost/graph/depth_first_search.hpp>
+#include <boost/concept/assert.hpp>
+
+// Dominator tree computation
+
+// NOTE: This file contains modifications from the distributed Boost version to
+// correctly support supplying a vertex index map to the algorithm. To
+// differentiate it, it has been moved into the boost_ue2 namespace.
+
+namespace boost_ue2 {
+
+ using namespace boost;
+
+ namespace detail {
+ /**
+ * An extended time_stamper which also records vertices for each dfs number
+ */
+ template<class TimeMap, class VertexVector, class TimeT, class Tag>
+ class time_stamper_with_vertex_vector
+ : public base_visitor<
+ time_stamper_with_vertex_vector<TimeMap, VertexVector, TimeT, Tag> >
+ {
+ public :
+ typedef Tag event_filter;
+ time_stamper_with_vertex_vector(TimeMap timeMap, VertexVector& v,
+ TimeT& t)
+ : timeStamper_(timeMap, t), v_(v) { }
+
+ template<class Graph>
+ void
+ operator()(const typename property_traits<TimeMap>::key_type& v,
+ const Graph& g)
+ {
+ timeStamper_(v, g);
+ v_[timeStamper_.m_time] = v;
+ }
+
+ private :
+ time_stamper<TimeMap, TimeT, Tag> timeStamper_;
+ VertexVector& v_;
+ };
+
+ /**
+ * A convenient way to create a time_stamper_with_vertex_vector
+ */
+ template<class TimeMap, class VertexVector, class TimeT, class Tag>
+ time_stamper_with_vertex_vector<TimeMap, VertexVector, TimeT, Tag>
+ stamp_times_with_vertex_vector(TimeMap timeMap, VertexVector& v, TimeT& t,
+ Tag)
+ {
+ return time_stamper_with_vertex_vector<TimeMap, VertexVector, TimeT,
+ Tag>(timeMap, v, t);
+ }
+
+ template<class Graph, class IndexMap, class TimeMap, class PredMap,
+ class DomTreePredMap>
+ class dominator_visitor
+ {
+ typedef typename graph_traits<Graph>::vertex_descriptor Vertex;
+ typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
+
+ public :
+ /**
+ * @param g [in] the target graph of the dominator tree
+ * @param entry [in] the entry node of g
+ * @param indexMap [in] the vertex index map for g
+ * @param domTreePredMap [out] the immediate dominator map
+ * (parent map in dominator tree)
+ */
+ dominator_visitor(const Graph& g, const Vertex& entry,
+ const IndexMap& indexMap,
+ DomTreePredMap domTreePredMap)
+ : semi_(num_vertices(g)),
+ ancestor_(num_vertices(g), graph_traits<Graph>::null_vertex()),
+ samedom_(ancestor_),
+ best_(semi_),
+ semiMap_(make_iterator_property_map(semi_.begin(),
+ indexMap)),
+ ancestorMap_(make_iterator_property_map(ancestor_.begin(),
+ indexMap)),
+ bestMap_(make_iterator_property_map(best_.begin(),
+ indexMap)),
+ buckets_(num_vertices(g)),
+ bucketMap_(make_iterator_property_map(buckets_.begin(),
+ indexMap)),
+ entry_(entry),
+ domTreePredMap_(domTreePredMap),
+ numOfVertices_(num_vertices(g)),
+ samedomMap(make_iterator_property_map(samedom_.begin(),
+ indexMap))
+ {
+ }
+
+ void
+ operator()(const Vertex& n, const TimeMap& dfnumMap,
+ const PredMap& parentMap, const Graph& g)
+ {
+ if (n == entry_) return;
+
+ const Vertex p(get(parentMap, n));
+ Vertex s(p);
+
+ // 1. Calculate the semidominator of n,
+ // based on the semidominator thm.
+ // * Semidominator thm. : To find the semidominator of a node n,
+ // consider all predecessors v of n in the CFG (Control Flow Graph).
+ // - If v is a proper ancestor of n in the spanning tree
+ // (so dfnum(v) < dfnum(n)), then v is a candidate for semi(n)
+ // - If v is a non-ancestor of n (so dfnum(v) > dfnum(n))
+ // then for each u that is an ancestor of v (or u = v),
+ // Let semi(u) be a candidate for semi(n)
+ // of all these candidates, the one with lowest dfnum is
+ // the semidominator of n.
+
+ // For each predecessor of n
+ typename graph_traits<Graph>::in_edge_iterator inItr, inEnd;
+ for (boost::tie(inItr, inEnd) = in_edges(n, g); inItr != inEnd; ++inItr)
+ {
+ const Vertex v = source(*inItr, g);
+ // To deal with unreachable nodes
+ if (get(dfnumMap, v) < 0 || get(dfnumMap, v) >= numOfVertices_)
+ continue;
+
+ Vertex s2;
+ if (get(dfnumMap, v) <= get(dfnumMap, n))
+ s2 = v;
+ else
+ s2 = get(semiMap_, ancestor_with_lowest_semi_(v, dfnumMap));
+
+ if (get(dfnumMap, s2) < get(dfnumMap, s))
+ s = s2;
+ }
+ put(semiMap_, n, s);
+
+ // 2. Calculation of n's dominator is deferred until
+ // the path from s to n has been linked into the forest
+ get(bucketMap_, s).push_back(n);
+ get(ancestorMap_, n) = p;
+ get(bestMap_, n) = n;
+
+ // 3. Now that the path from p to v has been linked into
+ // the spanning forest, these lines calculate the dominator of v,
+ // based on the dominator thm., or else defer the calculation
+ // until y's dominator is known
+ // * Dominator thm. : On the spanning-tree path below semi(n) and
+ // above or including n, let y be the node
+ // with the smallest-numbered semidominator. Then,
+ //
+ // idom(n) = semi(n) if semi(y)=semi(n) or
+ // idom(y) if semi(y) != semi(n)
+ typename std::deque<Vertex>::iterator buckItr;
+ for (buckItr = get(bucketMap_, p).begin();
+ buckItr != get(bucketMap_, p).end();
+ ++buckItr)
+ {
+ const Vertex v(*buckItr);
+ const Vertex y(ancestor_with_lowest_semi_(v, dfnumMap));
+ if (get(semiMap_, y) == get(semiMap_, v))
+ put(domTreePredMap_, v, p);
+ else
+ put(samedomMap, v, y);
+ }
+
+ get(bucketMap_, p).clear();
+ }
+
+ protected :
+
+ /**
+ * Evaluate function in Tarjan's path compression
+ */
+ const Vertex
+ ancestor_with_lowest_semi_(const Vertex& v, const TimeMap& dfnumMap)
+ {
+ const Vertex a(get(ancestorMap_, v));
+
+ if (get(ancestorMap_, a) != graph_traits<Graph>::null_vertex())
+ {
+ const Vertex b(ancestor_with_lowest_semi_(a, dfnumMap));
+
+ put(ancestorMap_, v, get(ancestorMap_, a));
+
+ if (get(dfnumMap, get(semiMap_, b)) <
+ get(dfnumMap, get(semiMap_, get(bestMap_, v))))
+ put(bestMap_, v, b);
+ }
+
+ return get(bestMap_, v);
+ }
+
+ std::vector<Vertex> semi_, ancestor_, samedom_, best_;
+ PredMap semiMap_, ancestorMap_, bestMap_;
+ std::vector< std::deque<Vertex> > buckets_;
+
+ iterator_property_map<typename std::vector<std::deque<Vertex> >::iterator,
+ IndexMap> bucketMap_;
+
+ const Vertex& entry_;
+ DomTreePredMap domTreePredMap_;
+ const VerticesSizeType numOfVertices_;
+
+ public :
+
+ PredMap samedomMap;
+ };
+
+ } // namespace detail
+
+ /**
+ * @brief Build dominator tree using Lengauer-Tarjan algorithm.
+ * It takes O((V+E)log(V+E)) time.
+ *
+ * @pre dfnumMap, parentMap and verticesByDFNum have dfs results corresponding
+ * indexMap.
+ * If dfs has already run before,
+ * this function would be good for saving computations.
+ * @pre Unreachable nodes must be masked as
+ * graph_traits<Graph>::null_vertex in parentMap.
+ * @pre Unreachable nodes must be masked as
+ * (std::numeric_limits<VerticesSizeType>::max)() in dfnumMap.
+ *
+ * @param domTreePredMap [out] : immediate dominator map (parent map
+ * in dom. tree)
+ *
+ * @note reference Appel. p. 452~453. algorithm 19.9, 19.10.
+ *
+ * @todo : Optimization in Finding Dominators in Practice, Loukas Georgiadis
+ */
+ template<class Graph, class IndexMap, class TimeMap, class PredMap,
+ class VertexVector, class DomTreePredMap>
+ void
+ lengauer_tarjan_dominator_tree_without_dfs
+ (const Graph& g,
+ const typename graph_traits<Graph>::vertex_descriptor& entry,
+ const IndexMap& indexMap,
+ TimeMap dfnumMap, PredMap parentMap, VertexVector& verticesByDFNum,
+ DomTreePredMap domTreePredMap)
+ {
+ // Typedefs and concept check
+ typedef typename graph_traits<Graph>::vertex_descriptor Vertex;
+ typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
+
+ BOOST_CONCEPT_ASSERT(( BidirectionalGraphConcept<Graph> ));
+
+ const VerticesSizeType numOfVertices = num_vertices(g);
+ if (numOfVertices == 0) return;
+
+ // 1. Visit each vertex in reverse post order and calculate sdom.
+ detail::dominator_visitor<Graph, IndexMap, TimeMap, PredMap, DomTreePredMap>
+ visitor(g, entry, indexMap, domTreePredMap);
+
+ VerticesSizeType i;
+ for (i = 0; i < numOfVertices; ++i)
+ {
+ const Vertex u(verticesByDFNum[numOfVertices - 1 - i]);
+ if (u != graph_traits<Graph>::null_vertex())
+ visitor(u, dfnumMap, parentMap, g);
+ }
+
+ // 2. Now all the deferred dominator calculations,
+ // based on the second clause of the dominator thm., are performed
+ for (i = 0; i < numOfVertices; ++i)
+ {
+ const Vertex n(verticesByDFNum[i]);
+
+ if (n == entry || n == graph_traits<Graph>::null_vertex())
+ continue;
+
+ Vertex u = get(visitor.samedomMap, n);
+ if (u != graph_traits<Graph>::null_vertex())
+ {
+ put(domTreePredMap, n, get(domTreePredMap, u));
+ }
+ }
+ }
+
+ /**
+ * Unlike lengauer_tarjan_dominator_tree_without_dfs,
+ * dfs is run in this function and
+ * the result is written to dfnumMap, parentMap, vertices.
+ *
+ * If the result of dfs required after this algorithm,
+ * this function can eliminate the need of rerunning dfs.
+ */
+ template<class Graph, class IndexMap, class TimeMap, class PredMap,
+ class VertexVector, class DomTreePredMap>
+ void
+ lengauer_tarjan_dominator_tree
+ (const Graph& g,
+ const typename graph_traits<Graph>::vertex_descriptor& entry,
+ const IndexMap& indexMap,
+ TimeMap dfnumMap, PredMap parentMap, VertexVector& verticesByDFNum,
+ DomTreePredMap domTreePredMap)
+ {
+ // Typedefs and concept check
+ typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
+
+ BOOST_CONCEPT_ASSERT(( BidirectionalGraphConcept<Graph> ));
+
+ // 1. Depth first visit
+ const VerticesSizeType numOfVertices = num_vertices(g);
+ if (numOfVertices == 0) return;
+
+ VerticesSizeType time =
+ (std::numeric_limits<VerticesSizeType>::max)();
+ std::vector<default_color_type>
+ colors(numOfVertices, color_traits<default_color_type>::white());
+ depth_first_visit
+ (g, entry,
+ make_dfs_visitor
+ (make_pair(record_predecessors(parentMap, on_tree_edge()),
+ detail::stamp_times_with_vertex_vector
+ (dfnumMap, verticesByDFNum, time, on_discover_vertex()))),
+ make_iterator_property_map(colors.begin(), indexMap));
+
+ // 2. Run main algorithm.
+ lengauer_tarjan_dominator_tree_without_dfs(g, entry, indexMap, dfnumMap,
+ parentMap, verticesByDFNum,
+ domTreePredMap);
+ }
+
+ /**
+ * Use vertex_index as IndexMap and make dfnumMap, parentMap, verticesByDFNum
+ * internally.
+ * If we don't need the result of dfs (dfnumMap, parentMap, verticesByDFNum),
+ * this function would be more convenient one.
+ */
+ template<class Graph, class DomTreePredMap>
+ void
+ lengauer_tarjan_dominator_tree
+ (const Graph& g,
+ const typename graph_traits<Graph>::vertex_descriptor& entry,
+ DomTreePredMap domTreePredMap)
+ {
+ // typedefs
+ typedef typename graph_traits<Graph>::vertex_descriptor Vertex;
+ typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
+ typedef typename property_map<Graph, vertex_index_t>::const_type IndexMap;
+ typedef
+ iterator_property_map<typename std::vector<VerticesSizeType>::iterator,
+ IndexMap> TimeMap;
+ typedef
+ iterator_property_map<typename std::vector<Vertex>::iterator, IndexMap>
+ PredMap;
+
+ // Make property maps
+ const VerticesSizeType numOfVertices = num_vertices(g);
+ if (numOfVertices == 0) return;
+
+ const IndexMap indexMap = get(vertex_index, g);
+
+ std::vector<VerticesSizeType> dfnum(numOfVertices, 0);
+ TimeMap dfnumMap(make_iterator_property_map(dfnum.begin(), indexMap));
+
+ std::vector<Vertex> parent(numOfVertices,
+ graph_traits<Graph>::null_vertex());
+ PredMap parentMap(make_iterator_property_map(parent.begin(), indexMap));
+
+ std::vector<Vertex> verticesByDFNum(parent);
+
+ // Run main algorithm
+ lengauer_tarjan_dominator_tree(g, entry,
+ indexMap, dfnumMap, parentMap,
+ verticesByDFNum, domTreePredMap);
+ }
+
+ /**
+ * Muchnick. p. 182, 184
+ *
+ * using iterative bit vector analysis
+ */
+ template<class Graph, class IndexMap, class DomTreePredMap>
+ void
+ iterative_bit_vector_dominator_tree
+ (const Graph& g,
+ const typename graph_traits<Graph>::vertex_descriptor& entry,
+ const IndexMap& indexMap,
+ DomTreePredMap domTreePredMap)
+ {
+ typedef typename graph_traits<Graph>::vertex_descriptor Vertex;
+ typedef typename graph_traits<Graph>::vertex_iterator vertexItr;
+ typedef typename graph_traits<Graph>::vertices_size_type VerticesSizeType;
+ typedef
+ iterator_property_map<typename std::vector< std::set<Vertex> >::iterator,
+ IndexMap> vertexSetMap;
+
+ BOOST_CONCEPT_ASSERT(( BidirectionalGraphConcept<Graph> ));
+
+ // 1. Finding dominator
+ // 1.1. Initialize
+ const VerticesSizeType numOfVertices = num_vertices(g);
+ if (numOfVertices == 0) return;
+
+ vertexItr vi, viend;
+ boost::tie(vi, viend) = vertices(g);
+ const std::set<Vertex> N(vi, viend);
+
+ bool change = true;
+
+ std::vector< std::set<Vertex> > dom(numOfVertices, N);
+ vertexSetMap domMap(make_iterator_property_map(dom.begin(), indexMap));
+ get(domMap, entry).clear();
+ get(domMap, entry).insert(entry);
+
+ while (change)
+ {
+ change = false;
+ for (boost::tie(vi, viend) = vertices(g); vi != viend; ++vi)
+ {
+ if (*vi == entry) continue;
+
+ std::set<Vertex> T(N);
+
+ typename graph_traits<Graph>::in_edge_iterator inItr, inEnd;
+ for (boost::tie(inItr, inEnd) = in_edges(*vi, g); inItr != inEnd; ++inItr)
+ {
+ const Vertex p = source(*inItr, g);
+
+ std::set<Vertex> tempSet;
+ std::set_intersection(T.begin(), T.end(),
+ get(domMap, p).begin(),
+ get(domMap, p).end(),
+ std::inserter(tempSet, tempSet.begin()));
+ T.swap(tempSet);
+ }
+
+ T.insert(*vi);
+ if (T != get(domMap, *vi))
+ {
+ change = true;
+ get(domMap, *vi).swap(T);
+ }
+ } // end of for (boost::tie(vi, viend) = vertices(g)
+ } // end of while(change)
+
+ // 2. Build dominator tree
+ for (boost::tie(vi, viend) = vertices(g); vi != viend; ++vi)
+ get(domMap, *vi).erase(*vi);
+
+ Graph domTree(numOfVertices);
+
+ for (boost::tie(vi, viend) = vertices(g); vi != viend; ++vi)
+ {
+ if (*vi == entry) continue;
+
+ // We have to iterate through copied dominator set
+ const std::set<Vertex> tempSet(get(domMap, *vi));
+ typename std::set<Vertex>::const_iterator s;
+ for (s = tempSet.begin(); s != tempSet.end(); ++s)
+ {
+ typename std::set<Vertex>::iterator t;
+ for (t = get(domMap, *vi).begin(); t != get(domMap, *vi).end(); )
+ {
+ typename std::set<Vertex>::iterator old_t = t;
+ ++t; // Done early because t may become invalid
+ if (*old_t == *s) continue;
+ if (get(domMap, *s).find(*old_t) != get(domMap, *s).end())
+ get(domMap, *vi).erase(old_t);
+ }
+ }
+ }
+
+ for (boost::tie(vi, viend) = vertices(g); vi != viend; ++vi)
+ {
+ if (*vi != entry && get(domMap, *vi).size() == 1)
+ {
+ Vertex temp = *get(domMap, *vi).begin();
+ put(domTreePredMap, *vi, temp);
+ }
+ }
+ }
+
+ template<class Graph, class DomTreePredMap>
+ void
+ iterative_bit_vector_dominator_tree
+ (const Graph& g,
+ const typename graph_traits<Graph>::vertex_descriptor& entry,
+ DomTreePredMap domTreePredMap)
+ {
+ typename property_map<Graph, vertex_index_t>::const_type
+ indexMap = get(vertex_index, g);
+
+ iterative_bit_vector_dominator_tree(g, entry, indexMap, domTreePredMap);
+ }
+} // namespace boost
+
+#endif // BOOST_GRAPH_DOMINATOR_HPP
diff --git a/contrib/libs/hyperscan/src/alloc.c b/contrib/libs/hyperscan/src/alloc.c
index 125d5bce31..e27649bce8 100644
--- a/contrib/libs/hyperscan/src/alloc.c
+++ b/contrib/libs/hyperscan/src/alloc.c
@@ -1,113 +1,113 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Runtime functions for setting custom allocators.
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "allocator.h"
-
-#define default_malloc malloc
-#define default_free free
-
-hs_alloc_t hs_database_alloc = default_malloc;
-hs_alloc_t hs_misc_alloc = default_malloc;
-hs_alloc_t hs_scratch_alloc = default_malloc;
-hs_alloc_t hs_stream_alloc = default_malloc;
-
-hs_free_t hs_database_free = default_free;
-hs_free_t hs_misc_free = default_free;
-hs_free_t hs_scratch_free = default_free;
-hs_free_t hs_stream_free = default_free;
-
-static
-hs_alloc_t normalise_alloc(hs_alloc_t a) {
- if (!a) {
- return default_malloc;
- } else {
- return a;
- }
-}
-
-static
-hs_free_t normalise_free(hs_free_t f) {
- if (!f) {
- return default_free;
- } else {
- return f;
- }
-}
-
-HS_PUBLIC_API
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Runtime functions for setting custom allocators.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "allocator.h"
+
+#define default_malloc malloc
+#define default_free free
+
+hs_alloc_t hs_database_alloc = default_malloc;
+hs_alloc_t hs_misc_alloc = default_malloc;
+hs_alloc_t hs_scratch_alloc = default_malloc;
+hs_alloc_t hs_stream_alloc = default_malloc;
+
+hs_free_t hs_database_free = default_free;
+hs_free_t hs_misc_free = default_free;
+hs_free_t hs_scratch_free = default_free;
+hs_free_t hs_stream_free = default_free;
+
+static
+hs_alloc_t normalise_alloc(hs_alloc_t a) {
+ if (!a) {
+ return default_malloc;
+ } else {
+ return a;
+ }
+}
+
+static
+hs_free_t normalise_free(hs_free_t f) {
+ if (!f) {
+ return default_free;
+ } else {
+ return f;
+ }
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_set_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
- hs_set_database_allocator(allocfunc, freefunc);
- hs_set_misc_allocator(allocfunc, freefunc);
- hs_set_stream_allocator(allocfunc, freefunc);
- hs_set_scratch_allocator(allocfunc, freefunc);
-
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+ hs_set_database_allocator(allocfunc, freefunc);
+ hs_set_misc_allocator(allocfunc, freefunc);
+ hs_set_stream_allocator(allocfunc, freefunc);
+ hs_set_scratch_allocator(allocfunc, freefunc);
+
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_set_database_allocator(hs_alloc_t allocfunc,
hs_free_t freefunc) {
- hs_database_alloc = normalise_alloc(allocfunc);
- hs_database_free = normalise_free(freefunc);
-
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+ hs_database_alloc = normalise_alloc(allocfunc);
+ hs_database_free = normalise_free(freefunc);
+
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_set_misc_allocator(hs_alloc_t allocfunc,
hs_free_t freefunc) {
- hs_misc_alloc = normalise_alloc(allocfunc);
- hs_misc_free = normalise_free(freefunc);
-
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+ hs_misc_alloc = normalise_alloc(allocfunc);
+ hs_misc_free = normalise_free(freefunc);
+
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_set_scratch_allocator(hs_alloc_t allocfunc,
hs_free_t freefunc) {
- hs_scratch_alloc = normalise_alloc(allocfunc);
- hs_scratch_free = normalise_free(freefunc);
-
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+ hs_scratch_alloc = normalise_alloc(allocfunc);
+ hs_scratch_free = normalise_free(freefunc);
+
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_set_stream_allocator(hs_alloc_t allocfunc,
hs_free_t freefunc) {
- hs_stream_alloc = normalise_alloc(allocfunc);
- hs_stream_free = normalise_free(freefunc);
-
- return HS_SUCCESS;
-}
+ hs_stream_alloc = normalise_alloc(allocfunc);
+ hs_stream_free = normalise_free(freefunc);
+
+ return HS_SUCCESS;
+}
diff --git a/contrib/libs/hyperscan/src/allocator.h b/contrib/libs/hyperscan/src/allocator.h
index d0cd822f1d..61c20f914d 100644
--- a/contrib/libs/hyperscan/src/allocator.h
+++ b/contrib/libs/hyperscan/src/allocator.h
@@ -1,66 +1,66 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ALLOCATOR_H
-#define ALLOCATOR_H
-
-#include "hs_common.h"
-#include "ue2common.h"
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-extern hs_alloc_t hs_database_alloc;
-extern hs_alloc_t hs_misc_alloc;
-extern hs_alloc_t hs_scratch_alloc;
-extern hs_alloc_t hs_stream_alloc;
-
-extern hs_free_t hs_database_free;
-extern hs_free_t hs_misc_free;
-extern hs_free_t hs_scratch_free;
-extern hs_free_t hs_stream_free;
-#ifdef __cplusplus
-} /* extern C */
-#endif
-/** \brief Check the results of an alloc done with hs_alloc for alignment.
- *
- * If we have incorrect alignment, return an error. Caller should free the
- * offending block. */
-static really_inline
-hs_error_t hs_check_alloc(const void *mem) {
- hs_error_t ret = HS_SUCCESS;
- if (!mem) {
- ret = HS_NOMEM;
- } else if (!ISALIGNED_N(mem, alignof(unsigned long long))) {
- ret = HS_BAD_ALLOC;
- }
- return ret;
-}
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ALLOCATOR_H
+#define ALLOCATOR_H
+
+#include "hs_common.h"
+#include "ue2common.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+extern hs_alloc_t hs_database_alloc;
+extern hs_alloc_t hs_misc_alloc;
+extern hs_alloc_t hs_scratch_alloc;
+extern hs_alloc_t hs_stream_alloc;
+
+extern hs_free_t hs_database_free;
+extern hs_free_t hs_misc_free;
+extern hs_free_t hs_scratch_free;
+extern hs_free_t hs_stream_free;
+#ifdef __cplusplus
+} /* extern C */
+#endif
+/** \brief Check the results of an alloc done with hs_alloc for alignment.
+ *
+ * If we have incorrect alignment, return an error. Caller should free the
+ * offending block. */
+static really_inline
+hs_error_t hs_check_alloc(const void *mem) {
+ hs_error_t ret = HS_SUCCESS;
+ if (!mem) {
+ ret = HS_NOMEM;
+ } else if (!ISALIGNED_N(mem, alignof(unsigned long long))) {
+ ret = HS_BAD_ALLOC;
+ }
+ return ret;
+}
+
+#endif
diff --git a/contrib/libs/hyperscan/src/compiler/asserts.cpp b/contrib/libs/hyperscan/src/compiler/asserts.cpp
index 6cef97475d..444422260c 100644
--- a/contrib/libs/hyperscan/src/compiler/asserts.cpp
+++ b/contrib/libs/hyperscan/src/compiler/asserts.cpp
@@ -1,311 +1,311 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Convert temporary assert vertices (from construction method) to
- * edge-based flags.
- *
- * This pass converts the temporary assert vertices created by the Glushkov
- * construction process above (vertices with special assertions flags) into
- * edges between those vertices' neighbours in the graph.
- *
- * These edges have the appropriate flags applied to them -- a path (u,t,v)
- * through an assert vertex t will be replaced with the edge (u,v) with the
- * assertion flags from t.
- *
- * Edges with mutually incompatible flags (such as the conjunction of
- * word-to-word and word-to-nonword) are dropped.
- */
-#include "asserts.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Convert temporary assert vertices (from construction method) to
+ * edge-based flags.
+ *
+ * This pass converts the temporary assert vertices created by the Glushkov
+ * construction process above (vertices with special assertions flags) into
+ * edges between those vertices' neighbours in the graph.
+ *
+ * These edges have the appropriate flags applied to them -- a path (u,t,v)
+ * through an assert vertex t will be replaced with the edge (u,v) with the
+ * assertion flags from t.
+ *
+ * Edges with mutually incompatible flags (such as the conjunction of
+ * word-to-word and word-to-nonword) are dropped.
+ */
+#include "asserts.h"
#include "compiler/compiler.h"
-#include "nfagraph/ng.h"
-#include "nfagraph/ng_prune.h"
-#include "nfagraph/ng_redundancy.h"
-#include "nfagraph/ng_util.h"
-#include "parser/position.h" // for POS flags
-#include "util/compile_error.h"
-#include "util/graph_range.h"
-
-#include <queue>
-#include <set>
-
-using namespace std;
-
-namespace ue2 {
-
-/** Hard limit on the maximum number of edges we'll clone before we throw up
- * our hands and report 'Pattern too large.' */
-static const size_t MAX_ASSERT_EDGES = 300000;
-
-/** Flags representing the word-boundary assertions, \\b or \\B. */
-static const int WORDBOUNDARY_FLAGS = POS_FLAG_ASSERT_WORD_TO_WORD
- | POS_FLAG_ASSERT_WORD_TO_NONWORD
- | POS_FLAG_ASSERT_NONWORD_TO_WORD
- | POS_FLAG_ASSERT_NONWORD_TO_NONWORD
- | POS_FLAG_ASSERT_WORD_TO_WORD_UCP
- | POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP
- | POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP
- | POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP;
-
-#define OPEN_EDGE 0U
-#define DEAD_EDGE (~0U)
-
-static
-u32 disjunct(u32 flags1, u32 flags2) {
- /* from two asserts in parallel */
- DEBUG_PRINTF("disjunct %x %x\n", flags1, flags2);
- u32 rv;
- if (flags1 == DEAD_EDGE) {
- rv = flags2;
- } else if (flags2 == DEAD_EDGE) {
- rv = flags1;
- } else if (flags1 == OPEN_EDGE || flags2 == OPEN_EDGE) {
- rv = OPEN_EDGE;
- } else {
- rv = flags1 | flags2;
- }
- DEBUG_PRINTF("--> %x\n", rv);
- return rv;
-}
-
-static
-u32 conjunct(u32 flags1, u32 flags2) {
- /* from two asserts in series */
- DEBUG_PRINTF("conjunct %x %x\n", flags1, flags2);
- u32 rv;
- if (flags1 == OPEN_EDGE) {
- rv = flags2;
- } else if (flags2 == OPEN_EDGE) {
- rv = flags1;
- } else if (flags1 & flags2) {
- rv = flags1 & flags2;
- } else {
- rv = DEAD_EDGE; /* the conjunction of two different word boundary
- * assertion is impassable */
- }
-
- DEBUG_PRINTF("--> %x\n", rv);
- return rv;
-}
-
-typedef map<pair<NFAVertex, NFAVertex>, NFAEdge> edge_cache_t;
-
-static
+#include "nfagraph/ng.h"
+#include "nfagraph/ng_prune.h"
+#include "nfagraph/ng_redundancy.h"
+#include "nfagraph/ng_util.h"
+#include "parser/position.h" // for POS flags
+#include "util/compile_error.h"
+#include "util/graph_range.h"
+
+#include <queue>
+#include <set>
+
+using namespace std;
+
+namespace ue2 {
+
+/** Hard limit on the maximum number of edges we'll clone before we throw up
+ * our hands and report 'Pattern too large.' */
+static const size_t MAX_ASSERT_EDGES = 300000;
+
+/** Flags representing the word-boundary assertions, \\b or \\B. */
+static const int WORDBOUNDARY_FLAGS = POS_FLAG_ASSERT_WORD_TO_WORD
+ | POS_FLAG_ASSERT_WORD_TO_NONWORD
+ | POS_FLAG_ASSERT_NONWORD_TO_WORD
+ | POS_FLAG_ASSERT_NONWORD_TO_NONWORD
+ | POS_FLAG_ASSERT_WORD_TO_WORD_UCP
+ | POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP
+ | POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP
+ | POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP;
+
+#define OPEN_EDGE 0U
+#define DEAD_EDGE (~0U)
+
+static
+u32 disjunct(u32 flags1, u32 flags2) {
+ /* from two asserts in parallel */
+ DEBUG_PRINTF("disjunct %x %x\n", flags1, flags2);
+ u32 rv;
+ if (flags1 == DEAD_EDGE) {
+ rv = flags2;
+ } else if (flags2 == DEAD_EDGE) {
+ rv = flags1;
+ } else if (flags1 == OPEN_EDGE || flags2 == OPEN_EDGE) {
+ rv = OPEN_EDGE;
+ } else {
+ rv = flags1 | flags2;
+ }
+ DEBUG_PRINTF("--> %x\n", rv);
+ return rv;
+}
+
+static
+u32 conjunct(u32 flags1, u32 flags2) {
+ /* from two asserts in series */
+ DEBUG_PRINTF("conjunct %x %x\n", flags1, flags2);
+ u32 rv;
+ if (flags1 == OPEN_EDGE) {
+ rv = flags2;
+ } else if (flags2 == OPEN_EDGE) {
+ rv = flags1;
+ } else if (flags1 & flags2) {
+ rv = flags1 & flags2;
+ } else {
+ rv = DEAD_EDGE; /* the conjunction of two different word boundary
+ * assertion is impassable */
+ }
+
+ DEBUG_PRINTF("--> %x\n", rv);
+ return rv;
+}
+
+typedef map<pair<NFAVertex, NFAVertex>, NFAEdge> edge_cache_t;
+
+static
void replaceAssertVertex(NGHolder &g, NFAVertex t, const ExpressionInfo &expr,
edge_cache_t &edge_cache, u32 &assert_edge_count) {
DEBUG_PRINTF("replacing assert vertex %zu\n", g[t].index);
-
- const u32 flags = g[t].assert_flags;
+
+ const u32 flags = g[t].assert_flags;
DEBUG_PRINTF("consider assert vertex %zu with flags %u\n", g[t].index,
flags);
-
- // Wire up all the predecessors to all the successors.
-
- for (const auto &inEdge : in_edges_range(t, g)) {
- NFAVertex u = source(inEdge, g);
- if (u == t) {
- continue; // ignore self-loops
- }
-
- const u32 flags_inc_in = conjunct(g[inEdge].assert_flags,
- flags);
- if (flags_inc_in == DEAD_EDGE) {
- DEBUG_PRINTF("fail, in-edge has bad flags %d\n",
- g[inEdge].assert_flags);
- continue;
- }
-
- for (const auto &outEdge : out_edges_range(t, g)) {
- NFAVertex v = target(outEdge, g);
-
+
+ // Wire up all the predecessors to all the successors.
+
+ for (const auto &inEdge : in_edges_range(t, g)) {
+ NFAVertex u = source(inEdge, g);
+ if (u == t) {
+ continue; // ignore self-loops
+ }
+
+ const u32 flags_inc_in = conjunct(g[inEdge].assert_flags,
+ flags);
+ if (flags_inc_in == DEAD_EDGE) {
+ DEBUG_PRINTF("fail, in-edge has bad flags %d\n",
+ g[inEdge].assert_flags);
+ continue;
+ }
+
+ for (const auto &outEdge : out_edges_range(t, g)) {
+ NFAVertex v = target(outEdge, g);
+
DEBUG_PRINTF("consider path [%zu,%zu,%zu]\n", g[u].index,
- g[t].index, g[v].index);
-
- if (v == t) {
- continue; // ignore self-loops
- }
-
- const u32 flags_final = conjunct(g[outEdge].assert_flags,
- flags_inc_in);
-
- if (flags_final == DEAD_EDGE) {
- DEBUG_PRINTF("fail, out-edge has bad flags %d\n",
- g[outEdge].assert_flags);
- continue;
- }
-
- if ((g[u].assert_flags & POS_FLAG_MULTILINE_START)
- && v == g.acceptEod) {
- DEBUG_PRINTF("fail, (?m)^ does not match \\n at eod\n");
- continue;
- }
-
- /* Replace path (u,t,v) with direct edge (u,v), unless the edge
- * already exists, in which case we just need to edit its
- * properties.
- *
- * Use edge_cache to prevent us going O(N).
- */
- auto cache_key = make_pair(u, v);
- auto ecit = edge_cache.find(cache_key);
- if (ecit == edge_cache.end()) {
+ g[t].index, g[v].index);
+
+ if (v == t) {
+ continue; // ignore self-loops
+ }
+
+ const u32 flags_final = conjunct(g[outEdge].assert_flags,
+ flags_inc_in);
+
+ if (flags_final == DEAD_EDGE) {
+ DEBUG_PRINTF("fail, out-edge has bad flags %d\n",
+ g[outEdge].assert_flags);
+ continue;
+ }
+
+ if ((g[u].assert_flags & POS_FLAG_MULTILINE_START)
+ && v == g.acceptEod) {
+ DEBUG_PRINTF("fail, (?m)^ does not match \\n at eod\n");
+ continue;
+ }
+
+ /* Replace path (u,t,v) with direct edge (u,v), unless the edge
+ * already exists, in which case we just need to edit its
+ * properties.
+ *
+ * Use edge_cache to prevent us going O(N).
+ */
+ auto cache_key = make_pair(u, v);
+ auto ecit = edge_cache.find(cache_key);
+ if (ecit == edge_cache.end()) {
DEBUG_PRINTF("adding edge %zu %zu\n", g[u].index, g[v].index);
NFAEdge e = add_edge(u, v, g);
- edge_cache.emplace(cache_key, e);
- g[e].assert_flags = flags;
- if (++assert_edge_count > MAX_ASSERT_EDGES) {
+ edge_cache.emplace(cache_key, e);
+ g[e].assert_flags = flags;
+ if (++assert_edge_count > MAX_ASSERT_EDGES) {
throw CompileError(expr.index, "Pattern is too large.");
- }
- } else {
- NFAEdge e = ecit->second;
+ }
+ } else {
+ NFAEdge e = ecit->second;
DEBUG_PRINTF("updating edge %zu %zu [a %zu]\n", g[u].index,
- g[v].index, g[t].index);
- // Edge already exists.
- u32 &e_flags = g[e].assert_flags;
- e_flags = disjunct(e_flags, flags_final);
- assert(e_flags != DEAD_EDGE);
- }
- }
- }
-
- // Clear vertex t to remove all the old edges.
- /* no need to clear the cache, as we will never look up its edge as it is
- * unreachable */
- clear_vertex(t, g);
-}
-
-static
+ g[v].index, g[t].index);
+ // Edge already exists.
+ u32 &e_flags = g[e].assert_flags;
+ e_flags = disjunct(e_flags, flags_final);
+ assert(e_flags != DEAD_EDGE);
+ }
+ }
+ }
+
+ // Clear vertex t to remove all the old edges.
+ /* no need to clear the cache, as we will never look up its edge as it is
+ * unreachable */
+ clear_vertex(t, g);
+}
+
+static
void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
NFAVertex v, s32 adj) {
- // Don't try and set the report ID of a special vertex.
- assert(!is_special(v, g));
-
- // There should be no reports set already.
- assert(g[v].reports.empty());
-
+ // Don't try and set the report ID of a special vertex.
+ assert(!is_special(v, g));
+
+ // There should be no reports set already.
+ assert(g[v].reports.empty());
+
Report r = rm.getBasicInternalReport(expr, adj);
-
- g[v].reports.insert(rm.getInternalId(r));
+
+ g[v].reports.insert(rm.getInternalId(r));
DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj);
-}
-
-static
+}
+
+static
void checkForMultilineStart(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr) {
- vector<NFAEdge> dead;
- for (auto v : adjacent_vertices_range(g.start, g)) {
- if (!(g[v].assert_flags & POS_FLAG_MULTILINE_START)) {
- continue;
- }
+ vector<NFAEdge> dead;
+ for (auto v : adjacent_vertices_range(g.start, g)) {
+ if (!(g[v].assert_flags & POS_FLAG_MULTILINE_START)) {
+ continue;
+ }
DEBUG_PRINTF("mls %zu %08x\n", g[v].index, g[v].assert_flags);
-
- /* we have found a multi-line start (maybe more than one) */
-
- /* we need to interpose a dummy dot vertex between v and accept if
- * required so that ^ doesn't match trailing \n */
- for (const auto &e : out_edges_range(v, g)) {
- if (target(e, g) == g.accept) {
- dead.push_back(e);
- }
- }
- /* assert has been resolved; clear flag */
- g[v].assert_flags &= ~POS_FLAG_MULTILINE_START;
- }
-
- for (const auto &e : dead) {
- NFAVertex dummy = add_vertex(g);
- g[dummy].char_reach.setall();
+
+ /* we have found a multi-line start (maybe more than one) */
+
+ /* we need to interpose a dummy dot vertex between v and accept if
+ * required so that ^ doesn't match trailing \n */
+ for (const auto &e : out_edges_range(v, g)) {
+ if (target(e, g) == g.accept) {
+ dead.push_back(e);
+ }
+ }
+ /* assert has been resolved; clear flag */
+ g[v].assert_flags &= ~POS_FLAG_MULTILINE_START;
+ }
+
+ for (const auto &e : dead) {
+ NFAVertex dummy = add_vertex(g);
+ g[dummy].char_reach.setall();
setReportId(rm, g, expr, dummy, -1);
- add_edge(source(e, g), dummy, g[e], g);
- add_edge(dummy, g.accept, g);
- }
-
- remove_edges(dead, g);
-}
-
-static
-bool hasAssertVertices(const NGHolder &g) {
- for (auto v : vertices_range(g)) {
- int flags = g[v].assert_flags;
- if (flags & WORDBOUNDARY_FLAGS) {
- return true;
- }
- }
- return false;
-}
-
-/** \brief Convert temporary assert vertices (from construction method) to
- * edge-based flags.
- *
- * Remove the horrors that are the temporary assert vertices which arise from
- * our construction method. Allows the rest of our code base to live in
- * blissful ignorance of their existence. */
+ add_edge(source(e, g), dummy, g[e], g);
+ add_edge(dummy, g.accept, g);
+ }
+
+ remove_edges(dead, g);
+}
+
+static
+bool hasAssertVertices(const NGHolder &g) {
+ for (auto v : vertices_range(g)) {
+ int flags = g[v].assert_flags;
+ if (flags & WORDBOUNDARY_FLAGS) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/** \brief Convert temporary assert vertices (from construction method) to
+ * edge-based flags.
+ *
+ * Remove the horrors that are the temporary assert vertices which arise from
+ * our construction method. Allows the rest of our code base to live in
+ * blissful ignorance of their existence. */
void removeAssertVertices(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr) {
- size_t num = 0;
-
- DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g));
-
- // Sweep over the graph and ascertain that we do actually have vertices
- // with assertion flags set. Otherwise, we're done.
- if (!hasAssertVertices(g)) {
- DEBUG_PRINTF("no assert vertices, done\n");
- return;
- }
-
- u32 assert_edge_count = 0;
-
- // Build a cache of (u, v) vertex pairs to edge descriptors.
- edge_cache_t edge_cache;
- for (const auto &e : edges_range(g)) {
- edge_cache[make_pair(source(e, g), target(e, g))] = e;
- }
-
- for (auto v : vertices_range(g)) {
- if (g[v].assert_flags & WORDBOUNDARY_FLAGS) {
+ size_t num = 0;
+
+ DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g));
+
+ // Sweep over the graph and ascertain that we do actually have vertices
+ // with assertion flags set. Otherwise, we're done.
+ if (!hasAssertVertices(g)) {
+ DEBUG_PRINTF("no assert vertices, done\n");
+ return;
+ }
+
+ u32 assert_edge_count = 0;
+
+ // Build a cache of (u, v) vertex pairs to edge descriptors.
+ edge_cache_t edge_cache;
+ for (const auto &e : edges_range(g)) {
+ edge_cache[make_pair(source(e, g), target(e, g))] = e;
+ }
+
+ for (auto v : vertices_range(g)) {
+ if (g[v].assert_flags & WORDBOUNDARY_FLAGS) {
replaceAssertVertex(g, v, expr, edge_cache, assert_edge_count);
- num++;
- }
- }
-
+ num++;
+ }
+ }
+
checkForMultilineStart(rm, g, expr);
-
- if (num) {
- DEBUG_PRINTF("resolved %zu assert vertices\n", num);
- pruneUseless(g);
- pruneEmptyVertices(g);
+
+ if (num) {
+ DEBUG_PRINTF("resolved %zu assert vertices\n", num);
+ pruneUseless(g);
+ pruneEmptyVertices(g);
renumber_vertices(g);
renumber_edges(g);
- }
-
- DEBUG_PRINTF("after: graph has %zu vertices\n", num_vertices(g));
- assert(!hasAssertVertices(g));
-}
-
-} // namespace ue2
+ }
+
+ DEBUG_PRINTF("after: graph has %zu vertices\n", num_vertices(g));
+ assert(!hasAssertVertices(g));
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/compiler/asserts.h b/contrib/libs/hyperscan/src/compiler/asserts.h
index e5b07e08bf..b4d64c6c9a 100644
--- a/contrib/libs/hyperscan/src/compiler/asserts.h
+++ b/contrib/libs/hyperscan/src/compiler/asserts.h
@@ -1,53 +1,53 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Convert temporary assert vertices (from construction method) to
- * edge-based flags.
- */
-#ifndef ASSERTS_H
-#define ASSERTS_H
-
-namespace ue2 {
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Convert temporary assert vertices (from construction method) to
+ * edge-based flags.
+ */
+#ifndef ASSERTS_H
+#define ASSERTS_H
+
+namespace ue2 {
+
class ExpressionInfo;
-class ReportManager;
+class ReportManager;
class NGHolder;
-
-/** \brief Convert temporary assert vertices (from construction method) to
- * edge-based flags.
- *
- * Remove the horrors that are the temporary assert vertices which arise from
- * our construction method. Allows the rest of our code base to live in
- * blissful ignorance of their existence. */
+
+/** \brief Convert temporary assert vertices (from construction method) to
+ * edge-based flags.
+ *
+ * Remove the horrors that are the temporary assert vertices which arise from
+ * our construction method. Allows the rest of our code base to live in
+ * blissful ignorance of their existence. */
void removeAssertVertices(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr);
-
-} // namespace ue2
-
-#endif // ASSERTS_H
+
+} // namespace ue2
+
+#endif // ASSERTS_H
diff --git a/contrib/libs/hyperscan/src/compiler/compiler.cpp b/contrib/libs/hyperscan/src/compiler/compiler.cpp
index 206c95bd7b..5751bd64f4 100644
--- a/contrib/libs/hyperscan/src/compiler/compiler.cpp
+++ b/contrib/libs/hyperscan/src/compiler/compiler.cpp
@@ -1,105 +1,105 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Compiler front-end interface.
- */
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Compiler front-end interface.
+ */
#include "allocator.h"
-#include "asserts.h"
-#include "compiler.h"
+#include "asserts.h"
+#include "compiler.h"
#include "crc32.h"
-#include "database.h"
-#include "grey.h"
-#include "hs_internal.h"
-#include "hs_runtime.h"
-#include "ue2common.h"
-#include "nfagraph/ng_builder.h"
-#include "nfagraph/ng_dump.h"
-#include "nfagraph/ng.h"
-#include "nfagraph/ng_util.h"
-#include "parser/buildstate.h"
-#include "parser/dump.h"
-#include "parser/Component.h"
+#include "database.h"
+#include "grey.h"
+#include "hs_internal.h"
+#include "hs_runtime.h"
+#include "ue2common.h"
+#include "nfagraph/ng_builder.h"
+#include "nfagraph/ng_dump.h"
+#include "nfagraph/ng.h"
+#include "nfagraph/ng_util.h"
+#include "parser/buildstate.h"
+#include "parser/dump.h"
+#include "parser/Component.h"
#include "parser/logical_combination.h"
-#include "parser/parse_error.h"
-#include "parser/Parser.h" // for flags
-#include "parser/position.h"
-#include "parser/position_dump.h"
-#include "parser/position_info.h"
-#include "parser/prefilter.h"
-#include "parser/shortcut_literal.h"
-#include "parser/unsupported.h"
-#include "parser/utf8_validate.h"
-#include "rose/rose_build.h"
+#include "parser/parse_error.h"
+#include "parser/Parser.h" // for flags
+#include "parser/position.h"
+#include "parser/position_dump.h"
+#include "parser/position_info.h"
+#include "parser/prefilter.h"
+#include "parser/shortcut_literal.h"
+#include "parser/unsupported.h"
+#include "parser/utf8_validate.h"
+#include "rose/rose_build.h"
#include "rose/rose_internal.h"
-#include "som/slot_manager_dump.h"
+#include "som/slot_manager_dump.h"
#include "util/bytecode_ptr.h"
-#include "util/compile_error.h"
-#include "util/target_info.h"
-#include "util/verify_types.h"
+#include "util/compile_error.h"
+#include "util/target_info.h"
+#include "util/verify_types.h"
#include "util/ue2string.h"
-
-#include <algorithm>
-#include <cassert>
-#include <cstdlib>
-#include <cstring>
-#include <fstream>
-#include <memory>
-#include <sstream>
-
-using namespace std;
-
-namespace ue2 {
-
-static
-void validateExt(const hs_expr_ext &ext) {
- static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET |
- HS_EXT_FLAG_MAX_OFFSET |
+
+#include <algorithm>
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <memory>
+#include <sstream>
+
+using namespace std;
+
+namespace ue2 {
+
+static
+void validateExt(const hs_expr_ext &ext) {
+ static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET |
+ HS_EXT_FLAG_MAX_OFFSET |
HS_EXT_FLAG_MIN_LENGTH |
HS_EXT_FLAG_EDIT_DISTANCE |
HS_EXT_FLAG_HAMMING_DISTANCE;
- if (ext.flags & ~ALL_EXT_FLAGS) {
- throw CompileError("Invalid hs_expr_ext flag set.");
- }
-
- if ((ext.flags & HS_EXT_FLAG_MIN_OFFSET) &&
- (ext.flags & HS_EXT_FLAG_MAX_OFFSET) &&
- (ext.min_offset > ext.max_offset)) {
- throw CompileError("In hs_expr_ext, min_offset must be less than or "
- "equal to max_offset.");
- }
-
- if ((ext.flags & HS_EXT_FLAG_MIN_LENGTH) &&
- (ext.flags & HS_EXT_FLAG_MAX_OFFSET) &&
- (ext.min_length > ext.max_offset)) {
- throw CompileError("In hs_expr_ext, min_length must be less than or "
- "equal to max_offset.");
- }
+ if (ext.flags & ~ALL_EXT_FLAGS) {
+ throw CompileError("Invalid hs_expr_ext flag set.");
+ }
+
+ if ((ext.flags & HS_EXT_FLAG_MIN_OFFSET) &&
+ (ext.flags & HS_EXT_FLAG_MAX_OFFSET) &&
+ (ext.min_offset > ext.max_offset)) {
+ throw CompileError("In hs_expr_ext, min_offset must be less than or "
+ "equal to max_offset.");
+ }
+
+ if ((ext.flags & HS_EXT_FLAG_MIN_LENGTH) &&
+ (ext.flags & HS_EXT_FLAG_MAX_OFFSET) &&
+ (ext.min_length > ext.max_offset)) {
+ throw CompileError("In hs_expr_ext, min_length must be less than or "
+ "equal to max_offset.");
+ }
if ((ext.flags & HS_EXT_FLAG_EDIT_DISTANCE) &&
(ext.flags & HS_EXT_FLAG_HAMMING_DISTANCE)) {
@@ -107,8 +107,8 @@ void validateExt(const hs_expr_ext &ext) {
"Hamming distance.");
}
-}
-
+}
+
void ParsedLitExpression::parseLiteral(const char *expression, size_t len,
bool nocase) {
const char *c = expression;
@@ -149,9 +149,9 @@ ParsedLitExpression::ParsedLitExpression(unsigned index_in,
}
-ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
+ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
unsigned flags, ReportID report,
- const hs_expr_ext *ext)
+ const hs_expr_ext *ext)
: expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH,
false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET,
0, 0, 0, flags & HS_FLAG_QUIET) {
@@ -161,128 +161,128 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
"combination with HS_FLAG_SOM_LEFTMOST.");
}
flags &= ~HS_FLAG_QUIET;
- ParseMode mode(flags);
-
- component = parse(expression, mode);
-
+ ParseMode mode(flags);
+
+ component = parse(expression, mode);
+
expr.utf8 = mode.utf8; /* utf8 may be set by parse() */
-
+
const size_t len = strlen(expression);
if (expr.utf8 && !isValidUtf8(expression, len)) {
- throw ParseError("Expression is not valid UTF-8.");
- }
-
- if (!component) {
- assert(0); // parse() should have thrown a ParseError.
- throw ParseError("Parse error.");
- }
-
- if (flags & ~HS_FLAG_ALL) {
- DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
- throw CompileError("Unrecognised flag.");
- }
-
- // FIXME: we disallow highlander + SOM, see UE-1850.
- if ((flags & HS_FLAG_SINGLEMATCH) && (flags & HS_FLAG_SOM_LEFTMOST)) {
- throw CompileError("HS_FLAG_SINGLEMATCH is not supported in "
- "combination with HS_FLAG_SOM_LEFTMOST.");
- }
-
- // FIXME: we disallow prefilter + SOM, see UE-1899.
- if ((flags & HS_FLAG_PREFILTER) && (flags & HS_FLAG_SOM_LEFTMOST)) {
- throw CompileError("HS_FLAG_PREFILTER is not supported in "
- "combination with HS_FLAG_SOM_LEFTMOST.");
- }
-
- // Set SOM type.
- if (flags & HS_FLAG_SOM_LEFTMOST) {
+ throw ParseError("Expression is not valid UTF-8.");
+ }
+
+ if (!component) {
+ assert(0); // parse() should have thrown a ParseError.
+ throw ParseError("Parse error.");
+ }
+
+ if (flags & ~HS_FLAG_ALL) {
+ DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
+ throw CompileError("Unrecognised flag.");
+ }
+
+ // FIXME: we disallow highlander + SOM, see UE-1850.
+ if ((flags & HS_FLAG_SINGLEMATCH) && (flags & HS_FLAG_SOM_LEFTMOST)) {
+ throw CompileError("HS_FLAG_SINGLEMATCH is not supported in "
+ "combination with HS_FLAG_SOM_LEFTMOST.");
+ }
+
+ // FIXME: we disallow prefilter + SOM, see UE-1899.
+ if ((flags & HS_FLAG_PREFILTER) && (flags & HS_FLAG_SOM_LEFTMOST)) {
+ throw CompileError("HS_FLAG_PREFILTER is not supported in "
+ "combination with HS_FLAG_SOM_LEFTMOST.");
+ }
+
+ // Set SOM type.
+ if (flags & HS_FLAG_SOM_LEFTMOST) {
expr.som = SOM_LEFT;
- }
-
- // Set extended parameters, if we have them.
- if (ext) {
- // Ensure that the given parameters make sense.
- validateExt(*ext);
-
- if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) {
+ }
+
+ // Set extended parameters, if we have them.
+ if (ext) {
+ // Ensure that the given parameters make sense.
+ validateExt(*ext);
+
+ if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) {
expr.min_offset = ext->min_offset;
- }
- if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) {
+ }
+ if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) {
expr.max_offset = ext->max_offset;
- }
- if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) {
+ }
+ if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) {
expr.min_length = ext->min_length;
- }
+ }
if (ext->flags & HS_EXT_FLAG_EDIT_DISTANCE) {
expr.edit_distance = ext->edit_distance;
}
if (ext->flags & HS_EXT_FLAG_HAMMING_DISTANCE) {
expr.hamm_distance = ext->hamming_distance;
}
- }
-
- // These are validated in validateExt, so an error will already have been
- // thrown if these conditions don't hold.
+ }
+
+ // These are validated in validateExt, so an error will already have been
+ // thrown if these conditions don't hold.
assert(expr.max_offset >= expr.min_offset);
assert(expr.max_offset >= expr.min_length);
-
- // Since prefiltering and SOM aren't supported together, we must squash any
- // min_length constraint as well.
+
+ // Since prefiltering and SOM aren't supported together, we must squash any
+ // min_length constraint as well.
if (flags & HS_FLAG_PREFILTER && expr.min_length) {
- DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n");
+ DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n");
expr.min_length = 0;
- }
-}
-
-#if defined(DUMP_SUPPORT) || defined(DEBUG)
-/**
- * \brief Dumps the parse tree to screen in debug mode and to disk in dump
- * mode.
- */
+ }
+}
+
+#if defined(DUMP_SUPPORT) || defined(DEBUG)
+/**
+ * \brief Dumps the parse tree to screen in debug mode and to disk in dump
+ * mode.
+ */
void dumpExpression(UNUSED const ParsedExpression &pe,
- UNUSED const char *stage, UNUSED const Grey &grey) {
-#if defined(DEBUG)
+ UNUSED const char *stage, UNUSED const Grey &grey) {
+#if defined(DEBUG)
DEBUG_PRINTF("===== Rule ID: %u (expression index: %u) =====\n",
pe.expr.report, pe.expr.index);
- ostringstream debug_tree;
+ ostringstream debug_tree;
dumpTree(debug_tree, pe.component.get());
- printf("%s\n", debug_tree.str().c_str());
-#endif // DEBUG
-
-#if defined(DUMP_SUPPORT)
- if (grey.dumpFlags & Grey::DUMP_PARSE) {
- stringstream ss;
+ printf("%s\n", debug_tree.str().c_str());
+#endif // DEBUG
+
+#if defined(DUMP_SUPPORT)
+ if (grey.dumpFlags & Grey::DUMP_PARSE) {
+ stringstream ss;
ss << grey.dumpPath << "Expr_" << pe.expr.index << "_componenttree_"
- << stage << ".txt";
- ofstream out(ss.str().c_str());
+ << stage << ".txt";
+ ofstream out(ss.str().c_str());
out << "Component Tree for " << pe.expr.report << endl;
dumpTree(out, pe.component.get());
if (pe.expr.utf8) {
- out << "UTF8 mode" << endl;
- }
- }
-#endif // DEBUG
-}
-#endif
-
-/** \brief Run Component tree optimisations on \a expr. */
-static
+ out << "UTF8 mode" << endl;
+ }
+ }
+#endif // DEBUG
+}
+#endif
+
+/** \brief Run Component tree optimisations on \a expr. */
+static
void optimise(ParsedExpression &pe) {
if (pe.expr.min_length || pe.expr.som) {
- return;
- }
-
- DEBUG_PRINTF("optimising\n");
+ return;
+ }
+
+ DEBUG_PRINTF("optimising\n");
pe.component->optimise(true /* root is connected to sds */);
-}
-
-void addExpression(NG &ng, unsigned index, const char *expression,
- unsigned flags, const hs_expr_ext *ext, ReportID id) {
- assert(expression);
- const CompileContext &cc = ng.cc;
- DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s'\n", index, id, flags,
- expression);
-
+}
+
+void addExpression(NG &ng, unsigned index, const char *expression,
+ unsigned flags, const hs_expr_ext *ext, ReportID id) {
+ assert(expression);
+ const CompileContext &cc = ng.cc;
+ DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s'\n", index, id, flags,
+ expression);
+
if (flags & HS_FLAG_COMBINATION) {
if (flags & ~(HS_FLAG_COMBINATION | HS_FLAG_QUIET |
HS_FLAG_SINGLEMATCH)) {
@@ -322,71 +322,71 @@ void addExpression(NG &ng, unsigned index, const char *expression,
return;
}
- // Ensure that our pattern isn't too long (in characters).
- if (strlen(expression) > cc.grey.limitPatternLength) {
- throw CompileError("Pattern length exceeds limit.");
- }
-
- // Do per-expression processing: errors here will result in an exception
- // being thrown up to our caller
+ // Ensure that our pattern isn't too long (in characters).
+ if (strlen(expression) > cc.grey.limitPatternLength) {
+ throw CompileError("Pattern length exceeds limit.");
+ }
+
+ // Do per-expression processing: errors here will result in an exception
+ // being thrown up to our caller
ParsedExpression pe(index, expression, flags, id, ext);
dumpExpression(pe, "orig", cc.grey);
-
- // Apply prefiltering transformations if desired.
+
+ // Apply prefiltering transformations if desired.
if (pe.expr.prefilter) {
prefilterTree(pe.component, ParseMode(flags));
dumpExpression(pe, "prefiltered", cc.grey);
- }
-
- // Expressions containing zero-width assertions and other extended pcre
- // types aren't supported yet. This call will throw a ParseError exception
- // if the component tree contains such a construct.
+ }
+
+ // Expressions containing zero-width assertions and other extended pcre
+ // types aren't supported yet. This call will throw a ParseError exception
+ // if the component tree contains such a construct.
checkUnsupported(*pe.component);
-
+
pe.component->checkEmbeddedStartAnchor(true);
pe.component->checkEmbeddedEndAnchor(true);
-
- if (cc.grey.optimiseComponentTree) {
+
+ if (cc.grey.optimiseComponentTree) {
optimise(pe);
dumpExpression(pe, "opt", cc.grey);
- }
-
- DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n",
+ }
+
+ DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n",
pe.component.get(), pe.expr.index, pe.expr.report);
-
- // You can only use the SOM flags if you've also specified an SOM
- // precision mode.
+
+ // You can only use the SOM flags if you've also specified an SOM
+ // precision mode.
if (pe.expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) {
- throw CompileError("To use a SOM expression flag in streaming mode, "
- "an SOM precision mode (e.g. "
- "HS_MODE_SOM_HORIZON_LARGE) must be specified.");
- }
-
- // If this expression is a literal, we can feed it directly to Rose rather
- // than building the NFA graph.
+ throw CompileError("To use a SOM expression flag in streaming mode, "
+ "an SOM precision mode (e.g. "
+ "HS_MODE_SOM_HORIZON_LARGE) must be specified.");
+ }
+
+ // If this expression is a literal, we can feed it directly to Rose rather
+ // than building the NFA graph.
if (shortcutLiteral(ng, pe)) {
- DEBUG_PRINTF("took literal short cut\n");
- return;
- }
-
+ DEBUG_PRINTF("took literal short cut\n");
+ return;
+ }
+
auto built_expr = buildGraph(ng.rm, cc, pe);
if (!built_expr.g) {
- DEBUG_PRINTF("NFA build failed on ID %u, but no exception was "
+ DEBUG_PRINTF("NFA build failed on ID %u, but no exception was "
"thrown.\n", pe.expr.report);
- throw CompileError("Internal error.");
- }
-
+ throw CompileError("Internal error.");
+ }
+
if (!pe.expr.allow_vacuous && matches_everywhere(*built_expr.g)) {
- throw CompileError("Pattern matches empty buffer; use "
- "HS_FLAG_ALLOWEMPTY to enable support.");
- }
-
+ throw CompileError("Pattern matches empty buffer; use "
+ "HS_FLAG_ALLOWEMPTY to enable support.");
+ }
+
if (!ng.addGraph(built_expr.expr, std::move(built_expr.g))) {
DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", pe.expr.report);
- throw CompileError("Error compiling expression.");
- }
-}
-
+ throw CompileError("Error compiling expression.");
+ }
+}
+
void addLitExpression(NG &ng, unsigned index, const char *expression,
unsigned flags, const hs_expr_ext *ext, ReportID id,
size_t expLength) {
@@ -429,41 +429,41 @@ void addLitExpression(NG &ng, unsigned index, const char *expression,
}
}
-static
+static
bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) {
- const u32 minWidth =
- ng.minWidth.is_finite() ? verify_u32(ng.minWidth) : ROSE_BOUND_INF;
- auto rose = ng.rose->buildRose(minWidth);
-
- if (!rose) {
- DEBUG_PRINTF("error building rose\n");
- assert(0);
- return nullptr;
- }
-
- dumpReportManager(ng.rm, ng.cc.grey);
- dumpSomSlotManager(ng.ssm, ng.cc.grey);
- dumpSmallWrite(rose.get(), ng.cc.grey);
-
- return rose;
-}
-
-platform_t target_to_platform(const target_t &target_info) {
- platform_t p;
- p = 0;
-
- if (!target_info.has_avx2()) {
- p |= HS_PLATFORM_NOAVX2;
- }
+ const u32 minWidth =
+ ng.minWidth.is_finite() ? verify_u32(ng.minWidth) : ROSE_BOUND_INF;
+ auto rose = ng.rose->buildRose(minWidth);
+
+ if (!rose) {
+ DEBUG_PRINTF("error building rose\n");
+ assert(0);
+ return nullptr;
+ }
+
+ dumpReportManager(ng.rm, ng.cc.grey);
+ dumpSomSlotManager(ng.ssm, ng.cc.grey);
+ dumpSmallWrite(rose.get(), ng.cc.grey);
+
+ return rose;
+}
+
+platform_t target_to_platform(const target_t &target_info) {
+ platform_t p;
+ p = 0;
+
+ if (!target_info.has_avx2()) {
+ p |= HS_PLATFORM_NOAVX2;
+ }
if (!target_info.has_avx512()) {
p |= HS_PLATFORM_NOAVX512;
}
if (!target_info.has_avx512vbmi()) {
p |= HS_PLATFORM_NOAVX512VBMI;
}
- return p;
-}
-
+ return p;
+}
+
/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated
* \ref hs_database, ensuring that it is padded correctly to give cacheline
* alignment. */
@@ -504,132 +504,132 @@ hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag) {
- assert(length);
-
- auto rose = generateRoseEngine(ng);
+ assert(length);
+
+ auto rose = generateRoseEngine(ng);
struct RoseEngine *roseHead = rose.get();
roseHead->pureLiteral = pureFlag;
- if (!rose) {
- throw CompileError("Unable to generate bytecode.");
- }
+ if (!rose) {
+ throw CompileError("Unable to generate bytecode.");
+ }
*length = rose.size();
- if (!*length) {
- DEBUG_PRINTF("RoseEngine has zero length\n");
- assert(0);
- throw CompileError("Internal error.");
- }
-
- const char *bytecode = (const char *)(rose.get());
- const platform_t p = target_to_platform(ng.cc.target_info);
- struct hs_database *db = dbCreate(bytecode, *length, p);
- if (!db) {
- throw CompileError("Could not allocate memory for bytecode.");
- }
-
- return db;
-}
-
-static
-void stripFromPositions(vector<PositionInfo> &v, Position pos) {
- auto removed = remove(v.begin(), v.end(), PositionInfo(pos));
- v.erase(removed, v.end());
-}
-
-static
-void connectInitialStates(GlushkovBuildState &bs,
- const ParsedExpression &expr) {
- vector<PositionInfo> initials = expr.component->first();
- const NFABuilder &builder = bs.getBuilder();
- const Position startState = builder.getStart();
- const Position startDotStarState = builder.getStartDotStar();
-
- DEBUG_PRINTF("wiring initials = %s\n",
- dumpPositions(initials.begin(), initials.end()).c_str());
-
- vector<PositionInfo> starts = {startState, startDotStarState};
-
- // strip start and startDs, which can be present due to boundaries
- stripFromPositions(initials, startState);
- stripFromPositions(initials, startDotStarState);
-
- // replace epsilons with accepts
- for (const auto &s : initials) {
- if (s.pos != GlushkovBuildState::POS_EPSILON) {
- continue;
- }
-
- assert(starts.size() == 2); /* start, startds */
- vector<PositionInfo> starts_temp = starts;
- starts_temp[0].flags = s.flags;
- starts_temp[1].flags = s.flags;
- bs.connectAccepts(starts_temp);
- }
-
- if (!initials.empty()) {
- bs.connectRegions(starts, initials);
- }
-}
-
-static
-void connectFinalStates(GlushkovBuildState &bs, const ParsedExpression &expr) {
- vector<PositionInfo> finals = expr.component->last();
-
- DEBUG_PRINTF("wiring finals = %s\n",
- dumpPositions(finals.begin(), finals.end()).c_str());
-
- bs.connectAccepts(finals);
-}
-
-#ifndef NDEBUG
-static
-bool isSupported(const Component &c) {
- try {
- checkUnsupported(c);
- return true;
- }
- catch (ParseError &) {
- return false;
- }
-}
-#endif
-
+ if (!*length) {
+ DEBUG_PRINTF("RoseEngine has zero length\n");
+ assert(0);
+ throw CompileError("Internal error.");
+ }
+
+ const char *bytecode = (const char *)(rose.get());
+ const platform_t p = target_to_platform(ng.cc.target_info);
+ struct hs_database *db = dbCreate(bytecode, *length, p);
+ if (!db) {
+ throw CompileError("Could not allocate memory for bytecode.");
+ }
+
+ return db;
+}
+
+static
+void stripFromPositions(vector<PositionInfo> &v, Position pos) {
+ auto removed = remove(v.begin(), v.end(), PositionInfo(pos));
+ v.erase(removed, v.end());
+}
+
+static
+void connectInitialStates(GlushkovBuildState &bs,
+ const ParsedExpression &expr) {
+ vector<PositionInfo> initials = expr.component->first();
+ const NFABuilder &builder = bs.getBuilder();
+ const Position startState = builder.getStart();
+ const Position startDotStarState = builder.getStartDotStar();
+
+ DEBUG_PRINTF("wiring initials = %s\n",
+ dumpPositions(initials.begin(), initials.end()).c_str());
+
+ vector<PositionInfo> starts = {startState, startDotStarState};
+
+ // strip start and startDs, which can be present due to boundaries
+ stripFromPositions(initials, startState);
+ stripFromPositions(initials, startDotStarState);
+
+ // replace epsilons with accepts
+ for (const auto &s : initials) {
+ if (s.pos != GlushkovBuildState::POS_EPSILON) {
+ continue;
+ }
+
+ assert(starts.size() == 2); /* start, startds */
+ vector<PositionInfo> starts_temp = starts;
+ starts_temp[0].flags = s.flags;
+ starts_temp[1].flags = s.flags;
+ bs.connectAccepts(starts_temp);
+ }
+
+ if (!initials.empty()) {
+ bs.connectRegions(starts, initials);
+ }
+}
+
+static
+void connectFinalStates(GlushkovBuildState &bs, const ParsedExpression &expr) {
+ vector<PositionInfo> finals = expr.component->last();
+
+ DEBUG_PRINTF("wiring finals = %s\n",
+ dumpPositions(finals.begin(), finals.end()).c_str());
+
+ bs.connectAccepts(finals);
+}
+
+#ifndef NDEBUG
+static
+bool isSupported(const Component &c) {
+ try {
+ checkUnsupported(c);
+ return true;
+ }
+ catch (ParseError &) {
+ return false;
+ }
+}
+#endif
+
BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc,
const ParsedExpression &pe) {
assert(isSupported(*pe.component));
-
+
const auto builder = makeNFABuilder(rm, cc, pe);
- assert(builder);
-
- // Set up START and ACCEPT states; retrieve the special states
+ assert(builder);
+
+ // Set up START and ACCEPT states; retrieve the special states
const auto bs = makeGlushkovBuildState(*builder, pe.expr.prefilter);
-
- // Map position IDs to characters/components
+
+ // Map position IDs to characters/components
pe.component->notePositions(*bs);
-
- // Wire the start dotstar state to the firsts
+
+ // Wire the start dotstar state to the firsts
connectInitialStates(*bs, pe);
-
- DEBUG_PRINTF("wire up body of expr\n");
- // Build the rest of the FOLLOW set
- vector<PositionInfo> initials = {builder->getStartDotStar(),
- builder->getStart()};
+
+ DEBUG_PRINTF("wire up body of expr\n");
+ // Build the rest of the FOLLOW set
+ vector<PositionInfo> initials = {builder->getStartDotStar(),
+ builder->getStart()};
pe.component->buildFollowSet(*bs, initials);
-
- // Wire the lasts to the accept state
+
+ // Wire the lasts to the accept state
connectFinalStates(*bs, pe);
-
- // Create our edges
- bs->buildEdges();
-
+
+ // Create our edges
+ bs->buildEdges();
+
BuiltExpression built_expr = builder->getGraph();
assert(built_expr.g);
-
+
dumpDotWrapper(*built_expr.g, built_expr.expr, "00_before_asserts",
cc.grey);
removeAssertVertices(rm, *built_expr.g, built_expr.expr);
-
+
return built_expr;
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/compiler/compiler.h b/contrib/libs/hyperscan/src/compiler/compiler.h
index 37412d8115..b42cb1425b 100644
--- a/contrib/libs/hyperscan/src/compiler/compiler.h
+++ b/contrib/libs/hyperscan/src/compiler/compiler.h
@@ -1,72 +1,72 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Compiler front-end interface
- */
-
-#ifndef COMPILER_H
-#define COMPILER_H
-
-#include "ue2common.h"
-#include "database.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Compiler front-end interface
+ */
+
+#ifndef COMPILER_H
+#define COMPILER_H
+
+#include "ue2common.h"
+#include "database.h"
#include "compiler/expression_info.h"
-#include "parser/Component.h"
+#include "parser/Component.h"
#include "util/noncopyable.h"
#include "util/ue2string.h"
-
-#include <memory>
-
-struct hs_database;
-struct hs_expr_ext;
-
-namespace ue2 {
-
-struct CompileContext;
-struct Grey;
-struct target_t;
-class NG;
+
+#include <memory>
+
+struct hs_database;
+struct hs_expr_ext;
+
+namespace ue2 {
+
+struct CompileContext;
+struct Grey;
+struct target_t;
+class NG;
class NGHolder;
-class ReportManager;
-
+class ReportManager;
+
/** \brief Class gathering together the pieces of a parsed expression. */
class ParsedExpression : noncopyable {
-public:
- ParsedExpression(unsigned index, const char *expression, unsigned flags,
+public:
+ ParsedExpression(unsigned index, const char *expression, unsigned flags,
ReportID report, const hs_expr_ext *ext = nullptr);
-
+
/** \brief Expression information (from flags, extparam etc) */
ExpressionInfo expr;
-
+
/** \brief Root node of parsed component tree. */
std::unique_ptr<Component> component;
};
-
+
/** \brief Class gathering together the pieces of a parsed lit-expression. */
class ParsedLitExpression : noncopyable {
@@ -90,83 +90,83 @@ public:
struct BuiltExpression {
/** \brief Expression information (from flags, extparam etc) */
ExpressionInfo expr;
-
+
/** \brief Built Glushkov NFA graph. */
std::unique_ptr<NGHolder> g;
-};
-
-/**
- * Add an expression to the compiler.
- *
- * @param ng
- * The global NG object.
- * @param index
- * The index of the expression (used for errors)
- * @param expression
- * NULL-terminated PCRE expression
- * @param flags
- * The full set of Hyperscan flags associated with this rule.
- * @param ext
- * Struct containing extra parameters for this expression, or NULL if
- * none.
+};
+
+/**
+ * Add an expression to the compiler.
+ *
+ * @param ng
+ * The global NG object.
+ * @param index
+ * The index of the expression (used for errors)
+ * @param expression
+ * NULL-terminated PCRE expression
+ * @param flags
+ * The full set of Hyperscan flags associated with this rule.
+ * @param ext
+ * Struct containing extra parameters for this expression, or NULL if
+ * none.
* @param report
- * The identifier to associate with the expression; returned by engine on
- * match.
- */
-void addExpression(NG &ng, unsigned index, const char *expression,
+ * The identifier to associate with the expression; returned by engine on
+ * match.
+ */
+void addExpression(NG &ng, unsigned index, const char *expression,
unsigned flags, const hs_expr_ext *ext, ReportID report);
-
+
void addLitExpression(NG &ng, unsigned index, const char *expression,
unsigned flags, const hs_expr_ext *ext, ReportID id,
size_t expLength);
-/**
- * Build a Hyperscan database out of the expressions we've been given. A
- * fatal error will result in an exception being thrown.
- *
- * @param ng
- * The global NG object.
- * @param[out] length
- * The number of bytes occupied by the compiled structure.
+/**
+ * Build a Hyperscan database out of the expressions we've been given. A
+ * fatal error will result in an exception being thrown.
+ *
+ * @param ng
+ * The global NG object.
+ * @param[out] length
+ * The number of bytes occupied by the compiled structure.
* @param pureFlag
* The flag indicating invocation from literal API or not.
- * @return
- * The compiled structure. Should be deallocated with the
- * hs_database_free() function.
- */
+ * @return
+ * The compiled structure. Should be deallocated with the
+ * hs_database_free() function.
+ */
struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag);
-
-/**
- * Constructs an NFA graph from the given expression tree.
- *
- * @param rm
- * Global ReportManager for this compile.
- * @param cc
- * Global compile context for this compile.
- * @param expr
- * ParsedExpression object.
- * @return
- * nullptr on error.
- */
+
+/**
+ * Constructs an NFA graph from the given expression tree.
+ *
+ * @param rm
+ * Global ReportManager for this compile.
+ * @param cc
+ * Global compile context for this compile.
+ * @param expr
+ * ParsedExpression object.
+ * @return
+ * nullptr on error.
+ */
BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc,
const ParsedExpression &expr);
-
-/**
- * Build a platform_t out of a target_t.
- */
-platform_t target_to_platform(const target_t &target_info);
-
-#if defined(DUMP_SUPPORT) || defined(DEBUG)
-void dumpExpression(const ParsedExpression &expr, const char *stage,
- const Grey &grey);
-#else
-static really_inline
-void dumpExpression(UNUSED const ParsedExpression &expr,
- UNUSED const char *stage, UNUSED const Grey &grey) {
-}
-
-#endif
-
-} // namespace
-
-#endif // COMPILER_H
+
+/**
+ * Build a platform_t out of a target_t.
+ */
+platform_t target_to_platform(const target_t &target_info);
+
+#if defined(DUMP_SUPPORT) || defined(DEBUG)
+void dumpExpression(const ParsedExpression &expr, const char *stage,
+ const Grey &grey);
+#else
+static really_inline
+void dumpExpression(UNUSED const ParsedExpression &expr,
+ UNUSED const char *stage, UNUSED const Grey &grey) {
+}
+
+#endif
+
+} // namespace
+
+#endif // COMPILER_H
diff --git a/contrib/libs/hyperscan/src/compiler/error.cpp b/contrib/libs/hyperscan/src/compiler/error.cpp
index e229c2a37b..07db98192d 100644
--- a/contrib/libs/hyperscan/src/compiler/error.cpp
+++ b/contrib/libs/hyperscan/src/compiler/error.cpp
@@ -1,110 +1,110 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Compile-time error utils.
- */
-#include "allocator.h"
-#include "error.h"
-#include "ue2common.h"
-#include "hs_compile.h"
-#include "util/compile_error.h"
-
-#include <cstring>
-#include <string>
-
-using std::string;
-
-static const char failureNoMemory[] = "Unable to allocate memory.";
-static const char failureInternal[] = "Internal error.";
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Compile-time error utils.
+ */
+#include "allocator.h"
+#include "error.h"
+#include "ue2common.h"
+#include "hs_compile.h"
+#include "util/compile_error.h"
+
+#include <cstring>
+#include <string>
+
+using std::string;
+
+static const char failureNoMemory[] = "Unable to allocate memory.";
+static const char failureInternal[] = "Internal error.";
static const char failureBadAlloc[] = "Allocator returned misaligned memory.";
-
-extern const hs_compile_error_t hs_enomem = {
- const_cast<char *>(failureNoMemory), 0
-};
-extern const hs_compile_error_t hs_einternal = {
- const_cast<char *>(failureInternal), 0
-};
+
+extern const hs_compile_error_t hs_enomem = {
+ const_cast<char *>(failureNoMemory), 0
+};
+extern const hs_compile_error_t hs_einternal = {
+ const_cast<char *>(failureInternal), 0
+};
extern const hs_compile_error_t hs_badalloc = {
const_cast<char *>(failureBadAlloc), 0
};
-
-namespace ue2 {
-
-hs_compile_error_t *generateCompileError(const string &err, int expression) {
- hs_compile_error_t *ret =
- (struct hs_compile_error *)hs_misc_alloc(sizeof(hs_compile_error_t));
- if (ret) {
+
+namespace ue2 {
+
+hs_compile_error_t *generateCompileError(const string &err, int expression) {
+ hs_compile_error_t *ret =
+ (struct hs_compile_error *)hs_misc_alloc(sizeof(hs_compile_error_t));
+ if (ret) {
hs_error_t e = hs_check_alloc(ret);
if (e != HS_SUCCESS) {
hs_misc_free(ret);
return const_cast<hs_compile_error_t *>(&hs_badalloc);
}
- char *msg = (char *)hs_misc_alloc(err.size() + 1);
- if (msg) {
+ char *msg = (char *)hs_misc_alloc(err.size() + 1);
+ if (msg) {
e = hs_check_alloc(msg);
if (e != HS_SUCCESS) {
hs_misc_free(msg);
return const_cast<hs_compile_error_t *>(&hs_badalloc);
}
- memcpy(msg, err.c_str(), err.size() + 1);
- ret->message = msg;
- } else {
- hs_misc_free(ret);
- ret = nullptr;
- }
- }
-
- if (!ret || !ret->message) {
- return const_cast<hs_compile_error_t *>(&hs_enomem);
- }
-
- ret->expression = expression;
-
- return ret;
-}
-
-hs_compile_error_t *generateCompileError(const CompileError &e) {
- return generateCompileError(e.reason, e.hasIndex ? (int)e.index : -1);
-}
-
-void freeCompileError(hs_compile_error_t *error) {
- if (!error) {
- return;
- }
+ memcpy(msg, err.c_str(), err.size() + 1);
+ ret->message = msg;
+ } else {
+ hs_misc_free(ret);
+ ret = nullptr;
+ }
+ }
+
+ if (!ret || !ret->message) {
+ return const_cast<hs_compile_error_t *>(&hs_enomem);
+ }
+
+ ret->expression = expression;
+
+ return ret;
+}
+
+hs_compile_error_t *generateCompileError(const CompileError &e) {
+ return generateCompileError(e.reason, e.hasIndex ? (int)e.index : -1);
+}
+
+void freeCompileError(hs_compile_error_t *error) {
+ if (!error) {
+ return;
+ }
if (error == &hs_enomem || error == &hs_einternal ||
error == &hs_badalloc) {
- // These are not allocated.
- return;
- }
-
- hs_misc_free(error->message);
- hs_misc_free(error);
-}
-
-} // namespace ue2
+ // These are not allocated.
+ return;
+ }
+
+ hs_misc_free(error->message);
+ hs_misc_free(error);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/compiler/error.h b/contrib/libs/hyperscan/src/compiler/error.h
index aad66967a2..2e35ae32d8 100644
--- a/contrib/libs/hyperscan/src/compiler/error.h
+++ b/contrib/libs/hyperscan/src/compiler/error.h
@@ -1,55 +1,55 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Compile-time error utils.
- */
-
-#ifndef COMPILE_ERROR_H
-#define COMPILE_ERROR_H
-
-#include <string>
-
-struct hs_compile_error;
-
-// Special errors that aren't allocated with hs_alloc/hs_free.
-extern const hs_compile_error hs_enomem;
-extern const hs_compile_error hs_einternal;
-
-namespace ue2 {
-
-class CompileError;
-
-hs_compile_error *generateCompileError(const std::string &err, int expression);
-hs_compile_error *generateCompileError(const CompileError &e);
-
-void freeCompileError(hs_compile_error *error);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Compile-time error utils.
+ */
+
+#ifndef COMPILE_ERROR_H
+#define COMPILE_ERROR_H
+
+#include <string>
+
+struct hs_compile_error;
+
+// Special errors that aren't allocated with hs_alloc/hs_free.
+extern const hs_compile_error hs_enomem;
+extern const hs_compile_error hs_einternal;
+
+namespace ue2 {
+
+class CompileError;
+
+hs_compile_error *generateCompileError(const std::string &err, int expression);
+hs_compile_error *generateCompileError(const CompileError &e);
+
+void freeCompileError(hs_compile_error *error);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/crc32.c b/contrib/libs/hyperscan/src/crc32.c
index c3087393ba..1dae47b4e9 100644
--- a/contrib/libs/hyperscan/src/crc32.c
+++ b/contrib/libs/hyperscan/src/crc32.c
@@ -1,648 +1,648 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "crc32.h"
-#include "config.h"
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "crc32.h"
+#include "config.h"
+#include "ue2common.h"
#include "util/arch.h"
#include "util/intrinsics.h"
-
+
#if !defined(HAVE_SSE42)
-
-/***
- *** What follows is derived from Intel's Slicing-by-8 CRC32 impl, which is BSD
- *** licensed and available from http://sourceforge.net/projects/slicing-by-8/
- ***/
-
-/*
- * Copyright (c) 2004-2006 Intel Corporation - All Rights Reserved
- *
- *
- * This software program is licensed subject to the BSD License,
- * available at http://www.opensource.org/licenses/bsd-license.html.
- *
- * Abstract:
- *
- * Tables for software CRC generation
- */
-
-/*
- * The following CRC lookup table was generated automagically
- * using the following model parameters:
- *
- * Generator Polynomial = ................. 0x1EDC6F41
- * Generator Polynomial Length = .......... 32 bits
- * Reflected Bits = ....................... TRUE
- * Table Generation Offset = .............. 32 bits
- * Number of Slices = ..................... 8 slices
- * Slice Lengths = ........................ 8 8 8 8 8 8 8 8
- * Directory Name = ....................... .\
- * File Name = ............................ 8x256_tables.c
- */
-
-static
-u32 crc_tableil8_o32[256] =
-{
- 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
- 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
- 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
- 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
- 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
- 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
- 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
- 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
- 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
- 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
- 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
- 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
- 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
- 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
- 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
- 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
- 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
- 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
- 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
- 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
- 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
- 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
- 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
- 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
- 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
- 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
- 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
- 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
- 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
- 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
- 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
- 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
-};
-
-/*
- * end of the CRC lookup table crc_tableil8_o32
- */
-
-
-
-/*
- * The following CRC lookup table was generated automagically
- * using the following model parameters:
- *
- * Generator Polynomial = ................. 0x1EDC6F41
- * Generator Polynomial Length = .......... 32 bits
- * Reflected Bits = ....................... TRUE
- * Table Generation Offset = .............. 32 bits
- * Number of Slices = ..................... 8 slices
- * Slice Lengths = ........................ 8 8 8 8 8 8 8 8
- * Directory Name = ....................... .\
- * File Name = ............................ 8x256_tables.c
- */
-
-static
-u32 crc_tableil8_o40[256] =
-{
- 0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945,
- 0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, 0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD,
- 0x3FC5F181, 0x2C6769F6, 0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4,
- 0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, 0xEC5B53E5, 0xFFF9CB92, 0xCB1E630B, 0xD8BCFB7C,
- 0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B, 0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47,
- 0xE29F20BA, 0xF13DB8CD, 0xC5DA1054, 0xD6788823, 0xAC154166, 0xBFB7D911, 0x8B507188, 0x98F2E9FF,
- 0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, 0x0EC4735F, 0x1D66EB28, 0x298143B1, 0x3A23DBC6,
- 0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2, 0x93D0B0E7, 0x80722890, 0xB4958009, 0xA737187E,
- 0xFF17C604, 0xECB55E73, 0xD852F6EA, 0xCBF06E9D, 0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41,
- 0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, 0x2C896460, 0x3F2BFC17, 0x0BCC548E, 0x186ECCF9,
- 0xC0D23785, 0xD370AFF2, 0xE797076B, 0xF4359F1C, 0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0,
- 0x5DC6F43D, 0x4E646C4A, 0x7A83C4D3, 0x69215CA4, 0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78,
- 0x809C2506, 0x933EBD71, 0xA7D915E8, 0xB47B8D9F, 0xCE1644DA, 0xDDB4DCAD, 0xE9537434, 0xFAF1EC43,
- 0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27, 0x53028762, 0x40A01F15, 0x7447B78C, 0x67E52FFB,
- 0xBF59D487, 0xACFB4CF0, 0x981CE469, 0x8BBE7C1E, 0xF1D3B55B, 0xE2712D2C, 0xD69685B5, 0xC5341DC2,
- 0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, 0x6CC776E3, 0x7F65EE94, 0x4B82460D, 0x5820DE7A,
- 0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260, 0xB5499B25, 0xA6EB0352, 0x920CABCB, 0x81AE33BC,
- 0x66D73941, 0x7575A136, 0x419209AF, 0x523091D8, 0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004,
- 0xC4060B78, 0xD7A4930F, 0xE3433B96, 0xF0E1A3E1, 0x8A8C6AA4, 0x992EF2D3, 0xADC95A4A, 0xBE6BC23D,
- 0x5912C8C0, 0x4AB050B7, 0x7E57F82E, 0x6DF56059, 0x1798A91C, 0x043A316B, 0x30DD99F2, 0x237F0185,
- 0x844819FB, 0x97EA818C, 0xA30D2915, 0xB0AFB162, 0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE,
- 0x195CDA43, 0x0AFE4234, 0x3E19EAAD, 0x2DBB72DA, 0x57D6BB9F, 0x447423E8, 0x70938B71, 0x63311306,
- 0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3, 0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F,
- 0x26992BC2, 0x353BB3B5, 0x01DC1B2C, 0x127E835B, 0x68134A1E, 0x7BB1D269, 0x4F567AF0, 0x5CF4E287,
- 0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, 0x4A5E5D21, 0x59FCC556, 0x6D1B6DCF, 0x7EB9F5B8,
- 0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC, 0xD74A9E99, 0xC4E806EE, 0xF00FAE77, 0xE3AD3600,
- 0x3B11CD7C, 0x28B3550B, 0x1C54FD92, 0x0FF665E5, 0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439,
- 0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, 0xE88F6F18, 0xFB2DF76F, 0xCFCA5FF6, 0xDC68C781,
- 0x7B5FDFFF, 0x68FD4788, 0x5C1AEF11, 0x4FB87766, 0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA,
- 0xE64B1C47, 0xF5E98430, 0xC10E2CA9, 0xD2ACB4DE, 0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502,
- 0x449A2E7E, 0x5738B609, 0x63DF1E90, 0x707D86E7, 0x0A104FA2, 0x19B2D7D5, 0x2D557F4C, 0x3EF7E73B,
- 0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F, 0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483
-};
-
-/*
- * end of the CRC lookup table crc_tableil8_o40
- */
-
-
-
-/*
- * The following CRC lookup table was generated automagically
- * using the following model parameters:
- *
- * Generator Polynomial = ................. 0x1EDC6F41
- * Generator Polynomial Length = .......... 32 bits
- * Reflected Bits = ....................... TRUE
- * Table Generation Offset = .............. 32 bits
- * Number of Slices = ..................... 8 slices
- * Slice Lengths = ........................ 8 8 8 8 8 8 8 8
- * Directory Name = ....................... .\
- * File Name = ............................ 8x256_tables.c
- */
-
-static
-u32 crc_tableil8_o48[256] =
-{
- 0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, 0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469,
- 0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, 0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC,
- 0x70A27D8A, 0xD5E3EFF4, 0x3FCD2F87, 0x9A8CBDF9, 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3,
- 0x48F3434F, 0xEDB2D131, 0x079C1142, 0xA2DD833C, 0xD62DE755, 0x736C752B, 0x9942B558, 0x3C032726,
- 0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67, 0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 0x95B49F7D,
- 0xD915C5D1, 0x7C5457AF, 0x967A97DC, 0x333B05A2, 0x47CB61CB, 0xE28AF3B5, 0x08A433C6, 0xADE5A1B8,
- 0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, 0x0F382284, 0xAA79B0FA, 0x40577089, 0xE516E2F7,
- 0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828, 0x37691C41, 0x92288E3F, 0x78064E4C, 0xDD47DC32,
- 0xC76580D9, 0x622412A7, 0x880AD2D4, 0x2D4B40AA, 0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0,
- 0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, 0x61EA1A06, 0xC4AB8878, 0x2E85480B, 0x8BC4DA75,
- 0xB7C7FD53, 0x12866F2D, 0xF8A8AF5E, 0x5DE93D20, 0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A,
- 0x8F96C396, 0x2AD751E8, 0xC0F9919B, 0x65B803E5, 0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF,
- 0x26217BCD, 0x8360E9B3, 0x694E29C0, 0xCC0FBBBE, 0xB8FFDFD7, 0x1DBE4DA9, 0xF7908DDA, 0x52D11FA4,
- 0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B, 0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161,
- 0x56830647, 0xF3C29439, 0x19EC544A, 0xBCADC634, 0xC85DA25D, 0x6D1C3023, 0x8732F050, 0x2273622E,
- 0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, 0xF00C9C98, 0x554D0EE6, 0xBF63CE95, 0x1A225CEB,
- 0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730, 0x15F9D359, 0xB0B84127, 0x5A968154, 0xFFD7132A,
- 0xB3764986, 0x1637DBF8, 0xFC191B8B, 0x595889F5, 0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF,
- 0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, 0x655BAED3, 0xC01A3CAD, 0x2A34FCDE, 0x8F756EA0,
- 0xC3D4340C, 0x6695A672, 0x8CBB6601, 0x29FAF47F, 0x5D0A9016, 0xF84B0268, 0x1265C21B, 0xB7245065,
- 0x6A638C57, 0xCF221E29, 0x250CDE5A, 0x804D4C24, 0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E,
- 0x5232B292, 0xF77320EC, 0x1D5DE09F, 0xB81C72E1, 0xCCEC1688, 0x69AD84F6, 0x83834485, 0x26C2D6FB,
- 0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE, 0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4,
- 0x2290CF18, 0x87D15D66, 0x6DFF9D15, 0xC8BE0F6B, 0xBC4E6B02, 0x190FF97C, 0xF321390F, 0x5660AB71,
- 0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, 0xD29C5380, 0x77DDC1FE, 0x9DF3018D, 0x38B293F3,
- 0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C, 0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48, 0x00E3AD36,
- 0x3CE08A10, 0x99A1186E, 0x738FD81D, 0xD6CE4A63, 0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79,
- 0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, 0x9A6F10CF, 0x3F2E82B1, 0xD50042C2, 0x7041D0BC,
- 0xAD060C8E, 0x08479EF0, 0xE2695E83, 0x4728CCFD, 0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7,
- 0x9557324B, 0x3016A035, 0xDA386046, 0x7F79F238, 0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622,
- 0xDDA47104, 0x78E5E37A, 0x92CB2309, 0x378AB177, 0x437AD51E, 0xE63B4760, 0x0C158713, 0xA954156D,
- 0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2, 0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8
-};
-
-/*
- * end of the CRC lookup table crc_tableil8_o48
- */
-
-
-
-/*
- * The following CRC lookup table was generated automagically
- * using the following model parameters:
- *
- * Generator Polynomial = ................. 0x1EDC6F41
- * Generator Polynomial Length = .......... 32 bits
- * Reflected Bits = ....................... TRUE
- * Table Generation Offset = .............. 32 bits
- * Number of Slices = ..................... 8 slices
- * Slice Lengths = ........................ 8 8 8 8 8 8 8 8
- * Directory Name = ....................... .\
- * File Name = ............................ 8x256_tables.c
- */
-
-static
-u32 crc_tableil8_o56[256] =
-{
- 0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, 0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA,
- 0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF, 0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C,
- 0xE964B13D, 0x34211B85, 0x560392BC, 0x8B463804, 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7,
- 0x1F20D2DB, 0xC2657863, 0xA047F15A, 0x7D025BE2, 0x6402E328, 0xB9474990, 0xDB65C0A9, 0x06206A11,
- 0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2, 0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41,
- 0x2161776D, 0xFC24DDD5, 0x9E0654EC, 0x4343FE54, 0x5A43469E, 0x8706EC26, 0xE524651F, 0x3861CFA7,
- 0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, 0x45639445, 0x98263EFD, 0xFA04B7C4, 0x27411D7C,
- 0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69, 0xB327F7A3, 0x6E625D1B, 0x0C40D422, 0xD1057E9A,
- 0xABA65FE7, 0x76E3F55F, 0x14C17C66, 0xC984D6DE, 0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D,
- 0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, 0x26C00DF2, 0xFB85A74A, 0x99A72E73, 0x44E284CB,
- 0x42C2EEDA, 0x9F874462, 0xFDA5CD5B, 0x20E067E3, 0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610,
- 0xB4868D3C, 0x69C32784, 0x0BE1AEBD, 0xD6A40405, 0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6,
- 0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED, 0x1EA1C255, 0x07A17A9F, 0xDAE4D027, 0xB8C6591E, 0x6583F3A6,
- 0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3, 0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040,
- 0x95E7FA51, 0x48A250E9, 0x2A80D9D0, 0xF7C57368, 0xEEC5CBA2, 0x3380611A, 0x51A2E823, 0x8CE7429B,
- 0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, 0x1881A844, 0xC5C402FC, 0xA7E68BC5, 0x7AA3217D,
- 0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006, 0x2982F8CC, 0xF4C75274, 0x96E5DB4D, 0x4BA071F5,
- 0xA4E4AAD9, 0x79A10061, 0x1B838958, 0xC6C623E0, 0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213,
- 0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, 0xC0E649F1, 0x1DA3E349, 0x7F816A70, 0xA2C4C0C8,
- 0x4D801BE4, 0x90C5B15C, 0xF2E73865, 0x2FA292DD, 0x36A22A17, 0xEBE780AF, 0x89C50996, 0x5480A32E,
- 0x8585DDB4, 0x58C0770C, 0x3AE2FE35, 0xE7A7548D, 0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E,
- 0x73C1BE52, 0xAE8414EA, 0xCCA69DD3, 0x11E3376B, 0x08E38FA1, 0xD5A62519, 0xB784AC20, 0x6AC10698,
- 0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0, 0x17C35D7A, 0xCA86F7C2, 0xA8A47EFB, 0x75E1D443,
- 0x9AA50F6F, 0x47E0A5D7, 0x25C22CEE, 0xF8878656, 0xE1873E9C, 0x3CC29424, 0x5EE01D1D, 0x83A5B7A5,
- 0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, 0x8224A72B, 0x5F610D93, 0x3D4384AA, 0xE0062E12,
- 0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07, 0x7460C4CD, 0xA9256E75, 0xCB07E74C, 0x16424DF4,
- 0x106227E5, 0xCD278D5D, 0xAF050464, 0x7240AEDC, 0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F,
- 0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, 0x9D0475F0, 0x4041DF48, 0x22635671, 0xFF26FCC9,
- 0x2E238253, 0xF36628EB, 0x9144A1D2, 0x4C010B6A, 0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99,
- 0xD867E1B5, 0x05224B0D, 0x6700C234, 0xBA45688C, 0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F,
- 0xC747336E, 0x1A0299D6, 0x782010EF, 0xA565BA57, 0xBC65029D, 0x6120A825, 0x0302211C, 0xDE478BA4,
- 0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1, 0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842
-};
-
-/*
- * end of the CRC lookup table crc_tableil8_o56
- */
-
-
-
-/*
- * The following CRC lookup table was generated automagically
- * using the following model parameters:
- *
- * Generator Polynomial = ................. 0x1EDC6F41
- * Generator Polynomial Length = .......... 32 bits
- * Reflected Bits = ....................... TRUE
- * Table Generation Offset = .............. 32 bits
- * Number of Slices = ..................... 8 slices
- * Slice Lengths = ........................ 8 8 8 8 8 8 8 8
- * Directory Name = ....................... .\
- * File Name = ............................ 8x256_tables.c
- */
-
-static
-u32 crc_tableil8_o64[256] =
-{
- 0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, 0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44,
- 0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65, 0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5,
- 0x8F2261D3, 0xB7330E7F, 0xFF00BE8B, 0xC711D127, 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97,
- 0x4A456A42, 0x725405EE, 0x3A67B51A, 0x0276DAB6, 0xAA00D4F2, 0x9211BB5E, 0xDA220BAA, 0xE2336406,
- 0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3, 0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13,
- 0xDECFBEC6, 0xE6DED16A, 0xAEED619E, 0x96FC0E32, 0x3E8A0076, 0x069B6FDA, 0x4EA8DF2E, 0x76B9B082,
- 0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, 0x74CF6A34, 0x4CDE0598, 0x04EDB56C, 0x3CFCDAC0,
- 0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1, 0xB1A861A5, 0x89B90E09, 0xC18ABEFD, 0xF99BD151,
- 0x37516AAE, 0x0F400502, 0x4773B5F6, 0x7F62DA5A, 0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA,
- 0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, 0x1273DF8F, 0x2A62B023, 0x625100D7, 0x5A406F7B,
- 0xB8730B7D, 0x806264D1, 0xC851D425, 0xF040BB89, 0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539,
- 0x7D1400EC, 0x45056F40, 0x0D36DFB4, 0x3527B018, 0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8,
- 0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1, 0x64CA6F0D, 0xCCBC6149, 0xF4AD0EE5, 0xBC9EBE11, 0x848FD1BD,
- 0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C, 0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C,
- 0xA3DBBE2A, 0x9BCAD186, 0xD3F96172, 0xEBE80EDE, 0x439E009A, 0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E,
- 0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, 0x86F90B0B, 0xBEE864A7, 0xF6DBD453, 0xCECABBFF,
- 0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8, 0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4, 0xC6D4DB18,
- 0xABC5DECD, 0x93D4B161, 0xDBE70195, 0xE3F66E39, 0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089,
- 0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, 0x01C50A3F, 0x39D46593, 0x71E7D567, 0x49F6BACB,
- 0x24E7BF1E, 0x1CF6D0B2, 0x54C56046, 0x6CD40FEA, 0xC4A201AE, 0xFCB36E02, 0xB480DEF6, 0x8C91B15A,
- 0x750A600B, 0x4D1B0FA7, 0x0528BF53, 0x3D39D0FF, 0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F,
- 0xB06D6B9A, 0x887C0436, 0xC04FB4C2, 0xF85EDB6E, 0x5028D52A, 0x6839BA86, 0x200A0A72, 0x181B65DE,
- 0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C, 0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C,
- 0x3F4F0A49, 0x075E65E5, 0x4F6DD511, 0x777CBABD, 0xDF0AB4F9, 0xE71BDB55, 0xAF286BA1, 0x9739040D,
- 0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, 0xB9B60142, 0x81A76EEE, 0xC994DE1A, 0xF185B1B6,
- 0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497, 0x7CD10AD3, 0x44C0657F, 0x0CF3D58B, 0x34E2BA27,
- 0xD6D1DE21, 0xEEC0B18D, 0xA6F30179, 0x9EE26ED5, 0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065,
- 0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, 0xF3F36B00, 0xCBE204AC, 0x83D1B458, 0xBBC0DBF4,
- 0x425B0AA5, 0x7A4A6509, 0x3279D5FD, 0x0A68BA51, 0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1,
- 0x873C0134, 0xBF2D6E98, 0xF71EDE6C, 0xCF0FB1C0, 0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70,
- 0xCD796B76, 0xF56804DA, 0xBD5BB42E, 0x854ADB82, 0x2D3CD5C6, 0x152DBA6A, 0x5D1E0A9E, 0x650F6532,
- 0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013, 0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3
-};
-
-/*
- * end of the CRC lookup table crc_tableil8_o64
- */
-
-
-
-/*
- * The following CRC lookup table was generated automagically
- * using the following model parameters:
- *
- * Generator Polynomial = ................. 0x1EDC6F41
- * Generator Polynomial Length = .......... 32 bits
- * Reflected Bits = ....................... TRUE
- * Table Generation Offset = .............. 32 bits
- * Number of Slices = ..................... 8 slices
- * Slice Lengths = ........................ 8 8 8 8 8 8 8 8
- * Directory Name = ....................... .\
- * File Name = ............................ 8x256_tables.c
- */
-
-static
-u32 crc_tableil8_o72[256] =
-{
- 0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, 0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD,
- 0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, 0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2,
- 0xC00C303E, 0x2F3C5B27, 0x1B8090FD, 0xF4B0FBE4, 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93,
- 0xA00A2821, 0x4F3A4338, 0x7B8688E2, 0x94B6E3FB, 0x12FF1F56, 0xFDCF744F, 0xC973BF95, 0x2643D48C,
- 0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57, 0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20,
- 0xE5F20E92, 0x0AC2658B, 0x3E7EAE51, 0xD14EC548, 0x570739E5, 0xB83752FC, 0x8C8B9926, 0x63BBF23F,
- 0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, 0xF70D11C4, 0x183D7ADD, 0x2C81B107, 0xC3B1DA1E,
- 0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576, 0x970B09DB, 0x783B62C2, 0x4C87A918, 0xA3B7C201,
- 0x0E045BEB, 0xE13430F2, 0xD588FB28, 0x3AB89031, 0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746,
- 0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, 0xDCF77483, 0x33C71F9A, 0x077BD440, 0xE84BBF59,
- 0xCE086BD5, 0x213800CC, 0x1584CB16, 0xFAB4A00F, 0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778,
- 0xAE0E73CA, 0x413E18D3, 0x7582D309, 0x9AB2B810, 0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67,
- 0x8BF04D66, 0x64C0267F, 0x507CEDA5, 0xBF4C86BC, 0x39057A11, 0xD6351108, 0xE289DAD2, 0x0DB9B1CB,
- 0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3, 0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4,
- 0x4BFC7D58, 0xA4CC1641, 0x9070DD9B, 0x7F40B682, 0xF9094A2F, 0x16392136, 0x2285EAEC, 0xCDB581F5,
- 0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, 0x990F5230, 0x763F3929, 0x4283F2F3, 0xADB399EA,
- 0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C, 0xAEFD80A1, 0x41CDEBB8, 0x75712062, 0x9A414B7B,
- 0x7C0EAFC9, 0x933EC4D0, 0xA7820F0A, 0x48B26413, 0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364,
- 0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, 0x6EF1B09F, 0x81C1DB86, 0xB57D105C, 0x5A4D7B45,
- 0xBC029FF7, 0x5332F4EE, 0x678E3F34, 0x88BE542D, 0x0EF7A880, 0xE1C7C399, 0xD57B0843, 0x3A4B635A,
- 0x99FCA15B, 0x76CCCA42, 0x42700198, 0xAD406A81, 0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6,
- 0xF9FAB944, 0x16CAD25D, 0x22761987, 0xCD46729E, 0x4B0F8E33, 0xA43FE52A, 0x90832EF0, 0x7FB345E9,
- 0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF, 0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8,
- 0x39F6897A, 0xD6C6E263, 0xE27A29B9, 0x0D4A42A0, 0x8B03BE0D, 0x6433D514, 0x508F1ECE, 0xBFBF75D7,
- 0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, 0xA0F9DB4A, 0x4FC9B053, 0x7B757B89, 0x94451090,
- 0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8, 0xC0FFC355, 0x2FCFA84C, 0x1B736396, 0xF443088F,
- 0xD200DC03, 0x3D30B71A, 0x098C7CC0, 0xE6BC17D9, 0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE,
- 0xB206C41C, 0x5D36AF05, 0x698A64DF, 0x86BA0FC6, 0x00F3F36B, 0xEFC39872, 0xDB7F53A8, 0x344F38B1,
- 0x97F8FAB0, 0x78C891A9, 0x4C745A73, 0xA344316A, 0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D,
- 0xF7FEE2AF, 0x18CE89B6, 0x2C72426C, 0xC3422975, 0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02,
- 0x57F4CA8E, 0xB8C4A197, 0x8C786A4D, 0x63480154, 0xE501FDF9, 0x0A3196E0, 0x3E8D5D3A, 0xD1BD3623,
- 0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B, 0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C
-};
-
-/*
- * end of the CRC lookup table crc_tableil8_o72
- */
-
-
-
-/*
- * The following CRC lookup table was generated automagically
- * using the following model parameters:
- *
- * Generator Polynomial = ................. 0x1EDC6F41
- * Generator Polynomial Length = .......... 32 bits
- * Reflected Bits = ....................... TRUE
- * Table Generation Offset = .............. 32 bits
- * Number of Slices = ..................... 8 slices
- * Slice Lengths = ........................ 8 8 8 8 8 8 8 8
- * Directory Name = ....................... .\
- * File Name = ............................ 8x256_tables.c
- */
-
-static
-u32 crc_tableil8_o80[256] =
-{
- 0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, 0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089,
- 0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B, 0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA,
- 0x9C5BFAA6, 0xF458D66E, 0x4C5DA336, 0x245E8FFE, 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F,
- 0xD27607F5, 0xBA752B3D, 0x02705E65, 0x6A7372AD, 0x7796C224, 0x1F95EEEC, 0xA7909BB4, 0xCF93B77C,
- 0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5, 0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334,
- 0x73767EEE, 0x1B755226, 0xA370277E, 0xCB730BB6, 0xD696BB3F, 0xBE9597F7, 0x0690E2AF, 0x6E93CE67,
- 0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, 0x04E0BCCA, 0x6CE39002, 0xD4E6E55A, 0xBCE5C992,
- 0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110, 0x4ACD4199, 0x22CE6D51, 0x9ACB1809, 0xF2C834C1,
- 0x7AB7077A, 0x12B42BB2, 0xAAB15EEA, 0xC2B27222, 0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3,
- 0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, 0x917A3FF8, 0xF9791330, 0x417C6668, 0x297F4AA0,
- 0xE6ECFDDC, 0x8EEFD114, 0x36EAA44C, 0x5EE98884, 0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55,
- 0xA8C1008F, 0xC0C22C47, 0x78C7591F, 0x10C475D7, 0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006,
- 0x47EC84C7, 0x2FEFA80F, 0x97EADD57, 0xFFE9F19F, 0xE20C4116, 0x8A0F6DDE, 0x320A1886, 0x5A09344E,
- 0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC, 0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D,
- 0xDBB77E61, 0xB3B452A9, 0x0BB127F1, 0x63B20B39, 0x7E57BBB0, 0x16549778, 0xAE51E220, 0xC652CEE8,
- 0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, 0x307A46E3, 0x58796A2B, 0xE07C1F73, 0x887F33BB,
- 0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC, 0x508ECB25, 0x388DE7ED, 0x808892B5, 0xE88BBE7D,
- 0xBB43F3A7, 0xD340DF6F, 0x6B45AA37, 0x034686FF, 0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E,
- 0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, 0xCCD53183, 0xA4D61D4B, 0x1CD36813, 0x74D044DB,
- 0x27180901, 0x4F1B25C9, 0xF71E5091, 0x9F1D7C59, 0x82F8CCD0, 0xEAFBE018, 0x52FE9540, 0x3AFDB988,
- 0xC8358D49, 0xA036A181, 0x1833D4D9, 0x7030F811, 0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0,
- 0x8618701A, 0xEE1B5CD2, 0x561E298A, 0x3E1D0542, 0x23F8B5CB, 0x4BFB9903, 0xF3FEEC5B, 0x9BFDC093,
- 0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7, 0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766,
- 0x1A438ABC, 0x7240A674, 0xCA45D32C, 0xA246FFE4, 0xBFA34F6D, 0xD7A063A5, 0x6FA516FD, 0x07A63A35,
- 0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, 0x2A39CC5F, 0x423AE097, 0xFA3F95CF, 0x923CB907,
- 0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185, 0x6414310C, 0x0C171DC4, 0xB412689C, 0xDC114454,
- 0x1382F328, 0x7B81DFE0, 0xC384AAB8, 0xAB878670, 0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1,
- 0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, 0xF84FCBAA, 0x904CE762, 0x2849923A, 0x404ABEF2,
- 0xB2828A33, 0xDA81A6FB, 0x6284D3A3, 0x0A87FF6B, 0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA,
- 0xFCAF7760, 0x94AC5BA8, 0x2CA92EF0, 0x44AA0238, 0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9,
- 0x2ED97095, 0x46DA5C5D, 0xFEDF2905, 0x96DC05CD, 0x8B39B544, 0xE33A998C, 0x5B3FECD4, 0x333CC01C,
- 0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E, 0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F
-};
-
-/*
- * end of the CRC lookup table crc_tableil8_o80
- */
-
-
-
-/*
- * The following CRC lookup table was generated automagically
- * using the following model parameters:
- *
- * Generator Polynomial = ................. 0x1EDC6F41
- * Generator Polynomial Length = .......... 32 bits
- * Reflected Bits = ....................... TRUE
- * Table Generation Offset = .............. 32 bits
- * Number of Slices = ..................... 8 slices
- * Slice Lengths = ........................ 8 8 8 8 8 8 8 8
- * Directory Name = ....................... .\
- * File Name = ............................ 8x256_tables.c
- */
-
-static
-u32 crc_tableil8_o88[256] =
-{
- 0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, 0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504,
- 0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3, 0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE,
- 0x847609B4, 0xCD4A7493, 0x160EF3FA, 0x5F328EDD, 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0,
- 0xC64D0D6E, 0x8F717049, 0x5435F720, 0x1D098A07, 0xE7508F03, 0xAE6CF224, 0x7528754D, 0x3C14086A,
- 0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0, 0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D,
- 0x4F3B6143, 0x06071C64, 0xDD439B0D, 0x947FE62A, 0x6E26E32E, 0x271A9E09, 0xFC5E1960, 0xB5626447,
- 0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, 0xA86BEE40, 0xE1579367, 0x3A13140E, 0x732F6929,
- 0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E, 0xEA50EA9A, 0xA36C97BD, 0x782810D4, 0x31146DF3,
- 0x1A00CB32, 0x533CB615, 0x8878317C, 0xC1444C5B, 0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36,
- 0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, 0x79264D85, 0x301A30A2, 0xEB5EB7CB, 0xA262CAEC,
- 0x9E76C286, 0xD74ABFA1, 0x0C0E38C8, 0x453245EF, 0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782,
- 0xDC4DC65C, 0x9571BB7B, 0x4E353C12, 0x07094135, 0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358,
- 0x1700AEAB, 0x5E3CD38C, 0x857854E5, 0xCC4429C2, 0x361D2CC6, 0x7F2151E1, 0xA465D688, 0xED59ABAF,
- 0x553BAA71, 0x1C07D756, 0xC743503F, 0x8E7F2D18, 0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75,
- 0x9376A71F, 0xDA4ADA38, 0x010E5D51, 0x48322076, 0xB26B2572, 0xFB575855, 0x2013DF3C, 0x692FA21B,
- 0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, 0xF05021A8, 0xB96C5C8F, 0x6228DBE6, 0x2B14A6C1,
- 0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D, 0x151C1409, 0x5C20692E, 0x8764EE47, 0xCE589360,
- 0x763A92BE, 0x3F06EF99, 0xE44268F0, 0xAD7E15D7, 0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA,
- 0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, 0x916A1DBD, 0xD856609A, 0x0312E7F3, 0x4A2E9AD4,
- 0xF24C9B0A, 0xBB70E62D, 0x60346144, 0x29081C63, 0xD3511967, 0x9A6D6440, 0x4129E329, 0x08159E0E,
- 0x3901F3FD, 0x703D8EDA, 0xAB7909B3, 0xE2457494, 0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9,
- 0x7B3AF727, 0x32068A00, 0xE9420D69, 0xA07E704E, 0x5A27754A, 0x131B086D, 0xC85F8F04, 0x8163F223,
- 0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20, 0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D,
- 0xFF4CFE93, 0xB67083B4, 0x6D3404DD, 0x240879FA, 0xDE517CFE, 0x976D01D9, 0x4C2986B0, 0x0515FB97,
- 0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, 0x0F1CDF3B, 0x4620A21C, 0x9D642575, 0xD4585852,
- 0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5, 0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF, 0x96635C88,
- 0xAA7754E2, 0xE34B29C5, 0x380FAEAC, 0x7133D38B, 0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6,
- 0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, 0xC951D255, 0x806DAF72, 0x5B29281B, 0x1215553C,
- 0x230138CF, 0x6A3D45E8, 0xB179C281, 0xF845BFA6, 0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB,
- 0x613A3C15, 0x28064132, 0xF342C65B, 0xBA7EBB7C, 0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911,
- 0xA777317B, 0xEE4B4C5C, 0x350FCB35, 0x7C33B612, 0x866AB316, 0xCF56CE31, 0x14124958, 0x5D2E347F,
- 0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8, 0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5
-};
-
-/*
- * end of the CRC lookup table crc_tableil8_o88
- */
-
-//#define VERIFY_ASSERTION
-
-#ifdef VERIFY_ASSERTION
-
-// Trivial byte-by-byte version: you can switch on the assertion in the
-// Crc32_ComputeBuf function (by defining VERIFY_ASSERTION) to check this
-// against the slicing variant.
-static really_inline
-u32 crc32c(u32 running_crc, const unsigned char* p_buf, size_t length) {
- u32 crc = running_crc;
- while (length--) {
- crc = crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8);
- }
- return crc;
-}
-
-#endif // VERIFY_ASSERTION
-
-// Slicing-by-8 approach, which is much faster. Derived from Intel's
-// BSD-licensed code, with additions to handled aligned case automatically.
-static really_inline
-u32 crc32c_sb8_64_bit(u32 running_crc, const unsigned char* p_buf,
- const size_t length) {
- u32 crc = running_crc;
-
- // Process byte-by-byte until p_buf is aligned
-
- const unsigned char *aligned_buf = ROUNDUP_PTR(p_buf, 4);
- size_t init_bytes = aligned_buf - p_buf;
- size_t running_length = ((length - init_bytes)/8)*8;
- size_t end_bytes = length - init_bytes - running_length;
-
- while (p_buf < aligned_buf) {
- crc = crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8);
- }
-
- // Main aligned loop, processes eight bytes at a time.
-
- u32 term1, term2;
- for (size_t li = 0; li < running_length/8; li++) {
- u32 block = *(const u32 *)p_buf;
- crc ^= block;
- p_buf += 4;
- term1 = crc_tableil8_o88[crc & 0x000000FF] ^
- crc_tableil8_o80[(crc >> 8) & 0x000000FF];
- term2 = crc >> 16;
- crc = term1 ^
- crc_tableil8_o72[term2 & 0x000000FF] ^
- crc_tableil8_o64[(term2 >> 8) & 0x000000FF];
-
-
- block = *(const u32 *)p_buf;
-
- term1 = crc_tableil8_o56[block & 0x000000FF] ^
- crc_tableil8_o48[(block >> 8) & 0x000000FF];
-
- term2 = block >> 16;
- crc = crc ^
- term1 ^
- crc_tableil8_o40[term2 & 0x000000FF] ^
- crc_tableil8_o32[(term2 >> 8) & 0x000000FF];
- p_buf += 4;
- }
-
- // Remaining bytes
-
- for(size_t li = 0; li < end_bytes; li++) {
- crc = crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8);
- }
-
- return crc;
-}
-
+
+/***
+ *** What follows is derived from Intel's Slicing-by-8 CRC32 impl, which is BSD
+ *** licensed and available from http://sourceforge.net/projects/slicing-by-8/
+ ***/
+
+/*
+ * Copyright (c) 2004-2006 Intel Corporation - All Rights Reserved
+ *
+ *
+ * This software program is licensed subject to the BSD License,
+ * available at http://www.opensource.org/licenses/bsd-license.html.
+ *
+ * Abstract:
+ *
+ * Tables for software CRC generation
+ */
+
+/*
+ * The following CRC lookup table was generated automagically
+ * using the following model parameters:
+ *
+ * Generator Polynomial = ................. 0x1EDC6F41
+ * Generator Polynomial Length = .......... 32 bits
+ * Reflected Bits = ....................... TRUE
+ * Table Generation Offset = .............. 32 bits
+ * Number of Slices = ..................... 8 slices
+ * Slice Lengths = ........................ 8 8 8 8 8 8 8 8
+ * Directory Name = ....................... .\
+ * File Name = ............................ 8x256_tables.c
+ */
+
+static
+u32 crc_tableil8_o32[256] =
+{
+ 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
+ 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
+ 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
+ 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
+ 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
+ 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
+ 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
+ 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
+ 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
+ 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
+ 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
+ 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
+ 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
+ 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
+ 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
+ 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
+ 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
+ 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
+ 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
+ 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
+ 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
+ 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
+ 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
+ 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
+ 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
+ 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
+ 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
+ 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
+ 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
+ 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
+ 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
+ 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o32
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically
+ * using the following model parameters:
+ *
+ * Generator Polynomial = ................. 0x1EDC6F41
+ * Generator Polynomial Length = .......... 32 bits
+ * Reflected Bits = ....................... TRUE
+ * Table Generation Offset = .............. 32 bits
+ * Number of Slices = ..................... 8 slices
+ * Slice Lengths = ........................ 8 8 8 8 8 8 8 8
+ * Directory Name = ....................... .\
+ * File Name = ............................ 8x256_tables.c
+ */
+
+static
+u32 crc_tableil8_o40[256] =
+{
+ 0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945,
+ 0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, 0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD,
+ 0x3FC5F181, 0x2C6769F6, 0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4,
+ 0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, 0xEC5B53E5, 0xFFF9CB92, 0xCB1E630B, 0xD8BCFB7C,
+ 0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B, 0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47,
+ 0xE29F20BA, 0xF13DB8CD, 0xC5DA1054, 0xD6788823, 0xAC154166, 0xBFB7D911, 0x8B507188, 0x98F2E9FF,
+ 0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, 0x0EC4735F, 0x1D66EB28, 0x298143B1, 0x3A23DBC6,
+ 0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2, 0x93D0B0E7, 0x80722890, 0xB4958009, 0xA737187E,
+ 0xFF17C604, 0xECB55E73, 0xD852F6EA, 0xCBF06E9D, 0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41,
+ 0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, 0x2C896460, 0x3F2BFC17, 0x0BCC548E, 0x186ECCF9,
+ 0xC0D23785, 0xD370AFF2, 0xE797076B, 0xF4359F1C, 0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0,
+ 0x5DC6F43D, 0x4E646C4A, 0x7A83C4D3, 0x69215CA4, 0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78,
+ 0x809C2506, 0x933EBD71, 0xA7D915E8, 0xB47B8D9F, 0xCE1644DA, 0xDDB4DCAD, 0xE9537434, 0xFAF1EC43,
+ 0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27, 0x53028762, 0x40A01F15, 0x7447B78C, 0x67E52FFB,
+ 0xBF59D487, 0xACFB4CF0, 0x981CE469, 0x8BBE7C1E, 0xF1D3B55B, 0xE2712D2C, 0xD69685B5, 0xC5341DC2,
+ 0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, 0x6CC776E3, 0x7F65EE94, 0x4B82460D, 0x5820DE7A,
+ 0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260, 0xB5499B25, 0xA6EB0352, 0x920CABCB, 0x81AE33BC,
+ 0x66D73941, 0x7575A136, 0x419209AF, 0x523091D8, 0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004,
+ 0xC4060B78, 0xD7A4930F, 0xE3433B96, 0xF0E1A3E1, 0x8A8C6AA4, 0x992EF2D3, 0xADC95A4A, 0xBE6BC23D,
+ 0x5912C8C0, 0x4AB050B7, 0x7E57F82E, 0x6DF56059, 0x1798A91C, 0x043A316B, 0x30DD99F2, 0x237F0185,
+ 0x844819FB, 0x97EA818C, 0xA30D2915, 0xB0AFB162, 0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE,
+ 0x195CDA43, 0x0AFE4234, 0x3E19EAAD, 0x2DBB72DA, 0x57D6BB9F, 0x447423E8, 0x70938B71, 0x63311306,
+ 0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3, 0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F,
+ 0x26992BC2, 0x353BB3B5, 0x01DC1B2C, 0x127E835B, 0x68134A1E, 0x7BB1D269, 0x4F567AF0, 0x5CF4E287,
+ 0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, 0x4A5E5D21, 0x59FCC556, 0x6D1B6DCF, 0x7EB9F5B8,
+ 0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC, 0xD74A9E99, 0xC4E806EE, 0xF00FAE77, 0xE3AD3600,
+ 0x3B11CD7C, 0x28B3550B, 0x1C54FD92, 0x0FF665E5, 0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439,
+ 0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, 0xE88F6F18, 0xFB2DF76F, 0xCFCA5FF6, 0xDC68C781,
+ 0x7B5FDFFF, 0x68FD4788, 0x5C1AEF11, 0x4FB87766, 0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA,
+ 0xE64B1C47, 0xF5E98430, 0xC10E2CA9, 0xD2ACB4DE, 0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502,
+ 0x449A2E7E, 0x5738B609, 0x63DF1E90, 0x707D86E7, 0x0A104FA2, 0x19B2D7D5, 0x2D557F4C, 0x3EF7E73B,
+ 0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F, 0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o40
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically
+ * using the following model parameters:
+ *
+ * Generator Polynomial = ................. 0x1EDC6F41
+ * Generator Polynomial Length = .......... 32 bits
+ * Reflected Bits = ....................... TRUE
+ * Table Generation Offset = .............. 32 bits
+ * Number of Slices = ..................... 8 slices
+ * Slice Lengths = ........................ 8 8 8 8 8 8 8 8
+ * Directory Name = ....................... .\
+ * File Name = ............................ 8x256_tables.c
+ */
+
+static
+u32 crc_tableil8_o48[256] =
+{
+ 0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, 0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469,
+ 0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, 0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC,
+ 0x70A27D8A, 0xD5E3EFF4, 0x3FCD2F87, 0x9A8CBDF9, 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3,
+ 0x48F3434F, 0xEDB2D131, 0x079C1142, 0xA2DD833C, 0xD62DE755, 0x736C752B, 0x9942B558, 0x3C032726,
+ 0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67, 0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 0x95B49F7D,
+ 0xD915C5D1, 0x7C5457AF, 0x967A97DC, 0x333B05A2, 0x47CB61CB, 0xE28AF3B5, 0x08A433C6, 0xADE5A1B8,
+ 0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, 0x0F382284, 0xAA79B0FA, 0x40577089, 0xE516E2F7,
+ 0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828, 0x37691C41, 0x92288E3F, 0x78064E4C, 0xDD47DC32,
+ 0xC76580D9, 0x622412A7, 0x880AD2D4, 0x2D4B40AA, 0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0,
+ 0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, 0x61EA1A06, 0xC4AB8878, 0x2E85480B, 0x8BC4DA75,
+ 0xB7C7FD53, 0x12866F2D, 0xF8A8AF5E, 0x5DE93D20, 0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A,
+ 0x8F96C396, 0x2AD751E8, 0xC0F9919B, 0x65B803E5, 0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF,
+ 0x26217BCD, 0x8360E9B3, 0x694E29C0, 0xCC0FBBBE, 0xB8FFDFD7, 0x1DBE4DA9, 0xF7908DDA, 0x52D11FA4,
+ 0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B, 0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161,
+ 0x56830647, 0xF3C29439, 0x19EC544A, 0xBCADC634, 0xC85DA25D, 0x6D1C3023, 0x8732F050, 0x2273622E,
+ 0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, 0xF00C9C98, 0x554D0EE6, 0xBF63CE95, 0x1A225CEB,
+ 0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730, 0x15F9D359, 0xB0B84127, 0x5A968154, 0xFFD7132A,
+ 0xB3764986, 0x1637DBF8, 0xFC191B8B, 0x595889F5, 0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF,
+ 0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, 0x655BAED3, 0xC01A3CAD, 0x2A34FCDE, 0x8F756EA0,
+ 0xC3D4340C, 0x6695A672, 0x8CBB6601, 0x29FAF47F, 0x5D0A9016, 0xF84B0268, 0x1265C21B, 0xB7245065,
+ 0x6A638C57, 0xCF221E29, 0x250CDE5A, 0x804D4C24, 0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E,
+ 0x5232B292, 0xF77320EC, 0x1D5DE09F, 0xB81C72E1, 0xCCEC1688, 0x69AD84F6, 0x83834485, 0x26C2D6FB,
+ 0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE, 0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4,
+ 0x2290CF18, 0x87D15D66, 0x6DFF9D15, 0xC8BE0F6B, 0xBC4E6B02, 0x190FF97C, 0xF321390F, 0x5660AB71,
+ 0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, 0xD29C5380, 0x77DDC1FE, 0x9DF3018D, 0x38B293F3,
+ 0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C, 0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48, 0x00E3AD36,
+ 0x3CE08A10, 0x99A1186E, 0x738FD81D, 0xD6CE4A63, 0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79,
+ 0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, 0x9A6F10CF, 0x3F2E82B1, 0xD50042C2, 0x7041D0BC,
+ 0xAD060C8E, 0x08479EF0, 0xE2695E83, 0x4728CCFD, 0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7,
+ 0x9557324B, 0x3016A035, 0xDA386046, 0x7F79F238, 0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622,
+ 0xDDA47104, 0x78E5E37A, 0x92CB2309, 0x378AB177, 0x437AD51E, 0xE63B4760, 0x0C158713, 0xA954156D,
+ 0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2, 0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o48
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically
+ * using the following model parameters:
+ *
+ * Generator Polynomial = ................. 0x1EDC6F41
+ * Generator Polynomial Length = .......... 32 bits
+ * Reflected Bits = ....................... TRUE
+ * Table Generation Offset = .............. 32 bits
+ * Number of Slices = ..................... 8 slices
+ * Slice Lengths = ........................ 8 8 8 8 8 8 8 8
+ * Directory Name = ....................... .\
+ * File Name = ............................ 8x256_tables.c
+ */
+
+static
+u32 crc_tableil8_o56[256] =
+{
+ 0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, 0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA,
+ 0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF, 0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C,
+ 0xE964B13D, 0x34211B85, 0x560392BC, 0x8B463804, 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7,
+ 0x1F20D2DB, 0xC2657863, 0xA047F15A, 0x7D025BE2, 0x6402E328, 0xB9474990, 0xDB65C0A9, 0x06206A11,
+ 0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2, 0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41,
+ 0x2161776D, 0xFC24DDD5, 0x9E0654EC, 0x4343FE54, 0x5A43469E, 0x8706EC26, 0xE524651F, 0x3861CFA7,
+ 0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, 0x45639445, 0x98263EFD, 0xFA04B7C4, 0x27411D7C,
+ 0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69, 0xB327F7A3, 0x6E625D1B, 0x0C40D422, 0xD1057E9A,
+ 0xABA65FE7, 0x76E3F55F, 0x14C17C66, 0xC984D6DE, 0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D,
+ 0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, 0x26C00DF2, 0xFB85A74A, 0x99A72E73, 0x44E284CB,
+ 0x42C2EEDA, 0x9F874462, 0xFDA5CD5B, 0x20E067E3, 0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610,
+ 0xB4868D3C, 0x69C32784, 0x0BE1AEBD, 0xD6A40405, 0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6,
+ 0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED, 0x1EA1C255, 0x07A17A9F, 0xDAE4D027, 0xB8C6591E, 0x6583F3A6,
+ 0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3, 0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040,
+ 0x95E7FA51, 0x48A250E9, 0x2A80D9D0, 0xF7C57368, 0xEEC5CBA2, 0x3380611A, 0x51A2E823, 0x8CE7429B,
+ 0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, 0x1881A844, 0xC5C402FC, 0xA7E68BC5, 0x7AA3217D,
+ 0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006, 0x2982F8CC, 0xF4C75274, 0x96E5DB4D, 0x4BA071F5,
+ 0xA4E4AAD9, 0x79A10061, 0x1B838958, 0xC6C623E0, 0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213,
+ 0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, 0xC0E649F1, 0x1DA3E349, 0x7F816A70, 0xA2C4C0C8,
+ 0x4D801BE4, 0x90C5B15C, 0xF2E73865, 0x2FA292DD, 0x36A22A17, 0xEBE780AF, 0x89C50996, 0x5480A32E,
+ 0x8585DDB4, 0x58C0770C, 0x3AE2FE35, 0xE7A7548D, 0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E,
+ 0x73C1BE52, 0xAE8414EA, 0xCCA69DD3, 0x11E3376B, 0x08E38FA1, 0xD5A62519, 0xB784AC20, 0x6AC10698,
+ 0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0, 0x17C35D7A, 0xCA86F7C2, 0xA8A47EFB, 0x75E1D443,
+ 0x9AA50F6F, 0x47E0A5D7, 0x25C22CEE, 0xF8878656, 0xE1873E9C, 0x3CC29424, 0x5EE01D1D, 0x83A5B7A5,
+ 0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, 0x8224A72B, 0x5F610D93, 0x3D4384AA, 0xE0062E12,
+ 0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07, 0x7460C4CD, 0xA9256E75, 0xCB07E74C, 0x16424DF4,
+ 0x106227E5, 0xCD278D5D, 0xAF050464, 0x7240AEDC, 0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F,
+ 0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, 0x9D0475F0, 0x4041DF48, 0x22635671, 0xFF26FCC9,
+ 0x2E238253, 0xF36628EB, 0x9144A1D2, 0x4C010B6A, 0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99,
+ 0xD867E1B5, 0x05224B0D, 0x6700C234, 0xBA45688C, 0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F,
+ 0xC747336E, 0x1A0299D6, 0x782010EF, 0xA565BA57, 0xBC65029D, 0x6120A825, 0x0302211C, 0xDE478BA4,
+ 0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1, 0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o56
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically
+ * using the following model parameters:
+ *
+ * Generator Polynomial = ................. 0x1EDC6F41
+ * Generator Polynomial Length = .......... 32 bits
+ * Reflected Bits = ....................... TRUE
+ * Table Generation Offset = .............. 32 bits
+ * Number of Slices = ..................... 8 slices
+ * Slice Lengths = ........................ 8 8 8 8 8 8 8 8
+ * Directory Name = ....................... .\
+ * File Name = ............................ 8x256_tables.c
+ */
+
+static
+u32 crc_tableil8_o64[256] =
+{
+ 0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, 0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44,
+ 0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65, 0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5,
+ 0x8F2261D3, 0xB7330E7F, 0xFF00BE8B, 0xC711D127, 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97,
+ 0x4A456A42, 0x725405EE, 0x3A67B51A, 0x0276DAB6, 0xAA00D4F2, 0x9211BB5E, 0xDA220BAA, 0xE2336406,
+ 0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3, 0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13,
+ 0xDECFBEC6, 0xE6DED16A, 0xAEED619E, 0x96FC0E32, 0x3E8A0076, 0x069B6FDA, 0x4EA8DF2E, 0x76B9B082,
+ 0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, 0x74CF6A34, 0x4CDE0598, 0x04EDB56C, 0x3CFCDAC0,
+ 0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1, 0xB1A861A5, 0x89B90E09, 0xC18ABEFD, 0xF99BD151,
+ 0x37516AAE, 0x0F400502, 0x4773B5F6, 0x7F62DA5A, 0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA,
+ 0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, 0x1273DF8F, 0x2A62B023, 0x625100D7, 0x5A406F7B,
+ 0xB8730B7D, 0x806264D1, 0xC851D425, 0xF040BB89, 0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539,
+ 0x7D1400EC, 0x45056F40, 0x0D36DFB4, 0x3527B018, 0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8,
+ 0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1, 0x64CA6F0D, 0xCCBC6149, 0xF4AD0EE5, 0xBC9EBE11, 0x848FD1BD,
+ 0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C, 0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C,
+ 0xA3DBBE2A, 0x9BCAD186, 0xD3F96172, 0xEBE80EDE, 0x439E009A, 0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E,
+ 0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, 0x86F90B0B, 0xBEE864A7, 0xF6DBD453, 0xCECABBFF,
+ 0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8, 0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4, 0xC6D4DB18,
+ 0xABC5DECD, 0x93D4B161, 0xDBE70195, 0xE3F66E39, 0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089,
+ 0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, 0x01C50A3F, 0x39D46593, 0x71E7D567, 0x49F6BACB,
+ 0x24E7BF1E, 0x1CF6D0B2, 0x54C56046, 0x6CD40FEA, 0xC4A201AE, 0xFCB36E02, 0xB480DEF6, 0x8C91B15A,
+ 0x750A600B, 0x4D1B0FA7, 0x0528BF53, 0x3D39D0FF, 0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F,
+ 0xB06D6B9A, 0x887C0436, 0xC04FB4C2, 0xF85EDB6E, 0x5028D52A, 0x6839BA86, 0x200A0A72, 0x181B65DE,
+ 0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C, 0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C,
+ 0x3F4F0A49, 0x075E65E5, 0x4F6DD511, 0x777CBABD, 0xDF0AB4F9, 0xE71BDB55, 0xAF286BA1, 0x9739040D,
+ 0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, 0xB9B60142, 0x81A76EEE, 0xC994DE1A, 0xF185B1B6,
+ 0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497, 0x7CD10AD3, 0x44C0657F, 0x0CF3D58B, 0x34E2BA27,
+ 0xD6D1DE21, 0xEEC0B18D, 0xA6F30179, 0x9EE26ED5, 0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065,
+ 0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, 0xF3F36B00, 0xCBE204AC, 0x83D1B458, 0xBBC0DBF4,
+ 0x425B0AA5, 0x7A4A6509, 0x3279D5FD, 0x0A68BA51, 0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1,
+ 0x873C0134, 0xBF2D6E98, 0xF71EDE6C, 0xCF0FB1C0, 0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70,
+ 0xCD796B76, 0xF56804DA, 0xBD5BB42E, 0x854ADB82, 0x2D3CD5C6, 0x152DBA6A, 0x5D1E0A9E, 0x650F6532,
+ 0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013, 0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o64
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically
+ * using the following model parameters:
+ *
+ * Generator Polynomial = ................. 0x1EDC6F41
+ * Generator Polynomial Length = .......... 32 bits
+ * Reflected Bits = ....................... TRUE
+ * Table Generation Offset = .............. 32 bits
+ * Number of Slices = ..................... 8 slices
+ * Slice Lengths = ........................ 8 8 8 8 8 8 8 8
+ * Directory Name = ....................... .\
+ * File Name = ............................ 8x256_tables.c
+ */
+
+static
+u32 crc_tableil8_o72[256] =
+{
+ 0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, 0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD,
+ 0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, 0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2,
+ 0xC00C303E, 0x2F3C5B27, 0x1B8090FD, 0xF4B0FBE4, 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93,
+ 0xA00A2821, 0x4F3A4338, 0x7B8688E2, 0x94B6E3FB, 0x12FF1F56, 0xFDCF744F, 0xC973BF95, 0x2643D48C,
+ 0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57, 0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20,
+ 0xE5F20E92, 0x0AC2658B, 0x3E7EAE51, 0xD14EC548, 0x570739E5, 0xB83752FC, 0x8C8B9926, 0x63BBF23F,
+ 0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, 0xF70D11C4, 0x183D7ADD, 0x2C81B107, 0xC3B1DA1E,
+ 0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576, 0x970B09DB, 0x783B62C2, 0x4C87A918, 0xA3B7C201,
+ 0x0E045BEB, 0xE13430F2, 0xD588FB28, 0x3AB89031, 0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746,
+ 0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, 0xDCF77483, 0x33C71F9A, 0x077BD440, 0xE84BBF59,
+ 0xCE086BD5, 0x213800CC, 0x1584CB16, 0xFAB4A00F, 0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778,
+ 0xAE0E73CA, 0x413E18D3, 0x7582D309, 0x9AB2B810, 0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67,
+ 0x8BF04D66, 0x64C0267F, 0x507CEDA5, 0xBF4C86BC, 0x39057A11, 0xD6351108, 0xE289DAD2, 0x0DB9B1CB,
+ 0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3, 0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4,
+ 0x4BFC7D58, 0xA4CC1641, 0x9070DD9B, 0x7F40B682, 0xF9094A2F, 0x16392136, 0x2285EAEC, 0xCDB581F5,
+ 0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, 0x990F5230, 0x763F3929, 0x4283F2F3, 0xADB399EA,
+ 0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C, 0xAEFD80A1, 0x41CDEBB8, 0x75712062, 0x9A414B7B,
+ 0x7C0EAFC9, 0x933EC4D0, 0xA7820F0A, 0x48B26413, 0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364,
+ 0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, 0x6EF1B09F, 0x81C1DB86, 0xB57D105C, 0x5A4D7B45,
+ 0xBC029FF7, 0x5332F4EE, 0x678E3F34, 0x88BE542D, 0x0EF7A880, 0xE1C7C399, 0xD57B0843, 0x3A4B635A,
+ 0x99FCA15B, 0x76CCCA42, 0x42700198, 0xAD406A81, 0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6,
+ 0xF9FAB944, 0x16CAD25D, 0x22761987, 0xCD46729E, 0x4B0F8E33, 0xA43FE52A, 0x90832EF0, 0x7FB345E9,
+ 0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF, 0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8,
+ 0x39F6897A, 0xD6C6E263, 0xE27A29B9, 0x0D4A42A0, 0x8B03BE0D, 0x6433D514, 0x508F1ECE, 0xBFBF75D7,
+ 0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, 0xA0F9DB4A, 0x4FC9B053, 0x7B757B89, 0x94451090,
+ 0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8, 0xC0FFC355, 0x2FCFA84C, 0x1B736396, 0xF443088F,
+ 0xD200DC03, 0x3D30B71A, 0x098C7CC0, 0xE6BC17D9, 0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE,
+ 0xB206C41C, 0x5D36AF05, 0x698A64DF, 0x86BA0FC6, 0x00F3F36B, 0xEFC39872, 0xDB7F53A8, 0x344F38B1,
+ 0x97F8FAB0, 0x78C891A9, 0x4C745A73, 0xA344316A, 0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D,
+ 0xF7FEE2AF, 0x18CE89B6, 0x2C72426C, 0xC3422975, 0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02,
+ 0x57F4CA8E, 0xB8C4A197, 0x8C786A4D, 0x63480154, 0xE501FDF9, 0x0A3196E0, 0x3E8D5D3A, 0xD1BD3623,
+ 0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B, 0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o72
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically
+ * using the following model parameters:
+ *
+ * Generator Polynomial = ................. 0x1EDC6F41
+ * Generator Polynomial Length = .......... 32 bits
+ * Reflected Bits = ....................... TRUE
+ * Table Generation Offset = .............. 32 bits
+ * Number of Slices = ..................... 8 slices
+ * Slice Lengths = ........................ 8 8 8 8 8 8 8 8
+ * Directory Name = ....................... .\
+ * File Name = ............................ 8x256_tables.c
+ */
+
+static
+u32 crc_tableil8_o80[256] =
+{
+ 0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, 0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089,
+ 0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B, 0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA,
+ 0x9C5BFAA6, 0xF458D66E, 0x4C5DA336, 0x245E8FFE, 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F,
+ 0xD27607F5, 0xBA752B3D, 0x02705E65, 0x6A7372AD, 0x7796C224, 0x1F95EEEC, 0xA7909BB4, 0xCF93B77C,
+ 0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5, 0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334,
+ 0x73767EEE, 0x1B755226, 0xA370277E, 0xCB730BB6, 0xD696BB3F, 0xBE9597F7, 0x0690E2AF, 0x6E93CE67,
+ 0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, 0x04E0BCCA, 0x6CE39002, 0xD4E6E55A, 0xBCE5C992,
+ 0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110, 0x4ACD4199, 0x22CE6D51, 0x9ACB1809, 0xF2C834C1,
+ 0x7AB7077A, 0x12B42BB2, 0xAAB15EEA, 0xC2B27222, 0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3,
+ 0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, 0x917A3FF8, 0xF9791330, 0x417C6668, 0x297F4AA0,
+ 0xE6ECFDDC, 0x8EEFD114, 0x36EAA44C, 0x5EE98884, 0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55,
+ 0xA8C1008F, 0xC0C22C47, 0x78C7591F, 0x10C475D7, 0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006,
+ 0x47EC84C7, 0x2FEFA80F, 0x97EADD57, 0xFFE9F19F, 0xE20C4116, 0x8A0F6DDE, 0x320A1886, 0x5A09344E,
+ 0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC, 0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D,
+ 0xDBB77E61, 0xB3B452A9, 0x0BB127F1, 0x63B20B39, 0x7E57BBB0, 0x16549778, 0xAE51E220, 0xC652CEE8,
+ 0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, 0x307A46E3, 0x58796A2B, 0xE07C1F73, 0x887F33BB,
+ 0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC, 0x508ECB25, 0x388DE7ED, 0x808892B5, 0xE88BBE7D,
+ 0xBB43F3A7, 0xD340DF6F, 0x6B45AA37, 0x034686FF, 0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E,
+ 0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, 0xCCD53183, 0xA4D61D4B, 0x1CD36813, 0x74D044DB,
+ 0x27180901, 0x4F1B25C9, 0xF71E5091, 0x9F1D7C59, 0x82F8CCD0, 0xEAFBE018, 0x52FE9540, 0x3AFDB988,
+ 0xC8358D49, 0xA036A181, 0x1833D4D9, 0x7030F811, 0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0,
+ 0x8618701A, 0xEE1B5CD2, 0x561E298A, 0x3E1D0542, 0x23F8B5CB, 0x4BFB9903, 0xF3FEEC5B, 0x9BFDC093,
+ 0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7, 0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766,
+ 0x1A438ABC, 0x7240A674, 0xCA45D32C, 0xA246FFE4, 0xBFA34F6D, 0xD7A063A5, 0x6FA516FD, 0x07A63A35,
+ 0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, 0x2A39CC5F, 0x423AE097, 0xFA3F95CF, 0x923CB907,
+ 0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185, 0x6414310C, 0x0C171DC4, 0xB412689C, 0xDC114454,
+ 0x1382F328, 0x7B81DFE0, 0xC384AAB8, 0xAB878670, 0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1,
+ 0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, 0xF84FCBAA, 0x904CE762, 0x2849923A, 0x404ABEF2,
+ 0xB2828A33, 0xDA81A6FB, 0x6284D3A3, 0x0A87FF6B, 0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA,
+ 0xFCAF7760, 0x94AC5BA8, 0x2CA92EF0, 0x44AA0238, 0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9,
+ 0x2ED97095, 0x46DA5C5D, 0xFEDF2905, 0x96DC05CD, 0x8B39B544, 0xE33A998C, 0x5B3FECD4, 0x333CC01C,
+ 0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E, 0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o80
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically
+ * using the following model parameters:
+ *
+ * Generator Polynomial = ................. 0x1EDC6F41
+ * Generator Polynomial Length = .......... 32 bits
+ * Reflected Bits = ....................... TRUE
+ * Table Generation Offset = .............. 32 bits
+ * Number of Slices = ..................... 8 slices
+ * Slice Lengths = ........................ 8 8 8 8 8 8 8 8
+ * Directory Name = ....................... .\
+ * File Name = ............................ 8x256_tables.c
+ */
+
+static
+u32 crc_tableil8_o88[256] =
+{
+ 0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, 0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504,
+ 0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3, 0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE,
+ 0x847609B4, 0xCD4A7493, 0x160EF3FA, 0x5F328EDD, 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0,
+ 0xC64D0D6E, 0x8F717049, 0x5435F720, 0x1D098A07, 0xE7508F03, 0xAE6CF224, 0x7528754D, 0x3C14086A,
+ 0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0, 0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D,
+ 0x4F3B6143, 0x06071C64, 0xDD439B0D, 0x947FE62A, 0x6E26E32E, 0x271A9E09, 0xFC5E1960, 0xB5626447,
+ 0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, 0xA86BEE40, 0xE1579367, 0x3A13140E, 0x732F6929,
+ 0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E, 0xEA50EA9A, 0xA36C97BD, 0x782810D4, 0x31146DF3,
+ 0x1A00CB32, 0x533CB615, 0x8878317C, 0xC1444C5B, 0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36,
+ 0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, 0x79264D85, 0x301A30A2, 0xEB5EB7CB, 0xA262CAEC,
+ 0x9E76C286, 0xD74ABFA1, 0x0C0E38C8, 0x453245EF, 0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782,
+ 0xDC4DC65C, 0x9571BB7B, 0x4E353C12, 0x07094135, 0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358,
+ 0x1700AEAB, 0x5E3CD38C, 0x857854E5, 0xCC4429C2, 0x361D2CC6, 0x7F2151E1, 0xA465D688, 0xED59ABAF,
+ 0x553BAA71, 0x1C07D756, 0xC743503F, 0x8E7F2D18, 0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75,
+ 0x9376A71F, 0xDA4ADA38, 0x010E5D51, 0x48322076, 0xB26B2572, 0xFB575855, 0x2013DF3C, 0x692FA21B,
+ 0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, 0xF05021A8, 0xB96C5C8F, 0x6228DBE6, 0x2B14A6C1,
+ 0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D, 0x151C1409, 0x5C20692E, 0x8764EE47, 0xCE589360,
+ 0x763A92BE, 0x3F06EF99, 0xE44268F0, 0xAD7E15D7, 0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA,
+ 0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, 0x916A1DBD, 0xD856609A, 0x0312E7F3, 0x4A2E9AD4,
+ 0xF24C9B0A, 0xBB70E62D, 0x60346144, 0x29081C63, 0xD3511967, 0x9A6D6440, 0x4129E329, 0x08159E0E,
+ 0x3901F3FD, 0x703D8EDA, 0xAB7909B3, 0xE2457494, 0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9,
+ 0x7B3AF727, 0x32068A00, 0xE9420D69, 0xA07E704E, 0x5A27754A, 0x131B086D, 0xC85F8F04, 0x8163F223,
+ 0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20, 0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D,
+ 0xFF4CFE93, 0xB67083B4, 0x6D3404DD, 0x240879FA, 0xDE517CFE, 0x976D01D9, 0x4C2986B0, 0x0515FB97,
+ 0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, 0x0F1CDF3B, 0x4620A21C, 0x9D642575, 0xD4585852,
+ 0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5, 0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF, 0x96635C88,
+ 0xAA7754E2, 0xE34B29C5, 0x380FAEAC, 0x7133D38B, 0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6,
+ 0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, 0xC951D255, 0x806DAF72, 0x5B29281B, 0x1215553C,
+ 0x230138CF, 0x6A3D45E8, 0xB179C281, 0xF845BFA6, 0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB,
+ 0x613A3C15, 0x28064132, 0xF342C65B, 0xBA7EBB7C, 0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911,
+ 0xA777317B, 0xEE4B4C5C, 0x350FCB35, 0x7C33B612, 0x866AB316, 0xCF56CE31, 0x14124958, 0x5D2E347F,
+ 0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8, 0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o88
+ */
+
+//#define VERIFY_ASSERTION
+
+#ifdef VERIFY_ASSERTION
+
+// Reference implementation that folds one input byte into the CRC per step.
+// It is compiled only when VERIFY_ASSERTION is defined, so that the assertion
+// in Crc32c_ComputeBuf can compare it against the slicing variant.
+static really_inline
+u32 crc32c(u32 running_crc, const unsigned char* p_buf, size_t length) {
+    u32 state = running_crc;
+    for (size_t pos = 0; pos < length; ++pos) {
+        // Low byte of (state ^ data byte) selects the table entry.
+        const u32 slot = (state ^ p_buf[pos]) & 0x000000FF;
+        state = (state >> 8) ^ crc_tableil8_o32[slot];
+    }
+    return state;
+}
+
+#endif // VERIFY_ASSERTION
+
+// Slicing-by-8 approach, which is much faster. Derived from Intel's
+// BSD-licensed code, with additions to handle an unaligned start and a
+// trailing partial block automatically.
+//
+// running_crc: CRC state to continue from; it is used as given.
+// p_buf:       first byte of the data to checksum.
+// length:      number of bytes available at p_buf.
+// Returns the updated CRC state.
+//
+// NOTE(review): the main loop loads each word in native byte order and uses
+// its low byte for the earliest data byte, so it matches the byte-wise loops
+// only on a little-endian target -- confirm the build guarantees this.
+static really_inline
+u32 crc32c_sb8_64_bit(u32 running_crc, const unsigned char* p_buf,
+                      const size_t length) {
+    u32 crc = running_crc;
+
+    // Process byte-by-byte until p_buf is aligned. The lead-in is clamped to
+    // length: for a buffer that ends before the next 4-byte boundary, an
+    // unclamped count would make (length - init_bytes) wrap around and send
+    // both this loop and the main loop past the end of the buffer.
+
+    const unsigned char *aligned_buf = ROUNDUP_PTR(p_buf, 4);
+    size_t init_bytes = aligned_buf - p_buf;
+    if (init_bytes > length) {
+        init_bytes = length;
+    }
+    size_t running_length = ((length - init_bytes)/8)*8;
+    size_t end_bytes = length - init_bytes - running_length;
+
+    for (size_t li = 0; li < init_bytes; li++) {
+        crc = crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8);
+    }
+
+    // Main aligned loop, processes eight bytes at a time.
+
+    u32 term1, term2;
+    for (size_t li = 0; li < running_length/8; li++) {
+        // First four bytes: mixed into the running CRC, then looked up in the
+        // o88..o64 tables, one table per byte.
+        u32 block = *(const u32 *)p_buf;
+        crc ^= block;
+        p_buf += 4;
+        term1 = crc_tableil8_o88[crc & 0x000000FF] ^
+                crc_tableil8_o80[(crc >> 8) & 0x000000FF];
+        term2 = crc >> 16;
+        crc = term1 ^
+              crc_tableil8_o72[term2 & 0x000000FF] ^
+              crc_tableil8_o64[(term2 >> 8) & 0x000000FF];
+
+        // Next four bytes: looked up directly in the o56..o32 tables and
+        // folded into the result.
+        block = *(const u32 *)p_buf;
+
+        term1 = crc_tableil8_o56[block & 0x000000FF] ^
+                crc_tableil8_o48[(block >> 8) & 0x000000FF];
+
+        term2 = block >> 16;
+        crc = crc ^
+              term1 ^
+              crc_tableil8_o40[term2 & 0x000000FF] ^
+              crc_tableil8_o32[(term2 >> 8) & 0x000000FF];
+        p_buf += 4;
+    }
+
+    // Remaining bytes
+
+    for (size_t li = 0; li < end_bytes; li++) {
+        crc = crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8);
+    }
+
+    return crc;
+}
+
#else // HAVE_SSE42
-
-#ifdef ARCH_64_BIT
-#define CRC_WORD 8
-#define CRC_TYPE u64a
-#define CRC_FUNC _mm_crc32_u64
-#else
-#define CRC_WORD 4
-#define CRC_TYPE u32
-#define CRC_FUNC _mm_crc32_u32
-#endif
-
-/*
- * Use the crc32 instruction from SSE4.2 to compute our checksum - same
- * polynomial as the above function.
- */
-static really_inline
-u32 crc32c_sse42(u32 running_crc, const unsigned char* p_buf,
- const size_t length) {
- u32 crc = running_crc;
-
- // Process byte-by-byte until p_buf is aligned
-
- const unsigned char *aligned_buf = ROUNDUP_PTR(p_buf, CRC_WORD);
- size_t init_bytes = aligned_buf - p_buf;
- size_t running_length = ((length - init_bytes)/CRC_WORD)*CRC_WORD;
- size_t end_bytes = length - init_bytes - running_length;
-
- while (p_buf < aligned_buf) {
- crc = _mm_crc32_u8(crc, *p_buf++);
- }
-
- // Main aligned loop, processes a word at a time.
-
- for (size_t li = 0; li < running_length/CRC_WORD; li++) {
- CRC_TYPE block = *(const CRC_TYPE *)p_buf;
- crc = CRC_FUNC(crc, block);
- p_buf += CRC_WORD;
- }
-
- // Remaining bytes
-
- for(size_t li = 0; li < end_bytes; li++) {
- crc = _mm_crc32_u8(crc, *p_buf++);
- }
-
- return crc;
-}
-#endif
-
-#ifdef VERIFY_ASSERTION
-#include <assert.h>
-#endif
-
-// Externally visible function
-u32 Crc32c_ComputeBuf(u32 inCrc32, const void *buf, size_t bufLen) {
+
+#ifdef ARCH_64_BIT
+#define CRC_WORD 8
+#define CRC_TYPE u64a
+#define CRC_FUNC _mm_crc32_u64
+#else
+#define CRC_WORD 4
+#define CRC_TYPE u32
+#define CRC_FUNC _mm_crc32_u32
+#endif
+
+/*
+ * Use the crc32 instruction from SSE4.2 to compute our checksum - same
+ * polynomial as the above function.
+ */
+static really_inline
+u32 crc32c_sse42(u32 running_crc, const unsigned char* p_buf,
+ const size_t length) {
+ u32 crc = running_crc;
+
+ // Process byte-by-byte until p_buf is aligned
+
+ const unsigned char *aligned_buf = ROUNDUP_PTR(p_buf, CRC_WORD);
+ size_t init_bytes = aligned_buf - p_buf;
+ size_t running_length = ((length - init_bytes)/CRC_WORD)*CRC_WORD;
+ size_t end_bytes = length - init_bytes - running_length;
+
+ while (p_buf < aligned_buf) {
+ crc = _mm_crc32_u8(crc, *p_buf++);
+ }
+
+ // Main aligned loop, processes a word at a time.
+
+ for (size_t li = 0; li < running_length/CRC_WORD; li++) {
+ CRC_TYPE block = *(const CRC_TYPE *)p_buf;
+ crc = CRC_FUNC(crc, block);
+ p_buf += CRC_WORD;
+ }
+
+ // Remaining bytes
+
+ for(size_t li = 0; li < end_bytes; li++) {
+ crc = _mm_crc32_u8(crc, *p_buf++);
+ }
+
+ return crc;
+}
+#endif
+
+#ifdef VERIFY_ASSERTION
+#include <assert.h>
+#endif
+
+// Externally visible function
+u32 Crc32c_ComputeBuf(u32 inCrc32, const void *buf, size_t bufLen) {
#if defined(HAVE_SSE42)
- u32 crc = crc32c_sse42(inCrc32, (const unsigned char *)buf, bufLen);
-#else
- u32 crc = crc32c_sb8_64_bit(inCrc32, (const unsigned char *)buf, bufLen);
-#endif
-
-#ifdef VERIFY_ASSERTION
- assert(crc == crc32c(inCrc32, (const unsigned char *)buf, bufLen));
-#endif
-
- return crc;
-}
+ u32 crc = crc32c_sse42(inCrc32, (const unsigned char *)buf, bufLen);
+#else
+ u32 crc = crc32c_sb8_64_bit(inCrc32, (const unsigned char *)buf, bufLen);
+#endif
+
+#ifdef VERIFY_ASSERTION
+ assert(crc == crc32c(inCrc32, (const unsigned char *)buf, bufLen));
+#endif
+
+ return crc;
+}
diff --git a/contrib/libs/hyperscan/src/crc32.h b/contrib/libs/hyperscan/src/crc32.h
index bbfa23284d..7e9e1cecb8 100644
--- a/contrib/libs/hyperscan/src/crc32.h
+++ b/contrib/libs/hyperscan/src/crc32.h
@@ -1,46 +1,46 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef CRC32_H_36A5015B5840C1
-#define CRC32_H_36A5015B5840C1
-
-#include "ue2common.h"
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-u32 Crc32c_ComputeBuf(u32 inCrc32, const void *buf, size_t bufLen);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* CRC32_H_36A5015B5840C1 */
-
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CRC32_H_36A5015B5840C1
+#define CRC32_H_36A5015B5840C1
+
+#include "ue2common.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+u32 Crc32c_ComputeBuf(u32 inCrc32, const void *buf, size_t bufLen);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CRC32_H_36A5015B5840C1 */
+
diff --git a/contrib/libs/hyperscan/src/database.c b/contrib/libs/hyperscan/src/database.c
index b2838bc9a5..6adf1419dd 100644
--- a/contrib/libs/hyperscan/src/database.c
+++ b/contrib/libs/hyperscan/src/database.c
@@ -1,462 +1,462 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Runtime code for hs_database manipulation.
- */
-
-#include <stdio.h>
-#include <string.h>
-
-#include "allocator.h"
-#include "hs_common.h"
-#include "hs_internal.h"
-#include "hs_version.h"
-#include "ue2common.h"
-#include "database.h"
-#include "crc32.h"
-#include "rose/rose_internal.h"
-#include "util/unaligned.h"
-
-static really_inline
-int db_correctly_aligned(const void *db) {
- return ISALIGNED_N(db, alignof(unsigned long long));
-}
-
-HS_PUBLIC_API
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Runtime code for hs_database manipulation.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "allocator.h"
+#include "hs_common.h"
+#include "hs_internal.h"
+#include "hs_version.h"
+#include "ue2common.h"
+#include "database.h"
+#include "crc32.h"
+#include "rose/rose_internal.h"
+#include "util/unaligned.h"
+
+static really_inline
+int db_correctly_aligned(const void *db) {
+ return ISALIGNED_N(db, alignof(unsigned long long));
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_free_database(hs_database_t *db) {
- if (db && db->magic != HS_DB_MAGIC) {
- return HS_INVALID;
- }
- hs_database_free(db);
-
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+ if (db && db->magic != HS_DB_MAGIC) {
+ return HS_INVALID;
+ }
+ hs_database_free(db);
+
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_serialize_database(const hs_database_t *db, char **bytes,
size_t *serialized_length) {
- if (!db || !bytes || !serialized_length) {
- return HS_INVALID;
- }
-
- if (!db_correctly_aligned(db)) {
- return HS_BAD_ALIGN;
- }
-
- hs_error_t ret = validDatabase(db);
- if (ret != HS_SUCCESS) {
- return ret;
- }
-
- size_t length = sizeof(struct hs_database) + db->length;
-
- char *out = hs_misc_alloc(length);
- ret = hs_check_alloc(out);
- if (ret != HS_SUCCESS) {
- hs_misc_free(out);
- return ret;
- }
-
- memset(out, 0, length);
-
- u32 *buf = (u32 *)out;
- *buf = db->magic;
- buf++;
- *buf = db->version;
- buf++;
- *buf = db->length;
- buf++;
- memcpy(buf, &db->platform, sizeof(u64a));
- buf += 2;
- *buf = db->crc32;
- buf++;
- *buf = db->reserved0;
- buf++;
- *buf = db->reserved1;
- buf++;
-
- const char *bytecode = hs_get_bytecode(db);
- memcpy(buf, bytecode, db->length);
-
- *bytes = out;
- *serialized_length = length;
- return HS_SUCCESS;
-}
-
-// check that the database header's platform is compatible with the current
-// runtime platform.
-static
-hs_error_t db_check_platform(const u64a p) {
- if (p != hs_current_platform
+ if (!db || !bytes || !serialized_length) {
+ return HS_INVALID;
+ }
+
+ if (!db_correctly_aligned(db)) {
+ return HS_BAD_ALIGN;
+ }
+
+ hs_error_t ret = validDatabase(db);
+ if (ret != HS_SUCCESS) {
+ return ret;
+ }
+
+ size_t length = sizeof(struct hs_database) + db->length;
+
+ char *out = hs_misc_alloc(length);
+ ret = hs_check_alloc(out);
+ if (ret != HS_SUCCESS) {
+ hs_misc_free(out);
+ return ret;
+ }
+
+ memset(out, 0, length);
+
+ u32 *buf = (u32 *)out;
+ *buf = db->magic;
+ buf++;
+ *buf = db->version;
+ buf++;
+ *buf = db->length;
+ buf++;
+ memcpy(buf, &db->platform, sizeof(u64a));
+ buf += 2;
+ *buf = db->crc32;
+ buf++;
+ *buf = db->reserved0;
+ buf++;
+ *buf = db->reserved1;
+ buf++;
+
+ const char *bytecode = hs_get_bytecode(db);
+ memcpy(buf, bytecode, db->length);
+
+ *bytes = out;
+ *serialized_length = length;
+ return HS_SUCCESS;
+}
+
+// check that the database header's platform is compatible with the current
+// runtime platform.
+static
+hs_error_t db_check_platform(const u64a p) {
+ if (p != hs_current_platform
&& p != (hs_current_platform | hs_current_platform_no_avx2)
&& p != (hs_current_platform | hs_current_platform_no_avx512)
&& p != (hs_current_platform | hs_current_platform_no_avx512vbmi)) {
- return HS_DB_PLATFORM_ERROR;
- }
- // passed all checks
- return HS_SUCCESS;
-}
-
-// Decode and check the database header, returning appropriate errors or
-// HS_SUCCESS if it's OK. The header should be allocated on the stack
-// and later copied into the deserialized database.
-static
-hs_error_t db_decode_header(const char **bytes, const size_t length,
- struct hs_database *header) {
- if (!*bytes) {
- return HS_INVALID;
- }
-
- if (length < sizeof(struct hs_database)) {
- return HS_INVALID;
- }
-
- // There's no requirement, really, that the serialized stream of bytes
- // we've been given is 4-byte aligned, so we use unaligned loads here.
-
- const u32 *buf = (const u32 *)*bytes;
-
- // Zero header so that none of it (e.g. its padding) is uninitialized.
- memset(header, 0, sizeof(struct hs_database));
-
- header->magic = unaligned_load_u32(buf++);
- if (header->magic != HS_DB_MAGIC) {
- return HS_INVALID;
- }
-
- header->version = unaligned_load_u32(buf++);
- if (header->version != HS_DB_VERSION) {
- return HS_DB_VERSION_ERROR;
- }
-
- header->length = unaligned_load_u32(buf++);
- if (length != sizeof(struct hs_database) + header->length) {
- DEBUG_PRINTF("bad length %zu, expecting %zu\n", length,
- sizeof(struct hs_database) + header->length);
- return HS_INVALID;
- }
-
- header->platform = unaligned_load_u64a(buf);
- buf += 2;
- header->crc32 = unaligned_load_u32(buf++);
- header->reserved0 = unaligned_load_u32(buf++);
- header->reserved1 = unaligned_load_u32(buf++);
-
- *bytes = (const char *)buf;
-
- return HS_SUCCESS; // Header checks out
-}
-
-// Check the CRC on a database
-static
-hs_error_t db_check_crc(const hs_database_t *db) {
- const char *bytecode = hs_get_bytecode(db);
- u32 crc = Crc32c_ComputeBuf(0, bytecode, db->length);
- if (crc != db->crc32) {
- DEBUG_PRINTF("crc mismatch! 0x%x != 0x%x\n", crc, db->crc32);
- return HS_INVALID;
- }
- return HS_SUCCESS;
-}
-
-static
-void db_copy_bytecode(const char *serialized, hs_database_t *db) {
- // we need to align things manually
- uintptr_t shift = (uintptr_t)db->bytes & 0x3f;
- db->bytecode = offsetof(struct hs_database, bytes) - shift;
- char *bytecode = (char *)db + db->bytecode;
-
- // Copy the bytecode into place
- memcpy(bytecode, serialized, db->length);
-}
-
-HS_PUBLIC_API
+ return HS_DB_PLATFORM_ERROR;
+ }
+ // passed all checks
+ return HS_SUCCESS;
+}
+
+// Decode and check the database header, returning appropriate errors or
+// HS_SUCCESS if it's OK. The header should be allocated on the stack
+// and later copied into the deserialized database.
+static
+hs_error_t db_decode_header(const char **bytes, const size_t length,
+ struct hs_database *header) {
+ if (!*bytes) {
+ return HS_INVALID;
+ }
+
+ if (length < sizeof(struct hs_database)) {
+ return HS_INVALID;
+ }
+
+ // There's no requirement, really, that the serialized stream of bytes
+ // we've been given is 4-byte aligned, so we use unaligned loads here.
+
+ const u32 *buf = (const u32 *)*bytes;
+
+ // Zero header so that none of it (e.g. its padding) is uninitialized.
+ memset(header, 0, sizeof(struct hs_database));
+
+ header->magic = unaligned_load_u32(buf++);
+ if (header->magic != HS_DB_MAGIC) {
+ return HS_INVALID;
+ }
+
+ header->version = unaligned_load_u32(buf++);
+ if (header->version != HS_DB_VERSION) {
+ return HS_DB_VERSION_ERROR;
+ }
+
+ header->length = unaligned_load_u32(buf++);
+ if (length != sizeof(struct hs_database) + header->length) {
+ DEBUG_PRINTF("bad length %zu, expecting %zu\n", length,
+ sizeof(struct hs_database) + header->length);
+ return HS_INVALID;
+ }
+
+ header->platform = unaligned_load_u64a(buf);
+ buf += 2;
+ header->crc32 = unaligned_load_u32(buf++);
+ header->reserved0 = unaligned_load_u32(buf++);
+ header->reserved1 = unaligned_load_u32(buf++);
+
+ *bytes = (const char *)buf;
+
+ return HS_SUCCESS; // Header checks out
+}
+
+// Check the CRC on a database
+static
+hs_error_t db_check_crc(const hs_database_t *db) {
+ const char *bytecode = hs_get_bytecode(db);
+ u32 crc = Crc32c_ComputeBuf(0, bytecode, db->length);
+ if (crc != db->crc32) {
+ DEBUG_PRINTF("crc mismatch! 0x%x != 0x%x\n", crc, db->crc32);
+ return HS_INVALID;
+ }
+ return HS_SUCCESS;
+}
+
+static
+void db_copy_bytecode(const char *serialized, hs_database_t *db) {
+ // we need to align things manually
+ uintptr_t shift = (uintptr_t)db->bytes & 0x3f;
+ db->bytecode = offsetof(struct hs_database, bytes) - shift;
+ char *bytecode = (char *)db + db->bytecode;
+
+ // Copy the bytecode into place
+ memcpy(bytecode, serialized, db->length);
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_deserialize_database_at(const char *bytes,
const size_t length,
hs_database_t *db) {
- if (!bytes || !db) {
- return HS_INVALID;
- }
-
- // We require the user to deserialize into an 8-byte aligned region.
- if (!ISALIGNED_N(db, 8)) {
- return HS_BAD_ALIGN;
- }
-
- // Decode the header
- hs_database_t header;
- hs_error_t ret = db_decode_header(&bytes, length, &header);
- if (ret != HS_SUCCESS) {
- return ret;
- }
-
- // Make sure the serialized database is for our platform
- ret = db_check_platform(header.platform);
- if (ret != HS_SUCCESS) {
- return ret;
- }
-
- // Zero new space for safety
- size_t dblength = sizeof(struct hs_database) + header.length;
- memset(db, 0, dblength);
-
- // Copy the decoded header into place
- memcpy(db, &header, sizeof(header));
-
- // Copy the bytecode into the correctly-aligned location, set offsets
- db_copy_bytecode(bytes, db);
-
- if (db_check_crc(db) != HS_SUCCESS) {
- return HS_INVALID;
- }
-
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+ if (!bytes || !db) {
+ return HS_INVALID;
+ }
+
+ // We require the user to deserialize into an 8-byte aligned region.
+ if (!ISALIGNED_N(db, 8)) {
+ return HS_BAD_ALIGN;
+ }
+
+ // Decode the header
+ hs_database_t header;
+ hs_error_t ret = db_decode_header(&bytes, length, &header);
+ if (ret != HS_SUCCESS) {
+ return ret;
+ }
+
+ // Make sure the serialized database is for our platform
+ ret = db_check_platform(header.platform);
+ if (ret != HS_SUCCESS) {
+ return ret;
+ }
+
+ // Zero new space for safety
+ size_t dblength = sizeof(struct hs_database) + header.length;
+ memset(db, 0, dblength);
+
+ // Copy the decoded header into place
+ memcpy(db, &header, sizeof(header));
+
+ // Copy the bytecode into the correctly-aligned location, set offsets
+ db_copy_bytecode(bytes, db);
+
+ if (db_check_crc(db) != HS_SUCCESS) {
+ return HS_INVALID;
+ }
+
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_deserialize_database(const char *bytes,
const size_t length,
hs_database_t **db) {
- if (!bytes || !db) {
- return HS_INVALID;
- }
-
- *db = NULL;
-
- // Decode and check the header
- hs_database_t header;
- hs_error_t ret = db_decode_header(&bytes, length, &header);
- if (ret != HS_SUCCESS) {
- return ret;
- }
-
- // Make sure the serialized database is for our platform
- ret = db_check_platform(header.platform);
- if (ret != HS_SUCCESS) {
- return ret;
- }
-
- // Allocate space for new database
- size_t dblength = sizeof(struct hs_database) + header.length;
- struct hs_database *tempdb = hs_database_alloc(dblength);
- ret = hs_check_alloc(tempdb);
- if (ret != HS_SUCCESS) {
- hs_database_free(tempdb);
- return ret;
- }
-
- // Zero new space for safety
- memset(tempdb, 0, dblength);
-
- // Copy the decoded header into place
- memcpy(tempdb, &header, sizeof(header));
-
- // Copy the bytecode into the correctly-aligned location, set offsets
- db_copy_bytecode(bytes, tempdb);
-
- if (db_check_crc(tempdb) != HS_SUCCESS) {
- hs_database_free(tempdb);
- return HS_INVALID;
- }
-
- *db = tempdb;
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+ if (!bytes || !db) {
+ return HS_INVALID;
+ }
+
+ *db = NULL;
+
+ // Decode and check the header
+ hs_database_t header;
+ hs_error_t ret = db_decode_header(&bytes, length, &header);
+ if (ret != HS_SUCCESS) {
+ return ret;
+ }
+
+ // Make sure the serialized database is for our platform
+ ret = db_check_platform(header.platform);
+ if (ret != HS_SUCCESS) {
+ return ret;
+ }
+
+ // Allocate space for new database
+ size_t dblength = sizeof(struct hs_database) + header.length;
+ struct hs_database *tempdb = hs_database_alloc(dblength);
+ ret = hs_check_alloc(tempdb);
+ if (ret != HS_SUCCESS) {
+ hs_database_free(tempdb);
+ return ret;
+ }
+
+ // Zero new space for safety
+ memset(tempdb, 0, dblength);
+
+ // Copy the decoded header into place
+ memcpy(tempdb, &header, sizeof(header));
+
+ // Copy the bytecode into the correctly-aligned location, set offsets
+ db_copy_bytecode(bytes, tempdb);
+
+ if (db_check_crc(tempdb) != HS_SUCCESS) {
+ hs_database_free(tempdb);
+ return HS_INVALID;
+ }
+
+ *db = tempdb;
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_database_size(const hs_database_t *db, size_t *size) {
- if (!size) {
- return HS_INVALID;
- }
-
- hs_error_t ret = validDatabase(db);
- if (unlikely(ret != HS_SUCCESS)) {
- return ret;
- }
-
- *size = sizeof(struct hs_database) + db->length;
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+ if (!size) {
+ return HS_INVALID;
+ }
+
+ hs_error_t ret = validDatabase(db);
+ if (unlikely(ret != HS_SUCCESS)) {
+ return ret;
+ }
+
+ *size = sizeof(struct hs_database) + db->length;
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_serialized_database_size(const char *bytes,
const size_t length,
size_t *size) {
- // Decode and check the header
- hs_database_t header;
- hs_error_t ret = db_decode_header(&bytes, length, &header);
- if (ret != HS_SUCCESS) {
- return ret;
- }
-
- if (!size) {
- return HS_INVALID;
- }
-
- *size = sizeof(struct hs_database) + header.length;
- return HS_SUCCESS;
-}
-
-hs_error_t dbIsValid(const hs_database_t *db) {
- if (db->magic != HS_DB_MAGIC) {
- DEBUG_PRINTF("bad magic\n");
- return HS_INVALID;
- }
-
- if (db->version != HS_DB_VERSION) {
- DEBUG_PRINTF("bad version\n");
- return HS_DB_VERSION_ERROR;
- }
-
- if (db_check_platform(db->platform) != HS_SUCCESS) {
- DEBUG_PRINTF("bad platform\n");
- return HS_DB_PLATFORM_ERROR;
- }
-
- if (!ISALIGNED_16(hs_get_bytecode(db))) {
- DEBUG_PRINTF("bad alignment\n");
- return HS_INVALID;
- }
-
- hs_error_t rv = db_check_crc(db);
- if (rv != HS_SUCCESS) {
- DEBUG_PRINTF("bad crc\n");
- return rv;
- }
-
- return HS_SUCCESS;
-}
-
-#if defined(_WIN32)
-#define SNPRINTF_COMPAT _snprintf
-#else
-#define SNPRINTF_COMPAT snprintf
-#endif
-
-/** Allocate a buffer and prints the database info into it. Returns an
- * appropriate error code on failure, or HS_SUCCESS on success. */
-static
-hs_error_t print_database_string(char **s, u32 version, const platform_t plat,
- u32 raw_mode) {
- assert(s);
- *s = NULL;
-
- u8 release = (version >> 8) & 0xff;
- u8 minor = (version >> 16) & 0xff;
- u8 major = (version >> 24) & 0xff;
-
+ // Decode and check the header
+ hs_database_t header;
+ hs_error_t ret = db_decode_header(&bytes, length, &header);
+ if (ret != HS_SUCCESS) {
+ return ret;
+ }
+
+ if (!size) {
+ return HS_INVALID;
+ }
+
+ *size = sizeof(struct hs_database) + header.length;
+ return HS_SUCCESS;
+}
+
+hs_error_t dbIsValid(const hs_database_t *db) {
+ if (db->magic != HS_DB_MAGIC) {
+ DEBUG_PRINTF("bad magic\n");
+ return HS_INVALID;
+ }
+
+ if (db->version != HS_DB_VERSION) {
+ DEBUG_PRINTF("bad version\n");
+ return HS_DB_VERSION_ERROR;
+ }
+
+ if (db_check_platform(db->platform) != HS_SUCCESS) {
+ DEBUG_PRINTF("bad platform\n");
+ return HS_DB_PLATFORM_ERROR;
+ }
+
+ if (!ISALIGNED_16(hs_get_bytecode(db))) {
+ DEBUG_PRINTF("bad alignment\n");
+ return HS_INVALID;
+ }
+
+ hs_error_t rv = db_check_crc(db);
+ if (rv != HS_SUCCESS) {
+ DEBUG_PRINTF("bad crc\n");
+ return rv;
+ }
+
+ return HS_SUCCESS;
+}
+
+#if defined(_WIN32)
+#define SNPRINTF_COMPAT _snprintf
+#else
+#define SNPRINTF_COMPAT snprintf
+#endif
+
+/** Allocate a buffer and prints the database info into it. Returns an
+ * appropriate error code on failure, or HS_SUCCESS on success. */
+static
+hs_error_t print_database_string(char **s, u32 version, const platform_t plat,
+ u32 raw_mode) {
+ assert(s);
+ *s = NULL;
+
+ u8 release = (version >> 8) & 0xff;
+ u8 minor = (version >> 16) & 0xff;
+ u8 major = (version >> 24) & 0xff;
+
const char *features = (plat & HS_PLATFORM_NOAVX512VBMI)
? (plat & HS_PLATFORM_NOAVX512)
? (plat & HS_PLATFORM_NOAVX2) ? "" : "AVX2"
: "AVX512"
: "AVX512VBMI";
-
- const char *mode = NULL;
-
- if (raw_mode == HS_MODE_STREAM) {
- mode = "STREAM";
- } else if (raw_mode == HS_MODE_VECTORED) {
- mode = "VECTORED";
- } else {
- assert(raw_mode == HS_MODE_BLOCK);
- mode = "BLOCK";
- }
-
- // Initial allocation size, which should be large enough to print our info.
- // If it isn't, snprintf will tell us and we can resize appropriately.
- size_t len = 256;
-
- while (1) {
- char *buf = hs_misc_alloc(len);
- hs_error_t ret = hs_check_alloc(buf);
- if (ret != HS_SUCCESS) {
- hs_misc_free(buf);
- return ret;
- }
-
- // Note: SNPRINTF_COMPAT is a macro defined above, to cope with systems
- // that don't have snprintf but have a workalike.
- int p_len = SNPRINTF_COMPAT(
- buf, len, "Version: %u.%u.%u Features: %s Mode: %s",
+
+ const char *mode = NULL;
+
+ if (raw_mode == HS_MODE_STREAM) {
+ mode = "STREAM";
+ } else if (raw_mode == HS_MODE_VECTORED) {
+ mode = "VECTORED";
+ } else {
+ assert(raw_mode == HS_MODE_BLOCK);
+ mode = "BLOCK";
+ }
+
+ // Initial allocation size, which should be large enough to print our info.
+ // If it isn't, snprintf will tell us and we can resize appropriately.
+ size_t len = 256;
+
+ while (1) {
+ char *buf = hs_misc_alloc(len);
+ hs_error_t ret = hs_check_alloc(buf);
+ if (ret != HS_SUCCESS) {
+ hs_misc_free(buf);
+ return ret;
+ }
+
+ // Note: SNPRINTF_COMPAT is a macro defined above, to cope with systems
+ // that don't have snprintf but have a workalike.
+ int p_len = SNPRINTF_COMPAT(
+ buf, len, "Version: %u.%u.%u Features: %s Mode: %s",
major, minor, release, features, mode);
- if (p_len < 0) {
- DEBUG_PRINTF("snprintf output error, returned %d\n", p_len);
- hs_misc_free(buf);
- break;
- } else if ((size_t)p_len < len) { // output fit within buffer.
- assert(buf[p_len] == '\0');
- *s = buf;
- return HS_SUCCESS;
- } else { // output didn't fit: resize and reallocate.
- len = (size_t)p_len + 1; // must add one for null terminator.
- hs_misc_free(buf);
- }
- }
-
- return HS_NOMEM;
-}
-
-HS_PUBLIC_API
+ if (p_len < 0) {
+ DEBUG_PRINTF("snprintf output error, returned %d\n", p_len);
+ hs_misc_free(buf);
+ break;
+ } else if ((size_t)p_len < len) { // output fit within buffer.
+ assert(buf[p_len] == '\0');
+ *s = buf;
+ return HS_SUCCESS;
+ } else { // output didn't fit: resize and reallocate.
+ len = (size_t)p_len + 1; // must add one for null terminator.
+ hs_misc_free(buf);
+ }
+ }
+
+ return HS_NOMEM;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_serialized_database_info(const char *bytes,
size_t length, char **info) {
- if (!info) {
- return HS_INVALID;
- }
- *info = NULL;
-
+ if (!info) {
+ return HS_INVALID;
+ }
+ *info = NULL;
+
// Decode and check the header
hs_database_t header;
hs_error_t ret = db_decode_header(&bytes, length, &header);
if (ret != HS_SUCCESS) {
return ret;
- }
-
+ }
+
u32 mode = unaligned_load_u32(bytes + offsetof(struct RoseEngine, mode));
-
+
return print_database_string(info, header.version, header.platform, mode);
-}
-
-HS_PUBLIC_API
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_database_info(const hs_database_t *db, char **info) {
- if (!info) {
- return HS_INVALID;
- }
- *info = NULL;
-
- if (!db || !db_correctly_aligned(db) || db->magic != HS_DB_MAGIC) {
- return HS_INVALID;
- }
-
- platform_t plat;
- plat = db->platform;
-
- const struct RoseEngine *rose = hs_get_bytecode(db);
-
- return print_database_string(info, db->version, plat, rose->mode);
-}
+ if (!info) {
+ return HS_INVALID;
+ }
+ *info = NULL;
+
+ if (!db || !db_correctly_aligned(db) || db->magic != HS_DB_MAGIC) {
+ return HS_INVALID;
+ }
+
+ platform_t plat;
+ plat = db->platform;
+
+ const struct RoseEngine *rose = hs_get_bytecode(db);
+
+ return print_database_string(info, db->version, plat, rose->mode);
+}
diff --git a/contrib/libs/hyperscan/src/database.h b/contrib/libs/hyperscan/src/database.h
index 5a85be2cd1..f122f97be7 100644
--- a/contrib/libs/hyperscan/src/database.h
+++ b/contrib/libs/hyperscan/src/database.h
@@ -1,87 +1,87 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Runtime code for hs_database manipulation.
- */
-
-#ifndef DATABASE_H_D467FD6F343DDE
-#define DATABASE_H_D467FD6F343DDE
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-#include "hs_compile.h" // for HS_MODE_ flags
-#include "hs_version.h"
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Runtime code for hs_database manipulation.
+ */
+
+#ifndef DATABASE_H_D467FD6F343DDE
+#define DATABASE_H_D467FD6F343DDE
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "hs_compile.h" // for HS_MODE_ flags
+#include "hs_version.h"
+#include "ue2common.h"
#include "util/arch.h"
-
-#define HS_DB_VERSION HS_VERSION_32BIT
-#define HS_DB_MAGIC (0xdbdbdbdbU)
-
-// Values in here cannot (easily) change - add new ones!
-
-// CPU type is the low 6 bits (we can't need more than 64, surely!)
-
-#define HS_PLATFORM_INTEL 1
-#define HS_PLATFORM_CPU_MASK 0x3F
-
-#define HS_PLATFORM_NOAVX2 (4<<13)
+
+#define HS_DB_VERSION HS_VERSION_32BIT
+#define HS_DB_MAGIC (0xdbdbdbdbU)
+
+// Values in here cannot (easily) change - add new ones!
+
+// CPU type is the low 6 bits (we can't need more than 64, surely!)
+
+#define HS_PLATFORM_INTEL 1
+#define HS_PLATFORM_CPU_MASK 0x3F
+
+#define HS_PLATFORM_NOAVX2 (4<<13)
#define HS_PLATFORM_NOAVX512 (8<<13)
#define HS_PLATFORM_NOAVX512VBMI (0x10<<13)
-
-/** \brief Platform features bitmask. */
-typedef u64a platform_t;
-
-static UNUSED
-const platform_t hs_current_platform = {
+
+/** \brief Platform features bitmask. */
+typedef u64a platform_t;
+
+static UNUSED
+const platform_t hs_current_platform = {
#if !defined(HAVE_AVX2)
- HS_PLATFORM_NOAVX2 |
-#endif
+ HS_PLATFORM_NOAVX2 |
+#endif
#if !defined(HAVE_AVX512)
HS_PLATFORM_NOAVX512 |
#endif
#if !defined(HAVE_AVX512VBMI)
HS_PLATFORM_NOAVX512VBMI |
#endif
- 0,
-};
-
-static UNUSED
-const platform_t hs_current_platform_no_avx2 = {
- HS_PLATFORM_NOAVX2 |
+ 0,
+};
+
+static UNUSED
+const platform_t hs_current_platform_no_avx2 = {
+ HS_PLATFORM_NOAVX2 |
HS_PLATFORM_NOAVX512 |
HS_PLATFORM_NOAVX512VBMI |
- 0,
-};
-
+ 0,
+};
+
static UNUSED
const platform_t hs_current_platform_no_avx512 = {
HS_PLATFORM_NOAVX512 |
@@ -95,48 +95,48 @@ const platform_t hs_current_platform_no_avx512vbmi = {
0,
};
-/*
- * a header to enclose the actual bytecode - useful for keeping info about the
- * compiled data.
- */
-struct hs_database {
- u32 magic;
- u32 version;
- u32 length;
- u64a platform;
- u32 crc32;
- u32 reserved0;
- u32 reserved1;
- u32 bytecode; // offset relative to db start
- u32 padding[16];
- char bytes[];
-};
-
-static really_inline
-const void *hs_get_bytecode(const struct hs_database *db) {
- return ((const char *)db + db->bytecode);
-}
-
-/**
- * Cheap database sanity checks used in block mode scan calls and streaming
- * mode open calls.
- */
-static really_inline
-hs_error_t validDatabase(const hs_database_t *db) {
- if (!db || db->magic != HS_DB_MAGIC) {
- return HS_INVALID;
- }
- if (db->version != HS_DB_VERSION) {
- return HS_DB_VERSION_ERROR;
- }
-
- return HS_SUCCESS;
-}
-
-hs_error_t dbIsValid(const struct hs_database *db);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* DATABASE_H_D467FD6F343DDE */
+/*
+ * a header to enclose the actual bytecode - useful for keeping info about the
+ * compiled data.
+ */
+struct hs_database {
+ u32 magic;
+ u32 version;
+ u32 length;
+ u64a platform;
+ u32 crc32;
+ u32 reserved0;
+ u32 reserved1;
+ u32 bytecode; // offset relative to db start
+ u32 padding[16];
+ char bytes[];
+};
+
+static really_inline
+const void *hs_get_bytecode(const struct hs_database *db) {
+ return ((const char *)db + db->bytecode);
+}
+
+/**
+ * Cheap database sanity checks used in block mode scan calls and streaming
+ * mode open calls.
+ */
+static really_inline
+hs_error_t validDatabase(const hs_database_t *db) {
+ if (!db || db->magic != HS_DB_MAGIC) {
+ return HS_INVALID;
+ }
+ if (db->version != HS_DB_VERSION) {
+ return HS_DB_VERSION_ERROR;
+ }
+
+ return HS_SUCCESS;
+}
+
+hs_error_t dbIsValid(const struct hs_database *db);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* DATABASE_H_D467FD6F343DDE */
diff --git a/contrib/libs/hyperscan/src/fdr/engine_description.cpp b/contrib/libs/hyperscan/src/fdr/engine_description.cpp
index 147050e081..1890f7c61d 100644
--- a/contrib/libs/hyperscan/src/fdr/engine_description.cpp
+++ b/contrib/libs/hyperscan/src/fdr/engine_description.cpp
@@ -1,49 +1,49 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "engine_description.h"
-#include "hs_compile.h" // for hs_platform_info
-#include "util/target_info.h"
-
-namespace ue2 {
-
-EngineDescription::~EngineDescription() {}
-
-bool EngineDescription::isValidOnTarget(const target_t &target_in) const {
- return target_in.can_run_on_code_built_for(code_target);
-}
-
-target_t targetByArchFeatures(u64a cpu_features) {
- hs_platform_info p;
- p.tune = HS_TUNE_FAMILY_GENERIC;
- p.cpu_features = cpu_features;
-
- return target_t(p);
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "engine_description.h"
+#include "hs_compile.h" // for hs_platform_info
+#include "util/target_info.h"
+
+namespace ue2 {
+
+EngineDescription::~EngineDescription() {}
+
+bool EngineDescription::isValidOnTarget(const target_t &target_in) const {
+ return target_in.can_run_on_code_built_for(code_target);
+}
+
+target_t targetByArchFeatures(u64a cpu_features) {
+ hs_platform_info p;
+ p.tune = HS_TUNE_FAMILY_GENERIC;
+ p.cpu_features = cpu_features;
+
+ return target_t(p);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/fdr/engine_description.h b/contrib/libs/hyperscan/src/fdr/engine_description.h
index 5e410e3b2c..b545e6474e 100644
--- a/contrib/libs/hyperscan/src/fdr/engine_description.h
+++ b/contrib/libs/hyperscan/src/fdr/engine_description.h
@@ -1,61 +1,61 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ENGINE_DESCRIPTION_H
-#define ENGINE_DESCRIPTION_H
-
-#include "ue2common.h"
-#include "util/target_info.h"
-
-namespace ue2 {
-
-class EngineDescription {
- u32 id;
- target_t code_target; // the target that we built this code for
- u32 numBuckets;
-
-public:
- EngineDescription(u32 id_in, const target_t &code_target_in,
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ENGINE_DESCRIPTION_H
+#define ENGINE_DESCRIPTION_H
+
+#include "ue2common.h"
+#include "util/target_info.h"
+
+namespace ue2 {
+
+class EngineDescription {
+ u32 id;
+ target_t code_target; // the target that we built this code for
+ u32 numBuckets;
+
+public:
+ EngineDescription(u32 id_in, const target_t &code_target_in,
u32 numBuckets_in)
: id(id_in), code_target(code_target_in), numBuckets(numBuckets_in) {}
-
- virtual ~EngineDescription();
-
- u32 getID() const { return id; }
- u32 getNumBuckets() const { return numBuckets; }
-
- bool isValidOnTarget(const target_t &target_in) const;
- virtual u32 getDefaultFloodSuffixLength() const = 0;
-};
-
-/** Returns a target given a CPU feature set value. */
-target_t targetByArchFeatures(u64a cpu_features);
-
-} // namespace ue2
-
-#endif
+
+ virtual ~EngineDescription();
+
+ u32 getID() const { return id; }
+ u32 getNumBuckets() const { return numBuckets; }
+
+ bool isValidOnTarget(const target_t &target_in) const;
+ virtual u32 getDefaultFloodSuffixLength() const = 0;
+};
+
+/** Returns a target given a CPU feature set value. */
+target_t targetByArchFeatures(u64a cpu_features);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/fdr/fdr.c b/contrib/libs/hyperscan/src/fdr/fdr.c
index 15f4afd910..d33756d358 100644
--- a/contrib/libs/hyperscan/src/fdr/fdr.c
+++ b/contrib/libs/hyperscan/src/fdr/fdr.c
@@ -1,50 +1,50 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "fdr.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fdr.h"
#include "fdr_confirm.h"
#include "fdr_confirm_runtime.h"
-#include "fdr_internal.h"
+#include "fdr_internal.h"
#include "fdr_loadval.h"
#include "flood_runtime.h"
#include "scratch.h"
#include "teddy.h"
-#include "teddy_internal.h"
+#include "teddy_internal.h"
#include "util/arch.h"
#include "util/simd_utils.h"
#include "util/uniform_ops.h"
-
+
/** \brief number of bytes processed in each iteration */
#define ITER_BYTES 16
-
+
/** \brief total zone buffer size */
#define ZONE_TOTAL_SIZE 64
-
+
/** \brief maximum number of allowed zones */
#define ZONE_MAX 3
@@ -821,61 +821,61 @@ static const FDRFUNCTYPE funcs[] = {
fdr_exec_teddy_msks4_pck,
};
-#define FAKE_HISTORY_SIZE 16
-static const u8 fake_history[FAKE_HISTORY_SIZE];
-
+#define FAKE_HISTORY_SIZE 16
+static const u8 fake_history[FAKE_HISTORY_SIZE];
+
hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
size_t start, HWLMCallback cb,
struct hs_scratch *scratch, hwlm_group_t groups) {
// We guarantee (for safezone construction) that it is safe to read 16
// bytes before the end of the history buffer.
const u8 *hbuf = fake_history + FAKE_HISTORY_SIZE;
-
- const struct FDR_Runtime_Args a = {
- buf,
- len,
+
+ const struct FDR_Runtime_Args a = {
+ buf,
+ len,
hbuf,
- 0,
- start,
- cb,
+ 0,
+ start,
+ cb,
scratch,
- nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
- 0
- };
- if (unlikely(a.start_offset >= a.len)) {
- return HWLM_SUCCESS;
- } else {
- assert(funcs[fdr->engineID]);
+ nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
+ 0
+ };
+ if (unlikely(a.start_offset >= a.len)) {
+ return HWLM_SUCCESS;
+ } else {
+ assert(funcs[fdr->engineID]);
return funcs[fdr->engineID](fdr, &a, groups);
- }
-}
-
-hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
- size_t hlen, const u8 *buf, size_t len,
+ }
+}
+
+hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
+ size_t hlen, const u8 *buf, size_t len,
size_t start, HWLMCallback cb,
struct hs_scratch *scratch,
hwlm_group_t groups) {
- struct FDR_Runtime_Args a = {
- buf,
- len,
- hbuf,
- hlen,
- start,
- cb,
+ struct FDR_Runtime_Args a = {
+ buf,
+ len,
+ hbuf,
+ hlen,
+ start,
+ cb,
scratch,
- nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
+ nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
/* we are guaranteed to always have 16 initialised bytes at the end of
* the history buffer (they may be garbage). */
hbuf ? unaligned_load_u64a(hbuf + hlen - sizeof(u64a)) : (u64a)0
- };
-
- hwlm_error_t ret;
- if (unlikely(a.start_offset >= a.len)) {
- ret = HWLM_SUCCESS;
- } else {
- assert(funcs[fdr->engineID]);
+ };
+
+ hwlm_error_t ret;
+ if (unlikely(a.start_offset >= a.len)) {
+ ret = HWLM_SUCCESS;
+ } else {
+ assert(funcs[fdr->engineID]);
ret = funcs[fdr->engineID](fdr, &a, groups);
- }
-
- return ret;
-}
+ }
+
+ return ret;
+}
diff --git a/contrib/libs/hyperscan/src/fdr/fdr.h b/contrib/libs/hyperscan/src/fdr/fdr.h
index a09c42ed2d..4dcef851d6 100644
--- a/contrib/libs/hyperscan/src/fdr/fdr.h
+++ b/contrib/libs/hyperscan/src/fdr/fdr.h
@@ -1,85 +1,85 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief FDR literal matcher: runtime API.
- */
-
-#ifndef FDR_H
-#define FDR_H
-
-#include "ue2common.h"
-#include "hwlm/hwlm.h"
-
-// C linkage in the API
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct FDR;
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief FDR literal matcher: runtime API.
+ */
+
+#ifndef FDR_H
+#define FDR_H
+
+#include "ue2common.h"
+#include "hwlm/hwlm.h"
+
+// C linkage in the API
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct FDR;
struct hs_scratch;
-
-/**
- * \brief Block-mode scan.
- *
- * \param fdr FDR matcher engine.
- * \param buf Buffer to scan.
- * \param len Length of buffer to scan.
+
+/**
+ * \brief Block-mode scan.
+ *
+ * \param fdr FDR matcher engine.
+ * \param buf Buffer to scan.
+ * \param len Length of buffer to scan.
* \param start First offset in buf at which a match may start.
- * \param cb Callback to call when a match is found.
+ * \param cb Callback to call when a match is found.
* \param scratch Scratch supplied to callback on match.
- * \param groups Initial groups mask.
- */
-hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
+ * \param groups Initial groups mask.
+ */
+hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
size_t start, HWLMCallback cb, struct hs_scratch *scratch,
- hwlm_group_t groups);
-
-/**
- * \brief Streaming-mode scan.
- *
- * \param fdr FDR matcher engine.
- * \param hbuf History buffer.
- * \param hlen Length of history buffer (hbuf).
- * \param buf Buffer to scan.
- * \param len Length of buffer to scan (buf).
+ hwlm_group_t groups);
+
+/**
+ * \brief Streaming-mode scan.
+ *
+ * \param fdr FDR matcher engine.
+ * \param hbuf History buffer.
+ * \param hlen Length of history buffer (hbuf).
+ * \param buf Buffer to scan.
+ * \param len Length of buffer to scan (buf).
* \param start First offset in buf at which a match may start.
- * \param cb Callback to call when a match is found.
+ * \param cb Callback to call when a match is found.
* \param scratch Scratch supplied to callback on match.
- * \param groups Initial groups mask.
- */
-hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
- size_t hlen, const u8 *buf, size_t len,
+ * \param groups Initial groups mask.
+ */
+hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
+ size_t hlen, const u8 *buf, size_t len,
size_t start, HWLMCallback cb,
struct hs_scratch *scratch,
hwlm_group_t groups);
-
-#ifdef __cplusplus
-}
-#endif // __cplusplus
-
-#endif // FDR_H
+
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
+#endif // FDR_H
diff --git a/contrib/libs/hyperscan/src/fdr/fdr_compile.cpp b/contrib/libs/hyperscan/src/fdr/fdr_compile.cpp
index e968990be0..fcfc08638e 100644
--- a/contrib/libs/hyperscan/src/fdr/fdr_compile.cpp
+++ b/contrib/libs/hyperscan/src/fdr/fdr_compile.cpp
@@ -1,156 +1,156 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief FDR literal matcher: build API.
- */
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief FDR literal matcher: build API.
+ */
#include "fdr_compile.h"
-#include "fdr_internal.h"
-#include "fdr_confirm.h"
-#include "fdr_compile_internal.h"
-#include "fdr_engine_description.h"
-#include "teddy_compile.h"
-#include "teddy_engine_description.h"
-#include "grey.h"
-#include "ue2common.h"
+#include "fdr_internal.h"
+#include "fdr_confirm.h"
+#include "fdr_compile_internal.h"
+#include "fdr_engine_description.h"
+#include "teddy_compile.h"
+#include "teddy_engine_description.h"
+#include "grey.h"
+#include "ue2common.h"
#include "hwlm/hwlm_build.h"
-#include "util/compare.h"
+#include "util/compare.h"
#include "util/container.h"
-#include "util/dump_mask.h"
+#include "util/dump_mask.h"
#include "util/make_unique.h"
#include "util/math.h"
#include "util/noncopyable.h"
-#include "util/target_info.h"
-#include "util/ue2string.h"
-#include "util/verify_types.h"
-
-#include <algorithm>
+#include "util/target_info.h"
+#include "util/ue2string.h"
+#include "util/verify_types.h"
+
+#include <algorithm>
#include <array>
-#include <cassert>
-#include <cctype>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
+#include <cassert>
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
#include <limits>
-#include <map>
-#include <memory>
+#include <map>
+#include <memory>
#include <numeric>
-#include <set>
-#include <string>
+#include <set>
+#include <string>
#include <unordered_map>
#include <unordered_set>
-#include <vector>
-
+#include <vector>
+
#include <boost/multi_array.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
-namespace {
-
+
+using namespace std;
+
+namespace ue2 {
+
+namespace {
+
class FDRCompiler : noncopyable {
-private:
- const FDREngineDescription &eng;
+private:
+ const FDREngineDescription &eng;
const Grey &grey;
- vector<u8> tab;
+ vector<u8> tab;
vector<hwlmLiteral> lits;
- map<BucketIndex, std::vector<LiteralIndex> > bucketToLits;
- bool make_small;
-
- u8 *tabIndexToMask(u32 indexInTable);
-#ifdef DEBUG
- void dumpMasks(const u8 *defaultMask);
-#endif
- void setupTab();
+ map<BucketIndex, std::vector<LiteralIndex> > bucketToLits;
+ bool make_small;
+
+ u8 *tabIndexToMask(u32 indexInTable);
+#ifdef DEBUG
+ void dumpMasks(const u8 *defaultMask);
+#endif
+ void setupTab();
bytecode_ptr<FDR> setupFDR();
- void createInitialState(FDR *fdr);
-
-public:
+ void createInitialState(FDR *fdr);
+
+public:
FDRCompiler(vector<hwlmLiteral> lits_in,
map<BucketIndex, std::vector<LiteralIndex>> bucketToLits_in,
const FDREngineDescription &eng_in,
bool make_small_in, const Grey &grey_in)
: eng(eng_in), grey(grey_in), tab(eng_in.getTabSizeBytes()),
lits(move(lits_in)), bucketToLits(move(bucketToLits_in)),
- make_small(make_small_in) {}
-
+ make_small(make_small_in) {}
+
bytecode_ptr<FDR> build();
-};
-
-u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) {
- assert(indexInTable < tab.size());
- return &tab[0] + (indexInTable * (eng.getSchemeWidth() / 8));
-}
-
-static
-void setbit(u8 *msk, u32 bit) {
- msk[bit / 8] |= 1U << (bit % 8);
-}
-
-static
-void clearbit(u8 *msk, u32 bit) {
- msk[bit / 8] &= ~(1U << (bit % 8));
-}
-
-static
-void andMask(u8 *dest, const u8 *a, const u8 *b, u32 num_bytes) {
- for (u32 i = 0; i < num_bytes; i++) {
- dest[i] = a[i] & b[i];
- }
-}
-
-void FDRCompiler::createInitialState(FDR *fdr) {
- u8 *start = (u8 *)&fdr->start;
-
- /* initial state should to be 1 in each slot in the bucket up to bucket
- * minlen - 1, and 0 thereafter */
- for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
- // Find the minimum length for the literals in this bucket.
- const vector<LiteralIndex> &bucket_lits = bucketToLits[b];
- u32 min_len = ~0U;
+};
+
+u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) {
+ assert(indexInTable < tab.size());
+ return &tab[0] + (indexInTable * (eng.getSchemeWidth() / 8));
+}
+
+static
+void setbit(u8 *msk, u32 bit) {
+ msk[bit / 8] |= 1U << (bit % 8);
+}
+
+static
+void clearbit(u8 *msk, u32 bit) {
+ msk[bit / 8] &= ~(1U << (bit % 8));
+}
+
+static
+void andMask(u8 *dest, const u8 *a, const u8 *b, u32 num_bytes) {
+ for (u32 i = 0; i < num_bytes; i++) {
+ dest[i] = a[i] & b[i];
+ }
+}
+
+void FDRCompiler::createInitialState(FDR *fdr) {
+ u8 *start = (u8 *)&fdr->start;
+
+ /* initial state should to be 1 in each slot in the bucket up to bucket
+ * minlen - 1, and 0 thereafter */
+ for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
+ // Find the minimum length for the literals in this bucket.
+ const vector<LiteralIndex> &bucket_lits = bucketToLits[b];
+ u32 min_len = ~0U;
for (const LiteralIndex &lit_idx : bucket_lits) {
min_len = min(min_len, verify_u32(lits[lit_idx].s.length()));
- }
-
- DEBUG_PRINTF("bucket %u has min_len=%u\n", b, min_len);
- assert(min_len);
-
- for (PositionInBucket i = 0; i < eng.getBucketWidth(b); i++) {
- if (i < min_len - 1) {
- setbit(start, eng.getSchemeBit(b, i));
- }
- }
- }
-}
-
+ }
+
+ DEBUG_PRINTF("bucket %u has min_len=%u\n", b, min_len);
+ assert(min_len);
+
+ for (PositionInBucket i = 0; i < eng.getBucketWidth(b); i++) {
+ if (i < min_len - 1) {
+ setbit(start, eng.getSchemeBit(b, i));
+ }
+ }
+ }
+}
+
/**
* \brief Lay out FDR structures in bytecode.
*
@@ -162,57 +162,57 @@ bytecode_ptr<FDR> FDRCompiler::setupFDR() {
auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small);
size_t headerSize = sizeof(FDR);
- size_t tabSize = eng.getTabSizeBytes();
-
+ size_t tabSize = eng.getTabSizeBytes();
+
// Note: we place each major structure here on a cacheline boundary.
size_t size = ROUNDUP_CL(headerSize) + ROUNDUP_CL(tabSize) +
ROUNDUP_CL(confirmTable.size()) + floodTable.size();
-
- DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu "
- "total=%zu\n",
+
+ DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu "
+ "total=%zu\n",
headerSize, tabSize, confirmTable.size(), floodTable.size(),
- size);
-
+ size);
+
auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
- assert(fdr); // otherwise would have thrown std::bad_alloc
-
+ assert(fdr); // otherwise would have thrown std::bad_alloc
+
u8 *fdr_base = (u8 *)fdr.get();
// Write header.
- fdr->size = size;
- fdr->engineID = eng.getID();
- fdr->maxStringLen = verify_u32(maxLen(lits));
+ fdr->size = size;
+ fdr->engineID = eng.getID();
+ fdr->maxStringLen = verify_u32(maxLen(lits));
fdr->numStrings = verify_u32(lits.size());
assert(eng.bits > 8 && eng.bits < 16); // we allow domains 9 to 15 only
fdr->domain = eng.bits;
fdr->domainMask = (1 << eng.bits) - 1;
fdr->tabSize = tabSize;
fdr->stride = eng.stride;
- createInitialState(fdr.get());
-
+ createInitialState(fdr.get());
+
// Write table.
u8 *ptr = fdr_base + ROUNDUP_CL(sizeof(FDR));
assert(ISALIGNED_CL(ptr));
- copy(tab.begin(), tab.end(), ptr);
+ copy(tab.begin(), tab.end(), ptr);
ptr += ROUNDUP_CL(tabSize);
-
+
// Write confirm structures.
assert(ISALIGNED_CL(ptr));
fdr->confOffset = verify_u32(ptr - fdr_base);
memcpy(ptr, confirmTable.get(), confirmTable.size());
ptr += ROUNDUP_CL(confirmTable.size());
-
+
// Write flood control structures.
assert(ISALIGNED_CL(ptr));
- fdr->floodOffset = verify_u32(ptr - fdr_base);
+ fdr->floodOffset = verify_u32(ptr - fdr_base);
memcpy(ptr, floodTable.get(), floodTable.size());
ptr += floodTable.size(); // last write, no need to round up
-
- return fdr;
-}
-
+
+ return fdr;
+}
+
//#define DEBUG_ASSIGNMENT
-
+
/**
* Utility class for computing:
*
@@ -223,14 +223,14 @@ bytecode_ptr<FDR> FDRCompiler::setupFDR() {
*/
class Scorer {
unordered_map<u32, double> count_factor_cache;
-
+
// LUT: pow(count, 1.05) for small values of count.
static const array<double, 100> count_lut;
-
+
double count_factor(u32 count) {
if (count < count_lut.size()) {
return count_lut[count];
- }
+ }
auto it = count_factor_cache.find(count);
if (it != count_factor_cache.end()) {
@@ -239,16 +239,16 @@ class Scorer {
double r = our_pow(count, 1.05);
count_factor_cache.emplace(count, r);
return r;
- }
-
+ }
+
// LUT: pow(len, -3) for len in range [0,8].
static const array<double, 9> len_lut;
-
+
double len_factor(u32 len) {
assert(len <= len_lut.size());
return len_lut[len];
- }
-
+ }
+
public:
double operator()(u32 len, u32 count) {
if (len == 0) {
@@ -257,7 +257,7 @@ public:
return count_factor(count) * len_factor(len);
}
};
-
+
const array<double, 100> Scorer::count_lut{{
pow(0, 1.05), pow(1, 1.05), pow(2, 1.05), pow(3, 1.05), pow(4, 1.05),
pow(5, 1.05), pow(6, 1.05), pow(7, 1.05), pow(8, 1.05), pow(9, 1.05),
@@ -280,11 +280,11 @@ const array<double, 100> Scorer::count_lut{{
pow(90, 1.05), pow(91, 1.05), pow(92, 1.05), pow(93, 1.05), pow(94, 1.05),
pow(95, 1.05), pow(96, 1.05), pow(97, 1.05), pow(98, 1.05), pow(99, 1.05),
}};
-
+
const array<double, 9> Scorer::len_lut{{
0, pow(1, -3.0), pow(2, -3.0), pow(3, -3.0), pow(4, -3.0),
pow(5, -3.0), pow(6, -3.0), pow(7, -3.0), pow(8, -3.0)}};
-
+
/**
* Returns true if the two given literals should be placed in the same chunk as
* they are identical except for a difference in caselessness.
@@ -297,13 +297,13 @@ bool isEquivLit(const hwlmLiteral &a, const hwlmLiteral &b,
if (a_len != b_len) {
return false;
- }
-
+ }
+
bool nocase = last_nocase_lit && a_len == last_nocase_lit->s.size() &&
!cmp(a.s.c_str(), last_nocase_lit->s.c_str(), a_len, true);
return !cmp(a.s.c_str(), b.s.c_str(), a.s.size(), nocase);
}
-
+
struct Chunk {
Chunk(u32 first_id_in, u32 count_in, u32 length_in)
: first_id(first_id_in), count(count_in), length(length_in) {}
@@ -311,12 +311,12 @@ struct Chunk {
u32 count; //!< how many are in this chunk
u32 length; //!< how long things in the chunk are
};
-
+
static
vector<Chunk> assignChunks(const vector<hwlmLiteral> &lits,
const map<u32, u32> &lenCounts) {
const u32 CHUNK_MAX = 512;
- const u32 MAX_CONSIDERED_LENGTH = 16;
+ const u32 MAX_CONSIDERED_LENGTH = 16;
// TODO: detailed early stage literal analysis for v. small cases (actually
// look at lits) yes - after we factor this out and merge in the Teddy
@@ -330,10 +330,10 @@ vector<Chunk> assignChunks(const vector<hwlmLiteral> &lits,
const u32 maxPerChunk = lits.size() /
(CHUNK_MAX - MIN(MAX_CONSIDERED_LENGTH, lenCounts.size())) + 1;
- u32 currentSize = 0;
- u32 chunkStartID = 0;
+ u32 currentSize = 0;
+ u32 chunkStartID = 0;
const hwlmLiteral *last_nocase_lit = nullptr;
-
+
for (u32 i = 0; i < lits.size() && chunks.size() < CHUNK_MAX - 1; i++) {
const auto &lit = lits[i];
@@ -350,38 +350,38 @@ vector<Chunk> assignChunks(const vector<hwlmLiteral> &lits,
if ((currentSize < MAX_CONSIDERED_LENGTH &&
(lit.s.size() != currentSize)) ||
- (currentSize != 1 && ((i - chunkStartID) >= maxPerChunk))) {
+ (currentSize != 1 && ((i - chunkStartID) >= maxPerChunk))) {
currentSize = lit.s.size();
if (!chunks.empty()) {
chunks.back().count = i - chunkStartID;
- }
+ }
chunkStartID = i;
chunks.emplace_back(i, 0, currentSize);
- }
+ }
next_literal:
if (lit.nocase) {
last_nocase_lit = &lit;
}
- }
-
+ }
+
assert(!chunks.empty());
chunks.back().count = lits.size() - chunkStartID;
- // close off chunks with an empty row
+ // close off chunks with an empty row
chunks.emplace_back(lits.size(), 0, 0);
-
-#ifdef DEBUG_ASSIGNMENT
+
+#ifdef DEBUG_ASSIGNMENT
for (size_t j = 0; j < chunks.size(); j++) {
const auto &chunk = chunks[j];
printf("chunk %zu first_id=%u count=%u length=%u\n", j, chunk.first_id,
chunk.count, chunk.length);
- }
-#endif
-
+ }
+#endif
+
DEBUG_PRINTF("built %zu chunks (%zu lits)\n", chunks.size(), lits.size());
assert(chunks.size() <= CHUNK_MAX);
return chunks;
}
-
+
static
map<BucketIndex, vector<LiteralIndex>> assignStringsToBuckets(
vector<hwlmLiteral> &lits,
@@ -431,52 +431,52 @@ map<BucketIndex, vector<LiteralIndex>> assignStringsToBuckets(
Scorer scorer;
for (u32 j = 0; j < numChunks; j++) {
- u32 cnt = 0;
+ u32 cnt = 0;
for (u32 k = j; k < numChunks; ++k) {
cnt += chunks[k].count;
- }
+ }
t[j][0] = {scorer(chunks[j].length, cnt), 0};
- }
-
+ }
+
for (u32 i = 1; i < numBuckets; i++) {
for (u32 j = 0; j < numChunks - 1; j++) { // don't do last, empty row
pair<double, u32> best = {MAX_SCORE, 0};
u32 cnt = chunks[j].count;
for (u32 k = j + 1; k < numChunks - 1; k++) {
auto score = scorer(chunks[j].length, cnt);
- if (score > best.first) {
+ if (score > best.first) {
break; // now worse locally than our best score, give up
- }
- score += t[k][i-1].first;
- if (score < best.first) {
+ }
+ score += t[k][i-1].first;
+ if (score < best.first) {
best = {score, k};
- }
+ }
cnt += chunks[k].count;
- }
- t[j][i] = best;
- }
+ }
+ t[j][i] = best;
+ }
t[numChunks - 1][i] = {0,0}; // fill in empty final row for next iter
- }
-
-#ifdef DEBUG_ASSIGNMENT
+ }
+
+#ifdef DEBUG_ASSIGNMENT
for (u32 j = 0; j < numChunks; j++) {
printf("%03u: ", j);
for (u32 i = 0; i < numBuckets; i++) {
const auto &v = t[j][i];
printf("<%0.3f,%3d> ", v.first, v.second);
- }
- printf("\n");
- }
-#endif
-
+ }
+ printf("\n");
+ }
+#endif
+
// our best score is in t[0][N_BUCKETS-1] and we can follow the links
- // to find where our buckets should start and what goes into them
+ // to find where our buckets should start and what goes into them
vector<vector<LiteralIndex>> buckets;
for (u32 i = 0, n = numBuckets; n && (i != numChunks - 1); n--) {
- u32 j = t[i][n - 1].second;
- if (j == 0) {
+ u32 j = t[i][n - 1].second;
+ if (j == 0) {
j = numChunks - 1;
- }
+ }
// put chunks between i - j into bucket (numBuckets - n).
u32 first_id = chunks[i].first_id;
@@ -495,11 +495,11 @@ map<BucketIndex, vector<LiteralIndex>> assignStringsToBuckets(
// long literals first for included literals checking
for (u32 k = 0; k < cnt; k++) {
litIds.push_back(last_id - k - 1);
- }
+ }
- i = j;
+ i = j;
buckets.push_back(litIds);
- }
+ }
// reverse bucket id, longer literals come first
map<BucketIndex, vector<LiteralIndex>> bucketToLits;
@@ -509,133 +509,133 @@ map<BucketIndex, vector<LiteralIndex>> assignStringsToBuckets(
}
return bucketToLits;
-}
-
-#ifdef DEBUG
-void FDRCompiler::dumpMasks(const u8 *defaultMask) {
- const size_t width = eng.getSchemeWidth();
- printf("default mask: %s\n", dumpMask(defaultMask, width).c_str());
- for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
- u8 *m = tabIndexToMask(i);
- if (memcmp(m, defaultMask, width / 8)) {
- printf("tab %04x: %s\n", i, dumpMask(m, width).c_str());
- }
- }
-}
-#endif
-
-static
-bool getMultiEntriesAtPosition(const FDREngineDescription &eng,
- const vector<LiteralIndex> &vl,
- const vector<hwlmLiteral> &lits,
- SuffixPositionInString pos,
+}
+
+#ifdef DEBUG
+void FDRCompiler::dumpMasks(const u8 *defaultMask) {
+ const size_t width = eng.getSchemeWidth();
+ printf("default mask: %s\n", dumpMask(defaultMask, width).c_str());
+ for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
+ u8 *m = tabIndexToMask(i);
+ if (memcmp(m, defaultMask, width / 8)) {
+ printf("tab %04x: %s\n", i, dumpMask(m, width).c_str());
+ }
+ }
+}
+#endif
+
+static
+bool getMultiEntriesAtPosition(const FDREngineDescription &eng,
+ const vector<LiteralIndex> &vl,
+ const vector<hwlmLiteral> &lits,
+ SuffixPositionInString pos,
map<u32, unordered_set<u32>> &m2) {
- assert(eng.bits < 32);
-
- u32 distance = 0;
- if (eng.bits <= 8) {
- distance = 1;
- } else if (eng.bits <= 16) {
- distance = 2;
- } else {
- distance = 4;
- }
-
+ assert(eng.bits < 32);
+
+ u32 distance = 0;
+ if (eng.bits <= 8) {
+ distance = 1;
+ } else if (eng.bits <= 16) {
+ distance = 2;
+ } else {
+ distance = 4;
+ }
+
for (auto i = vl.begin(), e = vl.end(); i != e; ++i) {
- if (e - i > 5) {
- __builtin_prefetch(&lits[*(i + 5)]);
- }
- const hwlmLiteral &lit = lits[*i];
- const size_t sz = lit.s.size();
- u32 mask = 0;
- u32 dontCares = 0;
- for (u32 cnt = 0; cnt < distance; cnt++) {
- int newPos = pos - cnt;
- u8 dontCareByte = 0x0;
- u8 maskByte = 0x0;
- if (newPos < 0 || ((u32)newPos >= sz)) {
- dontCareByte = 0xff;
- } else {
- u8 c = lit.s[sz - newPos - 1];
- maskByte = c;
- u32 remainder = eng.bits - cnt * 8;
- assert(remainder != 0);
- if (remainder < 8) {
- u8 cmask = (1U << remainder) - 1;
- maskByte &= cmask;
- dontCareByte |= ~cmask;
- }
- if (lit.nocase && ourisalpha(c)) {
- maskByte &= 0xdf;
- dontCareByte |= 0x20;
- }
- }
- u32 loc = cnt * 8;
- mask |= maskByte << loc;
- dontCares |= dontCareByte << loc;
- }
-
- // truncate m and dc down to nBits
- mask &= (1U << eng.bits) - 1;
- dontCares &= (1U << eng.bits) - 1;
- if (dontCares == ((1U << eng.bits) - 1)) {
- return true;
- }
- m2[dontCares].insert(mask);
- }
- return false;
-}
-
-void FDRCompiler::setupTab() {
- const size_t mask_size = eng.getSchemeWidth() / 8;
- assert(mask_size);
-
- vector<u8> defaultMask(mask_size, 0xff);
- for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
- memcpy(tabIndexToMask(i), &defaultMask[0], mask_size);
- }
-
- for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
- const vector<LiteralIndex> &vl = bucketToLits[b];
- SuffixPositionInString pLimit = eng.getBucketWidth(b);
- for (SuffixPositionInString pos = 0; pos < pLimit; pos++) {
- u32 bit = eng.getSchemeBit(b, pos);
+ if (e - i > 5) {
+ __builtin_prefetch(&lits[*(i + 5)]);
+ }
+ const hwlmLiteral &lit = lits[*i];
+ const size_t sz = lit.s.size();
+ u32 mask = 0;
+ u32 dontCares = 0;
+ for (u32 cnt = 0; cnt < distance; cnt++) {
+ int newPos = pos - cnt;
+ u8 dontCareByte = 0x0;
+ u8 maskByte = 0x0;
+ if (newPos < 0 || ((u32)newPos >= sz)) {
+ dontCareByte = 0xff;
+ } else {
+ u8 c = lit.s[sz - newPos - 1];
+ maskByte = c;
+ u32 remainder = eng.bits - cnt * 8;
+ assert(remainder != 0);
+ if (remainder < 8) {
+ u8 cmask = (1U << remainder) - 1;
+ maskByte &= cmask;
+ dontCareByte |= ~cmask;
+ }
+ if (lit.nocase && ourisalpha(c)) {
+ maskByte &= 0xdf;
+ dontCareByte |= 0x20;
+ }
+ }
+ u32 loc = cnt * 8;
+ mask |= maskByte << loc;
+ dontCares |= dontCareByte << loc;
+ }
+
+ // truncate m and dc down to nBits
+ mask &= (1U << eng.bits) - 1;
+ dontCares &= (1U << eng.bits) - 1;
+ if (dontCares == ((1U << eng.bits) - 1)) {
+ return true;
+ }
+ m2[dontCares].insert(mask);
+ }
+ return false;
+}
+
+void FDRCompiler::setupTab() {
+ const size_t mask_size = eng.getSchemeWidth() / 8;
+ assert(mask_size);
+
+ vector<u8> defaultMask(mask_size, 0xff);
+ for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
+ memcpy(tabIndexToMask(i), &defaultMask[0], mask_size);
+ }
+
+ for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
+ const vector<LiteralIndex> &vl = bucketToLits[b];
+ SuffixPositionInString pLimit = eng.getBucketWidth(b);
+ for (SuffixPositionInString pos = 0; pos < pLimit; pos++) {
+ u32 bit = eng.getSchemeBit(b, pos);
map<u32, unordered_set<u32>> m2;
- bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2);
- if (done) {
- clearbit(&defaultMask[0], bit);
- continue;
- }
+ bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2);
+ if (done) {
+ clearbit(&defaultMask[0], bit);
+ continue;
+ }
for (const auto &elem : m2) {
u32 dc = elem.first;
const unordered_set<u32> &mskSet = elem.second;
- u32 v = ~dc;
- do {
- u32 b2 = v & dc;
+ u32 v = ~dc;
+ do {
+ u32 b2 = v & dc;
for (const u32 &mskVal : mskSet) {
u32 val = (mskVal & ~dc) | b2;
- clearbit(tabIndexToMask(val), bit);
- }
- v = (v + (dc & -dc)) | ~dc;
- } while (v != ~dc);
- }
- }
- }
-
- for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
- u8 *m = tabIndexToMask(i);
- andMask(m, m, &defaultMask[0], mask_size);
- }
-#ifdef DEBUG
- dumpMasks(&defaultMask[0]);
-#endif
-}
-
+ clearbit(tabIndexToMask(val), bit);
+ }
+ v = (v + (dc & -dc)) | ~dc;
+ } while (v != ~dc);
+ }
+ }
+ }
+
+ for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
+ u8 *m = tabIndexToMask(i);
+ andMask(m, m, &defaultMask[0], mask_size);
+ }
+#ifdef DEBUG
+ dumpMasks(&defaultMask[0]);
+#endif
+}
+
bytecode_ptr<FDR> FDRCompiler::build() {
- setupTab();
+ setupTab();
return setupFDR();
-}
-
+}
+
static
bool isSuffix(const hwlmLiteral &lit1, const hwlmLiteral &lit2) {
const auto &s1 = lit1.s;
@@ -643,7 +643,7 @@ bool isSuffix(const hwlmLiteral &lit1, const hwlmLiteral &lit2) {
size_t len1 = s1.length();
size_t len2 = s2.length();
assert(len1 >= len2);
-
+
if (lit1.nocase || lit2.nocase) {
return equal(s2.begin(), s2.end(), s1.begin() + len1 - len2,
[](char a, char b) { return mytoupper(a) == mytoupper(b); });
@@ -659,13 +659,13 @@ bool isSuffix(const hwlmLiteral &lit1, const hwlmLiteral &lit2) {
* squashing. e.g. AAA(no case) in bucket 0, AA(no case) and aa in bucket 1,
* we can't squash bucket 1 if we have input like "aaa" as aa can also match.
*/
-static
+static
bool includedCheck(const hwlmLiteral &lit1, const hwlmLiteral &lit2) {
/* lit1 is caseless and lit2 is case sensitive */
if ((lit1.nocase && !lit2.nocase)) {
return true;
- }
-
+ }
+
/* lit2's group is a subset of lit1 */
if (lit1.groups != lit2.groups &&
(lit2.groups == (lit1.groups & lit2.groups))) {
@@ -840,46 +840,46 @@ unique_ptr<HWLMProto> fdrBuildProtoInternal(u8 engType,
bool make_small,
const target_t &target,
const Grey &grey, u32 hint) {
- DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");
-
- if (grey.fdrAllowTeddy) {
+ DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");
+
+ if (grey.fdrAllowTeddy) {
auto proto = teddyBuildProtoHinted(engType, lits, make_small, hint,
target);
if (proto) {
- DEBUG_PRINTF("build with teddy succeeded\n");
+ DEBUG_PRINTF("build with teddy succeeded\n");
return proto;
- } else {
- DEBUG_PRINTF("build with teddy failed, will try with FDR\n");
- }
- }
-
+ } else {
+ DEBUG_PRINTF("build with teddy failed, will try with FDR\n");
+ }
+ }
+
auto des = (hint == HINT_INVALID) ? chooseEngine(target, lits, make_small)
: getFdrDescription(hint);
- if (!des) {
- return nullptr;
- }
-
- // temporary hack for unit testing
- if (hint != HINT_INVALID) {
- des->bits = 9;
+ if (!des) {
+ return nullptr;
+ }
+
+ // temporary hack for unit testing
+ if (hint != HINT_INVALID) {
+ des->bits = 9;
des->stride = 1;
- }
-
+ }
+
auto bucketToLits = assignStringsToBuckets(lits, *des);
addIncludedInfo(lits, des->getNumBuckets(), bucketToLits);
auto proto =
ue2::make_unique<HWLMProto>(engType, move(des), lits, bucketToLits,
make_small);
return proto;
-}
-
+}
+
unique_ptr<HWLMProto> fdrBuildProto(u8 engType, vector<hwlmLiteral> lits,
bool make_small, const target_t &target,
const Grey &grey) {
return fdrBuildProtoInternal(engType, lits, make_small, target, grey,
HINT_INVALID);
-}
-
+}
+
static
bytecode_ptr<FDR> fdrBuildTableInternal(const HWLMProto &proto,
const Grey &grey) {
@@ -897,8 +897,8 @@ bytecode_ptr<FDR> fdrBuildTable(const HWLMProto &proto, const Grey &grey) {
return fdrBuildTableInternal(proto, grey);
}
-#if !defined(RELEASE_BUILD)
-
+#if !defined(RELEASE_BUILD)
+
unique_ptr<HWLMProto> fdrBuildProtoHinted(u8 engType,
vector<hwlmLiteral> lits,
bool make_small, u32 hint,
@@ -906,13 +906,13 @@ unique_ptr<HWLMProto> fdrBuildProtoHinted(u8 engType,
const Grey &grey) {
return fdrBuildProtoInternal(engType, lits, make_small, target, grey,
hint);
-}
-
-#endif
-
-size_t fdrSize(const FDR *fdr) {
- assert(fdr);
- return fdr->size;
-}
+}
+
+#endif
+
+size_t fdrSize(const FDR *fdr) {
+ assert(fdr);
+ return fdr->size;
+}
} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/fdr/fdr_compile.h b/contrib/libs/hyperscan/src/fdr/fdr_compile.h
index 72d664cd14..f0ce49256a 100644
--- a/contrib/libs/hyperscan/src/fdr/fdr_compile.h
+++ b/contrib/libs/hyperscan/src/fdr/fdr_compile.h
@@ -1,55 +1,55 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief FDR literal matcher: build API.
- */
-
-#ifndef FDR_COMPILE_H
-#define FDR_COMPILE_H
-
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief FDR literal matcher: build API.
+ */
+
+#ifndef FDR_COMPILE_H
+#define FDR_COMPILE_H
+
+#include "ue2common.h"
#include "hwlm/hwlm_build.h"
#include "util/bytecode_ptr.h"
-
-#include <vector>
-
-struct FDR;
-
-namespace ue2 {
-
-struct hwlmLiteral;
-struct Grey;
-struct target_t;
-
+
+#include <vector>
+
+struct FDR;
+
+namespace ue2 {
+
+struct hwlmLiteral;
+struct Grey;
+struct target_t;
+
bytecode_ptr<FDR> fdrBuildTable(const HWLMProto &proto, const Grey &grey);
-
-#if !defined(RELEASE_BUILD)
+
+#if !defined(RELEASE_BUILD)
std::unique_ptr<HWLMProto> fdrBuildProtoHinted(
u8 engType,
std::vector<hwlmLiteral> lits,
@@ -57,16 +57,16 @@ std::unique_ptr<HWLMProto> fdrBuildProtoHinted(
const target_t &target,
const Grey &grey);
#endif
-
+
std::unique_ptr<HWLMProto> fdrBuildProto(
u8 engType,
std::vector<hwlmLiteral> lits,
bool make_small, const target_t &target,
const Grey &grey);
-
+
/** \brief Returns size in bytes of the given FDR engine. */
size_t fdrSize(const struct FDR *fdr);
-
-} // namespace ue2
-
-#endif
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/fdr/fdr_compile_internal.h b/contrib/libs/hyperscan/src/fdr/fdr_compile_internal.h
index de0779d153..3879960a29 100644
--- a/contrib/libs/hyperscan/src/fdr/fdr_compile_internal.h
+++ b/contrib/libs/hyperscan/src/fdr/fdr_compile_internal.h
@@ -1,87 +1,87 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef FDR_COMPILE_INTERNAL_H
-#define FDR_COMPILE_INTERNAL_H
-
-#include "ue2common.h"
-#include "hwlm/hwlm_literal.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FDR_COMPILE_INTERNAL_H
+#define FDR_COMPILE_INTERNAL_H
+
+#include "ue2common.h"
+#include "hwlm/hwlm_literal.h"
#include "util/bytecode_ptr.h"
-
-#include <map>
-#include <utility>
-#include <vector>
-
-struct FDRConfirm;
-struct LitInfo;
-
-namespace ue2 {
-
-// a pile of decorative typedefs
-// good for documentation purposes more than anything else
-typedef u32 LiteralIndex;
-typedef u32 SuffixPositionInString; // zero is last byte, counting back
- // into the string
-typedef u32 BucketIndex;
-typedef u32 SchemeBitIndex;
-typedef u32 PositionInBucket; // zero is 'we are matching right now!",
- // counting towards future matches
-
-class EngineDescription;
-class FDREngineDescription;
-struct hwlmStreamingControl;
+
+#include <map>
+#include <utility>
+#include <vector>
+
+struct FDRConfirm;
+struct LitInfo;
+
+namespace ue2 {
+
+// a pile of decorative typedefs
+// good for documentation purposes more than anything else
+typedef u32 LiteralIndex;
+typedef u32 SuffixPositionInString; // zero is last byte, counting back
+ // into the string
+typedef u32 BucketIndex;
+typedef u32 SchemeBitIndex;
+typedef u32 PositionInBucket; // zero is 'we are matching right now!",
+ // counting towards future matches
+
+class EngineDescription;
+class FDREngineDescription;
+struct hwlmStreamingControl;
struct Grey;
-
+
bytecode_ptr<u8> setupFullConfs(
const std::vector<hwlmLiteral> &lits,
const EngineDescription &eng,
const std::map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits,
bool make_small);
-
-// all suffixes include an implicit max_bucket_width suffix to ensure that
-// we always read a full-scale flood "behind" us in terms of what's in our
-// state; if we don't have a flood that's long enough we won't be in the
-// right state yet to allow blindly advancing
+
+// all suffixes include an implicit max_bucket_width suffix to ensure that
+// we always read a full-scale flood "behind" us in terms of what's in our
+// state; if we don't have a flood that's long enough we won't be in the
+// right state yet to allow blindly advancing
bytecode_ptr<u8> setupFDRFloodControl(const std::vector<hwlmLiteral> &lits,
const EngineDescription &eng,
const Grey &grey);
-
+
bytecode_ptr<u8>
-fdrBuildTableStreaming(const std::vector<hwlmLiteral> &lits,
+fdrBuildTableStreaming(const std::vector<hwlmLiteral> &lits,
hwlmStreamingControl &stream_control);
-
-static constexpr u32 HINT_INVALID = 0xffffffff;
-
-// fdr_compile_util.cpp utilities
-size_t maxLen(const std::vector<hwlmLiteral> &lits);
-size_t minLenCount(const std::vector<hwlmLiteral> &lits, size_t *count);
-u32 absdiff(u32 i, u32 j);
-
-} // namespace ue2
-
-#endif
+
+static constexpr u32 HINT_INVALID = 0xffffffff;
+
+// fdr_compile_util.cpp utilities
+size_t maxLen(const std::vector<hwlmLiteral> &lits);
+size_t minLenCount(const std::vector<hwlmLiteral> &lits, size_t *count);
+u32 absdiff(u32 i, u32 j);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/fdr/fdr_compile_util.cpp b/contrib/libs/hyperscan/src/fdr/fdr_compile_util.cpp
index ab84b53ab8..350a096742 100644
--- a/contrib/libs/hyperscan/src/fdr/fdr_compile_util.cpp
+++ b/contrib/libs/hyperscan/src/fdr/fdr_compile_util.cpp
@@ -1,65 +1,65 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "fdr_compile_internal.h"
-#include "hwlm/hwlm_literal.h"
-
-#include <algorithm>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-size_t maxLen(const vector<hwlmLiteral> &lits) {
- size_t rv = 0;
- for (const auto &lit : lits) {
- rv = max(rv, lit.s.size());
- }
- return rv;
-}
-
-size_t minLenCount(const vector<hwlmLiteral> &lits, size_t *count) {
- size_t rv = (size_t)-1;
- *count = 0;
- for (const auto &lit : lits) {
- if (lit.s.size() < rv) {
- rv = lit.s.size();
- *count = 1;
- } else if (lit.s.size() == rv) {
- (*count)++;
- }
- }
- return rv;
-}
-
-u32 absdiff(u32 i, u32 j) {
- return (i > j) ? (i - j) : (j - i);
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fdr_compile_internal.h"
+#include "hwlm/hwlm_literal.h"
+
+#include <algorithm>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+size_t maxLen(const vector<hwlmLiteral> &lits) {
+ size_t rv = 0;
+ for (const auto &lit : lits) {
+ rv = max(rv, lit.s.size());
+ }
+ return rv;
+}
+
+size_t minLenCount(const vector<hwlmLiteral> &lits, size_t *count) {
+ size_t rv = (size_t)-1;
+ *count = 0;
+ for (const auto &lit : lits) {
+ if (lit.s.size() < rv) {
+ rv = lit.s.size();
+ *count = 1;
+ } else if (lit.s.size() == rv) {
+ (*count)++;
+ }
+ }
+ return rv;
+}
+
+u32 absdiff(u32 i, u32 j) {
+ return (i > j) ? (i - j) : (j - i);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/fdr/fdr_confirm.h b/contrib/libs/hyperscan/src/fdr/fdr_confirm.h
index 61c23e936b..a23082cc6d 100644
--- a/contrib/libs/hyperscan/src/fdr/fdr_confirm.h
+++ b/contrib/libs/hyperscan/src/fdr/fdr_confirm.h
@@ -1,94 +1,94 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef FDR_CONFIRM_H
-#define FDR_CONFIRM_H
-
-#include "ue2common.h"
-#include "hwlm/hwlm.h"
-
-static really_inline
-u32 mul_hash_64(u64a lv, u64a andmsk, u64a mult, u32 nBits) {
- return ((lv & andmsk) * mult) >> (sizeof(u64a)*8 - nBits);
-}
-
-// data structures
-// TODO: fix this hard-coding
-#define CONF_TYPE u64a
-#define CONF_HASH_CALL mul_hash_64
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FDR_CONFIRM_H
+#define FDR_CONFIRM_H
+
+#include "ue2common.h"
+#include "hwlm/hwlm.h"
+
+static really_inline
+u32 mul_hash_64(u64a lv, u64a andmsk, u64a mult, u32 nBits) {
+ return ((lv & andmsk) * mult) >> (sizeof(u64a)*8 - nBits);
+}
+
+// data structures
+// TODO: fix this hard-coding
+#define CONF_TYPE u64a
+#define CONF_HASH_CALL mul_hash_64
+
/**
* \brief Flag indicating this literal doesn't need to be delivered more than
* once, used in LitInfo::flags.
*/
#define FDR_LIT_FLAG_NOREPEAT 1
-
-/**
- * \brief Structure describing a literal, linked to by FDRConfirm.
- *
+
+/**
+ * \brief Structure describing a literal, linked to by FDRConfirm.
+ *
* This structure is followed in memory by a variable-sized string prefix, for
* strings that are longer than CONF_TYPE.
- */
-struct LitInfo {
- CONF_TYPE v;
- CONF_TYPE msk;
- hwlm_group_t groups;
- u32 id; // literal ID as passed in
+ */
+struct LitInfo {
+ CONF_TYPE v;
+ CONF_TYPE msk;
+ hwlm_group_t groups;
+ u32 id; // literal ID as passed in
u8 size;
u8 flags; //!< bitfield of flags from FDR_LIT_FLAG_* above.
- u8 next;
-};
-
-#define FDRC_FLAG_NO_CONFIRM 1
+ u8 next;
+};
+
+#define FDRC_FLAG_NO_CONFIRM 1
#define FDRC_FLAG_NOREPEAT 2
-
-/**
- * \brief FDR confirm header.
- *
- * This structure is followed in memory by:
- *
- * -# lit index mapping (array of u32)
- * -# list of LitInfo structures
- */
-struct FDRConfirm {
- CONF_TYPE andmsk;
- CONF_TYPE mult;
+
+/**
+ * \brief FDR confirm header.
+ *
+ * This structure is followed in memory by:
+ *
+ * -# lit index mapping (array of u32)
+ * -# list of LitInfo structures
+ */
+struct FDRConfirm {
+ CONF_TYPE andmsk;
+ CONF_TYPE mult;
u32 nBits;
- hwlm_group_t groups;
-};
-
-static really_inline
-const u32 *getConfirmLitIndex(const struct FDRConfirm *fdrc) {
- const u8 *base = (const u8 *)fdrc;
- const u32 *litIndex =
- (const u32 *)(base + ROUNDUP_N(sizeof(*fdrc), alignof(u32)));
- assert(ISALIGNED(litIndex));
- return litIndex;
-}
-
-#endif // FDR_CONFIRM_H
+ hwlm_group_t groups;
+};
+
+static really_inline
+const u32 *getConfirmLitIndex(const struct FDRConfirm *fdrc) {
+ const u8 *base = (const u8 *)fdrc;
+ const u32 *litIndex =
+ (const u32 *)(base + ROUNDUP_N(sizeof(*fdrc), alignof(u32)));
+ assert(ISALIGNED(litIndex));
+ return litIndex;
+}
+
+#endif // FDR_CONFIRM_H
diff --git a/contrib/libs/hyperscan/src/fdr/fdr_confirm_compile.cpp b/contrib/libs/hyperscan/src/fdr/fdr_confirm_compile.cpp
index 4cd67673c8..8e3690895e 100644
--- a/contrib/libs/hyperscan/src/fdr/fdr_confirm_compile.cpp
+++ b/contrib/libs/hyperscan/src/fdr/fdr_confirm_compile.cpp
@@ -1,134 +1,134 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "fdr_internal.h"
-#include "fdr_compile_internal.h"
-#include "fdr_confirm.h"
-#include "engine_description.h"
-#include "teddy_engine_description.h"
-#include "ue2common.h"
-#include "util/alloc.h"
-#include "util/bitutils.h"
-#include "util/compare.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fdr_internal.h"
+#include "fdr_compile_internal.h"
+#include "fdr_confirm.h"
+#include "engine_description.h"
+#include "teddy_engine_description.h"
+#include "ue2common.h"
+#include "util/alloc.h"
+#include "util/bitutils.h"
+#include "util/compare.h"
#include "util/container.h"
-#include "util/verify_types.h"
-
-#include <algorithm>
-#include <cstring>
-#include <set>
-
-using namespace std;
-
-namespace ue2 {
-
+#include "util/verify_types.h"
+
+#include <algorithm>
+#include <cstring>
+#include <set>
+
+using namespace std;
+
+namespace ue2 {
+
using BC2CONF = map<BucketIndex, bytecode_ptr<FDRConfirm>>;
-
-static
-u64a make_u64a_mask(const vector<u8> &v) {
- assert(v.size() <= sizeof(u64a));
- if (v.size() > sizeof(u64a)) {
- throw std::exception();
- }
-
- u64a mask = 0;
- size_t vlen = v.size();
- size_t len = std::min(vlen, sizeof(mask));
- unsigned char *m = (unsigned char *)&mask;
- memcpy(m + sizeof(mask) - len, &v[vlen - len], len);
- return mask;
-}
-
-/**
- * Build a temporary vector of LitInfo structures (without the corresponding
- * pointers to the actual strings; these cannot be laid out yet). These
- * stay in 1:1 correspondence with the lits[] vector as that's the only
- * place we have to obtain our full strings.
- */
-static
-void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
- CONF_TYPE &andmsk) {
- const CONF_TYPE all_ones = ~(u64a)0;
- andmsk = all_ones; // fill in with 'and' of all literal masks
-
- for (LiteralIndex i = 0; i < lits.size(); i++) {
- const hwlmLiteral &lit = lits[i];
- LitInfo &info = tmpLitInfo[i];
- memset(&info, 0, sizeof(info));
- info.id = lit.id;
+
+static
+u64a make_u64a_mask(const vector<u8> &v) {
+ assert(v.size() <= sizeof(u64a));
+ if (v.size() > sizeof(u64a)) {
+ throw std::exception();
+ }
+
+ u64a mask = 0;
+ size_t vlen = v.size();
+ size_t len = std::min(vlen, sizeof(mask));
+ unsigned char *m = (unsigned char *)&mask;
+ memcpy(m + sizeof(mask) - len, &v[vlen - len], len);
+ return mask;
+}
+
+/**
+ * Build a temporary vector of LitInfo structures (without the corresponding
+ * pointers to the actual strings; these cannot be laid out yet). These
+ * stay in 1:1 correspondence with the lits[] vector as that's the only
+ * place we have to obtain our full strings.
+ */
+static
+void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
+ CONF_TYPE &andmsk) {
+ const CONF_TYPE all_ones = ~(u64a)0;
+ andmsk = all_ones; // fill in with 'and' of all literal masks
+
+ for (LiteralIndex i = 0; i < lits.size(); i++) {
+ const hwlmLiteral &lit = lits[i];
+ LitInfo &info = tmpLitInfo[i];
+ memset(&info, 0, sizeof(info));
+ info.id = lit.id;
u8 flags = 0;
- if (lit.noruns) {
+ if (lit.noruns) {
flags |= FDR_LIT_FLAG_NOREPEAT;
- }
- info.flags = flags;
+ }
+ info.flags = flags;
info.size = verify_u8(max(lit.msk.size(), lit.s.size()));
- info.groups = lit.groups;
-
- // these are built up assuming a LE machine
- CONF_TYPE msk = all_ones;
- CONF_TYPE val = 0;
- for (u32 j = 0; j < sizeof(CONF_TYPE); j++) {
- u32 shiftLoc = (sizeof(CONF_TYPE) - j - 1) * 8;
- if (j >= lit.s.size()) {
- msk &= ~((CONF_TYPE)0xff << shiftLoc);
- } else {
- u8 c = lit.s[lit.s.size() - j - 1];
- if (lit.nocase && ourisalpha(c)) {
- msk &= ~((CONF_TYPE)CASE_BIT << shiftLoc);
- val |= (CONF_TYPE)(c & CASE_CLEAR) << shiftLoc;
- } else {
- val |= (CONF_TYPE)c << shiftLoc;
- }
- }
- }
-
- info.v = val;
- info.msk = msk;
- if (!lit.msk.empty()) {
- u64a l_msk = make_u64a_mask(lit.msk);
- u64a l_cmp = make_u64a_mask(lit.cmp);
-
- // test for consistency - if there's intersection, then v and msk
- // values must line up
- UNUSED u64a intersection = l_msk & info.msk;
- assert((info.v & intersection) == (l_cmp & intersection));
-
- // incorporate lit.msk, lit.cmp into v and msk
- info.msk |= l_msk;
- info.v |= l_cmp;
- }
-
- andmsk &= info.msk;
- }
-}
-
-//#define FDR_CONFIRM_DUMP 1
-
-static
+ info.groups = lit.groups;
+
+ // these are built up assuming a LE machine
+ CONF_TYPE msk = all_ones;
+ CONF_TYPE val = 0;
+ for (u32 j = 0; j < sizeof(CONF_TYPE); j++) {
+ u32 shiftLoc = (sizeof(CONF_TYPE) - j - 1) * 8;
+ if (j >= lit.s.size()) {
+ msk &= ~((CONF_TYPE)0xff << shiftLoc);
+ } else {
+ u8 c = lit.s[lit.s.size() - j - 1];
+ if (lit.nocase && ourisalpha(c)) {
+ msk &= ~((CONF_TYPE)CASE_BIT << shiftLoc);
+ val |= (CONF_TYPE)(c & CASE_CLEAR) << shiftLoc;
+ } else {
+ val |= (CONF_TYPE)c << shiftLoc;
+ }
+ }
+ }
+
+ info.v = val;
+ info.msk = msk;
+ if (!lit.msk.empty()) {
+ u64a l_msk = make_u64a_mask(lit.msk);
+ u64a l_cmp = make_u64a_mask(lit.cmp);
+
+ // test for consistency - if there's intersection, then v and msk
+ // values must line up
+ UNUSED u64a intersection = l_msk & info.msk;
+ assert((info.v & intersection) == (l_cmp & intersection));
+
+ // incorporate lit.msk, lit.cmp into v and msk
+ info.msk |= l_msk;
+ info.v |= l_cmp;
+ }
+
+ andmsk &= info.msk;
+ }
+}
+
+//#define FDR_CONFIRM_DUMP 1
+
+static
bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
bool make_small) {
// Every literal must fit within CONF_TYPE.
@@ -136,39 +136,39 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
return lit.s.size() <= sizeof(CONF_TYPE);
}));
- vector<LitInfo> tmpLitInfo(lits.size());
- CONF_TYPE andmsk;
- fillLitInfo(lits, tmpLitInfo, andmsk);
-
-#ifdef FDR_CONFIRM_DUMP
- printf("-------------------\n");
-#endif
-
- // just magic numbers and crude measures for now
- u32 nBits;
- if (make_small) {
- nBits = min(10U, lg2(lits.size()) + 1);
- } else {
+ vector<LitInfo> tmpLitInfo(lits.size());
+ CONF_TYPE andmsk;
+ fillLitInfo(lits, tmpLitInfo, andmsk);
+
+#ifdef FDR_CONFIRM_DUMP
+ printf("-------------------\n");
+#endif
+
+ // just magic numbers and crude measures for now
+ u32 nBits;
+ if (make_small) {
+ nBits = min(10U, lg2(lits.size()) + 1);
+ } else {
nBits = lg2(lits.size()) + 4;
- }
-
- CONF_TYPE mult = (CONF_TYPE)0x0b4e0ef37bc32127ULL;
-
- // we can walk the vector and assign elements from the vectors to a
- // map by hash value
- map<u32, vector<LiteralIndex> > res2lits;
- hwlm_group_t gm = 0;
- for (LiteralIndex i = 0; i < lits.size(); i++) {
- LitInfo & li = tmpLitInfo[i];
- u32 hash = CONF_HASH_CALL(li.v, andmsk, mult, nBits);
- DEBUG_PRINTF("%016llx --> %u\n", li.v, hash);
- res2lits[hash].push_back(i);
- gm |= li.groups;
- }
-
-#ifdef FDR_CONFIRM_DUMP
- // print out the literals reversed - makes it easier to line up analyses
- // that are end-offset based
+ }
+
+ CONF_TYPE mult = (CONF_TYPE)0x0b4e0ef37bc32127ULL;
+
+ // we can walk the vector and assign elements from the vectors to a
+ // map by hash value
+ map<u32, vector<LiteralIndex> > res2lits;
+ hwlm_group_t gm = 0;
+ for (LiteralIndex i = 0; i < lits.size(); i++) {
+ LitInfo & li = tmpLitInfo[i];
+ u32 hash = CONF_HASH_CALL(li.v, andmsk, mult, nBits);
+ DEBUG_PRINTF("%016llx --> %u\n", li.v, hash);
+ res2lits[hash].push_back(i);
+ gm |= li.groups;
+ }
+
+#ifdef FDR_CONFIRM_DUMP
+ // print out the literals reversed - makes it easier to line up analyses
+ // that are end-offset based
for (const auto &m : res2lits) {
const u32 &hash = m.first;
const vector<LiteralIndex> &vlidx = m.second;
@@ -185,7 +185,7 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
const auto &lit = lits[litIdx];
if (lit.s.size() > vsl.size()) {
vsl.resize(lit.s.size());
- }
+ }
for (size_t j = lit.s.size(); j != 0; j--) {
vsl[lit.s.size() - j].insert(lit.s[j - 1]);
}
@@ -207,12 +207,12 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
size_t dist_from_end = lit.s.size() - j;
if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) {
printf("__");
- } else {
+ } else {
printf("%02x", lit.s[j - 1]);
- }
- }
- printf("\n");
- }
+ }
+ }
+ printf("\n");
+ }
size_t total_compares = 0;
for (const auto &v : vsl) {
total_compares += v.size();
@@ -224,117 +224,117 @@ bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
}
printf("Total compare load: %zu Total string size: %zu\n\n",
total_compares, total_string_size);
- }
-#endif
-
- const size_t bitsToLitIndexSize = (1U << nBits) * sizeof(u32);
-
- // this size can now be a worst-case as we can always be a bit smaller
- size_t size = ROUNDUP_N(sizeof(FDRConfirm), alignof(u32)) +
- ROUNDUP_N(bitsToLitIndexSize, alignof(LitInfo)) +
+ }
+#endif
+
+ const size_t bitsToLitIndexSize = (1U << nBits) * sizeof(u32);
+
+ // this size can now be a worst-case as we can always be a bit smaller
+ size_t size = ROUNDUP_N(sizeof(FDRConfirm), alignof(u32)) +
+ ROUNDUP_N(bitsToLitIndexSize, alignof(LitInfo)) +
sizeof(LitInfo) * lits.size();
- size = ROUNDUP_N(size, alignof(FDRConfirm));
-
+ size = ROUNDUP_N(size, alignof(FDRConfirm));
+
auto fdrc = make_zeroed_bytecode_ptr<FDRConfirm>(size);
- assert(fdrc); // otherwise would have thrown std::bad_alloc
-
- fdrc->andmsk = andmsk;
- fdrc->mult = mult;
+ assert(fdrc); // otherwise would have thrown std::bad_alloc
+
+ fdrc->andmsk = andmsk;
+ fdrc->mult = mult;
fdrc->nBits = nBits;
-
- fdrc->groups = gm;
-
- // After the FDRConfirm, we have the lit index array.
+
+ fdrc->groups = gm;
+
+ // After the FDRConfirm, we have the lit index array.
u8 *fdrc_base = (u8 *)fdrc.get();
- u8 *ptr = fdrc_base + sizeof(*fdrc);
- ptr = ROUNDUP_PTR(ptr, alignof(u32));
- u32 *bitsToLitIndex = (u32 *)ptr;
- ptr += bitsToLitIndexSize;
-
- // After the lit index array, we have the LitInfo structures themselves,
- // which vary in size (as each may have a variable-length string after it).
- ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
-
- // Walk the map by hash value assigning indexes and laying out the
- // elements (and their associated string confirm material) in memory.
+ u8 *ptr = fdrc_base + sizeof(*fdrc);
+ ptr = ROUNDUP_PTR(ptr, alignof(u32));
+ u32 *bitsToLitIndex = (u32 *)ptr;
+ ptr += bitsToLitIndexSize;
+
+ // After the lit index array, we have the LitInfo structures themselves,
+ // which vary in size (as each may have a variable-length string after it).
+ ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
+
+ // Walk the map by hash value assigning indexes and laying out the
+ // elements (and their associated string confirm material) in memory.
for (const auto &m : res2lits) {
const u32 hash = m.first;
const vector<LiteralIndex> &vlidx = m.second;
bitsToLitIndex[hash] = verify_u32(ptr - fdrc_base);
for (auto i = vlidx.begin(), e = vlidx.end(); i != e; ++i) {
LiteralIndex litIdx = *i;
-
- // Write LitInfo header.
- LitInfo &finalLI = *(LitInfo *)ptr;
- finalLI = tmpLitInfo[litIdx];
-
- ptr += sizeof(LitInfo); // String starts directly after LitInfo.
+
+ // Write LitInfo header.
+ LitInfo &finalLI = *(LitInfo *)ptr;
+ finalLI = tmpLitInfo[litIdx];
+
+ ptr += sizeof(LitInfo); // String starts directly after LitInfo.
assert(lits[litIdx].s.size() <= sizeof(CONF_TYPE));
if (next(i) == e) {
finalLI.next = 0;
- } else {
+ } else {
finalLI.next = 1;
- }
- }
- assert((size_t)(ptr - fdrc_base) <= size);
- }
-
- // Return actual used size, not worst-case size. Must be rounded up to
- // FDRConfirm alignment so that the caller can lay out a sequence of these.
- size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base),
- alignof(FDRConfirm));
- assert(actual_size <= size);
+ }
+ }
+ assert((size_t)(ptr - fdrc_base) <= size);
+ }
+
+ // Return actual used size, not worst-case size. Must be rounded up to
+ // FDRConfirm alignment so that the caller can lay out a sequence of these.
+ size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base),
+ alignof(FDRConfirm));
+ assert(actual_size <= size);
fdrc.shrink(actual_size);
return fdrc;
-}
-
+}
+
bytecode_ptr<u8>
setupFullConfs(const vector<hwlmLiteral> &lits,
const EngineDescription &eng,
const map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
bool make_small) {
- unique_ptr<TeddyEngineDescription> teddyDescr =
- getTeddyDescription(eng.getID());
-
+ unique_ptr<TeddyEngineDescription> teddyDescr =
+ getTeddyDescription(eng.getID());
+
BC2CONF bc2Conf;
- u32 totalConfirmSize = 0;
- for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
+ u32 totalConfirmSize = 0;
+ for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
if (contains(bucketToLits, b)) {
vector<hwlmLiteral> vl;
for (const LiteralIndex &lit_idx : bucketToLits.at(b)) {
vl.push_back(lits[lit_idx]);
- }
-
+ }
+
DEBUG_PRINTF("b %d sz %zu\n", b, vl.size());
auto fc = getFDRConfirm(vl, make_small);
totalConfirmSize += fc.size();
bc2Conf.emplace(b, move(fc));
- }
- }
-
- u32 nBuckets = eng.getNumBuckets();
+ }
+ }
+
+ u32 nBuckets = eng.getNumBuckets();
u32 totalConfSwitchSize = ROUNDUP_CL(nBuckets * sizeof(u32));
u32 totalSize = totalConfSwitchSize + totalConfirmSize;
-
+
auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 64);
- assert(buf); // otherwise would have thrown std::bad_alloc
-
+ assert(buf); // otherwise would have thrown std::bad_alloc
+
u32 *confBase = (u32 *)buf.get();
u8 *ptr = buf.get() + totalConfSwitchSize;
assert(ISALIGNED_CL(ptr));
-
+
for (const auto &m : bc2Conf) {
const BucketIndex &idx = m.first;
const bytecode_ptr<FDRConfirm> &p = m.second;
- // confirm offset is relative to the base of this structure, now
+ // confirm offset is relative to the base of this structure, now
u32 confirm_offset = verify_u32(ptr - buf.get());
memcpy(ptr, p.get(), p.size());
ptr += p.size();
- confBase[idx] = confirm_offset;
- }
+ confBase[idx] = confirm_offset;
+ }
return buf;
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/fdr/fdr_confirm_runtime.h b/contrib/libs/hyperscan/src/fdr/fdr_confirm_runtime.h
index 6df49ae634..5a2164952c 100644
--- a/contrib/libs/hyperscan/src/fdr/fdr_confirm_runtime.h
+++ b/contrib/libs/hyperscan/src/fdr/fdr_confirm_runtime.h
@@ -1,63 +1,63 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef FDR_CONFIRM_RUNTIME_H
-#define FDR_CONFIRM_RUNTIME_H
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FDR_CONFIRM_RUNTIME_H
+#define FDR_CONFIRM_RUNTIME_H
+
#include "scratch.h"
-#include "fdr_internal.h"
-#include "fdr_loadval.h"
-#include "hwlm/hwlm.h"
-#include "ue2common.h"
-#include "util/bitutils.h"
-#include "util/compare.h"
-
-// this is ordinary confirmation function which runs through
-// the whole confirmation procedure
-static really_inline
+#include "fdr_internal.h"
+#include "fdr_loadval.h"
+#include "hwlm/hwlm.h"
+#include "ue2common.h"
+#include "util/bitutils.h"
+#include "util/compare.h"
+
+// this is ordinary confirmation function which runs through
+// the whole confirmation procedure
+static really_inline
void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a,
size_t i, hwlmcb_rv_t *control, u32 *last_match,
u64a conf_key, u64a *conf, u8 bit) {
- assert(i < a->len);
+ assert(i < a->len);
assert(i >= a->start_offset);
- assert(ISALIGNED(fdrc));
-
- const u8 * buf = a->buf;
+ assert(ISALIGNED(fdrc));
+
+ const u8 * buf = a->buf;
u32 c = CONF_HASH_CALL(conf_key, fdrc->andmsk, fdrc->mult,
fdrc->nBits);
u32 start = getConfirmLitIndex(fdrc)[c];
if (likely(!start)) {
return;
- }
-
+ }
+
const struct LitInfo *li
= (const struct LitInfo *)((const u8 *)fdrc + start);
-
+
struct hs_scratch *scratch = a->scratch;
assert(!scratch->fdr_conf);
scratch->fdr_conf = conf;
@@ -65,33 +65,33 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
u8 oldNext; // initialized in loop
do {
assert(ISALIGNED(li));
-
+
if (unlikely((conf_key & li->msk) != li->v)) {
goto out;
}
-
+
if ((*last_match == li->id) && (li->flags & FDR_LIT_FLAG_NOREPEAT)) {
goto out;
}
-
+
const u8 *loc = buf + i - li->size + 1;
-
+
if (loc < buf) {
u32 full_overhang = buf - loc;
size_t len_history = a->len_history;
-
+
// can't do a vectored confirm either if we don't have
// the bytes
if (full_overhang > len_history) {
- goto out;
- }
- }
+ goto out;
+ }
+ }
assert(li->size <= sizeof(CONF_TYPE));
-
+
if (unlikely(!(li->groups & *control))) {
goto out;
- }
-
+ }
+
*last_match = li->id;
*control = a->cb(i, li->id, scratch);
out:
@@ -99,6 +99,6 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a
li++;
} while (oldNext);
scratch->fdr_conf = NULL;
-}
-
-#endif
+}
+
+#endif
diff --git a/contrib/libs/hyperscan/src/fdr/fdr_engine_description.cpp b/contrib/libs/hyperscan/src/fdr/fdr_engine_description.cpp
index 0296dc1094..2f9ba420c0 100644
--- a/contrib/libs/hyperscan/src/fdr/fdr_engine_description.cpp
+++ b/contrib/libs/hyperscan/src/fdr/fdr_engine_description.cpp
@@ -1,228 +1,228 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "fdr_compile_internal.h"
-#include "fdr_engine_description.h"
-#include "hs_compile.h"
-#include "util/target_info.h"
-#include "util/compare.h" // for ourisalpha()
-#include "util/make_unique.h"
-
-#include <cassert>
-#include <cstdlib>
-#include <map>
-#include <string>
-
-using namespace std;
-
-namespace ue2 {
-
-FDREngineDescription::FDREngineDescription(const FDREngineDef &def)
- : EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fdr_compile_internal.h"
+#include "fdr_engine_description.h"
+#include "hs_compile.h"
+#include "util/target_info.h"
+#include "util/compare.h" // for ourisalpha()
+#include "util/make_unique.h"
+
+#include <cassert>
+#include <cstdlib>
+#include <map>
+#include <string>
+
+using namespace std;
+
+namespace ue2 {
+
+FDREngineDescription::FDREngineDescription(const FDREngineDef &def) // builds a description from a static engine definition
+ : EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
def.numBuckets),
schemeWidth(def.schemeWidth), stride(0), bits(0) {} // stride and bits start at 0; chooseEngine() assigns them for the configuration it picks
-
-u32 FDREngineDescription::getDefaultFloodSuffixLength() const {
- // rounding up, so that scheme width 32 and 6 buckets is 6 not 5!
- // the +1 avoids pain due to various reach choices
- return ((getSchemeWidth() + getNumBuckets() - 1) / getNumBuckets()) + 1;
-}
-
+
+u32 FDREngineDescription::getDefaultFloodSuffixLength() const { // returns ceil(schemeWidth / numBuckets) + 1
+ // rounding up, so that scheme width 32 and 6 buckets is 6 not 5!
+ // the +1 avoids pain due to various reach choices
+ return ((getSchemeWidth() + getNumBuckets() - 1) / getNumBuckets()) + 1;
+}
+
void getFdrDescriptions(vector<FDREngineDescription> *out) { // replaces the contents of *out with the single built-in FDR engine description
static const FDREngineDef def = {0, 64, 8, 0}; // field order: id, schemeWidth, numBuckets, cpu_features
out->clear(); // any entries the caller passed in are discarded
out->emplace_back(def); // constructs the FDREngineDescription in place from def
}
-static
-u32 findDesiredStride(size_t num_lits, size_t min_len, size_t min_len_count) {
- u32 desiredStride = 1; // always our safe fallback
- if (min_len > 1) {
- if (num_lits < 250) {
- // small cases we just go for it
- desiredStride = min_len;
- } else if (num_lits < 800) {
- // intermediate cases
- desiredStride = min_len - 1;
- } else if (num_lits < 5000) {
- // for larger but not huge sizes, go to stride 2 only if we have at
- // least minlen 3
- desiredStride = MIN(min_len - 1, 2);
- }
- }
-
- // patch if count is quite large - a ton of length 2 literals can
- // break things
-#ifdef TRY_THIS_LATER
- if ((min_len == 2) && (desiredStride == 2) && (min_len_count > 20)) {
- desiredStride = 1;
- }
-#endif
-
- // patch stuff just for the stride 4 case; don't let min_len=4,
- // desiredStride=4 through as even a few length 4 literals can break things
- // (far more fragile)
- if ((min_len == 4) && (desiredStride == 4) && (min_len_count > 2)) {
- desiredStride = 2;
- }
-
- return desiredStride;
-}
-
-unique_ptr<FDREngineDescription> chooseEngine(const target_t &target,
- const vector<hwlmLiteral> &vl,
- bool make_small) {
- vector<FDREngineDescription> allDescs;
- getFdrDescriptions(&allDescs);
-
- // find desired stride
- size_t count;
- size_t msl = minLenCount(vl, &count);
- u32 desiredStride = findDesiredStride(vl.size(), msl, count);
-
- DEBUG_PRINTF("%zu lits, msl=%zu, desiredStride=%u\n", vl.size(), msl,
- desiredStride);
-
- FDREngineDescription *best = nullptr;
- u32 best_score = 0;
-
+static
+u32 findDesiredStride(size_t num_lits, size_t min_len, size_t min_len_count) { // picks a preferred stride from the literal count and the min_len / min_len_count pair the caller gets from minLenCount()
+ u32 desiredStride = 1; // always our safe fallback
+ if (min_len > 1) { // min_len <= 1 keeps the fallback stride of 1
+ if (num_lits < 250) {
+ // small cases we just go for it
+ desiredStride = min_len;
+ } else if (num_lits < 800) {
+ // intermediate cases
+ desiredStride = min_len - 1;
+ } else if (num_lits < 5000) {
+ // for larger but not huge sizes, go to stride 2 only if we have at
+ // least minlen 3
+ desiredStride = MIN(min_len - 1, 2);
+ } // num_lits >= 5000 also keeps the fallback stride of 1
+ }
+
+ // patch if count is quite large - a ton of length 2 literals can
+ // break things
+#ifdef TRY_THIS_LATER
+ if ((min_len == 2) && (desiredStride == 2) && (min_len_count > 20)) {
+ desiredStride = 1;
+ }
+#endif
+
+ // patch stuff just for the stride 4 case; don't let min_len=4,
+ // desiredStride=4 through as even a few length 4 literals can break things
+ // (far more fragile)
+ if ((min_len == 4) && (desiredStride == 4) && (min_len_count > 2)) {
+ desiredStride = 2;
+ }
+
+ return desiredStride; // only a preference: chooseEngine() scores candidate strides against this value
+}
+
+unique_ptr<FDREngineDescription> chooseEngine(const target_t &target,
+ const vector<hwlmLiteral> &vl,
+ bool make_small) {
+ vector<FDREngineDescription> allDescs;
+ getFdrDescriptions(&allDescs);
+
+ // find desired stride
+ size_t count;
+ size_t msl = minLenCount(vl, &count);
+ u32 desiredStride = findDesiredStride(vl.size(), msl, count);
+
+ DEBUG_PRINTF("%zu lits, msl=%zu, desiredStride=%u\n", vl.size(), msl,
+ desiredStride);
+
+ FDREngineDescription *best = nullptr;
+ u32 best_score = 0;
+
FDREngineDescription &eng = allDescs[0];
- for (u32 domain = 9; domain <= 15; domain++) {
+ for (u32 domain = 9; domain <= 15; domain++) {
for (size_t stride = 1; stride <= 4; stride *= 2) {
- // to make sure that domains >=14 have stride 1 according to origin
+ // to make sure that domains >=14 have stride 1 according to origin
if (domain > 13 && stride > 1) {
- continue;
- }
- if (!eng.isValidOnTarget(target)) {
- continue;
- }
+ continue;
+ }
+ if (!eng.isValidOnTarget(target)) {
+ continue;
+ }
if (msl < stride) {
- continue;
- }
-
- u32 score = 100;
-
+ continue;
+ }
+
+ u32 score = 100;
+
score -= absdiff(desiredStride, stride);
-
+
if (stride <= desiredStride) {
score += stride;
- }
-
- u32 effLits = vl.size(); /* * desiredStride;*/
- u32 ideal;
- if (effLits < eng.getNumBuckets()) {
+ }
+
+ u32 effLits = vl.size(); /* * desiredStride;*/
+ u32 ideal;
+ if (effLits < eng.getNumBuckets()) {
if (stride == 1) {
- ideal = 8;
- } else {
- ideal = 10;
- }
- } else if (effLits < 20) {
- ideal = 10;
- } else if (effLits < 100) {
- ideal = 11;
- } else if (effLits < 1000) {
- ideal = 12;
- } else if (effLits < 10000) {
- ideal = 13;
- } else {
- ideal = 15;
- }
-
- if (ideal != 8 && eng.schemeWidth == 32) {
- ideal += 1;
- }
-
- if (make_small) {
- ideal -= 2;
- }
-
+ ideal = 8;
+ } else {
+ ideal = 10;
+ }
+ } else if (effLits < 20) {
+ ideal = 10;
+ } else if (effLits < 100) {
+ ideal = 11;
+ } else if (effLits < 1000) {
+ ideal = 12;
+ } else if (effLits < 10000) {
+ ideal = 13;
+ } else {
+ ideal = 15;
+ }
+
+ if (ideal != 8 && eng.schemeWidth == 32) {
+ ideal += 1;
+ }
+
+ if (make_small) {
+ ideal -= 2;
+ }
+
if (stride > 1) {
- ideal++;
- }
-
- DEBUG_PRINTF("effLits %u\n", effLits);
-
- if (target.is_atom_class() && !make_small && effLits < 4000) {
+ ideal++;
+ }
+
+ DEBUG_PRINTF("effLits %u\n", effLits);
+
+ if (target.is_atom_class() && !make_small && effLits < 4000) {
/* Unless it is a very heavy case, we want to build smaller
* tables on lightweight machines due to their small caches. */
- ideal -= 2;
- }
-
- score -= absdiff(ideal, domain);
-
+ ideal -= 2;
+ }
+
+ score -= absdiff(ideal, domain);
+
DEBUG_PRINTF("fdr %u: width=%u, domain=%u, buckets=%u, stride=%zu "
- "-> score=%u\n",
+ "-> score=%u\n",
eng.getID(), eng.schemeWidth, domain,
eng.getNumBuckets(), stride, score);
-
- if (!best || score > best_score) {
- eng.bits = domain;
+
+ if (!best || score > best_score) {
+ eng.bits = domain;
eng.stride = stride;
- best = &eng;
- best_score = score;
- }
- }
- }
-
- if (!best) {
- DEBUG_PRINTF("failed to find engine\n");
- return nullptr;
- }
-
- DEBUG_PRINTF("using engine %u\n", best->getID());
- return ue2::make_unique<FDREngineDescription>(*best);
-}
-
-SchemeBitIndex FDREngineDescription::getSchemeBit(BucketIndex b,
- PositionInBucket p) const {
- assert(p < getBucketWidth(b));
- SchemeBitIndex sbi = p * getNumBuckets() + b;
- assert(sbi < getSchemeWidth());
- return sbi;
-}
-
-u32 FDREngineDescription::getBucketWidth(BucketIndex) const {
- u32 sw = getSchemeWidth();
- u32 nm = getNumBuckets();
- assert(sw % nm == 0);
- return sw/nm;
-}
-
-unique_ptr<FDREngineDescription> getFdrDescription(u32 engineID) {
- vector<FDREngineDescription> allDescs;
- getFdrDescriptions(&allDescs);
-
- if (engineID >= allDescs.size()) {
- return nullptr;
- }
-
- return ue2::make_unique<FDREngineDescription>(allDescs[engineID]);
-}
-
-} // namespace ue2
+ best = &eng;
+ best_score = score;
+ }
+ }
+ }
+
+ if (!best) {
+ DEBUG_PRINTF("failed to find engine\n");
+ return nullptr;
+ }
+
+ DEBUG_PRINTF("using engine %u\n", best->getID());
+ return ue2::make_unique<FDREngineDescription>(*best);
+}
+
+SchemeBitIndex FDREngineDescription::getSchemeBit(BucketIndex b, // maps (bucket, position in bucket) to a bit index within the scheme
+ PositionInBucket p) const {
+ assert(p < getBucketWidth(b));
+ SchemeBitIndex sbi = p * getNumBuckets() + b; // position p of bucket b sits at bit p * numBuckets + b
+ assert(sbi < getSchemeWidth());
+ return sbi;
+}
+
+u32 FDREngineDescription::getBucketWidth(BucketIndex) const { // the bucket index is unused: every bucket gets the same width
+ u32 sw = getSchemeWidth();
+ u32 nm = getNumBuckets();
+ assert(sw % nm == 0); // the scheme width must divide evenly among the buckets
+ return sw/nm;
+}
+
+unique_ptr<FDREngineDescription> getFdrDescription(u32 engineID) { // returns a copy of the built-in description at index engineID, or nullptr if out of range
+ vector<FDREngineDescription> allDescs;
+ getFdrDescriptions(&allDescs);
+
+ if (engineID >= allDescs.size()) {
+ return nullptr;
+ }
+
+ return ue2::make_unique<FDREngineDescription>(allDescs[engineID]); // engineID is used as a position in the list, not compared against getID()
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/fdr/fdr_engine_description.h b/contrib/libs/hyperscan/src/fdr/fdr_engine_description.h
index 7b763d4a37..1c464fe3ad 100644
--- a/contrib/libs/hyperscan/src/fdr/fdr_engine_description.h
+++ b/contrib/libs/hyperscan/src/fdr/fdr_engine_description.h
@@ -1,73 +1,73 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef FDR_ENGINE_DESCRIPTION_H
-#define FDR_ENGINE_DESCRIPTION_H
-
-#include "engine_description.h"
-
-#include <map>
-#include <memory>
-#include <vector>
-
-namespace ue2 {
-
-struct FDREngineDef {
- u32 id;
- u32 schemeWidth;
- u32 numBuckets;
- u64a cpu_features;
-};
-
-class FDREngineDescription : public EngineDescription {
-public:
- u32 schemeWidth;
- u32 stride;
- u32 bits;
-
- u32 getSchemeWidth() const { return schemeWidth; }
- u32 getBucketWidth(BucketIndex b) const;
- SchemeBitIndex getSchemeBit(BucketIndex b, PositionInBucket p) const;
- u32 getNumTableEntries() const { return 1 << bits; }
- u32 getTabSizeBytes() const {
- return schemeWidth / 8 * getNumTableEntries();
- }
-
- explicit FDREngineDescription(const FDREngineDef &def);
-
- u32 getDefaultFloodSuffixLength() const override;
-};
-
-std::unique_ptr<FDREngineDescription>
-chooseEngine(const target_t &target, const std::vector<hwlmLiteral> &vl,
- bool make_small);
-std::unique_ptr<FDREngineDescription> getFdrDescription(u32 engineID);
-void getFdrDescriptions(std::vector<FDREngineDescription> *out);
-} // namespace ue2
-
-#endif
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FDR_ENGINE_DESCRIPTION_H
+#define FDR_ENGINE_DESCRIPTION_H
+
+#include "engine_description.h"
+
+#include <map>
+#include <memory>
+#include <vector>
+
+namespace ue2 {
+
+struct FDREngineDef { // plain initializer record consumed by the FDREngineDescription constructor
+ u32 id; // forwarded to the EngineDescription base as its first argument
+ u32 schemeWidth; // copied into FDREngineDescription::schemeWidth
+ u32 numBuckets; // forwarded to the EngineDescription base
+ u64a cpu_features; // passed through targetByArchFeatures() by the constructor
+};
+
+class FDREngineDescription : public EngineDescription { // FDR-specific engine parameters on top of the generic EngineDescription
+public:
+ u32 schemeWidth; // counted in bits: getSchemeBit() asserts bit indices are below it
+ u32 stride; // 0 after construction; chooseEngine() stores 1, 2 or 4
+ u32 bits; // 0 after construction; chooseEngine() stores the chosen domain (9..15)
+
+ u32 getSchemeWidth() const { return schemeWidth; }
+ u32 getBucketWidth(BucketIndex b) const;
+ SchemeBitIndex getSchemeBit(BucketIndex b, PositionInBucket p) const;
+ u32 getNumTableEntries() const { return 1 << bits; } // 2^bits entries
+ u32 getTabSizeBytes() const {
+ return schemeWidth / 8 * getNumTableEntries(); // schemeWidth / 8 bytes per entry times the entry count
+ }
+
+ explicit FDREngineDescription(const FDREngineDef &def);
+
+ u32 getDefaultFloodSuffixLength() const override;
+};
+
+std::unique_ptr<FDREngineDescription>
+chooseEngine(const target_t &target, const std::vector<hwlmLiteral> &vl,
+ bool make_small);
+std::unique_ptr<FDREngineDescription> getFdrDescription(u32 engineID);
+void getFdrDescriptions(std::vector<FDREngineDescription> *out);
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/fdr/fdr_internal.h b/contrib/libs/hyperscan/src/fdr/fdr_internal.h
index 88a34efe45..c79f61c1f1 100644
--- a/contrib/libs/hyperscan/src/fdr/fdr_internal.h
+++ b/contrib/libs/hyperscan/src/fdr/fdr_internal.h
@@ -1,105 +1,105 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief FDR literal matcher: data structures.
- */
-
-#ifndef FDR_INTERNAL_H
-#define FDR_INTERNAL_H
-
-#include "ue2common.h"
-#include "hwlm/hwlm.h" // for hwlm_group_t, HWLMCallback
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief FDR literal matcher: data structures.
+ */
+
+#ifndef FDR_INTERNAL_H
+#define FDR_INTERNAL_H
+
+#include "ue2common.h"
+#include "hwlm/hwlm.h" // for hwlm_group_t, HWLMCallback
+
struct hs_scratch;
-typedef enum {
- NOT_CAUTIOUS, //!< not near a boundary (quantify?)
- VECTORING //!< potentially vectoring
-} CautionReason;
-
-/** \brief number of different ids that can be triggered by floods of any given
- * character. */
-#define FDR_FLOOD_MAX_IDS 16
-
-struct FDRFlood {
- hwlm_group_t allGroups; //!< all the groups or'd together
- u32 suffix;
-
- /** \brief 0 to FDR_FLOOD_MAX_IDS-1 ids that are generated once per char on
- * a flood.
- * If larger we won't handle this through the flood path at all. */
- u16 idCount;
-
- u32 ids[FDR_FLOOD_MAX_IDS]; //!< the ids
- hwlm_group_t groups[FDR_FLOOD_MAX_IDS]; //!< group ids to go with string ids
-};
-
-/** \brief FDR structure.
- *
- * 1. struct as-is
- * 2. primary matching table
- * 3. confirm stuff
- */
-struct FDR {
- u32 engineID;
- u32 size;
- u32 maxStringLen;
+typedef enum {
+ NOT_CAUTIOUS, //!< not near a boundary (quantify?)
+ VECTORING //!< potentially vectoring
+} CautionReason;
+
+/** \brief number of different ids that can be triggered by floods of any given
+ * character. */
+#define FDR_FLOOD_MAX_IDS 16
+
+struct FDRFlood {
+ hwlm_group_t allGroups; //!< all the groups or'd together
+ u32 suffix;
+
+ /** \brief 0 to FDR_FLOOD_MAX_IDS-1 ids that are generated once per char on
+ * a flood.
+ * If larger we won't handle this through the flood path at all. */
+ u16 idCount;
+
+ u32 ids[FDR_FLOOD_MAX_IDS]; //!< the ids
+ hwlm_group_t groups[FDR_FLOOD_MAX_IDS]; //!< group ids to go with string ids
+};
+
+/** \brief FDR structure.
+ *
+ * 1. struct as-is
+ * 2. primary matching table
+ * 3. confirm stuff
+ */
+struct FDR {
+ u32 engineID;
+ u32 size;
+ u32 maxStringLen;
u32 numStrings;
u32 confOffset;
- u32 floodOffset;
+ u32 floodOffset;
u8 stride; /* stride - how frequently the data is consulted by the first
* stage matcher */
u8 domain; /* number of bits used to index into main FDR table. This value
* is used only for debugging/asserts. */
- u16 domainMask; /* pre-computed domain mask */
- u32 tabSize; /* pre-computed hashtable size in bytes */
+ u16 domainMask; /* pre-computed domain mask */
+ u32 tabSize; /* pre-computed hashtable size in bytes */
m128 start; /* initial start state to use at offset 0. The state has been
* set up based on the min length of buckets to reduce the need
* for pointless confirms. */
-};
-
-/** \brief FDR runtime arguments.
- *
- * This structure handles read-only things that are passed extensively around
- * the FDR run-time functions. They are set by the API, passed by value into
- * the main function, then a pointer is passed around to all the various
- * sub-functions (confirm & flood). */
-struct FDR_Runtime_Args {
- const u8 *buf;
- size_t len;
- const u8 *buf_history;
- size_t len_history;
- size_t start_offset;
- HWLMCallback cb;
+};
+
+/** \brief FDR runtime arguments.
+ *
+ * This structure handles read-only things that are passed extensively around
+ * the FDR run-time functions. They are set by the API, passed by value into
+ * the main function, then a pointer is passed around to all the various
+ * sub-functions (confirm & flood). */
+struct FDR_Runtime_Args {
+ const u8 *buf;
+ size_t len;
+ const u8 *buf_history;
+ size_t len_history;
+ size_t start_offset;
+ HWLMCallback cb;
struct hs_scratch *scratch;
- const u8 *firstFloodDetect;
- const u64a histBytes;
-};
-
-#endif
+ const u8 *firstFloodDetect;
+ const u64a histBytes;
+};
+
+#endif
diff --git a/contrib/libs/hyperscan/src/fdr/fdr_loadval.h b/contrib/libs/hyperscan/src/fdr/fdr_loadval.h
index 7eec374937..86c39c7f30 100644
--- a/contrib/libs/hyperscan/src/fdr/fdr_loadval.h
+++ b/contrib/libs/hyperscan/src/fdr/fdr_loadval.h
@@ -1,47 +1,47 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef FDR_LOADVAL_H
-#define FDR_LOADVAL_H
-
-#include "ue2common.h"
-#include "util/unaligned.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FDR_LOADVAL_H
+#define FDR_LOADVAL_H
+
+#include "ue2common.h"
+#include "util/unaligned.h"
+
#define MAKE_LOADVAL(type, name) \
static really_inline \
type name(const u8 *ptr, UNUSED const u8 *lo, UNUSED const u8 *hi)
-
+
#define NORMAL_SAFE(type) \
do { \
assert(ptr >= lo); \
assert(ptr + sizeof(type) - 1 < hi); \
} while(0)
-
+
#define MAKE_LOOP_CE(TYPE) \
TYPE v = 0; \
for (TYPE i = 0; i < sizeof(TYPE); i++) { \
@@ -49,23 +49,23 @@
v += (TYPE)ptr[i] << (i*8); \
} \
} \
- return v;
-
-// no suffix = normal (unaligned)
-// _ce = cautious everywhere (in both directions); test against hi and lo
-
-MAKE_LOADVAL(u16, lv_u16) {
- NORMAL_SAFE(u16);
- return unaligned_load_u16(ptr);
-}
-
-MAKE_LOADVAL(u64a, lv_u64a) {
- NORMAL_SAFE(u32);
- return unaligned_load_u64a(ptr);
-}
-
+ return v;
+
+// no suffix = normal (unaligned)
+// _ce = cautious everywhere (in both directions); test against hi and lo
+
+MAKE_LOADVAL(u16, lv_u16) {
+ NORMAL_SAFE(u16);
+ return unaligned_load_u16(ptr);
+}
+
+MAKE_LOADVAL(u64a, lv_u64a) {
+ NORMAL_SAFE(u32); // NOTE(review): bounds-checks only sizeof(u32) bytes against hi although a u64a is loaded below — confirm this is intentional
+ return unaligned_load_u64a(ptr);
+}
+
MAKE_LOADVAL(u16, lv_u16_ce) { MAKE_LOOP_CE(u16); }
-
+
MAKE_LOADVAL(u64a, lv_u64a_ce) { MAKE_LOOP_CE(u64a); }
-
-#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/fdr/flood_compile.cpp b/contrib/libs/hyperscan/src/fdr/flood_compile.cpp
index 80a667193b..ff805ca399 100644
--- a/contrib/libs/hyperscan/src/fdr/flood_compile.cpp
+++ b/contrib/libs/hyperscan/src/fdr/flood_compile.cpp
@@ -1,191 +1,191 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "fdr_internal.h"
-#include "fdr_confirm.h"
-#include "fdr_compile_internal.h"
-#include "fdr_engine_description.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fdr_internal.h"
+#include "fdr_confirm.h"
+#include "fdr_compile_internal.h"
+#include "fdr_engine_description.h"
#include "grey.h"
-#include "ue2common.h"
-#include "util/alloc.h"
-#include "util/bitutils.h"
-#include "util/charreach.h"
-#include "util/compare.h"
-#include "util/ue2string.h"
-#include "util/verify_types.h"
-
-#include <cstring>
-#include <map>
-#include <memory>
-#include <string>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-namespace {
-struct FloodComparator {
- bool operator()(const FDRFlood &f1, const FDRFlood &f2) const {
- return std::memcmp(&f1, &f2, sizeof(f1)) < 0;
- }
-};
-}
-
-static
-bool isDifferent(u8 oldC, u8 c, bool caseless) {
- if (caseless) {
- return mytolower(oldC) != mytolower(c);
- } else {
- return oldC != c;
- }
-}
-
-static
-void updateFloodSuffix(vector<FDRFlood> &tmpFlood, u8 c, u32 suffix) {
- FDRFlood &fl = tmpFlood[c];
- fl.suffix = MAX(fl.suffix, suffix + 1);
+#include "ue2common.h"
+#include "util/alloc.h"
+#include "util/bitutils.h"
+#include "util/charreach.h"
+#include "util/compare.h"
+#include "util/ue2string.h"
+#include "util/verify_types.h"
+
+#include <cstring>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+namespace {
+struct FloodComparator {
+ bool operator()(const FDRFlood &f1, const FDRFlood &f2) const {
+ return std::memcmp(&f1, &f2, sizeof(f1)) < 0;
+ }
+};
+}
+
+static
+bool isDifferent(u8 oldC, u8 c, bool caseless) {
+ if (caseless) {
+ return mytolower(oldC) != mytolower(c);
+ } else {
+ return oldC != c;
+ }
+}
+
+static
+void updateFloodSuffix(vector<FDRFlood> &tmpFlood, u8 c, u32 suffix) {
+ FDRFlood &fl = tmpFlood[c];
+ fl.suffix = MAX(fl.suffix, suffix + 1);
DEBUG_PRINTF("Updated Flood Suffix for char 0x%02x to %u\n", c, fl.suffix);
-}
-
-static
-void addFlood(vector<FDRFlood> &tmpFlood, u8 c, const hwlmLiteral &lit,
- u32 suffix) {
- FDRFlood &fl = tmpFlood[c];
- fl.suffix = MAX(fl.suffix, suffix + 1);
- if (fl.idCount < FDR_FLOOD_MAX_IDS) {
- fl.ids[fl.idCount] = lit.id;
- fl.allGroups |= lit.groups;
- fl.groups[fl.idCount] = lit.groups;
- // when idCount gets to max_ids this flood no longer happens
- // only incremented one more time to avoid arithmetic overflow
- DEBUG_PRINTF("Added Flood for char '%c' suffix=%u len[%hu]=%u\n",
+}
+
+static
+void addFlood(vector<FDRFlood> &tmpFlood, u8 c, const hwlmLiteral &lit,
+ u32 suffix) {
+ FDRFlood &fl = tmpFlood[c];
+ fl.suffix = MAX(fl.suffix, suffix + 1);
+ if (fl.idCount < FDR_FLOOD_MAX_IDS) {
+ fl.ids[fl.idCount] = lit.id;
+ fl.allGroups |= lit.groups;
+ fl.groups[fl.idCount] = lit.groups;
+ // when idCount gets to max_ids this flood no longer happens
+ // only incremented one more time to avoid arithmetic overflow
+ DEBUG_PRINTF("Added Flood for char '%c' suffix=%u len[%hu]=%u\n",
c, fl.suffix, fl.idCount, suffix);
- fl.idCount++;
- }
-}
-
+ fl.idCount++;
+ }
+}
+
bytecode_ptr<u8> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
const EngineDescription &eng,
const Grey &grey) {
- vector<FDRFlood> tmpFlood(N_CHARS);
- u32 default_suffix = eng.getDefaultFloodSuffixLength();
-
- // zero everything to avoid spurious distinctions in the compares
- memset(&tmpFlood[0], 0, N_CHARS * sizeof(FDRFlood));
-
- for (u32 c = 0; c < N_CHARS; c++) {
- tmpFlood[c].suffix = default_suffix;
- }
-
- for (const auto &lit : lits) {
- DEBUG_PRINTF("lit: '%s'%s\n", escapeString(lit.s).c_str(),
- lit.nocase ? " (nocase)" : "");
- u32 litSize = verify_u32(lit.s.size());
- u32 maskSize = (u32)lit.msk.size();
- u8 c = lit.s[litSize - 1];
- bool nocase = ourisalpha(c) ? lit.nocase : false;
-
- if (nocase && maskSize && (lit.msk[maskSize - 1] & CASE_BIT)) {
- c = (lit.cmp[maskSize - 1] & CASE_BIT) ? mytolower(c) : mytoupper(c);
- nocase = false;
- }
-
- u32 iEnd = MAX(litSize, maskSize);
- u32 upSuffix = iEnd; // upSuffix is used as an upper case suffix length
- // for case-less, or as a suffix length for case-sensitive;
- u32 loSuffix = iEnd; // loSuffix used only for case-less as a lower case suffix
- // length;
-
- for (u32 i = 0; i < iEnd; i++) {
- if (i < litSize) {
- if (isDifferent(c, lit.s[litSize - i - 1], lit.nocase)) {
+ vector<FDRFlood> tmpFlood(N_CHARS);
+ u32 default_suffix = eng.getDefaultFloodSuffixLength();
+
+ // zero everything to avoid spurious distinctions in the compares
+ memset(&tmpFlood[0], 0, N_CHARS * sizeof(FDRFlood));
+
+ for (u32 c = 0; c < N_CHARS; c++) {
+ tmpFlood[c].suffix = default_suffix;
+ }
+
+ for (const auto &lit : lits) {
+ DEBUG_PRINTF("lit: '%s'%s\n", escapeString(lit.s).c_str(),
+ lit.nocase ? " (nocase)" : "");
+ u32 litSize = verify_u32(lit.s.size());
+ u32 maskSize = (u32)lit.msk.size();
+ u8 c = lit.s[litSize - 1];
+ bool nocase = ourisalpha(c) ? lit.nocase : false;
+
+ if (nocase && maskSize && (lit.msk[maskSize - 1] & CASE_BIT)) {
+ c = (lit.cmp[maskSize - 1] & CASE_BIT) ? mytolower(c) : mytoupper(c);
+ nocase = false;
+ }
+
+ u32 iEnd = MAX(litSize, maskSize);
+ u32 upSuffix = iEnd; // upSuffix is used as an upper case suffix length
+ // for case-less, or as a suffix length for case-sensitive;
+ u32 loSuffix = iEnd; // loSuffix used only for case-less as a lower case suffix
+ // length;
+
+ for (u32 i = 0; i < iEnd; i++) {
+ if (i < litSize) {
+ if (isDifferent(c, lit.s[litSize - i - 1], lit.nocase)) {
DEBUG_PRINTF("non-flood char in literal[%u]: "
"0x%02x != 0x%02x\n",
i, c, lit.s[litSize - i - 1]);
- upSuffix = MIN(upSuffix, i);
- loSuffix = MIN(loSuffix, i); // makes sense only for case-less
- break;
- }
- }
- if (i < maskSize) {
- u8 m = lit.msk[maskSize - i - 1];
- u8 cm = lit.cmp[maskSize - i - 1] & m;
- if(nocase) {
- if ((mytoupper(c) & m) != cm) {
- DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n",
- i, mytoupper(c), cm);
- upSuffix = MIN(upSuffix, i);
- }
- if ((mytolower(c) & m) != cm) {
- DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n",
- i, mytolower(c), cm);
- loSuffix = MIN(loSuffix, i);
- }
- if (loSuffix != iEnd && upSuffix != iEnd) {
- break;
- }
- } else if ((c & m) != cm) {
- DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n", i, c, cm);
- upSuffix = MIN(upSuffix, i);
- break;
- }
- }
- }
- if(upSuffix != iEnd) {
- updateFloodSuffix(tmpFlood, nocase ? mytoupper(c) : c, upSuffix);
- } else {
- addFlood(tmpFlood, nocase ? mytoupper(c) : c, lit, upSuffix);
- }
- if (nocase) {
- if(loSuffix != iEnd) {
- updateFloodSuffix(tmpFlood, mytolower(c), loSuffix);
- } else {
- addFlood(tmpFlood, mytolower(c), lit, loSuffix);
- }
- }
- }
-
-#ifdef DEBUG
- for (u32 i = 0; i < N_CHARS; i++) {
- FDRFlood &fl = tmpFlood[i];
- if (!fl.idCount) {
- continue;
- }
-
- printf("i is %02x fl->idCount is %hd fl->suffix is %d fl->allGroups is "
- "%016llx\n", i, fl.idCount, fl.suffix, fl.allGroups);
- for (u32 j = 0; j < fl.idCount; j++) {
+ upSuffix = MIN(upSuffix, i);
+ loSuffix = MIN(loSuffix, i); // makes sense only for case-less
+ break;
+ }
+ }
+ if (i < maskSize) {
+ u8 m = lit.msk[maskSize - i - 1];
+ u8 cm = lit.cmp[maskSize - i - 1] & m;
+ if(nocase) {
+ if ((mytoupper(c) & m) != cm) {
+ DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n",
+ i, mytoupper(c), cm);
+ upSuffix = MIN(upSuffix, i);
+ }
+ if ((mytolower(c) & m) != cm) {
+ DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n",
+ i, mytolower(c), cm);
+ loSuffix = MIN(loSuffix, i);
+ }
+ if (loSuffix != iEnd && upSuffix != iEnd) {
+ break;
+ }
+ } else if ((c & m) != cm) {
+ DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n", i, c, cm);
+ upSuffix = MIN(upSuffix, i);
+ break;
+ }
+ }
+ }
+ if(upSuffix != iEnd) {
+ updateFloodSuffix(tmpFlood, nocase ? mytoupper(c) : c, upSuffix);
+ } else {
+ addFlood(tmpFlood, nocase ? mytoupper(c) : c, lit, upSuffix);
+ }
+ if (nocase) {
+ if(loSuffix != iEnd) {
+ updateFloodSuffix(tmpFlood, mytolower(c), loSuffix);
+ } else {
+ addFlood(tmpFlood, mytolower(c), lit, loSuffix);
+ }
+ }
+ }
+
+#ifdef DEBUG
+ for (u32 i = 0; i < N_CHARS; i++) {
+ FDRFlood &fl = tmpFlood[i];
+ if (!fl.idCount) {
+ continue;
+ }
+
+ printf("i is %02x fl->idCount is %hd fl->suffix is %d fl->allGroups is "
+ "%016llx\n", i, fl.idCount, fl.suffix, fl.allGroups);
+ for (u32 j = 0; j < fl.idCount; j++) {
printf("j is %d fl.groups[j] %016llx\n", j, fl.groups[j]);
- }
- }
-#endif
-
+ }
+ }
+#endif
+
// If flood detection has been switched off in the grey box, we comply by
// setting idCount too high for all floods.
if (!grey.fdrAllowFlood) {
@@ -194,38 +194,38 @@ bytecode_ptr<u8> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
}
}
- map<FDRFlood, CharReach, FloodComparator> flood2chars;
- for (u32 i = 0; i < N_CHARS; i++) {
- FDRFlood fl = tmpFlood[i];
- flood2chars[fl].set(i);
- }
-
- u32 nDistinctFloods = flood2chars.size();
- size_t floodHeaderSize = sizeof(u32) * N_CHARS;
- size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods;
- size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize);
+ map<FDRFlood, CharReach, FloodComparator> flood2chars;
+ for (u32 i = 0; i < N_CHARS; i++) {
+ FDRFlood fl = tmpFlood[i];
+ flood2chars[fl].set(i);
+ }
+
+ u32 nDistinctFloods = flood2chars.size();
+ size_t floodHeaderSize = sizeof(u32) * N_CHARS;
+ size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods;
+ size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize);
auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 16);
- assert(buf); // otherwise would have thrown std::bad_alloc
-
+ assert(buf); // otherwise would have thrown std::bad_alloc
+
u32 *floodHeader = (u32 *)buf.get();
FDRFlood *layoutFlood = (FDRFlood *)(buf.get() + floodHeaderSize);
-
- u32 currentFloodIndex = 0;
- for (const auto &m : flood2chars) {
- const FDRFlood &fl = m.first;
- const CharReach &cr = m.second;
- layoutFlood[currentFloodIndex] = fl;
- for (size_t c = cr.find_first(); c != cr.npos; c = cr.find_next(c)) {
- floodHeader[c] = currentFloodIndex;
- }
- currentFloodIndex++;
- }
-
- DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n",
- floodHeaderSize, floodStructSize, totalSize);
-
+
+ u32 currentFloodIndex = 0;
+ for (const auto &m : flood2chars) {
+ const FDRFlood &fl = m.first;
+ const CharReach &cr = m.second;
+ layoutFlood[currentFloodIndex] = fl;
+ for (size_t c = cr.find_first(); c != cr.npos; c = cr.find_next(c)) {
+ floodHeader[c] = currentFloodIndex;
+ }
+ currentFloodIndex++;
+ }
+
+ DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n",
+ floodHeaderSize, floodStructSize, totalSize);
+
return buf;
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/fdr/flood_runtime.h b/contrib/libs/hyperscan/src/fdr/flood_runtime.h
index 85353edeca..2d5a32d92a 100644
--- a/contrib/libs/hyperscan/src/fdr/flood_runtime.h
+++ b/contrib/libs/hyperscan/src/fdr/flood_runtime.h
@@ -1,337 +1,337 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef FLOOD_RUNTIME
-#define FLOOD_RUNTIME
-
-#if defined(ARCH_64_BIT)
-#define FLOOD_64
-#else
-#define FLOOD_32
-#endif
-#define FLOOD_MINIMUM_SIZE 256
-#define FLOOD_BACKOFF_START 32
-
-static really_inline
-const u8 * nextFloodDetect(const u8 * buf, size_t len, u32 floodBackoff) {
- // if we don't have a flood at either the start or end,
- // or have a very small buffer, don't bother with flood detection
- if (len < FLOOD_MINIMUM_SIZE) {
- return buf + len;
- }
-
- /* entry points in runtime.c prefetch relevant data */
-#ifndef FLOOD_32
- u64a x11 = *(const u64a *)ROUNDUP_PTR(buf, 8);
- u64a x12 = *(const u64a *)ROUNDUP_PTR(buf+8, 8);
- if (x11 == x12) {
- return buf + floodBackoff;
- }
- u64a x21 = *(const u64a *)ROUNDUP_PTR(buf + len/2, 8);
- u64a x22 = *(const u64a *)ROUNDUP_PTR(buf + len/2 + 8, 8);
- if (x21 == x22) {
- return buf + floodBackoff;
- }
- u64a x31 = *(const u64a *)ROUNDUP_PTR(buf + len - 24, 8);
- u64a x32 = *(const u64a *)ROUNDUP_PTR(buf + len - 16, 8);
- if (x31 == x32) {
- return buf + floodBackoff;
- }
-#else
- u32 x11 = *(const u32 *)ROUNDUP_PTR(buf, 4);
- u32 x12 = *(const u32 *)ROUNDUP_PTR(buf+4, 4);
- if (x11 == x12) {
- return buf + floodBackoff;
- }
- u32 x21 = *(const u32 *)ROUNDUP_PTR(buf + len/2, 4);
- u32 x22 = *(const u32 *)ROUNDUP_PTR(buf + len/2 + 4, 4);
- if (x21 == x22) {
- return buf + floodBackoff;
- }
- u32 x31 = *(const u32 *)ROUNDUP_PTR(buf + len - 12, 4);
- u32 x32 = *(const u32 *)ROUNDUP_PTR(buf + len - 8, 4);
- if (x31 == x32) {
- return buf + floodBackoff;
- }
-#endif
- return buf + len;
-}
-
-static really_inline
-const u8 * floodDetect(const struct FDR * fdr,
- const struct FDR_Runtime_Args * a,
- const u8 ** ptrPtr,
- const u8 * tryFloodDetect,
- u32 * floodBackoffPtr,
- hwlmcb_rv_t * control,
- u32 iterBytes) {
- DEBUG_PRINTF("attempting flood detection at %p\n", tryFloodDetect);
- const u8 * buf = a->buf;
- const size_t len = a->len;
- HWLMCallback cb = a->cb;
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FLOOD_RUNTIME
+#define FLOOD_RUNTIME
+
+#if defined(ARCH_64_BIT)
+#define FLOOD_64
+#else
+#define FLOOD_32
+#endif
+#define FLOOD_MINIMUM_SIZE 256
+#define FLOOD_BACKOFF_START 32
+
+static really_inline
+const u8 * nextFloodDetect(const u8 * buf, size_t len, u32 floodBackoff) {
+ // if we don't have a flood at either the start or end,
+ // or have a very small buffer, don't bother with flood detection
+ if (len < FLOOD_MINIMUM_SIZE) {
+ return buf + len;
+ }
+
+ /* entry points in runtime.c prefetch relevant data */
+#ifndef FLOOD_32
+ u64a x11 = *(const u64a *)ROUNDUP_PTR(buf, 8);
+ u64a x12 = *(const u64a *)ROUNDUP_PTR(buf+8, 8);
+ if (x11 == x12) {
+ return buf + floodBackoff;
+ }
+ u64a x21 = *(const u64a *)ROUNDUP_PTR(buf + len/2, 8);
+ u64a x22 = *(const u64a *)ROUNDUP_PTR(buf + len/2 + 8, 8);
+ if (x21 == x22) {
+ return buf + floodBackoff;
+ }
+ u64a x31 = *(const u64a *)ROUNDUP_PTR(buf + len - 24, 8);
+ u64a x32 = *(const u64a *)ROUNDUP_PTR(buf + len - 16, 8);
+ if (x31 == x32) {
+ return buf + floodBackoff;
+ }
+#else
+ u32 x11 = *(const u32 *)ROUNDUP_PTR(buf, 4);
+ u32 x12 = *(const u32 *)ROUNDUP_PTR(buf+4, 4);
+ if (x11 == x12) {
+ return buf + floodBackoff;
+ }
+ u32 x21 = *(const u32 *)ROUNDUP_PTR(buf + len/2, 4);
+ u32 x22 = *(const u32 *)ROUNDUP_PTR(buf + len/2 + 4, 4);
+ if (x21 == x22) {
+ return buf + floodBackoff;
+ }
+ u32 x31 = *(const u32 *)ROUNDUP_PTR(buf + len - 12, 4);
+ u32 x32 = *(const u32 *)ROUNDUP_PTR(buf + len - 8, 4);
+ if (x31 == x32) {
+ return buf + floodBackoff;
+ }
+#endif
+ return buf + len;
+}
+
+static really_inline
+const u8 * floodDetect(const struct FDR * fdr,
+ const struct FDR_Runtime_Args * a,
+ const u8 ** ptrPtr,
+ const u8 * tryFloodDetect,
+ u32 * floodBackoffPtr,
+ hwlmcb_rv_t * control,
+ u32 iterBytes) {
+ DEBUG_PRINTF("attempting flood detection at %p\n", tryFloodDetect);
+ const u8 * buf = a->buf;
+ const size_t len = a->len;
+ HWLMCallback cb = a->cb;
struct hs_scratch *scratch = a->scratch;
-
- const u8 * ptr = *ptrPtr;
- // tryFloodDetect is never put in places where unconditional
- // reads a short distance forward or backward here
- // TODO: rationale for this line needs to be rediscovered!!
+
+ const u8 * ptr = *ptrPtr;
+ // tryFloodDetect is never put in places where unconditional
+ // reads a short distance forward or backward here
+ // TODO: rationale for this line needs to be rediscovered!!
size_t mainLoopLen = len > 2 * iterBytes ? len - 2 * iterBytes : 0;
- const u32 i = ptr - buf;
- u32 j = i;
-
- // go from c to our FDRFlood structure
- u8 c = buf[i];
- const u8 * fBase = ((const u8 *)fdr) + fdr->floodOffset;
- u32 fIdx = ((const u32 *)fBase)[c];
- const struct FDRFlood * fsb = (const struct FDRFlood *)(fBase + sizeof(u32) * 256);
- const struct FDRFlood * fl = &fsb[fIdx];
-
-#ifndef FLOOD_32
- u64a cmpVal = c;
- cmpVal |= cmpVal << 8;
- cmpVal |= cmpVal << 16;
- cmpVal |= cmpVal << 32;
- u64a probe = *(const u64a *)ROUNDUP_PTR(buf+i, 8);
-#else
- u32 cmpVal = c;
- cmpVal |= cmpVal << 8;
- cmpVal |= cmpVal << 16;
- u32 probe = *(const u32 *)ROUNDUP_PTR(buf+i, 4);
-#endif
-
- if ((probe != cmpVal) || (fl->idCount >= FDR_FLOOD_MAX_IDS)) {
- *floodBackoffPtr *= 2;
- goto floodout;
- }
-
- if (i < fl->suffix + 7) {
- *floodBackoffPtr *= 2;
- goto floodout;
- }
-
- j = i - fl->suffix;
-
-#ifndef FLOOD_32
- j -= (u32)((uintptr_t)buf + j) & 0x7; // push j back to yield 8-aligned addrs
- for (; j + 32 < mainLoopLen; j += 32) {
- u64a v = *(const u64a *)(buf + j);
- u64a v2 = *(const u64a *)(buf + j + 8);
- u64a v3 = *(const u64a *)(buf + j + 16);
- u64a v4 = *(const u64a *)(buf + j + 24);
- if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) {
- break;
- }
- }
- for (; j + 8 < mainLoopLen; j += 8) {
- u64a v = *(const u64a *)(buf + j);
- if (v != cmpVal) {
- break;
- }
- }
-#else
- j -= (u32)((size_t)buf + j) & 0x3; // push j back to yield 4-aligned addrs
- for (; j + 16 < mainLoopLen; j += 16) {
- u32 v = *(const u32 *)(buf + j);
- u32 v2 = *(const u32 *)(buf + j + 4);
- u32 v3 = *(const u32 *)(buf + j + 8);
- u32 v4 = *(const u32 *)(buf + j + 12);
- if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) {
- break;
- }
- }
- for (; j + 4 < mainLoopLen; j += 4) {
- u32 v = *(const u32 *)(buf + j);
- if (v != cmpVal) {
- break;
- }
- }
-#endif
- for (; j < mainLoopLen; j++) {
- u8 v = *(const u8 *)(buf + j);
- if (v != c) {
- break;
- }
- }
- if (j > i ) {
- j--; // needed for some reaches
- u32 itersAhead = (j-i)/iterBytes;
- u32 floodSize = itersAhead*iterBytes;
-
- DEBUG_PRINTF("flooding %u size j %u i %u fl->idCount %hu "
- "*control %016llx fl->allGroups %016llx\n",
- floodSize, j, i, fl->idCount, *control, fl->allGroups);
- DEBUG_PRINTF("mainloopLen %zu mainStart ??? mainEnd ??? len %zu\n",
- mainLoopLen, len);
-
- if (fl->idCount && (*control & fl->allGroups)) {
- switch (fl->idCount) {
-#if !defined(FLOOD_DEBUG)
- // Carefully unrolled code
- case 1:
- for (u32 t = 0; t < floodSize && (*control & fl->allGroups);
- t += 4) {
- DEBUG_PRINTF("aaa %u %llx\n", t, fl->groups[0]);
- if (*control & fl->groups[0]) {
+ const u32 i = ptr - buf;
+ u32 j = i;
+
+ // go from c to our FDRFlood structure
+ u8 c = buf[i];
+ const u8 * fBase = ((const u8 *)fdr) + fdr->floodOffset;
+ u32 fIdx = ((const u32 *)fBase)[c];
+ const struct FDRFlood * fsb = (const struct FDRFlood *)(fBase + sizeof(u32) * 256);
+ const struct FDRFlood * fl = &fsb[fIdx];
+
+#ifndef FLOOD_32
+ u64a cmpVal = c;
+ cmpVal |= cmpVal << 8;
+ cmpVal |= cmpVal << 16;
+ cmpVal |= cmpVal << 32;
+ u64a probe = *(const u64a *)ROUNDUP_PTR(buf+i, 8);
+#else
+ u32 cmpVal = c;
+ cmpVal |= cmpVal << 8;
+ cmpVal |= cmpVal << 16;
+ u32 probe = *(const u32 *)ROUNDUP_PTR(buf+i, 4);
+#endif
+
+ if ((probe != cmpVal) || (fl->idCount >= FDR_FLOOD_MAX_IDS)) {
+ *floodBackoffPtr *= 2;
+ goto floodout;
+ }
+
+ if (i < fl->suffix + 7) {
+ *floodBackoffPtr *= 2;
+ goto floodout;
+ }
+
+ j = i - fl->suffix;
+
+#ifndef FLOOD_32
+ j -= (u32)((uintptr_t)buf + j) & 0x7; // push j back to yield 8-aligned addrs
+ for (; j + 32 < mainLoopLen; j += 32) {
+ u64a v = *(const u64a *)(buf + j);
+ u64a v2 = *(const u64a *)(buf + j + 8);
+ u64a v3 = *(const u64a *)(buf + j + 16);
+ u64a v4 = *(const u64a *)(buf + j + 24);
+ if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) {
+ break;
+ }
+ }
+ for (; j + 8 < mainLoopLen; j += 8) {
+ u64a v = *(const u64a *)(buf + j);
+ if (v != cmpVal) {
+ break;
+ }
+ }
+#else
+ j -= (u32)((size_t)buf + j) & 0x3; // push j back to yield 4-aligned addrs
+ for (; j + 16 < mainLoopLen; j += 16) {
+ u32 v = *(const u32 *)(buf + j);
+ u32 v2 = *(const u32 *)(buf + j + 4);
+ u32 v3 = *(const u32 *)(buf + j + 8);
+ u32 v4 = *(const u32 *)(buf + j + 12);
+ if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) {
+ break;
+ }
+ }
+ for (; j + 4 < mainLoopLen; j += 4) {
+ u32 v = *(const u32 *)(buf + j);
+ if (v != cmpVal) {
+ break;
+ }
+ }
+#endif
+ for (; j < mainLoopLen; j++) {
+ u8 v = *(const u8 *)(buf + j);
+ if (v != c) {
+ break;
+ }
+ }
+ if (j > i ) {
+ j--; // needed for some reaches
+ u32 itersAhead = (j-i)/iterBytes;
+ u32 floodSize = itersAhead*iterBytes;
+
+ DEBUG_PRINTF("flooding %u size j %u i %u fl->idCount %hu "
+ "*control %016llx fl->allGroups %016llx\n",
+ floodSize, j, i, fl->idCount, *control, fl->allGroups);
+ DEBUG_PRINTF("mainloopLen %zu mainStart ??? mainEnd ??? len %zu\n",
+ mainLoopLen, len);
+
+ if (fl->idCount && (*control & fl->allGroups)) {
+ switch (fl->idCount) {
+#if !defined(FLOOD_DEBUG)
+ // Carefully unrolled code
+ case 1:
+ for (u32 t = 0; t < floodSize && (*control & fl->allGroups);
+ t += 4) {
+ DEBUG_PRINTF("aaa %u %llx\n", t, fl->groups[0]);
+ if (*control & fl->groups[0]) {
*control = cb(i + t + 0, fl->ids[0], scratch);
- }
- if (*control & fl->groups[0]) {
+ }
+ if (*control & fl->groups[0]) {
*control = cb(i + t + 1, fl->ids[0], scratch);
- }
- if (*control & fl->groups[0]) {
+ }
+ if (*control & fl->groups[0]) {
*control = cb(i + t + 2, fl->ids[0], scratch);
- }
- if (*control & fl->groups[0]) {
+ }
+ if (*control & fl->groups[0]) {
*control = cb(i + t + 3, fl->ids[0], scratch);
- }
- }
- break;
- case 2:
- for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 4) {
- if (*control & fl->groups[0]) {
+ }
+ }
+ break;
+ case 2:
+ for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 4) {
+ if (*control & fl->groups[0]) {
*control = cb(i + t, fl->ids[0], scratch);
- }
- if (*control & fl->groups[1]) {
+ }
+ if (*control & fl->groups[1]) {
*control = cb(i + t, fl->ids[1], scratch);
- }
- if (*control & fl->groups[0]) {
- *control =
+ }
+ if (*control & fl->groups[0]) {
+ *control =
cb(i + t + 1, fl->ids[0], scratch);
- }
- if (*control & fl->groups[1]) {
+ }
+ if (*control & fl->groups[1]) {
*control = cb(i + t + 1, fl->ids[1], scratch);
- }
- if (*control & fl->groups[0]) {
+ }
+ if (*control & fl->groups[0]) {
*control = cb(i + t + 2, fl->ids[0], scratch);
- }
- if (*control & fl->groups[1]) {
+ }
+ if (*control & fl->groups[1]) {
*control = cb(i + t + 2, fl->ids[1], scratch);
- }
- if (*control & fl->groups[0]) {
+ }
+ if (*control & fl->groups[0]) {
*control = cb(i + t + 3, fl->ids[0], scratch);
- }
- if (*control & fl->groups[1]) {
+ }
+ if (*control & fl->groups[1]) {
*control = cb(i + t + 3, fl->ids[1], scratch);
- }
- }
- break;
- case 3:
- for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
- if (*control & fl->groups[0]) {
+ }
+ }
+ break;
+ case 3:
+ for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
+ if (*control & fl->groups[0]) {
*control = cb(i + t, fl->ids[0], scratch);
- }
- if (*control & fl->groups[1]) {
+ }
+ if (*control & fl->groups[1]) {
*control = cb(i + t, fl->ids[1], scratch);
- }
- if (*control & fl->groups[2]) {
+ }
+ if (*control & fl->groups[2]) {
*control = cb(i + t, fl->ids[2], scratch);
- }
- if (*control & fl->groups[0]) {
+ }
+ if (*control & fl->groups[0]) {
*control = cb(i + t + 1, fl->ids[0], scratch);
- }
- if (*control & fl->groups[1]) {
+ }
+ if (*control & fl->groups[1]) {
*control = cb(i + t + 1, fl->ids[1], scratch);
- }
- if (*control & fl->groups[2]) {
+ }
+ if (*control & fl->groups[2]) {
*control = cb(i + t + 1, fl->ids[2], scratch);
- }
- }
- break;
- default:
- // slow generalized loop
- for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
-
- if (*control & fl->groups[0]) {
+ }
+ }
+ break;
+ default:
+ // slow generalized loop
+ for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
+
+ if (*control & fl->groups[0]) {
*control = cb(i + t, fl->ids[0], scratch);
- }
- if (*control & fl->groups[1]) {
+ }
+ if (*control & fl->groups[1]) {
*control = cb(i + t, fl->ids[1], scratch);
- }
- if (*control & fl->groups[2]) {
+ }
+ if (*control & fl->groups[2]) {
*control = cb(i + t, fl->ids[2], scratch);
- }
- if (*control & fl->groups[3]) {
+ }
+ if (*control & fl->groups[3]) {
*control = cb(i + t, fl->ids[3], scratch);
- }
-
- for (u32 t2 = 4; t2 < fl->idCount; t2++) {
- if (*control & fl->groups[t2]) {
+ }
+
+ for (u32 t2 = 4; t2 < fl->idCount; t2++) {
+ if (*control & fl->groups[t2]) {
*control = cb(i + t, fl->ids[t2], scratch);
- }
- }
-
- if (*control & fl->groups[0]) {
+ }
+ }
+
+ if (*control & fl->groups[0]) {
*control = cb(i + t + 1, fl->ids[0], scratch);
- }
- if (*control & fl->groups[1]) {
+ }
+ if (*control & fl->groups[1]) {
*control = cb(i + t + 1, fl->ids[1], scratch);
- }
- if (*control & fl->groups[2]) {
+ }
+ if (*control & fl->groups[2]) {
*control = cb(i + t + 1, fl->ids[2], scratch);
- }
- if (*control & fl->groups[3]) {
+ }
+ if (*control & fl->groups[3]) {
*control = cb(i + t + 1, fl->ids[3], scratch);
- }
-
- for (u32 t2 = 4; t2 < fl->idCount; t2++) {
- if (*control & fl->groups[t2]) {
+ }
+
+ for (u32 t2 = 4; t2 < fl->idCount; t2++) {
+ if (*control & fl->groups[t2]) {
*control = cb(i + t + 1, fl->ids[t2], scratch);
- }
- }
- }
- break;
-#else
- // Fallback for debugging
- default:
- for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t++) {
- for (u32 t2 = 0; t2 < fl->idCount; t2++) {
- if (*control & fl->groups[t2]) {
+ }
+ }
+ }
+ break;
+#else
+ // Fallback for debugging
+ default:
+ for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t++) {
+ for (u32 t2 = 0; t2 < fl->idCount; t2++) {
+ if (*control & fl->groups[t2]) {
*control = cb(i + t, fl->ids[t2], scratch);
- }
- }
- }
-#endif
- }
- }
- ptr += floodSize;
- } else {
- *floodBackoffPtr *= 2;
- }
-
-floodout:
- if (j + *floodBackoffPtr < mainLoopLen - 128) {
- tryFloodDetect = buf + MAX(i,j) + *floodBackoffPtr;
- } else {
- tryFloodDetect = buf + mainLoopLen; // set so we never do another flood detect
- }
- *ptrPtr = ptr;
- DEBUG_PRINTF("finished flood detection at %p (next check %p)\n",
- ptr, tryFloodDetect);
- return tryFloodDetect;
-}
-
-#endif
+ }
+ }
+ }
+#endif
+ }
+ }
+ ptr += floodSize;
+ } else {
+ *floodBackoffPtr *= 2;
+ }
+
+floodout:
+ if (j + *floodBackoffPtr < mainLoopLen - 128) {
+ tryFloodDetect = buf + MAX(i,j) + *floodBackoffPtr;
+ } else {
+ tryFloodDetect = buf + mainLoopLen; // set so we never do another flood detect
+ }
+ *ptrPtr = ptr;
+ DEBUG_PRINTF("finished flood detection at %p (next check %p)\n",
+ ptr, tryFloodDetect);
+ return tryFloodDetect;
+}
+
+#endif
diff --git a/contrib/libs/hyperscan/src/fdr/teddy.c b/contrib/libs/hyperscan/src/fdr/teddy.c
index 8aaf69b26c..e6f5476198 100644
--- a/contrib/libs/hyperscan/src/fdr/teddy.c
+++ b/contrib/libs/hyperscan/src/fdr/teddy.c
@@ -1,31 +1,31 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/** \file
* \brief Teddy literal matcher: SSSE3 engine runtime.
*/
@@ -35,8 +35,8 @@
#include "teddy.h"
#include "teddy_internal.h"
#include "teddy_runtime_common.h"
-#include "util/simd_utils.h"
-
+#include "util/simd_utils.h"
+
const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
@@ -72,8 +72,8 @@ const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff},
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
-};
-
+};
+
#if defined(HAVE_AVX512VBMI) // VBMI strong teddy
#define CONF_CHUNK_64(chunk, bucket, off, reason, pt, conf_fn) \
@@ -107,7 +107,7 @@ do { \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
} while(0)
-
+
#define CONF_CHUNK_32(chunk, bucket, off, reason, conf_fn) \
do { \
if (unlikely(chunk != ones_u32)) { \
@@ -117,7 +117,7 @@ do { \
CHECK_HWLM_TERMINATE_MATCHING; \
} \
} while(0)
-
+
#endif
#if defined(HAVE_AVX512VBMI) // VBMI strong teddy
@@ -496,8 +496,8 @@ m512 prep_conf_teddy_no_reinforcement_m1(const m512 *lo_mask,
const m512 val) {
PREP_SHUF_MASK_NO_REINFORCEMENT(val);
return SHIFT_OR_M1;
-}
-
+}
+
static really_inline
m512 prep_conf_teddy_no_reinforcement_m2(const m512 *lo_mask,
const m512 *dup_mask,
@@ -505,7 +505,7 @@ m512 prep_conf_teddy_no_reinforcement_m2(const m512 *lo_mask,
PREP_SHUF_MASK_NO_REINFORCEMENT(val);
return SHIFT_OR_M2;
}
-
+
static really_inline
m512 prep_conf_teddy_no_reinforcement_m3(const m512 *lo_mask,
const m512 *dup_mask,
@@ -513,15 +513,15 @@ m512 prep_conf_teddy_no_reinforcement_m3(const m512 *lo_mask,
PREP_SHUF_MASK_NO_REINFORCEMENT(val);
return SHIFT_OR_M3;
}
-
+
static really_inline
m512 prep_conf_teddy_no_reinforcement_m4(const m512 *lo_mask,
const m512 *dup_mask,
const m512 val) {
PREP_SHUF_MASK_NO_REINFORCEMENT(val);
return SHIFT_OR_M4;
-}
-
+}
+
static really_inline
m512 prep_conf_teddy_m1(const m512 *lo_mask, const m512 *dup_mask,
const u8 *ptr, const u64a *r_msk_base,
@@ -529,7 +529,7 @@ m512 prep_conf_teddy_m1(const m512 *lo_mask, const m512 *dup_mask,
PREP_SHUF_MASK;
return or512(SHIFT_OR_M1, r_msk);
}
-
+
static really_inline
m512 prep_conf_teddy_m2(const m512 *lo_mask, const m512 *dup_mask,
const u8 *ptr, const u64a *r_msk_base,
@@ -537,7 +537,7 @@ m512 prep_conf_teddy_m2(const m512 *lo_mask, const m512 *dup_mask,
PREP_SHUF_MASK;
return or512(SHIFT_OR_M2, r_msk);
}
-
+
static really_inline
m512 prep_conf_teddy_m3(const m512 *lo_mask, const m512 *dup_mask,
const u8 *ptr, const u64a *r_msk_base,
@@ -545,7 +545,7 @@ m512 prep_conf_teddy_m3(const m512 *lo_mask, const m512 *dup_mask,
PREP_SHUF_MASK;
return or512(SHIFT_OR_M3, r_msk);
}
-
+
static really_inline
m512 prep_conf_teddy_m4(const m512 *lo_mask, const m512 *dup_mask,
const u8 *ptr, const u64a *r_msk_base,
@@ -553,7 +553,7 @@ m512 prep_conf_teddy_m4(const m512 *lo_mask, const m512 *dup_mask,
PREP_SHUF_MASK;
return or512(SHIFT_OR_M4, r_msk);
}
-
+
#define PREP_CONF_FN_NO_REINFORCEMENT(val, n) \
prep_conf_teddy_no_reinforcement_m##n(&lo_mask, dup_mask, val)
@@ -732,8 +732,8 @@ m256 prep_conf_teddy_no_reinforcement_m1(const m256 *lo_mask,
const m256 val) {
PREP_SHUF_MASK_NO_REINFORCEMENT(val);
return SHIFT_OR_M1;
-}
-
+}
+
static really_inline
m256 prep_conf_teddy_no_reinforcement_m2(const m256 *lo_mask,
const m256 *dup_mask,
@@ -741,7 +741,7 @@ m256 prep_conf_teddy_no_reinforcement_m2(const m256 *lo_mask,
PREP_SHUF_MASK_NO_REINFORCEMENT(val);
return SHIFT_OR_M2;
}
-
+
static really_inline
m256 prep_conf_teddy_no_reinforcement_m3(const m256 *lo_mask,
const m256 *dup_mask,
@@ -749,7 +749,7 @@ m256 prep_conf_teddy_no_reinforcement_m3(const m256 *lo_mask,
PREP_SHUF_MASK_NO_REINFORCEMENT(val);
return SHIFT_OR_M3;
}
-
+
static really_inline
m256 prep_conf_teddy_no_reinforcement_m4(const m256 *lo_mask,
const m256 *dup_mask,
@@ -757,7 +757,7 @@ m256 prep_conf_teddy_no_reinforcement_m4(const m256 *lo_mask,
PREP_SHUF_MASK_NO_REINFORCEMENT(val);
return SHIFT_OR_M4;
}
-
+
static really_inline
m256 prep_conf_teddy_m1(const m256 *lo_mask, const m256 *dup_mask,
const u8 *ptr, const u64a *r_msk_base,
@@ -765,7 +765,7 @@ m256 prep_conf_teddy_m1(const m256 *lo_mask, const m256 *dup_mask,
PREP_SHUF_MASK;
return or256(SHIFT_OR_M1, r_msk);
}
-
+
static really_inline
m256 prep_conf_teddy_m2(const m256 *lo_mask, const m256 *dup_mask,
const u8 *ptr, const u64a *r_msk_base,
@@ -773,7 +773,7 @@ m256 prep_conf_teddy_m2(const m256 *lo_mask, const m256 *dup_mask,
PREP_SHUF_MASK;
return or256(SHIFT_OR_M2, r_msk);
}
-
+
static really_inline
m256 prep_conf_teddy_m3(const m256 *lo_mask, const m256 *dup_mask,
const u8 *ptr, const u64a *r_msk_base,
@@ -781,7 +781,7 @@ m256 prep_conf_teddy_m3(const m256 *lo_mask, const m256 *dup_mask,
PREP_SHUF_MASK;
return or256(SHIFT_OR_M3, r_msk);
}
-
+
static really_inline
m256 prep_conf_teddy_m4(const m256 *lo_mask, const m256 *dup_mask,
const u8 *ptr, const u64a *r_msk_base,
diff --git a/contrib/libs/hyperscan/src/fdr/teddy_compile.cpp b/contrib/libs/hyperscan/src/fdr/teddy_compile.cpp
index 0e252b494b..eae9c2c136 100644
--- a/contrib/libs/hyperscan/src/fdr/teddy_compile.cpp
+++ b/contrib/libs/hyperscan/src/fdr/teddy_compile.cpp
@@ -1,31 +1,31 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
* \brief FDR literal matcher: Teddy build code.
@@ -33,68 +33,68 @@
#include "teddy_compile.h"
-#include "fdr.h"
-#include "fdr_internal.h"
-#include "fdr_compile_internal.h"
-#include "fdr_confirm.h"
-#include "fdr_engine_description.h"
+#include "fdr.h"
+#include "fdr_internal.h"
+#include "fdr_compile_internal.h"
+#include "fdr_confirm.h"
+#include "fdr_engine_description.h"
#include "teddy_internal.h"
#include "teddy_engine_description.h"
#include "grey.h"
-#include "ue2common.h"
+#include "ue2common.h"
#include "hwlm/hwlm_build.h"
-#include "util/alloc.h"
-#include "util/compare.h"
+#include "util/alloc.h"
+#include "util/compare.h"
#include "util/container.h"
#include "util/make_unique.h"
#include "util/noncopyable.h"
-#include "util/popcount.h"
+#include "util/popcount.h"
#include "util/small_vector.h"
-#include "util/target_info.h"
-#include "util/verify_types.h"
-
-#include <algorithm>
-#include <cassert>
-#include <cctype>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <map>
-#include <memory>
-#include <set>
-#include <string>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-namespace {
-
-//#define TEDDY_DEBUG
-
+#include "util/target_info.h"
+#include "util/verify_types.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+namespace {
+
+//#define TEDDY_DEBUG
+
/** \brief Max number of Teddy masks we use. */
static constexpr size_t MAX_NUM_MASKS = 4;
class TeddyCompiler : noncopyable {
- const TeddyEngineDescription &eng;
+ const TeddyEngineDescription &eng;
const Grey &grey;
- const vector<hwlmLiteral> &lits;
+ const vector<hwlmLiteral> &lits;
map<BucketIndex, std::vector<LiteralIndex>> bucketToLits;
- bool make_small;
-
-public:
- TeddyCompiler(const vector<hwlmLiteral> &lits_in,
+ bool make_small;
+
+public:
+ TeddyCompiler(const vector<hwlmLiteral> &lits_in,
map<BucketIndex, std::vector<LiteralIndex>> bucketToLits_in,
const TeddyEngineDescription &eng_in, bool make_small_in,
const Grey &grey_in)
: eng(eng_in), grey(grey_in), lits(lits_in),
bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {}
-
+
bytecode_ptr<FDR> build();
-};
-
-class TeddySet {
+};
+
+class TeddySet {
/**
* \brief Estimate of the max number of literals in a set, used to
* minimise allocations.
@@ -102,7 +102,7 @@ class TeddySet {
static constexpr size_t LITS_PER_SET = 20;
/** \brief Number of masks. */
- u32 len;
+ u32 len;
/**
* \brief A series of bitfields over 16 predicates that represent the
@@ -119,90 +119,90 @@ class TeddySet {
*/
small_vector<u32, LITS_PER_SET> litIds;
-public:
+public:
explicit TeddySet(u32 len_in) : len(len_in), nibbleSets(len_in * 2, 0) {}
- size_t litCount() const { return litIds.size(); }
+ size_t litCount() const { return litIds.size(); }
const small_vector<u32, LITS_PER_SET> &getLits() const { return litIds; }
-
+
bool operator<(const TeddySet &s) const {
- return litIds < s.litIds;
- }
-
-#ifdef TEDDY_DEBUG
- void dump() const {
- printf("TS: ");
- for (u32 i = 0; i < nibbleSets.size(); i++) {
- printf("%04x ", (u32)nibbleSets[i]);
- }
- printf("\nnlits: %zu\nLit ids: ", litCount());
- printf("Prob: %llu\n", probability());
+ return litIds < s.litIds;
+ }
+
+#ifdef TEDDY_DEBUG
+ void dump() const {
+ printf("TS: ");
+ for (u32 i = 0; i < nibbleSets.size(); i++) {
+ printf("%04x ", (u32)nibbleSets[i]);
+ }
+ printf("\nnlits: %zu\nLit ids: ", litCount());
+ printf("Prob: %llu\n", probability());
for (const auto &id : litIds) {
printf("%u ", id);
- }
- printf("\n");
+ }
+ printf("\n");
printf("Flood prone : %s\n", isRunProne() ? "yes" : "no");
- }
-#endif
-
+ }
+#endif
+
bool identicalTail(const TeddySet &ts) const {
- return nibbleSets == ts.nibbleSets;
- }
-
+ return nibbleSets == ts.nibbleSets;
+ }
+
void addLiteral(u32 lit_id, const hwlmLiteral &lit) {
const string &s = lit.s;
- for (u32 i = 0; i < len; i++) {
- if (i < s.size()) {
- u8 c = s[s.size() - i - 1];
- u8 c_hi = (c >> 4) & 0xf;
- u8 c_lo = c & 0xf;
+ for (u32 i = 0; i < len; i++) {
+ if (i < s.size()) {
+ u8 c = s[s.size() - i - 1];
+ u8 c_hi = (c >> 4) & 0xf;
+ u8 c_lo = c & 0xf;
nibbleSets[i * 2] = 1 << c_lo;
if (lit.nocase && ourisalpha(c)) {
nibbleSets[i * 2 + 1] =
(1 << (c_hi & 0xd)) | (1 << (c_hi | 0x2));
- } else {
+ } else {
nibbleSets[i * 2 + 1] = 1 << c_hi;
- }
- } else {
+ }
+ } else {
nibbleSets[i * 2] = nibbleSets[i * 2 + 1] = 0xffff;
- }
- }
+ }
+ }
litIds.push_back(lit_id);
sort_and_unique(litIds);
- }
-
- // return a value p from 0 .. MAXINT64 that gives p/MAXINT64
- // likelihood of this TeddySet firing a first-stage accept
- // if it was given a bucket of its own and random data were
- // to be passed in
- u64a probability() const {
- u64a val = 1;
- for (size_t i = 0; i < nibbleSets.size(); i++) {
- val *= popcount32((u32)nibbleSets[i]);
- }
- return val;
- }
-
- // return a score based around the chance of this hitting times
- // a small fixed cost + the cost of traversing some sort of followup
- // (assumption is that the followup is linear)
- u64a heuristic() const {
+ }
+
+ // return a value p from 0 .. MAXINT64 that gives p/MAXINT64
+ // likelihood of this TeddySet firing a first-stage accept
+ // if it was given a bucket of its own and random data were
+ // to be passed in
+ u64a probability() const {
+ u64a val = 1;
+ for (size_t i = 0; i < nibbleSets.size(); i++) {
+ val *= popcount32((u32)nibbleSets[i]);
+ }
+ return val;
+ }
+
+ // return a score based around the chance of this hitting times
+ // a small fixed cost + the cost of traversing some sort of followup
+ // (assumption is that the followup is linear)
+ u64a heuristic() const {
return probability() * (2 + litCount());
- }
-
- bool isRunProne() const {
- u16 lo_and = 0xffff;
- u16 hi_and = 0xffff;
- for (u32 i = 0; i < len; i++) {
+ }
+
+ bool isRunProne() const {
+ u16 lo_and = 0xffff;
+ u16 hi_and = 0xffff;
+ for (u32 i = 0; i < len; i++) {
lo_and &= nibbleSets[i * 2];
hi_and &= nibbleSets[i * 2 + 1];
- }
- // we're not flood-prone if there's no way to get
- // through with a flood
- if (!lo_and || !hi_and) {
- return false;
- }
- return true;
- }
+ }
+ // we're not flood-prone if there's no way to get
+ // through with a flood
+ if (!lo_and || !hi_and) {
+ return false;
+ }
+ return true;
+ }
friend TeddySet merge(const TeddySet &a, const TeddySet &b) {
assert(a.nibbleSets.size() == b.nibbleSets.size());
@@ -218,127 +218,127 @@ public:
return m;
}
-};
-
+};
+
static
bool pack(const vector<hwlmLiteral> &lits,
const TeddyEngineDescription &eng,
map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits) {
- set<TeddySet> sts;
-
- for (u32 i = 0; i < lits.size(); i++) {
+ set<TeddySet> sts;
+
+ for (u32 i = 0; i < lits.size(); i++) {
TeddySet ts(eng.numMasks);
ts.addLiteral(i, lits[i]);
- sts.insert(ts);
- }
-
- while (1) {
-#ifdef TEDDY_DEBUG
- printf("Size %zu\n", sts.size());
+ sts.insert(ts);
+ }
+
+ while (1) {
+#ifdef TEDDY_DEBUG
+ printf("Size %zu\n", sts.size());
for (const TeddySet &ts : sts) {
printf("\n");
ts.dump();
- }
- printf("\n===============================================\n");
-#endif
-
+ }
+ printf("\n===============================================\n");
+#endif
+
auto m1 = sts.end(), m2 = sts.end();
- u64a best = 0xffffffffffffffffULL;
-
+ u64a best = 0xffffffffffffffffULL;
+
for (auto i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) {
- const TeddySet &s1 = *i1;
+ const TeddySet &s1 = *i1;
for (auto i2 = next(i1), e2 = sts.end(); i2 != e2; ++i2) {
- const TeddySet &s2 = *i2;
-
- // be more conservative if we don't absolutely need to
- // keep packing
- if ((sts.size() <= eng.getNumBuckets()) &&
- !s1.identicalTail(s2)) {
- continue;
- }
-
+ const TeddySet &s2 = *i2;
+
+ // be more conservative if we don't absolutely need to
+ // keep packing
+ if ((sts.size() <= eng.getNumBuckets()) &&
+ !s1.identicalTail(s2)) {
+ continue;
+ }
+
TeddySet tmpSet = merge(s1, s2);
- u64a newScore = tmpSet.heuristic();
- u64a oldScore = s1.heuristic() + s2.heuristic();
- if (newScore < oldScore) {
- m1 = i1;
- m2 = i2;
- break;
- } else {
- u64a score = newScore - oldScore;
- bool oldRunProne = s1.isRunProne() && s2.isRunProne();
- bool newRunProne = tmpSet.isRunProne();
- if (newRunProne && !oldRunProne) {
- continue;
- }
- if (score < best) {
- best = score;
- m1 = i1;
- m2 = i2;
- }
- }
- }
- }
- // if we didn't find a merge candidate, bail out
- if ((m1 == sts.end()) || (m2 == sts.end())) {
- break;
- }
-
- // do the merge
+ u64a newScore = tmpSet.heuristic();
+ u64a oldScore = s1.heuristic() + s2.heuristic();
+ if (newScore < oldScore) {
+ m1 = i1;
+ m2 = i2;
+ break;
+ } else {
+ u64a score = newScore - oldScore;
+ bool oldRunProne = s1.isRunProne() && s2.isRunProne();
+ bool newRunProne = tmpSet.isRunProne();
+ if (newRunProne && !oldRunProne) {
+ continue;
+ }
+ if (score < best) {
+ best = score;
+ m1 = i1;
+ m2 = i2;
+ }
+ }
+ }
+ }
+ // if we didn't find a merge candidate, bail out
+ if ((m1 == sts.end()) || (m2 == sts.end())) {
+ break;
+ }
+
+ // do the merge
TeddySet nts = merge(*m1, *m2);
-#ifdef TEDDY_DEBUG
- printf("Merging\n");
- printf("m1 = \n");
- m1->dump();
- printf("m2 = \n");
- m2->dump();
- printf("nts = \n");
- nts.dump();
- printf("\n===============================================\n");
-#endif
- sts.erase(m1);
- sts.erase(m2);
- sts.insert(nts);
- }
-
- if (sts.size() > eng.getNumBuckets()) {
- return false;
- }
-
+#ifdef TEDDY_DEBUG
+ printf("Merging\n");
+ printf("m1 = \n");
+ m1->dump();
+ printf("m2 = \n");
+ m2->dump();
+ printf("nts = \n");
+ nts.dump();
+ printf("\n===============================================\n");
+#endif
+ sts.erase(m1);
+ sts.erase(m2);
+ sts.insert(nts);
+ }
+
+ if (sts.size() > eng.getNumBuckets()) {
+ return false;
+ }
+
u32 bucket_id = 0;
for (const TeddySet &ts : sts) {
const auto &ts_lits = ts.getLits();
auto &bucket_lits = bucketToLits[bucket_id];
bucket_lits.insert(end(bucket_lits), begin(ts_lits), end(ts_lits));
bucket_id++;
- }
- return true;
-}
-
+ }
+ return true;
+}
+
// this entry has all-zero mask to skip reinforcement
#define NO_REINFORCEMENT N_CHARS
-
+
// this means every entry in reinforcement table
#define ALL_CHAR_SET N_CHARS
-
+
// each item's reinforcement mask has REINFORCED_MSK_LEN bytes
#define REINFORCED_MSK_LEN 8
-
+
// reinforcement table size for each 8 buckets set
#define RTABLE_SIZE ((N_CHARS + 1) * REINFORCED_MSK_LEN)
-
+
static
void initReinforcedTable(u8 *rmsk) {
u64a *mask = (u64a *)rmsk;
fill_n(mask, N_CHARS, 0x00ffffffffffffffULL);
}
-
+
static
void fillReinforcedMskZero(u8 *rmsk) {
u8 *mc = rmsk + NO_REINFORCEMENT * REINFORCED_MSK_LEN;
fill_n(mc, REINFORCED_MSK_LEN, 0x00);
}
-
+
static
void fillReinforcedMsk(u8 *rmsk, u16 c, u32 j, u8 bmsk) {
assert(j > 0);
@@ -347,12 +347,12 @@ void fillReinforcedMsk(u8 *rmsk, u16 c, u32 j, u8 bmsk) {
u8 *mc = rmsk + i * REINFORCED_MSK_LEN;
mc[j - 1] &= ~bmsk;
}
- } else {
+ } else {
u8 *mc = rmsk + c * REINFORCED_MSK_LEN;
mc[j - 1] &= ~bmsk;
- }
+ }
}
-
+
static
void fillDupNibbleMasks(const map<BucketIndex,
vector<LiteralIndex>> &bucketToLits,
@@ -443,71 +443,71 @@ void fillNibbleMasks(const map<BucketIndex,
u32 numMasks, u32 maskWidth, size_t maskLen,
u8 *baseMsk) {
memset(baseMsk, 0xff, maskLen);
-
+
for (const auto &b2l : bucketToLits) {
const u32 &bucket_id = b2l.first;
const vector<LiteralIndex> &ids = b2l.second;
- const u8 bmsk = 1U << (bucket_id % 8);
-
+ const u8 bmsk = 1U << (bucket_id % 8);
+
for (const LiteralIndex &lit_id : ids) {
const hwlmLiteral &l = lits[lit_id];
- DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
- const u32 sz = verify_u32(l.s.size());
-
- // fill in masks
+ DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
+ const u32 sz = verify_u32(l.s.size());
+
+ // fill in masks
for (u32 j = 0; j < numMasks; j++) {
const u32 msk_id_lo = j * 2 * maskWidth + (bucket_id / 8);
const u32 msk_id_hi = (j * 2 + 1) * maskWidth + (bucket_id / 8);
const u32 lo_base = msk_id_lo * 16;
const u32 hi_base = msk_id_hi * 16;
-
- // if we don't have a char at this position, fill in i
- // locations in these masks with '1'
- if (j >= sz) {
- for (u32 n = 0; n < 16; n++) {
+
+ // if we don't have a char at this position, fill in i
+ // locations in these masks with '1'
+ if (j >= sz) {
+ for (u32 n = 0; n < 16; n++) {
baseMsk[lo_base + n] &= ~bmsk;
baseMsk[hi_base + n] &= ~bmsk;
- }
- } else {
- u8 c = l.s[sz - 1 - j];
- // if we do have a char at this position
- const u32 hiShift = 4;
- u32 n_hi = (c >> hiShift) & 0xf;
- u32 n_lo = c & 0xf;
-
- if (j < l.msk.size() && l.msk[l.msk.size() - 1 - j]) {
- u8 m = l.msk[l.msk.size() - 1 - j];
- u8 m_hi = (m >> hiShift) & 0xf;
- u8 m_lo = m & 0xf;
- u8 cmp = l.cmp[l.msk.size() - 1 - j];
- u8 cmp_lo = cmp & 0xf;
- u8 cmp_hi = (cmp >> hiShift) & 0xf;
-
- for (u8 cm = 0; cm < 0x10; cm++) {
- if ((cm & m_lo) == (cmp_lo & m_lo)) {
+ }
+ } else {
+ u8 c = l.s[sz - 1 - j];
+ // if we do have a char at this position
+ const u32 hiShift = 4;
+ u32 n_hi = (c >> hiShift) & 0xf;
+ u32 n_lo = c & 0xf;
+
+ if (j < l.msk.size() && l.msk[l.msk.size() - 1 - j]) {
+ u8 m = l.msk[l.msk.size() - 1 - j];
+ u8 m_hi = (m >> hiShift) & 0xf;
+ u8 m_lo = m & 0xf;
+ u8 cmp = l.cmp[l.msk.size() - 1 - j];
+ u8 cmp_lo = cmp & 0xf;
+ u8 cmp_hi = (cmp >> hiShift) & 0xf;
+
+ for (u8 cm = 0; cm < 0x10; cm++) {
+ if ((cm & m_lo) == (cmp_lo & m_lo)) {
baseMsk[lo_base + cm] &= ~bmsk;
- }
- if ((cm & m_hi) == (cmp_hi & m_hi)) {
+ }
+ if ((cm & m_hi) == (cmp_hi & m_hi)) {
baseMsk[hi_base + cm] &= ~bmsk;
- }
- }
+ }
+ }
} else {
- if (l.nocase && ourisalpha(c)) {
- u32 cmHalfClear = (0xdf >> hiShift) & 0xf;
+ if (l.nocase && ourisalpha(c)) {
+ u32 cmHalfClear = (0xdf >> hiShift) & 0xf;
u32 cmHalfSet = (0x20 >> hiShift) & 0xf;
baseMsk[hi_base + (n_hi & cmHalfClear)] &= ~bmsk;
baseMsk[hi_base + (n_hi | cmHalfSet)] &= ~bmsk;
- } else {
+ } else {
baseMsk[hi_base + n_hi] &= ~bmsk;
- }
+ }
baseMsk[lo_base + n_lo] &= ~bmsk;
- }
- }
- }
- }
- }
+ }
+ }
+ }
+ }
+ }
}
-
+
static
void fillReinforcedTable(const map<BucketIndex,
vector<LiteralIndex>> &bucketToLits,
@@ -517,7 +517,7 @@ void fillReinforcedTable(const map<BucketIndex,
for (u32 i = 0; i < num_tables; i++) {
tables.push_back(rtable_base + i * RTABLE_SIZE);
}
-
+
for (auto t : tables) {
initReinforcedTable(t);
}
@@ -548,8 +548,8 @@ void fillReinforcedTable(const map<BucketIndex,
fillReinforcedMsk(rmsk, c, j, bmsk);
}
}
- }
- }
+ }
+ }
}
for (auto t : tables) {
@@ -638,20 +638,20 @@ bool assignStringsToBuckets(
for (size_t j = 0; j < lits[i].s.size(); j++) {
printf("%02x", ((u32)lits[i].s[j])&0xff);
}
- printf("\n");
- }
-#endif
-
+ printf("\n");
+ }
+#endif
+
if (!pack(lits, eng, bucketToLits)) {
DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n",
lits.size(), eng.getNumBuckets());
return false;
}
return true;
-}
-
-} // namespace
-
+}
+
+} // namespace
+
bytecode_ptr<FDR> teddyBuildTable(const HWLMProto &proto, const Grey &grey) {
TeddyCompiler tc(proto.lits, proto.bucketToLits, *(proto.teddyEng),
proto.make_small, grey);
@@ -662,15 +662,15 @@ bytecode_ptr<FDR> teddyBuildTable(const HWLMProto &proto, const Grey &grey) {
unique_ptr<HWLMProto> teddyBuildProtoHinted(
u8 engType, const vector<hwlmLiteral> &lits,
bool make_small, u32 hint, const target_t &target) {
- unique_ptr<TeddyEngineDescription> des;
- if (hint == HINT_INVALID) {
- des = chooseTeddyEngine(target, lits);
- } else {
- des = getTeddyDescription(hint);
- }
- if (!des) {
- return nullptr;
- }
+ unique_ptr<TeddyEngineDescription> des;
+ if (hint == HINT_INVALID) {
+ des = chooseTeddyEngine(target, lits);
+ } else {
+ des = getTeddyDescription(hint);
+ }
+ if (!des) {
+ return nullptr;
+ }
map<BucketIndex, std::vector<LiteralIndex>> bucketToLits;
if (!assignStringsToBuckets(lits, *des, bucketToLits)) {
@@ -679,6 +679,6 @@ unique_ptr<HWLMProto> teddyBuildProtoHinted(
return ue2::make_unique<HWLMProto>(engType, move(des), lits,
bucketToLits, make_small);
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/fdr/teddy_compile.h b/contrib/libs/hyperscan/src/fdr/teddy_compile.h
index 63e8d64a87..a2b4a13ca3 100644
--- a/contrib/libs/hyperscan/src/fdr/teddy_compile.h
+++ b/contrib/libs/hyperscan/src/fdr/teddy_compile.h
@@ -1,59 +1,59 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief FDR literal matcher: Teddy build API.
- */
-
-#ifndef TEDDY_COMPILE_H
-#define TEDDY_COMPILE_H
-
-#include "ue2common.h"
+ * \brief FDR literal matcher: Teddy build API.
+ */
+
+#ifndef TEDDY_COMPILE_H
+#define TEDDY_COMPILE_H
+
+#include "ue2common.h"
#include "hwlm/hwlm_build.h"
#include "util/bytecode_ptr.h"
-
-#include <vector>
-
-struct FDR;
-
-namespace ue2 {
-
+
+#include <vector>
+
+struct FDR;
+
+namespace ue2 {
+
class TeddyEngineDescription;
struct Grey;
-struct hwlmLiteral;
+struct hwlmLiteral;
struct target_t;
-
+
bytecode_ptr<FDR> teddyBuildTable(const HWLMProto &proto, const Grey &grey);
-
+
std::unique_ptr<HWLMProto> teddyBuildProtoHinted(
u8 engType, const std::vector<hwlmLiteral> &lits,
bool make_small, u32 hint, const target_t &target);
-} // namespace ue2
-
-#endif // TEDDY_COMPILE_H
+} // namespace ue2
+
+#endif // TEDDY_COMPILE_H
diff --git a/contrib/libs/hyperscan/src/fdr/teddy_engine_description.cpp b/contrib/libs/hyperscan/src/fdr/teddy_engine_description.cpp
index 8803366fba..88ae0f5382 100644
--- a/contrib/libs/hyperscan/src/fdr/teddy_engine_description.cpp
+++ b/contrib/libs/hyperscan/src/fdr/teddy_engine_description.cpp
@@ -1,56 +1,56 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "fdr_internal.h"
-#include "fdr_compile_internal.h"
-#include "fdr_confirm.h"
-#include "ue2common.h"
-#include "hs_internal.h"
-#include "fdr_engine_description.h"
-#include "teddy_internal.h"
-#include "teddy_engine_description.h"
-#include "util/make_unique.h"
-
-#include <cmath>
-
-using namespace std;
-
-namespace ue2 {
-
-TeddyEngineDescription::TeddyEngineDescription(const TeddyEngineDef &def)
- : EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fdr_internal.h"
+#include "fdr_compile_internal.h"
+#include "fdr_confirm.h"
+#include "ue2common.h"
+#include "hs_internal.h"
+#include "fdr_engine_description.h"
+#include "teddy_internal.h"
+#include "teddy_engine_description.h"
+#include "util/make_unique.h"
+
+#include <cmath>
+
+using namespace std;
+
+namespace ue2 {
+
+TeddyEngineDescription::TeddyEngineDescription(const TeddyEngineDef &def)
+ : EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
def.numBuckets),
- numMasks(def.numMasks), packed(def.packed) {}
-
-u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const {
- return numMasks;
-}
-
+ numMasks(def.numMasks), packed(def.packed) {}
+
+u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const {
+ return numMasks;
+}
+
void getTeddyDescriptions(vector<TeddyEngineDescription> *out) {
static const TeddyEngineDef defns[] = {
{ 3, 0 | HS_CPU_FEATURES_AVX2, 1, 16, false },
@@ -73,144 +73,144 @@ void getTeddyDescriptions(vector<TeddyEngineDescription> *out) {
out->clear();
for (const auto &def : defns) {
out->emplace_back(def);
- }
-}
-
-static
-size_t maxFloodTailLen(const vector<hwlmLiteral> &vl) {
- size_t max_flood_tail = 0;
- for (const auto &lit : vl) {
- const string &s = lit.s;
- assert(!s.empty());
- size_t j;
- for (j = 1; j < s.length(); j++) {
- if (s[s.length() - j - 1] != s[s.length() - 1]) {
- break;
- }
- }
- max_flood_tail = max(max_flood_tail, j);
- }
- return max_flood_tail;
-}
-
-/**
- * \brief True if this Teddy engine is qualified to handle this set of literals
- * on this target.
- */
-static
-bool isAllowed(const vector<hwlmLiteral> &vl, const TeddyEngineDescription &eng,
- const size_t max_lit_len, const target_t &target) {
- if (!eng.isValidOnTarget(target)) {
- DEBUG_PRINTF("%u disallowed: not valid on target\n", eng.getID());
- return false;
- }
- if (eng.getNumBuckets() < vl.size() && !eng.packed) {
- DEBUG_PRINTF("%u disallowed: num buckets < num lits and not packed\n",
- eng.getID());
- return false;
- }
- if (eng.getNumBuckets() * TEDDY_BUCKET_LOAD < vl.size()) {
- DEBUG_PRINTF("%u disallowed: too many lits for num buckets\n",
- eng.getID());
- return false;
- }
- if (eng.numMasks > max_lit_len) {
- DEBUG_PRINTF("%u disallowed: more masks than max lit len (%zu)\n",
- eng.getID(), max_lit_len);
- return false;
- }
-
- if (vl.size() > 40) {
- u32 n_small_lits = 0;
- for (const auto &lit : vl) {
- if (lit.s.length() < eng.numMasks) {
- n_small_lits++;
- }
- }
- if (n_small_lits * 5 > vl.size()) {
- DEBUG_PRINTF("too many short literals (%u)\n", n_small_lits);
- return false;
- }
- }
-
- return true;
-}
-
-unique_ptr<TeddyEngineDescription>
-chooseTeddyEngine(const target_t &target, const vector<hwlmLiteral> &vl) {
- vector<TeddyEngineDescription> descs;
- getTeddyDescriptions(&descs);
- const TeddyEngineDescription *best = nullptr;
-
- const size_t max_lit_len = maxLen(vl);
- const size_t max_flood_tail = maxFloodTailLen(vl);
- DEBUG_PRINTF("%zu lits, max_lit_len=%zu, max_flood_tail=%zu\n", vl.size(),
- max_lit_len, max_flood_tail);
-
- u32 best_score = 0;
- for (size_t engineID = 0; engineID < descs.size(); engineID++) {
- const TeddyEngineDescription &eng = descs[engineID];
- if (!isAllowed(vl, eng, max_lit_len, target)) {
- continue;
- }
-
- u32 score = 0;
-
- // We prefer unpacked Teddy models.
- if (!eng.packed) {
- score += 100;
- }
-
- // If we're heavily loaded, we prefer to have more masks.
- if (vl.size() > 4 * eng.getNumBuckets()) {
- score += eng.numMasks * 4;
- } else {
- // Lightly loaded cases are great.
- score += 100;
- }
-
- // We want enough masks to avoid becoming flood-prone.
- if (eng.numMasks > max_flood_tail) {
- score += 50;
- }
-
- // We prefer having 3 masks. 3 is just right.
- score += 6 / (abs(3 - (int)eng.numMasks) + 1);
-
- // We prefer cheaper, smaller Teddy models.
- score += 16 / eng.getNumBuckets();
-
- DEBUG_PRINTF("teddy %u: masks=%u, buckets=%u, packed=%u "
- "-> score=%u\n",
- eng.getID(), eng.numMasks, eng.getNumBuckets(),
- eng.packed ? 1U : 0U, score);
-
- if (!best || score > best_score) {
- best = &eng;
- best_score = score;
- }
- }
-
- if (!best) {
- DEBUG_PRINTF("failed to find engine\n");
- return nullptr;
- }
-
- DEBUG_PRINTF("using engine %u\n", best->getID());
- return ue2::make_unique<TeddyEngineDescription>(*best);
-}
-
-unique_ptr<TeddyEngineDescription> getTeddyDescription(u32 engineID) {
- vector<TeddyEngineDescription> descs;
- getTeddyDescriptions(&descs);
-
- for (const auto &desc : descs) {
- if (desc.getID() == engineID) {
- return ue2::make_unique<TeddyEngineDescription>(desc);
- }
- }
-
- return nullptr;
-}
-
-} // namespace ue2
+ }
+}
+
+static
+size_t maxFloodTailLen(const vector<hwlmLiteral> &vl) {
+ size_t max_flood_tail = 0;
+ for (const auto &lit : vl) {
+ const string &s = lit.s;
+ assert(!s.empty());
+ size_t j;
+ for (j = 1; j < s.length(); j++) {
+ if (s[s.length() - j - 1] != s[s.length() - 1]) {
+ break;
+ }
+ }
+ max_flood_tail = max(max_flood_tail, j);
+ }
+ return max_flood_tail;
+}
+
+/**
+ * \brief True if this Teddy engine is qualified to handle this set of literals
+ * on this target.
+ */
+static
+bool isAllowed(const vector<hwlmLiteral> &vl, const TeddyEngineDescription &eng,
+ const size_t max_lit_len, const target_t &target) {
+ if (!eng.isValidOnTarget(target)) {
+ DEBUG_PRINTF("%u disallowed: not valid on target\n", eng.getID());
+ return false;
+ }
+ if (eng.getNumBuckets() < vl.size() && !eng.packed) {
+ DEBUG_PRINTF("%u disallowed: num buckets < num lits and not packed\n",
+ eng.getID());
+ return false;
+ }
+ if (eng.getNumBuckets() * TEDDY_BUCKET_LOAD < vl.size()) {
+ DEBUG_PRINTF("%u disallowed: too many lits for num buckets\n",
+ eng.getID());
+ return false;
+ }
+ if (eng.numMasks > max_lit_len) {
+ DEBUG_PRINTF("%u disallowed: more masks than max lit len (%zu)\n",
+ eng.getID(), max_lit_len);
+ return false;
+ }
+
+ if (vl.size() > 40) {
+ u32 n_small_lits = 0;
+ for (const auto &lit : vl) {
+ if (lit.s.length() < eng.numMasks) {
+ n_small_lits++;
+ }
+ }
+ if (n_small_lits * 5 > vl.size()) {
+ DEBUG_PRINTF("too many short literals (%u)\n", n_small_lits);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+unique_ptr<TeddyEngineDescription>
+chooseTeddyEngine(const target_t &target, const vector<hwlmLiteral> &vl) {
+ vector<TeddyEngineDescription> descs;
+ getTeddyDescriptions(&descs);
+ const TeddyEngineDescription *best = nullptr;
+
+ const size_t max_lit_len = maxLen(vl);
+ const size_t max_flood_tail = maxFloodTailLen(vl);
+ DEBUG_PRINTF("%zu lits, max_lit_len=%zu, max_flood_tail=%zu\n", vl.size(),
+ max_lit_len, max_flood_tail);
+
+ u32 best_score = 0;
+ for (size_t engineID = 0; engineID < descs.size(); engineID++) {
+ const TeddyEngineDescription &eng = descs[engineID];
+ if (!isAllowed(vl, eng, max_lit_len, target)) {
+ continue;
+ }
+
+ u32 score = 0;
+
+ // We prefer unpacked Teddy models.
+ if (!eng.packed) {
+ score += 100;
+ }
+
+ // If we're heavily loaded, we prefer to have more masks.
+ if (vl.size() > 4 * eng.getNumBuckets()) {
+ score += eng.numMasks * 4;
+ } else {
+ // Lightly loaded cases are great.
+ score += 100;
+ }
+
+ // We want enough masks to avoid becoming flood-prone.
+ if (eng.numMasks > max_flood_tail) {
+ score += 50;
+ }
+
+ // We prefer having 3 masks. 3 is just right.
+ score += 6 / (abs(3 - (int)eng.numMasks) + 1);
+
+ // We prefer cheaper, smaller Teddy models.
+ score += 16 / eng.getNumBuckets();
+
+ DEBUG_PRINTF("teddy %u: masks=%u, buckets=%u, packed=%u "
+ "-> score=%u\n",
+ eng.getID(), eng.numMasks, eng.getNumBuckets(),
+ eng.packed ? 1U : 0U, score);
+
+ if (!best || score > best_score) {
+ best = &eng;
+ best_score = score;
+ }
+ }
+
+ if (!best) {
+ DEBUG_PRINTF("failed to find engine\n");
+ return nullptr;
+ }
+
+ DEBUG_PRINTF("using engine %u\n", best->getID());
+ return ue2::make_unique<TeddyEngineDescription>(*best);
+}
+
+unique_ptr<TeddyEngineDescription> getTeddyDescription(u32 engineID) {
+ vector<TeddyEngineDescription> descs;
+ getTeddyDescriptions(&descs);
+
+ for (const auto &desc : descs) {
+ if (desc.getID() == engineID) {
+ return ue2::make_unique<TeddyEngineDescription>(desc);
+ }
+ }
+
+ return nullptr;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/fdr/teddy_engine_description.h b/contrib/libs/hyperscan/src/fdr/teddy_engine_description.h
index b47e3c09ee..9593161384 100644
--- a/contrib/libs/hyperscan/src/fdr/teddy_engine_description.h
+++ b/contrib/libs/hyperscan/src/fdr/teddy_engine_description.h
@@ -1,67 +1,67 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TEDDY_ENGINE_DESCRIPTION_H
-#define TEDDY_ENGINE_DESCRIPTION_H
-
-#include "engine_description.h"
-#include "fdr_compile_internal.h"
-
-#include <memory>
-#include <vector>
-
-namespace ue2 {
-
-#define TEDDY_BUCKET_LOAD 6
-
-struct TeddyEngineDef {
- u32 id;
- u64a cpu_features;
- u32 numMasks;
- u32 numBuckets;
- bool packed;
-};
-
-class TeddyEngineDescription : public EngineDescription {
-public:
- u32 numMasks;
- bool packed;
-
- explicit TeddyEngineDescription(const TeddyEngineDef &def);
-
- u32 getDefaultFloodSuffixLength() const override;
-};
-
-std::unique_ptr<TeddyEngineDescription>
-chooseTeddyEngine(const target_t &target, const std::vector<hwlmLiteral> &vl);
-std::unique_ptr<TeddyEngineDescription> getTeddyDescription(u32 engineID);
-void getTeddyDescriptions(std::vector<TeddyEngineDescription> *out);
-
-} // namespace ue2
-
-#endif
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TEDDY_ENGINE_DESCRIPTION_H
+#define TEDDY_ENGINE_DESCRIPTION_H
+
+#include "engine_description.h"
+#include "fdr_compile_internal.h"
+
+#include <memory>
+#include <vector>
+
+namespace ue2 {
+
+#define TEDDY_BUCKET_LOAD 6
+
+struct TeddyEngineDef {
+ u32 id;
+ u64a cpu_features;
+ u32 numMasks;
+ u32 numBuckets;
+ bool packed;
+};
+
+class TeddyEngineDescription : public EngineDescription {
+public:
+ u32 numMasks;
+ bool packed;
+
+ explicit TeddyEngineDescription(const TeddyEngineDef &def);
+
+ u32 getDefaultFloodSuffixLength() const override;
+};
+
+std::unique_ptr<TeddyEngineDescription>
+chooseTeddyEngine(const target_t &target, const std::vector<hwlmLiteral> &vl);
+std::unique_ptr<TeddyEngineDescription> getTeddyDescription(u32 engineID);
+void getTeddyDescriptions(std::vector<TeddyEngineDescription> *out);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/fdr/teddy_internal.h b/contrib/libs/hyperscan/src/fdr/teddy_internal.h
index aab1b4d071..1e9e603fa7 100644
--- a/contrib/libs/hyperscan/src/fdr/teddy_internal.h
+++ b/contrib/libs/hyperscan/src/fdr/teddy_internal.h
@@ -1,31 +1,31 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/* Teddy bytecode layout:
* * |-----|
* * | | struct Teddy
@@ -48,19 +48,19 @@
* * |-----|
*/
-#ifndef TEDDY_INTERNAL_H
-#define TEDDY_INTERNAL_H
-
-#include "ue2common.h"
-
-// first part is compatible with an FDR
-struct Teddy {
- u32 engineID;
- u32 size;
- u32 maxStringLen;
+#ifndef TEDDY_INTERNAL_H
+#define TEDDY_INTERNAL_H
+
+#include "ue2common.h"
+
+// first part is compatible with an FDR
+struct Teddy {
+ u32 engineID;
+ u32 size;
+ u32 maxStringLen;
u32 numStrings;
u32 confOffset;
- u32 floodOffset;
-};
-
-#endif
+ u32 floodOffset;
+};
+
+#endif
diff --git a/contrib/libs/hyperscan/src/grey.cpp b/contrib/libs/hyperscan/src/grey.cpp
index a219c08ba2..86a93d25aa 100644
--- a/contrib/libs/hyperscan/src/grey.cpp
+++ b/contrib/libs/hyperscan/src/grey.cpp
@@ -1,70 +1,70 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "grey.h"
-#include "ue2common.h"
-
-#include <algorithm>
-#include <cstdlib> // exit
-#include <string>
-#include <vector>
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "grey.h"
+#include "ue2common.h"
+
+#include <algorithm>
+#include <cstdlib> // exit
+#include <string>
+#include <vector>
+
#define DEFAULT_MAX_HISTORY 110
-
-using namespace std;
-
-namespace ue2 {
-
-Grey::Grey(void) :
- optimiseComponentTree(true),
+
+using namespace std;
+
+namespace ue2 {
+
+Grey::Grey(void) :
+ optimiseComponentTree(true),
calcComponents(true),
- performGraphSimplification(true),
- prefilterReductions(true),
- removeEdgeRedundancy(true),
- allowGough(true),
- allowHaigLit(true),
- allowLitHaig(true),
- allowLbr(true),
- allowMcClellan(true),
+ performGraphSimplification(true),
+ prefilterReductions(true),
+ removeEdgeRedundancy(true),
+ allowGough(true),
+ allowHaigLit(true),
+ allowLitHaig(true),
+ allowLbr(true),
+ allowMcClellan(true),
allowSheng(true),
allowMcSheng(true),
- allowPuff(true),
+ allowPuff(true),
allowLiteral(true),
allowViolet(true),
- allowExtendedNFA(true), /* bounded repeats of course */
- allowLimExNFA(true),
- allowAnchoredAcyclic(true),
- allowSmallLiteralSet(true),
- allowCastle(true),
- allowDecoratedLiteral(true),
+ allowExtendedNFA(true), /* bounded repeats of course */
+ allowLimExNFA(true),
+ allowAnchoredAcyclic(true),
+ allowSmallLiteralSet(true),
+ allowCastle(true),
+ allowDecoratedLiteral(true),
allowApproximateMatching(true),
- allowNoodle(true),
- fdrAllowTeddy(true),
+ allowNoodle(true),
+ fdrAllowTeddy(true),
fdrAllowFlood(true),
violetAvoidSuffixes(true),
violetAvoidWeakInfixes(true),
@@ -73,131 +73,131 @@ Grey::Grey(void) :
violetLiteralChains(true),
violetDoubleCutLiteralLen(3),
violetEarlyCleanLiteralLen(6),
- puffImproveHead(true),
- castleExclusive(true),
- mergeSEP(true), /* short exhaustible passthroughs */
- mergeRose(true), // roses inside rose
- mergeSuffixes(true), // suffix nfas inside rose
- mergeOutfixes(true),
- onlyOneOutfix(false),
- allowShermanStates(true),
- allowMcClellan8(true),
+ puffImproveHead(true),
+ castleExclusive(true),
+ mergeSEP(true), /* short exhaustible passthroughs */
+ mergeRose(true), // roses inside rose
+ mergeSuffixes(true), // suffix nfas inside rose
+ mergeOutfixes(true),
+ onlyOneOutfix(false),
+ allowShermanStates(true),
+ allowMcClellan8(true),
allowWideStates(true), // enable wide state for McClellan8
- highlanderPruneDFA(true),
- minimizeDFA(true),
- accelerateDFA(true),
- accelerateNFA(true),
- reverseAccelerate(true),
- squashNFA(true),
- compressNFAState(true),
- numberNFAStatesWrong(false), /* debugging only */
- highlanderSquash(true),
- allowZombies(true),
- floodAsPuffette(false),
- nfaForceSize(0),
- maxHistoryAvailable(DEFAULT_MAX_HISTORY),
- minHistoryAvailable(0), /* debugging only */
- maxAnchoredRegion(63), /* for rose's atable to run over */
- minRoseLiteralLength(3),
- minRoseNetflowLiteralLength(2),
- maxRoseNetflowEdges(50000), /* otherwise no netflow pass. */
+ highlanderPruneDFA(true),
+ minimizeDFA(true),
+ accelerateDFA(true),
+ accelerateNFA(true),
+ reverseAccelerate(true),
+ squashNFA(true),
+ compressNFAState(true),
+ numberNFAStatesWrong(false), /* debugging only */
+ highlanderSquash(true),
+ allowZombies(true),
+ floodAsPuffette(false),
+ nfaForceSize(0),
+ maxHistoryAvailable(DEFAULT_MAX_HISTORY),
+ minHistoryAvailable(0), /* debugging only */
+ maxAnchoredRegion(63), /* for rose's atable to run over */
+ minRoseLiteralLength(3),
+ minRoseNetflowLiteralLength(2),
+ maxRoseNetflowEdges(50000), /* otherwise no netflow pass. */
maxEditDistance(16),
- minExtBoundedRepeatSize(32),
- goughCopyPropagate(true),
- goughRegisterAllocate(true),
- shortcutLiterals(true),
- roseGraphReduction(true),
- roseRoleAliasing(true),
- roseMasks(true),
- roseConvertFloodProneSuffixes(true),
- roseMergeRosesDuringAliasing(true),
- roseMultiTopRoses(true),
- roseHamsterMasks(true),
- roseLookaroundMasks(true),
- roseMcClellanPrefix(1),
- roseMcClellanSuffix(1),
- roseMcClellanOutfix(2),
- roseTransformDelay(true),
- earlyMcClellanPrefix(true),
- earlyMcClellanInfix(true),
- earlyMcClellanSuffix(true),
- allowCountingMiracles(true),
- allowSomChain(true),
- somMaxRevNfaLength(126),
- hamsterAccelForward(true),
- hamsterAccelReverse(false),
- miracleHistoryBonus(16),
- equivalenceEnable(true),
-
- allowSmallWrite(true), // McClellan dfas for small patterns
+ minExtBoundedRepeatSize(32),
+ goughCopyPropagate(true),
+ goughRegisterAllocate(true),
+ shortcutLiterals(true),
+ roseGraphReduction(true),
+ roseRoleAliasing(true),
+ roseMasks(true),
+ roseConvertFloodProneSuffixes(true),
+ roseMergeRosesDuringAliasing(true),
+ roseMultiTopRoses(true),
+ roseHamsterMasks(true),
+ roseLookaroundMasks(true),
+ roseMcClellanPrefix(1),
+ roseMcClellanSuffix(1),
+ roseMcClellanOutfix(2),
+ roseTransformDelay(true),
+ earlyMcClellanPrefix(true),
+ earlyMcClellanInfix(true),
+ earlyMcClellanSuffix(true),
+ allowCountingMiracles(true),
+ allowSomChain(true),
+ somMaxRevNfaLength(126),
+ hamsterAccelForward(true),
+ hamsterAccelReverse(false),
+ miracleHistoryBonus(16),
+ equivalenceEnable(true),
+
+ allowSmallWrite(true), // McClellan dfas for small patterns
allowSmallWriteSheng(false), // allow use of Sheng for SMWR
-
- smallWriteLargestBuffer(70), // largest buffer that can be
- // considered a small write
- // all blocks larger than this
- // are given to rose &co
- smallWriteLargestBufferBad(35),
- limitSmallWriteOutfixSize(1048576), // 1 MB
+
+ smallWriteLargestBuffer(70), // largest buffer that can be
+ // considered a small write
+ // all blocks larger than this
+ // are given to rose &co
+ smallWriteLargestBufferBad(35),
+ limitSmallWriteOutfixSize(1048576), // 1 MB
smallWriteMaxPatterns(10000),
smallWriteMaxLiterals(10000),
smallWriteMergeBatchSize(20),
allowTamarama(true), // Tamarama engine
tamaChunkSize(100),
- dumpFlags(0),
- limitPatternCount(8000000), // 8M patterns
- limitPatternLength(16000), // 16K bytes
- limitGraphVertices(500000), // 500K vertices
- limitGraphEdges(1000000), // 1M edges
- limitReportCount(4*8000000),
- limitLiteralCount(8000000), // 8M literals
- limitLiteralLength(16000),
- limitLiteralMatcherChars(1073741824), // 1 GB
- limitLiteralMatcherSize(1073741824), // 1 GB
- limitRoseRoleCount(4*8000000),
- limitRoseEngineCount(8000000), // 8M engines
- limitRoseAnchoredSize(1073741824), // 1 GB
- limitEngineSize(1073741824), // 1 GB
- limitDFASize(1073741824), // 1 GB
- limitNFASize(1048576), // 1 MB
+ dumpFlags(0),
+ limitPatternCount(8000000), // 8M patterns
+ limitPatternLength(16000), // 16K bytes
+ limitGraphVertices(500000), // 500K vertices
+ limitGraphEdges(1000000), // 1M edges
+ limitReportCount(4*8000000),
+ limitLiteralCount(8000000), // 8M literals
+ limitLiteralLength(16000),
+ limitLiteralMatcherChars(1073741824), // 1 GB
+ limitLiteralMatcherSize(1073741824), // 1 GB
+ limitRoseRoleCount(4*8000000),
+ limitRoseEngineCount(8000000), // 8M engines
+ limitRoseAnchoredSize(1073741824), // 1 GB
+ limitEngineSize(1073741824), // 1 GB
+ limitDFASize(1073741824), // 1 GB
+ limitNFASize(1048576), // 1 MB
limitLBRSize(1048576), // 1 MB
limitApproxMatchingVertices(5000)
-{
- assert(maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
-}
-
-} // namespace ue2
-
-#ifndef RELEASE_BUILD
-
-#include <boost/lexical_cast.hpp>
-using boost::lexical_cast;
-
-namespace ue2 {
-
-void applyGreyOverrides(Grey *g, const string &s) {
- string::const_iterator p = s.begin();
- string::const_iterator pe = s.end();
- string help = "help:0";
- bool invalid_key_seen = false;
- Grey defaultg;
-
- if (s == "help" || s == "help:") {
- printf("Valid grey overrides:\n");
- p = help.begin();
- pe = help.end();
- }
-
- while (p != pe) {
- string::const_iterator ke = find(p, pe, ':');
-
- if (ke == pe) {
- break;
- }
-
- string key(p, ke);
-
- string::const_iterator ve = find(ke, pe, ';');
-
+{
+ assert(maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
+}
+
+} // namespace ue2
+
+#ifndef RELEASE_BUILD
+
+#include <boost/lexical_cast.hpp>
+using boost::lexical_cast;
+
+namespace ue2 {
+
+void applyGreyOverrides(Grey *g, const string &s) {
+ string::const_iterator p = s.begin();
+ string::const_iterator pe = s.end();
+ string help = "help:0";
+ bool invalid_key_seen = false;
+ Grey defaultg;
+
+ if (s == "help" || s == "help:") {
+ printf("Valid grey overrides:\n");
+ p = help.begin();
+ pe = help.end();
+ }
+
+ while (p != pe) {
+ string::const_iterator ke = find(p, pe, ':');
+
+ if (ke == pe) {
+ break;
+ }
+
+ string key(p, ke);
+
+ string::const_iterator ve = find(ke, pe, ';');
+
unsigned int value = 0;
try {
value = lexical_cast<unsigned int>(string(ke + 1, ve));
@@ -207,42 +207,42 @@ void applyGreyOverrides(Grey *g, const string &s) {
invalid_key_seen = true;
break;
}
- bool done = false;
-
- /* surely there exists a nice template to go with this macro to make
- * all the boring code disappear */
-#define G_UPDATE(k) do { \
- if (key == ""#k) { g->k = value; done = 1;} \
- if (key == "help") { \
- printf("\t%-30s\tdefault: %s\n", #k, \
- lexical_cast<string>(defaultg.k).c_str()); \
- } \
- } while (0)
-
- G_UPDATE(optimiseComponentTree);
+ bool done = false;
+
+ /* surely there exists a nice template to go with this macro to make
+ * all the boring code disappear */
+#define G_UPDATE(k) do { \
+ if (key == ""#k) { g->k = value; done = 1;} \
+ if (key == "help") { \
+ printf("\t%-30s\tdefault: %s\n", #k, \
+ lexical_cast<string>(defaultg.k).c_str()); \
+ } \
+ } while (0)
+
+ G_UPDATE(optimiseComponentTree);
G_UPDATE(calcComponents);
- G_UPDATE(performGraphSimplification);
- G_UPDATE(prefilterReductions);
- G_UPDATE(removeEdgeRedundancy);
- G_UPDATE(allowGough);
- G_UPDATE(allowHaigLit);
- G_UPDATE(allowLitHaig);
- G_UPDATE(allowLbr);
- G_UPDATE(allowMcClellan);
+ G_UPDATE(performGraphSimplification);
+ G_UPDATE(prefilterReductions);
+ G_UPDATE(removeEdgeRedundancy);
+ G_UPDATE(allowGough);
+ G_UPDATE(allowHaigLit);
+ G_UPDATE(allowLitHaig);
+ G_UPDATE(allowLbr);
+ G_UPDATE(allowMcClellan);
G_UPDATE(allowSheng);
G_UPDATE(allowMcSheng);
- G_UPDATE(allowPuff);
+ G_UPDATE(allowPuff);
G_UPDATE(allowLiteral);
G_UPDATE(allowViolet);
- G_UPDATE(allowExtendedNFA);
- G_UPDATE(allowLimExNFA);
- G_UPDATE(allowAnchoredAcyclic);
- G_UPDATE(allowSmallLiteralSet);
- G_UPDATE(allowCastle);
- G_UPDATE(allowDecoratedLiteral);
- G_UPDATE(allowNoodle);
+ G_UPDATE(allowExtendedNFA);
+ G_UPDATE(allowLimExNFA);
+ G_UPDATE(allowAnchoredAcyclic);
+ G_UPDATE(allowSmallLiteralSet);
+ G_UPDATE(allowCastle);
+ G_UPDATE(allowDecoratedLiteral);
+ G_UPDATE(allowNoodle);
G_UPDATE(allowApproximateMatching);
- G_UPDATE(fdrAllowTeddy);
+ G_UPDATE(fdrAllowTeddy);
G_UPDATE(fdrAllowFlood);
G_UPDATE(violetAvoidSuffixes);
G_UPDATE(violetAvoidWeakInfixes);
@@ -251,169 +251,169 @@ void applyGreyOverrides(Grey *g, const string &s) {
G_UPDATE(violetLiteralChains);
G_UPDATE(violetDoubleCutLiteralLen);
G_UPDATE(violetEarlyCleanLiteralLen);
- G_UPDATE(puffImproveHead);
- G_UPDATE(castleExclusive);
- G_UPDATE(mergeSEP);
- G_UPDATE(mergeRose);
- G_UPDATE(mergeSuffixes);
- G_UPDATE(mergeOutfixes);
- G_UPDATE(onlyOneOutfix);
- G_UPDATE(allowShermanStates);
- G_UPDATE(allowMcClellan8);
+ G_UPDATE(puffImproveHead);
+ G_UPDATE(castleExclusive);
+ G_UPDATE(mergeSEP);
+ G_UPDATE(mergeRose);
+ G_UPDATE(mergeSuffixes);
+ G_UPDATE(mergeOutfixes);
+ G_UPDATE(onlyOneOutfix);
+ G_UPDATE(allowShermanStates);
+ G_UPDATE(allowMcClellan8);
G_UPDATE(allowWideStates);
- G_UPDATE(highlanderPruneDFA);
- G_UPDATE(minimizeDFA);
- G_UPDATE(accelerateDFA);
- G_UPDATE(accelerateNFA);
- G_UPDATE(reverseAccelerate);
- G_UPDATE(squashNFA);
- G_UPDATE(compressNFAState);
- G_UPDATE(numberNFAStatesWrong);
- G_UPDATE(allowZombies);
- G_UPDATE(floodAsPuffette);
- G_UPDATE(nfaForceSize);
- G_UPDATE(highlanderSquash);
- G_UPDATE(maxHistoryAvailable);
- G_UPDATE(minHistoryAvailable);
- G_UPDATE(maxAnchoredRegion);
- G_UPDATE(minRoseLiteralLength);
- G_UPDATE(minRoseNetflowLiteralLength);
- G_UPDATE(maxRoseNetflowEdges);
+ G_UPDATE(highlanderPruneDFA);
+ G_UPDATE(minimizeDFA);
+ G_UPDATE(accelerateDFA);
+ G_UPDATE(accelerateNFA);
+ G_UPDATE(reverseAccelerate);
+ G_UPDATE(squashNFA);
+ G_UPDATE(compressNFAState);
+ G_UPDATE(numberNFAStatesWrong);
+ G_UPDATE(allowZombies);
+ G_UPDATE(floodAsPuffette);
+ G_UPDATE(nfaForceSize);
+ G_UPDATE(highlanderSquash);
+ G_UPDATE(maxHistoryAvailable);
+ G_UPDATE(minHistoryAvailable);
+ G_UPDATE(maxAnchoredRegion);
+ G_UPDATE(minRoseLiteralLength);
+ G_UPDATE(minRoseNetflowLiteralLength);
+ G_UPDATE(maxRoseNetflowEdges);
G_UPDATE(maxEditDistance);
- G_UPDATE(minExtBoundedRepeatSize);
- G_UPDATE(goughCopyPropagate);
- G_UPDATE(goughRegisterAllocate);
- G_UPDATE(shortcutLiterals);
- G_UPDATE(roseGraphReduction);
- G_UPDATE(roseRoleAliasing);
- G_UPDATE(roseMasks);
- G_UPDATE(roseConvertFloodProneSuffixes);
- G_UPDATE(roseMergeRosesDuringAliasing);
- G_UPDATE(roseMultiTopRoses);
- G_UPDATE(roseHamsterMasks);
- G_UPDATE(roseLookaroundMasks);
- G_UPDATE(roseMcClellanPrefix);
- G_UPDATE(roseMcClellanSuffix);
- G_UPDATE(roseMcClellanOutfix);
- G_UPDATE(roseTransformDelay);
- G_UPDATE(earlyMcClellanPrefix);
- G_UPDATE(earlyMcClellanInfix);
- G_UPDATE(earlyMcClellanSuffix);
- G_UPDATE(allowSomChain);
- G_UPDATE(allowCountingMiracles);
- G_UPDATE(somMaxRevNfaLength);
- G_UPDATE(hamsterAccelForward);
- G_UPDATE(hamsterAccelReverse);
- G_UPDATE(miracleHistoryBonus);
- G_UPDATE(equivalenceEnable);
- G_UPDATE(allowSmallWrite);
+ G_UPDATE(minExtBoundedRepeatSize);
+ G_UPDATE(goughCopyPropagate);
+ G_UPDATE(goughRegisterAllocate);
+ G_UPDATE(shortcutLiterals);
+ G_UPDATE(roseGraphReduction);
+ G_UPDATE(roseRoleAliasing);
+ G_UPDATE(roseMasks);
+ G_UPDATE(roseConvertFloodProneSuffixes);
+ G_UPDATE(roseMergeRosesDuringAliasing);
+ G_UPDATE(roseMultiTopRoses);
+ G_UPDATE(roseHamsterMasks);
+ G_UPDATE(roseLookaroundMasks);
+ G_UPDATE(roseMcClellanPrefix);
+ G_UPDATE(roseMcClellanSuffix);
+ G_UPDATE(roseMcClellanOutfix);
+ G_UPDATE(roseTransformDelay);
+ G_UPDATE(earlyMcClellanPrefix);
+ G_UPDATE(earlyMcClellanInfix);
+ G_UPDATE(earlyMcClellanSuffix);
+ G_UPDATE(allowSomChain);
+ G_UPDATE(allowCountingMiracles);
+ G_UPDATE(somMaxRevNfaLength);
+ G_UPDATE(hamsterAccelForward);
+ G_UPDATE(hamsterAccelReverse);
+ G_UPDATE(miracleHistoryBonus);
+ G_UPDATE(equivalenceEnable);
+ G_UPDATE(allowSmallWrite);
G_UPDATE(allowSmallWriteSheng);
- G_UPDATE(smallWriteLargestBuffer);
- G_UPDATE(smallWriteLargestBufferBad);
- G_UPDATE(limitSmallWriteOutfixSize);
+ G_UPDATE(smallWriteLargestBuffer);
+ G_UPDATE(smallWriteLargestBufferBad);
+ G_UPDATE(limitSmallWriteOutfixSize);
G_UPDATE(smallWriteMaxPatterns);
G_UPDATE(smallWriteMaxLiterals);
G_UPDATE(smallWriteMergeBatchSize);
G_UPDATE(allowTamarama);
G_UPDATE(tamaChunkSize);
- G_UPDATE(limitPatternCount);
- G_UPDATE(limitPatternLength);
- G_UPDATE(limitGraphVertices);
- G_UPDATE(limitGraphEdges);
- G_UPDATE(limitReportCount);
- G_UPDATE(limitLiteralCount);
- G_UPDATE(limitLiteralLength);
- G_UPDATE(limitLiteralMatcherChars);
- G_UPDATE(limitLiteralMatcherSize);
- G_UPDATE(limitRoseRoleCount);
- G_UPDATE(limitRoseEngineCount);
- G_UPDATE(limitRoseAnchoredSize);
- G_UPDATE(limitEngineSize);
- G_UPDATE(limitDFASize);
- G_UPDATE(limitNFASize);
- G_UPDATE(limitLBRSize);
+ G_UPDATE(limitPatternCount);
+ G_UPDATE(limitPatternLength);
+ G_UPDATE(limitGraphVertices);
+ G_UPDATE(limitGraphEdges);
+ G_UPDATE(limitReportCount);
+ G_UPDATE(limitLiteralCount);
+ G_UPDATE(limitLiteralLength);
+ G_UPDATE(limitLiteralMatcherChars);
+ G_UPDATE(limitLiteralMatcherSize);
+ G_UPDATE(limitRoseRoleCount);
+ G_UPDATE(limitRoseEngineCount);
+ G_UPDATE(limitRoseAnchoredSize);
+ G_UPDATE(limitEngineSize);
+ G_UPDATE(limitDFASize);
+ G_UPDATE(limitNFASize);
+ G_UPDATE(limitLBRSize);
G_UPDATE(limitApproxMatchingVertices);
-
-#undef G_UPDATE
- if (key == "simple_som") {
- g->allowHaigLit = false;
- g->allowLitHaig = false;
- g->allowSomChain = false;
- g->somMaxRevNfaLength = 0;
- done = true;
- }
- if (key == "forceOutfixesNFA") {
- g->allowAnchoredAcyclic = false;
- g->allowCastle = false;
- g->allowDecoratedLiteral = false;
- g->allowGough = false;
- g->allowHaigLit = false;
- g->allowLbr = false;
- g->allowLimExNFA = true;
- g->allowLitHaig = false;
- g->allowMcClellan = false;
- g->allowPuff = false;
+
+#undef G_UPDATE
+ if (key == "simple_som") {
+ g->allowHaigLit = false;
+ g->allowLitHaig = false;
+ g->allowSomChain = false;
+ g->somMaxRevNfaLength = 0;
+ done = true;
+ }
+ if (key == "forceOutfixesNFA") {
+ g->allowAnchoredAcyclic = false;
+ g->allowCastle = false;
+ g->allowDecoratedLiteral = false;
+ g->allowGough = false;
+ g->allowHaigLit = false;
+ g->allowLbr = false;
+ g->allowLimExNFA = true;
+ g->allowLitHaig = false;
+ g->allowMcClellan = false;
+ g->allowPuff = false;
g->allowLiteral = false;
g->allowViolet = false;
- g->allowSmallLiteralSet = false;
- g->roseMasks = false;
- done = true;
- }
- if (key == "forceOutfixesDFA") {
- g->allowAnchoredAcyclic = false;
- g->allowCastle = false;
- g->allowDecoratedLiteral = false;
- g->allowGough = false;
- g->allowHaigLit = false;
- g->allowLbr = false;
- g->allowLimExNFA = false;
- g->allowLitHaig = false;
- g->allowMcClellan = true;
- g->allowPuff = false;
+ g->allowSmallLiteralSet = false;
+ g->roseMasks = false;
+ done = true;
+ }
+ if (key == "forceOutfixesDFA") {
+ g->allowAnchoredAcyclic = false;
+ g->allowCastle = false;
+ g->allowDecoratedLiteral = false;
+ g->allowGough = false;
+ g->allowHaigLit = false;
+ g->allowLbr = false;
+ g->allowLimExNFA = false;
+ g->allowLitHaig = false;
+ g->allowMcClellan = true;
+ g->allowPuff = false;
g->allowLiteral = false;
g->allowViolet = false;
- g->allowSmallLiteralSet = false;
- g->roseMasks = false;
- done = true;
- }
- if (key == "forceOutfixes") {
- g->allowAnchoredAcyclic = false;
- g->allowCastle = false;
- g->allowDecoratedLiteral = false;
- g->allowGough = true;
- g->allowHaigLit = false;
- g->allowLbr = false;
- g->allowLimExNFA = true;
- g->allowLitHaig = false;
- g->allowMcClellan = true;
- g->allowPuff = false;
+ g->allowSmallLiteralSet = false;
+ g->roseMasks = false;
+ done = true;
+ }
+ if (key == "forceOutfixes") {
+ g->allowAnchoredAcyclic = false;
+ g->allowCastle = false;
+ g->allowDecoratedLiteral = false;
+ g->allowGough = true;
+ g->allowHaigLit = false;
+ g->allowLbr = false;
+ g->allowLimExNFA = true;
+ g->allowLitHaig = false;
+ g->allowMcClellan = true;
+ g->allowPuff = false;
g->allowLiteral = false;
g->allowViolet = false;
- g->allowSmallLiteralSet = false;
- g->roseMasks = false;
- done = true;
- }
-
- if (!done && key != "help") {
- printf("Invalid grey override key %s:%u\n", key.c_str(), value);
- invalid_key_seen = true;
- }
-
- p = ve;
-
- if (p != pe) {
- ++p;
- }
- }
-
- if (invalid_key_seen) {
- applyGreyOverrides(g, "help");
- exit(1);
- }
-
- assert(g->maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
-}
-
-} // namespace ue2
-
-#endif
+ g->allowSmallLiteralSet = false;
+ g->roseMasks = false;
+ done = true;
+ }
+
+ if (!done && key != "help") {
+ printf("Invalid grey override key %s:%u\n", key.c_str(), value);
+ invalid_key_seen = true;
+ }
+
+ p = ve;
+
+ if (p != pe) {
+ ++p;
+ }
+ }
+
+ if (invalid_key_seen) {
+ applyGreyOverrides(g, "help");
+ exit(1);
+ }
+
+ assert(g->maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
+}
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/grey.h b/contrib/libs/hyperscan/src/grey.h
index 352d9892e4..ed2f845a4b 100644
--- a/contrib/libs/hyperscan/src/grey.h
+++ b/contrib/libs/hyperscan/src/grey.h
@@ -1,73 +1,73 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef GREY_H
-#define GREY_H
-
-#include <vector>
-#include <string>
-
-#include "ue2common.h"
-
-namespace ue2 {
-
-struct Grey {
- Grey(void);
-
- bool optimiseComponentTree;
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef GREY_H
+#define GREY_H
+
+#include <vector>
+#include <string>
+
+#include "ue2common.h"
+
+namespace ue2 {
+
+struct Grey {
+ Grey(void);
+
+ bool optimiseComponentTree;
+
bool calcComponents;
- bool performGraphSimplification;
- bool prefilterReductions;
- bool removeEdgeRedundancy;
-
- bool allowGough;
- bool allowHaigLit;
- bool allowLitHaig;
- bool allowLbr;
- bool allowMcClellan;
+ bool performGraphSimplification;
+ bool prefilterReductions;
+ bool removeEdgeRedundancy;
+
+ bool allowGough;
+ bool allowHaigLit;
+ bool allowLitHaig;
+ bool allowLbr;
+ bool allowMcClellan;
bool allowSheng;
bool allowMcSheng;
- bool allowPuff;
+ bool allowPuff;
bool allowLiteral;
bool allowViolet;
- bool allowExtendedNFA;
- bool allowLimExNFA;
- bool allowAnchoredAcyclic;
- bool allowSmallLiteralSet;
- bool allowCastle;
- bool allowDecoratedLiteral;
+ bool allowExtendedNFA;
+ bool allowLimExNFA;
+ bool allowAnchoredAcyclic;
+ bool allowSmallLiteralSet;
+ bool allowCastle;
+ bool allowDecoratedLiteral;
bool allowApproximateMatching;
-
- bool allowNoodle;
- bool fdrAllowTeddy;
+
+ bool allowNoodle;
+ bool fdrAllowTeddy;
bool fdrAllowFlood;
-
+
u32 violetAvoidSuffixes; /* 0=never, 1=sometimes, 2=always */
bool violetAvoidWeakInfixes;
bool violetDoubleCut;
@@ -76,144 +76,144 @@ struct Grey {
u32 violetDoubleCutLiteralLen;
u32 violetEarlyCleanLiteralLen;
- bool puffImproveHead;
- bool castleExclusive; // enable castle mutual exclusion analysis
-
- bool mergeSEP;
- bool mergeRose;
- bool mergeSuffixes;
- bool mergeOutfixes;
- bool onlyOneOutfix; // if > 1 outfix, fail compile
-
- bool allowShermanStates;
- bool allowMcClellan8;
+ bool puffImproveHead;
+ bool castleExclusive; // enable castle mutual exclusion analysis
+
+ bool mergeSEP;
+ bool mergeRose;
+ bool mergeSuffixes;
+ bool mergeOutfixes;
+ bool onlyOneOutfix; // if > 1 outfix, fail compile
+
+ bool allowShermanStates;
+ bool allowMcClellan8;
bool allowWideStates; // enable wide state for McClellan8
- bool highlanderPruneDFA;
- bool minimizeDFA;
-
- bool accelerateDFA;
- bool accelerateNFA;
- bool reverseAccelerate;
-
- bool squashNFA;
- bool compressNFAState;
- bool numberNFAStatesWrong;
- bool highlanderSquash;
- bool allowZombies;
- bool floodAsPuffette;
-
- u32 nfaForceSize;
-
- u32 maxHistoryAvailable;
- u32 minHistoryAvailable;
- u32 maxAnchoredRegion;
- u32 minRoseLiteralLength;
- u32 minRoseNetflowLiteralLength;
- u32 maxRoseNetflowEdges;
+ bool highlanderPruneDFA;
+ bool minimizeDFA;
+
+ bool accelerateDFA;
+ bool accelerateNFA;
+ bool reverseAccelerate;
+
+ bool squashNFA;
+ bool compressNFAState;
+ bool numberNFAStatesWrong;
+ bool highlanderSquash;
+ bool allowZombies;
+ bool floodAsPuffette;
+
+ u32 nfaForceSize;
+
+ u32 maxHistoryAvailable;
+ u32 minHistoryAvailable;
+ u32 maxAnchoredRegion;
+ u32 minRoseLiteralLength;
+ u32 minRoseNetflowLiteralLength;
+ u32 maxRoseNetflowEdges;
u32 maxEditDistance;
-
- u32 minExtBoundedRepeatSize; /* to be considered for ng_repeat */
-
- bool goughCopyPropagate;
- bool goughRegisterAllocate;
-
- bool shortcutLiterals;
-
- bool roseGraphReduction;
- bool roseRoleAliasing;
- bool roseMasks;
- bool roseConvertFloodProneSuffixes;
- bool roseMergeRosesDuringAliasing;
- bool roseMultiTopRoses;
- bool roseHamsterMasks;
- bool roseLookaroundMasks;
- u32 roseMcClellanPrefix; /* 0 = off, 1 = only if large nfa, 2 = always */
- u32 roseMcClellanSuffix; /* 0 = off, 1 = only if very large nfa, 2 =
- * always */
- u32 roseMcClellanOutfix; /* 0 = off, 1 = sometimes, 2 = almost always */
- bool roseTransformDelay;
-
- bool earlyMcClellanPrefix;
- bool earlyMcClellanInfix;
- bool earlyMcClellanSuffix;
-
- bool allowCountingMiracles;
-
- bool allowSomChain;
- u32 somMaxRevNfaLength;
-
- bool hamsterAccelForward;
- bool hamsterAccelReverse; // currently not implemented
-
- u32 miracleHistoryBonus; /* cheap hack to make miracles better, TODO
- * something dignified */
-
- bool equivalenceEnable;
-
- // SmallWrite engine
- bool allowSmallWrite;
+
+ u32 minExtBoundedRepeatSize; /* to be considered for ng_repeat */
+
+ bool goughCopyPropagate;
+ bool goughRegisterAllocate;
+
+ bool shortcutLiterals;
+
+ bool roseGraphReduction;
+ bool roseRoleAliasing;
+ bool roseMasks;
+ bool roseConvertFloodProneSuffixes;
+ bool roseMergeRosesDuringAliasing;
+ bool roseMultiTopRoses;
+ bool roseHamsterMasks;
+ bool roseLookaroundMasks;
+ u32 roseMcClellanPrefix; /* 0 = off, 1 = only if large nfa, 2 = always */
+ u32 roseMcClellanSuffix; /* 0 = off, 1 = only if very large nfa, 2 =
+ * always */
+ u32 roseMcClellanOutfix; /* 0 = off, 1 = sometimes, 2 = almost always */
+ bool roseTransformDelay;
+
+ bool earlyMcClellanPrefix;
+ bool earlyMcClellanInfix;
+ bool earlyMcClellanSuffix;
+
+ bool allowCountingMiracles;
+
+ bool allowSomChain;
+ u32 somMaxRevNfaLength;
+
+ bool hamsterAccelForward;
+ bool hamsterAccelReverse; // currently not implemented
+
+ u32 miracleHistoryBonus; /* cheap hack to make miracles better, TODO
+ * something dignified */
+
+ bool equivalenceEnable;
+
+ // SmallWrite engine
+ bool allowSmallWrite;
bool allowSmallWriteSheng;
- u32 smallWriteLargestBuffer; // largest buffer that can be small write
- u32 smallWriteLargestBufferBad;// largest buffer that can be small write
- u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs
+ u32 smallWriteLargestBuffer; // largest buffer that can be small write
+ u32 smallWriteLargestBufferBad;// largest buffer that can be small write
+ u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs
u32 smallWriteMaxPatterns; // only try small writes if fewer patterns
u32 smallWriteMaxLiterals; // only try small writes if fewer literals
u32 smallWriteMergeBatchSize; // number of DFAs to merge in a batch
-
+
// Tamarama engine
bool allowTamarama;
u32 tamaChunkSize; //!< max chunk size for exclusivity analysis in Tamarama
- enum DumpFlags {
- DUMP_NONE = 0,
- DUMP_BASICS = 1 << 0, // Dump basic textual data
- DUMP_PARSE = 1 << 1, // Dump component tree to .txt
- DUMP_INT_GRAPH = 1 << 2, // Dump non-implementation graphs
- DUMP_IMPL = 1 << 3 // Dump implementation graphs
- };
-
- u32 dumpFlags;
- std::string dumpPath;
-
- /* Resource limits. These are somewhat arbitrary, but are intended to bound
- * the input to many of our internal structures. Exceeding one of these
- * limits will cause an error to be returned to the user.
- *
- * NOTE: Raising these limitations make cause smoke to come out of parts of
- * the runtime. */
-
- u32 limitPatternCount; //!< max number of patterns
- u32 limitPatternLength; //!< max number of characters in a regex
- u32 limitGraphVertices; //!< max number of states in built NFA graph
- u32 limitGraphEdges; //!< max number of edges in build NFA graph
- u32 limitReportCount; //!< max number of ReportIDs allocated internally
-
- // HWLM literal matcher limits.
- u32 limitLiteralCount; //!< max number of literals in an HWLM table
- u32 limitLiteralLength; //!< max number of characters in a literal
- u32 limitLiteralMatcherChars; //!< max characters in an HWLM literal matcher
- u32 limitLiteralMatcherSize; //!< max size of an HWLM matcher (in bytes)
-
- // Rose limits.
- u32 limitRoseRoleCount; //!< max number of Rose roles
- u32 limitRoseEngineCount; //!< max prefix/infix/suffix/outfix engines
- u32 limitRoseAnchoredSize; //!< max total size of anchored DFAs (bytes)
-
- // Engine (DFA/NFA/etc) limits.
- u32 limitEngineSize; //!< max size of an engine (in bytes)
- u32 limitDFASize; //!< max size of a DFA (in bytes)
- u32 limitNFASize; //!< max size of an NFA (in bytes)
- u32 limitLBRSize; //!< max size of an LBR engine (in bytes)
+ enum DumpFlags {
+ DUMP_NONE = 0,
+ DUMP_BASICS = 1 << 0, // Dump basic textual data
+ DUMP_PARSE = 1 << 1, // Dump component tree to .txt
+ DUMP_INT_GRAPH = 1 << 2, // Dump non-implementation graphs
+ DUMP_IMPL = 1 << 3 // Dump implementation graphs
+ };
+
+ u32 dumpFlags;
+ std::string dumpPath;
+
+ /* Resource limits. These are somewhat arbitrary, but are intended to bound
+ * the input to many of our internal structures. Exceeding one of these
+ * limits will cause an error to be returned to the user.
+ *
+ * NOTE: Raising these limitations may cause smoke to come out of parts of
+ * the runtime. */
+
+ u32 limitPatternCount; //!< max number of patterns
+ u32 limitPatternLength; //!< max number of characters in a regex
+ u32 limitGraphVertices; //!< max number of states in built NFA graph
+ u32 limitGraphEdges; //!< max number of edges in build NFA graph
+ u32 limitReportCount; //!< max number of ReportIDs allocated internally
+
+ // HWLM literal matcher limits.
+ u32 limitLiteralCount; //!< max number of literals in an HWLM table
+ u32 limitLiteralLength; //!< max number of characters in a literal
+ u32 limitLiteralMatcherChars; //!< max characters in an HWLM literal matcher
+ u32 limitLiteralMatcherSize; //!< max size of an HWLM matcher (in bytes)
+
+ // Rose limits.
+ u32 limitRoseRoleCount; //!< max number of Rose roles
+ u32 limitRoseEngineCount; //!< max prefix/infix/suffix/outfix engines
+ u32 limitRoseAnchoredSize; //!< max total size of anchored DFAs (bytes)
+
+ // Engine (DFA/NFA/etc) limits.
+ u32 limitEngineSize; //!< max size of an engine (in bytes)
+ u32 limitDFASize; //!< max size of a DFA (in bytes)
+ u32 limitNFASize; //!< max size of an NFA (in bytes)
+ u32 limitLBRSize; //!< max size of an LBR engine (in bytes)
// Approximate matching limits.
u32 limitApproxMatchingVertices; //!< max number of vertices per graph
-};
-
-#ifndef RELEASE_BUILD
-#include <string>
-void applyGreyOverrides(Grey *g, const std::string &overrides);
-#endif
-
-} // namespace ue2
-
-#endif
+};
+
+#ifndef RELEASE_BUILD
+#include <string>
+void applyGreyOverrides(Grey *g, const std::string &overrides);
+#endif
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/hs.cpp b/contrib/libs/hyperscan/src/hs.cpp
index 2d519e6169..eac588891c 100644
--- a/contrib/libs/hyperscan/src/hs.cpp
+++ b/contrib/libs/hyperscan/src/hs.cpp
@@ -1,200 +1,200 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Compiler front-end, including public API calls for compilation.
- */
-#include "allocator.h"
-#include "ue2common.h"
-#include "grey.h"
-#include "hs_compile.h"
-#include "hs_internal.h"
-#include "database.h"
-#include "compiler/compiler.h"
-#include "compiler/error.h"
-#include "nfagraph/ng.h"
-#include "nfagraph/ng_expr_info.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Compiler front-end, including public API calls for compilation.
+ */
+#include "allocator.h"
+#include "ue2common.h"
+#include "grey.h"
+#include "hs_compile.h"
+#include "hs_internal.h"
+#include "database.h"
+#include "compiler/compiler.h"
+#include "compiler/error.h"
+#include "nfagraph/ng.h"
+#include "nfagraph/ng_expr_info.h"
#include "parser/Parser.h"
-#include "parser/parse_error.h"
-#include "parser/prefilter.h"
+#include "parser/parse_error.h"
+#include "parser/prefilter.h"
#include "parser/unsupported.h"
-#include "util/compile_error.h"
-#include "util/cpuid_flags.h"
+#include "util/compile_error.h"
+#include "util/cpuid_flags.h"
#include "util/cpuid_inline.h"
-#include "util/depth.h"
-#include "util/popcount.h"
-#include "util/target_info.h"
-
-#include <cassert>
-#include <cstddef>
-#include <cstring>
-#include <limits.h>
-#include <string>
-#include <vector>
-
-using namespace std;
-using namespace ue2;
-
-/** \brief Cheap check that no unexpected mode flags are on. */
-static
-bool validModeFlags(unsigned int mode) {
- static const unsigned allModeFlags = HS_MODE_BLOCK
- | HS_MODE_STREAM
- | HS_MODE_VECTORED
- | HS_MODE_SOM_HORIZON_LARGE
- | HS_MODE_SOM_HORIZON_MEDIUM
- | HS_MODE_SOM_HORIZON_SMALL;
-
- return !(mode & ~allModeFlags);
-}
-
-/** \brief Validate mode flags. */
-static
-bool checkMode(unsigned int mode, hs_compile_error **comp_error) {
- // First, check that only bits with meaning are on.
- if (!validModeFlags(mode)) {
- *comp_error = generateCompileError("Invalid parameter: "
- "unrecognised mode flags.", -1);
- return false;
- }
-
- // Our mode must be ONE of (block, streaming, vectored).
- unsigned checkmode
- = mode & (HS_MODE_STREAM | HS_MODE_BLOCK | HS_MODE_VECTORED);
- if (popcount32(checkmode) != 1) {
- *comp_error = generateCompileError(
- "Invalid parameter: mode must have one "
- "(and only one) of HS_MODE_BLOCK, HS_MODE_STREAM or "
- "HS_MODE_VECTORED set.",
- -1);
- return false;
- }
-
- // If you specify SOM precision, you must be in streaming mode and you only
- // get to have one.
- unsigned somMode = mode & (HS_MODE_SOM_HORIZON_LARGE |
- HS_MODE_SOM_HORIZON_MEDIUM |
- HS_MODE_SOM_HORIZON_SMALL);
- if (somMode) {
- if (!(mode & HS_MODE_STREAM)) {
- *comp_error = generateCompileError("Invalid parameter: the "
- "HS_MODE_SOM_HORIZON_ mode flags may only be set in "
- "streaming mode.", -1);
- return false;
-
- }
- if ((somMode & (somMode - 1)) != 0) {
- *comp_error = generateCompileError("Invalid parameter: only one "
- "HS_MODE_SOM_HORIZON_ mode flag can be set.", -1);
- return false;
- }
- }
-
- return true;
-}
-
-static
-bool checkPlatform(const hs_platform_info *p, hs_compile_error **comp_error) {
+#include "util/depth.h"
+#include "util/popcount.h"
+#include "util/target_info.h"
+
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <limits.h>
+#include <string>
+#include <vector>
+
+using namespace std;
+using namespace ue2;
+
+/** \brief Cheap check that no unexpected mode flags are on.
+ *
+ * Builds a mask of the six HS_MODE_* flags listed in the body and tests
+ * \p mode against it. This only rejects bits outside that mask; it does not
+ * check whether the combination of known flags makes sense.
+ *
+ * \param mode Mode bitmask to examine.
+ * \return true if no bit outside the known-flag mask is set, false otherwise.
+ */
+static
+bool validModeFlags(unsigned int mode) {
+ static const unsigned allModeFlags = HS_MODE_BLOCK
+ | HS_MODE_STREAM
+ | HS_MODE_VECTORED
+ | HS_MODE_SOM_HORIZON_LARGE
+ | HS_MODE_SOM_HORIZON_MEDIUM
+ | HS_MODE_SOM_HORIZON_SMALL;
+
+ return !(mode & ~allModeFlags); // any bit outside the mask => invalid
+}
+
+/** \brief Validate mode flags.
+ *
+ * Runs three checks in order and stops at the first failure:
+ *  1. no bits outside the known mode flags are set (validModeFlags());
+ *  2. the block/stream/vectored selection passes the popcount32() == 1
+ *     test, i.e. one and only one of them is set per the error text;
+ *  3. if any HS_MODE_SOM_HORIZON_* flag is set, HS_MODE_STREAM must also be
+ *     set and no more than one SOM horizon flag may be present.
+ *
+ * \param mode Mode bitmask to validate.
+ * \param comp_error Output location for the error object. It is dereferenced
+ *        without a null check, so the caller must pass a valid pointer. It
+ *        is assigned the result of generateCompileError() on every failure
+ *        path and is left unwritten on success.
+ * \return true if \p mode passes every check, false otherwise.
+ */
+static
+bool checkMode(unsigned int mode, hs_compile_error **comp_error) {
+ // First, check that only bits with meaning are on.
+ if (!validModeFlags(mode)) {
+ *comp_error = generateCompileError("Invalid parameter: "
+ "unrecognised mode flags.", -1);
+ return false;
+ }
+
+ // Our mode must be ONE of (block, streaming, vectored).
+ unsigned checkmode
+ = mode & (HS_MODE_STREAM | HS_MODE_BLOCK | HS_MODE_VECTORED);
+ if (popcount32(checkmode) != 1) {
+ *comp_error = generateCompileError(
+ "Invalid parameter: mode must have one "
+ "(and only one) of HS_MODE_BLOCK, HS_MODE_STREAM or "
+ "HS_MODE_VECTORED set.",
+ -1);
+ return false;
+ }
+
+ // If you specify SOM precision, you must be in streaming mode and you only
+ // get to have one.
+ unsigned somMode = mode & (HS_MODE_SOM_HORIZON_LARGE |
+ HS_MODE_SOM_HORIZON_MEDIUM |
+ HS_MODE_SOM_HORIZON_SMALL);
+ if (somMode) {
+ if (!(mode & HS_MODE_STREAM)) {
+ *comp_error = generateCompileError("Invalid parameter: the "
+ "HS_MODE_SOM_HORIZON_ mode flags may only be set in "
+ "streaming mode.", -1);
+ return false;
+
+ }
+ if ((somMode & (somMode - 1)) != 0) { // x & (x - 1) clears the lowest set bit; non-zero => several flags
+ *comp_error = generateCompileError("Invalid parameter: only one "
+ "HS_MODE_SOM_HORIZON_ mode flag can be set.", -1);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static
+bool checkPlatform(const hs_platform_info *p, hs_compile_error **comp_error) {
static constexpr u32 HS_TUNE_LAST = HS_TUNE_FAMILY_ICX;
static constexpr u32 HS_CPU_FEATURES_ALL =
HS_CPU_FEATURES_AVX2 | HS_CPU_FEATURES_AVX512 |
HS_CPU_FEATURES_AVX512VBMI;
-
- if (!p) {
- return true;
- }
-
- if (p->cpu_features & ~HS_CPU_FEATURES_ALL) {
- *comp_error = generateCompileError("Invalid cpu features specified in "
- "the platform information.", -1);
- return false;
- }
-
- if (p->tune > HS_TUNE_LAST) {
- *comp_error = generateCompileError("Invalid tuning value specified in "
- "the platform information.", -1);
- return false;
- }
-
- return true;
-}
-
-/** \brief Convert from SOM mode to bytes of precision. */
-static
-unsigned getSomPrecision(unsigned mode) {
- if (mode & HS_MODE_VECTORED) {
- /* always assume full precision for vectoring */
- return 8;
- }
-
- if (mode & HS_MODE_SOM_HORIZON_LARGE) {
- return 8;
- } else if (mode & HS_MODE_SOM_HORIZON_MEDIUM) {
- return 4;
- } else if (mode & HS_MODE_SOM_HORIZON_SMALL) {
- return 2;
- }
- return 0;
-}
-
-namespace ue2 {
-
-hs_error_t
-hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
- const unsigned *ids, const hs_expr_ext *const *ext,
- unsigned elements, unsigned mode,
- const hs_platform_info_t *platform, hs_database_t **db,
- hs_compile_error_t **comp_error, const Grey &g) {
- // Check the args: note that it's OK for flags, ids or ext to be null.
- if (!comp_error) {
- if (db) {
- *db = nullptr;
- }
- // nowhere to write the string, but we can still report an error code
- return HS_COMPILER_ERROR;
- }
- if (!db) {
- *comp_error = generateCompileError("Invalid parameter: db is NULL", -1);
- return HS_COMPILER_ERROR;
- }
- if (!expressions) {
- *db = nullptr;
- *comp_error
- = generateCompileError("Invalid parameter: expressions is NULL",
- -1);
- return HS_COMPILER_ERROR;
- }
- if (elements == 0) {
- *db = nullptr;
- *comp_error = generateCompileError("Invalid parameter: elements is zero", -1);
- return HS_COMPILER_ERROR;
- }
-
+
+ if (!p) {
+ return true;
+ }
+
+ if (p->cpu_features & ~HS_CPU_FEATURES_ALL) {
+ *comp_error = generateCompileError("Invalid cpu features specified in "
+ "the platform information.", -1);
+ return false;
+ }
+
+ if (p->tune > HS_TUNE_LAST) {
+ *comp_error = generateCompileError("Invalid tuning value specified in "
+ "the platform information.", -1);
+ return false;
+ }
+
+ return true;
+}
+
+/** \brief Convert from SOM mode to bytes of precision. */
+static
+unsigned getSomPrecision(unsigned mode) {
+ if (mode & HS_MODE_VECTORED) {
+ /* always assume full precision for vectoring */
+ return 8;
+ }
+
+ if (mode & HS_MODE_SOM_HORIZON_LARGE) {
+ return 8;
+ } else if (mode & HS_MODE_SOM_HORIZON_MEDIUM) {
+ return 4;
+ } else if (mode & HS_MODE_SOM_HORIZON_SMALL) {
+ return 2;
+ }
+ return 0;
+}
+
+namespace ue2 {
+
+hs_error_t
+hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
+ const unsigned *ids, const hs_expr_ext *const *ext,
+ unsigned elements, unsigned mode,
+ const hs_platform_info_t *platform, hs_database_t **db,
+ hs_compile_error_t **comp_error, const Grey &g) {
+ // Check the args: note that it's OK for flags, ids or ext to be null.
+ if (!comp_error) {
+ if (db) {
+ *db = nullptr;
+ }
+ // nowhere to write the string, but we can still report an error code
+ return HS_COMPILER_ERROR;
+ }
+ if (!db) {
+ *comp_error = generateCompileError("Invalid parameter: db is NULL", -1);
+ return HS_COMPILER_ERROR;
+ }
+ if (!expressions) {
+ *db = nullptr;
+ *comp_error
+ = generateCompileError("Invalid parameter: expressions is NULL",
+ -1);
+ return HS_COMPILER_ERROR;
+ }
+ if (elements == 0) {
+ *db = nullptr;
+ *comp_error = generateCompileError("Invalid parameter: elements is zero", -1);
+ return HS_COMPILER_ERROR;
+ }
+
#if defined(FAT_RUNTIME)
if (!check_ssse3()) {
*db = nullptr;
@@ -203,85 +203,85 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
}
#endif
- if (!checkMode(mode, comp_error)) {
- *db = nullptr;
- assert(*comp_error); // set by checkMode.
- return HS_COMPILER_ERROR;
- }
-
- if (!checkPlatform(platform, comp_error)) {
- *db = nullptr;
- assert(*comp_error); // set by checkPlatform.
- return HS_COMPILER_ERROR;
- }
-
- if (elements > g.limitPatternCount) {
- *db = nullptr;
- *comp_error = generateCompileError("Number of patterns too large", -1);
- return HS_COMPILER_ERROR;
- }
-
- // This function is simply a wrapper around both the parser and compiler
- bool isStreaming = mode & (HS_MODE_STREAM | HS_MODE_VECTORED);
- bool isVectored = mode & HS_MODE_VECTORED;
- unsigned somPrecision = getSomPrecision(mode);
-
- target_t target_info = platform ? target_t(*platform)
- : get_current_target();
-
+ if (!checkMode(mode, comp_error)) {
+ *db = nullptr;
+ assert(*comp_error); // set by checkMode.
+ return HS_COMPILER_ERROR;
+ }
+
+ if (!checkPlatform(platform, comp_error)) {
+ *db = nullptr;
+ assert(*comp_error); // set by checkPlatform.
+ return HS_COMPILER_ERROR;
+ }
+
+ if (elements > g.limitPatternCount) {
+ *db = nullptr;
+ *comp_error = generateCompileError("Number of patterns too large", -1);
+ return HS_COMPILER_ERROR;
+ }
+
+ // This function is simply a wrapper around both the parser and compiler
+ bool isStreaming = mode & (HS_MODE_STREAM | HS_MODE_VECTORED);
+ bool isVectored = mode & HS_MODE_VECTORED;
+ unsigned somPrecision = getSomPrecision(mode);
+
+ target_t target_info = platform ? target_t(*platform)
+ : get_current_target();
+
try {
CompileContext cc(isStreaming, isVectored, target_info, g);
NG ng(cc, elements, somPrecision);
-
- for (unsigned int i = 0; i < elements; i++) {
- // Add this expression to the compiler
- try {
- addExpression(ng, i, expressions[i], flags ? flags[i] : 0,
- ext ? ext[i] : nullptr, ids ? ids[i] : 0);
- } catch (CompileError &e) {
- /* Caught a parse error:
- * throw it upstream as a CompileError with a specific index */
- e.setExpressionIndex(i);
- throw; /* do not slice */
- }
- }
-
+
+ for (unsigned int i = 0; i < elements; i++) {
+ // Add this expression to the compiler
+ try {
+ addExpression(ng, i, expressions[i], flags ? flags[i] : 0,
+ ext ? ext[i] : nullptr, ids ? ids[i] : 0);
+ } catch (CompileError &e) {
+ /* Caught a parse error:
+ * throw it upstream as a CompileError with a specific index */
+ e.setExpressionIndex(i);
+ throw; /* do not slice */
+ }
+ }
+
// Check sub-expression ids
ng.rm.pl.validateSubIDs(ids, expressions, flags, elements);
// Renumber and assign lkey to reports
ng.rm.logicalKeyRenumber();
- unsigned length = 0;
+ unsigned length = 0;
struct hs_database *out = build(ng, &length, 0);
-
- assert(out); // should have thrown exception on error
- assert(length);
-
- *db = out;
- *comp_error = nullptr;
-
- return HS_SUCCESS;
- }
- catch (const CompileError &e) {
- // Compiler error occurred
- *db = nullptr;
- *comp_error = generateCompileError(e.reason,
- e.hasIndex ? (int)e.index : -1);
- return HS_COMPILER_ERROR;
- }
+
+ assert(out); // should have thrown exception on error
+ assert(length);
+
+ *db = out;
+ *comp_error = nullptr;
+
+ return HS_SUCCESS;
+ }
+ catch (const CompileError &e) {
+ // Compiler error occurred
+ *db = nullptr;
+ *comp_error = generateCompileError(e.reason,
+ e.hasIndex ? (int)e.index : -1);
+ return HS_COMPILER_ERROR;
+ }
catch (const std::bad_alloc &) {
- *db = nullptr;
- *comp_error = const_cast<hs_compile_error_t *>(&hs_enomem);
- return HS_COMPILER_ERROR;
- }
- catch (...) {
- assert(!"Internal error, unexpected exception");
- *db = nullptr;
- *comp_error = const_cast<hs_compile_error_t *>(&hs_einternal);
- return HS_COMPILER_ERROR;
- }
-}
-
+ *db = nullptr;
+ *comp_error = const_cast<hs_compile_error_t *>(&hs_enomem);
+ return HS_COMPILER_ERROR;
+ }
+ catch (...) {
+ assert(!"Internal error, unexpected exception");
+ *db = nullptr;
+ *comp_error = const_cast<hs_compile_error_t *>(&hs_einternal);
+ return HS_COMPILER_ERROR;
+ }
+}
+
hs_error_t
hs_compile_lit_multi_int(const char *const *expressions, const unsigned *flags,
const unsigned *ids, const hs_expr_ext *const *ext,
@@ -406,40 +406,40 @@ hs_compile_lit_multi_int(const char *const *expressions, const unsigned *flags,
}
}
-} // namespace ue2
-
-extern "C" HS_PUBLIC_API
+} // namespace ue2
+
+extern "C" HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile(const char *expression, unsigned flags,
unsigned mode,
const hs_platform_info_t *platform,
hs_database_t **db, hs_compile_error_t **error) {
- if (expression == nullptr) {
- *db = nullptr;
- *error = generateCompileError("Invalid parameter: expression is NULL",
- -1);
- return HS_COMPILER_ERROR;
- }
-
- unsigned id = 0; // single expressions get zero as an ID
- const hs_expr_ext * const *ext = nullptr; // unused for this call.
-
- return hs_compile_multi_int(&expression, &flags, &id, ext, 1, mode,
- platform, db, error, Grey());
-}
-
-extern "C" HS_PUBLIC_API
+ if (expression == nullptr) {
+ *db = nullptr;
+ *error = generateCompileError("Invalid parameter: expression is NULL",
+ -1);
+ return HS_COMPILER_ERROR;
+ }
+
+ unsigned id = 0; // single expressions get zero as an ID
+ const hs_expr_ext * const *ext = nullptr; // unused for this call.
+
+ return hs_compile_multi_int(&expression, &flags, &id, ext, 1, mode,
+ platform, db, error, Grey());
+}
+
+extern "C" HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_multi(const char *const *expressions,
const unsigned *flags, const unsigned *ids,
unsigned elements, unsigned mode,
const hs_platform_info_t *platform,
hs_database_t **db,
hs_compile_error_t **error) {
- const hs_expr_ext * const *ext = nullptr; // unused for this call.
- return hs_compile_multi_int(expressions, flags, ids, ext, elements, mode,
- platform, db, error, Grey());
-}
-
-extern "C" HS_PUBLIC_API
+ const hs_expr_ext * const *ext = nullptr; // unused for this call.
+ return hs_compile_multi_int(expressions, flags, ids, ext, elements, mode,
+ platform, db, error, Grey());
+}
+
+extern "C" HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_ext_multi(const char * const *expressions,
const unsigned *flags, const unsigned *ids,
const hs_expr_ext * const *ext,
@@ -447,10 +447,10 @@ hs_error_t HS_CDECL hs_compile_ext_multi(const char * const *expressions,
const hs_platform_info_t *platform,
hs_database_t **db,
hs_compile_error_t **error) {
- return hs_compile_multi_int(expressions, flags, ids, ext, elements, mode,
- platform, db, error, Grey());
-}
-
+ return hs_compile_multi_int(expressions, flags, ids, ext, elements, mode,
+ platform, db, error, Grey());
+}
+
extern "C" HS_PUBLIC_API
hs_error_t HS_CDECL hs_compile_lit(const char *expression, unsigned flags,
const size_t len, unsigned mode,
@@ -486,16 +486,16 @@ hs_error_t HS_CDECL hs_compile_lit_multi(const char * const *expressions,
Grey());
}
-static
-hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
+static
+hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
const hs_expr_ext_t *ext, unsigned int mode,
hs_expr_info_t **info,
- hs_compile_error_t **error) {
- if (!error) {
- // nowhere to write an error, but we can still return an error code.
- return HS_COMPILER_ERROR;
- }
-
+ hs_compile_error_t **error) {
+ if (!error) {
+ // nowhere to write an error, but we can still return an error code.
+ return HS_COMPILER_ERROR;
+ }
+
#if defined(FAT_RUNTIME)
if (!check_ssse3()) {
*error = generateCompileError("Unsupported architecture", -1);
@@ -503,49 +503,49 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
}
#endif
- if (!info) {
- *error = generateCompileError("Invalid parameter: info is NULL", -1);
- return HS_COMPILER_ERROR;
- }
-
- if (!expression) {
- *error = generateCompileError("Invalid parameter: expression is NULL",
- -1);
- return HS_COMPILER_ERROR;
- }
-
- *info = nullptr;
- *error = nullptr;
-
- hs_expr_info local_info;
- memset(&local_info, 0, sizeof(local_info));
-
- try {
- bool isStreaming = mode & (HS_MODE_STREAM | HS_MODE_VECTORED);
- bool isVectored = mode & HS_MODE_VECTORED;
-
- CompileContext cc(isStreaming, isVectored, get_current_target(),
- Grey());
-
- // Ensure that our pattern isn't too long (in characters).
- if (strlen(expression) > cc.grey.limitPatternLength) {
- throw ParseError("Pattern length exceeds limit.");
- }
-
- ReportManager rm(cc.grey);
+ if (!info) {
+ *error = generateCompileError("Invalid parameter: info is NULL", -1);
+ return HS_COMPILER_ERROR;
+ }
+
+ if (!expression) {
+ *error = generateCompileError("Invalid parameter: expression is NULL",
+ -1);
+ return HS_COMPILER_ERROR;
+ }
+
+ *info = nullptr;
+ *error = nullptr;
+
+ hs_expr_info local_info;
+ memset(&local_info, 0, sizeof(local_info));
+
+ try {
+ bool isStreaming = mode & (HS_MODE_STREAM | HS_MODE_VECTORED);
+ bool isVectored = mode & HS_MODE_VECTORED;
+
+ CompileContext cc(isStreaming, isVectored, get_current_target(),
+ Grey());
+
+ // Ensure that our pattern isn't too long (in characters).
+ if (strlen(expression) > cc.grey.limitPatternLength) {
+ throw ParseError("Pattern length exceeds limit.");
+ }
+
+ ReportManager rm(cc.grey);
ParsedExpression pe(0, expression, flags, 0, ext);
- assert(pe.component);
-
- // Apply prefiltering transformations if desired.
+ assert(pe.component);
+
+ // Apply prefiltering transformations if desired.
if (pe.expr.prefilter) {
- prefilterTree(pe.component, ParseMode(flags));
- }
-
+ prefilterTree(pe.component, ParseMode(flags));
+ }
+
// Expressions containing zero-width assertions and other extended pcre
// types aren't supported yet. This call will throw a ParseError
// exception if the component tree contains such a construct.
checkUnsupported(*pe.component);
-
+
pe.component->checkEmbeddedStartAnchor(true);
pe.component->checkEmbeddedEndAnchor(true);
@@ -553,40 +553,40 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
unique_ptr<NGHolder> &g = built_expr.g;
ExpressionInfo &expr = built_expr.expr;
- if (!g) {
- DEBUG_PRINTF("NFA build failed, but no exception was thrown.\n");
- throw ParseError("Internal error.");
- }
-
+ if (!g) {
+ DEBUG_PRINTF("NFA build failed, but no exception was thrown.\n");
+ throw ParseError("Internal error.");
+ }
+
fillExpressionInfo(rm, cc, *g, expr, &local_info);
- }
- catch (const CompileError &e) {
- // Compiler error occurred
- *error = generateCompileError(e);
- return HS_COMPILER_ERROR;
- }
+ }
+ catch (const CompileError &e) {
+ // Compiler error occurred
+ *error = generateCompileError(e);
+ return HS_COMPILER_ERROR;
+ }
catch (std::bad_alloc &) {
- *error = const_cast<hs_compile_error_t *>(&hs_enomem);
- return HS_COMPILER_ERROR;
- }
- catch (...) {
- assert(!"Internal error, unexpected exception");
- *error = const_cast<hs_compile_error_t *>(&hs_einternal);
- return HS_COMPILER_ERROR;
- }
-
- hs_expr_info *rv = (hs_expr_info *)hs_misc_alloc(sizeof(*rv));
- if (!rv) {
- *error = const_cast<hs_compile_error_t *>(&hs_enomem);
- return HS_COMPILER_ERROR;
- }
-
- *rv = local_info;
- *info = rv;
- return HS_SUCCESS;
-}
-
-extern "C" HS_PUBLIC_API
+ *error = const_cast<hs_compile_error_t *>(&hs_enomem);
+ return HS_COMPILER_ERROR;
+ }
+ catch (...) {
+ assert(!"Internal error, unexpected exception");
+ *error = const_cast<hs_compile_error_t *>(&hs_einternal);
+ return HS_COMPILER_ERROR;
+ }
+
+ hs_expr_info *rv = (hs_expr_info *)hs_misc_alloc(sizeof(*rv));
+ if (!rv) {
+ *error = const_cast<hs_compile_error_t *>(&hs_enomem);
+ return HS_COMPILER_ERROR;
+ }
+
+ *rv = local_info;
+ *info = rv;
+ return HS_SUCCESS;
+}
+
+extern "C" HS_PUBLIC_API
hs_error_t HS_CDECL hs_expression_info(const char *expression,
unsigned int flags,
hs_expr_info_t **info,
@@ -602,30 +602,30 @@ hs_error_t HS_CDECL hs_expression_ext_info(const char *expression,
hs_expr_info_t **info,
hs_compile_error_t **error) {
return hs_expression_info_int(expression, flags, ext, HS_MODE_BLOCK, info,
- error);
-}
-
-extern "C" HS_PUBLIC_API
+ error);
+}
+
+extern "C" HS_PUBLIC_API
hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform) {
- if (!platform) {
- return HS_INVALID;
- }
-
- memset(platform, 0, sizeof(*platform));
-
- platform->cpu_features = cpuid_flags();
- platform->tune = cpuid_tune();
-
- return HS_SUCCESS;
-}
-
-extern "C" HS_PUBLIC_API
+ if (!platform) {
+ return HS_INVALID;
+ }
+
+ memset(platform, 0, sizeof(*platform));
+
+ platform->cpu_features = cpuid_flags();
+ platform->tune = cpuid_tune();
+
+ return HS_SUCCESS;
+}
+
+extern "C" HS_PUBLIC_API
hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error) {
#if defined(FAT_RUNTIME)
if (!check_ssse3()) {
return HS_ARCH_ERROR;
}
#endif
- freeCompileError(error);
- return HS_SUCCESS;
-}
+ freeCompileError(error);
+ return HS_SUCCESS;
+}
diff --git a/contrib/libs/hyperscan/src/hs.h b/contrib/libs/hyperscan/src/hs.h
index 3eaa86637e..2fe5d248b7 100644
--- a/contrib/libs/hyperscan/src/hs.h
+++ b/contrib/libs/hyperscan/src/hs.h
@@ -1,51 +1,51 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef HS_H_
-#define HS_H_
-
-/**
- * @file
- * @brief The complete Hyperscan API definition.
- *
- * Hyperscan is a high speed regular expression engine.
- *
- * This header includes both the Hyperscan compiler and runtime components. See
- * the individual component headers for documentation.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HS_H_
+#define HS_H_
+
+/**
+ * @file
+ * @brief The complete Hyperscan API definition.
+ *
+ * Hyperscan is a high speed regular expression engine.
+ *
+ * This header includes both the Hyperscan compiler and runtime components. See
+ * the individual component headers for documentation.
+ */
+
/* The current Hyperscan version information. */
#define HS_MAJOR 5
#define HS_MINOR 4
#define HS_PATCH 0
-#include "hs_compile.h"
-#include "hs_runtime.h"
-
-#endif /* HS_H_ */
+#include "hs_compile.h"
+#include "hs_runtime.h"
+
+#endif /* HS_H_ */
diff --git a/contrib/libs/hyperscan/src/hs_common.h b/contrib/libs/hyperscan/src/hs_common.h
index 163c38b703..93dc1fe8a1 100644
--- a/contrib/libs/hyperscan/src/hs_common.h
+++ b/contrib/libs/hyperscan/src/hs_common.h
@@ -1,455 +1,455 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef HS_COMMON_H_
-#define HS_COMMON_H_
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HS_COMMON_H_
+#define HS_COMMON_H_
+
#if defined(_WIN32)
#define HS_CDECL __cdecl
#else
#define HS_CDECL
#endif
-#include <stdlib.h>
-
-/**
- * @file
- * @brief The Hyperscan common API definition.
- *
- * Hyperscan is a high speed regular expression engine.
- *
- * This header contains functions available to both the Hyperscan compiler and
- * runtime.
- */
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-struct hs_database;
-
-/**
- * A Hyperscan pattern database.
- *
- * Generated by one of the Hyperscan compiler functions:
- * - @ref hs_compile()
- * - @ref hs_compile_multi()
- * - @ref hs_compile_ext_multi()
- */
-typedef struct hs_database hs_database_t;
-
-/**
- * A type for errors returned by Hyperscan functions.
- */
-typedef int hs_error_t;
-
-/**
- * Free a compiled pattern database.
- *
- * The free callback set by @ref hs_set_database_allocator() (or @ref
- * hs_set_allocator()) will be used by this function.
- *
- * @param db
- * A compiled pattern database. NULL may also be safely provided, in which
- * case the function does nothing.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+#include <stdlib.h>
+
+/**
+ * @file
+ * @brief The Hyperscan common API definition.
+ *
+ * Hyperscan is a high speed regular expression engine.
+ *
+ * This header contains functions available to both the Hyperscan compiler and
+ * runtime.
+ */
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+struct hs_database;
+
+/**
+ * A Hyperscan pattern database.
+ *
+ * Generated by one of the Hyperscan compiler functions:
+ * - @ref hs_compile()
+ * - @ref hs_compile_multi()
+ * - @ref hs_compile_ext_multi()
+ */
+typedef struct hs_database hs_database_t;
+
+/**
+ * A type for errors returned by Hyperscan functions.
+ */
+typedef int hs_error_t;
+
+/**
+ * Free a compiled pattern database.
+ *
+ * The free callback set by @ref hs_set_database_allocator() (or @ref
+ * hs_set_allocator()) will be used by this function.
+ *
+ * @param db
+ * A compiled pattern database. NULL may also be safely provided, in which
+ * case the function does nothing.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_free_database(hs_database_t *db);
-
-/**
- * Serialize a pattern database to a stream of bytes.
- *
- * The allocator callback set by @ref hs_set_misc_allocator() (or @ref
- * hs_set_allocator()) will be used by this function.
- *
- * @param db
- * A compiled pattern database.
- *
- * @param bytes
- * On success, a pointer to an array of bytes will be returned here.
- * These bytes can be subsequently relocated or written to disk. The
- * caller is responsible for freeing this block.
- *
- * @param length
- * On success, the number of bytes in the generated byte array will be
- * returned here.
- *
- * @return
- * @ref HS_SUCCESS on success, @ref HS_NOMEM if the byte array cannot be
- * allocated, other values may be returned if errors are detected.
- */
+
+/**
+ * Serialize a pattern database to a stream of bytes.
+ *
+ * The allocator callback set by @ref hs_set_misc_allocator() (or @ref
+ * hs_set_allocator()) will be used by this function.
+ *
+ * @param db
+ * A compiled pattern database.
+ *
+ * @param bytes
+ * On success, a pointer to an array of bytes will be returned here.
+ * These bytes can be subsequently relocated or written to disk. The
+ * caller is responsible for freeing this block.
+ *
+ * @param length
+ * On success, the number of bytes in the generated byte array will be
+ * returned here.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, @ref HS_NOMEM if the byte array cannot be
+ * allocated, other values may be returned if errors are detected.
+ */
hs_error_t HS_CDECL hs_serialize_database(const hs_database_t *db, char **bytes,
size_t *length);
-
-/**
- * Reconstruct a pattern database from a stream of bytes previously generated
- * by @ref hs_serialize_database().
- *
- * This function will allocate sufficient space for the database using the
- * allocator set with @ref hs_set_database_allocator() (or @ref
- * hs_set_allocator()); to use a pre-allocated region of memory, use the @ref
- * hs_deserialize_database_at() function.
- *
- * @param bytes
- * A byte array generated by @ref hs_serialize_database() representing a
- * compiled pattern database.
- *
- * @param length
- * The length of the byte array generated by @ref hs_serialize_database().
- * This should be the same value as that returned by @ref
- * hs_serialize_database().
- *
- * @param db
- * On success, a pointer to a newly allocated @ref hs_database_t will be
- * returned here. This database can then be used for scanning, and
- * eventually freed by the caller using @ref hs_free_database().
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+
+/**
+ * Reconstruct a pattern database from a stream of bytes previously generated
+ * by @ref hs_serialize_database().
+ *
+ * This function will allocate sufficient space for the database using the
+ * allocator set with @ref hs_set_database_allocator() (or @ref
+ * hs_set_allocator()); to use a pre-allocated region of memory, use the @ref
+ * hs_deserialize_database_at() function.
+ *
+ * @param bytes
+ * A byte array generated by @ref hs_serialize_database() representing a
+ * compiled pattern database.
+ *
+ * @param length
+ * The length of the byte array generated by @ref hs_serialize_database().
+ * This should be the same value as that returned by @ref
+ * hs_serialize_database().
+ *
+ * @param db
+ * On success, a pointer to a newly allocated @ref hs_database_t will be
+ * returned here. This database can then be used for scanning, and
+ * eventually freed by the caller using @ref hs_free_database().
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_deserialize_database(const char *bytes,
const size_t length,
hs_database_t **db);
-
-/**
- * Reconstruct a pattern database from a stream of bytes previously generated
- * by @ref hs_serialize_database() at a given memory location.
- *
- * This function (unlike @ref hs_deserialize_database()) will write the
+
+/**
+ * Reconstruct a pattern database from a stream of bytes previously generated
+ * by @ref hs_serialize_database() at a given memory location.
+ *
+ * This function (unlike @ref hs_deserialize_database()) will write the
* reconstructed database to the memory location given in the @p db parameter.
- * The amount of space required at this location can be determined with the
- * @ref hs_serialized_database_size() function.
- *
- * @param bytes
- * A byte array generated by @ref hs_serialize_database() representing a
- * compiled pattern database.
- *
- * @param length
- * The length of the byte array generated by @ref hs_serialize_database().
- * This should be the same value as that returned by @ref
- * hs_serialize_database().
- *
- * @param db
- * Pointer to an 8-byte aligned block of memory of sufficient size to hold
- * the deserialized database. On success, the reconstructed database will
- * be written to this location. This database can then be used for pattern
- * matching. The user is responsible for freeing this memory; the @ref
- * hs_free_database() call should not be used.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+ * The amount of space required at this location can be determined with the
+ * @ref hs_serialized_database_size() function.
+ *
+ * @param bytes
+ * A byte array generated by @ref hs_serialize_database() representing a
+ * compiled pattern database.
+ *
+ * @param length
+ * The length of the byte array generated by @ref hs_serialize_database().
+ * This should be the same value as that returned by @ref
+ * hs_serialize_database().
+ *
+ * @param db
+ * Pointer to an 8-byte aligned block of memory of sufficient size to hold
+ * the deserialized database. On success, the reconstructed database will
+ * be written to this location. This database can then be used for pattern
+ * matching. The user is responsible for freeing this memory; the @ref
+ * hs_free_database() call should not be used.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_deserialize_database_at(const char *bytes,
const size_t length,
hs_database_t *db);
-
-/**
- * Provides the size of the stream state allocated by a single stream opened
- * against the given database.
- *
- * @param database
- * Pointer to a compiled (streaming mode) pattern database.
- *
- * @param stream_size
- * On success, the size in bytes of an individual stream opened against the
- * given database is placed in this parameter.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+
+/**
+ * Provides the size of the stream state allocated by a single stream opened
+ * against the given database.
+ *
+ * @param database
+ * Pointer to a compiled (streaming mode) pattern database.
+ *
+ * @param stream_size
+ * On success, the size in bytes of an individual stream opened against the
+ * given database is placed in this parameter.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_stream_size(const hs_database_t *database,
size_t *stream_size);
-
-/**
- * Provides the size of the given database in bytes.
- *
- * @param database
- * Pointer to compiled pattern database.
- *
- * @param database_size
- * On success, the size of the compiled database in bytes is placed in this
- * parameter.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+
+/**
+ * Provides the size of the given database in bytes.
+ *
+ * @param database
+ * Pointer to compiled pattern database.
+ *
+ * @param database_size
+ * On success, the size of the compiled database in bytes is placed in this
+ * parameter.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_database_size(const hs_database_t *database,
size_t *database_size);
-
-/**
- * Utility function for reporting the size that would be required by a
- * database if it were deserialized.
- *
- * This can be used to allocate a shared memory region or other "special"
- * allocation prior to deserializing with the @ref hs_deserialize_database_at()
- * function.
- *
- * @param bytes
- * Pointer to a byte array generated by @ref hs_serialize_database()
- * representing a compiled pattern database.
- *
- * @param length
- * The length of the byte array generated by @ref hs_serialize_database().
- * This should be the same value as that returned by @ref
- * hs_serialize_database().
- *
- * @param deserialized_size
- * On success, the size of the compiled database that would be generated
- * by @ref hs_deserialize_database_at() is returned here.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+
+/**
+ * Utility function for reporting the size that would be required by a
+ * database if it were deserialized.
+ *
+ * This can be used to allocate a shared memory region or other "special"
+ * allocation prior to deserializing with the @ref hs_deserialize_database_at()
+ * function.
+ *
+ * @param bytes
+ * Pointer to a byte array generated by @ref hs_serialize_database()
+ * representing a compiled pattern database.
+ *
+ * @param length
+ * The length of the byte array generated by @ref hs_serialize_database().
+ * This should be the same value as that returned by @ref
+ * hs_serialize_database().
+ *
+ * @param deserialized_size
+ * On success, the size of the compiled database that would be generated
+ * by @ref hs_deserialize_database_at() is returned here.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_serialized_database_size(const char *bytes,
const size_t length,
size_t *deserialized_size);
-
-/**
- * Utility function providing information about a database.
- *
- * @param database
- * Pointer to a compiled database.
- *
- * @param info
- * On success, a string containing the version and platform information for
- * the supplied database is placed in the parameter. The string is
- * allocated using the allocator supplied in @ref hs_set_misc_allocator()
- * (or malloc() if no allocator was set) and should be freed by the caller.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+
+/**
+ * Utility function providing information about a database.
+ *
+ * @param database
+ * Pointer to a compiled database.
+ *
+ * @param info
+ * On success, a string containing the version and platform information for
+ * the supplied database is placed in the parameter. The string is
+ * allocated using the allocator supplied in @ref hs_set_misc_allocator()
+ * (or malloc() if no allocator was set) and should be freed by the caller.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_database_info(const hs_database_t *database,
char **info);
-
-/**
- * Utility function providing information about a serialized database.
- *
- * @param bytes
- * Pointer to a serialized database.
- *
- * @param length
- * Length in bytes of the serialized database.
- *
- * @param info
- * On success, a string containing the version and platform information
- * for the supplied serialized database is placed in the parameter. The
- * string is allocated using the allocator supplied in @ref
- * hs_set_misc_allocator() (or malloc() if no allocator was set) and
- * should be freed by the caller.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+
+/**
+ * Utility function providing information about a serialized database.
+ *
+ * @param bytes
+ * Pointer to a serialized database.
+ *
+ * @param length
+ * Length in bytes of the serialized database.
+ *
+ * @param info
+ * On success, a string containing the version and platform information
+ * for the supplied serialized database is placed in the parameter. The
+ * string is allocated using the allocator supplied in @ref
+ * hs_set_misc_allocator() (or malloc() if no allocator was set) and
+ * should be freed by the caller.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_serialized_database_info(const char *bytes,
size_t length, char **info);
-
-/**
- * The type of the callback function that will be used by Hyperscan to allocate
- * more memory at runtime as required, for example in @ref hs_open_stream() to
- * allocate stream state.
- *
- * If Hyperscan is to be used in a multi-threaded, or similarly concurrent
- * environment, the allocation function will need to be re-entrant, or
- * similarly safe for concurrent use.
- *
- * @param size
- * The number of bytes to allocate.
- * @return
- * A pointer to the region of memory allocated, or NULL on error.
- */
+
+/**
+ * The type of the callback function that will be used by Hyperscan to allocate
+ * more memory at runtime as required, for example in @ref hs_open_stream() to
+ * allocate stream state.
+ *
+ * If Hyperscan is to be used in a multi-threaded, or similarly concurrent
+ * environment, the allocation function will need to be re-entrant, or
+ * similarly safe for concurrent use.
+ *
+ * @param size
+ * The number of bytes to allocate.
+ * @return
+ * A pointer to the region of memory allocated, or NULL on error.
+ */
typedef void *(HS_CDECL *hs_alloc_t)(size_t size);
-
-/**
- * The type of the callback function that will be used by Hyperscan to free
- * memory regions previously allocated using the @ref hs_alloc_t function.
- *
- * @param ptr
- * The region of memory to be freed.
- */
+
+/**
+ * The type of the callback function that will be used by Hyperscan to free
+ * memory regions previously allocated using the @ref hs_alloc_t function.
+ *
+ * @param ptr
+ * The region of memory to be freed.
+ */
typedef void (HS_CDECL *hs_free_t)(void *ptr);
-
-/**
- * Set the allocate and free functions used by Hyperscan for allocating
- * memory at runtime for stream state, scratch space, database bytecode,
- * and various other data structure returned by the Hyperscan API.
- *
- * The function is equivalent to calling @ref hs_set_stream_allocator(),
- * @ref hs_set_scratch_allocator(), @ref hs_set_database_allocator() and
- * @ref hs_set_misc_allocator() with the provided parameters.
- *
- * This call will override any previous allocators that have been set.
- *
- * Note: there is no way to change the allocator used for temporary objects
- * created during the various compile calls (@ref hs_compile(), @ref
- * hs_compile_multi(), @ref hs_compile_ext_multi()).
- *
- * @param alloc_func
- * A callback function pointer that allocates memory. This function must
- * return memory suitably aligned for the largest representable data type
- * on this platform.
- *
- * @param free_func
- * A callback function pointer that frees allocated memory.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+
+/**
+ * Set the allocate and free functions used by Hyperscan for allocating
+ * memory at runtime for stream state, scratch space, database bytecode,
+ * and various other data structure returned by the Hyperscan API.
+ *
+ * The function is equivalent to calling @ref hs_set_stream_allocator(),
+ * @ref hs_set_scratch_allocator(), @ref hs_set_database_allocator() and
+ * @ref hs_set_misc_allocator() with the provided parameters.
+ *
+ * This call will override any previous allocators that have been set.
+ *
+ * Note: there is no way to change the allocator used for temporary objects
+ * created during the various compile calls (@ref hs_compile(), @ref
+ * hs_compile_multi(), @ref hs_compile_ext_multi()).
+ *
+ * @param alloc_func
+ * A callback function pointer that allocates memory. This function must
+ * return memory suitably aligned for the largest representable data type
+ * on this platform.
+ *
+ * @param free_func
+ * A callback function pointer that frees allocated memory.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_set_allocator(hs_alloc_t alloc_func,
hs_free_t free_func);
-
-/**
- * Set the allocate and free functions used by Hyperscan for allocating memory
- * for database bytecode produced by the compile calls (@ref hs_compile(), @ref
- * hs_compile_multi(), @ref hs_compile_ext_multi()) and by database
- * deserialization (@ref hs_deserialize_database()).
- *
- * If no database allocation functions are set, or if NULL is used in place of
- * both parameters, then memory allocation will default to standard methods
- * (such as the system malloc() and free() calls).
- *
- * This call will override any previous database allocators that have been set.
- *
- * Note: the database allocator may also be set by calling @ref
- * hs_set_allocator().
- *
- * Note: there is no way to change how temporary objects created during the
- * various compile calls (@ref hs_compile(), @ref hs_compile_multi(), @ref
- * hs_compile_ext_multi()) are allocated.
- *
- * @param alloc_func
- * A callback function pointer that allocates memory. This function must
- * return memory suitably aligned for the largest representable data type
- * on this platform.
- *
- * @param free_func
- * A callback function pointer that frees allocated memory.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+
+/**
+ * Set the allocate and free functions used by Hyperscan for allocating memory
+ * for database bytecode produced by the compile calls (@ref hs_compile(), @ref
+ * hs_compile_multi(), @ref hs_compile_ext_multi()) and by database
+ * deserialization (@ref hs_deserialize_database()).
+ *
+ * If no database allocation functions are set, or if NULL is used in place of
+ * both parameters, then memory allocation will default to standard methods
+ * (such as the system malloc() and free() calls).
+ *
+ * This call will override any previous database allocators that have been set.
+ *
+ * Note: the database allocator may also be set by calling @ref
+ * hs_set_allocator().
+ *
+ * Note: there is no way to change how temporary objects created during the
+ * various compile calls (@ref hs_compile(), @ref hs_compile_multi(), @ref
+ * hs_compile_ext_multi()) are allocated.
+ *
+ * @param alloc_func
+ * A callback function pointer that allocates memory. This function must
+ * return memory suitably aligned for the largest representable data type
+ * on this platform.
+ *
+ * @param free_func
+ * A callback function pointer that frees allocated memory.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_set_database_allocator(hs_alloc_t alloc_func,
hs_free_t free_func);
-
-/**
- * Set the allocate and free functions used by Hyperscan for allocating memory
- * for items returned by the Hyperscan API such as @ref hs_compile_error_t, @ref
- * hs_expr_info_t and serialized databases.
- *
- * If no misc allocation functions are set, or if NULL is used in place of both
- * parameters, then memory allocation will default to standard methods (such as
- * the system malloc() and free() calls).
- *
- * This call will override any previous misc allocators that have been set.
- *
- * Note: the misc allocator may also be set by calling @ref hs_set_allocator().
- *
- * @param alloc_func
- * A callback function pointer that allocates memory. This function must
- * return memory suitably aligned for the largest representable data type
- * on this platform.
- *
- * @param free_func
- * A callback function pointer that frees allocated memory.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+
+/**
+ * Set the allocate and free functions used by Hyperscan for allocating memory
+ * for items returned by the Hyperscan API such as @ref hs_compile_error_t, @ref
+ * hs_expr_info_t and serialized databases.
+ *
+ * If no misc allocation functions are set, or if NULL is used in place of both
+ * parameters, then memory allocation will default to standard methods (such as
+ * the system malloc() and free() calls).
+ *
+ * This call will override any previous misc allocators that have been set.
+ *
+ * Note: the misc allocator may also be set by calling @ref hs_set_allocator().
+ *
+ * @param alloc_func
+ * A callback function pointer that allocates memory. This function must
+ * return memory suitably aligned for the largest representable data type
+ * on this platform.
+ *
+ * @param free_func
+ * A callback function pointer that frees allocated memory.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_set_misc_allocator(hs_alloc_t alloc_func,
hs_free_t free_func);
-
-/**
- * Set the allocate and free functions used by Hyperscan for allocating memory
- * for scratch space by @ref hs_alloc_scratch() and @ref hs_clone_scratch().
- *
- * If no scratch allocation functions are set, or if NULL is used in place of
- * both parameters, then memory allocation will default to standard methods
- * (such as the system malloc() and free() calls).
- *
- * This call will override any previous scratch allocators that have been set.
- *
- * Note: the scratch allocator may also be set by calling @ref
- * hs_set_allocator().
- *
- * @param alloc_func
- * A callback function pointer that allocates memory. This function must
- * return memory suitably aligned for the largest representable data type
- * on this platform.
- *
- * @param free_func
- * A callback function pointer that frees allocated memory.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+
+/**
+ * Set the allocate and free functions used by Hyperscan for allocating memory
+ * for scratch space by @ref hs_alloc_scratch() and @ref hs_clone_scratch().
+ *
+ * If no scratch allocation functions are set, or if NULL is used in place of
+ * both parameters, then memory allocation will default to standard methods
+ * (such as the system malloc() and free() calls).
+ *
+ * This call will override any previous scratch allocators that have been set.
+ *
+ * Note: the scratch allocator may also be set by calling @ref
+ * hs_set_allocator().
+ *
+ * @param alloc_func
+ * A callback function pointer that allocates memory. This function must
+ * return memory suitably aligned for the largest representable data type
+ * on this platform.
+ *
+ * @param free_func
+ * A callback function pointer that frees allocated memory.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_set_scratch_allocator(hs_alloc_t alloc_func,
hs_free_t free_func);
-
-/**
- * Set the allocate and free functions used by Hyperscan for allocating memory
- * for stream state by @ref hs_open_stream().
- *
- * If no stream allocation functions are set, or if NULL is used in place of
- * both parameters, then memory allocation will default to standard methods
- * (such as the system malloc() and free() calls).
- *
- * This call will override any previous stream allocators that have been set.
- *
- * Note: the stream allocator may also be set by calling @ref
- * hs_set_allocator().
- *
- * @param alloc_func
- * A callback function pointer that allocates memory. This function must
- * return memory suitably aligned for the largest representable data type
- * on this platform.
- *
- * @param free_func
- * A callback function pointer that frees allocated memory.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+
+/**
+ * Set the allocate and free functions used by Hyperscan for allocating memory
+ * for stream state by @ref hs_open_stream().
+ *
+ * If no stream allocation functions are set, or if NULL is used in place of
+ * both parameters, then memory allocation will default to standard methods
+ * (such as the system malloc() and free() calls).
+ *
+ * This call will override any previous stream allocators that have been set.
+ *
+ * Note: the stream allocator may also be set by calling @ref
+ * hs_set_allocator().
+ *
+ * @param alloc_func
+ * A callback function pointer that allocates memory. This function must
+ * return memory suitably aligned for the largest representable data type
+ * on this platform.
+ *
+ * @param free_func
+ * A callback function pointer that frees allocated memory.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_set_stream_allocator(hs_alloc_t alloc_func,
hs_free_t free_func);
-
-/**
- * Utility function for identifying this release version.
- *
- * @return
- * A string containing the version number of this release build and the
- * date of the build. It is allocated statically, so it does not need to
- * be freed by the caller.
- */
+
+/**
+ * Utility function for identifying this release version.
+ *
+ * @return
+ * A string containing the version number of this release build and the
+ * date of the build. It is allocated statically, so it does not need to
+ * be freed by the caller.
+ */
const char * HS_CDECL hs_version(void);
-
-/**
+
+/**
* Utility function to test the current system architecture.
*
* Hyperscan requires the Supplemental Streaming SIMD Extensions 3 instruction
@@ -467,74 +467,74 @@ const char * HS_CDECL hs_version(void);
hs_error_t HS_CDECL hs_valid_platform(void);
/**
- * @defgroup HS_ERROR hs_error_t values
- *
- * @{
- */
-
-/**
- * The engine completed normally.
- */
-#define HS_SUCCESS 0
-
-/**
- * A parameter passed to this function was invalid.
+ * @defgroup HS_ERROR hs_error_t values
+ *
+ * @{
+ */
+
+/**
+ * The engine completed normally.
+ */
+#define HS_SUCCESS 0
+
+/**
+ * A parameter passed to this function was invalid.
*
* This error is only returned in cases where the function can detect an
* invalid parameter -- it cannot be relied upon to detect (for example)
* pointers to freed memory or other invalid data.
- */
-#define HS_INVALID (-1)
-
-/**
- * A memory allocation failed.
- */
-#define HS_NOMEM (-2)
-
-/**
- * The engine was terminated by callback.
- *
- * This return value indicates that the target buffer was partially scanned,
- * but that the callback function requested that scanning cease after a match
- * was located.
- */
-#define HS_SCAN_TERMINATED (-3)
-
-/**
- * The pattern compiler failed, and the @ref hs_compile_error_t should be
- * inspected for more detail.
- */
-#define HS_COMPILER_ERROR (-4)
-
-/**
- * The given database was built for a different version of Hyperscan.
- */
-#define HS_DB_VERSION_ERROR (-5)
-
-/**
- * The given database was built for a different platform (i.e., CPU type).
- */
-#define HS_DB_PLATFORM_ERROR (-6)
-
-/**
- * The given database was built for a different mode of operation. This error
- * is returned when streaming calls are used with a block or vectored database
- * and vice versa.
- */
-#define HS_DB_MODE_ERROR (-7)
-
-/**
- * A parameter passed to this function was not correctly aligned.
- */
-#define HS_BAD_ALIGN (-8)
-
-/**
- * The memory allocator (either malloc() or the allocator set with @ref
- * hs_set_allocator()) did not correctly return memory suitably aligned for the
- * largest representable data type on this platform.
- */
-#define HS_BAD_ALLOC (-9)
-
+ */
+#define HS_INVALID (-1)
+
+/**
+ * A memory allocation failed.
+ */
+#define HS_NOMEM (-2)
+
+/**
+ * The engine was terminated by callback.
+ *
+ * This return value indicates that the target buffer was partially scanned,
+ * but that the callback function requested that scanning cease after a match
+ * was located.
+ */
+#define HS_SCAN_TERMINATED (-3)
+
+/**
+ * The pattern compiler failed, and the @ref hs_compile_error_t should be
+ * inspected for more detail.
+ */
+#define HS_COMPILER_ERROR (-4)
+
+/**
+ * The given database was built for a different version of Hyperscan.
+ */
+#define HS_DB_VERSION_ERROR (-5)
+
+/**
+ * The given database was built for a different platform (i.e., CPU type).
+ */
+#define HS_DB_PLATFORM_ERROR (-6)
+
+/**
+ * The given database was built for a different mode of operation. This error
+ * is returned when streaming calls are used with a block or vectored database
+ * and vice versa.
+ */
+#define HS_DB_MODE_ERROR (-7)
+
+/**
+ * A parameter passed to this function was not correctly aligned.
+ */
+#define HS_BAD_ALIGN (-8)
+
+/**
+ * The memory allocator (either malloc() or the allocator set with @ref
+ * hs_set_allocator()) did not correctly return memory suitably aligned for the
+ * largest representable data type on this platform.
+ */
+#define HS_BAD_ALLOC (-9)
+
/**
* The scratch region was already in use.
*
@@ -587,10 +587,10 @@ hs_error_t HS_CDECL hs_valid_platform(void);
*/
#define HS_UNKNOWN_ERROR (-13)
-/** @} */
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* HS_COMMON_H_ */
+/** @} */
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* HS_COMMON_H_ */
diff --git a/contrib/libs/hyperscan/src/hs_compile.h b/contrib/libs/hyperscan/src/hs_compile.h
index db34aec25e..b318c29db1 100644
--- a/contrib/libs/hyperscan/src/hs_compile.h
+++ b/contrib/libs/hyperscan/src/hs_compile.h
@@ -1,184 +1,184 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef HS_COMPILE_H_
-#define HS_COMPILE_H_
-
-/**
- * @file
- * @brief The Hyperscan compiler API definition.
- *
- * Hyperscan is a high speed regular expression engine.
- *
- * This header contains functions for compiling regular expressions into
- * Hyperscan databases that can be used by the Hyperscan runtime.
- */
-
-#include "hs_common.h"
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-/**
- * A type containing error details that is returned by the compile calls (@ref
- * hs_compile(), @ref hs_compile_multi() and @ref hs_compile_ext_multi()) on
- * failure. The caller may inspect the values returned in this type to
- * determine the cause of failure.
- *
- * Common errors generated during the compile process include:
- *
- * - *Invalid parameter*
- *
- * An invalid argument was specified in the compile call.
- *
- * - *Unrecognised flag*
- *
- * An unrecognised value was passed in the flags argument.
- *
- * - *Pattern matches empty buffer*
- *
- * By default, Hyperscan only supports patterns that will *always*
- * consume at least one byte of input. Patterns that do not have this
- * property (such as `/(abc)?/`) will produce this error unless
- * the @ref HS_FLAG_ALLOWEMPTY flag is supplied. Note that such
- * patterns will produce a match for *every* byte when scanned.
- *
- * - *Embedded anchors not supported*
- *
- * Hyperscan only supports the use of anchor meta-characters (such as
- * `^` and `$`) in patterns where they could *only* match
- * at the start or end of a buffer. A pattern containing an embedded
- * anchor, such as `/abc^def/`, can never match, as there is no
- * way for `abc` to precede the start of the data stream.
- *
- * - *Bounded repeat is too large*
- *
- * The pattern contains a repeated construct with very large finite
- * bounds.
- *
- * - *Unsupported component type*
- *
- * An unsupported PCRE construct was used in the pattern.
- *
- * - *Unable to generate bytecode*
- *
- * This error indicates that Hyperscan was unable to compile a pattern
- * that is syntactically valid. The most common cause is a pattern that is
- * very long and complex or contains a large repeated subpattern.
- *
- * - *Unable to allocate memory*
- *
- * The library was unable to allocate temporary storage used during
- * compilation time.
- *
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HS_COMPILE_H_
+#define HS_COMPILE_H_
+
+/**
+ * @file
+ * @brief The Hyperscan compiler API definition.
+ *
+ * Hyperscan is a high speed regular expression engine.
+ *
+ * This header contains functions for compiling regular expressions into
+ * Hyperscan databases that can be used by the Hyperscan runtime.
+ */
+
+#include "hs_common.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * A type containing error details that is returned by the compile calls (@ref
+ * hs_compile(), @ref hs_compile_multi() and @ref hs_compile_ext_multi()) on
+ * failure. The caller may inspect the values returned in this type to
+ * determine the cause of failure.
+ *
+ * Common errors generated during the compile process include:
+ *
+ * - *Invalid parameter*
+ *
+ * An invalid argument was specified in the compile call.
+ *
+ * - *Unrecognised flag*
+ *
+ * An unrecognised value was passed in the flags argument.
+ *
+ * - *Pattern matches empty buffer*
+ *
+ * By default, Hyperscan only supports patterns that will *always*
+ * consume at least one byte of input. Patterns that do not have this
+ * property (such as `/(abc)?/`) will produce this error unless
+ * the @ref HS_FLAG_ALLOWEMPTY flag is supplied. Note that such
+ * patterns will produce a match for *every* byte when scanned.
+ *
+ * - *Embedded anchors not supported*
+ *
+ * Hyperscan only supports the use of anchor meta-characters (such as
+ * `^` and `$`) in patterns where they could *only* match
+ * at the start or end of a buffer. A pattern containing an embedded
+ * anchor, such as `/abc^def/`, can never match, as there is no
+ * way for `abc` to precede the start of the data stream.
+ *
+ * - *Bounded repeat is too large*
+ *
+ * The pattern contains a repeated construct with very large finite
+ * bounds.
+ *
+ * - *Unsupported component type*
+ *
+ * An unsupported PCRE construct was used in the pattern.
+ *
+ * - *Unable to generate bytecode*
+ *
+ * This error indicates that Hyperscan was unable to compile a pattern
+ * that is syntactically valid. The most common cause is a pattern that is
+ * very long and complex or contains a large repeated subpattern.
+ *
+ * - *Unable to allocate memory*
+ *
+ * The library was unable to allocate temporary storage used during
+ * compilation time.
+ *
* - *Allocator returned misaligned memory*
*
* The memory allocator (either malloc() or the allocator set with @ref
* hs_set_allocator()) did not correctly return memory suitably aligned
* for the largest representable data type on this platform.
*
- * - *Internal error*
- *
- * An unexpected error occurred: if this error is reported, please contact
- * the Hyperscan team with a description of the situation.
- */
-typedef struct hs_compile_error {
- /**
- * A human-readable error message describing the error.
- */
- char *message;
-
- /**
- * The zero-based number of the expression that caused the error (if this
- * can be determined). If the error is not specific to an expression, then
- * this value will be less than zero.
- */
- int expression;
-} hs_compile_error_t;
-
-/**
- * A type containing information on the target platform which may optionally be
- * provided to the compile calls (@ref hs_compile(), @ref hs_compile_multi(),
- * @ref hs_compile_ext_multi()).
- *
- * A hs_platform_info structure may be populated for the current platform by
- * using the @ref hs_populate_platform() call.
- */
-typedef struct hs_platform_info {
- /**
- * Information about the target platform which may be used to guide the
- * optimisation process of the compile.
- *
- * Use of this field does not limit the processors that the resulting
- * database can run on, but may impact the performance of the resulting
- * database.
- */
- unsigned int tune;
-
- /**
- * Relevant CPU features available on the target platform
- *
- * This value may be produced by combining HS_CPU_FEATURE_* flags (such as
- * @ref HS_CPU_FEATURES_AVX2). Multiple CPU features may be or'ed together
- * to produce the value.
- */
- unsigned long long cpu_features;
-
- /**
- * Reserved for future use.
- */
- unsigned long long reserved1;
-
- /**
- * Reserved for future use.
- */
- unsigned long long reserved2;
-} hs_platform_info_t;
-
-/**
- * A type containing information related to an expression that is returned by
+ * - *Internal error*
+ *
+ * An unexpected error occurred: if this error is reported, please contact
+ * the Hyperscan team with a description of the situation.
+ */
+typedef struct hs_compile_error {
+ /**
+ * A human-readable error message describing the error.
+ */
+ char *message;
+
+ /**
+ * The zero-based number of the expression that caused the error (if this
+ * can be determined). If the error is not specific to an expression, then
+ * this value will be less than zero.
+ */
+ int expression;
+} hs_compile_error_t;
+
+/**
+ * A type containing information on the target platform which may optionally be
+ * provided to the compile calls (@ref hs_compile(), @ref hs_compile_multi(),
+ * @ref hs_compile_ext_multi()).
+ *
+ * A hs_platform_info structure may be populated for the current platform by
+ * using the @ref hs_populate_platform() call.
+ */
+typedef struct hs_platform_info {
+ /**
+ * Information about the target platform which may be used to guide the
+ * optimisation process of the compile.
+ *
+ * Use of this field does not limit the processors that the resulting
+ * database can run on, but may impact the performance of the resulting
+ * database.
+ */
+ unsigned int tune;
+
+ /**
+ * Relevant CPU features available on the target platform
+ *
+ * This value may be produced by combining HS_CPU_FEATURE_* flags (such as
+ * @ref HS_CPU_FEATURES_AVX2). Multiple CPU features may be or'ed together
+ * to produce the value.
+ */
+ unsigned long long cpu_features;
+
+ /**
+ * Reserved for future use.
+ */
+ unsigned long long reserved1;
+
+ /**
+ * Reserved for future use.
+ */
+ unsigned long long reserved2;
+} hs_platform_info_t;
+
+/**
+ * A type containing information related to an expression that is returned by
* @ref hs_expression_info() or @ref hs_expression_ext_info.
- */
-typedef struct hs_expr_info {
- /**
- * The minimum length in bytes of a match for the pattern.
+ */
+typedef struct hs_expr_info {
+ /**
+ * The minimum length in bytes of a match for the pattern.
*
* Note: in some cases when using advanced features to suppress matches
* (such as extended parameters or the @ref HS_FLAG_SINGLEMATCH flag) this
* may represent a conservative lower bound for the true minimum length of
* a match.
- */
- unsigned int min_width;
-
- /**
- * The maximum length in bytes of a match for the pattern. If the pattern
+ */
+ unsigned int min_width;
+
+ /**
+ * The maximum length in bytes of a match for the pattern. If the pattern
* has an unbounded maximum length, this will be set to the maximum value
* of an unsigned int (UINT_MAX).
*
@@ -186,71 +186,71 @@ typedef struct hs_expr_info {
* (such as extended parameters or the @ref HS_FLAG_SINGLEMATCH flag) this
* may represent a conservative upper bound for the true maximum length of
* a match.
- */
- unsigned int max_width;
-
- /**
- * Whether this expression can produce matches that are not returned in
- * order, such as those produced by assertions. Zero if false, non-zero if
- * true.
- */
- char unordered_matches;
-
- /**
- * Whether this expression can produce matches at end of data (EOD). In
- * streaming mode, EOD matches are raised during @ref hs_close_stream(),
- * since it is only when @ref hs_close_stream() is called that the EOD
- * location is known. Zero if false, non-zero if true.
- *
- * Note: trailing `\b` word boundary assertions may also result in EOD
- * matches as end-of-data can act as a word boundary.
- */
- char matches_at_eod;
-
- /**
- * Whether this expression can *only* produce matches at end of data (EOD).
- * In streaming mode, all matches for this expression are raised during
- * @ref hs_close_stream(). Zero if false, non-zero if true.
- */
- char matches_only_at_eod;
-} hs_expr_info_t;
-
-/**
- * A structure containing additional parameters related to an expression,
+ */
+ unsigned int max_width;
+
+ /**
+ * Whether this expression can produce matches that are not returned in
+ * order, such as those produced by assertions. Zero if false, non-zero if
+ * true.
+ */
+ char unordered_matches;
+
+ /**
+ * Whether this expression can produce matches at end of data (EOD). In
+ * streaming mode, EOD matches are raised during @ref hs_close_stream(),
+ * since it is only when @ref hs_close_stream() is called that the EOD
+ * location is known. Zero if false, non-zero if true.
+ *
+ * Note: trailing `\b` word boundary assertions may also result in EOD
+ * matches as end-of-data can act as a word boundary.
+ */
+ char matches_at_eod;
+
+ /**
+ * Whether this expression can *only* produce matches at end of data (EOD).
+ * In streaming mode, all matches for this expression are raised during
+ * @ref hs_close_stream(). Zero if false, non-zero if true.
+ */
+ char matches_only_at_eod;
+} hs_expr_info_t;
+
+/**
+ * A structure containing additional parameters related to an expression,
* passed in at build time to @ref hs_compile_ext_multi() or @ref
* hs_expression_ext_info.
- *
- * These parameters allow the set of matches produced by a pattern to be
- * constrained at compile time, rather than relying on the application to
- * process unwanted matches at runtime.
- */
-typedef struct hs_expr_ext {
- /**
- * Flags governing which parts of this structure are to be used by the
- * compiler. See @ref HS_EXT_FLAG.
- */
- unsigned long long flags;
-
- /**
- * The minimum end offset in the data stream at which this expression
- * should match successfully. To use this parameter, set the
- * @ref HS_EXT_FLAG_MIN_OFFSET flag in the hs_expr_ext::flags field.
- */
- unsigned long long min_offset;
-
- /**
- * The maximum end offset in the data stream at which this expression
- * should match successfully. To use this parameter, set the
- * @ref HS_EXT_FLAG_MAX_OFFSET flag in the hs_expr_ext::flags field.
- */
- unsigned long long max_offset;
-
- /**
- * The minimum match length (from start to end) required to successfully
- * match this expression. To use this parameter, set the
- * @ref HS_EXT_FLAG_MIN_LENGTH flag in the hs_expr_ext::flags field.
- */
- unsigned long long min_length;
+ *
+ * These parameters allow the set of matches produced by a pattern to be
+ * constrained at compile time, rather than relying on the application to
+ * process unwanted matches at runtime.
+ */
+typedef struct hs_expr_ext {
+ /**
+ * Flags governing which parts of this structure are to be used by the
+ * compiler. See @ref HS_EXT_FLAG.
+ */
+ unsigned long long flags;
+
+ /**
+ * The minimum end offset in the data stream at which this expression
+ * should match successfully. To use this parameter, set the
+ * @ref HS_EXT_FLAG_MIN_OFFSET flag in the hs_expr_ext::flags field.
+ */
+ unsigned long long min_offset;
+
+ /**
+ * The maximum end offset in the data stream at which this expression
+ * should match successfully. To use this parameter, set the
+ * @ref HS_EXT_FLAG_MAX_OFFSET flag in the hs_expr_ext::flags field.
+ */
+ unsigned long long max_offset;
+
+ /**
+ * The minimum match length (from start to end) required to successfully
+ * match this expression. To use this parameter, set the
+ * @ref HS_EXT_FLAG_MIN_LENGTH flag in the hs_expr_ext::flags field.
+ */
+ unsigned long long min_length;
/**
* Allow patterns to approximately match within this edit distance. To use
@@ -265,181 +265,181 @@ typedef struct hs_expr_ext {
* hs_expr_ext::flags field.
*/
unsigned hamming_distance;
-} hs_expr_ext_t;
-
-/**
- * @defgroup HS_EXT_FLAG hs_expr_ext_t flags
- *
- * These flags are used in @ref hs_expr_ext_t::flags to indicate which fields
- * are used.
- *
- * @{
- */
-
-/** Flag indicating that the hs_expr_ext::min_offset field is used. */
-#define HS_EXT_FLAG_MIN_OFFSET 1ULL
-
-/** Flag indicating that the hs_expr_ext::max_offset field is used. */
-#define HS_EXT_FLAG_MAX_OFFSET 2ULL
-
-/** Flag indicating that the hs_expr_ext::min_length field is used. */
-#define HS_EXT_FLAG_MIN_LENGTH 4ULL
-
+} hs_expr_ext_t;
+
+/**
+ * @defgroup HS_EXT_FLAG hs_expr_ext_t flags
+ *
+ * These flags are used in @ref hs_expr_ext_t::flags to indicate which fields
+ * are used.
+ *
+ * @{
+ */
+
+/** Flag indicating that the hs_expr_ext::min_offset field is used. */
+#define HS_EXT_FLAG_MIN_OFFSET 1ULL
+
+/** Flag indicating that the hs_expr_ext::max_offset field is used. */
+#define HS_EXT_FLAG_MAX_OFFSET 2ULL
+
+/** Flag indicating that the hs_expr_ext::min_length field is used. */
+#define HS_EXT_FLAG_MIN_LENGTH 4ULL
+
/** Flag indicating that the hs_expr_ext::edit_distance field is used. */
#define HS_EXT_FLAG_EDIT_DISTANCE 8ULL
/** Flag indicating that the hs_expr_ext::hamming_distance field is used. */
#define HS_EXT_FLAG_HAMMING_DISTANCE 16ULL
-/** @} */
-
-/**
- * The basic regular expression compiler.
- *
- * This is the function call with which an expression is compiled into a
- * Hyperscan database which can be passed to the runtime functions (such as
- * @ref hs_scan(), @ref hs_open_stream(), etc.)
- *
- * @param expression
- * The NULL-terminated expression to parse. Note that this string must
- * represent ONLY the pattern to be matched, with no delimiters or flags;
+/** @} */
+
+/**
+ * The basic regular expression compiler.
+ *
+ * This is the function call with which an expression is compiled into a
+ * Hyperscan database which can be passed to the runtime functions (such as
+ * @ref hs_scan(), @ref hs_open_stream(), etc.)
+ *
+ * @param expression
+ * The NULL-terminated expression to parse. Note that this string must
+ * represent ONLY the pattern to be matched, with no delimiters or flags;
* any global flags should be specified with the @p flags argument. For
- * example, the expression `/abc?def/i` should be compiled by providing
+ * example, the expression `/abc?def/i` should be compiled by providing
* `abc?def` as the @p expression, and @ref HS_FLAG_CASELESS as the @a
- * flags.
- *
- * @param flags
- * Flags which modify the behaviour of the expression. Multiple flags may
- * be used by ORing them together. Valid values are:
- * - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
- * - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
- * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
- * - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
- * expression per stream.
- * - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
- * empty string, such as `.*`.
- * - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
- * - HS_FLAG_UCP - Use Unicode properties for character classes.
- * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
- * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
- * when a match is found.
+ * flags.
+ *
+ * @param flags
+ * Flags which modify the behaviour of the expression. Multiple flags may
+ * be used by ORing them together. Valid values are:
+ * - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
+ * - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
+ * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
+ * - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
+ * expression per stream.
+ * - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
+ * empty string, such as `.*`.
+ * - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
+ * - HS_FLAG_UCP - Use Unicode properties for character classes.
+ * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
+ * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
+ * when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
- *
- * @param mode
- * Compiler mode flags that affect the database as a whole. One of @ref
- * HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
- * supplied, to select between the generation of a streaming, block or
- * vectored database. In addition, other flags (beginning with HS_MODE_)
- * may be supplied to enable specific features. See @ref HS_MODE_FLAG for
- * more details.
- *
- * @param platform
- * If not NULL, the platform structure is used to determine the target
- * platform for the database. If NULL, a database suitable for running
- * on the current host platform is produced.
- *
- * @param db
- * On success, a pointer to the generated database will be returned in
- * this parameter, or NULL on failure. The caller is responsible for
- * deallocating the buffer using the @ref hs_free_database() function.
- *
- * @param error
- * If the compile fails, a pointer to a @ref hs_compile_error_t will be
- * returned, providing details of the error condition. The caller is
- * responsible for deallocating the buffer using the @ref
- * hs_free_compile_error() function.
- *
- * @return
- * @ref HS_SUCCESS is returned on successful compilation; @ref
- * HS_COMPILER_ERROR on failure, with details provided in the error
- * parameter.
- */
+ *
+ * @param mode
+ * Compiler mode flags that affect the database as a whole. One of @ref
+ * HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
+ * supplied, to select between the generation of a streaming, block or
+ * vectored database. In addition, other flags (beginning with HS_MODE_)
+ * may be supplied to enable specific features. See @ref HS_MODE_FLAG for
+ * more details.
+ *
+ * @param platform
+ * If not NULL, the platform structure is used to determine the target
+ * platform for the database. If NULL, a database suitable for running
+ * on the current host platform is produced.
+ *
+ * @param db
+ * On success, a pointer to the generated database will be returned in
+ * this parameter, or NULL on failure. The caller is responsible for
+ * deallocating the buffer using the @ref hs_free_database() function.
+ *
+ * @param error
+ * If the compile fails, a pointer to a @ref hs_compile_error_t will be
+ * returned, providing details of the error condition. The caller is
+ * responsible for deallocating the buffer using the @ref
+ * hs_free_compile_error() function.
+ *
+ * @return
+ * @ref HS_SUCCESS is returned on successful compilation; @ref
+ * HS_COMPILER_ERROR on failure, with details provided in the error
+ * parameter.
+ */
hs_error_t HS_CDECL hs_compile(const char *expression, unsigned int flags,
unsigned int mode,
const hs_platform_info_t *platform,
hs_database_t **db, hs_compile_error_t **error);
-
-/**
- * The multiple regular expression compiler.
- *
- * This is the function call with which a set of expressions is compiled into a
- * database which can be passed to the runtime functions (such as @ref
- * hs_scan(), @ref hs_open_stream(), etc.) Each expression can be labelled with
- * a unique integer which is passed into the match callback to identify the
- * pattern that has matched.
- *
- * @param expressions
- * Array of NULL-terminated expressions to compile. Note that (as for @ref
- * hs_compile()) these strings must contain only the pattern to be
- * matched, with no delimiters or flags. For example, the expression
- * `/abc?def/i` should be compiled by providing `abc?def` as the first
+
+/**
+ * The multiple regular expression compiler.
+ *
+ * This is the function call with which a set of expressions is compiled into a
+ * database which can be passed to the runtime functions (such as @ref
+ * hs_scan(), @ref hs_open_stream(), etc.) Each expression can be labelled with
+ * a unique integer which is passed into the match callback to identify the
+ * pattern that has matched.
+ *
+ * @param expressions
+ * Array of NULL-terminated expressions to compile. Note that (as for @ref
+ * hs_compile()) these strings must contain only the pattern to be
+ * matched, with no delimiters or flags. For example, the expression
+ * `/abc?def/i` should be compiled by providing `abc?def` as the first
* string in the @p expressions array, and @ref HS_FLAG_CASELESS as the
* first value in the @p flags array.
- *
- * @param flags
- * Array of flags which modify the behaviour of each expression. Multiple
- * flags may be used by ORing them together. Specifying the NULL pointer
- * in place of an array will set the flags value for all patterns to zero.
- * Valid values are:
- * - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
- * - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
- * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
- * - HS_FLAG_SINGLEMATCH - Only one match will be generated by patterns
- * with this match id per stream.
- * - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
- * empty string, such as `.*`.
- * - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
- * - HS_FLAG_UCP - Use Unicode properties for character classes.
- * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
- * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
- * when a match is found.
+ *
+ * @param flags
+ * Array of flags which modify the behaviour of each expression. Multiple
+ * flags may be used by ORing them together. Specifying the NULL pointer
+ * in place of an array will set the flags value for all patterns to zero.
+ * Valid values are:
+ * - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
+ * - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
+ * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
+ * - HS_FLAG_SINGLEMATCH - Only one match will be generated by patterns
+ * with this match id per stream.
+ * - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
+ * empty string, such as `.*`.
+ * - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
+ * - HS_FLAG_UCP - Use Unicode properties for character classes.
+ * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
+ * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
+ * when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
- *
- * @param ids
- * An array of integers specifying the ID number to be associated with the
- * corresponding pattern in the expressions array. Specifying the NULL
- * pointer in place of an array will set the ID value for all patterns to
- * zero.
- *
- * @param elements
- * The number of elements in the input arrays.
- *
- * @param mode
- * Compiler mode flags that affect the database as a whole. One of @ref
- * HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
- * supplied, to select between the generation of a streaming, block or
- * vectored database. In addition, other flags (beginning with HS_MODE_)
- * may be supplied to enable specific features. See @ref HS_MODE_FLAG for
- * more details.
- *
- * @param platform
- * If not NULL, the platform structure is used to determine the target
- * platform for the database. If NULL, a database suitable for running
- * on the current host platform is produced.
- *
- * @param db
- * On success, a pointer to the generated database will be returned in
- * this parameter, or NULL on failure. The caller is responsible for
- * deallocating the buffer using the @ref hs_free_database() function.
- *
- * @param error
- * If the compile fails, a pointer to a @ref hs_compile_error_t will be
- * returned, providing details of the error condition. The caller is
- * responsible for deallocating the buffer using the @ref
- * hs_free_compile_error() function.
- *
- * @return
- * @ref HS_SUCCESS is returned on successful compilation; @ref
+ *
+ * @param ids
+ * An array of integers specifying the ID number to be associated with the
+ * corresponding pattern in the expressions array. Specifying the NULL
+ * pointer in place of an array will set the ID value for all patterns to
+ * zero.
+ *
+ * @param elements
+ * The number of elements in the input arrays.
+ *
+ * @param mode
+ * Compiler mode flags that affect the database as a whole. One of @ref
+ * HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
+ * supplied, to select between the generation of a streaming, block or
+ * vectored database. In addition, other flags (beginning with HS_MODE_)
+ * may be supplied to enable specific features. See @ref HS_MODE_FLAG for
+ * more details.
+ *
+ * @param platform
+ * If not NULL, the platform structure is used to determine the target
+ * platform for the database. If NULL, a database suitable for running
+ * on the current host platform is produced.
+ *
+ * @param db
+ * On success, a pointer to the generated database will be returned in
+ * this parameter, or NULL on failure. The caller is responsible for
+ * deallocating the buffer using the @ref hs_free_database() function.
+ *
+ * @param error
+ * If the compile fails, a pointer to a @ref hs_compile_error_t will be
+ * returned, providing details of the error condition. The caller is
+ * responsible for deallocating the buffer using the @ref
+ * hs_free_compile_error() function.
+ *
+ * @return
+ * @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR on failure, with details provided in the @p error
- * parameter.
- *
- */
+ * parameter.
+ *
+ */
hs_error_t HS_CDECL hs_compile_multi(const char *const *expressions,
const unsigned int *flags,
const unsigned int *ids,
@@ -447,99 +447,99 @@ hs_error_t HS_CDECL hs_compile_multi(const char *const *expressions,
const hs_platform_info_t *platform,
hs_database_t **db,
hs_compile_error_t **error);
-
-/**
+
+/**
* The multiple regular expression compiler with extended parameter support.
- *
- * This function call compiles a group of expressions into a database in the
- * same way as @ref hs_compile_multi(), but allows additional parameters to be
- * specified via an @ref hs_expr_ext_t structure per expression.
- *
- * @param expressions
- * Array of NULL-terminated expressions to compile. Note that (as for @ref
- * hs_compile()) these strings must contain only the pattern to be
- * matched, with no delimiters or flags. For example, the expression
- * `/abc?def/i` should be compiled by providing `abc?def` as the first
+ *
+ * This function call compiles a group of expressions into a database in the
+ * same way as @ref hs_compile_multi(), but allows additional parameters to be
+ * specified via an @ref hs_expr_ext_t structure per expression.
+ *
+ * @param expressions
+ * Array of NULL-terminated expressions to compile. Note that (as for @ref
+ * hs_compile()) these strings must contain only the pattern to be
+ * matched, with no delimiters or flags. For example, the expression
+ * `/abc?def/i` should be compiled by providing `abc?def` as the first
* string in the @p expressions array, and @ref HS_FLAG_CASELESS as the
* first value in the @p flags array.
- *
- * @param flags
- * Array of flags which modify the behaviour of each expression. Multiple
- * flags may be used by ORing them together. Specifying the NULL pointer
- * in place of an array will set the flags value for all patterns to zero.
- * Valid values are:
- * - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
- * - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
- * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
- * - HS_FLAG_SINGLEMATCH - Only one match will be generated by patterns
- * with this match id per stream.
- * - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
- * empty string, such as `.*`.
- * - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
- * - HS_FLAG_UCP - Use Unicode properties for character classes.
- * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
- * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
- * when a match is found.
+ *
+ * @param flags
+ * Array of flags which modify the behaviour of each expression. Multiple
+ * flags may be used by ORing them together. Specifying the NULL pointer
+ * in place of an array will set the flags value for all patterns to zero.
+ * Valid values are:
+ * - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
+ * - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
+ * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
+ * - HS_FLAG_SINGLEMATCH - Only one match will be generated by patterns
+ * with this match id per stream.
+ * - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
+ * empty string, such as `.*`.
+ * - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
+ * - HS_FLAG_UCP - Use Unicode properties for character classes.
+ * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
+ * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
+ * when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
- *
- * @param ids
- * An array of integers specifying the ID number to be associated with the
- * corresponding pattern in the expressions array. Specifying the NULL
- * pointer in place of an array will set the ID value for all patterns to
- * zero.
- *
- * @param ext
- * An array of pointers to filled @ref hs_expr_ext_t structures that
- * define extended behaviour for each pattern. NULL may be specified if no
- * extended behaviour is needed for an individual pattern, or in place of
- * the whole array if it is not needed for any expressions. Memory used by
- * these structures must be both allocated and freed by the caller.
- *
- * @param elements
- * The number of elements in the input arrays.
- *
- * @param mode
- * Compiler mode flags that affect the database as a whole. One of @ref
- * HS_MODE_STREAM, @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
- * supplied, to select between the generation of a streaming, block or
- * vectored database. In addition, other flags (beginning with HS_MODE_)
- * may be supplied to enable specific features. See @ref HS_MODE_FLAG for
- * more details.
- *
- * @param platform
- * If not NULL, the platform structure is used to determine the target
- * platform for the database. If NULL, a database suitable for running
- * on the current host platform is produced.
- *
- * @param db
- * On success, a pointer to the generated database will be returned in
- * this parameter, or NULL on failure. The caller is responsible for
- * deallocating the buffer using the @ref hs_free_database() function.
- *
- * @param error
- * If the compile fails, a pointer to a @ref hs_compile_error_t will be
- * returned, providing details of the error condition. The caller is
- * responsible for deallocating the buffer using the @ref
- * hs_free_compile_error() function.
- *
- * @return
- * @ref HS_SUCCESS is returned on successful compilation; @ref
+ *
+ * @param ids
+ * An array of integers specifying the ID number to be associated with the
+ * corresponding pattern in the expressions array. Specifying the NULL
+ * pointer in place of an array will set the ID value for all patterns to
+ * zero.
+ *
+ * @param ext
+ * An array of pointers to filled @ref hs_expr_ext_t structures that
+ * define extended behaviour for each pattern. NULL may be specified if no
+ * extended behaviour is needed for an individual pattern, or in place of
+ * the whole array if it is not needed for any expressions. Memory used by
+ * these structures must be both allocated and freed by the caller.
+ *
+ * @param elements
+ * The number of elements in the input arrays.
+ *
+ * @param mode
+ * Compiler mode flags that affect the database as a whole. One of @ref
+ * HS_MODE_STREAM, @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
+ * supplied, to select between the generation of a streaming, block or
+ * vectored database. In addition, other flags (beginning with HS_MODE_)
+ * may be supplied to enable specific features. See @ref HS_MODE_FLAG for
+ * more details.
+ *
+ * @param platform
+ * If not NULL, the platform structure is used to determine the target
+ * platform for the database. If NULL, a database suitable for running
+ * on the current host platform is produced.
+ *
+ * @param db
+ * On success, a pointer to the generated database will be returned in
+ * this parameter, or NULL on failure. The caller is responsible for
+ * deallocating the buffer using the @ref hs_free_database() function.
+ *
+ * @param error
+ * If the compile fails, a pointer to a @ref hs_compile_error_t will be
+ * returned, providing details of the error condition. The caller is
+ * responsible for deallocating the buffer using the @ref
+ * hs_free_compile_error() function.
+ *
+ * @return
+ * @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR on failure, with details provided in the @p error
- * parameter.
- *
- */
+ * parameter.
+ *
+ */
hs_error_t HS_CDECL hs_compile_ext_multi(const char *const *expressions,
- const unsigned int *flags,
- const unsigned int *ids,
- const hs_expr_ext_t *const *ext,
- unsigned int elements, unsigned int mode,
- const hs_platform_info_t *platform,
- hs_database_t **db, hs_compile_error_t **error);
-
-/**
+ const unsigned int *flags,
+ const unsigned int *ids,
+ const hs_expr_ext_t *const *ext,
+ unsigned int elements, unsigned int mode,
+ const hs_platform_info_t *platform,
+ hs_database_t **db, hs_compile_error_t **error);
+
+/**
* The basic pure literal expression compiler.
*
* This is the function call with which a pure literal expression (not a
@@ -697,23 +697,23 @@ hs_error_t HS_CDECL hs_compile_lit_multi(const char * const *expressions,
hs_compile_error_t **error);
/**
- * Free an error structure generated by @ref hs_compile(), @ref
- * hs_compile_multi() or @ref hs_compile_ext_multi().
- *
- * @param error
- * The @ref hs_compile_error_t to be freed. NULL may also be safely
- * provided.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+ * Free an error structure generated by @ref hs_compile(), @ref
+ * hs_compile_multi() or @ref hs_compile_ext_multi().
+ *
+ * @param error
+ * The @ref hs_compile_error_t to be freed. NULL may also be safely
+ * provided.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error);
-
-/**
- * Utility function providing information about a regular expression. The
- * information provided in @ref hs_expr_info_t includes the minimum and maximum
- * width of a pattern match.
- *
+
+/**
+ * Utility function providing information about a regular expression. The
+ * information provided in @ref hs_expr_info_t includes the minimum and maximum
+ * width of a pattern match.
+ *
* Note: successful analysis of an expression with this function does not imply
* that compilation of the same expression (via @ref hs_compile(), @ref
* hs_compile_multi() or @ref hs_compile_ext_multi()) would succeed. This
@@ -725,57 +725,57 @@ hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error);
* the properties returned in the @ref hs_expr_info_t structure, they will not
* affect the outcome of this function.
*
- * @param expression
- * The NULL-terminated expression to parse. Note that this string must
- * represent ONLY the pattern to be matched, with no delimiters or flags;
+ * @param expression
+ * The NULL-terminated expression to parse. Note that this string must
+ * represent ONLY the pattern to be matched, with no delimiters or flags;
* any global flags should be specified with the @p flags argument. For
- * example, the expression `/abc?def/i` should be compiled by providing
+ * example, the expression `/abc?def/i` should be compiled by providing
* `abc?def` as the @p expression, and @ref HS_FLAG_CASELESS as the @a
- * flags.
- *
- * @param flags
- * Flags which modify the behaviour of the expression. Multiple flags may
- * be used by ORing them together. Valid values are:
- * - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
- * - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
- * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
- * - HS_FLAG_SINGLEMATCH - Only one match will be generated by the
- * expression per stream.
- * - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
- * empty string, such as `.*`.
- * - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
- * - HS_FLAG_UCP - Use Unicode properties for character classes.
- * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
- * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
- * when a match is found.
+ * flags.
+ *
+ * @param flags
+ * Flags which modify the behaviour of the expression. Multiple flags may
+ * be used by ORing them together. Valid values are:
+ * - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
+ * - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
+ * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
+ * - HS_FLAG_SINGLEMATCH - Only one match will be generated by the
+ * expression per stream.
+ * - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
+ * empty string, such as `.*`.
+ * - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
+ * - HS_FLAG_UCP - Use Unicode properties for character classes.
+ * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
+ * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
+ * when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
- *
- * @param info
- * On success, a pointer to the pattern information will be returned in
- * this parameter, or NULL on failure. This structure is allocated using
- * the allocator supplied in @ref hs_set_allocator() (or malloc() if no
- * allocator was set) and should be freed by the caller.
- *
- * @param error
- * If the call fails, a pointer to a @ref hs_compile_error_t will be
- * returned, providing details of the error condition. The caller is
- * responsible for deallocating the buffer using the @ref
- * hs_free_compile_error() function.
- *
- * @return
- * @ref HS_SUCCESS is returned on successful compilation; @ref
- * HS_COMPILER_ERROR on failure, with details provided in the error
- * parameter.
- */
+ *
+ * @param info
+ * On success, a pointer to the pattern information will be returned in
+ * this parameter, or NULL on failure. This structure is allocated using
+ * the allocator supplied in @ref hs_set_allocator() (or malloc() if no
+ * allocator was set) and should be freed by the caller.
+ *
+ * @param error
+ * If the call fails, a pointer to a @ref hs_compile_error_t will be
+ * returned, providing details of the error condition. The caller is
+ * responsible for deallocating the buffer using the @ref
+ * hs_free_compile_error() function.
+ *
+ * @return
+ * @ref HS_SUCCESS is returned on successful compilation; @ref
+ * HS_COMPILER_ERROR on failure, with details provided in the error
+ * parameter.
+ */
hs_error_t HS_CDECL hs_expression_info(const char *expression,
unsigned int flags,
hs_expr_info_t **info,
hs_compile_error_t **error);
-
-/**
+
+/**
* Utility function providing information about a regular expression, with
* extended parameter support. The information provided in @ref hs_expr_info_t
* includes the minimum and maximum width of a pattern match.
@@ -848,146 +848,146 @@ hs_error_t HS_CDECL hs_expression_ext_info(const char *expression,
hs_compile_error_t **error);
/**
- * Populates the platform information based on the current host.
- *
- * @param platform
- * On success, the pointed to structure is populated based on the current
- * host.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+ * Populates the platform information based on the current host.
+ *
+ * @param platform
+ * On success, the pointed to structure is populated based on the current
+ * host.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform);
-
-/**
- * @defgroup HS_PATTERN_FLAG Pattern flags
- *
- * @{
- */
-
-/**
- * Compile flag: Set case-insensitive matching.
- *
- * This flag sets the expression to be matched case-insensitively by default.
- * The expression may still use PCRE tokens (notably `(?i)` and
- * `(?-i)`) to switch case-insensitive matching on and off.
- */
-#define HS_FLAG_CASELESS 1
-
-/**
- * Compile flag: Matching a `.` will not exclude newlines.
- *
- * This flag sets any instances of the `.` token to match newline characters as
- * well as all other characters. The PCRE specification states that the `.`
- * token does not match newline characters by default, so without this flag the
- * `.` token will not cross line boundaries.
- */
-#define HS_FLAG_DOTALL 2
-
-/**
- * Compile flag: Set multi-line anchoring.
- *
- * This flag instructs the expression to make the `^` and `$` tokens match
- * newline characters as well as the start and end of the stream. If this flag
- * is not specified, the `^` token will only ever match at the start of a
- * stream, and the `$` token will only ever match at the end of a stream within
- * the guidelines of the PCRE specification.
- */
-#define HS_FLAG_MULTILINE 4
-
-/**
- * Compile flag: Set single-match only mode.
- *
- * This flag sets the expression's match ID to match at most once. In streaming
- * mode, this means that the expression will return only a single match over
- * the lifetime of the stream, rather than reporting every match as per
- * standard Hyperscan semantics. In block mode or vectored mode, only the first
- * match for each invocation of @ref hs_scan() or @ref hs_scan_vector() will be
- * returned.
- *
- * If multiple expressions in the database share the same match ID, then they
- * either must all specify @ref HS_FLAG_SINGLEMATCH or none of them specify
- * @ref HS_FLAG_SINGLEMATCH. If a group of expressions sharing a match ID
- * specify the flag, then at most one match with the match ID will be generated
- * per stream.
- *
- * Note: The use of this flag in combination with @ref HS_FLAG_SOM_LEFTMOST
- * is not currently supported.
- */
-#define HS_FLAG_SINGLEMATCH 8
-
-/**
- * Compile flag: Allow expressions that can match against empty buffers.
- *
- * This flag instructs the compiler to allow expressions that can match against
- * empty buffers, such as `.?`, `.*`, `(a|)`. Since Hyperscan can return every
- * possible match for an expression, such expressions generally execute very
- * slowly; the default behaviour is to return an error when an attempt to
- * compile one is made. Using this flag will force the compiler to allow such
- * an expression.
- */
-#define HS_FLAG_ALLOWEMPTY 16
-
-/**
- * Compile flag: Enable UTF-8 mode for this expression.
- *
- * This flag instructs Hyperscan to treat the pattern as a sequence of UTF-8
- * characters. The results of scanning invalid UTF-8 sequences with a Hyperscan
- * library that has been compiled with one or more patterns using this flag are
- * undefined.
- */
-#define HS_FLAG_UTF8 32
-
-/**
- * Compile flag: Enable Unicode property support for this expression.
- *
- * This flag instructs Hyperscan to use Unicode properties, rather than the
- * default ASCII interpretations, for character mnemonics like `\w` and `\s` as
- * well as the POSIX character classes. It is only meaningful in conjunction
- * with @ref HS_FLAG_UTF8.
- */
-#define HS_FLAG_UCP 64
-
-/**
- * Compile flag: Enable prefiltering mode for this expression.
- *
- * This flag instructs Hyperscan to compile an "approximate" version of this
- * pattern for use in a prefiltering application, even if Hyperscan does not
- * support the pattern in normal operation.
- *
- * The set of matches returned when this flag is used is guaranteed to be a
- * superset of the matches specified by the non-prefiltering expression.
- *
- * If the pattern contains pattern constructs not supported by Hyperscan (such
- * as zero-width assertions, back-references or conditional references) these
- * constructs will be replaced internally with broader constructs that may
- * match more often.
- *
- * Furthermore, in prefiltering mode Hyperscan may simplify a pattern that
- * would otherwise return a "Pattern too large" error at compile time, or for
- * performance reasons (subject to the matching guarantee above).
- *
- * It is generally expected that the application will subsequently confirm
- * prefilter matches with another regular expression matcher that can provide
- * exact matches for the pattern.
- *
- * Note: The use of this flag in combination with @ref HS_FLAG_SOM_LEFTMOST
- * is not currently supported.
- */
-#define HS_FLAG_PREFILTER 128
-
-/**
- * Compile flag: Enable leftmost start of match reporting.
- *
- * This flag instructs Hyperscan to report the leftmost possible start of match
- * offset when a match is reported for this expression. (By default, no start
- * of match is returned.)
- *
+
+/**
+ * @defgroup HS_PATTERN_FLAG Pattern flags
+ *
+ * @{
+ */
+
+/**
+ * Compile flag: Set case-insensitive matching.
+ *
+ * This flag sets the expression to be matched case-insensitively by default.
+ * The expression may still use PCRE tokens (notably `(?i)` and
+ * `(?-i)`) to switch case-insensitive matching on and off.
+ */
+#define HS_FLAG_CASELESS 1
+
+/**
+ * Compile flag: Matching a `.` will not exclude newlines.
+ *
+ * This flag sets any instances of the `.` token to match newline characters as
+ * well as all other characters. The PCRE specification states that the `.`
+ * token does not match newline characters by default, so without this flag the
+ * `.` token will not cross line boundaries.
+ */
+#define HS_FLAG_DOTALL 2
+
+/**
+ * Compile flag: Set multi-line anchoring.
+ *
+ * This flag instructs the expression to make the `^` and `$` tokens match
+ * newline characters as well as the start and end of the stream. If this flag
+ * is not specified, the `^` token will only ever match at the start of a
+ * stream, and the `$` token will only ever match at the end of a stream within
+ * the guidelines of the PCRE specification.
+ */
+#define HS_FLAG_MULTILINE 4
+
+/**
+ * Compile flag: Set single-match only mode.
+ *
+ * This flag sets the expression's match ID to match at most once. In streaming
+ * mode, this means that the expression will return only a single match over
+ * the lifetime of the stream, rather than reporting every match as per
+ * standard Hyperscan semantics. In block mode or vectored mode, only the first
+ * match for each invocation of @ref hs_scan() or @ref hs_scan_vector() will be
+ * returned.
+ *
+ * If multiple expressions in the database share the same match ID, then they
+ * either must all specify @ref HS_FLAG_SINGLEMATCH or none of them specify
+ * @ref HS_FLAG_SINGLEMATCH. If a group of expressions sharing a match ID
+ * specify the flag, then at most one match with the match ID will be generated
+ * per stream.
+ *
+ * Note: The use of this flag in combination with @ref HS_FLAG_SOM_LEFTMOST
+ * is not currently supported.
+ */
+#define HS_FLAG_SINGLEMATCH 8
+
+/**
+ * Compile flag: Allow expressions that can match against empty buffers.
+ *
+ * This flag instructs the compiler to allow expressions that can match against
+ * empty buffers, such as `.?`, `.*`, `(a|)`. Since Hyperscan can return every
+ * possible match for an expression, such expressions generally execute very
+ * slowly; the default behaviour is to return an error when an attempt to
+ * compile one is made. Using this flag will force the compiler to allow such
+ * an expression.
+ */
+#define HS_FLAG_ALLOWEMPTY 16
+
+/**
+ * Compile flag: Enable UTF-8 mode for this expression.
+ *
+ * This flag instructs Hyperscan to treat the pattern as a sequence of UTF-8
+ * characters. The results of scanning invalid UTF-8 sequences with a Hyperscan
+ * library that has been compiled with one or more patterns using this flag are
+ * undefined.
+ */
+#define HS_FLAG_UTF8 32
+
+/**
+ * Compile flag: Enable Unicode property support for this expression.
+ *
+ * This flag instructs Hyperscan to use Unicode properties, rather than the
+ * default ASCII interpretations, for character mnemonics like `\w` and `\s` as
+ * well as the POSIX character classes. It is only meaningful in conjunction
+ * with @ref HS_FLAG_UTF8.
+ */
+#define HS_FLAG_UCP 64
+
+/**
+ * Compile flag: Enable prefiltering mode for this expression.
+ *
+ * This flag instructs Hyperscan to compile an "approximate" version of this
+ * pattern for use in a prefiltering application, even if Hyperscan does not
+ * support the pattern in normal operation.
+ *
+ * The set of matches returned when this flag is used is guaranteed to be a
+ * superset of the matches specified by the non-prefiltering expression.
+ *
+ * If the pattern contains pattern constructs not supported by Hyperscan (such
+ * as zero-width assertions, back-references or conditional references) these
+ * constructs will be replaced internally with broader constructs that may
+ * match more often.
+ *
+ * Furthermore, in prefiltering mode Hyperscan may simplify a pattern that
+ * would otherwise return a "Pattern too large" error at compile time, or for
+ * performance reasons (subject to the matching guarantee above).
+ *
+ * It is generally expected that the application will subsequently confirm
+ * prefilter matches with another regular expression matcher that can provide
+ * exact matches for the pattern.
+ *
+ * Note: The use of this flag in combination with @ref HS_FLAG_SOM_LEFTMOST
+ * is not currently supported.
+ */
+#define HS_FLAG_PREFILTER 128
+
+/**
+ * Compile flag: Enable leftmost start of match reporting.
+ *
+ * This flag instructs Hyperscan to report the leftmost possible start of match
+ * offset when a match is reported for this expression. (By default, no start
+ * of match is returned.)
+ *
* For all the 3 modes, enabling this behaviour may reduce performance. And
* particularly, it may increase stream state requirements in streaming mode.
- */
-#define HS_FLAG_SOM_LEFTMOST 256
-
+ */
+#define HS_FLAG_SOM_LEFTMOST 256
+
/**
* Compile flag: Logical combination.
*
@@ -1010,22 +1010,22 @@ hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform);
*/
#define HS_FLAG_QUIET 1024
-/** @} */
-
-/**
- * @defgroup HS_CPU_FEATURES_FLAG CPU feature support flags
- *
- * @{
- */
-
-/**
- * CPU features flag - Intel(R) Advanced Vector Extensions 2 (Intel(R) AVX2)
- *
- * Setting this flag indicates that the target platform supports AVX2
- * instructions.
- */
-#define HS_CPU_FEATURES_AVX2 (1ULL << 2)
-
+/** @} */
+
+/**
+ * @defgroup HS_CPU_FEATURES_FLAG CPU feature support flags
+ *
+ * @{
+ */
+
+/**
+ * CPU features flag - Intel(R) Advanced Vector Extensions 2 (Intel(R) AVX2)
+ *
+ * Setting this flag indicates that the target platform supports AVX2
+ * instructions.
+ */
+#define HS_CPU_FEATURES_AVX2 (1ULL << 2)
+
/**
* CPU features flag - Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX512)
*
@@ -1043,62 +1043,62 @@ hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform);
*/
#define HS_CPU_FEATURES_AVX512VBMI (1ULL << 4)
-/** @} */
-
-/**
- * @defgroup HS_TUNE_FLAG Tuning flags
- *
- * @{
- */
-
-/**
- * Tuning Parameter - Generic
- *
- * This indicates that the compiled database should not be tuned for any
- * particular target platform.
- */
-#define HS_TUNE_FAMILY_GENERIC 0
-
-/**
- * Tuning Parameter - Intel(R) microarchitecture code name Sandy Bridge
- *
- * This indicates that the compiled database should be tuned for the
- * Sandy Bridge microarchitecture.
- */
-#define HS_TUNE_FAMILY_SNB 1
-
-/**
- * Tuning Parameter - Intel(R) microarchitecture code name Ivy Bridge
- *
- * This indicates that the compiled database should be tuned for the
- * Ivy Bridge microarchitecture.
- */
-#define HS_TUNE_FAMILY_IVB 2
-
-/**
- * Tuning Parameter - Intel(R) microarchitecture code name Haswell
- *
- * This indicates that the compiled database should be tuned for the
- * Haswell microarchitecture.
- */
-#define HS_TUNE_FAMILY_HSW 3
-
-/**
- * Tuning Parameter - Intel(R) microarchitecture code name Silvermont
- *
- * This indicates that the compiled database should be tuned for the
- * Silvermont microarchitecture.
- */
-#define HS_TUNE_FAMILY_SLM 4
-
-/**
- * Tuning Parameter - Intel(R) microarchitecture code name Broadwell
- *
- * This indicates that the compiled database should be tuned for the
- * Broadwell microarchitecture.
- */
-#define HS_TUNE_FAMILY_BDW 5
-
+/** @} */
+
+/**
+ * @defgroup HS_TUNE_FLAG Tuning flags
+ *
+ * @{
+ */
+
+/**
+ * Tuning Parameter - Generic
+ *
+ * This indicates that the compiled database should not be tuned for any
+ * particular target platform.
+ */
+#define HS_TUNE_FAMILY_GENERIC 0
+
+/**
+ * Tuning Parameter - Intel(R) microarchitecture code name Sandy Bridge
+ *
+ * This indicates that the compiled database should be tuned for the
+ * Sandy Bridge microarchitecture.
+ */
+#define HS_TUNE_FAMILY_SNB 1
+
+/**
+ * Tuning Parameter - Intel(R) microarchitecture code name Ivy Bridge
+ *
+ * This indicates that the compiled database should be tuned for the
+ * Ivy Bridge microarchitecture.
+ */
+#define HS_TUNE_FAMILY_IVB 2
+
+/**
+ * Tuning Parameter - Intel(R) microarchitecture code name Haswell
+ *
+ * This indicates that the compiled database should be tuned for the
+ * Haswell microarchitecture.
+ */
+#define HS_TUNE_FAMILY_HSW 3
+
+/**
+ * Tuning Parameter - Intel(R) microarchitecture code name Silvermont
+ *
+ * This indicates that the compiled database should be tuned for the
+ * Silvermont microarchitecture.
+ */
+#define HS_TUNE_FAMILY_SLM 4
+
+/**
+ * Tuning Parameter - Intel(R) microarchitecture code name Broadwell
+ *
+ * This indicates that the compiled database should be tuned for the
+ * Broadwell microarchitecture.
+ */
+#define HS_TUNE_FAMILY_BDW 5
+
/**
* Tuning Parameter - Intel(R) microarchitecture code name Skylake
*
@@ -1139,86 +1139,86 @@ hs_error_t HS_CDECL hs_populate_platform(hs_platform_info_t *platform);
*/
#define HS_TUNE_FAMILY_ICX 10
-/** @} */
-
-/**
- * @defgroup HS_MODE_FLAG Compile mode flags
- *
- * The mode flags are used as values for the mode parameter of the various
- * compile calls (@ref hs_compile(), @ref hs_compile_multi() and @ref
- * hs_compile_ext_multi()).
- *
- * A mode value can be built by ORing these flag values together; the only
- * required flag is one of @ref HS_MODE_BLOCK, @ref HS_MODE_STREAM or @ref
- * HS_MODE_VECTORED. Other flags may be added to enable support for additional
- * features.
- *
- * @{
- */
-
-/**
- * Compiler mode flag: Block scan (non-streaming) database.
- */
-#define HS_MODE_BLOCK 1
-
-/**
- * Compiler mode flag: Alias for @ref HS_MODE_BLOCK.
- */
-#define HS_MODE_NOSTREAM 1
-
-/**
- * Compiler mode flag: Streaming database.
- */
-#define HS_MODE_STREAM 2
-
-/**
- * Compiler mode flag: Vectored scanning database.
- */
-#define HS_MODE_VECTORED 4
-
-/**
- * Compiler mode flag: use full precision to track start of match offsets in
- * stream state.
- *
- * This mode will use the most stream state per pattern, but will always return
- * an accurate start of match offset regardless of how far back in the past it
- * was found.
- *
- * One of the SOM_HORIZON modes must be selected to use the @ref
- * HS_FLAG_SOM_LEFTMOST expression flag.
- */
-#define HS_MODE_SOM_HORIZON_LARGE (1U << 24)
-
-/**
- * Compiler mode flag: use medium precision to track start of match offsets in
- * stream state.
- *
- * This mode will use less stream state than @ref HS_MODE_SOM_HORIZON_LARGE and
- * will limit start of match accuracy to offsets within 2^32 bytes of the
- * end of match offset reported.
- *
- * One of the SOM_HORIZON modes must be selected to use the @ref
- * HS_FLAG_SOM_LEFTMOST expression flag.
- */
-#define HS_MODE_SOM_HORIZON_MEDIUM (1U << 25)
-
-/**
- * Compiler mode flag: use limited precision to track start of match offsets in
- * stream state.
- *
- * This mode will use less stream state than @ref HS_MODE_SOM_HORIZON_LARGE and
- * will limit start of match accuracy to offsets within 2^16 bytes of the
- * end of match offset reported.
- *
- * One of the SOM_HORIZON modes must be selected to use the @ref
- * HS_FLAG_SOM_LEFTMOST expression flag.
- */
-#define HS_MODE_SOM_HORIZON_SMALL (1U << 26)
-
-/** @} */
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* HS_COMPILE_H_ */
+/** @} */
+
+/**
+ * @defgroup HS_MODE_FLAG Compile mode flags
+ *
+ * The mode flags are used as values for the mode parameter of the various
+ * compile calls (@ref hs_compile(), @ref hs_compile_multi() and @ref
+ * hs_compile_ext_multi()).
+ *
+ * A mode value can be built by ORing these flag values together; the only
+ * required flag is one of @ref HS_MODE_BLOCK, @ref HS_MODE_STREAM or @ref
+ * HS_MODE_VECTORED. Other flags may be added to enable support for additional
+ * features.
+ *
+ * @{
+ */
+
+/**
+ * Compiler mode flag: Block scan (non-streaming) database.
+ */
+#define HS_MODE_BLOCK 1
+
+/**
+ * Compiler mode flag: Alias for @ref HS_MODE_BLOCK.
+ */
+#define HS_MODE_NOSTREAM 1
+
+/**
+ * Compiler mode flag: Streaming database.
+ */
+#define HS_MODE_STREAM 2
+
+/**
+ * Compiler mode flag: Vectored scanning database.
+ */
+#define HS_MODE_VECTORED 4
+
+/**
+ * Compiler mode flag: use full precision to track start of match offsets in
+ * stream state.
+ *
+ * This mode will use the most stream state per pattern, but will always return
+ * an accurate start of match offset regardless of how far back in the past it
+ * was found.
+ *
+ * One of the SOM_HORIZON modes must be selected to use the @ref
+ * HS_FLAG_SOM_LEFTMOST expression flag.
+ */
+#define HS_MODE_SOM_HORIZON_LARGE (1U << 24)
+
+/**
+ * Compiler mode flag: use medium precision to track start of match offsets in
+ * stream state.
+ *
+ * This mode will use less stream state than @ref HS_MODE_SOM_HORIZON_LARGE and
+ * will limit start of match accuracy to offsets within 2^32 bytes of the
+ * end of match offset reported.
+ *
+ * One of the SOM_HORIZON modes must be selected to use the @ref
+ * HS_FLAG_SOM_LEFTMOST expression flag.
+ */
+#define HS_MODE_SOM_HORIZON_MEDIUM (1U << 25)
+
+/**
+ * Compiler mode flag: use limited precision to track start of match offsets in
+ * stream state.
+ *
+ * This mode will use less stream state than @ref HS_MODE_SOM_HORIZON_LARGE and
+ * will limit start of match accuracy to offsets within 2^16 bytes of the
+ * end of match offset reported.
+ *
+ * One of the SOM_HORIZON modes must be selected to use the @ref
+ * HS_FLAG_SOM_LEFTMOST expression flag.
+ */
+#define HS_MODE_SOM_HORIZON_SMALL (1U << 26)
+
+/** @} */
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* HS_COMPILE_H_ */
diff --git a/contrib/libs/hyperscan/src/hs_internal.h b/contrib/libs/hyperscan/src/hs_internal.h
index dac079689e..adf07b22cf 100644
--- a/contrib/libs/hyperscan/src/hs_internal.h
+++ b/contrib/libs/hyperscan/src/hs_internal.h
@@ -1,57 +1,57 @@
-/*
+/*
* Copyright (c) 2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Internal-use only definitions. Available to internal tools.
- */
-
-#ifndef HS_INTERNAL_H
-#define HS_INTERNAL_H
-
-#include "ue2common.h"
-#include "hs.h"
-
-#ifdef __cplusplus
-
-namespace ue2 {
-
-struct Grey;
-
-/** \brief Internal use only: takes a Grey argument so that we can use it in
- * tools. */
-hs_error_t hs_compile_multi_int(const char *const *expressions,
- const unsigned *flags, const unsigned *ids,
- const hs_expr_ext *const *ext,
- unsigned elements, unsigned mode,
- const hs_platform_info_t *platform,
- hs_database_t **db,
- hs_compile_error_t **comp_error, const Grey &g);
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Internal-use only definitions. Available to internal tools.
+ */
+
+#ifndef HS_INTERNAL_H
+#define HS_INTERNAL_H
+
+#include "ue2common.h"
+#include "hs.h"
+
+#ifdef __cplusplus
+
+namespace ue2 {
+
+struct Grey;
+
+/** \brief Internal use only: takes a Grey argument so that we can use it in
+ * tools. */
+hs_error_t hs_compile_multi_int(const char *const *expressions,
+ const unsigned *flags, const unsigned *ids,
+ const hs_expr_ext *const *ext,
+ unsigned elements, unsigned mode,
+ const hs_platform_info_t *platform,
+ hs_database_t **db,
+ hs_compile_error_t **comp_error, const Grey &g);
+
/** \brief Internal use only: takes a Grey argument so that we can use it in
* tools. */
hs_error_t hs_compile_lit_multi_int(const char *const *expressions,
@@ -63,27 +63,27 @@ hs_error_t hs_compile_lit_multi_int(const char *const *expressions,
hs_database_t **db,
hs_compile_error_t **comp_error,
const Grey &g);
-} // namespace ue2
-
-extern "C"
-{
-#endif
-
-#define HS_MATCH_FLAG_ADJUSTED 1U
-
-/** \brief Bitmask of all valid Hyperscan flags. */
-#define HS_FLAG_ALL ( HS_FLAG_CASELESS \
- | HS_FLAG_DOTALL \
- | HS_FLAG_MULTILINE \
- | HS_FLAG_UTF8 \
- | HS_FLAG_UCP \
- | HS_FLAG_PREFILTER \
- | HS_FLAG_SINGLEMATCH \
- | HS_FLAG_ALLOWEMPTY \
- | HS_FLAG_SOM_LEFTMOST)
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif
+} // namespace ue2
+
+extern "C"
+{
+#endif
+
+#define HS_MATCH_FLAG_ADJUSTED 1U
+
+/** \brief Bitmask of all valid Hyperscan flags. */
+#define HS_FLAG_ALL ( HS_FLAG_CASELESS \
+ | HS_FLAG_DOTALL \
+ | HS_FLAG_MULTILINE \
+ | HS_FLAG_UTF8 \
+ | HS_FLAG_UCP \
+ | HS_FLAG_PREFILTER \
+ | HS_FLAG_SINGLEMATCH \
+ | HS_FLAG_ALLOWEMPTY \
+ | HS_FLAG_SOM_LEFTMOST)
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/hs_runtime.h b/contrib/libs/hyperscan/src/hs_runtime.h
index 22f32c58e5..6d34b6c484 100644
--- a/contrib/libs/hyperscan/src/hs_runtime.h
+++ b/contrib/libs/hyperscan/src/hs_runtime.h
@@ -1,333 +1,333 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef HS_RUNTIME_H_
-#define HS_RUNTIME_H_
-
-#include <stdlib.h>
-
-/**
- * @file
- * @brief The Hyperscan runtime API definition.
- *
- * Hyperscan is a high speed regular expression engine.
- *
- * This header contains functions for using compiled Hyperscan databases for
- * scanning data at runtime.
- */
-
-#include "hs_common.h"
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-/**
- * Definition of the stream identifier type.
- */
-struct hs_stream;
-
-/**
- * The stream identifier returned by @ref hs_open_stream().
- */
-typedef struct hs_stream hs_stream_t;
-
-struct hs_scratch;
-
-/**
- * A Hyperscan scratch space.
- */
-typedef struct hs_scratch hs_scratch_t;
-
-/**
- * Definition of the match event callback function type.
- *
- * A callback function matching the defined type must be provided by the
- * application calling the @ref hs_scan(), @ref hs_scan_vector() or @ref
- * hs_scan_stream() functions (or other streaming calls which can produce
- * matches).
- *
- * This callback function will be invoked whenever a match is located in the
- * target data during the execution of a scan. The details of the match are
- * passed in as parameters to the callback function, and the callback function
- * should return a value indicating whether or not matching should continue on
- * the target data. If no callbacks are desired from a scan call, NULL may be
- * provided in order to suppress match production.
- *
- * This callback function should not attempt to call Hyperscan API functions on
- * the same stream nor should it attempt to reuse the scratch space allocated
- * for the API calls that caused it to be triggered. Making another call to the
- * Hyperscan library with completely independent parameters should work (for
- * example, scanning a different database in a new stream and with new scratch
- * space), but reusing data structures like stream state and/or scratch space
- * will produce undefined behavior.
- *
- * @param id
- * The ID number of the expression that matched. If the expression was a
- * single expression compiled with @ref hs_compile(), this value will be
- * zero.
- *
- * @param from
- * - If a start of match flag is enabled for the current pattern, this
- * argument will be set to the start of match for the pattern assuming
- * that that start of match value lies within the current 'start of match
- * horizon' chosen by one of the SOM_HORIZON mode flags.
-
- * - If the start of match value lies outside this horizon (possible only
- * when the SOM_HORIZON value is not @ref HS_MODE_SOM_HORIZON_LARGE),
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HS_RUNTIME_H_
+#define HS_RUNTIME_H_
+
+#include <stdlib.h>
+
+/**
+ * @file
+ * @brief The Hyperscan runtime API definition.
+ *
+ * Hyperscan is a high speed regular expression engine.
+ *
+ * This header contains functions for using compiled Hyperscan databases for
+ * scanning data at runtime.
+ */
+
+#include "hs_common.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * Definition of the stream identifier type.
+ */
+struct hs_stream;
+
+/**
+ * The stream identifier returned by @ref hs_open_stream().
+ */
+typedef struct hs_stream hs_stream_t;
+
+struct hs_scratch;
+
+/**
+ * A Hyperscan scratch space.
+ */
+typedef struct hs_scratch hs_scratch_t;
+
+/**
+ * Definition of the match event callback function type.
+ *
+ * A callback function matching the defined type must be provided by the
+ * application calling the @ref hs_scan(), @ref hs_scan_vector() or @ref
+ * hs_scan_stream() functions (or other streaming calls which can produce
+ * matches).
+ *
+ * This callback function will be invoked whenever a match is located in the
+ * target data during the execution of a scan. The details of the match are
+ * passed in as parameters to the callback function, and the callback function
+ * should return a value indicating whether or not matching should continue on
+ * the target data. If no callbacks are desired from a scan call, NULL may be
+ * provided in order to suppress match production.
+ *
+ * This callback function should not attempt to call Hyperscan API functions on
+ * the same stream nor should it attempt to reuse the scratch space allocated
+ * for the API calls that caused it to be triggered. Making another call to the
+ * Hyperscan library with completely independent parameters should work (for
+ * example, scanning a different database in a new stream and with new scratch
+ * space), but reusing data structures like stream state and/or scratch space
+ * will produce undefined behavior.
+ *
+ * @param id
+ * The ID number of the expression that matched. If the expression was a
+ * single expression compiled with @ref hs_compile(), this value will be
+ * zero.
+ *
+ * @param from
+ * - If a start of match flag is enabled for the current pattern, this
+ * argument will be set to the start of match for the pattern assuming
+ * that that start of match value lies within the current 'start of match
+ * horizon' chosen by one of the SOM_HORIZON mode flags.
+
+ * - If the start of match value lies outside this horizon (possible only
+ * when the SOM_HORIZON value is not @ref HS_MODE_SOM_HORIZON_LARGE),
* the @p from value will be set to @ref HS_OFFSET_PAST_HORIZON.
-
- * - This argument will be set to zero if the Start of Match flag is not
- * enabled for the given pattern.
- *
- * @param to
- * The offset after the last byte that matches the expression.
- *
- * @param flags
- * This is provided for future use and is unused at present.
- *
- * @param context
- * The pointer supplied by the user to the @ref hs_scan(), @ref
- * hs_scan_vector() or @ref hs_scan_stream() function.
- *
- * @return
- * Non-zero if the matching should cease, else zero. If scanning is
- * performed in streaming mode and a non-zero value is returned, any
- * subsequent calls to @ref hs_scan_stream() for that stream will
- * immediately return with @ref HS_SCAN_TERMINATED.
- */
+
+ * - This argument will be set to zero if the Start of Match flag is not
+ * enabled for the given pattern.
+ *
+ * @param to
+ * The offset after the last byte that matches the expression.
+ *
+ * @param flags
+ * This is provided for future use and is unused at present.
+ *
+ * @param context
+ * The pointer supplied by the user to the @ref hs_scan(), @ref
+ * hs_scan_vector() or @ref hs_scan_stream() function.
+ *
+ * @return
+ * Non-zero if the matching should cease, else zero. If scanning is
+ * performed in streaming mode and a non-zero value is returned, any
+ * subsequent calls to @ref hs_scan_stream() for that stream will
+ * immediately return with @ref HS_SCAN_TERMINATED.
+ */
typedef int (HS_CDECL *match_event_handler)(unsigned int id,
unsigned long long from,
unsigned long long to,
unsigned int flags,
void *context);
-
-/**
- * Open and initialise a stream.
- *
- * @param db
- * A compiled pattern database.
- *
- * @param flags
- * Flags modifying the behaviour of the stream. This parameter is provided
- * for future use and is unused at present.
- *
- * @param stream
- * On success, a pointer to the generated @ref hs_stream_t will be
- * returned; NULL on failure.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+
+/**
+ * Open and initialise a stream.
+ *
+ * @param db
+ * A compiled pattern database.
+ *
+ * @param flags
+ * Flags modifying the behaviour of the stream. This parameter is provided
+ * for future use and is unused at present.
+ *
+ * @param stream
+ * On success, a pointer to the generated @ref hs_stream_t will be
+ * returned; NULL on failure.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_open_stream(const hs_database_t *db, unsigned int flags,
hs_stream_t **stream);
-
-/**
- * Write data to be scanned to the opened stream.
- *
- * This is the function call in which the actual pattern matching takes place
- * as data is written to the stream. Matches will be returned via the @ref
- * match_event_handler callback supplied.
- *
- * @param id
- * The stream ID (returned by @ref hs_open_stream()) to which the data
- * will be written.
- *
- * @param data
- * Pointer to the data to be scanned.
- *
- * @param length
- * The number of bytes to scan.
- *
- * @param flags
- * Flags modifying the behaviour of the stream. This parameter is provided
- * for future use and is unused at present.
- *
- * @param scratch
- * A per-thread scratch space allocated by @ref hs_alloc_scratch().
- *
- * @param onEvent
- * Pointer to a match event callback function. If a NULL pointer is given,
- * no matches will be returned.
- *
- * @param ctxt
- * The user defined pointer which will be passed to the callback function
- * when a match occurs.
- *
- * @return
- * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
- * match callback indicated that scanning should stop; other values on
- * error.
- */
+
+/**
+ * Write data to be scanned to the opened stream.
+ *
+ * This is the function call in which the actual pattern matching takes place
+ * as data is written to the stream. Matches will be returned via the @ref
+ * match_event_handler callback supplied.
+ *
+ * @param id
+ * The stream ID (returned by @ref hs_open_stream()) to which the data
+ * will be written.
+ *
+ * @param data
+ * Pointer to the data to be scanned.
+ *
+ * @param length
+ * The number of bytes to scan.
+ *
+ * @param flags
+ * Flags modifying the behaviour of the stream. This parameter is provided
+ * for future use and is unused at present.
+ *
+ * @param scratch
+ * A per-thread scratch space allocated by @ref hs_alloc_scratch().
+ *
+ * @param onEvent
+ * Pointer to a match event callback function. If a NULL pointer is given,
+ * no matches will be returned.
+ *
+ * @param ctxt
+ * The user defined pointer which will be passed to the callback function
+ * when a match occurs.
+ *
+ * @return
+ * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
+ * match callback indicated that scanning should stop; other values on
+ * error.
+ */
hs_error_t HS_CDECL hs_scan_stream(hs_stream_t *id, const char *data,
unsigned int length, unsigned int flags,
hs_scratch_t *scratch,
match_event_handler onEvent, void *ctxt);
-
-/**
- * Close a stream.
- *
+
+/**
+ * Close a stream.
+ *
* This function completes matching on the given stream and frees the memory
* associated with the stream state. After this call, the stream pointed to by
* @p id is invalid and can no longer be used. To reuse the stream state after
* completion, rather than closing it, the @ref hs_reset_stream function can be
* used.
*
- * This function must be called for any stream created with @ref
- * hs_open_stream(), even if scanning has been terminated by a non-zero return
- * from the match callback function.
- *
- * Note: This operation may result in matches being returned (via calls to the
- * match event callback) for expressions anchored to the end of the data stream
- * (for example, via the use of the `$` meta-character). If these matches are
- * not desired, NULL may be provided as the @ref match_event_handler callback.
- *
- * If NULL is provided as the @ref match_event_handler callback, it is
- * permissible to provide a NULL scratch.
- *
- * @param id
- * The stream ID returned by @ref hs_open_stream().
- *
- * @param scratch
- * A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
+ * This function must be called for any stream created with @ref
+ * hs_open_stream(), even if scanning has been terminated by a non-zero return
+ * from the match callback function.
+ *
+ * Note: This operation may result in matches being returned (via calls to the
+ * match event callback) for expressions anchored to the end of the data stream
+ * (for example, via the use of the `$` meta-character). If these matches are
+ * not desired, NULL may be provided as the @ref match_event_handler callback.
+ *
+ * If NULL is provided as the @ref match_event_handler callback, it is
+ * permissible to provide a NULL scratch.
+ *
+ * @param id
+ * The stream ID returned by @ref hs_open_stream().
+ *
+ * @param scratch
+ * A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
* allowed to be NULL only if the @p onEvent callback is also NULL.
- *
- * @param onEvent
- * Pointer to a match event callback function. If a NULL pointer is given,
- * no matches will be returned.
- *
- * @param ctxt
- * The user defined pointer which will be passed to the callback function
- * when a match occurs.
- *
- * @return
- * Returns @ref HS_SUCCESS on success, other values on failure.
- */
+ *
+ * @param onEvent
+ * Pointer to a match event callback function. If a NULL pointer is given,
+ * no matches will be returned.
+ *
+ * @param ctxt
+ * The user defined pointer which will be passed to the callback function
+ * when a match occurs.
+ *
+ * @return
+ * Returns @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
match_event_handler onEvent, void *ctxt);
-
-/**
- * Reset a stream to an initial state.
- *
- * Conceptually, this is equivalent to performing @ref hs_close_stream() on the
- * given stream, followed by a @ref hs_open_stream(). This new stream replaces
- * the original stream in memory, avoiding the overhead of freeing the old
- * stream and allocating the new one.
- *
- * Note: This operation may result in matches being returned (via calls to the
- * match event callback) for expressions anchored to the end of the original
- * data stream (for example, via the use of the `$` meta-character). If these
- * matches are not desired, NULL may be provided as the @ref match_event_handler
- * callback.
- *
- * Note: the stream will also be tied to the same database.
- *
- * @param id
- * The stream (as created by @ref hs_open_stream()) to be replaced.
- *
- * @param flags
- * Flags modifying the behaviour of the stream. This parameter is provided
- * for future use and is unused at present.
- *
- * @param scratch
- * A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
+
+/**
+ * Reset a stream to an initial state.
+ *
+ * Conceptually, this is equivalent to performing @ref hs_close_stream() on the
+ * given stream, followed by a @ref hs_open_stream(). This new stream replaces
+ * the original stream in memory, avoiding the overhead of freeing the old
+ * stream and allocating the new one.
+ *
+ * Note: This operation may result in matches being returned (via calls to the
+ * match event callback) for expressions anchored to the end of the original
+ * data stream (for example, via the use of the `$` meta-character). If these
+ * matches are not desired, NULL may be provided as the @ref match_event_handler
+ * callback.
+ *
+ * Note: the stream will also be tied to the same database.
+ *
+ * @param id
+ * The stream (as created by @ref hs_open_stream()) to be replaced.
+ *
+ * @param flags
+ * Flags modifying the behaviour of the stream. This parameter is provided
+ * for future use and is unused at present.
+ *
+ * @param scratch
+ * A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
* allowed to be NULL only if the @p onEvent callback is also NULL.
- *
- * @param onEvent
- * Pointer to a match event callback function. If a NULL pointer is given,
- * no matches will be returned.
- *
- * @param context
- * The user defined pointer which will be passed to the callback function
- * when a match occurs.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+ *
+ * @param onEvent
+ * Pointer to a match event callback function. If a NULL pointer is given,
+ * no matches will be returned.
+ *
+ * @param context
+ * The user defined pointer which will be passed to the callback function
+ * when a match occurs.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, unsigned int flags,
hs_scratch_t *scratch,
match_event_handler onEvent, void *context);
-
-/**
- * Duplicate the given stream. The new stream will have the same state as the
- * original including the current stream offset.
- *
- * @param to_id
- * On success, a pointer to the new, copied @ref hs_stream_t will be
- * returned; NULL on failure.
- *
- * @param from_id
- * The stream (as created by @ref hs_open_stream()) to be copied.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+
+/**
+ * Duplicate the given stream. The new stream will have the same state as the
+ * original including the current stream offset.
+ *
+ * @param to_id
+ * On success, a pointer to the new, copied @ref hs_stream_t will be
+ * returned; NULL on failure.
+ *
+ * @param from_id
+ * The stream (as created by @ref hs_open_stream()) to be copied.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_copy_stream(hs_stream_t **to_id,
const hs_stream_t *from_id);
-
-/**
- * Duplicate the given 'from' stream state onto the 'to' stream. The 'to' stream
+
+/**
+ * Duplicate the given 'from' stream state onto the 'to' stream. The 'to' stream
* will first be reset (reporting any EOD matches if a non-NULL @p onEvent
- * callback handler is provided).
- *
- * Note: the 'to' stream and the 'from' stream must be open against the same
- * database.
- *
- * @param to_id
- * On success, a pointer to the new, copied @ref hs_stream_t will be
- * returned; NULL on failure.
- *
- * @param from_id
- * The stream (as created by @ref hs_open_stream()) to be copied.
- *
- * @param scratch
- * A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
+ * callback handler is provided).
+ *
+ * Note: the 'to' stream and the 'from' stream must be open against the same
+ * database.
+ *
+ * @param to_id
+ * On success, a pointer to the new, copied @ref hs_stream_t will be
+ * returned; NULL on failure.
+ *
+ * @param from_id
+ * The stream (as created by @ref hs_open_stream()) to be copied.
+ *
+ * @param scratch
+ * A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
* allowed to be NULL only if the @p onEvent callback is also NULL.
- *
- * @param onEvent
- * Pointer to a match event callback function. If a NULL pointer is given,
- * no matches will be returned.
- *
- * @param context
- * The user defined pointer which will be passed to the callback function
- * when a match occurs.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+ *
+ * @param onEvent
+ * Pointer to a match event callback function. If a NULL pointer is given,
+ * no matches will be returned.
+ *
+ * @param context
+ * The user defined pointer which will be passed to the callback function
+ * when a match occurs.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id,
const hs_stream_t *from_id,
hs_scratch_t *scratch,
match_event_handler onEvent,
void *context);
-
-/**
+
+/**
* Creates a compressed representation of the provided stream in the buffer
* provided. This compressed representation can be converted back into a stream
* state by using @ref hs_expand_stream() or @ref hs_reset_and_expand_stream().
@@ -442,180 +442,180 @@ hs_error_t HS_CDECL hs_reset_and_expand_stream(hs_stream_t *to_stream,
void *context);
/**
- * The block (non-streaming) regular expression scanner.
- *
- * This is the function call in which the actual pattern matching takes place
- * for block-mode pattern databases.
- *
- * @param db
- * A compiled pattern database.
- *
- * @param data
- * Pointer to the data to be scanned.
- *
- * @param length
- * The number of bytes to scan.
- *
- * @param flags
- * Flags modifying the behaviour of this function. This parameter is
- * provided for future use and is unused at present.
- *
- * @param scratch
- * A per-thread scratch space allocated by @ref hs_alloc_scratch() for this
- * database.
- *
- * @param onEvent
- * Pointer to a match event callback function. If a NULL pointer is given,
- * no matches will be returned.
- *
- * @param context
- * The user defined pointer which will be passed to the callback function.
- *
- * @return
- * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
- * match callback indicated that scanning should stop; other values on
- * error.
- */
+ * The block (non-streaming) regular expression scanner.
+ *
+ * This is the function call in which the actual pattern matching takes place
+ * for block-mode pattern databases.
+ *
+ * @param db
+ * A compiled pattern database.
+ *
+ * @param data
+ * Pointer to the data to be scanned.
+ *
+ * @param length
+ * The number of bytes to scan.
+ *
+ * @param flags
+ * Flags modifying the behaviour of this function. This parameter is
+ * provided for future use and is unused at present.
+ *
+ * @param scratch
+ * A per-thread scratch space allocated by @ref hs_alloc_scratch() for this
+ * database.
+ *
+ * @param onEvent
+ * Pointer to a match event callback function. If a NULL pointer is given,
+ * no matches will be returned.
+ *
+ * @param context
+ * The user defined pointer which will be passed to the callback function.
+ *
+ * @return
+ * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
+ * match callback indicated that scanning should stop; other values on
+ * error.
+ */
hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data,
unsigned int length, unsigned int flags,
hs_scratch_t *scratch, match_event_handler onEvent,
void *context);
-
-/**
- * The vectored regular expression scanner.
- *
- * This is the function call in which the actual pattern matching takes place
- * for vectoring-mode pattern databases.
- *
- * @param db
- * A compiled pattern database.
- *
- * @param data
- * An array of pointers to the data blocks to be scanned.
- *
- * @param length
- * An array of lengths (in bytes) of each data block to scan.
- *
- * @param count
- * Number of data blocks to scan. This should correspond to the size of
+
+/**
+ * The vectored regular expression scanner.
+ *
+ * This is the function call in which the actual pattern matching takes place
+ * for vectoring-mode pattern databases.
+ *
+ * @param db
+ * A compiled pattern database.
+ *
+ * @param data
+ * An array of pointers to the data blocks to be scanned.
+ *
+ * @param length
+ * An array of lengths (in bytes) of each data block to scan.
+ *
+ * @param count
+ * Number of data blocks to scan. This should correspond to the size of
* of the @p data and @p length arrays.
- *
- * @param flags
- * Flags modifying the behaviour of this function. This parameter is
- * provided for future use and is unused at present.
- *
- * @param scratch
- * A per-thread scratch space allocated by @ref hs_alloc_scratch() for
- * this database.
- *
- * @param onEvent
- * Pointer to a match event callback function. If a NULL pointer is given,
- * no matches will be returned.
- *
- * @param context
- * The user defined pointer which will be passed to the callback function.
- *
- * @return
- * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the match
- * callback indicated that scanning should stop; other values on error.
- */
+ *
+ * @param flags
+ * Flags modifying the behaviour of this function. This parameter is
+ * provided for future use and is unused at present.
+ *
+ * @param scratch
+ * A per-thread scratch space allocated by @ref hs_alloc_scratch() for
+ * this database.
+ *
+ * @param onEvent
+ * Pointer to a match event callback function. If a NULL pointer is given,
+ * no matches will be returned.
+ *
+ * @param context
+ * The user defined pointer which will be passed to the callback function.
+ *
+ * @return
+ * Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the match
+ * callback indicated that scanning should stop; other values on error.
+ */
hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db,
const char *const *data,
const unsigned int *length,
unsigned int count, unsigned int flags,
hs_scratch_t *scratch,
match_event_handler onEvent, void *context);
-
-/**
- * Allocate a "scratch" space for use by Hyperscan.
- *
- * This is required for runtime use, and one scratch space per thread, or
- * concurrent caller, is required. Any allocator callback set by @ref
- * hs_set_scratch_allocator() or @ref hs_set_allocator() will be used by this
- * function.
- *
- * @param db
- * The database, as produced by @ref hs_compile().
- *
- * @param scratch
- * On first allocation, a pointer to NULL should be provided so a new
- * scratch can be allocated. If a scratch block has been previously
- * allocated, then a pointer to it should be passed back in to see if it
- * is valid for this database block. If a new scratch block is required,
- * the original will be freed and the new one returned, otherwise the
- * previous scratch block will be returned. On success, the scratch block
- * will be suitable for use with the provided database in addition to any
- * databases that original scratch space was suitable for.
- *
- * @return
- * @ref HS_SUCCESS on successful allocation; @ref HS_NOMEM if the
- * allocation fails. Other errors may be returned if invalid parameters
- * are specified.
- */
+
+/**
+ * Allocate a "scratch" space for use by Hyperscan.
+ *
+ * This is required for runtime use, and one scratch space per thread, or
+ * concurrent caller, is required. Any allocator callback set by @ref
+ * hs_set_scratch_allocator() or @ref hs_set_allocator() will be used by this
+ * function.
+ *
+ * @param db
+ * The database, as produced by @ref hs_compile().
+ *
+ * @param scratch
+ * On first allocation, a pointer to NULL should be provided so a new
+ * scratch can be allocated. If a scratch block has been previously
+ * allocated, then a pointer to it should be passed back in to see if it
+ * is valid for this database block. If a new scratch block is required,
+ * the original will be freed and the new one returned, otherwise the
+ * previous scratch block will be returned. On success, the scratch block
+ * will be suitable for use with the provided database in addition to any
+ * databases that original scratch space was suitable for.
+ *
+ * @return
+ * @ref HS_SUCCESS on successful allocation; @ref HS_NOMEM if the
+ * allocation fails. Other errors may be returned if invalid parameters
+ * are specified.
+ */
hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db,
hs_scratch_t **scratch);
-
-/**
- * Allocate a scratch space that is a clone of an existing scratch space.
- *
- * This is useful when multiple concurrent threads will be using the same set
- * of compiled databases, and another scratch space is required. Any allocator
- * callback set by @ref hs_set_scratch_allocator() or @ref hs_set_allocator()
- * will be used by this function.
- *
- * @param src
- * The existing @ref hs_scratch_t to be cloned.
- *
- * @param dest
- * A pointer to the new scratch space will be returned here.
- *
- * @return
- * @ref HS_SUCCESS on success; @ref HS_NOMEM if the allocation fails.
- * Other errors may be returned if invalid parameters are specified.
- */
+
+/**
+ * Allocate a scratch space that is a clone of an existing scratch space.
+ *
+ * This is useful when multiple concurrent threads will be using the same set
+ * of compiled databases, and another scratch space is required. Any allocator
+ * callback set by @ref hs_set_scratch_allocator() or @ref hs_set_allocator()
+ * will be used by this function.
+ *
+ * @param src
+ * The existing @ref hs_scratch_t to be cloned.
+ *
+ * @param dest
+ * A pointer to the new scratch space will be returned here.
+ *
+ * @return
+ * @ref HS_SUCCESS on success; @ref HS_NOMEM if the allocation fails.
+ * Other errors may be returned if invalid parameters are specified.
+ */
hs_error_t HS_CDECL hs_clone_scratch(const hs_scratch_t *src,
hs_scratch_t **dest);
-
-/**
- * Provides the size of the given scratch space.
- *
- * @param scratch
- * A per-thread scratch space allocated by @ref hs_alloc_scratch() or @ref
- * hs_clone_scratch().
- *
- * @param scratch_size
- * On success, the size of the scratch space in bytes is placed in this
- * parameter.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+
+/**
+ * Provides the size of the given scratch space.
+ *
+ * @param scratch
+ * A per-thread scratch space allocated by @ref hs_alloc_scratch() or @ref
+ * hs_clone_scratch().
+ *
+ * @param scratch_size
+ * On success, the size of the scratch space in bytes is placed in this
+ * parameter.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_scratch_size(const hs_scratch_t *scratch,
size_t *scratch_size);
-
-/**
- * Free a scratch block previously allocated by @ref hs_alloc_scratch() or @ref
- * hs_clone_scratch().
- *
- * The free callback set by @ref hs_set_scratch_allocator() or @ref
- * hs_set_allocator() will be used by this function.
- *
- * @param scratch
- * The scratch block to be freed. NULL may also be safely provided.
- *
- * @return
- * @ref HS_SUCCESS on success, other values on failure.
- */
+
+/**
+ * Free a scratch block previously allocated by @ref hs_alloc_scratch() or @ref
+ * hs_clone_scratch().
+ *
+ * The free callback set by @ref hs_set_scratch_allocator() or @ref
+ * hs_set_allocator() will be used by this function.
+ *
+ * @param scratch
+ * The scratch block to be freed. NULL may also be safely provided.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
hs_error_t HS_CDECL hs_free_scratch(hs_scratch_t *scratch);
-
-/**
- * Callback 'from' return value, indicating that the start of this match was
- * too early to be tracked with the requested SOM_HORIZON precision.
- */
-#define HS_OFFSET_PAST_HORIZON (~0ULL)
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* HS_RUNTIME_H_ */
+
+/**
+ * Callback 'from' return value, indicating that the start of this match was
+ * too early to be tracked with the requested SOM_HORIZON precision.
+ */
+#define HS_OFFSET_PAST_HORIZON (~0ULL)
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* HS_RUNTIME_H_ */
diff --git a/contrib/libs/hyperscan/src/hs_version.c b/contrib/libs/hyperscan/src/hs_version.c
index b69b6e2c99..04cf46f3f6 100644
--- a/contrib/libs/hyperscan/src/hs_version.c
+++ b/contrib/libs/hyperscan/src/hs_version.c
@@ -1,36 +1,36 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "ue2common.h"
-#include "hs_common.h"
-#include "hs_version.h"
-
-HS_PUBLIC_API
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ue2common.h"
+#include "hs_common.h"
+#include "hs_version.h"
+
+HS_PUBLIC_API
const char * HS_CDECL hs_version(void) {
- return HS_VERSION_STRING;
-}
+ return HS_VERSION_STRING;
+}
diff --git a/contrib/libs/hyperscan/src/hwlm/hwlm.c b/contrib/libs/hyperscan/src/hwlm/hwlm.c
index a46cb26103..8cf585a98c 100644
--- a/contrib/libs/hyperscan/src/hwlm/hwlm.c
+++ b/contrib/libs/hyperscan/src/hwlm/hwlm.c
@@ -1,191 +1,191 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Hamster Wheel Literal Matcher: runtime.
- */
-#include "hwlm.h"
-#include "hwlm_internal.h"
-#include "noodle_engine.h"
-#include "scratch.h"
-#include "ue2common.h"
-#include "fdr/fdr.h"
-#include "nfa/accel.h"
-#include "nfa/shufti.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Hamster Wheel Literal Matcher: runtime.
+ */
+#include "hwlm.h"
+#include "hwlm_internal.h"
+#include "noodle_engine.h"
+#include "scratch.h"
+#include "ue2common.h"
+#include "fdr/fdr.h"
+#include "nfa/accel.h"
+#include "nfa/shufti.h"
#include "nfa/truffle.h"
-#include "nfa/vermicelli.h"
-#include <string.h>
-
-#define MIN_ACCEL_LEN_BLOCK 16
-#define MIN_ACCEL_LEN_STREAM 16
-
-static really_inline
-const u8 *run_hwlm_accel(const union AccelAux *aux, const u8 *ptr,
- const u8 *end) {
- switch (aux->accel_type) {
- case ACCEL_VERM:
- DEBUG_PRINTF("single vermicelli for 0x%02hhx\n", aux->verm.c);
- return vermicelliExec(aux->verm.c, 0, ptr, end);
- case ACCEL_VERM_NOCASE:
- DEBUG_PRINTF("single vermicelli-nocase for 0x%02hhx\n", aux->verm.c);
- return vermicelliExec(aux->verm.c, 1, ptr, end);
- case ACCEL_DVERM:
- DEBUG_PRINTF("double vermicelli for 0x%02hhx%02hhx\n", aux->dverm.c1,
- aux->dverm.c2);
- return vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 0, ptr, end);
- case ACCEL_DVERM_NOCASE:
- DEBUG_PRINTF("double vermicelli-nocase for 0x%02hhx%02hhx\n",
- aux->dverm.c1, aux->dverm.c2);
- return vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 1, ptr, end);
- case ACCEL_SHUFTI:
- DEBUG_PRINTF("single shufti\n");
- return shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end);
+#include "nfa/vermicelli.h"
+#include <string.h>
+
+#define MIN_ACCEL_LEN_BLOCK 16
+#define MIN_ACCEL_LEN_STREAM 16
+
+static really_inline
+const u8 *run_hwlm_accel(const union AccelAux *aux, const u8 *ptr,
+ const u8 *end) {
+ switch (aux->accel_type) {
+ case ACCEL_VERM:
+ DEBUG_PRINTF("single vermicelli for 0x%02hhx\n", aux->verm.c);
+ return vermicelliExec(aux->verm.c, 0, ptr, end);
+ case ACCEL_VERM_NOCASE:
+ DEBUG_PRINTF("single vermicelli-nocase for 0x%02hhx\n", aux->verm.c);
+ return vermicelliExec(aux->verm.c, 1, ptr, end);
+ case ACCEL_DVERM:
+ DEBUG_PRINTF("double vermicelli for 0x%02hhx%02hhx\n", aux->dverm.c1,
+ aux->dverm.c2);
+ return vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 0, ptr, end);
+ case ACCEL_DVERM_NOCASE:
+ DEBUG_PRINTF("double vermicelli-nocase for 0x%02hhx%02hhx\n",
+ aux->dverm.c1, aux->dverm.c2);
+ return vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 1, ptr, end);
+ case ACCEL_SHUFTI:
+ DEBUG_PRINTF("single shufti\n");
+ return shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end);
case ACCEL_TRUFFLE:
DEBUG_PRINTF("truffle\n");
return truffleExec(aux->truffle.mask1, aux->truffle.mask2, ptr, end);
- default:
- /* no acceleration, fall through and return current ptr */
+ default:
+ /* no acceleration, fall through and return current ptr */
DEBUG_PRINTF("no accel; %u\n", (int)aux->accel_type);
assert(aux->accel_type == ACCEL_NONE);
- return ptr;
- }
-}
-
-static really_inline
-void do_accel_block(const union AccelAux *aux, const u8 *buf, size_t len,
- size_t *start) {
- if (len - *start < MIN_ACCEL_LEN_BLOCK) {
- return;
- }
-
- const u8 *ptr = buf + *start;
- const u8 *end = buf + len;
- const u8 offset = aux->generic.offset;
- ptr = run_hwlm_accel(aux, ptr, end);
-
- if (offset) {
- ptr -= offset;
- if (ptr < buf) {
- ptr = buf;
- }
- }
- assert(ptr >= buf);
- *start = ptr - buf;
-}
-
-static really_inline
-int inaccurate_accel(u8 type) {
- /* accels which don't always catch up to the boundary
- * DSHUFTI is also inaccurate but it is not used by the hamsters */
- return type == ACCEL_DVERM_NOCASE || type == ACCEL_DVERM;
-}
-
-static never_inline
-void do_accel_streaming(const union AccelAux *aux, const u8 *hbuf, size_t hlen,
- const u8 *buf, size_t len, size_t *start) {
- if (aux->accel_type == ACCEL_NONE || len - *start < MIN_ACCEL_LEN_STREAM) {
- return;
- }
-
- const u8 offset = aux->generic.offset;
-
- DEBUG_PRINTF("using accel %hhu offset %hhu\n", aux->accel_type, offset);
-
- // Scan history buffer, but only if the start offset (which always refers to
- // buf) is zero.
-
- if (!*start && hlen) {
- const u8 *ptr1 = hbuf;
- const u8 *end1 = hbuf + hlen;
- if (hlen >= 16) {
- ptr1 = run_hwlm_accel(aux, ptr1, end1);
- }
-
- if ((hlen <= 16 || inaccurate_accel(aux->accel_type))
- && end1 != ptr1 && end1 - ptr1 <= 16) {
- DEBUG_PRINTF("already scanned %zu/%zu\n", ptr1 - hbuf, hlen);
- /* see if we can finish off the history buffer completely */
- u8 ALIGN_DIRECTIVE temp[17];
- ptrdiff_t tlen = end1 - ptr1;
- memcpy(temp, ptr1, tlen);
- memset(temp + tlen, 0, 17 - tlen);
- if (len) { /* for dverm */
- temp[end1 - ptr1] = *buf;
- }
-
- const u8 *tempp = run_hwlm_accel(aux, temp, temp + 17);
-
- if (tempp - temp >= tlen) {
- ptr1 = end1;
- }
- DEBUG_PRINTF("got %zu\n", tempp - temp);
- }
-
- if (ptr1 != end1) {
- DEBUG_PRINTF("bailing in history\n");
- return;
- }
- }
-
- DEBUG_PRINTF("scanning main buffer, start=%zu, len=%zu\n", *start, len);
-
- const u8 *ptr2 = buf + *start;
- const u8 *end2 = buf + len;
-
- const u8 *found = run_hwlm_accel(aux, ptr2, end2);
-
- if (found >= ptr2 + offset) {
- size_t delta = found - offset - ptr2;
- DEBUG_PRINTF("got %zu/%zu in 2nd buffer\n", delta, len);
- *start += delta;
- } else if (hlen) {
- UNUSED size_t remaining = offset + ptr2 - found;
- DEBUG_PRINTF("got %zu/%zu remaining in 1st buffer\n", remaining, hlen);
- }
-}
-
-hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len,
+ return ptr;
+ }
+}
+
+static really_inline
+void do_accel_block(const union AccelAux *aux, const u8 *buf, size_t len,
+ size_t *start) {
+ if (len - *start < MIN_ACCEL_LEN_BLOCK) {
+ return;
+ }
+
+ const u8 *ptr = buf + *start;
+ const u8 *end = buf + len;
+ const u8 offset = aux->generic.offset;
+ ptr = run_hwlm_accel(aux, ptr, end);
+
+ if (offset) {
+ ptr -= offset;
+ if (ptr < buf) {
+ ptr = buf;
+ }
+ }
+ assert(ptr >= buf);
+ *start = ptr - buf;
+}
+
+static really_inline
+int inaccurate_accel(u8 type) {
+ /* accels which don't always catch up to the boundary
+ * DSHUFTI is also inaccurate but it is not used by the hamsters */
+ return type == ACCEL_DVERM_NOCASE || type == ACCEL_DVERM;
+}
+
+static never_inline
+void do_accel_streaming(const union AccelAux *aux, const u8 *hbuf, size_t hlen,
+ const u8 *buf, size_t len, size_t *start) {
+ if (aux->accel_type == ACCEL_NONE || len - *start < MIN_ACCEL_LEN_STREAM) {
+ return;
+ }
+
+ const u8 offset = aux->generic.offset;
+
+ DEBUG_PRINTF("using accel %hhu offset %hhu\n", aux->accel_type, offset);
+
+ // Scan history buffer, but only if the start offset (which always refers to
+ // buf) is zero.
+
+ if (!*start && hlen) {
+ const u8 *ptr1 = hbuf;
+ const u8 *end1 = hbuf + hlen;
+ if (hlen >= 16) {
+ ptr1 = run_hwlm_accel(aux, ptr1, end1);
+ }
+
+ if ((hlen <= 16 || inaccurate_accel(aux->accel_type))
+ && end1 != ptr1 && end1 - ptr1 <= 16) {
+ DEBUG_PRINTF("already scanned %zu/%zu\n", ptr1 - hbuf, hlen);
+ /* see if we can finish off the history buffer completely */
+ u8 ALIGN_DIRECTIVE temp[17];
+ ptrdiff_t tlen = end1 - ptr1;
+ memcpy(temp, ptr1, tlen);
+ memset(temp + tlen, 0, 17 - tlen);
+ if (len) { /* for dverm */
+ temp[end1 - ptr1] = *buf;
+ }
+
+ const u8 *tempp = run_hwlm_accel(aux, temp, temp + 17);
+
+ if (tempp - temp >= tlen) {
+ ptr1 = end1;
+ }
+ DEBUG_PRINTF("got %zu\n", tempp - temp);
+ }
+
+ if (ptr1 != end1) {
+ DEBUG_PRINTF("bailing in history\n");
+ return;
+ }
+ }
+
+ DEBUG_PRINTF("scanning main buffer, start=%zu, len=%zu\n", *start, len);
+
+ const u8 *ptr2 = buf + *start;
+ const u8 *end2 = buf + len;
+
+ const u8 *found = run_hwlm_accel(aux, ptr2, end2);
+
+ if (found >= ptr2 + offset) {
+ size_t delta = found - offset - ptr2;
+ DEBUG_PRINTF("got %zu/%zu in 2nd buffer\n", delta, len);
+ *start += delta;
+ } else if (hlen) {
+ UNUSED size_t remaining = offset + ptr2 - found;
+ DEBUG_PRINTF("got %zu/%zu remaining in 1st buffer\n", remaining, hlen);
+ }
+}
+
+hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len,
size_t start, HWLMCallback cb, struct hs_scratch *scratch,
- hwlm_group_t groups) {
+ hwlm_group_t groups) {
assert(t);
- DEBUG_PRINTF("buf len=%zu, start=%zu, groups=%llx\n", len, start, groups);
- if (!groups) {
- DEBUG_PRINTF("groups all off\n");
- return HWLM_SUCCESS;
- }
-
- assert(start < len);
-
- if (t->type == HWLM_ENGINE_NOOD) {
- DEBUG_PRINTF("calling noodExec\n");
+ DEBUG_PRINTF("buf len=%zu, start=%zu, groups=%llx\n", len, start, groups);
+ if (!groups) {
+ DEBUG_PRINTF("groups all off\n");
+ return HWLM_SUCCESS;
+ }
+
+ assert(start < len);
+
+ if (t->type == HWLM_ENGINE_NOOD) {
+ DEBUG_PRINTF("calling noodExec\n");
return noodExec(HWLM_C_DATA(t), buf, len, start, cb, scratch);
- }
+ }
assert(t->type == HWLM_ENGINE_FDR);
const union AccelAux *aa = &t->accel0;
@@ -196,38 +196,38 @@ hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len,
do_accel_block(aa, buf, len, &start);
DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start);
return fdrExec(HWLM_C_DATA(t), buf, len, start, cb, scratch, groups);
-}
-
+}
+
hwlm_error_t hwlmExecStreaming(const struct HWLM *t, size_t len, size_t start,
HWLMCallback cb, struct hs_scratch *scratch,
hwlm_group_t groups) {
assert(t);
assert(scratch);
- const u8 *hbuf = scratch->core_info.hbuf;
- const size_t hlen = scratch->core_info.hlen;
- const u8 *buf = scratch->core_info.buf;
-
- DEBUG_PRINTF("hbuf len=%zu, buf len=%zu, start=%zu, groups=%llx\n", hlen,
- len, start, groups);
-
- if (!groups) {
- return HWLM_SUCCESS;
- }
-
- assert(start < len);
-
- if (t->type == HWLM_ENGINE_NOOD) {
- DEBUG_PRINTF("calling noodExec\n");
- // If we've been handed a start offset, we can use a block mode scan at
- // that offset.
- if (start) {
+ const u8 *hbuf = scratch->core_info.hbuf;
+ const size_t hlen = scratch->core_info.hlen;
+ const u8 *buf = scratch->core_info.buf;
+
+ DEBUG_PRINTF("hbuf len=%zu, buf len=%zu, start=%zu, groups=%llx\n", hlen,
+ len, start, groups);
+
+ if (!groups) {
+ return HWLM_SUCCESS;
+ }
+
+ assert(start < len);
+
+ if (t->type == HWLM_ENGINE_NOOD) {
+ DEBUG_PRINTF("calling noodExec\n");
+ // If we've been handed a start offset, we can use a block mode scan at
+ // that offset.
+ if (start) {
return noodExec(HWLM_C_DATA(t), buf, len, start, cb, scratch);
- } else {
- return noodExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, cb,
+ } else {
+ return noodExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, cb,
scratch);
- }
- }
+ }
+ }
assert(t->type == HWLM_ENGINE_FDR);
const union AccelAux *aa = &t->accel0;
@@ -239,4 +239,4 @@ hwlm_error_t hwlmExecStreaming(const struct HWLM *t, size_t len, size_t start,
DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start);
return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, start, cb,
scratch, groups);
-}
+}
diff --git a/contrib/libs/hyperscan/src/hwlm/hwlm.h b/contrib/libs/hyperscan/src/hwlm/hwlm.h
index 95a537bd50..224ecf6bf9 100644
--- a/contrib/libs/hyperscan/src/hwlm/hwlm.h
+++ b/contrib/libs/hyperscan/src/hwlm/hwlm.h
@@ -1,145 +1,145 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Hamster Wheel Literal Matcher: runtime API.
- */
-
-#ifndef HWLM_H
-#define HWLM_H
-
-#include "ue2common.h"
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-/** \brief Error return type for exec functions. */
-typedef int hwlm_error_t;
-
-/** \brief Type representing a set of groups as a bitmap. */
-typedef u64a hwlm_group_t;
-
-/** \brief HWLM callback return type. */
-typedef hwlm_group_t hwlmcb_rv_t;
-
-/** \brief Value representing all possible literal groups. */
-#define HWLM_ALL_GROUPS ((hwlm_group_t)~0ULL)
-
-/** \brief Callback return value indicating that we should continue matching. */
-#define HWLM_CONTINUE_MATCHING HWLM_ALL_GROUPS
-
-/** \brief Callback return value indicating that we should halt matching. */
-#define HWLM_TERMINATE_MATCHING 0
-
-/** \brief Matching finished without being terminated by the user. */
-#define HWLM_SUCCESS 0
-
-/** \brief The user terminated matching by returning HWLM_TERMINATE_MATCHING
- * from the match callback. */
-#define HWLM_TERMINATED 1
-
-/** \brief An error occurred during matching.
- *
- * This should only be used if an unsupported engine was called (like one
- * designed for a different architecture). */
-#define HWLM_ERROR_UNKNOWN 2
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Hamster Wheel Literal Matcher: runtime API.
+ */
+
+#ifndef HWLM_H
+#define HWLM_H
+
+#include "ue2common.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/** \brief Error return type for exec functions. */
+typedef int hwlm_error_t;
+
+/** \brief Type representing a set of groups as a bitmap. */
+typedef u64a hwlm_group_t;
+
+/** \brief HWLM callback return type. */
+typedef hwlm_group_t hwlmcb_rv_t;
+
+/** \brief Value representing all possible literal groups. */
+#define HWLM_ALL_GROUPS ((hwlm_group_t)~0ULL)
+
+/** \brief Callback return value indicating that we should continue matching. */
+#define HWLM_CONTINUE_MATCHING HWLM_ALL_GROUPS
+
+/** \brief Callback return value indicating that we should halt matching. */
+#define HWLM_TERMINATE_MATCHING 0
+
+/** \brief Matching finished without being terminated by the user. */
+#define HWLM_SUCCESS 0
+
+/** \brief The user terminated matching by returning HWLM_TERMINATE_MATCHING
+ * from the match callback. */
+#define HWLM_TERMINATED 1
+
+/** \brief An error occurred during matching.
+ *
+ * This should only be used if an unsupported engine was called (like one
+ * designed for a different architecture). */
+#define HWLM_ERROR_UNKNOWN 2
+
/** \brief Max length of the literal passed to HWLM. */
#define HWLM_LITERAL_MAX_LEN 8
-struct hs_scratch;
-struct HWLM;
-
-/** \brief The type for an HWLM callback.
- *
+struct hs_scratch;
+struct HWLM;
+
+/** \brief The type for an HWLM callback.
+ *
* This callback receives an end-of-match offset, the ID of the match and
* the context pointer that was passed into \ref hwlmExec or
* \ref hwlmExecStreaming.
- *
- * A callback return of \ref HWLM_TERMINATE_MATCHING will stop matching.
- *
- * A callback return of \ref HWLM_CONTINUE_MATCHING continues matching.
- *
- * An arbitrary group mask may be given as the return value. This will be taken
- * as a hint by the underlying engine that only literals with groups
- * overlapping the provided mask need to be reported.
- *
- * The underlying engine may choose not to report a match if there is no group
- * belonging to the literal which was active at the when the end match location
- * was first reached.
- */
+ *
+ * A callback return of \ref HWLM_TERMINATE_MATCHING will stop matching.
+ *
+ * A callback return of \ref HWLM_CONTINUE_MATCHING continues matching.
+ *
+ * An arbitrary group mask may be given as the return value. This will be taken
+ * as a hint by the underlying engine that only literals with groups
+ * overlapping the provided mask need to be reported.
+ *
+ * The underlying engine may choose not to report a match if there is no group
+ * belonging to the literal which was active at the when the end match location
+ * was first reached.
+ */
typedef hwlmcb_rv_t (*HWLMCallback)(size_t end, u32 id,
struct hs_scratch *scratch);
-
-/** \brief Match strings in table.
- *
- * If a match occurs, the callback function given will be called with the index
- * of the last character in the string and the \p context (passed through
- * without interpretation).
- *
- * Returns \ref HWLM_TERMINATED if scanning is cancelled due to the callback
- * returning \ref HWLM_TERMINATE_MATCHING.
- *
+
+/** \brief Match strings in table.
+ *
+ * If a match occurs, the callback function given will be called with the index
+ * of the last character in the string and the \p context (passed through
+ * without interpretation).
+ *
+ * Returns \ref HWLM_TERMINATED if scanning is cancelled due to the callback
+ * returning \ref HWLM_TERMINATE_MATCHING.
+ *
* \p start is the first offset at which a match may start. Note: match
* starts may include masks overhanging the main literal.
- *
- * The underlying engine may choose not to report any match which starts before
- * the first possible match of a literal which is in the initial group mask.
- */
-hwlm_error_t hwlmExec(const struct HWLM *tab, const u8 *buf, size_t len,
+ *
+ * The underlying engine may choose not to report any match which starts before
+ * the first possible match of a literal which is in the initial group mask.
+ */
+hwlm_error_t hwlmExec(const struct HWLM *tab, const u8 *buf, size_t len,
size_t start, HWLMCallback callback,
struct hs_scratch *scratch, hwlm_group_t groups);
-
-/** \brief As for \ref hwlmExec, but a streaming case across two buffers.
- *
- * \p len is the length of the main buffer to be scanned.
- *
- * \p start is an advisory hint representing the first offset at which a match
+
+/** \brief As for \ref hwlmExec, but a streaming case across two buffers.
+ *
+ * \p len is the length of the main buffer to be scanned.
+ *
+ * \p start is an advisory hint representing the first offset at which a match
* may start. Some underlying literal matches may not respect it. Note: match
* starts may include masks overhanging the main literal.
- *
+ *
* \p scratch is used to access the history buffer, history length and
* the main buffer.
*
- * Two buffers/lengths are provided. Matches that occur entirely within
- * the history buffer will not be reported by this function. The offsets
- * reported for the main buffer are relative to the start of that buffer (a
- * match at byte 10 of the main buffer is reported as 10). Matches that start
- * in the history buffer will have starts reported with 'negative' values.
- */
+ * Two buffers/lengths are provided. Matches that occur entirely within
+ * the history buffer will not be reported by this function. The offsets
+ * reported for the main buffer are relative to the start of that buffer (a
+ * match at byte 10 of the main buffer is reported as 10). Matches that start
+ * in the history buffer will have starts reported with 'negative' values.
+ */
hwlm_error_t hwlmExecStreaming(const struct HWLM *tab, size_t len, size_t start,
HWLMCallback callback,
struct hs_scratch *scratch, hwlm_group_t groups);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/hwlm/hwlm_build.cpp b/contrib/libs/hyperscan/src/hwlm/hwlm_build.cpp
index bbd49d340d..1b33281529 100644
--- a/contrib/libs/hyperscan/src/hwlm/hwlm_build.cpp
+++ b/contrib/libs/hyperscan/src/hwlm/hwlm_build.cpp
@@ -1,65 +1,65 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Hamster Wheel Literal Matcher: build code.
- */
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Hamster Wheel Literal Matcher: build code.
+ */
#include "hwlm_build.h"
-#include "grey.h"
-#include "hwlm.h"
-#include "hwlm_internal.h"
+#include "grey.h"
+#include "hwlm.h"
+#include "hwlm_internal.h"
#include "hwlm_literal.h"
-#include "noodle_engine.h"
-#include "noodle_build.h"
+#include "noodle_engine.h"
+#include "noodle_build.h"
#include "scratch.h"
-#include "ue2common.h"
-#include "fdr/fdr_compile.h"
+#include "ue2common.h"
+#include "fdr/fdr_compile.h"
#include "fdr/fdr_compile_internal.h"
#include "fdr/fdr_engine_description.h"
#include "fdr/teddy_engine_description.h"
-#include "util/compile_context.h"
-#include "util/compile_error.h"
+#include "util/compile_context.h"
+#include "util/compile_error.h"
#include "util/make_unique.h"
-#include "util/ue2string.h"
-
-#include <cassert>
-#include <cstring>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
+#include "util/ue2string.h"
+
+#include <cassert>
+#include <cstring>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
HWLMProto::HWLMProto(u8 engType_in, vector<hwlmLiteral> lits_in)
: engType(engType_in), lits(move(lits_in)) {}
-
+
HWLMProto::HWLMProto(u8 engType_in,
unique_ptr<FDREngineDescription> eng_in,
vector<hwlmLiteral> lits_in,
@@ -67,7 +67,7 @@ HWLMProto::HWLMProto(u8 engType_in,
bool make_small_in)
: engType(engType_in), fdrEng(move(eng_in)), lits(move(lits_in)),
bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {}
-
+
HWLMProto::HWLMProto(u8 engType_in,
unique_ptr<TeddyEngineDescription> eng_in,
vector<hwlmLiteral> lits_in,
@@ -76,45 +76,45 @@ HWLMProto::HWLMProto(u8 engType_in,
: engType(engType_in), teddyEng(move(eng_in)),
lits(move(lits_in)),
bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {}
-
+
HWLMProto::~HWLMProto() {}
-
-static
-void dumpLits(UNUSED const vector<hwlmLiteral> &lits) {
-#ifdef DEBUG
- DEBUG_PRINTF("building lit table for:\n");
- for (const auto &lit : lits) {
- printf("\t%u:%016llx %s%s\n", lit.id, lit.groups,
- escapeString(lit.s).c_str(), lit.nocase ? " (nc)" : "");
- }
-#endif
-}
-
-#ifndef NDEBUG
-// Called by an assertion.
-static
-bool everyoneHasGroups(const vector<hwlmLiteral> &lits) {
- for (const auto &lit : lits) {
- if (!lit.groups) {
- return false;
- }
- }
- return true;
-}
-#endif
-
-static
-bool isNoodleable(const vector<hwlmLiteral> &lits,
- const CompileContext &cc) {
- if (!cc.grey.allowNoodle) {
- return false;
- }
-
- if (lits.size() != 1) {
- DEBUG_PRINTF("too many literals for noodle\n");
- return false;
- }
-
+
+static
+void dumpLits(UNUSED const vector<hwlmLiteral> &lits) {
+#ifdef DEBUG
+ DEBUG_PRINTF("building lit table for:\n");
+ for (const auto &lit : lits) {
+ printf("\t%u:%016llx %s%s\n", lit.id, lit.groups,
+ escapeString(lit.s).c_str(), lit.nocase ? " (nc)" : "");
+ }
+#endif
+}
+
+#ifndef NDEBUG
+// Called by an assertion.
+static
+bool everyoneHasGroups(const vector<hwlmLiteral> &lits) {
+ for (const auto &lit : lits) {
+ if (!lit.groups) {
+ return false;
+ }
+ }
+ return true;
+}
+#endif
+
+static
+bool isNoodleable(const vector<hwlmLiteral> &lits,
+ const CompileContext &cc) {
+ if (!cc.grey.allowNoodle) {
+ return false;
+ }
+
+ if (lits.size() != 1) {
+ DEBUG_PRINTF("too many literals for noodle\n");
+ return false;
+ }
+
return true;
}
@@ -132,7 +132,7 @@ bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc,
auto noodle = noodBuildTable(lit);
if (noodle) {
engSize = noodle.size();
- }
+ }
eng = move(noodle);
} else {
DEBUG_PRINTF("building a new deal\n");
@@ -141,12 +141,12 @@ bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc,
engSize = fdr.size();
}
eng = move(fdr);
- }
-
+ }
+
if (!eng) {
return nullptr;
- }
-
+ }
+
assert(engSize);
if (engSize > cc.grey.limitLiteralMatcherSize) {
throw ResourceLimitError();
@@ -159,111 +159,111 @@ bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc,
memcpy(HWLM_DATA(h.get()), eng.get(), engSize);
return h;
-}
-
+}
+
unique_ptr<HWLMProto>
hwlmBuildProto(vector<hwlmLiteral> &lits, bool make_small,
const CompileContext &cc) {
- assert(!lits.empty());
- dumpLits(lits);
-
- // Check that we haven't exceeded the maximum number of literals.
- if (lits.size() > cc.grey.limitLiteralCount) {
- throw ResourceLimitError();
- }
-
- // Safety and resource limit checks.
- u64a total_chars = 0;
- for (const auto &lit : lits) {
- assert(!lit.s.empty());
-
- if (lit.s.length() > cc.grey.limitLiteralLength) {
- throw ResourceLimitError();
- }
- total_chars += lit.s.length();
- if (total_chars > cc.grey.limitLiteralMatcherChars) {
- throw ResourceLimitError();
- }
-
- // We do not allow the all-ones ID, as we reserve that for internal use
- // within literal matchers.
- if (lit.id == 0xffffffffu) {
- assert(!"reserved id 0xffffffff used");
- throw CompileError("Internal error.");
- }
- }
-
+ assert(!lits.empty());
+ dumpLits(lits);
+
+ // Check that we haven't exceeded the maximum number of literals.
+ if (lits.size() > cc.grey.limitLiteralCount) {
+ throw ResourceLimitError();
+ }
+
+ // Safety and resource limit checks.
+ u64a total_chars = 0;
+ for (const auto &lit : lits) {
+ assert(!lit.s.empty());
+
+ if (lit.s.length() > cc.grey.limitLiteralLength) {
+ throw ResourceLimitError();
+ }
+ total_chars += lit.s.length();
+ if (total_chars > cc.grey.limitLiteralMatcherChars) {
+ throw ResourceLimitError();
+ }
+
+ // We do not allow the all-ones ID, as we reserve that for internal use
+ // within literal matchers.
+ if (lit.id == 0xffffffffu) {
+ assert(!"reserved id 0xffffffff used");
+ throw CompileError("Internal error.");
+ }
+ }
+
unique_ptr<HWLMProto> proto;
-
- DEBUG_PRINTF("building table with %zu strings\n", lits.size());
-
- assert(everyoneHasGroups(lits));
-
+
+ DEBUG_PRINTF("building table with %zu strings\n", lits.size());
+
+ assert(everyoneHasGroups(lits));
+
if (isNoodleable(lits, cc)) {
- DEBUG_PRINTF("build noodle table\n");
+ DEBUG_PRINTF("build noodle table\n");
proto = ue2::make_unique<HWLMProto>(HWLM_ENGINE_NOOD, lits);
- } else {
- DEBUG_PRINTF("building a new deal\n");
+ } else {
+ DEBUG_PRINTF("building a new deal\n");
proto = fdrBuildProto(HWLM_ENGINE_FDR, lits, make_small,
cc.target_info, cc.grey);
if (!proto) {
return nullptr;
- }
- }
-
+ }
+ }
+
return proto;
-}
-
-size_t hwlmSize(const HWLM *h) {
- size_t engSize = 0;
-
- switch (h->type) {
- case HWLM_ENGINE_NOOD:
- engSize = noodSize((const noodTable *)HWLM_C_DATA(h));
- break;
- case HWLM_ENGINE_FDR:
- engSize = fdrSize((const FDR *)HWLM_C_DATA(h));
- break;
- }
-
- if (!engSize) {
- return 0;
- }
-
- return engSize + ROUNDUP_CL(sizeof(*h));
-}
-
-size_t hwlmFloodProneSuffixLen(size_t numLiterals, const CompileContext &cc) {
- const size_t NO_LIMIT = ~(size_t)0;
-
- // NOTE: this function contains a number of magic numbers which are
- // conservative estimates of flood-proneness based on internal details of
- // the various literal engines that fall under the HWLM aegis. If you
- // change those engines, you might need to change this function too.
-
- DEBUG_PRINTF("%zu literals\n", numLiterals);
-
- if (cc.grey.allowNoodle && numLiterals <= 1) {
- DEBUG_PRINTF("noodle\n");
- return NO_LIMIT;
- }
-
- if (cc.grey.fdrAllowTeddy) {
- if (numLiterals <= 48) {
- DEBUG_PRINTF("teddy\n");
- return 3;
- }
- if (cc.target_info.has_avx2() && numLiterals <= 96) {
- DEBUG_PRINTF("avx2 teddy\n");
- return 3;
- }
- }
-
- // TODO: we had thought we could push this value up to 9, but it seems that
- // hurts performance on floods in some FDR models. Super-conservative for
- // now.
- DEBUG_PRINTF("fdr\n");
- return 3;
-}
-
-} // namespace ue2
+}
+
+size_t hwlmSize(const HWLM *h) {
+ size_t engSize = 0;
+
+ switch (h->type) {
+ case HWLM_ENGINE_NOOD:
+ engSize = noodSize((const noodTable *)HWLM_C_DATA(h));
+ break;
+ case HWLM_ENGINE_FDR:
+ engSize = fdrSize((const FDR *)HWLM_C_DATA(h));
+ break;
+ }
+
+ if (!engSize) {
+ return 0;
+ }
+
+ return engSize + ROUNDUP_CL(sizeof(*h));
+}
+
+size_t hwlmFloodProneSuffixLen(size_t numLiterals, const CompileContext &cc) {
+ const size_t NO_LIMIT = ~(size_t)0;
+
+ // NOTE: this function contains a number of magic numbers which are
+ // conservative estimates of flood-proneness based on internal details of
+ // the various literal engines that fall under the HWLM aegis. If you
+ // change those engines, you might need to change this function too.
+
+ DEBUG_PRINTF("%zu literals\n", numLiterals);
+
+ if (cc.grey.allowNoodle && numLiterals <= 1) {
+ DEBUG_PRINTF("noodle\n");
+ return NO_LIMIT;
+ }
+
+ if (cc.grey.fdrAllowTeddy) {
+ if (numLiterals <= 48) {
+ DEBUG_PRINTF("teddy\n");
+ return 3;
+ }
+ if (cc.target_info.has_avx2() && numLiterals <= 96) {
+ DEBUG_PRINTF("avx2 teddy\n");
+ return 3;
+ }
+ }
+
+ // TODO: we had thought we could push this value up to 9, but it seems that
+ // hurts performance on floods in some FDR models. Super-conservative for
+ // now.
+ DEBUG_PRINTF("fdr\n");
+ return 3;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/hwlm/hwlm_build.h b/contrib/libs/hyperscan/src/hwlm/hwlm_build.h
index bb13ec011b..91f227dce4 100644
--- a/contrib/libs/hyperscan/src/hwlm/hwlm_build.h
+++ b/contrib/libs/hyperscan/src/hwlm/hwlm_build.h
@@ -1,73 +1,73 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Hamster Wheel Literal Matcher: build API.
- */
-
-#ifndef HWLM_BUILD_H
-#define HWLM_BUILD_H
-
-#include "hwlm.h"
-#include "hwlm_literal.h"
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Hamster Wheel Literal Matcher: build API.
+ */
+
+#ifndef HWLM_BUILD_H
+#define HWLM_BUILD_H
+
+#include "hwlm.h"
+#include "hwlm_literal.h"
+#include "ue2common.h"
#include "util/bytecode_ptr.h"
-
+
#include <map>
-#include <memory>
-#include <vector>
-
-struct HWLM;
-
-namespace ue2 {
-
+#include <memory>
+#include <vector>
+
+struct HWLM;
+
+namespace ue2 {
+
class FDREngineDescription;
class TeddyEngineDescription;
-struct CompileContext;
-struct Grey;
-
+struct CompileContext;
+struct Grey;
+
/** \brief Class representing a literal matcher prototype. */
struct HWLMProto {
/**
* \brief Engine type to distinguish noodle from FDR and Teddy.
*/
u8 engType;
-
+
/**
* \brief FDR engine description.
*/
std::unique_ptr<FDREngineDescription> fdrEng;
-
+
/**
* \brief Teddy engine description.
*/
std::unique_ptr<TeddyEngineDescription> teddyEng;
-
+
/**
* \brief HWLM literals passed from Rose.
*/
@@ -96,36 +96,36 @@ struct HWLMProto {
bool make_small_in);
~HWLMProto();
-};
-
-/** \brief Build an \ref HWLM literal matcher runtime structure for a group of
- * literals.
- *
+};
+
+/** \brief Build an \ref HWLM literal matcher runtime structure for a group of
+ * literals.
+ *
* \param proto Literal matcher prototype.
- * \param cc Compile context.
- * \param expected_groups FIXME: document me!
- *
- * Build failures are generally a result of memory allocation failure. These
- * may result in a nullptr return value, or a std::bad_alloc exception being
- * thrown.
- */
+ * \param cc Compile context.
+ * \param expected_groups FIXME: document me!
+ *
+ * Build failures are generally a result of memory allocation failure. These
+ * may result in a nullptr return value, or a std::bad_alloc exception being
+ * thrown.
+ */
bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc,
hwlm_group_t expected_groups = HWLM_ALL_GROUPS);
-
+
std::unique_ptr<HWLMProto>
hwlmBuildProto(std::vector<hwlmLiteral> &lits, bool make_small,
const CompileContext &cc);
-/**
- * Returns an estimate of the number of repeated characters on the end of a
- * literal that will make a literal set of size \a numLiterals suffer
- * performance degradation.
- */
-size_t hwlmFloodProneSuffixLen(size_t numLiterals, const CompileContext &cc);
-
-/** \brief Return the size in bytes of an HWLM structure. */
-size_t hwlmSize(const HWLM *h);
-
-} // namespace
-
-#endif // HWLM_BUILD_H
+/**
+ * Returns an estimate of the number of repeated characters on the end of a
+ * literal that will make a literal set of size \a numLiterals suffer
+ * performance degradation.
+ */
+size_t hwlmFloodProneSuffixLen(size_t numLiterals, const CompileContext &cc);
+
+/** \brief Return the size in bytes of an HWLM structure. */
+size_t hwlmSize(const HWLM *h);
+
+} // namespace ue2
+
+#endif // HWLM_BUILD_H
diff --git a/contrib/libs/hyperscan/src/hwlm/hwlm_internal.h b/contrib/libs/hyperscan/src/hwlm/hwlm_internal.h
index 92941160aa..e35c84fdf7 100644
--- a/contrib/libs/hyperscan/src/hwlm/hwlm_internal.h
+++ b/contrib/libs/hyperscan/src/hwlm/hwlm_internal.h
@@ -1,62 +1,62 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Hamster Wheel Literal Matcher: data structures.
- */
-
-#ifndef HWLM_INTERNAL_H
-#define HWLM_INTERNAL_H
-
-#include "hwlm.h"
-#include "ue2common.h"
-#include "nfa/accel.h"
-
-/** \brief Underlying engine is FDR. */
-#define HWLM_ENGINE_FDR 12
-
-/** \brief Underlying engine is Noodle. */
-#define HWLM_ENGINE_NOOD 16
-
-/** \brief Main Hamster Wheel Literal Matcher header. Followed by
- * engine-specific structure. */
-struct HWLM {
- u8 type; /**< HWLM_ENGINE_NOOD or HWLM_ENGINE_FDR */
- hwlm_group_t accel1_groups; /**< accelerable groups. */
- union AccelAux accel1; /**< used if group mask is subset of accel1_groups */
- union AccelAux accel0; /**< fallback accel scheme */
-};
-
-/** \brief Fetch a const pointer to the underlying engine. */
-#define HWLM_C_DATA(p) ((const void *)((const char *)(p) \
- + ROUNDUP_CL(sizeof(struct HWLM))))
-
-/** \brief Fetch a pointer to the underlying engine. */
-#define HWLM_DATA(p) ((void *)((char *)(p) + ROUNDUP_CL(sizeof(struct HWLM))))
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Hamster Wheel Literal Matcher: data structures.
+ */
+
+#ifndef HWLM_INTERNAL_H
+#define HWLM_INTERNAL_H
+
+#include "hwlm.h"
+#include "ue2common.h"
+#include "nfa/accel.h"
+
+/** \brief Underlying engine is FDR. */
+#define HWLM_ENGINE_FDR 12
+
+/** \brief Underlying engine is Noodle. */
+#define HWLM_ENGINE_NOOD 16
+
+/** \brief Main Hamster Wheel Literal Matcher header. Followed by
+ * engine-specific structure. */
+struct HWLM {
+ u8 type; /**< HWLM_ENGINE_NOOD or HWLM_ENGINE_FDR */
+ hwlm_group_t accel1_groups; /**< accelerable groups. */
+ union AccelAux accel1; /**< used if group mask is subset of accel1_groups */
+ union AccelAux accel0; /**< fallback accel scheme */
+};
+
+/** \brief Fetch a const pointer to the underlying engine. */
+#define HWLM_C_DATA(p) ((const void *)((const char *)(p) \
+ + ROUNDUP_CL(sizeof(struct HWLM))))
+
+/** \brief Fetch a pointer to the underlying engine. */
+#define HWLM_DATA(p) ((void *)((char *)(p) + ROUNDUP_CL(sizeof(struct HWLM))))
+
+#endif // HWLM_INTERNAL_H
diff --git a/contrib/libs/hyperscan/src/hwlm/hwlm_literal.cpp b/contrib/libs/hyperscan/src/hwlm/hwlm_literal.cpp
index bdab5edd69..692f7c6c0e 100644
--- a/contrib/libs/hyperscan/src/hwlm/hwlm_literal.cpp
+++ b/contrib/libs/hyperscan/src/hwlm/hwlm_literal.cpp
@@ -1,95 +1,95 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Hamster Wheel Literal Matcher: literal representation at build time.
- */
-#include "hwlm_literal.h"
-#include "util/bitutils.h" // for CASE_BIT
-#include "util/compare.h" // for ourisalpha
-#include "util/ue2string.h" // for escapeString
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Hamster Wheel Literal Matcher: literal representation at build time.
+ */
+#include "hwlm_literal.h"
+#include "util/bitutils.h" // for CASE_BIT
+#include "util/compare.h" // for ourisalpha
+#include "util/ue2string.h" // for escapeString
+
#include <algorithm>
-#include <iomanip>
-#include <sstream>
-
-using namespace std;
-
-namespace ue2 {
-
-#ifdef DEBUG
-static UNUSED
-std::string dumpMask(const vector<u8> &v) {
- ostringstream oss;
- vector<u8>::const_iterator it, ite;
- for (it = v.begin(), ite = v.end(); it != ite; ++it) {
- oss << setfill('0') << setw(2) << hex << (unsigned int)*it;
- }
- return oss.str();
-}
-#endif
-
-bool maskIsConsistent(const std::string &s, bool nocase, const vector<u8> &msk,
- const vector<u8> &cmp) {
- string::const_reverse_iterator si = s.rbegin();
- vector<u8>::const_reverse_iterator mi = msk.rbegin(), ci = cmp.rbegin();
-
- for (; si != s.rend() && mi != msk.rend(); ++si, ++mi, ++ci) {
- u8 c = *si, m = *mi, v = *ci;
- if (nocase && ourisalpha(c)) {
- m &= ~CASE_BIT;
- v &= ~CASE_BIT;
- }
-
- assert(ci != cmp.rend());
- if ((c & m) != v) {
- DEBUG_PRINTF("c = %02hhx; *ci = %02hhx m =%02hhx\n", c, *ci, m);
- DEBUG_PRINTF("s = %s; dist = %zd\n", s.c_str(), si - s.rbegin());
- return false;
- }
- }
-
- return true;
-}
-
-/** \brief Complete constructor, takes group information and msk/cmp.
- *
- * This constructor takes a msk/cmp pair. Both must be vectors of length <=
- * \ref HWLM_MASKLEN. */
-hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in,
- bool noruns_in, u32 id_in, hwlm_group_t groups_in,
- const vector<u8> &msk_in, const vector<u8> &cmp_in)
- : s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in),
- groups(groups_in), msk(msk_in), cmp(cmp_in) {
+#include <iomanip>
+#include <sstream>
+
+using namespace std;
+
+namespace ue2 {
+
+#ifdef DEBUG
+static UNUSED
+std::string dumpMask(const vector<u8> &v) {
+ ostringstream oss;
+ vector<u8>::const_iterator it, ite;
+ for (it = v.begin(), ite = v.end(); it != ite; ++it) {
+ oss << setfill('0') << setw(2) << hex << (unsigned int)*it;
+ }
+ return oss.str();
+}
+#endif
+
+bool maskIsConsistent(const std::string &s, bool nocase, const vector<u8> &msk,
+ const vector<u8> &cmp) {
+ string::const_reverse_iterator si = s.rbegin();
+ vector<u8>::const_reverse_iterator mi = msk.rbegin(), ci = cmp.rbegin();
+
+ for (; si != s.rend() && mi != msk.rend(); ++si, ++mi, ++ci) {
+ u8 c = *si, m = *mi, v = *ci;
+ if (nocase && ourisalpha(c)) {
+ m &= ~CASE_BIT;
+ v &= ~CASE_BIT;
+ }
+
+ assert(ci != cmp.rend());
+ if ((c & m) != v) {
+ DEBUG_PRINTF("c = %02hhx; *ci = %02hhx m =%02hhx\n", c, *ci, m);
+ DEBUG_PRINTF("s = %s; dist = %zd\n", s.c_str(), si - s.rbegin());
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/** \brief Complete constructor, takes group information and msk/cmp.
+ *
+ * This constructor takes a msk/cmp pair. Both must be vectors of length <=
+ * \ref HWLM_MASKLEN. */
+hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in,
+ bool noruns_in, u32 id_in, hwlm_group_t groups_in,
+ const vector<u8> &msk_in, const vector<u8> &cmp_in)
+ : s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in),
+ groups(groups_in), msk(msk_in), cmp(cmp_in) {
assert(s.size() <= HWLM_LITERAL_MAX_LEN);
- assert(msk.size() <= HWLM_MASKLEN);
- assert(msk.size() == cmp.size());
-
+ assert(msk.size() <= HWLM_MASKLEN);
+ assert(msk.size() == cmp.size());
+
// If we've been handled a nocase literal, all letter characters must be
// upper-case.
if (nocase) {
@@ -98,20 +98,20 @@ hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in,
DEBUG_PRINTF("literal '%s'%s, msk=%s, cmp=%s\n", escapeString(s).c_str(),
nocase ? " (nocase)" : "", dumpMask(msk).c_str(),
- dumpMask(cmp).c_str());
-
-
- // Mask and compare vectors MUST be the same size.
- assert(msk.size() == cmp.size());
-
- // We must have been passed a msk/cmp that can be applied to s.
- assert(maskIsConsistent(s, nocase, msk, cmp));
-
- // In the name of good hygiene, zap msk/cmp if msk is all zeroes.
+ dumpMask(cmp).c_str());
+
+
+ // Mask and compare vectors MUST be the same size.
+ assert(msk.size() == cmp.size());
+
+ // We must have been passed a msk/cmp that can be applied to s.
+ assert(maskIsConsistent(s, nocase, msk, cmp));
+
+ // In the name of good hygiene, zap msk/cmp if msk is all zeroes.
if (all_of(begin(msk), end(msk), [](u8 val) { return val == 0; })) {
- msk.clear();
- cmp.clear();
- }
-}
-
-} // namespace ue2
+ msk.clear();
+ cmp.clear();
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/hwlm/hwlm_literal.h b/contrib/libs/hyperscan/src/hwlm/hwlm_literal.h
index b9bf17538a..598de81471 100644
--- a/contrib/libs/hyperscan/src/hwlm/hwlm_literal.h
+++ b/contrib/libs/hyperscan/src/hwlm/hwlm_literal.h
@@ -1,71 +1,71 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Hamster Wheel Literal Matcher: literal representation at build time.
- */
-
-#ifndef HWLM_LITERAL_H
-#define HWLM_LITERAL_H
-
-#include "hwlm.h"
-#include "ue2common.h"
-
-#include <string>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Hamster Wheel Literal Matcher: literal representation at build time.
+ */
+
+#ifndef HWLM_LITERAL_H
+#define HWLM_LITERAL_H
+
+#include "hwlm.h"
+#include "ue2common.h"
+
+#include <string>
#include <tuple>
-#include <vector>
-
-namespace ue2 {
-
-/** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */
-#define HWLM_MASKLEN 8
-
+#include <vector>
+
+namespace ue2 {
+
+/** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */
+#define HWLM_MASKLEN 8
+
#define INVALID_LIT_ID ~0U
-/** \brief Class representing a literal, fed to \ref hwlmBuild. */
-struct hwlmLiteral {
- std::string s; //!< \brief The literal itself.
-
- /** \brief The ID to pass to the callback if this literal matches.
- *
- * Note that the special value 0xFFFFFFFF is reserved for internal use and
- * should not be used. */
- u32 id;
-
- bool nocase; //!< \brief True if literal is case-insensitive.
-
- /** \brief Matches for runs of this literal can be quashed.
- *
- * Advisory flag meaning that there is no value in returning runs of
- * additional matches for a literal after the first one, so such matches
- * can be quashed by the literal matcher. */
- bool noruns;
-
+/** \brief Class representing a literal, fed to \ref hwlmBuild. */
+struct hwlmLiteral {
+ std::string s; //!< \brief The literal itself.
+
+ /** \brief The ID to pass to the callback if this literal matches.
+ *
+ * Note that the special value 0xFFFFFFFF is reserved for internal use and
+ * should not be used. */
+ u32 id;
+
+ bool nocase; //!< \brief True if literal is case-insensitive.
+
+ /** \brief Matches for runs of this literal can be quashed.
+ *
+ * Advisory flag meaning that there is no value in returning runs of
+ * additional matches for a literal after the first one, so such matches
+ * can be quashed by the literal matcher. */
+ bool noruns;
+
/** \brief included literal id. */
u32 included_id = INVALID_LIT_ID;
@@ -81,53 +81,53 @@ struct hwlmLiteral {
*/
u8 squash = 0;
- /** \brief Set of groups that literal belongs to.
- *
- * Use \ref HWLM_ALL_GROUPS for a literal that could match regardless of
- * the groups that are switched on. */
- hwlm_group_t groups;
-
- /** \brief Supplementary comparison mask.
- *
- * These two values add a supplementary comparison that is done over the
- * final 8 bytes of the string -- if v is those bytes, then the string must
- * match as well as (v & msk) == cmp.
- *
- * An empty msk is the safe way of not adding any comparison to the string
- * unnecessarily filling in msk may turn off optimizations.
- *
- * The msk/cmp mechanism must NOT place a value into the literal that
- * conflicts with the contents of the string, but can be allowed to add
- * additional power within the string -- for example, to allow some case
- * sensitivity within a case-insensitive string.
-
- * Values are stored in memory order -- i.e. the last byte of the mask
- * corresponds to the last byte of the string. Both vectors must be the
- * same size, and must not exceed \ref HWLM_MASKLEN in length.
- */
- std::vector<u8> msk;
-
- /** \brief Supplementary comparison value.
- *
- * See documentation for \ref msk.
- */
- std::vector<u8> cmp;
-
- /** \brief Complete constructor, takes group information and msk/cmp.
- *
- * This constructor takes a msk/cmp pair. Both must be vectors of length <=
- * \ref HWLM_MASKLEN. */
- hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in,
- u32 id_in, hwlm_group_t groups_in,
- const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in);
+ /** \brief Set of groups that literal belongs to.
+ *
+ * Use \ref HWLM_ALL_GROUPS for a literal that could match regardless of
+ * the groups that are switched on. */
+ hwlm_group_t groups;
+
+ /** \brief Supplementary comparison mask.
+ *
+ * These two values add a supplementary comparison that is done over the
+ * final 8 bytes of the string -- if v is those bytes, then the string must
+ * match as well as (v & msk) == cmp.
+ *
+ * An empty msk is the safe way of not adding any comparison to the string;
+ * unnecessarily filling in msk may turn off optimizations.
+ *
+ * The msk/cmp mechanism must NOT place a value into the literal that
+ * conflicts with the contents of the string, but can be allowed to add
+ * additional power within the string -- for example, to allow some case
+ * sensitivity within a case-insensitive string.
+ *
+ * Values are stored in memory order -- i.e. the last byte of the mask
+ * corresponds to the last byte of the string. Both vectors must be the
+ * same size, and must not exceed \ref HWLM_MASKLEN in length.
+ */
+ std::vector<u8> msk;
+
+ /** \brief Supplementary comparison value.
+ *
+ * See documentation for \ref msk.
+ */
+ std::vector<u8> cmp;
+
+ /** \brief Complete constructor, takes group information and msk/cmp.
+ *
+ * This constructor takes a msk/cmp pair. Both must be vectors of length <=
+ * \ref HWLM_MASKLEN. */
+ hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in,
+ u32 id_in, hwlm_group_t groups_in,
+ const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in);
/** \brief Simple constructor: no group information, no msk/cmp.
*
* This constructor is only used in internal unit test. */
hwlmLiteral(const std::string &s_in, bool nocase_in, u32 id_in)
: hwlmLiteral(s_in, nocase_in, false, id_in, HWLM_ALL_GROUPS, {}, {}) {}
-};
-
+};
+
inline
bool operator<(const hwlmLiteral &a, const hwlmLiteral &b) {
return std::tie(a.id, a.s, a.nocase, a.noruns, a.groups, a.msk, a.cmp) <
@@ -141,13 +141,13 @@ bool operator==(const hwlmLiteral &a, const hwlmLiteral &b) {
a.cmp == b.cmp;
}
-/**
- * Consistency test; returns false if the given msk/cmp test can never match
- * the literal string s.
- */
-bool maskIsConsistent(const std::string &s, bool nocase,
- const std::vector<u8> &msk, const std::vector<u8> &cmp);
-
-} // namespace ue2
-
-#endif // HWLM_LITERAL_H
+/**
+ * Consistency test; returns false if the given msk/cmp test can never match
+ * the literal string s.
+ */
+bool maskIsConsistent(const std::string &s, bool nocase,
+ const std::vector<u8> &msk, const std::vector<u8> &cmp);
+
+} // namespace ue2
+
+#endif // HWLM_LITERAL_H
diff --git a/contrib/libs/hyperscan/src/hwlm/noodle_build.cpp b/contrib/libs/hyperscan/src/hwlm/noodle_build.cpp
index 711944455f..a0128d0ad7 100644
--- a/contrib/libs/hyperscan/src/hwlm/noodle_build.cpp
+++ b/contrib/libs/hyperscan/src/hwlm/noodle_build.cpp
@@ -1,53 +1,53 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Noodle literal matcher: build code.
- */
-
-#include "noodle_build.h"
+ * \brief Noodle literal matcher: build code.
+ */
+
+#include "noodle_build.h"
#include "hwlm_literal.h"
-#include "noodle_internal.h"
+#include "noodle_internal.h"
#include "util/bitutils.h"
-#include "util/compare.h"
-#include "util/verify_types.h"
+#include "util/compare.h"
+#include "util/verify_types.h"
#include "ue2common.h"
-
+
#include <cstring> // for memcpy
#include <vector>
using std::vector;
-namespace ue2 {
-
-static
+namespace ue2 {
+
+static
u64a make_u64a_mask(const vector<u8> &v) {
assert(v.size() <= sizeof(u64a));
if (v.size() > sizeof(u64a)) {
@@ -67,24 +67,24 @@ size_t findNoodFragOffset(const hwlmLiteral &lit) {
const auto &s = lit.s;
const size_t len = lit.s.length();
- size_t offset = 0;
- for (size_t i = 0; i + 1 < len; i++) {
- int diff = 0;
+ size_t offset = 0;
+ for (size_t i = 0; i + 1 < len; i++) {
+ int diff = 0;
const char c = s[i];
const char d = s[i + 1];
if (lit.nocase && ourisalpha(c)) {
- diff = (mytoupper(c) != mytoupper(d));
- } else {
- diff = (c != d);
- }
- offset = i;
- if (diff) {
- break;
- }
- }
- return offset;
-}
-
+ diff = (mytoupper(c) != mytoupper(d));
+ } else {
+ diff = (c != d);
+ }
+ offset = i;
+ if (diff) {
+ break;
+ }
+ }
+ return offset;
+}
+
bytecode_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit) {
const auto &s = lit.s;
@@ -115,11 +115,11 @@ bytecode_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit) {
}
auto n = make_zeroed_bytecode_ptr<noodTable>(sizeof(noodTable));
- assert(n);
+ assert(n);
DEBUG_PRINTF("size of nood %zu\n", sizeof(noodTable));
-
+
size_t key_offset = findNoodFragOffset(lit);
-
+
n->id = lit.id;
n->single = s.length() == 1 ? 1 : 0;
n->key_offset = verify_u8(s.length() - key_offset);
@@ -133,39 +133,39 @@ bytecode_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit) {
n->msk = make_u64a_mask(n_msk);
n->cmp = make_u64a_mask(n_cmp);
n->msk_len = mask_len;
-
- return n;
-}
-
+
+ return n;
+}
+
size_t noodSize(const noodTable *) {
return sizeof(noodTable);
-}
-
-} // namespace ue2
-
-#ifdef DUMP_SUPPORT
-#include <cctype>
-
-namespace ue2 {
-
-void noodPrintStats(const noodTable *n, FILE *f) {
- fprintf(f, "Noodle table\n");
+}
+
+} // namespace ue2
+
+#ifdef DUMP_SUPPORT
+#include <cctype>
+
+namespace ue2 {
+
+void noodPrintStats(const noodTable *n, FILE *f) {
+ fprintf(f, "Noodle table\n");
fprintf(f, "Key Offset: %u\n", n->key_offset);
fprintf(f, "Msk: %llx Cmp: %llx MskLen %u\n",
n->msk >> 8 * (8 - n->msk_len), n->cmp >> 8 * (8 - n->msk_len),
n->msk_len);
- fprintf(f, "String: ");
+ fprintf(f, "String: ");
for (u32 i = 0; i < n->msk_len; i++) {
const u8 *m = (const u8 *)&n->cmp;
if (isgraph(m[i]) && m[i] != '\\') {
fprintf(f, "%c", m[i]);
- } else {
+ } else {
fprintf(f, "\\x%02hhx", m[i]);
- }
- }
- fprintf(f, "\n");
-}
-
-} // namespace ue2
-
-#endif
+ }
+ }
+ fprintf(f, "\n");
+}
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/hwlm/noodle_build.h b/contrib/libs/hyperscan/src/hwlm/noodle_build.h
index b7cda6b5ab..b5725f0827 100644
--- a/contrib/libs/hyperscan/src/hwlm/noodle_build.h
+++ b/contrib/libs/hyperscan/src/hwlm/noodle_build.h
@@ -1,65 +1,65 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Noodle literal matcher: build code.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Noodle literal matcher: build code.
+ */
+
#ifndef NOODLE_BUILD_H
#define NOODLE_BUILD_H
-
-#include "ue2common.h"
+
+#include "ue2common.h"
#include "util/bytecode_ptr.h"
-
-struct noodTable;
-
-namespace ue2 {
-
+
+struct noodTable;
+
+namespace ue2 {
+
struct hwlmLiteral;
-/** \brief Construct a Noodle matcher for the given literal. */
+/** \brief Construct a Noodle matcher for the given literal. */
bytecode_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit);
-
-size_t noodSize(const noodTable *n);
-
-} // namespace ue2
-
-#ifdef DUMP_SUPPORT
-
-#include <cstdio>
-
-namespace ue2 {
-
-void noodPrintStats(const noodTable *n, FILE *f);
-
-} // namespace ue2
-
-#endif // DUMP_SUPPORT
-
+
+size_t noodSize(const noodTable *n);
+
+} // namespace ue2
+
+#ifdef DUMP_SUPPORT
+
+#include <cstdio>
+
+namespace ue2 {
+
+void noodPrintStats(const noodTable *n, FILE *f);
+
+} // namespace ue2
+
+#endif // DUMP_SUPPORT
+
#endif /* NOODLE_BUILD_H */
-
+
diff --git a/contrib/libs/hyperscan/src/hwlm/noodle_engine.c b/contrib/libs/hyperscan/src/hwlm/noodle_engine.c
index 7fda12de77..d4f6902a2d 100644
--- a/contrib/libs/hyperscan/src/hwlm/noodle_engine.c
+++ b/contrib/libs/hyperscan/src/hwlm/noodle_engine.c
@@ -1,60 +1,60 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Noodle literal matcher: runtime.
- */
-#include "hwlm.h"
-#include "noodle_engine.h"
-#include "noodle_internal.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Noodle literal matcher: runtime.
+ */
+#include "hwlm.h"
+#include "noodle_engine.h"
+#include "noodle_internal.h"
#include "scratch.h"
-#include "ue2common.h"
+#include "ue2common.h"
#include "util/arch.h"
-#include "util/bitutils.h"
-#include "util/compare.h"
+#include "util/bitutils.h"
+#include "util/compare.h"
#include "util/intrinsics.h"
#include "util/join.h"
-#include "util/masked_move.h"
+#include "util/masked_move.h"
#include "util/partial_store.h"
-#include "util/simd_utils.h"
-
-#include <ctype.h>
-#include <stdbool.h>
-#include <string.h>
-
-/** \brief Noodle runtime context. */
-struct cb_info {
- HWLMCallback cb; //!< callback function called on match
- u32 id; //!< ID to pass to callback on match
+#include "util/simd_utils.h"
+
+#include <ctype.h>
+#include <stdbool.h>
+#include <string.h>
+
+/** \brief Noodle runtime context. */
+struct cb_info {
+ HWLMCallback cb; //!< callback function called on match
+ u32 id; //!< ID to pass to callback on match
struct hs_scratch *scratch; //!< scratch to pass to callback
- size_t offsetAdj; //!< used in streaming mode
-};
-
+ size_t offsetAdj; //!< used in streaming mode
+};
+
#if defined(HAVE_AVX512)
#define CHUNKSIZE 64
#define MASK_TYPE m512
@@ -73,51 +73,51 @@ struct cb_info {
#endif
-#define RETURN_IF_TERMINATED(x) \
- { \
- if ((x) == HWLM_TERMINATED) { \
- return HWLM_TERMINATED; \
- } \
- }
-
-#define SINGLE_ZSCAN() \
- do { \
- while (unlikely(z)) { \
+#define RETURN_IF_TERMINATED(x) \
+ { \
+ if ((x) == HWLM_TERMINATED) { \
+ return HWLM_TERMINATED; \
+ } \
+ }
+
+#define SINGLE_ZSCAN() \
+ do { \
+ while (unlikely(z)) { \
Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \
- size_t matchPos = d - buf + pos; \
+ size_t matchPos = d - buf + pos; \
DEBUG_PRINTF("match pos %zu\n", matchPos); \
hwlmcb_rv_t rv = final(n, buf, len, 1, cbi, matchPos); \
- RETURN_IF_TERMINATED(rv); \
- } \
- } while (0)
-
-#define DOUBLE_ZSCAN() \
- do { \
- while (unlikely(z)) { \
+ RETURN_IF_TERMINATED(rv); \
+ } \
+ } while (0)
+
+#define DOUBLE_ZSCAN() \
+ do { \
+ while (unlikely(z)) { \
Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \
- size_t matchPos = d - buf + pos - 1; \
+ size_t matchPos = d - buf + pos - 1; \
DEBUG_PRINTF("match pos %zu\n", matchPos); \
hwlmcb_rv_t rv = final(n, buf, len, 0, cbi, matchPos); \
- RETURN_IF_TERMINATED(rv); \
- } \
- } while (0)
-
-static really_inline
-u8 caseClear8(u8 x, bool noCase) {
- return (u8)(noCase ? (x & (u8)0xdf) : x);
-}
-
-// Make sure the rest of the string is there. The single character scanner
-// is used only for single chars with case insensitivity used correctly,
-// so it can go straight to the callback if we get this far.
-static really_inline
+ RETURN_IF_TERMINATED(rv); \
+ } \
+ } while (0)
+
+static really_inline
+u8 caseClear8(u8 x, bool noCase) {
+ return (u8)(noCase ? (x & (u8)0xdf) : x);
+}
+
+// Make sure the rest of the string is there. The single character scanner
+// is used only for single chars with case insensitivity used correctly,
+// so it can go straight to the callback if we get this far.
+static really_inline
hwlm_error_t final(const struct noodTable *n, const u8 *buf, UNUSED size_t len,
char single, const struct cb_info *cbi, size_t pos) {
if (single) {
if (n->msk_len == 1) {
goto match;
- }
- }
+ }
+ }
assert(len >= n->msk_len);
u64a v =
partial_load_u64a(buf + pos + n->key_offset - n->msk_len, n->msk_len);
@@ -131,34 +131,34 @@ match:
pos -= cbi->offsetAdj;
DEBUG_PRINTF("match @ %zu\n", pos + n->key_offset);
hwlmcb_rv_t rv = cbi->cb(pos + n->key_offset - 1, cbi->id, cbi->scratch);
- if (rv == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATED;
- }
- return HWLM_SUCCESS;
-}
-
+ if (rv == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATED;
+ }
+ return HWLM_SUCCESS;
+}
+
#if defined(HAVE_AVX512)
#define CHUNKSIZE 64
#define MASK_TYPE m512
#include "noodle_engine_avx512.c"
#elif defined(HAVE_AVX2)
-#define CHUNKSIZE 32
-#define MASK_TYPE m256
-#include "noodle_engine_avx2.c"
-#else
-#define CHUNKSIZE 16
-#define MASK_TYPE m128
-#include "noodle_engine_sse.c"
-#endif
-
-static really_inline
+#define CHUNKSIZE 32
+#define MASK_TYPE m256
+#include "noodle_engine_avx2.c"
+#else
+#define CHUNKSIZE 16
+#define MASK_TYPE m128
+#include "noodle_engine_sse.c"
+#endif
+
+static really_inline
hwlm_error_t scanSingleMain(const struct noodTable *n, const u8 *buf,
size_t len, size_t start, bool noCase,
const struct cb_info *cbi) {
-
+
const MASK_TYPE mask1 = getMask(n->key0, noCase);
- const MASK_TYPE caseMask = getCaseMask();
-
+ const MASK_TYPE caseMask = getCaseMask();
+
size_t offset = start + n->msk_len - 1;
size_t end = len;
assert(offset < end);
@@ -169,231 +169,231 @@ hwlm_error_t scanSingleMain(const struct noodTable *n, const u8 *buf,
if (end - offset < CHUNKSIZE) {
rv = scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, offset,
end);
- return rv;
- }
-
+ return rv;
+ }
+
if (end - offset == CHUNKSIZE) {
rv = scanSingleUnaligned(n, buf, len, offset, noCase, caseMask, mask1,
cbi, offset, end);
- return rv;
- }
-
- uintptr_t data = (uintptr_t)buf;
+ return rv;
+ }
+
+ uintptr_t data = (uintptr_t)buf;
uintptr_t s2Start = ROUNDUP_N(data + offset, CHUNKSIZE) - data;
- uintptr_t last = data + end;
- uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data;
+ uintptr_t last = data + end;
+ uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data;
uintptr_t s3Start = end - CHUNKSIZE;
-
+
if (offset != s2Start) {
- // first scan out to the fast scan starting point
- DEBUG_PRINTF("stage 1: -> %zu\n", s2Start);
+ // first scan out to the fast scan starting point
+ DEBUG_PRINTF("stage 1: -> %zu\n", s2Start);
rv = scanSingleUnaligned(n, buf, len, offset, noCase, caseMask, mask1,
cbi, offset, s2Start);
- RETURN_IF_TERMINATED(rv);
- }
-
- if (likely(s2Start != s2End)) {
- // scan as far as we can, bounded by the last point this key can
- // possibly match
- DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s2End);
+ RETURN_IF_TERMINATED(rv);
+ }
+
+ if (likely(s2Start != s2End)) {
+ // scan as far as we can, bounded by the last point this key can
+ // possibly match
+ DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s2End);
rv = scanSingleFast(n, buf, len, noCase, caseMask, mask1, cbi, s2Start,
s2End);
- RETURN_IF_TERMINATED(rv);
- }
-
- // if we are done bail out
+ RETURN_IF_TERMINATED(rv);
+ }
+
+ // if we are done bail out
if (s2End == len) {
- return HWLM_SUCCESS;
- }
-
+ return HWLM_SUCCESS;
+ }
+
DEBUG_PRINTF("stage 3: %zu -> %zu\n", s2End, len);
rv = scanSingleUnaligned(n, buf, len, s3Start, noCase, caseMask, mask1, cbi,
s2End, len);
-
- return rv;
+
+ return rv;
#else // HAVE_AVX512
return scanSingle512(n, buf, len, noCase, caseMask, mask1, cbi, offset,
end);
#endif
-}
-
-static really_inline
+}
+
+static really_inline
hwlm_error_t scanDoubleMain(const struct noodTable *n, const u8 *buf,
size_t len, size_t start, bool noCase,
- const struct cb_info *cbi) {
- // we stop scanning for the key-fragment when the rest of the key can't
- // possibly fit in the remaining buffer
+ const struct cb_info *cbi) {
+ // we stop scanning for the key-fragment when the rest of the key can't
+ // possibly fit in the remaining buffer
size_t end = len - n->key_offset + 2;
-
+
// the first place the key can match
size_t offset = start + n->msk_len - n->key_offset;
- const MASK_TYPE caseMask = getCaseMask();
+ const MASK_TYPE caseMask = getCaseMask();
const MASK_TYPE mask1 = getMask(n->key0, noCase);
const MASK_TYPE mask2 = getMask(n->key1, noCase);
-
+
#if !defined(HAVE_AVX512)
hwlm_error_t rv;
if (end - offset < CHUNKSIZE) {
rv = scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi,
offset, end);
- return rv;
- }
+ return rv;
+ }
if (end - offset == CHUNKSIZE) {
rv = scanDoubleUnaligned(n, buf, len, offset, noCase, caseMask, mask1,
mask2, cbi, offset, end);
- return rv;
- }
-
- uintptr_t data = (uintptr_t)buf;
+ return rv;
+ }
+
+ uintptr_t data = (uintptr_t)buf;
uintptr_t s2Start = ROUNDUP_N(data + offset, CHUNKSIZE) - data;
- uintptr_t s1End = s2Start + 1;
- uintptr_t last = data + end;
- uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data;
- uintptr_t s3Start = end - CHUNKSIZE;
+ uintptr_t s1End = s2Start + 1;
+ uintptr_t last = data + end;
+ uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data;
+ uintptr_t s3Start = end - CHUNKSIZE;
uintptr_t off = offset;
-
+
if (s2Start != off) {
- // first scan out to the fast scan starting point plus one char past to
- // catch the key on the overlap
+ // first scan out to the fast scan starting point plus one char past to
+ // catch the key on the overlap
DEBUG_PRINTF("stage 1: %zu -> %zu\n", off, s2Start);
rv = scanDoubleUnaligned(n, buf, len, offset, noCase, caseMask, mask1,
mask2, cbi, off, s1End);
- RETURN_IF_TERMINATED(rv);
- }
- off = s1End;
-
- if (s2Start >= end) {
- DEBUG_PRINTF("s2 == mL %zu\n", end);
- return HWLM_SUCCESS;
- }
-
- if (likely(s2Start != s2End)) {
- // scan as far as we can, bounded by the last point this key can
- // possibly match
- DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s3Start);
+ RETURN_IF_TERMINATED(rv);
+ }
+ off = s1End;
+
+ if (s2Start >= end) {
+ DEBUG_PRINTF("s2 == mL %zu\n", end);
+ return HWLM_SUCCESS;
+ }
+
+ if (likely(s2Start != s2End)) {
+ // scan as far as we can, bounded by the last point this key can
+ // possibly match
+ DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s3Start);
rv = scanDoubleFast(n, buf, len, noCase, caseMask, mask1, mask2, cbi,
s2Start, s2End);
- RETURN_IF_TERMINATED(rv);
- off = s2End;
- }
-
- // if there isn't enough data left to match the key, bail out
- if (s2End == end) {
- return HWLM_SUCCESS;
- }
-
- DEBUG_PRINTF("stage 3: %zu -> %zu\n", s3Start, end);
+ RETURN_IF_TERMINATED(rv);
+ off = s2End;
+ }
+
+ // if there isn't enough data left to match the key, bail out
+ if (s2End == end) {
+ return HWLM_SUCCESS;
+ }
+
+ DEBUG_PRINTF("stage 3: %zu -> %zu\n", s3Start, end);
rv = scanDoubleUnaligned(n, buf, len, s3Start, noCase, caseMask, mask1,
mask2, cbi, off, end);
-
- return rv;
+
+ return rv;
#else // AVX512
return scanDouble512(n, buf, len, noCase, caseMask, mask1, mask2, cbi,
offset, end);
#endif // AVX512
-}
-
-
-static really_inline
+}
+
+
+static really_inline
hwlm_error_t scanSingleNoCase(const struct noodTable *n, const u8 *buf,
size_t len, size_t start,
- const struct cb_info *cbi) {
+ const struct cb_info *cbi) {
return scanSingleMain(n, buf, len, start, 1, cbi);
-}
-
-static really_inline
+}
+
+static really_inline
hwlm_error_t scanSingleCase(const struct noodTable *n, const u8 *buf,
size_t len, size_t start,
- const struct cb_info *cbi) {
+ const struct cb_info *cbi) {
return scanSingleMain(n, buf, len, start, 0, cbi);
-}
-
-// Single-character specialisation, used when keyLen = 1
-static really_inline
+}
+
+// Single-character specialisation, used when keyLen = 1
+static really_inline
hwlm_error_t scanSingle(const struct noodTable *n, const u8 *buf, size_t len,
size_t start, bool noCase, const struct cb_info *cbi) {
if (!ourisalpha(n->key0)) {
- noCase = 0; // force noCase off if we don't have an alphabetic char
- }
-
- // kinda ugly, but this forces constant propagation
- if (noCase) {
+ noCase = 0; // force noCase off if we don't have an alphabetic char
+ }
+
+ // kinda ugly, but this forces constant propagation
+ if (noCase) {
return scanSingleNoCase(n, buf, len, start, cbi);
- } else {
+ } else {
return scanSingleCase(n, buf, len, start, cbi);
- }
-}
-
-
-static really_inline
+ }
+}
+
+
+static really_inline
hwlm_error_t scanDoubleNoCase(const struct noodTable *n, const u8 *buf,
size_t len, size_t start,
- const struct cb_info *cbi) {
+ const struct cb_info *cbi) {
return scanDoubleMain(n, buf, len, start, 1, cbi);
-}
-
-static really_inline
+}
+
+static really_inline
hwlm_error_t scanDoubleCase(const struct noodTable *n, const u8 *buf,
size_t len, size_t start,
- const struct cb_info *cbi) {
+ const struct cb_info *cbi) {
return scanDoubleMain(n, buf, len, start, 0, cbi);
-}
-
-
-static really_inline
+}
+
+
+static really_inline
hwlm_error_t scanDouble(const struct noodTable *n, const u8 *buf, size_t len,
size_t start, bool noCase, const struct cb_info *cbi) {
- // kinda ugly, but this forces constant propagation
- if (noCase) {
+ // kinda ugly, but this forces constant propagation
+ if (noCase) {
return scanDoubleNoCase(n, buf, len, start, cbi);
- } else {
+ } else {
return scanDoubleCase(n, buf, len, start, cbi);
- }
-}
-
-// main entry point for the scan code
-static really_inline
+ }
+}
+
+// main entry point for the scan code
+static really_inline
hwlm_error_t scan(const struct noodTable *n, const u8 *buf, size_t len,
size_t start, char single, bool noCase,
const struct cb_info *cbi) {
if (len - start < n->msk_len) {
- // can't find string of length keyLen in a shorter buffer
- return HWLM_SUCCESS;
- }
-
+ // can't find string of length keyLen in a shorter buffer
+ return HWLM_SUCCESS;
+ }
+
if (single) {
return scanSingle(n, buf, len, start, noCase, cbi);
- } else {
+ } else {
return scanDouble(n, buf, len, start, noCase, cbi);
- }
-}
-
-/** \brief Block-mode scanner. */
-hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len,
+ }
+}
+
+/** \brief Block-mode scanner. */
+hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len,
size_t start, HWLMCallback cb,
struct hs_scratch *scratch) {
- assert(n && buf);
-
+ assert(n && buf);
+
struct cb_info cbi = {cb, n->id, scratch, 0};
DEBUG_PRINTF("nood scan of %zu bytes for %*s @ %p\n", len, n->msk_len,
(const char *)&n->cmp, buf);
return scan(n, buf, len, start, n->single, n->nocase, &cbi);
-}
-
-/** \brief Streaming-mode scanner. */
-hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf,
- size_t hlen, const u8 *buf, size_t len,
+}
+
+/** \brief Streaming-mode scanner. */
+hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf,
+ size_t hlen, const u8 *buf, size_t len,
HWLMCallback cb, struct hs_scratch *scratch) {
- assert(n);
-
+ assert(n);
+
if (len + hlen < n->msk_len) {
DEBUG_PRINTF("not enough bytes for a match\n");
return HWLM_SUCCESS;
}
-
+
struct cb_info cbi = {cb, n->id, scratch, 0};
DEBUG_PRINTF("nood scan of %zu bytes (%zu hlen) for %*s @ %p\n", len, hlen,
n->msk_len, (const char *)&n->cmp, buf);
@@ -404,14 +404,14 @@ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf,
* buffer plus what we've been given to scan. Since this is relatively
* short, just check against msk+cmp per byte offset for matches.
*/
- assert(hbuf);
+ assert(hbuf);
u8 ALIGN_DIRECTIVE temp_buf[HWLM_LITERAL_MAX_LEN * 2];
memset(temp_buf, 0, sizeof(temp_buf));
-
+
assert(n->msk_len);
size_t tl1 = MIN((size_t)n->msk_len - 1, hlen);
size_t tl2 = MIN((size_t)n->msk_len - 1, len);
-
+
assert(tl1 + tl2 <= sizeof(temp_buf));
assert(tl1 + tl2 >= n->msk_len);
assert(tl1 <= sizeof(u64a));
@@ -432,11 +432,11 @@ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf,
return HWLM_TERMINATED;
}
}
- }
- }
-
- assert(buf);
-
- cbi.offsetAdj = 0;
+ }
+ }
+
+ assert(buf);
+
+ cbi.offsetAdj = 0;
return scan(n, buf, len, 0, n->single, n->nocase, &cbi);
-}
+}
diff --git a/contrib/libs/hyperscan/src/hwlm/noodle_engine.h b/contrib/libs/hyperscan/src/hwlm/noodle_engine.h
index f910e28d69..64422c41f0 100644
--- a/contrib/libs/hyperscan/src/hwlm/noodle_engine.h
+++ b/contrib/libs/hyperscan/src/hwlm/noodle_engine.h
@@ -1,60 +1,60 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Noodle literal matcher: runtime API.
- */
-
-#ifndef NOODLE_ENGINE_H
-#define NOODLE_ENGINE_H
-
-#include "hwlm.h"
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-struct noodTable;
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Noodle literal matcher: runtime API.
+ */
+
+#ifndef NOODLE_ENGINE_H
+#define NOODLE_ENGINE_H
+
+#include "hwlm.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+struct noodTable;
struct hs_scratch;
-
-/** \brief Block-mode scanner. */
-hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len,
+
+/** \brief Block-mode scanner. */
+hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len,
size_t start, HWLMCallback cb,
struct hs_scratch *scratch);
-
-/** \brief Streaming-mode scanner. */
-hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf,
- size_t hlen, const u8 *buf, size_t len,
+
+/** \brief Streaming-mode scanner. */
+hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf,
+ size_t hlen, const u8 *buf, size_t len,
HWLMCallback cb, struct hs_scratch *scratch);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/hwlm/noodle_engine_avx2.c b/contrib/libs/hyperscan/src/hwlm/noodle_engine_avx2.c
index 2cb1f88149..5edc646af1 100644
--- a/contrib/libs/hyperscan/src/hwlm/noodle_engine_avx2.c
+++ b/contrib/libs/hyperscan/src/hwlm/noodle_engine_avx2.c
@@ -1,233 +1,233 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* noodle scan parts for AVX */
-
-static really_inline m256 getMask(u8 c, bool noCase) {
- u8 k = caseClear8(c, noCase);
- return set32x8(k);
-}
-
-static really_inline m256 getCaseMask(void) {
- return set32x8(0xdf);
-}
-
-static really_inline
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* noodle scan parts for AVX */
+
+static really_inline m256 getMask(u8 c, bool noCase) {
+ u8 k = caseClear8(c, noCase);
+ return set32x8(k);
+}
+
+static really_inline m256 getCaseMask(void) {
+ return set32x8(0xdf);
+}
+
+static really_inline
hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf,
size_t len, size_t offset, bool noCase,
m256 caseMask, m256 mask1,
const struct cb_info *cbi, size_t start,
size_t end) {
- const u8 *d = buf + offset;
- DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
- const size_t l = end - start;
-
- m256 v = loadu256(d);
-
- if (noCase) {
- v = and256(v, caseMask);
- }
-
- u32 z = movemask256(eq256(mask1, v));
-
- u32 buf_off = start - offset;
- u32 mask = (u32)((u64a)(1ULL << l) - 1) << buf_off;
- DEBUG_PRINTF("mask 0x%08x z 0x%08x\n", mask, z);
-
- z &= mask;
-
- SINGLE_ZSCAN();
-
- return HWLM_SUCCESS;
-}
-
-static really_inline
+ const u8 *d = buf + offset;
+ DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
+ const size_t l = end - start;
+
+ m256 v = loadu256(d);
+
+ if (noCase) {
+ v = and256(v, caseMask);
+ }
+
+ u32 z = movemask256(eq256(mask1, v));
+
+ u32 buf_off = start - offset;
+ u32 mask = (u32)((u64a)(1ULL << l) - 1) << buf_off;
+ DEBUG_PRINTF("mask 0x%08x z 0x%08x\n", mask, z);
+
+ z &= mask;
+
+ SINGLE_ZSCAN();
+
+ return HWLM_SUCCESS;
+}
+
+static really_inline
hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf,
size_t len, size_t offset, bool noCase,
m256 caseMask, m256 mask1, m256 mask2,
const struct cb_info *cbi, size_t start,
size_t end) {
- const u8 *d = buf + offset;
- DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
- size_t l = end - start;
-
- m256 v = loadu256(d);
-
- if (noCase) {
- v = and256(v, caseMask);
- }
-
- u32 z0 = movemask256(eq256(mask1, v));
- u32 z1 = movemask256(eq256(mask2, v));
- u32 z = (z0 << 1) & z1;
-
- // mask out where we can't match
- u32 buf_off = start - offset;
- u32 mask = (u32)((u64a)(1ULL << l) - 1) << buf_off;
- DEBUG_PRINTF("mask 0x%08x z 0x%08x\n", mask, z);
- z &= mask;
-
- DOUBLE_ZSCAN();
-
- return HWLM_SUCCESS;
-}
-
-// The short scan routine. It is used both to scan data up to an
-// alignment boundary if needed and to finish off data that the aligned scan
-// function can't handle (due to small/unaligned chunk at end)
-static really_inline
+ const u8 *d = buf + offset;
+ DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
+ size_t l = end - start;
+
+ m256 v = loadu256(d);
+
+ if (noCase) {
+ v = and256(v, caseMask);
+ }
+
+ u32 z0 = movemask256(eq256(mask1, v));
+ u32 z1 = movemask256(eq256(mask2, v));
+ u32 z = (z0 << 1) & z1;
+
+ // mask out where we can't match
+ u32 buf_off = start - offset;
+ u32 mask = (u32)((u64a)(1ULL << l) - 1) << buf_off;
+ DEBUG_PRINTF("mask 0x%08x z 0x%08x\n", mask, z);
+ z &= mask;
+
+ DOUBLE_ZSCAN();
+
+ return HWLM_SUCCESS;
+}
+
+// The short scan routine. It is used both to scan data up to an
+// alignment boundary if needed and to finish off data that the aligned scan
+// function can't handle (due to small/unaligned chunk at end)
+static really_inline
hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf,
size_t len, bool noCase, m256 caseMask, m256 mask1,
- const struct cb_info *cbi, size_t start,
- size_t end) {
- const u8 *d = buf + start;
- size_t l = end - start;
- DEBUG_PRINTF("l %zu\n", l);
- assert(l <= 32);
- if (!l) {
- return HWLM_SUCCESS;
- }
-
- m256 v;
-
- if (l < 4) {
- u8 *vp = (u8*)&v;
- switch (l) {
+ const struct cb_info *cbi, size_t start,
+ size_t end) {
+ const u8 *d = buf + start;
+ size_t l = end - start;
+ DEBUG_PRINTF("l %zu\n", l);
+ assert(l <= 32);
+ if (!l) {
+ return HWLM_SUCCESS;
+ }
+
+ m256 v;
+
+ if (l < 4) {
+ u8 *vp = (u8*)&v;
+ switch (l) {
case 3: vp[2] = d[2]; // fallthrough
case 2: vp[1] = d[1]; // fallthrough
case 1: vp[0] = d[0]; // fallthrough
- }
- } else {
- v = masked_move256_len(d, l);
- }
-
- if (noCase) {
- v = and256(v, caseMask);
- }
-
- // mask out where we can't match
- u32 mask = (0xFFFFFFFF >> (32 - l));
-
- u32 z = mask & movemask256(eq256(mask1, v));
-
- SINGLE_ZSCAN();
-
- return HWLM_SUCCESS;
-}
-
-static really_inline
+ }
+ } else {
+ v = masked_move256_len(d, l);
+ }
+
+ if (noCase) {
+ v = and256(v, caseMask);
+ }
+
+ // mask out where we can't match
+ u32 mask = (0xFFFFFFFF >> (32 - l));
+
+ u32 z = mask & movemask256(eq256(mask1, v));
+
+ SINGLE_ZSCAN();
+
+ return HWLM_SUCCESS;
+}
+
+static really_inline
hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf,
size_t len, bool noCase, m256 caseMask, m256 mask1,
m256 mask2, const struct cb_info *cbi,
size_t start, size_t end) {
- const u8 *d = buf + start;
- size_t l = end - start;
- if (!l) {
- return HWLM_SUCCESS;
- }
- assert(l <= 32);
- m256 v;
-
- DEBUG_PRINTF("d %zu\n", d - buf);
- if (l < 4) {
- u8 *vp = (u8*)&v;
- switch (l) {
+ const u8 *d = buf + start;
+ size_t l = end - start;
+ if (!l) {
+ return HWLM_SUCCESS;
+ }
+ assert(l <= 32);
+ m256 v;
+
+ DEBUG_PRINTF("d %zu\n", d - buf);
+ if (l < 4) {
+ u8 *vp = (u8*)&v;
+ switch (l) {
case 3: vp[2] = d[2]; // fallthrough
case 2: vp[1] = d[1]; // fallthrough
case 1: vp[0] = d[0]; // fallthrough
- }
- } else {
- v = masked_move256_len(d, l);
- }
- if (noCase) {
- v = and256(v, caseMask);
- }
-
- u32 z0 = movemask256(eq256(mask1, v));
- u32 z1 = movemask256(eq256(mask2, v));
- u32 z = (z0 << 1) & z1;
-
- // mask out where we can't match
- u32 mask = (0xFFFFFFFF >> (32 - l));
- z &= mask;
-
- DOUBLE_ZSCAN();
-
- return HWLM_SUCCESS;
-}
-
-static really_inline
+ }
+ } else {
+ v = masked_move256_len(d, l);
+ }
+ if (noCase) {
+ v = and256(v, caseMask);
+ }
+
+ u32 z0 = movemask256(eq256(mask1, v));
+ u32 z1 = movemask256(eq256(mask2, v));
+ u32 z = (z0 << 1) & z1;
+
+ // mask out where we can't match
+ u32 mask = (0xFFFFFFFF >> (32 - l));
+ z &= mask;
+
+ DOUBLE_ZSCAN();
+
+ return HWLM_SUCCESS;
+}
+
+static really_inline
hwlm_error_t scanSingleFast(const struct noodTable *n, const u8 *buf,
size_t len, bool noCase, m256 caseMask, m256 mask1,
- const struct cb_info *cbi, size_t start,
- size_t end) {
- const u8 *d = buf + start, *e = buf + end;
- assert(d < e);
-
- for (; d < e; d += 32) {
- m256 v = noCase ? and256(load256(d), caseMask) : load256(d);
-
- u32 z = movemask256(eq256(mask1, v));
-
- // On large packet buffers, this prefetch appears to get us about 2%.
- __builtin_prefetch(d + 128);
-
- SINGLE_ZSCAN();
- }
- return HWLM_SUCCESS;
-}
-
-static really_inline
+ const struct cb_info *cbi, size_t start,
+ size_t end) {
+ const u8 *d = buf + start, *e = buf + end;
+ assert(d < e);
+
+ for (; d < e; d += 32) {
+ m256 v = noCase ? and256(load256(d), caseMask) : load256(d);
+
+ u32 z = movemask256(eq256(mask1, v));
+
+ // On large packet buffers, this prefetch appears to get us about 2%.
+ __builtin_prefetch(d + 128);
+
+ SINGLE_ZSCAN();
+ }
+ return HWLM_SUCCESS;
+}
+
+static really_inline
hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf,
size_t len, bool noCase, m256 caseMask, m256 mask1,
m256 mask2, const struct cb_info *cbi, size_t start,
- size_t end) {
- const u8 *d = buf + start, *e = buf + end;
- DEBUG_PRINTF("start %zu end %zu \n", start, end);
- assert(d < e);
+ size_t end) {
+ const u8 *d = buf + start, *e = buf + end;
+ DEBUG_PRINTF("start %zu end %zu \n", start, end);
+ assert(d < e);
u32 lastz0 = 0;
-
- for (; d < e; d += 32) {
- m256 v = noCase ? and256(load256(d), caseMask) : load256(d);
-
- // we have to pull the masks out of the AVX registers because we can't
- // byte shift between the lanes
- u32 z0 = movemask256(eq256(mask1, v));
- u32 z1 = movemask256(eq256(mask2, v));
- u32 z = (lastz0 | (z0 << 1)) & z1;
+
+ for (; d < e; d += 32) {
+ m256 v = noCase ? and256(load256(d), caseMask) : load256(d);
+
+ // we have to pull the masks out of the AVX registers because we can't
+ // byte shift between the lanes
+ u32 z0 = movemask256(eq256(mask1, v));
+ u32 z1 = movemask256(eq256(mask2, v));
+ u32 z = (lastz0 | (z0 << 1)) & z1;
lastz0 = z0 >> 31;
-
- // On large packet buffers, this prefetch appears to get us about 2%.
- __builtin_prefetch(d + 128);
-
- DOUBLE_ZSCAN();
-
- }
- return HWLM_SUCCESS;
-}
-
+
+ // On large packet buffers, this prefetch appears to get us about 2%.
+ __builtin_prefetch(d + 128);
+
+ DOUBLE_ZSCAN();
+
+ }
+ return HWLM_SUCCESS;
+}
+
diff --git a/contrib/libs/hyperscan/src/hwlm/noodle_engine_sse.c b/contrib/libs/hyperscan/src/hwlm/noodle_engine_sse.c
index 5bdc7389a5..7cd53d7ced 100644
--- a/contrib/libs/hyperscan/src/hwlm/noodle_engine_sse.c
+++ b/contrib/libs/hyperscan/src/hwlm/noodle_engine_sse.c
@@ -1,203 +1,203 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* noodle scan parts for SSE */
-
-static really_inline m128 getMask(u8 c, bool noCase) {
- u8 k = caseClear8(c, noCase);
- return set16x8(k);
-}
-
-static really_inline m128 getCaseMask(void) {
- return set16x8(0xdf);
-}
-
-static really_inline
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* noodle scan parts for SSE */
+
+static really_inline m128 getMask(u8 c, bool noCase) {
+ u8 k = caseClear8(c, noCase);
+ return set16x8(k);
+}
+
+static really_inline m128 getCaseMask(void) {
+ return set16x8(0xdf);
+}
+
+static really_inline
hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf,
size_t len, bool noCase, m128 caseMask, m128 mask1,
- const struct cb_info *cbi, size_t start,
- size_t end) {
- const u8 *d = buf + start;
- size_t l = end - start;
- DEBUG_PRINTF("l %zu\n", l);
- assert(l <= 16);
- if (!l) {
- return HWLM_SUCCESS;
- }
- m128 v = zeroes128();
- // we don't have a clever way of doing this move yet
- memcpy(&v, d, l);
- if (noCase) {
- v = and128(v, caseMask);
- }
-
- // mask out where we can't match
- u32 mask = (0xFFFF >> (16 - l));
-
- u32 z = mask & movemask128(eq128(mask1, v));
-
- SINGLE_ZSCAN();
-
- return HWLM_SUCCESS;
-}
-
-static really_inline
+ const struct cb_info *cbi, size_t start,
+ size_t end) {
+ const u8 *d = buf + start;
+ size_t l = end - start;
+ DEBUG_PRINTF("l %zu\n", l);
+ assert(l <= 16);
+ if (!l) {
+ return HWLM_SUCCESS;
+ }
+ m128 v = zeroes128();
+ // we don't have a clever way of doing this move yet
+ memcpy(&v, d, l);
+ if (noCase) {
+ v = and128(v, caseMask);
+ }
+
+ // mask out where we can't match
+ u32 mask = (0xFFFF >> (16 - l));
+
+ u32 z = mask & movemask128(eq128(mask1, v));
+
+ SINGLE_ZSCAN();
+
+ return HWLM_SUCCESS;
+}
+
+static really_inline
hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf,
size_t len, size_t offset, bool noCase,
m128 caseMask, m128 mask1,
const struct cb_info *cbi, size_t start,
size_t end) {
- const u8 *d = buf + offset;
- DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
- const size_t l = end - start;
-
- m128 v = loadu128(d);
-
- if (noCase) {
- v = and128(v, caseMask);
- }
-
- u32 buf_off = start - offset;
- u32 mask = ((1 << l) - 1) << buf_off;
-
- u32 z = mask & movemask128(eq128(mask1, v));
-
- DEBUG_PRINTF("mask 0x%08x z 0x%08x\n", mask, z);
-
- z &= mask;
-
- SINGLE_ZSCAN();
-
- return HWLM_SUCCESS;
-}
-
-static really_inline
+ const u8 *d = buf + offset;
+ DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
+ const size_t l = end - start;
+
+ m128 v = loadu128(d);
+
+ if (noCase) {
+ v = and128(v, caseMask);
+ }
+
+ u32 buf_off = start - offset;
+ u32 mask = ((1 << l) - 1) << buf_off;
+
+ u32 z = mask & movemask128(eq128(mask1, v));
+
+ DEBUG_PRINTF("mask 0x%08x z 0x%08x\n", mask, z);
+
+ z &= mask;
+
+ SINGLE_ZSCAN();
+
+ return HWLM_SUCCESS;
+}
+
+static really_inline
hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf,
size_t len, bool noCase, m128 caseMask, m128 mask1,
m128 mask2, const struct cb_info *cbi,
size_t start, size_t end) {
- const u8 *d = buf + start;
- size_t l = end - start;
- if (!l) {
- return HWLM_SUCCESS;
- }
- assert(l <= 32);
-
- DEBUG_PRINTF("d %zu\n", d - buf);
- m128 v = zeroes128();
- memcpy(&v, d, l);
- if (noCase) {
- v = and128(v, caseMask);
- }
-
+ const u8 *d = buf + start;
+ size_t l = end - start;
+ if (!l) {
+ return HWLM_SUCCESS;
+ }
+ assert(l <= 32);
+
+ DEBUG_PRINTF("d %zu\n", d - buf);
+ m128 v = zeroes128();
+ memcpy(&v, d, l);
+ if (noCase) {
+ v = and128(v, caseMask);
+ }
+
u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1),
eq128(mask2, v)));
-
- // mask out where we can't match
- u32 mask = (0xFFFF >> (16 - l));
- z &= mask;
-
- DOUBLE_ZSCAN();
-
- return HWLM_SUCCESS;
-}
-
-static really_inline
+
+ // mask out where we can't match
+ u32 mask = (0xFFFF >> (16 - l));
+ z &= mask;
+
+ DOUBLE_ZSCAN();
+
+ return HWLM_SUCCESS;
+}
+
+static really_inline
hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf,
size_t len, size_t offset, bool noCase,
m128 caseMask, m128 mask1, m128 mask2,
const struct cb_info *cbi, size_t start,
size_t end) {
- const u8 *d = buf + offset;
- DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
- size_t l = end - start;
-
- m128 v = loadu128(d);
-
- if (noCase) {
- v = and128(v, caseMask);
- }
-
+ const u8 *d = buf + offset;
+ DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
+ size_t l = end - start;
+
+ m128 v = loadu128(d);
+
+ if (noCase) {
+ v = and128(v, caseMask);
+ }
+
u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1),
eq128(mask2, v)));
-
- // mask out where we can't match
- u32 buf_off = start - offset;
- u32 mask = ((1 << l) - 1) << buf_off;
- DEBUG_PRINTF("mask 0x%08x z 0x%08x\n", mask, z);
- z &= mask;
-
- DOUBLE_ZSCAN();
-
- return HWLM_SUCCESS;
-}
-
-static really_inline
+
+ // mask out where we can't match
+ u32 buf_off = start - offset;
+ u32 mask = ((1 << l) - 1) << buf_off;
+ DEBUG_PRINTF("mask 0x%08x z 0x%08x\n", mask, z);
+ z &= mask;
+
+ DOUBLE_ZSCAN();
+
+ return HWLM_SUCCESS;
+}
+
+static really_inline
hwlm_error_t scanSingleFast(const struct noodTable *n, const u8 *buf,
size_t len, bool noCase, m128 caseMask, m128 mask1,
- const struct cb_info *cbi, size_t start,
- size_t end) {
- const u8 *d = buf + start, *e = buf + end;
- assert(d < e);
-
- for (; d < e; d += 16) {
- m128 v = noCase ? and128(load128(d), caseMask) : load128(d);
-
- u32 z = movemask128(eq128(mask1, v));
-
- // On large packet buffers, this prefetch appears to get us about 2%.
- __builtin_prefetch(d + 128);
-
- SINGLE_ZSCAN();
- }
- return HWLM_SUCCESS;
-}
-
-static really_inline
+ const struct cb_info *cbi, size_t start,
+ size_t end) {
+ const u8 *d = buf + start, *e = buf + end;
+ assert(d < e);
+
+ for (; d < e; d += 16) {
+ m128 v = noCase ? and128(load128(d), caseMask) : load128(d);
+
+ u32 z = movemask128(eq128(mask1, v));
+
+ // On large packet buffers, this prefetch appears to get us about 2%.
+ __builtin_prefetch(d + 128);
+
+ SINGLE_ZSCAN();
+ }
+ return HWLM_SUCCESS;
+}
+
+static really_inline
hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf,
size_t len, bool noCase, m128 caseMask, m128 mask1,
m128 mask2, const struct cb_info *cbi, size_t start,
- size_t end) {
- const u8 *d = buf + start, *e = buf + end;
- assert(d < e);
- m128 lastz1 = zeroes128();
-
- for (; d < e; d += 16) {
- m128 v = noCase ? and128(load128(d), caseMask) : load128(d);
- m128 z1 = eq128(mask1, v);
- m128 z2 = eq128(mask2, v);
+ size_t end) {
+ const u8 *d = buf + start, *e = buf + end;
+ assert(d < e);
+ m128 lastz1 = zeroes128();
+
+ for (; d < e; d += 16) {
+ m128 v = noCase ? and128(load128(d), caseMask) : load128(d);
+ m128 z1 = eq128(mask1, v);
+ m128 z2 = eq128(mask2, v);
u32 z = movemask128(and128(palignr(z1, lastz1, 15), z2));
lastz1 = z1;
-
- // On large packet buffers, this prefetch appears to get us about 2%.
- __builtin_prefetch(d + 128);
- DEBUG_PRINTF("z 0x%08x\n", z);
- DOUBLE_ZSCAN();
- }
- return HWLM_SUCCESS;
-}
+
+ // On large packet buffers, this prefetch appears to get us about 2%.
+ __builtin_prefetch(d + 128);
+ DEBUG_PRINTF("z 0x%08x\n", z);
+ DOUBLE_ZSCAN();
+ }
+ return HWLM_SUCCESS;
+}
diff --git a/contrib/libs/hyperscan/src/hwlm/noodle_internal.h b/contrib/libs/hyperscan/src/hwlm/noodle_internal.h
index 2c578c28f3..8f76f177e1 100644
--- a/contrib/libs/hyperscan/src/hwlm/noodle_internal.h
+++ b/contrib/libs/hyperscan/src/hwlm/noodle_internal.h
@@ -1,42 +1,42 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Data structures for Noodle literal matcher engine.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Data structures for Noodle literal matcher engine.
+ */
+
#ifndef NOODLE_INTERNAL_H
#define NOODLE_INTERNAL_H
-
-#include "ue2common.h"
-
-struct noodTable {
- u32 id;
+
+#include "ue2common.h"
+
+struct noodTable {
+ u32 id;
u64a msk;
u64a cmp;
u8 msk_len;
@@ -45,7 +45,7 @@ struct noodTable {
u8 single;
u8 key0;
u8 key1;
-};
-
+};
+
#endif /* NOODLE_INTERNAL_H */
-
+
diff --git a/contrib/libs/hyperscan/src/nfa/accel.c b/contrib/libs/hyperscan/src/nfa/accel.c
index 82e94d40a7..2bc60945f9 100644
--- a/contrib/libs/hyperscan/src/nfa/accel.c
+++ b/contrib/libs/hyperscan/src/nfa/accel.c
@@ -1,86 +1,86 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "accel.h"
-#include "shufti.h"
-#include "truffle.h"
-#include "vermicelli.h"
-#include "ue2common.h"
-
-const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
- assert(ISALIGNED_N(accel, alignof(union AccelAux)));
- const u8 *rv;
-
- switch (accel->accel_type) {
- case ACCEL_NONE:
- DEBUG_PRINTF("accel none %p %p\n", c, c_end);
- return c;
-
- case ACCEL_VERM:
- DEBUG_PRINTF("accel verm %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = vermicelliExec(accel->verm.c, 0, c, c_end);
- break;
-
- case ACCEL_VERM_NOCASE:
- DEBUG_PRINTF("accel verm nc %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = vermicelliExec(accel->verm.c, 1, c, c_end);
- break;
-
- case ACCEL_DVERM:
- DEBUG_PRINTF("accel dverm %p %p\n", c, c_end);
- if (c + 16 + 1 >= c_end) {
- return c;
- }
-
- /* need to stop one early to get an accurate end state */
- rv = vermicelliDoubleExec(accel->dverm.c1, accel->dverm.c2, 0, c,
- c_end - 1);
- break;
-
- case ACCEL_DVERM_NOCASE:
- DEBUG_PRINTF("accel dverm nc %p %p\n", c, c_end);
- if (c + 16 + 1 >= c_end) {
- return c;
- }
-
- /* need to stop one early to get an accurate end state */
- rv = vermicelliDoubleExec(accel->dverm.c1, accel->dverm.c2, 1, c,
- c_end - 1);
- break;
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "accel.h"
+#include "shufti.h"
+#include "truffle.h"
+#include "vermicelli.h"
+#include "ue2common.h"
+
+const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
+ assert(ISALIGNED_N(accel, alignof(union AccelAux)));
+ const u8 *rv;
+
+ switch (accel->accel_type) {
+ case ACCEL_NONE:
+ DEBUG_PRINTF("accel none %p %p\n", c, c_end);
+ return c;
+
+ case ACCEL_VERM:
+ DEBUG_PRINTF("accel verm %p %p\n", c, c_end);
+ if (c + 15 >= c_end) {
+ return c;
+ }
+
+ rv = vermicelliExec(accel->verm.c, 0, c, c_end);
+ break;
+
+ case ACCEL_VERM_NOCASE:
+ DEBUG_PRINTF("accel verm nc %p %p\n", c, c_end);
+ if (c + 15 >= c_end) {
+ return c;
+ }
+
+ rv = vermicelliExec(accel->verm.c, 1, c, c_end);
+ break;
+
+ case ACCEL_DVERM:
+ DEBUG_PRINTF("accel dverm %p %p\n", c, c_end);
+ if (c + 16 + 1 >= c_end) {
+ return c;
+ }
+
+ /* need to stop one early to get an accurate end state */
+ rv = vermicelliDoubleExec(accel->dverm.c1, accel->dverm.c2, 0, c,
+ c_end - 1);
+ break;
+
+ case ACCEL_DVERM_NOCASE:
+ DEBUG_PRINTF("accel dverm nc %p %p\n", c, c_end);
+ if (c + 16 + 1 >= c_end) {
+ return c;
+ }
+
+ /* need to stop one early to get an accurate end state */
+ rv = vermicelliDoubleExec(accel->dverm.c1, accel->dverm.c2, 1, c,
+ c_end - 1);
+ break;
+
case ACCEL_DVERM_MASKED:
DEBUG_PRINTF("accel dverm masked %p %p\n", c, c_end);
if (c + 16 + 1 >= c_end) {
@@ -93,54 +93,54 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
c, c_end - 1);
break;
- case ACCEL_SHUFTI:
- DEBUG_PRINTF("accel shufti %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = shuftiExec(accel->shufti.lo, accel->shufti.hi, c, c_end);
- break;
-
- case ACCEL_TRUFFLE:
- DEBUG_PRINTF("accel Truffle %p %p\n", c, c_end);
- if (c + 15 >= c_end) {
- return c;
- }
-
- rv = truffleExec(accel->truffle.mask1, accel->truffle.mask2, c, c_end);
- break;
-
- case ACCEL_DSHUFTI:
- DEBUG_PRINTF("accel dshufti %p %p\n", c, c_end);
- if (c + 15 + 1 >= c_end) {
- return c;
- }
-
- /* need to stop one early to get an accurate end state */
- rv = shuftiDoubleExec(accel->dshufti.lo1,
- accel->dshufti.hi1,
- accel->dshufti.lo2,
- accel->dshufti.hi2, c, c_end - 1);
- break;
-
- case ACCEL_RED_TAPE:
- DEBUG_PRINTF("accel red tape %p %p\n", c, c_end);
- rv = c_end;
- break;
-
-
- default:
- assert(!"not here");
- return c;
- }
-
- DEBUG_PRINTF("adjusting for offset %u\n", accel->generic.offset);
- /* adjust offset to take into account the offset */
- rv = MAX(c + accel->generic.offset, rv);
- rv -= accel->generic.offset;
-
+ case ACCEL_SHUFTI:
+ DEBUG_PRINTF("accel shufti %p %p\n", c, c_end);
+ if (c + 15 >= c_end) {
+ return c;
+ }
+
+ rv = shuftiExec(accel->shufti.lo, accel->shufti.hi, c, c_end);
+ break;
+
+ case ACCEL_TRUFFLE:
+ DEBUG_PRINTF("accel Truffle %p %p\n", c, c_end);
+ if (c + 15 >= c_end) {
+ return c;
+ }
+
+ rv = truffleExec(accel->truffle.mask1, accel->truffle.mask2, c, c_end);
+ break;
+
+ case ACCEL_DSHUFTI:
+ DEBUG_PRINTF("accel dshufti %p %p\n", c, c_end);
+ if (c + 15 + 1 >= c_end) {
+ return c;
+ }
+
+ /* need to stop one early to get an accurate end state */
+ rv = shuftiDoubleExec(accel->dshufti.lo1,
+ accel->dshufti.hi1,
+ accel->dshufti.lo2,
+ accel->dshufti.hi2, c, c_end - 1);
+ break;
+
+ case ACCEL_RED_TAPE:
+ DEBUG_PRINTF("accel red tape %p %p\n", c, c_end);
+ rv = c_end;
+ break;
+
+
+ default:
+ assert(!"not here");
+ return c;
+ }
+
+ DEBUG_PRINTF("adjusting for offset %u\n", accel->generic.offset);
+ /* adjust offset to take into account the offset */
+ rv = MAX(c + accel->generic.offset, rv);
+ rv -= accel->generic.offset;
+
DEBUG_PRINTF("advanced %zd\n", rv - c);
- return rv;
-}
+ return rv;
+}
diff --git a/contrib/libs/hyperscan/src/nfa/accel.h b/contrib/libs/hyperscan/src/nfa/accel.h
index 71b93f126c..3a03d05967 100644
--- a/contrib/libs/hyperscan/src/nfa/accel.h
+++ b/contrib/libs/hyperscan/src/nfa/accel.h
@@ -1,92 +1,92 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Acceleration: data structures and common definitions.
- */
-
-#ifndef ACCEL_H
-#define ACCEL_H
-
-#include "ue2common.h"
-
-/* run time defs */
-#define BAD_ACCEL_DIST 4
-#define SMALL_ACCEL_PENALTY 8
-#define BIG_ACCEL_PENALTY 32
-
-/// Minimum length of the scan buffer for us to attempt acceleration.
-#define ACCEL_MIN_LEN 16
-
-enum AccelType {
- ACCEL_NONE,
- ACCEL_VERM,
- ACCEL_VERM_NOCASE,
- ACCEL_DVERM,
- ACCEL_DVERM_NOCASE,
- ACCEL_RVERM,
- ACCEL_RVERM_NOCASE,
- ACCEL_RDVERM,
- ACCEL_RDVERM_NOCASE,
- ACCEL_REOD,
- ACCEL_REOD_NOCASE,
- ACCEL_RDEOD,
- ACCEL_RDEOD_NOCASE,
- ACCEL_SHUFTI,
- ACCEL_DSHUFTI,
- ACCEL_TRUFFLE,
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Acceleration: data structures and common definitions.
+ */
+
+#ifndef ACCEL_H
+#define ACCEL_H
+
+#include "ue2common.h"
+
+/* run time defs */
+#define BAD_ACCEL_DIST 4
+#define SMALL_ACCEL_PENALTY 8
+#define BIG_ACCEL_PENALTY 32
+
+/// Minimum length of the scan buffer for us to attempt acceleration.
+#define ACCEL_MIN_LEN 16
+
+enum AccelType {
+ ACCEL_NONE,
+ ACCEL_VERM,
+ ACCEL_VERM_NOCASE,
+ ACCEL_DVERM,
+ ACCEL_DVERM_NOCASE,
+ ACCEL_RVERM,
+ ACCEL_RVERM_NOCASE,
+ ACCEL_RDVERM,
+ ACCEL_RDVERM_NOCASE,
+ ACCEL_REOD,
+ ACCEL_REOD_NOCASE,
+ ACCEL_RDEOD,
+ ACCEL_RDEOD_NOCASE,
+ ACCEL_SHUFTI,
+ ACCEL_DSHUFTI,
+ ACCEL_TRUFFLE,
ACCEL_RED_TAPE,
ACCEL_DVERM_MASKED,
-};
-
-/** \brief Structure for accel framework. */
-union AccelAux {
- u8 accel_type;
- struct {
- u8 accel_type;
- u8 offset;
- } generic;
- struct {
- u8 accel_type;
- u8 offset;
- u8 c; // uppercase if nocase
- } verm;
- struct {
- u8 accel_type;
- u8 offset;
- u8 c1; // uppercase if nocase
- u8 c2; // uppercase if nocase
+};
+
+/** \brief Structure for accel framework. */
+union AccelAux {
+ u8 accel_type;
+ struct {
+ u8 accel_type;
+ u8 offset;
+ } generic;
+ struct {
+ u8 accel_type;
+ u8 offset;
+ u8 c; // uppercase if nocase
+ } verm;
+ struct {
+ u8 accel_type;
+ u8 offset;
+ u8 c1; // uppercase if nocase
+ u8 c2; // uppercase if nocase
u8 m1; // masked variant
u8 m2; // masked variant
- } dverm;
- struct {
- u8 accel_type;
- u8 offset;
+ } dverm;
+ struct {
+ u8 accel_type;
+ u8 offset;
u8 c; // uppercase if nocase
u8 len;
} mverm;
@@ -100,29 +100,29 @@ union AccelAux {
struct {
u8 accel_type;
u8 offset;
- m128 lo;
- m128 hi;
- } shufti;
- struct {
- u8 accel_type;
- u8 offset;
- m128 lo1;
- m128 hi1;
- m128 lo2;
- m128 hi2;
- } dshufti;
- struct {
- u8 accel_type;
- u8 offset;
- m128 mask1;
- m128 mask2;
- } truffle;
-};
-
-/**
- * Runs the specified acceleration scheme between c and c_end, returns a point
- * such that the acceleration scheme does not match before.
- */
-const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end);
-
-#endif
+ m128 lo;
+ m128 hi;
+ } shufti;
+ struct {
+ u8 accel_type;
+ u8 offset;
+ m128 lo1;
+ m128 hi1;
+ m128 lo2;
+ m128 hi2;
+ } dshufti;
+ struct {
+ u8 accel_type;
+ u8 offset;
+ m128 mask1;
+ m128 mask2;
+ } truffle;
+};
+
+/**
+ * Runs the specified acceleration scheme between c and c_end, returns a point
+ * such that the acceleration scheme does not match before.
+ */
+const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end);
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/accelcompile.cpp b/contrib/libs/hyperscan/src/nfa/accelcompile.cpp
index b5e21fc700..a224410dc9 100644
--- a/contrib/libs/hyperscan/src/nfa/accelcompile.cpp
+++ b/contrib/libs/hyperscan/src/nfa/accelcompile.cpp
@@ -1,99 +1,99 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "accel.h"
-#include "accelcompile.h"
-#include "shufticompile.h"
-#include "trufflecompile.h"
-#include "nfagraph/ng_limex_accel.h" /* for constants */
-#include "util/bitutils.h"
-#include "util/verify_types.h"
-
-#include <map>
-#include <set>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-static
-void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
- assert(aux->accel_type == ACCEL_NONE);
- if (info.single_stops.all()) {
- return;
- }
-
- size_t outs = info.single_stops.count();
- DEBUG_PRINTF("%zu outs\n", outs);
- assert(outs && outs < 256);
- u32 offset = info.single_offset;
-
- if (outs == 1) {
- aux->accel_type = ACCEL_VERM;
- aux->verm.offset = offset;
- aux->verm.c = info.single_stops.find_first();
- DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
- return;
- }
-
- if (outs == 2 && info.single_stops.isCaselessChar()) {
- aux->accel_type = ACCEL_VERM_NOCASE;
- aux->verm.offset = offset;
- aux->verm.c = info.single_stops.find_first() & CASE_CLEAR;
- DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
- aux->verm.c);
- return;
- }
-
- DEBUG_PRINTF("attempting shufti for %zu chars\n", outs);
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "accel.h"
+#include "accelcompile.h"
+#include "shufticompile.h"
+#include "trufflecompile.h"
+#include "nfagraph/ng_limex_accel.h" /* for constants */
+#include "util/bitutils.h"
+#include "util/verify_types.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+static
+void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
+ assert(aux->accel_type == ACCEL_NONE);
+ if (info.single_stops.all()) {
+ return;
+ }
+
+ size_t outs = info.single_stops.count();
+ DEBUG_PRINTF("%zu outs\n", outs);
+ assert(outs && outs < 256);
+ u32 offset = info.single_offset;
+
+ if (outs == 1) {
+ aux->accel_type = ACCEL_VERM;
+ aux->verm.offset = offset;
+ aux->verm.c = info.single_stops.find_first();
+ DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
+ return;
+ }
+
+ if (outs == 2 && info.single_stops.isCaselessChar()) {
+ aux->accel_type = ACCEL_VERM_NOCASE;
+ aux->verm.offset = offset;
+ aux->verm.c = info.single_stops.find_first() & CASE_CLEAR;
+ DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
+ aux->verm.c);
+ return;
+ }
+
+ DEBUG_PRINTF("attempting shufti for %zu chars\n", outs);
if (-1 != shuftiBuildMasks(info.single_stops, (u8 *)&aux->shufti.lo,
(u8 *)&aux->shufti.hi)) {
- aux->accel_type = ACCEL_SHUFTI;
- aux->shufti.offset = offset;
- DEBUG_PRINTF("shufti built OK\n");
- return;
- } else {
- DEBUG_PRINTF("shufti build failed, falling through\n");
- }
-
- if (outs <= ACCEL_MAX_STOP_CHAR) {
- DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
- aux->accel_type = ACCEL_TRUFFLE;
- aux->truffle.offset = offset;
+ aux->accel_type = ACCEL_SHUFTI;
+ aux->shufti.offset = offset;
+ DEBUG_PRINTF("shufti built OK\n");
+ return;
+ } else {
+ DEBUG_PRINTF("shufti build failed, falling through\n");
+ }
+
+ if (outs <= ACCEL_MAX_STOP_CHAR) {
+ DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
+ aux->accel_type = ACCEL_TRUFFLE;
+ aux->truffle.offset = offset;
truffleBuildMasks(info.single_stops, (u8 *)&aux->truffle.mask1,
(u8 *)&aux->truffle.mask2);
- return;
- }
-
- DEBUG_PRINTF("unable to accelerate case with %zu outs\n", outs);
-}
-
+ return;
+ }
+
+ DEBUG_PRINTF("unable to accelerate case with %zu outs\n", outs);
+}
+
bool buildDvermMask(const flat_set<pair<u8, u8>> &escape_set, u8 *m1_out,
u8 *m2_out) {
u8 a1 = 0xff;
@@ -135,61 +135,61 @@ bool buildDvermMask(const flat_set<pair<u8, u8>> &escape_set, u8 *m1_out,
return true;
}
-static
-bool isCaselessDouble(const flat_set<pair<u8, u8>> &stop) {
- // test for vector containing <A,Z> <A,z> <a,Z> <a,z>
- if (stop.size() != 4) {
- return false;
- }
- const u8 a = stop.begin()->first & CASE_CLEAR;
- const u8 b = stop.begin()->second & CASE_CLEAR;
-
- flat_set<pair<u8, u8>>::const_iterator it, ite;
- for (it = stop.begin(), ite = stop.end(); it != ite; ++it) {
- if ((it->first & CASE_CLEAR) != a || (it->second & CASE_CLEAR) != b) {
- return false;
- }
- }
-
- return true;
-}
-
-static
-void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
- size_t outs1 = info.double_stop1.count();
- size_t outs2 = info.double_stop2.size();
-
- u8 offset = verify_u8(info.double_offset);
- DEBUG_PRINTF("outs1=%zu, outs2=%zu\n", outs1, outs2);
-
- assert(aux->accel_type == ACCEL_NONE);
-
- if (!outs2) {
- /* no double byte accel available */
- return;
- }
-
- // double-byte accel
- if (outs1 == 0 && outs2 == 1) {
- aux->accel_type = ACCEL_DVERM;
- aux->dverm.offset = offset;
- aux->dverm.c1 = info.double_stop2.begin()->first;
- aux->dverm.c2 = info.double_stop2.begin()->second;
- DEBUG_PRINTF("building double-vermicelli caseful for 0x%02hhx%02hhx\n",
- aux->dverm.c1, aux->dverm.c2);
- return;
- }
-
- if (outs1 == 0 && isCaselessDouble(info.double_stop2)) {
- aux->accel_type = ACCEL_DVERM_NOCASE;
- aux->dverm.offset = offset;
- aux->dverm.c1 = info.double_stop2.begin()->first & CASE_CLEAR;
- aux->dverm.c2 = info.double_stop2.begin()->second & CASE_CLEAR;
- DEBUG_PRINTF("building double-vermicelli caseless for 0x%02hhx%02hhx\n",
- aux->dverm.c1, aux->dverm.c2);
- return;
- }
-
+static
+bool isCaselessDouble(const flat_set<pair<u8, u8>> &stop) {
+ // test for vector containing <A,Z> <A,z> <a,Z> <a,z>
+ if (stop.size() != 4) {
+ return false;
+ }
+ const u8 a = stop.begin()->first & CASE_CLEAR;
+ const u8 b = stop.begin()->second & CASE_CLEAR;
+
+ flat_set<pair<u8, u8>>::const_iterator it, ite;
+ for (it = stop.begin(), ite = stop.end(); it != ite; ++it) {
+ if ((it->first & CASE_CLEAR) != a || (it->second & CASE_CLEAR) != b) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static
+void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
+ size_t outs1 = info.double_stop1.count();
+ size_t outs2 = info.double_stop2.size();
+
+ u8 offset = verify_u8(info.double_offset);
+ DEBUG_PRINTF("outs1=%zu, outs2=%zu\n", outs1, outs2);
+
+ assert(aux->accel_type == ACCEL_NONE);
+
+ if (!outs2) {
+ /* no double byte accel available */
+ return;
+ }
+
+ // double-byte accel
+ if (outs1 == 0 && outs2 == 1) {
+ aux->accel_type = ACCEL_DVERM;
+ aux->dverm.offset = offset;
+ aux->dverm.c1 = info.double_stop2.begin()->first;
+ aux->dverm.c2 = info.double_stop2.begin()->second;
+ DEBUG_PRINTF("building double-vermicelli caseful for 0x%02hhx%02hhx\n",
+ aux->dverm.c1, aux->dverm.c2);
+ return;
+ }
+
+ if (outs1 == 0 && isCaselessDouble(info.double_stop2)) {
+ aux->accel_type = ACCEL_DVERM_NOCASE;
+ aux->dverm.offset = offset;
+ aux->dverm.c1 = info.double_stop2.begin()->first & CASE_CLEAR;
+ aux->dverm.c2 = info.double_stop2.begin()->second & CASE_CLEAR;
+ DEBUG_PRINTF("building double-vermicelli caseless for 0x%02hhx%02hhx\n",
+ aux->dverm.c1, aux->dverm.c2);
+ return;
+ }
+
if (outs1 == 0) {
u8 m1;
u8 m2;
@@ -203,10 +203,10 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
aux->dverm.m2 = m2;
DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n",
aux->dverm.c1, aux->dverm.c2);
- return;
- }
- }
-
+ return;
+ }
+ }
+
if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438.
DEBUG_PRINTF("building double-shufti for %zu one-byte and %zu"
" two-byte literals\n", outs1, outs2);
@@ -220,29 +220,29 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
}
}
- // drop back to attempt single-byte accel
- DEBUG_PRINTF("dropping back to single-byte acceleration\n");
- aux->accel_type = ACCEL_NONE;
-}
-
-bool buildAccelAux(const AccelInfo &info, AccelAux *aux) {
- assert(aux->accel_type == ACCEL_NONE);
- if (info.single_stops.none()) {
- DEBUG_PRINTF("picked red tape\n");
- aux->accel_type = ACCEL_RED_TAPE;
- aux->generic.offset = info.single_offset;
+ // drop back to attempt single-byte accel
+ DEBUG_PRINTF("dropping back to single-byte acceleration\n");
+ aux->accel_type = ACCEL_NONE;
+}
+
+bool buildAccelAux(const AccelInfo &info, AccelAux *aux) {
+ assert(aux->accel_type == ACCEL_NONE);
+ if (info.single_stops.none()) {
+ DEBUG_PRINTF("picked red tape\n");
+ aux->accel_type = ACCEL_RED_TAPE;
+ aux->generic.offset = info.single_offset;
}
if (aux->accel_type == ACCEL_NONE) {
- buildAccelDouble(info, aux);
- }
- if (aux->accel_type == ACCEL_NONE) {
- buildAccelSingle(info, aux);
- }
-
- assert(aux->accel_type == ACCEL_NONE
- || aux->generic.offset == info.single_offset
- || aux->generic.offset == info.double_offset);
- return aux->accel_type != ACCEL_NONE;
-}
-
-} // namespace ue2
+ buildAccelDouble(info, aux);
+ }
+ if (aux->accel_type == ACCEL_NONE) {
+ buildAccelSingle(info, aux);
+ }
+
+ assert(aux->accel_type == ACCEL_NONE
+ || aux->generic.offset == info.single_offset
+ || aux->generic.offset == info.double_offset);
+ return aux->accel_type != ACCEL_NONE;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfa/accelcompile.h b/contrib/libs/hyperscan/src/nfa/accelcompile.h
index 926d25bb92..d0b3cdc74f 100644
--- a/contrib/libs/hyperscan/src/nfa/accelcompile.h
+++ b/contrib/libs/hyperscan/src/nfa/accelcompile.h
@@ -1,60 +1,60 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ACCEL_COMPILE_H
-#define ACCEL_COMPILE_H
-
-#include "ue2common.h"
-#include "util/charreach.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ACCEL_COMPILE_H
+#define ACCEL_COMPILE_H
+
+#include "ue2common.h"
+#include "util/charreach.h"
#include "util/flat_containers.h"
-
-union AccelAux;
-
-namespace ue2 {
-
-struct AccelInfo {
- AccelInfo() : single_offset(0U), double_offset(0U),
- single_stops(CharReach::dot()) {}
- u32 single_offset; /**< offset correction to apply to single schemes */
- u32 double_offset; /**< offset correction to apply to double schemes */
- CharReach double_stop1; /**< single-byte accel stop literals for double
- * schemes */
- flat_set<std::pair<u8, u8>> double_stop2; /**< double-byte accel stop
- * literals */
- CharReach single_stops; /**< escapes for single byte acceleration */
-};
-
-bool buildAccelAux(const AccelInfo &info, AccelAux *aux);
-
+
+union AccelAux;
+
+namespace ue2 {
+
+struct AccelInfo {
+ AccelInfo() : single_offset(0U), double_offset(0U),
+ single_stops(CharReach::dot()) {}
+ u32 single_offset; /**< offset correction to apply to single schemes */
+ u32 double_offset; /**< offset correction to apply to double schemes */
+ CharReach double_stop1; /**< single-byte accel stop literals for double
+ * schemes */
+ flat_set<std::pair<u8, u8>> double_stop2; /**< double-byte accel stop
+ * literals */
+ CharReach single_stops; /**< escapes for single byte acceleration */
+};
+
+bool buildAccelAux(const AccelInfo &info, AccelAux *aux);
+
/* returns true is the escape set can be handled with a masked double_verm */
bool buildDvermMask(const flat_set<std::pair<u8, u8>> &escape_set,
u8 *m1_out = nullptr, u8 *m2_out = nullptr);
-} // namespace ue2
-
-#endif
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/callback.h b/contrib/libs/hyperscan/src/nfa/callback.h
index aa025a9b78..9bdaa8d141 100644
--- a/contrib/libs/hyperscan/src/nfa/callback.h
+++ b/contrib/libs/hyperscan/src/nfa/callback.h
@@ -1,49 +1,49 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief NFA Callback definitions, used at runtime.
- */
-
-#ifndef NFA_CALLBACK_H
-#define NFA_CALLBACK_H
-
-#include "ue2common.h"
-
-/** \brief The type for an NFA callback.
- *
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief NFA Callback definitions, used at runtime.
+ */
+
+#ifndef NFA_CALLBACK_H
+#define NFA_CALLBACK_H
+
+#include "ue2common.h"
+
+/** \brief The type for an NFA callback.
+ *
* This is a function that takes as arguments the current start and end offsets
* where the match occurs, the id of the match and the context pointer that was
* passed into the NFA API function that executed the NFA.
- *
+ *
* The start offset is the "start of match" (SOM) offset for the match. It is
* only provided by engines that natively support SOM tracking (e.g. Gough).
- *
+ *
* The end offset will be the offset after the character that caused the match.
* Thus, if we have a buffer containing 'abc', then a pattern that matches an
* empty string will have an offset of 0, a pattern that matches 'a' will have
@@ -52,21 +52,21 @@
* we have n characters in the buffer, there are n+1 different potential
* offsets for matches.
*
- * This function should return an int - currently the possible return values
- * are 0, which means 'stop running the engine' or non-zero, which means
- * 'continue matching'.
- */
+ * This function should return an int - currently the possible return values
+ * are 0, which means 'stop running the engine' or non-zero, which means
+ * 'continue matching'.
+ */
typedef int (*NfaCallback)(u64a start, u64a end, ReportID id, void *context);
-
-/**
- * standard \ref NfaCallback return value indicating that engine execution
- * should continue. (any non-zero value will serve this purpose)
- */
-#define MO_CONTINUE_MATCHING 1
-
-/**
- * \ref NfaCallback return value indicating that engine execution should halt.
- */
-#define MO_HALT_MATCHING 0
-
-#endif // NFA_CALLBACK_H
+
+/**
+ * standard \ref NfaCallback return value indicating that engine execution
+ * should continue. (any non-zero value will serve this purpose)
+ */
+#define MO_CONTINUE_MATCHING 1
+
+/**
+ * \ref NfaCallback return value indicating that engine execution should halt.
+ */
+#define MO_HALT_MATCHING 0
+
+#endif // NFA_CALLBACK_H
diff --git a/contrib/libs/hyperscan/src/nfa/castle.c b/contrib/libs/hyperscan/src/nfa/castle.c
index 8cdef7264b..7c158b31c0 100644
--- a/contrib/libs/hyperscan/src/nfa/castle.c
+++ b/contrib/libs/hyperscan/src/nfa/castle.c
@@ -1,117 +1,117 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Castle: multi-tenant repeat engine, runtime code.
- */
-
-#include "castle.h"
-
-#include "castle_internal.h"
-#include "nfa_api.h"
-#include "nfa_api_queue.h"
-#include "nfa_internal.h"
-#include "repeat.h"
-#include "shufti.h"
-#include "truffle.h"
-#include "vermicelli.h"
-#include "util/bitutils.h"
-#include "util/multibit.h"
-#include "util/partial_store.h"
-#include "ue2common.h"
-
-static really_inline
-const struct SubCastle *getSubCastle(const struct Castle *c, u32 num) {
- assert(num < c->numRepeats);
- const struct SubCastle *sub =
- (const struct SubCastle *)((const char *)c + sizeof(struct Castle));
- assert(ISALIGNED(sub));
- return &sub[num];
-}
-
-static really_inline
-const struct RepeatInfo *getRepeatInfo(const struct SubCastle *sub) {
- const struct RepeatInfo *repeatInfo =
- (const struct RepeatInfo *)((const char *)sub + sub->repeatInfoOffset);
- return repeatInfo;
-}
-
-static really_inline
-union RepeatControl *getControl(char *full_state, const struct SubCastle *sub) {
- union RepeatControl *rctrl =
- (union RepeatControl *)(full_state + sub->fullStateOffset);
- assert(ISALIGNED(rctrl));
- return rctrl;
-}
-
-static really_inline
-const union RepeatControl *getControlConst(const char *full_state,
- const struct SubCastle *sub) {
- const union RepeatControl *rctrl =
- (const union RepeatControl *)(full_state + sub->fullStateOffset);
- assert(ISALIGNED(rctrl));
- return rctrl;
-}
-
-enum MatchMode {
- CALLBACK_OUTPUT,
- STOP_AT_MATCH,
-};
-
-static really_inline
-char subCastleReportCurrent(const struct Castle *c, struct mq *q,
- const u64a offset, const u32 subIdx) {
- const struct SubCastle *sub = getSubCastle(c, subIdx);
- const struct RepeatInfo *info = getRepeatInfo(sub);
-
- union RepeatControl *rctrl = getControl(q->state, sub);
- char *rstate = (char *)q->streamState + sub->streamStateOffset +
- info->packedCtrlSize;
- enum RepeatMatch match =
- repeatHasMatch(info, rctrl, rstate, offset);
- DEBUG_PRINTF("repeatHasMatch returned %d\n", match);
- if (match == REPEAT_MATCH) {
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Castle: multi-tenant repeat engine, runtime code.
+ */
+
+#include "castle.h"
+
+#include "castle_internal.h"
+#include "nfa_api.h"
+#include "nfa_api_queue.h"
+#include "nfa_internal.h"
+#include "repeat.h"
+#include "shufti.h"
+#include "truffle.h"
+#include "vermicelli.h"
+#include "util/bitutils.h"
+#include "util/multibit.h"
+#include "util/partial_store.h"
+#include "ue2common.h"
+
+static really_inline
+const struct SubCastle *getSubCastle(const struct Castle *c, u32 num) {
+ assert(num < c->numRepeats);
+ const struct SubCastle *sub =
+ (const struct SubCastle *)((const char *)c + sizeof(struct Castle));
+ assert(ISALIGNED(sub));
+ return &sub[num];
+}
+
+static really_inline
+const struct RepeatInfo *getRepeatInfo(const struct SubCastle *sub) {
+ const struct RepeatInfo *repeatInfo =
+ (const struct RepeatInfo *)((const char *)sub + sub->repeatInfoOffset);
+ return repeatInfo;
+}
+
+static really_inline
+union RepeatControl *getControl(char *full_state, const struct SubCastle *sub) {
+ union RepeatControl *rctrl =
+ (union RepeatControl *)(full_state + sub->fullStateOffset);
+ assert(ISALIGNED(rctrl));
+ return rctrl;
+}
+
+static really_inline
+const union RepeatControl *getControlConst(const char *full_state,
+ const struct SubCastle *sub) {
+ const union RepeatControl *rctrl =
+ (const union RepeatControl *)(full_state + sub->fullStateOffset);
+ assert(ISALIGNED(rctrl));
+ return rctrl;
+}
+
+enum MatchMode {
+ CALLBACK_OUTPUT,
+ STOP_AT_MATCH,
+};
+
+static really_inline
+char subCastleReportCurrent(const struct Castle *c, struct mq *q,
+ const u64a offset, const u32 subIdx) {
+ const struct SubCastle *sub = getSubCastle(c, subIdx);
+ const struct RepeatInfo *info = getRepeatInfo(sub);
+
+ union RepeatControl *rctrl = getControl(q->state, sub);
+ char *rstate = (char *)q->streamState + sub->streamStateOffset +
+ info->packedCtrlSize;
+ enum RepeatMatch match =
+ repeatHasMatch(info, rctrl, rstate, offset);
+ DEBUG_PRINTF("repeatHasMatch returned %d\n", match);
+ if (match == REPEAT_MATCH) {
DEBUG_PRINTF("firing match at %llu for sub %u, report %u\n", offset,
subIdx, sub->report);
if (q->cb(0, offset, sub->report, q->context) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
-
- return MO_CONTINUE_MATCHING;
-}
-
-static really_inline
-int castleReportCurrent(const struct Castle *c, struct mq *q) {
- const u64a offset = q_cur_offset(q);
- DEBUG_PRINTF("offset=%llu\n", offset);
-
- if (c->exclusive) {
+ return MO_HALT_MATCHING;
+ }
+ }
+
+ return MO_CONTINUE_MATCHING;
+}
+
+static really_inline
+int castleReportCurrent(const struct Castle *c, struct mq *q) {
+ const u64a offset = q_cur_offset(q);
+ DEBUG_PRINTF("offset=%llu\n", offset);
+
+ if (c->exclusive) {
u8 *active = (u8 *)q->streamState;
u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
@@ -123,131 +123,131 @@ int castleReportCurrent(const struct Castle *c, struct mq *q) {
offset, activeIdx) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
- }
- }
-
+ }
+ }
+
if (c->exclusive != PURE_EXCLUSIVE) {
const u8 *active = (const u8 *)q->streamState + c->activeOffset;
- for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
- i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
- DEBUG_PRINTF("subcastle %u\n", i);
- if (subCastleReportCurrent(c, q, offset, i) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
- }
-
- return MO_CONTINUE_MATCHING;
-}
-
-static really_inline
-char subCastleInAccept(const struct Castle *c, struct mq *q,
- const ReportID report, const u64a offset,
- const u32 subIdx) {
- const struct SubCastle *sub = getSubCastle(c, subIdx);
-
- if (sub->report != report) {
- return 0;
- }
- const struct RepeatInfo *info = getRepeatInfo(sub);
-
- union RepeatControl *rctrl = getControl(q->state, sub);
- char *rstate = (char *)q->streamState + sub->streamStateOffset +
- info->packedCtrlSize;
- enum RepeatMatch match =
- repeatHasMatch(info, rctrl, rstate, offset);
- if (match == REPEAT_MATCH) {
- DEBUG_PRINTF("in an accept\n");
- return 1;
- }
-
- return 0;
-}
-
-static really_inline
-char castleInAccept(const struct Castle *c, struct mq *q,
- const ReportID report, const u64a offset) {
- DEBUG_PRINTF("offset=%llu\n", offset);
+ for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
+ i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
+ DEBUG_PRINTF("subcastle %u\n", i);
+ if (subCastleReportCurrent(c, q, offset, i) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+ }
+
+ return MO_CONTINUE_MATCHING;
+}
+
+static really_inline
+char subCastleInAccept(const struct Castle *c, struct mq *q,
+ const ReportID report, const u64a offset,
+ const u32 subIdx) {
+ const struct SubCastle *sub = getSubCastle(c, subIdx);
+
+ if (sub->report != report) {
+ return 0;
+ }
+ const struct RepeatInfo *info = getRepeatInfo(sub);
+
+ union RepeatControl *rctrl = getControl(q->state, sub);
+ char *rstate = (char *)q->streamState + sub->streamStateOffset +
+ info->packedCtrlSize;
+ enum RepeatMatch match =
+ repeatHasMatch(info, rctrl, rstate, offset);
+ if (match == REPEAT_MATCH) {
+ DEBUG_PRINTF("in an accept\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+static really_inline
+char castleInAccept(const struct Castle *c, struct mq *q,
+ const ReportID report, const u64a offset) {
+ DEBUG_PRINTF("offset=%llu\n", offset);
/* ignore when just catching up due to full queue */
if (report == MO_INVALID_IDX) {
return 0;
}
-
- if (c->exclusive) {
+
+ if (c->exclusive) {
u8 *active = (u8 *)q->streamState;
u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize;
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
- DEBUG_PRINTF("subcastle %u\n", activeIdx);
- if (subCastleInAccept(c, q, report, offset, activeIdx)) {
- return 1;
- }
- }
- }
-
+ DEBUG_PRINTF("subcastle %u\n", activeIdx);
+ if (subCastleInAccept(c, q, report, offset, activeIdx)) {
+ return 1;
+ }
+ }
+ }
+
if (c->exclusive != PURE_EXCLUSIVE) {
const u8 *active = (const u8 *)q->streamState + c->activeOffset;
- for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
+ for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
- DEBUG_PRINTF("subcastle %u\n", i);
- if (subCastleInAccept(c, q, report, offset, i)) {
- return 1;
- }
- }
- }
-
- return 0;
-}
-
-static really_inline
-void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
- void *full_state, void *stream_state,
- const u32 subIdx) {
- const struct SubCastle *sub = getSubCastle(c, subIdx);
- const struct RepeatInfo *info = getRepeatInfo(sub);
-
- union RepeatControl *rctrl = getControl(full_state, sub);
- char *rstate = (char *)stream_state + sub->streamStateOffset +
- info->packedCtrlSize;
-
- if (repeatHasMatch(info, rctrl, rstate, offset) == REPEAT_STALE) {
- DEBUG_PRINTF("sub %u is stale at offset %llu\n", subIdx, offset);
+ DEBUG_PRINTF("subcastle %u\n", i);
+ if (subCastleInAccept(c, q, report, offset, i)) {
+ return 1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static really_inline
+void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
+ void *full_state, void *stream_state,
+ const u32 subIdx) {
+ const struct SubCastle *sub = getSubCastle(c, subIdx);
+ const struct RepeatInfo *info = getRepeatInfo(sub);
+
+ union RepeatControl *rctrl = getControl(full_state, sub);
+ char *rstate = (char *)stream_state + sub->streamStateOffset +
+ info->packedCtrlSize;
+
+ if (repeatHasMatch(info, rctrl, rstate, offset) == REPEAT_STALE) {
+ DEBUG_PRINTF("sub %u is stale at offset %llu\n", subIdx, offset);
if (sub->exclusiveId < c->numRepeats) {
u8 *active = (u8 *)stream_state;
u8 *groups = active + c->groupIterOffset;
mmbit_unset(groups, c->numGroups, sub->exclusiveId);
- } else {
+ } else {
u8 *active = (u8 *)stream_state + c->activeOffset;
mmbit_unset(active, c->numRepeats, subIdx);
- }
- }
-}
-
-static really_inline
-void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
- void *full_state, void *stream_state) {
- DEBUG_PRINTF("offset=%llu\n", offset);
-
+ }
+ }
+}
+
+static really_inline
+void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
+ void *full_state, void *stream_state) {
+ DEBUG_PRINTF("offset=%llu\n", offset);
+
if (!c->staleIterOffset) {
DEBUG_PRINTF("{no repeats can go stale}\n");
return; /* no subcastle can ever go stale */
}
- if (c->exclusive) {
+ if (c->exclusive) {
u8 *active = (u8 *)stream_state;
u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize;
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
- DEBUG_PRINTF("subcastle %u\n", activeIdx);
- subCastleDeactivateStaleSubs(c, offset, full_state,
- stream_state, activeIdx);
- }
- }
-
+ DEBUG_PRINTF("subcastle %u\n", activeIdx);
+ subCastleDeactivateStaleSubs(c, offset, full_state,
+ stream_state, activeIdx);
+ }
+ }
+
if (c->exclusive != PURE_EXCLUSIVE) {
const u8 *active = (const u8 *)stream_state + c->activeOffset;
const struct mmbit_sparse_iter *it
@@ -259,27 +259,27 @@ void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
u32 i = mmbit_sparse_iter_begin(active, numRepeats, &idx, it, si_state);
while(i != MMB_INVALID) {
- DEBUG_PRINTF("subcastle %u\n", i);
+ DEBUG_PRINTF("subcastle %u\n", i);
subCastleDeactivateStaleSubs(c, offset, full_state, stream_state, i);
i = mmbit_sparse_iter_next(active, numRepeats, i, &idx, it,
si_state);
- }
- }
-}
-
-static really_inline
-void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset,
+ }
+ }
+}
+
+static really_inline
+void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset,
void *full_state, void *stream_state,
UNUSED char stale_checked) {
- assert(top < c->numRepeats);
-
- const struct SubCastle *sub = getSubCastle(c, top);
- const struct RepeatInfo *info = getRepeatInfo(sub);
- union RepeatControl *rctrl = getControl(full_state, sub);
- char *rstate = (char *)stream_state + sub->streamStateOffset +
- info->packedCtrlSize;
-
- char is_alive = 0;
+ assert(top < c->numRepeats);
+
+ const struct SubCastle *sub = getSubCastle(c, top);
+ const struct RepeatInfo *info = getRepeatInfo(sub);
+ union RepeatControl *rctrl = getControl(full_state, sub);
+ char *rstate = (char *)stream_state + sub->streamStateOffset +
+ info->packedCtrlSize;
+
+ char is_alive = 0;
u8 *active = (u8 *)stream_state;
if (sub->exclusiveId < c->numRepeats) {
u8 *groups = active + c->groupIterOffset;
@@ -292,125 +292,125 @@ void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset,
if (!is_alive) {
partial_store_u32(active, top, c->activeIdxSize);
}
- } else {
+ } else {
active += c->activeOffset;
- is_alive = mmbit_set(active, c->numRepeats, top);
- }
-
- if (!is_alive) {
- DEBUG_PRINTF("first top for inactive repeat %u\n", top);
- } else {
- DEBUG_PRINTF("repeat %u is already alive\n", top);
- // Caller should ensure we're not stale.
+ is_alive = mmbit_set(active, c->numRepeats, top);
+ }
+
+ if (!is_alive) {
+ DEBUG_PRINTF("first top for inactive repeat %u\n", top);
+ } else {
+ DEBUG_PRINTF("repeat %u is already alive\n", top);
+ // Caller should ensure we're not stale.
assert(!stale_checked
|| repeatHasMatch(info, rctrl, rstate, offset) != REPEAT_STALE);
-
- // Ignore duplicate top events.
- u64a last = repeatLastTop(info, rctrl, rstate);
-
- assert(last <= offset);
- if (last == offset) {
- DEBUG_PRINTF("dupe top at %llu\n", offset);
- return;
- }
- }
-
- repeatStore(info, rctrl, rstate, offset, is_alive);
-}
-
-static really_inline
-void subCastleFindMatch(const struct Castle *c, const u64a begin,
- const u64a end, void *full_state, void *stream_state,
- size_t *mloc, char *found, const u32 subIdx) {
- const struct SubCastle *sub = getSubCastle(c, subIdx);
- const struct RepeatInfo *info = getRepeatInfo(sub);
- union RepeatControl *rctrl = getControl(full_state, sub);
- char *rstate = (char *)stream_state + sub->streamStateOffset +
- info->packedCtrlSize;
-
- u64a match = repeatNextMatch(info, rctrl, rstate, begin);
- if (match == 0) {
- DEBUG_PRINTF("no more matches for sub %u\n", subIdx);
+
+ // Ignore duplicate top events.
+ u64a last = repeatLastTop(info, rctrl, rstate);
+
+ assert(last <= offset);
+ if (last == offset) {
+ DEBUG_PRINTF("dupe top at %llu\n", offset);
+ return;
+ }
+ }
+
+ repeatStore(info, rctrl, rstate, offset, is_alive);
+}
+
+static really_inline
+void subCastleFindMatch(const struct Castle *c, const u64a begin,
+ const u64a end, void *full_state, void *stream_state,
+ size_t *mloc, char *found, const u32 subIdx) {
+ const struct SubCastle *sub = getSubCastle(c, subIdx);
+ const struct RepeatInfo *info = getRepeatInfo(sub);
+ union RepeatControl *rctrl = getControl(full_state, sub);
+ char *rstate = (char *)stream_state + sub->streamStateOffset +
+ info->packedCtrlSize;
+
+ u64a match = repeatNextMatch(info, rctrl, rstate, begin);
+ if (match == 0) {
+ DEBUG_PRINTF("no more matches for sub %u\n", subIdx);
if (sub->exclusiveId < c->numRepeats) {
u8 *groups = (u8 *)stream_state + c->groupIterOffset;
mmbit_unset(groups, c->numGroups, sub->exclusiveId);
- } else {
+ } else {
u8 *active = (u8 *)stream_state + c->activeOffset;
- mmbit_unset(active, c->numRepeats, subIdx);
- }
- return;
- } else if (match > end) {
- DEBUG_PRINTF("next match for sub %u at %llu is > horizon\n", subIdx,
- match);
- return;
- }
- DEBUG_PRINTF("sub %u earliest match at %llu\n", subIdx, match);
- size_t diff = match - begin;
- if (!(*found) || diff < *mloc) {
- *mloc = diff;
- DEBUG_PRINTF("mloc=%zu\n", *mloc);
- }
- *found = 1;
-}
-
-static really_inline
-char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end,
- void *full_state, void *stream_state, size_t *mloc) {
- DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end);
- assert(begin <= end);
-
- if (begin == end) {
- DEBUG_PRINTF("no work to do\n");
- return 0;
- }
-
- char found = 0;
- *mloc = 0;
-
- if (c->exclusive) {
+ mmbit_unset(active, c->numRepeats, subIdx);
+ }
+ return;
+ } else if (match > end) {
+ DEBUG_PRINTF("next match for sub %u at %llu is > horizon\n", subIdx,
+ match);
+ return;
+ }
+ DEBUG_PRINTF("sub %u earliest match at %llu\n", subIdx, match);
+ size_t diff = match - begin;
+ if (!(*found) || diff < *mloc) {
+ *mloc = diff;
+ DEBUG_PRINTF("mloc=%zu\n", *mloc);
+ }
+ *found = 1;
+}
+
+static really_inline
+char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end,
+ void *full_state, void *stream_state, size_t *mloc) {
+ DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end);
+ assert(begin <= end);
+
+ if (begin == end) {
+ DEBUG_PRINTF("no work to do\n");
+ return 0;
+ }
+
+ char found = 0;
+ *mloc = 0;
+
+ if (c->exclusive) {
u8 *active = (u8 *)stream_state;
u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize;
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
- DEBUG_PRINTF("subcastle %u\n", activeIdx);
- subCastleFindMatch(c, begin, end, full_state, stream_state, mloc,
- &found, activeIdx);
- }
- }
-
+ DEBUG_PRINTF("subcastle %u\n", activeIdx);
+ subCastleFindMatch(c, begin, end, full_state, stream_state, mloc,
+ &found, activeIdx);
+ }
+ }
+
if (c->exclusive != PURE_EXCLUSIVE) {
u8 *active = (u8 *)stream_state + c->activeOffset;
- for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
- i != MMB_INVALID;
- i = mmbit_iterate(active, c->numRepeats, i)) {
- DEBUG_PRINTF("subcastle %u\n", i);
- subCastleFindMatch(c, begin, end, full_state, stream_state, mloc,
- &found, i);
- }
- }
-
- return found;
-}
-
-static really_inline
-u64a subCastleNextMatch(const struct Castle *c, void *full_state,
- void *stream_state, const u64a loc,
- const u32 subIdx) {
- DEBUG_PRINTF("subcastle %u\n", subIdx);
- const struct SubCastle *sub = getSubCastle(c, subIdx);
- const struct RepeatInfo *info = getRepeatInfo(sub);
- const union RepeatControl *rctrl =
- getControlConst(full_state, sub);
- const char *rstate = (const char *)stream_state +
- sub->streamStateOffset +
- info->packedCtrlSize;
-
- return repeatNextMatch(info, rctrl, rstate, loc);
-}
-
-static really_inline
+ for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
+ i != MMB_INVALID;
+ i = mmbit_iterate(active, c->numRepeats, i)) {
+ DEBUG_PRINTF("subcastle %u\n", i);
+ subCastleFindMatch(c, begin, end, full_state, stream_state, mloc,
+ &found, i);
+ }
+ }
+
+ return found;
+}
+
+static really_inline
+u64a subCastleNextMatch(const struct Castle *c, void *full_state,
+ void *stream_state, const u64a loc,
+ const u32 subIdx) {
+ DEBUG_PRINTF("subcastle %u\n", subIdx);
+ const struct SubCastle *sub = getSubCastle(c, subIdx);
+ const struct RepeatInfo *info = getRepeatInfo(sub);
+ const union RepeatControl *rctrl =
+ getControlConst(full_state, sub);
+ const char *rstate = (const char *)stream_state +
+ sub->streamStateOffset +
+ info->packedCtrlSize;
+
+ return repeatNextMatch(info, rctrl, rstate, loc);
+}
+
+static really_inline
void set_matching(const struct Castle *c, const u64a match, u8 *active,
u8 *matching, const u32 active_size, const u32 active_id,
const u32 matching_id, u64a *offset, const u64a end) {
@@ -432,190 +432,190 @@ void set_matching(const struct Castle *c, const u64a match, u8 *active,
}
static really_inline
-void subCastleMatchLoop(const struct Castle *c, void *full_state,
- void *stream_state, const u64a end,
- const u64a loc, u64a *offset) {
+void subCastleMatchLoop(const struct Castle *c, void *full_state,
+ void *stream_state, const u64a end,
+ const u64a loc, u64a *offset) {
u8 *active = (u8 *)stream_state + c->activeOffset;
- u8 *matching = full_state;
- for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
- i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
- u64a match = subCastleNextMatch(c, full_state, stream_state, loc, i);
+ u8 *matching = full_state;
+ for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
+ i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
+ u64a match = subCastleNextMatch(c, full_state, stream_state, loc, i);
set_matching(c, match, active, matching, c->numRepeats, i,
i, offset, end);
- }
-}
-
-static really_inline
-char subCastleFireMatch(const struct Castle *c, const void *full_state,
- UNUSED const void *stream_state, NfaCallback cb,
- void *ctx, const u64a offset) {
- const u8 *matching = full_state;
-
- // Fire all matching sub-castles at this offset.
- for (u32 i = mmbit_iterate(matching, c->numRepeats, MMB_INVALID);
- i != MMB_INVALID;
- i = mmbit_iterate(matching, c->numRepeats, i)) {
- const struct SubCastle *sub = getSubCastle(c, i);
- DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, i);
+ }
+}
+
+static really_inline
+char subCastleFireMatch(const struct Castle *c, const void *full_state,
+ UNUSED const void *stream_state, NfaCallback cb,
+ void *ctx, const u64a offset) {
+ const u8 *matching = full_state;
+
+ // Fire all matching sub-castles at this offset.
+ for (u32 i = mmbit_iterate(matching, c->numRepeats, MMB_INVALID);
+ i != MMB_INVALID;
+ i = mmbit_iterate(matching, c->numRepeats, i)) {
+ const struct SubCastle *sub = getSubCastle(c, i);
+ DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, i);
if (cb(0, offset, sub->report, ctx) == MO_HALT_MATCHING) {
- DEBUG_PRINTF("caller told us to halt\n");
- return MO_HALT_MATCHING;
- }
- }
-
- return MO_CONTINUE_MATCHING;
-}
-
-static really_inline
-char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end,
- void *full_state, void *stream_state, NfaCallback cb,
- void *ctx) {
- DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end);
- assert(begin <= end);
-
- u8 *matching = full_state; // temp multibit
-
- u64a loc = begin;
- while (loc < end) {
-
- // Find minimum next offset for the next match(es) from amongst our
- // active sub-castles, and store the indices of the sub-castles that
- // match at that offset in the 'matching' mmbit, which is in the
- // full_state (scratch).
-
- u64a offset = end; // min offset of next match
- u32 activeIdx = 0;
+ DEBUG_PRINTF("caller told us to halt\n");
+ return MO_HALT_MATCHING;
+ }
+ }
+
+ return MO_CONTINUE_MATCHING;
+}
+
+static really_inline
+char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end,
+ void *full_state, void *stream_state, NfaCallback cb,
+ void *ctx) {
+ DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end);
+ assert(begin <= end);
+
+ u8 *matching = full_state; // temp multibit
+
+ u64a loc = begin;
+ while (loc < end) {
+
+ // Find minimum next offset for the next match(es) from amongst our
+ // active sub-castles, and store the indices of the sub-castles that
+ // match at that offset in the 'matching' mmbit, which is in the
+ // full_state (scratch).
+
+ u64a offset = end; // min offset of next match
+ u32 activeIdx = 0;
mmbit_clear(matching, c->numRepeats);
- if (c->exclusive) {
+ if (c->exclusive) {
u8 *active = (u8 *)stream_state;
u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize;
activeIdx = partial_load_u32(cur, c->activeIdxSize);
- u64a match = subCastleNextMatch(c, full_state, stream_state,
+ u64a match = subCastleNextMatch(c, full_state, stream_state,
loc, activeIdx);
set_matching(c, match, groups, matching, c->numGroups, i,
activeIdx, &offset, end);
- }
- }
-
+ }
+ }
+
if (c->exclusive != PURE_EXCLUSIVE) {
- subCastleMatchLoop(c, full_state, stream_state,
+ subCastleMatchLoop(c, full_state, stream_state,
end, loc, &offset);
- }
+ }
DEBUG_PRINTF("offset=%llu\n", offset);
if (!mmbit_any(matching, c->numRepeats)) {
DEBUG_PRINTF("no more matches\n");
- break;
- }
+ break;
+ }
if (subCastleFireMatch(c, full_state, stream_state,
cb, ctx, offset) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
loc = offset;
- }
-
- return MO_CONTINUE_MATCHING;
-}
-
-static really_inline
-char castleScanVerm(const struct Castle *c, const u8 *buf, const size_t begin,
- const size_t end, size_t *loc) {
- const u8 *ptr = vermicelliExec(c->u.verm.c, 0, buf + begin, buf + end);
- if (ptr == buf + end) {
- DEBUG_PRINTF("no escape found\n");
- return 0;
- }
-
- assert(loc);
- assert(ptr >= buf && ptr < buf + end);
- *loc = (size_t)(ptr - buf);
- DEBUG_PRINTF("escape found at offset %zu\n", *loc);
- return 1;
-}
-
-static really_inline
-char castleScanNVerm(const struct Castle *c, const u8 *buf, const size_t begin,
- const size_t end, size_t *loc) {
- const u8 *ptr = nvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end);
- if (ptr == buf + end) {
- DEBUG_PRINTF("no escape found\n");
- return 0;
- }
-
- assert(loc);
- assert(ptr >= buf && ptr < buf + end);
- *loc = (size_t)(ptr - buf);
- DEBUG_PRINTF("escape found at offset %zu\n", *loc);
- return 1;
-}
-
-static really_inline
-char castleScanShufti(const struct Castle *c, const u8 *buf, const size_t begin,
- const size_t end, size_t *loc) {
- const m128 mask_lo = c->u.shuf.mask_lo;
- const m128 mask_hi = c->u.shuf.mask_hi;
- const u8 *ptr = shuftiExec(mask_lo, mask_hi, buf + begin, buf + end);
- if (ptr == buf + end) {
- DEBUG_PRINTF("no escape found\n");
- return 0;
- }
-
- assert(loc);
- assert(ptr >= buf && ptr < buf + end);
- *loc = (size_t)(ptr - buf);
- DEBUG_PRINTF("escape found at offset %zu\n", *loc);
- return 1;
-}
-
-static really_inline
-char castleScanTruffle(const struct Castle *c, const u8 *buf, const size_t begin,
- const size_t end, size_t *loc) {
+ }
+
+ return MO_CONTINUE_MATCHING;
+}
+
+static really_inline
+char castleScanVerm(const struct Castle *c, const u8 *buf, const size_t begin,
+ const size_t end, size_t *loc) {
+ const u8 *ptr = vermicelliExec(c->u.verm.c, 0, buf + begin, buf + end);
+ if (ptr == buf + end) {
+ DEBUG_PRINTF("no escape found\n");
+ return 0;
+ }
+
+ assert(loc);
+ assert(ptr >= buf && ptr < buf + end);
+ *loc = (size_t)(ptr - buf);
+ DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+ return 1;
+}
+
+static really_inline
+char castleScanNVerm(const struct Castle *c, const u8 *buf, const size_t begin,
+ const size_t end, size_t *loc) {
+ const u8 *ptr = nvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end);
+ if (ptr == buf + end) {
+ DEBUG_PRINTF("no escape found\n");
+ return 0;
+ }
+
+ assert(loc);
+ assert(ptr >= buf && ptr < buf + end);
+ *loc = (size_t)(ptr - buf);
+ DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+ return 1;
+}
+
+static really_inline
+char castleScanShufti(const struct Castle *c, const u8 *buf, const size_t begin,
+ const size_t end, size_t *loc) {
+ const m128 mask_lo = c->u.shuf.mask_lo;
+ const m128 mask_hi = c->u.shuf.mask_hi;
+ const u8 *ptr = shuftiExec(mask_lo, mask_hi, buf + begin, buf + end);
+ if (ptr == buf + end) {
+ DEBUG_PRINTF("no escape found\n");
+ return 0;
+ }
+
+ assert(loc);
+ assert(ptr >= buf && ptr < buf + end);
+ *loc = (size_t)(ptr - buf);
+ DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+ return 1;
+}
+
+static really_inline
+char castleScanTruffle(const struct Castle *c, const u8 *buf, const size_t begin,
+ const size_t end, size_t *loc) {
const u8 *ptr = truffleExec(c->u.truffle.mask1, c->u.truffle.mask2,
buf + begin, buf + end);
- if (ptr == buf + end) {
- DEBUG_PRINTF("no escape found\n");
- return 0;
- }
-
- assert(loc);
- assert(ptr >= buf && ptr < buf + end);
- *loc = (size_t)(ptr - buf);
- DEBUG_PRINTF("escape found at offset %zu\n", *loc);
- return 1;
-}
-
-static really_inline
-char castleScan(const struct Castle *c, const u8 *buf, const size_t begin,
- const size_t end, size_t *loc) {
- assert(begin <= end);
-
- if (begin == end) {
- return 0;
- }
-
- switch (c->type) {
- case CASTLE_DOT:
- // Nothing can stop a dot scan!
- return 0;
- case CASTLE_VERM:
- return castleScanVerm(c, buf, begin, end, loc);
- case CASTLE_NVERM:
- return castleScanNVerm(c, buf, begin, end, loc);
- case CASTLE_SHUFTI:
- return castleScanShufti(c, buf, begin, end, loc);
- case CASTLE_TRUFFLE:
- return castleScanTruffle(c, buf, begin, end, loc);
- default:
- DEBUG_PRINTF("unknown scan type!\n");
- assert(0);
- return 0;
- }
-}
-
-static really_inline
+ if (ptr == buf + end) {
+ DEBUG_PRINTF("no escape found\n");
+ return 0;
+ }
+
+ assert(loc);
+ assert(ptr >= buf && ptr < buf + end);
+ *loc = (size_t)(ptr - buf);
+ DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+ return 1;
+}
+
+static really_inline
+char castleScan(const struct Castle *c, const u8 *buf, const size_t begin,
+ const size_t end, size_t *loc) {
+ assert(begin <= end);
+
+ if (begin == end) {
+ return 0;
+ }
+
+ switch (c->type) {
+ case CASTLE_DOT:
+ // Nothing can stop a dot scan!
+ return 0;
+ case CASTLE_VERM:
+ return castleScanVerm(c, buf, begin, end, loc);
+ case CASTLE_NVERM:
+ return castleScanNVerm(c, buf, begin, end, loc);
+ case CASTLE_SHUFTI:
+ return castleScanShufti(c, buf, begin, end, loc);
+ case CASTLE_TRUFFLE:
+ return castleScanTruffle(c, buf, begin, end, loc);
+ default:
+ DEBUG_PRINTF("unknown scan type!\n");
+ assert(0);
+ return 0;
+ }
+}
+
+static really_inline
char castleRevScanVerm(const struct Castle *c, const u8 *buf,
const size_t begin, const size_t end, size_t *loc) {
const u8 *ptr = rvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end);
@@ -713,25 +713,25 @@ char castleRevScan(const struct Castle *c, const u8 *buf, const size_t begin,
static really_inline
void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp,
char stale_checked) {
- const u32 event = q->items[q->cur].type;
- switch (event) {
- case MQE_TOP:
- assert(0); // should be a numbered top
- break;
- case MQE_START:
- case MQE_END:
- break;
- default:
- assert(event >= MQE_TOP_FIRST);
- assert(event < MQE_INVALID);
- u32 top = event - MQE_TOP_FIRST;
- DEBUG_PRINTF("top %u at offset %llu\n", top, sp);
+ const u32 event = q->items[q->cur].type;
+ switch (event) {
+ case MQE_TOP:
+ assert(0); // should be a numbered top
+ break;
+ case MQE_START:
+ case MQE_END:
+ break;
+ default:
+ assert(event >= MQE_TOP_FIRST);
+ assert(event < MQE_INVALID);
+ u32 top = event - MQE_TOP_FIRST;
+ DEBUG_PRINTF("top %u at offset %llu\n", top, sp);
castleProcessTop(c, top, sp, q->state, q->streamState, stale_checked);
- break;
- }
-}
-
-static really_inline
+ break;
+ }
+}
+
+static really_inline
void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) {
DEBUG_PRINTF("clearing active repeats due to escape\n");
if (c->exclusive) {
@@ -747,244 +747,244 @@ void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) {
static really_inline
char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end,
enum MatchMode mode) {
- assert(n && q);
+ assert(n && q);
assert(n->type == CASTLE_NFA);
-
- DEBUG_PRINTF("state=%p, streamState=%p\n", q->state, q->streamState);
-
- const struct Castle *c = getImplNfa(n);
-
- if (q->report_current) {
- int rv = castleReportCurrent(c, q);
- q->report_current = 0;
- if (rv == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
-
- if (q->cur == q->end) {
- return 1;
- }
-
+
+ DEBUG_PRINTF("state=%p, streamState=%p\n", q->state, q->streamState);
+
+ const struct Castle *c = getImplNfa(n);
+
+ if (q->report_current) {
+ int rv = castleReportCurrent(c, q);
+ q->report_current = 0;
+ if (rv == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+
+ if (q->cur == q->end) {
+ return 1;
+ }
+
u8 *active = (u8 *)q->streamState + c->activeOffset;// active multibit
-
- assert(q->cur + 1 < q->end); // require at least two items
- assert(q_cur_type(q) == MQE_START);
- u64a sp = q_cur_offset(q);
- q->cur++;
- DEBUG_PRINTF("sp=%llu, abs_end=%llu\n", sp, end + q->offset);
-
- while (q->cur < q->end) {
- DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
- q_cur_offset(q));
-
- char found = 0;
- if (c->exclusive) {
+
+ assert(q->cur + 1 < q->end); // require at least two items
+ assert(q_cur_type(q) == MQE_START);
+ u64a sp = q_cur_offset(q);
+ q->cur++;
+ DEBUG_PRINTF("sp=%llu, abs_end=%llu\n", sp, end + q->offset);
+
+ while (q->cur < q->end) {
+ DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
+ q_cur_offset(q));
+
+ char found = 0;
+ if (c->exclusive) {
u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
found = mmbit_any(groups, c->numGroups);
- }
-
- if (!found && !mmbit_any(active, c->numRepeats)) {
- DEBUG_PRINTF("no repeats active, skipping scan\n");
- goto scan_done;
- }
-
- u64a ep = q_cur_offset(q);
- ep = MIN(ep, q->offset + end);
- if (sp < ep) {
- size_t eloc = 0;
- char escape_found = 0;
- DEBUG_PRINTF("scanning from sp=%llu to ep=%llu\n", sp, ep);
- assert(sp >= q->offset && ep >= q->offset);
- if (castleScan(c, q->buffer, sp - q->offset, ep - q->offset,
- &eloc)) {
- escape_found = 1;
- ep = q->offset + eloc;
- DEBUG_PRINTF("escape found at %llu\n", ep);
- assert(ep >= sp);
- }
-
- assert(sp <= ep);
-
- if (mode == STOP_AT_MATCH) {
- size_t mloc;
- if (castleFindMatch(c, sp, ep, q->state, q->streamState,
- &mloc)) {
- DEBUG_PRINTF("storing match at %llu\n", sp + mloc);
- q->cur--;
- assert(q->cur < MAX_MQE_LEN);
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = (s64a)(sp - q->offset) + mloc;
- return MO_MATCHES_PENDING;
- }
- } else {
- assert(mode == CALLBACK_OUTPUT);
- char rv = castleMatchLoop(c, sp, ep, q->state, q->streamState,
- q->cb, q->context);
- if (rv == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- assert(rv == MO_CONTINUE_MATCHING);
- }
-
- if (escape_found) {
+ }
+
+ if (!found && !mmbit_any(active, c->numRepeats)) {
+ DEBUG_PRINTF("no repeats active, skipping scan\n");
+ goto scan_done;
+ }
+
+ u64a ep = q_cur_offset(q);
+ ep = MIN(ep, q->offset + end);
+ if (sp < ep) {
+ size_t eloc = 0;
+ char escape_found = 0;
+ DEBUG_PRINTF("scanning from sp=%llu to ep=%llu\n", sp, ep);
+ assert(sp >= q->offset && ep >= q->offset);
+ if (castleScan(c, q->buffer, sp - q->offset, ep - q->offset,
+ &eloc)) {
+ escape_found = 1;
+ ep = q->offset + eloc;
+ DEBUG_PRINTF("escape found at %llu\n", ep);
+ assert(ep >= sp);
+ }
+
+ assert(sp <= ep);
+
+ if (mode == STOP_AT_MATCH) {
+ size_t mloc;
+ if (castleFindMatch(c, sp, ep, q->state, q->streamState,
+ &mloc)) {
+ DEBUG_PRINTF("storing match at %llu\n", sp + mloc);
+ q->cur--;
+ assert(q->cur < MAX_MQE_LEN);
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = (s64a)(sp - q->offset) + mloc;
+ return MO_MATCHES_PENDING;
+ }
+ } else {
+ assert(mode == CALLBACK_OUTPUT);
+ char rv = castleMatchLoop(c, sp, ep, q->state, q->streamState,
+ q->cb, q->context);
+ if (rv == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ assert(rv == MO_CONTINUE_MATCHING);
+ }
+
+ if (escape_found) {
clear_repeats(c, q, active);
- }
- }
-
- scan_done:
- if (q_cur_loc(q) > end) {
- q->cur--;
- assert(q->cur < MAX_MQE_LEN);
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = end;
- return MO_ALIVE;
- }
-
- sp = q_cur_offset(q);
+ }
+ }
+
+ scan_done:
+ if (q_cur_loc(q) > end) {
+ q->cur--;
+ assert(q->cur < MAX_MQE_LEN);
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ return MO_ALIVE;
+ }
+
+ sp = q_cur_offset(q);
castleHandleEvent(c, q, sp, 1);
- q->cur++;
- }
-
- if (c->exclusive) {
+ q->cur++;
+ }
+
+ if (c->exclusive) {
u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
if (mmbit_any_precise(groups, c->numGroups)) {
return 1;
- }
- }
-
- return mmbit_any_precise(active, c->numRepeats);
-}
-
+ }
+ }
+
+ return mmbit_any_precise(active, c->numRepeats);
+}
+
char nfaExecCastle_Q(const struct NFA *n, struct mq *q, s64a end) {
- DEBUG_PRINTF("entry\n");
+ DEBUG_PRINTF("entry\n");
return nfaExecCastle_Q_i(n, q, end, CALLBACK_OUTPUT);
-}
-
+}
+
char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end) {
- DEBUG_PRINTF("entry\n");
+ DEBUG_PRINTF("entry\n");
return nfaExecCastle_Q_i(n, q, end, STOP_AT_MATCH);
-}
-
+}
+
static
s64a castleLastKillLoc(const struct Castle *c, struct mq *q) {
assert(q_cur_type(q) == MQE_START);
assert(q_last_type(q) == MQE_END);
s64a sp = q_cur_loc(q);
s64a ep = q_last_loc(q);
-
+
DEBUG_PRINTF("finding final squash in (%lld, %lld]\n", sp, ep);
-
+
size_t loc;
if (ep > 0) {
if (castleRevScan(c, q->buffer, sp > 0 ? sp : 0, ep, &loc)) {
return (s64a)loc;
- }
+ }
ep = 0;
}
-
+
if (sp < 0) {
s64a hlen = q->hlength;
if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) {
return (s64a)loc - hlen;
- }
+ }
ep = 0;
- }
+ }
return sp - 1; /* the repeats are never killed */
-}
-
+}
+
char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report) {
- assert(n && q);
+ assert(n && q);
assert(n->type == CASTLE_NFA);
- DEBUG_PRINTF("entry\n");
-
- if (q->cur == q->end) {
- return 1;
- }
-
- assert(q->cur + 1 < q->end); /* require at least two items */
- assert(q_cur_type(q) == MQE_START);
-
- const struct Castle *c = getImplNfa(n);
+ DEBUG_PRINTF("entry\n");
+
+ if (q->cur == q->end) {
+ return 1;
+ }
+
+ assert(q->cur + 1 < q->end); /* require at least two items */
+ assert(q_cur_type(q) == MQE_START);
+
+ const struct Castle *c = getImplNfa(n);
u8 *active = (u8 *)q->streamState + c->activeOffset;
-
+
u64a end_offset = q_last_loc(q) + q->offset;
s64a last_kill_loc = castleLastKillLoc(c, q);
DEBUG_PRINTF("all repeats killed at %lld (exec range %lld, %lld)\n",
last_kill_loc, q_cur_loc(q), q_last_loc(q));
assert(last_kill_loc < q_last_loc(q));
-
+
if (last_kill_loc != q_cur_loc(q) - 1) {
clear_repeats(c, q, active);
}
-
+
q->cur++; /* skip start event */
-
+
/* skip events prior to the repeats being squashed */
while (q_cur_loc(q) <= last_kill_loc) {
DEBUG_PRINTF("skipping moot event at %lld\n", q_cur_loc(q));
q->cur++;
assert(q->cur < q->end);
}
-
+
while (q->cur < q->end) {
DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
q_cur_offset(q));
u64a sp = q_cur_offset(q);
castleHandleEvent(c, q, sp, 0);
- q->cur++;
- }
-
+ q->cur++;
+ }
+
castleDeactivateStaleSubs(c, end_offset, q->state, q->streamState);
char found = 0;
- if (c->exclusive) {
+ if (c->exclusive) {
u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
found = mmbit_any_precise(groups, c->numGroups);
- }
-
- if (!found && !mmbit_any_precise(active, c->numRepeats)) {
- DEBUG_PRINTF("castle is dead\n");
- return 0;
- }
-
+ }
+
+ if (!found && !mmbit_any_precise(active, c->numRepeats)) {
+ DEBUG_PRINTF("castle is dead\n");
+ return 0;
+ }
+
if (castleInAccept(c, q, report, end_offset)) {
- return MO_MATCHES_PENDING;
- }
-
- return 1;
-}
-
+ return MO_MATCHES_PENDING;
+ }
+
+ return 1;
+}
+
char nfaExecCastle_reportCurrent(const struct NFA *n, struct mq *q) {
- assert(n && q);
+ assert(n && q);
assert(n->type == CASTLE_NFA);
- DEBUG_PRINTF("entry\n");
-
- const struct Castle *c = getImplNfa(n);
- castleReportCurrent(c, q);
- return 0;
-}
-
+ DEBUG_PRINTF("entry\n");
+
+ const struct Castle *c = getImplNfa(n);
+ castleReportCurrent(c, q);
+ return 0;
+}
+
char nfaExecCastle_inAccept(const struct NFA *n, ReportID report,
struct mq *q) {
- assert(n && q);
+ assert(n && q);
assert(n->type == CASTLE_NFA);
- DEBUG_PRINTF("entry\n");
-
- const struct Castle *c = getImplNfa(n);
- return castleInAccept(c, q, report, q_cur_offset(q));
-}
-
+ DEBUG_PRINTF("entry\n");
+
+ const struct Castle *c = getImplNfa(n);
+ return castleInAccept(c, q, report, q_cur_offset(q));
+}
+
char nfaExecCastle_inAnyAccept(const struct NFA *n, struct mq *q) {
- assert(n && q);
+ assert(n && q);
assert(n->type == CASTLE_NFA);
- DEBUG_PRINTF("entry\n");
-
- const struct Castle *c = getImplNfa(n);
+ DEBUG_PRINTF("entry\n");
+
+ const struct Castle *c = getImplNfa(n);
const u64a offset = q_cur_offset(q);
DEBUG_PRINTF("offset=%llu\n", offset);
@@ -1025,125 +1025,125 @@ char nfaExecCastle_queueInitState(UNUSED const struct NFA *n, struct mq *q) {
DEBUG_PRINTF("entry\n");
const struct Castle *c = getImplNfa(n);
- assert(q->streamState);
- if (c->exclusive) {
+ assert(q->streamState);
+ if (c->exclusive) {
u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
mmbit_clear(groups, c->numGroups);
- }
-
+ }
+
if (c->exclusive != PURE_EXCLUSIVE) {
u8 *active = (u8 *)q->streamState + c->activeOffset;
- mmbit_clear(active, c->numRepeats);
- }
- return 0;
-}
-
+ mmbit_clear(active, c->numRepeats);
+ }
+ return 0;
+}
+
char nfaExecCastle_initCompressedState(const struct NFA *n, UNUSED u64a offset,
void *state, UNUSED u8 key) {
- assert(n && state);
+ assert(n && state);
assert(n->type == CASTLE_NFA);
- DEBUG_PRINTF("entry\n");
-
- const struct Castle *c = getImplNfa(n);
- if (c->exclusive) {
+ DEBUG_PRINTF("entry\n");
+
+ const struct Castle *c = getImplNfa(n);
+ if (c->exclusive) {
u8 *groups = (u8 *)state + c->groupIterOffset;
mmbit_clear(groups, c->numGroups);
- }
-
+ }
+
if (c->exclusive != PURE_EXCLUSIVE) {
u8 *active = (u8 *)state + c->activeOffset;
- mmbit_clear(active, c->numRepeats);
- }
- return 0;
-}
-
-static really_inline
-void subCastleQueueCompressState(const struct Castle *c, const u32 subIdx,
- const struct mq *q, const u64a offset) {
- const struct SubCastle *sub = getSubCastle(c, subIdx);
- const struct RepeatInfo *info = getRepeatInfo(sub);
- union RepeatControl *rctrl = getControl(q->state, sub);
- char *packed = (char *)q->streamState + sub->streamStateOffset;
- DEBUG_PRINTF("sub %u next match %llu\n", subIdx,
- repeatNextMatch(info, rctrl,
- packed + info->packedCtrlSize, offset));
- repeatPack(packed, info, rctrl, offset);
-}
-
+ mmbit_clear(active, c->numRepeats);
+ }
+ return 0;
+}
+
+static really_inline
+void subCastleQueueCompressState(const struct Castle *c, const u32 subIdx,
+ const struct mq *q, const u64a offset) {
+ const struct SubCastle *sub = getSubCastle(c, subIdx);
+ const struct RepeatInfo *info = getRepeatInfo(sub);
+ union RepeatControl *rctrl = getControl(q->state, sub);
+ char *packed = (char *)q->streamState + sub->streamStateOffset;
+ DEBUG_PRINTF("sub %u next match %llu\n", subIdx,
+ repeatNextMatch(info, rctrl,
+ packed + info->packedCtrlSize, offset));
+ repeatPack(packed, info, rctrl, offset);
+}
+
char nfaExecCastle_queueCompressState(const struct NFA *n, const struct mq *q,
s64a loc) {
- assert(n && q);
+ assert(n && q);
assert(n->type == CASTLE_NFA);
- DEBUG_PRINTF("entry, loc=%lld\n", loc);
-
- const struct Castle *c = getImplNfa(n);
-
- // Pack state for all active repeats.
- const u64a offset = q->offset + loc;
- DEBUG_PRINTF("offset=%llu\n", offset);
- if (c->exclusive) {
+ DEBUG_PRINTF("entry, loc=%lld\n", loc);
+
+ const struct Castle *c = getImplNfa(n);
+
+ // Pack state for all active repeats.
+ const u64a offset = q->offset + loc;
+ DEBUG_PRINTF("offset=%llu\n", offset);
+ if (c->exclusive) {
u8 *active = (u8 *)q->streamState;
u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
u8 *cur = active + i * c->activeIdxSize;
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
- DEBUG_PRINTF("packing state for sub %u\n", activeIdx);
- subCastleQueueCompressState(c, activeIdx, q, offset);
- }
- }
-
+ DEBUG_PRINTF("packing state for sub %u\n", activeIdx);
+ subCastleQueueCompressState(c, activeIdx, q, offset);
+ }
+ }
+
if (c->exclusive != PURE_EXCLUSIVE) {
const u8 *active = (const u8 *)q->streamState + c->activeOffset;
- for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
- i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
- DEBUG_PRINTF("packing state for sub %u\n", i);
- subCastleQueueCompressState(c, i, q, offset);
- }
- }
- return 0;
-}
-
-static really_inline
-void subCastleExpandState(const struct Castle *c, const u32 subIdx,
- void *dest, const void *src, const u64a offset) {
- const struct SubCastle *sub = getSubCastle(c, subIdx);
- const struct RepeatInfo *info = getRepeatInfo(sub);
- DEBUG_PRINTF("unpacking state for sub %u\n", subIdx);
- union RepeatControl *rctrl = getControl(dest, sub);
- const char *packed = (const char *)src + sub->streamStateOffset;
- repeatUnpack(packed, info, offset, rctrl);
- DEBUG_PRINTF("sub %u next match %llu\n", subIdx,
- repeatNextMatch(info, rctrl,
- packed + info->packedCtrlSize, offset));
-}
-
+ for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
+ i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
+ DEBUG_PRINTF("packing state for sub %u\n", i);
+ subCastleQueueCompressState(c, i, q, offset);
+ }
+ }
+ return 0;
+}
+
+static really_inline
+void subCastleExpandState(const struct Castle *c, const u32 subIdx,
+ void *dest, const void *src, const u64a offset) {
+ const struct SubCastle *sub = getSubCastle(c, subIdx);
+ const struct RepeatInfo *info = getRepeatInfo(sub);
+ DEBUG_PRINTF("unpacking state for sub %u\n", subIdx);
+ union RepeatControl *rctrl = getControl(dest, sub);
+ const char *packed = (const char *)src + sub->streamStateOffset;
+ repeatUnpack(packed, info, offset, rctrl);
+ DEBUG_PRINTF("sub %u next match %llu\n", subIdx,
+ repeatNextMatch(info, rctrl,
+ packed + info->packedCtrlSize, offset));
+}
+
char nfaExecCastle_expandState(const struct NFA *n, void *dest, const void *src,
u64a offset, UNUSED u8 key) {
- assert(n && dest && src);
+ assert(n && dest && src);
assert(n->type == CASTLE_NFA);
- DEBUG_PRINTF("entry, src=%p, dest=%p, offset=%llu\n", src, dest, offset);
-
- const struct Castle *c = getImplNfa(n);
-
- if (c->exclusive) {
+ DEBUG_PRINTF("entry, src=%p, dest=%p, offset=%llu\n", src, dest, offset);
+
+ const struct Castle *c = getImplNfa(n);
+
+ if (c->exclusive) {
const u8 *active = (const u8 *)src;
const u8 *groups = active + c->groupIterOffset;
for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
const u8 *cur = active + i * c->activeIdxSize;
const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
- subCastleExpandState(c, activeIdx, dest, src, offset);
- }
- }
-
+ subCastleExpandState(c, activeIdx, dest, src, offset);
+ }
+ }
+
if (c->exclusive != PURE_EXCLUSIVE) {
- // Unpack state for all active repeats.
+ // Unpack state for all active repeats.
const u8 *active = (const u8 *)src + c->activeOffset;
- for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
- i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
- subCastleExpandState(c, i, dest, src, offset);
- }
- }
- return 0;
-}
+ for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
+ i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
+ subCastleExpandState(c, i, dest, src, offset);
+ }
+ }
+ return 0;
+}
diff --git a/contrib/libs/hyperscan/src/nfa/castle.h b/contrib/libs/hyperscan/src/nfa/castle.h
index fdbd7d8592..cc7496ca71 100644
--- a/contrib/libs/hyperscan/src/nfa/castle.h
+++ b/contrib/libs/hyperscan/src/nfa/castle.h
@@ -1,43 +1,43 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef NFA_CASTLE_H
-#define NFA_CASTLE_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "ue2common.h"
-
-struct mq;
-struct NFA;
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef NFA_CASTLE_H
+#define NFA_CASTLE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "ue2common.h"
+
+struct mq;
+struct NFA;
+
char nfaExecCastle_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report);
@@ -52,14 +52,14 @@ char nfaExecCastle_queueCompressState(const struct NFA *nfa, const struct mq *q,
s64a loc);
char nfaExecCastle_expandState(const struct NFA *nfa, void *dest,
const void *src, u64a offset, u8 key);
-
+
#define nfaExecCastle_testEOD NFA_API_NO_IMPL
#define nfaExecCastle_B_Reverse NFA_API_NO_IMPL
#define nfaExecCastle_zombie_status NFA_API_ZOMBIE_NO_IMPL
-
-#ifdef __cplusplus
-}
-
-#endif // __cplusplus
-
-#endif
+
+#ifdef __cplusplus
+}
+
+#endif // __cplusplus
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/castle_internal.h b/contrib/libs/hyperscan/src/nfa/castle_internal.h
index 2b7b84b69a..429c232ff8 100644
--- a/contrib/libs/hyperscan/src/nfa/castle_internal.h
+++ b/contrib/libs/hyperscan/src/nfa/castle_internal.h
@@ -1,79 +1,79 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Castle: multi-tenant repeat engine, data structures.
- */
-
-#ifndef NFA_CASTLE_INTERNAL_H
-#define NFA_CASTLE_INTERNAL_H
-
-#include "ue2common.h"
-#include "repeat_internal.h"
-
-struct SubCastle {
- ReportID report; //!< report to raise on match
- u32 fullStateOffset; //!< offset within full state (scratch)
- u32 streamStateOffset; //!< offset within stream state
- u32 repeatInfoOffset; //!< offset of RepeatInfo structure
- // relative to the start of SubCastle
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Castle: multi-tenant repeat engine, data structures.
+ */
+
+#ifndef NFA_CASTLE_INTERNAL_H
+#define NFA_CASTLE_INTERNAL_H
+
+#include "ue2common.h"
+#include "repeat_internal.h"
+
+struct SubCastle {
+ ReportID report; //!< report to raise on match
+ u32 fullStateOffset; //!< offset within full state (scratch)
+ u32 streamStateOffset; //!< offset within stream state
+ u32 repeatInfoOffset; //!< offset of RepeatInfo structure
+ // relative to the start of SubCastle
u32 exclusiveId; //!< exclusive group id of this SubCastle,
// set to the number of SubCastles in Castle
// if it is not exclusive
-};
-
-#define CASTLE_DOT 0
-#define CASTLE_VERM 1
-#define CASTLE_NVERM 2
-#define CASTLE_SHUFTI 3
-#define CASTLE_TRUFFLE 4
-
+};
+
+#define CASTLE_DOT 0
+#define CASTLE_VERM 1
+#define CASTLE_NVERM 2
+#define CASTLE_SHUFTI 3
+#define CASTLE_TRUFFLE 4
+
enum ExclusiveType {
NOT_EXCLUSIVE, //!< no subcastles are exclusive
EXCLUSIVE, //!< a subset of subcastles are exclusive
PURE_EXCLUSIVE //!< all subcastles are exclusive
};
-/**
- * \brief Castle engine structure.
- *
- * A Castle is a collection of repeats that all share the same character
- * reachability.
- *
- * The whole engine is laid out in memory as:
- *
- * - struct NFA
- * - struct Castle
- * - struct SubCastle[numRepeats]
- * - tables for sparse model repeats
+/**
+ * \brief Castle engine structure.
+ *
+ * A Castle is a collection of repeats that all share the same character
+ * reachability.
+ *
+ * The whole engine is laid out in memory as:
+ *
+ * - struct NFA
+ * - struct Castle
+ * - struct SubCastle[numRepeats]
+ * - tables for sparse model repeats
* - sparse iterator for subcastles that may be stale
- *
- * Castle stores an "active repeats" multibit in stream state, followed by the
+ *
+ * Castle stores an "active repeats" multibit in stream state, followed by the
* packed repeat state for each SubCastle. If there are both exclusive and
* non-exclusive SubCastle groups, we use an active id for each exclusive group
* and a multibit for the non-exclusive group. We also store an "active
@@ -106,12 +106,12 @@ enum ExclusiveType {
* * ...
* * | |
* * |---|
- *
- * In full state (stored in scratch space) it stores a temporary multibit over
- * the repeats (used by \ref castleMatchLoop), followed by the repeat control
+ *
+ * In full state (stored in scratch space) it stores a temporary multibit over
+ * the repeats (used by \ref castleMatchLoop), followed by the repeat control
* blocks for each SubCastle.
- */
-struct ALIGN_AVX_DIRECTIVE Castle {
+ */
+struct ALIGN_AVX_DIRECTIVE Castle {
u32 numRepeats; //!< number of repeats in Castle
u32 numGroups; //!< number of exclusive groups
u8 type; //!< tells us which scanning mechanism (below) to use
@@ -125,19 +125,19 @@ struct ALIGN_AVX_DIRECTIVE Castle {
u32 groupIterOffset; //!< offset to a iterator to check the aliveness of
// exclusive groups
- union {
- struct {
- char c;
- } verm;
- struct {
- m128 mask_lo;
- m128 mask_hi;
- } shuf;
- struct {
- m128 mask1;
- m128 mask2;
- } truffle;
- } u;
-};
-
-#endif // NFA_CASTLE_INTERNAL_H
+ union {
+ struct {
+ char c;
+ } verm;
+ struct {
+ m128 mask_lo;
+ m128 mask_hi;
+ } shuf;
+ struct {
+ m128 mask1;
+ m128 mask2;
+ } truffle;
+ } u;
+};
+
+#endif // NFA_CASTLE_INTERNAL_H
diff --git a/contrib/libs/hyperscan/src/nfa/castlecompile.cpp b/contrib/libs/hyperscan/src/nfa/castlecompile.cpp
index fd0dd4a152..d4c361337a 100644
--- a/contrib/libs/hyperscan/src/nfa/castlecompile.cpp
+++ b/contrib/libs/hyperscan/src/nfa/castlecompile.cpp
@@ -1,273 +1,273 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Castle: multi-tenant repeat engine, compiler code.
- */
+ * \brief Castle: multi-tenant repeat engine, compiler code.
+ */
+
+#include "castlecompile.h"
-#include "castlecompile.h"
-
-#include "castle_internal.h"
+#include "castle_internal.h"
#include "limex_limits.h"
-#include "nfa_internal.h"
-#include "repeatcompile.h"
-#include "shufticompile.h"
-#include "trufflecompile.h"
-#include "nfagraph/ng_dump.h"
-#include "nfagraph/ng_equivalence.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_redundancy.h"
-#include "nfagraph/ng_util.h"
-#include "util/alloc.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
+#include "nfa_internal.h"
+#include "repeatcompile.h"
+#include "shufticompile.h"
+#include "trufflecompile.h"
+#include "nfagraph/ng_dump.h"
+#include "nfagraph/ng_equivalence.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_redundancy.h"
+#include "nfagraph/ng_util.h"
+#include "util/alloc.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
#include "util/flat_containers.h"
-#include "util/graph.h"
-#include "util/make_unique.h"
+#include "util/graph.h"
+#include "util/make_unique.h"
#include "util/multibit_build.h"
#include "util/report_manager.h"
-#include "util/verify_types.h"
-#include "grey.h"
-
-#include <stack>
-#include <cassert>
-
+#include "util/verify_types.h"
+#include "grey.h"
+
+#include <stack>
+#include <cassert>
+
#include <boost/graph/adjacency_list.hpp>
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_keys;
-using boost::adaptors::map_values;
-
-namespace ue2 {
-
-#define CLIQUE_GRAPH_MAX_SIZE 1000
-
-static
-u32 depth_to_u32(const depth &d) {
- assert(d.is_reachable());
- if (d.is_infinite()) {
- return REPEAT_INF;
- }
-
- u32 d_val = d;
- assert(d_val < REPEAT_INF);
- return d_val;
-}
-
-static
-void writeCastleScanEngine(const CharReach &cr, Castle *c) {
- if (cr.all()) {
- c->type = CASTLE_DOT;
- return;
- }
-
- if (cr.count() == 1) {
- c->type = CASTLE_NVERM;
- c->u.verm.c = cr.find_first();
- return;
- }
-
- const CharReach negated(~cr);
- if (negated.count() == 1) {
- c->type = CASTLE_VERM;
- c->u.verm.c = negated.find_first();
- return;
- }
-
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_keys;
+using boost::adaptors::map_values;
+
+namespace ue2 {
+
+#define CLIQUE_GRAPH_MAX_SIZE 1000
+
+static
+u32 depth_to_u32(const depth &d) {
+ assert(d.is_reachable());
+ if (d.is_infinite()) {
+ return REPEAT_INF;
+ }
+
+ u32 d_val = d;
+ assert(d_val < REPEAT_INF);
+ return d_val;
+}
+
+static
+void writeCastleScanEngine(const CharReach &cr, Castle *c) {
+ if (cr.all()) {
+ c->type = CASTLE_DOT;
+ return;
+ }
+
+ if (cr.count() == 1) {
+ c->type = CASTLE_NVERM;
+ c->u.verm.c = cr.find_first();
+ return;
+ }
+
+ const CharReach negated(~cr);
+ if (negated.count() == 1) {
+ c->type = CASTLE_VERM;
+ c->u.verm.c = negated.find_first();
+ return;
+ }
+
if (shuftiBuildMasks(negated, (u8 *)&c->u.shuf.mask_lo,
(u8 *)&c->u.shuf.mask_hi) != -1) {
- c->type = CASTLE_SHUFTI;
- return;
- }
-
- c->type = CASTLE_TRUFFLE;
+ c->type = CASTLE_SHUFTI;
+ return;
+ }
+
+ c->type = CASTLE_TRUFFLE;
truffleBuildMasks(negated, (u8 *)(u8 *)&c->u.truffle.mask1,
(u8 *)&c->u.truffle.mask2);
-}
-
-static
-bool literalOverlap(const vector<CharReach> &a, const vector<CharReach> &b,
- const size_t dist) {
- for (size_t i = 0; i < b.size(); i++) {
- if (i > dist) {
- return true;
- }
- size_t overlap_len = b.size() - i;
- if (overlap_len <= a.size()) {
- if (matches(a.end() - overlap_len, a.end(), b.begin(),
- b.end() - i)) {
- return false;
- }
- } else {
- assert(overlap_len > a.size());
- if (matches(a.begin(), a.end(), b.end() - i - a.size(),
- b.end() - i)) {
- return false;
- }
- }
- }
-
- return b.size() > dist;
-}
-
-struct CliqueVertexProps {
- CliqueVertexProps() {}
- explicit CliqueVertexProps(u32 state_in) : stateId(state_in) {}
-
- u32 stateId = ~0U;
-};
-
-typedef boost::adjacency_list<boost::listS, boost::listS, boost::undirectedS,
- CliqueVertexProps> CliqueGraph;
-typedef CliqueGraph::vertex_descriptor CliqueVertex;
-
-static
-void getNeighborInfo(const CliqueGraph &g, vector<u32> &neighbor,
- const CliqueVertex &cv, const set<u32> &group) {
- u32 id = g[cv].stateId;
-
- // find neighbors for cv
- for (const auto &v : adjacent_vertices_range(cv, g)) {
+}
+
+static
+bool literalOverlap(const vector<CharReach> &a, const vector<CharReach> &b,
+ const size_t dist) {
+ for (size_t i = 0; i < b.size(); i++) {
+ if (i > dist) {
+ return true;
+ }
+ size_t overlap_len = b.size() - i;
+ if (overlap_len <= a.size()) {
+ if (matches(a.end() - overlap_len, a.end(), b.begin(),
+ b.end() - i)) {
+ return false;
+ }
+ } else {
+ assert(overlap_len > a.size());
+ if (matches(a.begin(), a.end(), b.end() - i - a.size(),
+ b.end() - i)) {
+ return false;
+ }
+ }
+ }
+
+ return b.size() > dist;
+}
+
+struct CliqueVertexProps {
+ CliqueVertexProps() {}
+ explicit CliqueVertexProps(u32 state_in) : stateId(state_in) {}
+
+ u32 stateId = ~0U;
+};
+
+typedef boost::adjacency_list<boost::listS, boost::listS, boost::undirectedS,
+ CliqueVertexProps> CliqueGraph;
+typedef CliqueGraph::vertex_descriptor CliqueVertex;
+
+static
+void getNeighborInfo(const CliqueGraph &g, vector<u32> &neighbor,
+ const CliqueVertex &cv, const set<u32> &group) {
+ u32 id = g[cv].stateId;
+
+ // find neighbors for cv
+ for (const auto &v : adjacent_vertices_range(cv, g)) {
if (g[v].stateId != id && contains(group, g[v].stateId)) {
- neighbor.push_back(g[v].stateId);
- DEBUG_PRINTF("Neighbor:%u\n", g[v].stateId);
- }
- }
-}
-
-static
-void findCliqueGroup(CliqueGraph &cg, vector<u32> &clique) {
- stack<vector<u32>> gStack;
-
- // Create mapping between vertex and id
- map<u32, CliqueVertex> vertexMap;
- vector<u32> init;
- for (const auto &v : vertices_range(cg)) {
- vertexMap[cg[v].stateId] = v;
- init.push_back(cg[v].stateId);
- }
- gStack.push(init);
-
- // Get the vertex to start from
- CliqueGraph::vertex_iterator vi, ve;
- tie(vi, ve) = vertices(cg);
- while (!gStack.empty()) {
- vector<u32> g = gStack.top();
- gStack.pop();
-
- // Choose a vertex from the graph
- u32 id = g[0];
- const CliqueVertex &n = vertexMap.at(id);
- clique.push_back(id);
- // Corresponding vertex in the original graph
- vector<u32> neighbor;
- set<u32> subgraphId(g.begin(), g.end());
- getNeighborInfo(cg, neighbor, n, subgraphId);
- // Get graph consisting of neighbors for left branch
- if (!neighbor.empty()) {
- gStack.push(neighbor);
- }
- }
-}
-
-template<typename Graph>
-bool graph_empty(const Graph &g) {
- typename Graph::vertex_iterator vi, ve;
- tie(vi, ve) = vertices(g);
- return vi == ve;
-}
-
-static
-vector<u32> removeClique(CliqueGraph &cg) {
- vector<vector<u32>> cliquesVec(1);
+ neighbor.push_back(g[v].stateId);
+ DEBUG_PRINTF("Neighbor:%u\n", g[v].stateId);
+ }
+ }
+}
+
+static
+void findCliqueGroup(CliqueGraph &cg, vector<u32> &clique) {
+ stack<vector<u32>> gStack;
+
+ // Create mapping between vertex and id
+ map<u32, CliqueVertex> vertexMap;
+ vector<u32> init;
+ for (const auto &v : vertices_range(cg)) {
+ vertexMap[cg[v].stateId] = v;
+ init.push_back(cg[v].stateId);
+ }
+ gStack.push(init);
+
+ // Get the vertex to start from
+ CliqueGraph::vertex_iterator vi, ve;
+ tie(vi, ve) = vertices(cg);
+ while (!gStack.empty()) {
+ vector<u32> g = gStack.top();
+ gStack.pop();
+
+ // Choose a vertex from the graph
+ u32 id = g[0];
+ const CliqueVertex &n = vertexMap.at(id);
+ clique.push_back(id);
+ // Corresponding vertex in the original graph
+ vector<u32> neighbor;
+ set<u32> subgraphId(g.begin(), g.end());
+ getNeighborInfo(cg, neighbor, n, subgraphId);
+ // Get graph consisting of neighbors for left branch
+ if (!neighbor.empty()) {
+ gStack.push(neighbor);
+ }
+ }
+}
+
+template<typename Graph>
+bool graph_empty(const Graph &g) {
+ typename Graph::vertex_iterator vi, ve;
+ tie(vi, ve) = vertices(g);
+ return vi == ve;
+}
+
+static
+vector<u32> removeClique(CliqueGraph &cg) {
+ vector<vector<u32>> cliquesVec(1);
DEBUG_PRINTF("graph size:%zu\n", num_vertices(cg));
- findCliqueGroup(cg, cliquesVec[0]);
- while (!graph_empty(cg)) {
- const vector<u32> &c = cliquesVec.back();
- vector<CliqueVertex> dead;
- for (const auto &v : vertices_range(cg)) {
- if (find(c.begin(), c.end(), cg[v].stateId) != c.end()) {
- dead.push_back(v);
- }
- }
- for (const auto &v : dead) {
- clear_vertex(v, cg);
- remove_vertex(v, cg);
- }
- if (graph_empty(cg)) {
- break;
- }
- vector<u32> clique;
- findCliqueGroup(cg, clique);
- cliquesVec.push_back(clique);
- }
-
- // get the independent set with max size
- size_t max = 0;
- size_t id = 0;
- for (size_t j = 0; j < cliquesVec.size(); ++j) {
- if (cliquesVec[j].size() > max) {
- max = cliquesVec[j].size();
- id = j;
- }
- }
-
+ findCliqueGroup(cg, cliquesVec[0]);
+ while (!graph_empty(cg)) {
+ const vector<u32> &c = cliquesVec.back();
+ vector<CliqueVertex> dead;
+ for (const auto &v : vertices_range(cg)) {
+ if (find(c.begin(), c.end(), cg[v].stateId) != c.end()) {
+ dead.push_back(v);
+ }
+ }
+ for (const auto &v : dead) {
+ clear_vertex(v, cg);
+ remove_vertex(v, cg);
+ }
+ if (graph_empty(cg)) {
+ break;
+ }
+ vector<u32> clique;
+ findCliqueGroup(cg, clique);
+ cliquesVec.push_back(clique);
+ }
+
+ // get the independent set with max size
+ size_t max = 0;
+ size_t id = 0;
+ for (size_t j = 0; j < cliquesVec.size(); ++j) {
+ if (cliquesVec[j].size() > max) {
+ max = cliquesVec[j].size();
+ id = j;
+ }
+ }
+
DEBUG_PRINTF("clique size:%zu\n", cliquesVec[id].size());
- return cliquesVec[id];
-}
-
-// if the location of any reset character in one literal are after
-// the end locations where it overlaps with other literals,
-// then the literals are mutual exclusive
-static
+ return cliquesVec[id];
+}
+
+// if the location of any reset character in one literal are after
+// the end locations where it overlaps with other literals,
+// then the literals are mutual exclusive
+static
bool findExclusivePair(const size_t id1, const size_t id2,
const size_t lower,
- const vector<vector<size_t>> &min_reset_dist,
- const vector<vector<vector<CharReach>>> &triggers) {
- const auto &triggers1 = triggers[id1];
- const auto &triggers2 = triggers[id2];
+ const vector<vector<size_t>> &min_reset_dist,
+ const vector<vector<vector<CharReach>>> &triggers) {
+ const auto &triggers1 = triggers[id1];
+ const auto &triggers2 = triggers[id2];
for (size_t i = 0; i < triggers1.size(); ++i) {
for (size_t j = 0; j < triggers2.size(); ++j) {
- if (!literalOverlap(triggers1[i], triggers2[j],
+ if (!literalOverlap(triggers1[i], triggers2[j],
min_reset_dist[id2 - lower][j]) ||
- !literalOverlap(triggers2[j], triggers1[i],
+ !literalOverlap(triggers2[j], triggers1[i],
min_reset_dist[id1 - lower][i])) {
- return false;
- }
- }
- }
- return true;
-}
-
-static
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+static
vector<vector<u32>> checkExclusion(u32 &streamStateSize,
const CharReach &cr,
const vector<vector<vector<CharReach>>> &triggers,
@@ -276,20 +276,20 @@ vector<vector<u32>> checkExclusion(u32 &streamStateSize,
vector<vector<u32>> groups;
size_t trigSize = triggers.size();
DEBUG_PRINTF("trigSize %zu\n", trigSize);
-
+
size_t lower = 0;
size_t total = 0;
while (lower < trigSize) {
vector<CliqueVertex> vertices;
unique_ptr<CliqueGraph> cg = std::make_unique<CliqueGraph>();
-
+
vector<vector<size_t>> min_reset_dist;
size_t upper = min(lower + CLIQUE_GRAPH_MAX_SIZE, trigSize);
// get min reset distance for each repeat
for (size_t i = lower; i < upper; i++) {
CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg);
vertices.push_back(v);
-
+
const vector<size_t> &tmp_dist =
minResetDistToEnd(triggers[i], cr);
min_reset_dist.push_back(tmp_dist);
@@ -304,8 +304,8 @@ vector<vector<u32>> checkExclusion(u32 &streamStateSize,
CliqueVertex d = vertices[j - lower];
add_edge(s, d, *cg);
}
- }
- }
+ }
+ }
// find the largest exclusive group
auto clique = removeClique(*cg);
@@ -317,17 +317,17 @@ vector<vector<u32>> checkExclusion(u32 &streamStateSize,
}
lower += CLIQUE_GRAPH_MAX_SIZE;
- }
+ }
DEBUG_PRINTF("clique size %zu, num of repeats %zu\n",
total, numRepeats);
if (total == numRepeats) {
exclusive = PURE_EXCLUSIVE;
streamStateSize = 0;
};
-
+
return groups;
-}
-
+}
+
namespace {
struct ExclusiveInfo {
@@ -339,37 +339,37 @@ struct ExclusiveInfo {
};
}
-static
-void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
- vector<RepeatInfo> &infos, vector<u64a> &patchSize,
- const vector<pair<depth, bool>> &repeatInfoPair,
- u32 &scratchStateSize, u32 &streamStateSize,
- u32 &tableSize, vector<u64a> &tables, u32 &sparseRepeats,
+static
+void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
+ vector<RepeatInfo> &infos, vector<u64a> &patchSize,
+ const vector<pair<depth, bool>> &repeatInfoPair,
+ u32 &scratchStateSize, u32 &streamStateSize,
+ u32 &tableSize, vector<u64a> &tables, u32 &sparseRepeats,
const ExclusiveInfo &exclusiveInfo,
vector<u32> &may_stale, const ReportManager &rm) {
const bool remap_reports = has_managed_reports(proto.kind);
- u32 i = 0;
+ u32 i = 0;
const auto &groupId = exclusiveInfo.groupId;
const auto &numGroups = exclusiveInfo.numGroups;
vector<u32> maxStreamSize(numGroups, 0);
- for (auto it = proto.repeats.begin(), ite = proto.repeats.end();
- it != ite; ++it, ++i) {
- const PureRepeat &pr = it->second;
- depth min_period = repeatInfoPair[i].first;
- bool is_reset = repeatInfoPair[i].second;
-
- enum RepeatType rtype = chooseRepeatType(pr.bounds.min, pr.bounds.max,
+ for (auto it = proto.repeats.begin(), ite = proto.repeats.end();
+ it != ite; ++it, ++i) {
+ const PureRepeat &pr = it->second;
+ depth min_period = repeatInfoPair[i].first;
+ bool is_reset = repeatInfoPair[i].second;
+
+ enum RepeatType rtype = chooseRepeatType(pr.bounds.min, pr.bounds.max,
min_period, is_reset, true);
- RepeatStateInfo rsi(rtype, pr.bounds.min, pr.bounds.max, min_period);
-
- DEBUG_PRINTF("sub %u: selected %s model for %s repeat\n", i,
- repeatTypeName(rtype), pr.bounds.str().c_str());
-
- SubCastle &sub = subs[i];
- RepeatInfo &info = infos[i];
-
+ RepeatStateInfo rsi(rtype, pr.bounds.min, pr.bounds.max, min_period);
+
+ DEBUG_PRINTF("sub %u: selected %s model for %s repeat\n", i,
+ repeatTypeName(rtype), pr.bounds.str().c_str());
+
+ SubCastle &sub = subs[i];
+ RepeatInfo &info = infos[i];
+
info.packedCtrlSize = rsi.packedCtrlSize;
u32 subStreamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize);
@@ -379,46 +379,46 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
maxStreamSize[id] = max(maxStreamSize[id], subStreamStateSize);
// SubCastle full/stream state offsets are written in for the group
// below.
- } else {
- sub.fullStateOffset = scratchStateSize;
- sub.streamStateOffset = streamStateSize;
+ } else {
+ sub.fullStateOffset = scratchStateSize;
+ sub.streamStateOffset = streamStateSize;
scratchStateSize += verify_u32(sizeof(RepeatControl));
- streamStateSize += subStreamStateSize;
- }
-
+ streamStateSize += subStreamStateSize;
+ }
+
if (pr.bounds.max.is_finite()) {
may_stale.push_back(i);
}
- info.type = verify_u8(rtype);
- info.repeatMin = depth_to_u32(pr.bounds.min);
- info.repeatMax = depth_to_u32(pr.bounds.max);
- info.stateSize = rsi.stateSize;
- info.horizon = rsi.horizon;
- info.minPeriod = min_period.is_finite() ? (u32)min_period : ~0U;
- assert(rsi.packedFieldSizes.size()
- <= ARRAY_LENGTH(info.packedFieldSizes));
- copy(rsi.packedFieldSizes.begin(), rsi.packedFieldSizes.end(),
- info.packedFieldSizes);
- info.patchCount = rsi.patchCount;
- info.patchSize = rsi.patchSize;
- info.encodingSize = rsi.encodingSize;
- info.patchesOffset = rsi.patchesOffset;
-
+ info.type = verify_u8(rtype);
+ info.repeatMin = depth_to_u32(pr.bounds.min);
+ info.repeatMax = depth_to_u32(pr.bounds.max);
+ info.stateSize = rsi.stateSize;
+ info.horizon = rsi.horizon;
+ info.minPeriod = min_period.is_finite() ? (u32)min_period : ~0U;
+ assert(rsi.packedFieldSizes.size()
+ <= ARRAY_LENGTH(info.packedFieldSizes));
+ copy(rsi.packedFieldSizes.begin(), rsi.packedFieldSizes.end(),
+ info.packedFieldSizes);
+ info.patchCount = rsi.patchCount;
+ info.patchSize = rsi.patchSize;
+ info.encodingSize = rsi.encodingSize;
+ info.patchesOffset = rsi.patchesOffset;
+
assert(pr.reports.size() == 1);
ReportID id = *pr.reports.begin();
sub.report = remap_reports ? rm.getProgramOffset(id) : id;
-
- if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
+
+ if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
for (u32 j = 0; j < rsi.patchSize; j++) {
tables.push_back(rsi.table[j]);
}
sparseRepeats++;
patchSize[i] = rsi.patchSize;
tableSize += rsi.patchSize;
- }
- }
-
+ }
+ }
+
vector<u32> scratchOffset(numGroups, 0);
vector<u32> streamOffset(numGroups, 0);
for (const auto &j : groupId) {
@@ -426,8 +426,8 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
u32 id = j.second;
SubCastle &sub = subs[top];
if (!scratchOffset[id]) {
- sub.fullStateOffset = scratchStateSize;
- sub.streamStateOffset = streamStateSize;
+ sub.fullStateOffset = scratchStateSize;
+ sub.streamStateOffset = streamStateSize;
scratchOffset[id] = scratchStateSize;
streamOffset[id] = streamStateSize;
scratchStateSize += verify_u32(sizeof(RepeatControl));
@@ -435,107 +435,107 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs,
} else {
sub.fullStateOffset = scratchOffset[id];
sub.streamStateOffset = streamOffset[id];
- }
- }
-}
-
+ }
+ }
+}
+
bytecode_ptr<NFA>
-buildCastle(const CastleProto &proto,
- const map<u32, vector<vector<CharReach>>> &triggers,
+buildCastle(const CastleProto &proto,
+ const map<u32, vector<vector<CharReach>>> &triggers,
const CompileContext &cc, const ReportManager &rm) {
- assert(cc.grey.allowCastle);
-
- const size_t numRepeats = proto.repeats.size();
- assert(numRepeats > 0 && numRepeats <= proto.max_occupancy);
-
- const CharReach &cr = proto.reach();
-
- DEBUG_PRINTF("reach %s, %zu repeats\n", describeClass(cr).c_str(),
- numRepeats);
-
- vector<SubCastle> subs(numRepeats);
- memset(&subs[0], 0, sizeof(SubCastle) * numRepeats);
-
- vector<RepeatInfo> infos(numRepeats);
- memset(&infos[0], 0, sizeof(RepeatInfo) * numRepeats);
-
- vector<u64a> patchSize(numRepeats);
- memset(&patchSize[0], 0, sizeof(u64a) * numRepeats);
-
- vector<u64a> tables;
-
- // We start with enough stream state to store the active bitfield.
- u32 streamStateSize = mmbit_size(numRepeats);
-
- // We have a copy of the stream state in scratch for castleMatchLoop.
- u32 scratchStateSize = ROUNDUP_N(streamStateSize, alignof(RepeatControl));
-
- depth minWidth(depth::infinity());
- depth maxWidth(0);
-
- u32 i = 0;
+ assert(cc.grey.allowCastle);
+
+ const size_t numRepeats = proto.repeats.size();
+ assert(numRepeats > 0 && numRepeats <= proto.max_occupancy);
+
+ const CharReach &cr = proto.reach();
+
+ DEBUG_PRINTF("reach %s, %zu repeats\n", describeClass(cr).c_str(),
+ numRepeats);
+
+ vector<SubCastle> subs(numRepeats);
+ memset(&subs[0], 0, sizeof(SubCastle) * numRepeats);
+
+ vector<RepeatInfo> infos(numRepeats);
+ memset(&infos[0], 0, sizeof(RepeatInfo) * numRepeats);
+
+ vector<u64a> patchSize(numRepeats);
+ memset(&patchSize[0], 0, sizeof(u64a) * numRepeats);
+
+ vector<u64a> tables;
+
+ // We start with enough stream state to store the active bitfield.
+ u32 streamStateSize = mmbit_size(numRepeats);
+
+ // We have a copy of the stream state in scratch for castleMatchLoop.
+ u32 scratchStateSize = ROUNDUP_N(streamStateSize, alignof(RepeatControl));
+
+ depth minWidth(depth::infinity());
+ depth maxWidth(0);
+
+ u32 i = 0;
ExclusiveInfo exclusiveInfo;
vector<vector<vector<CharReach>>> candidateTriggers;
- vector<u32> candidateRepeats;
- vector<pair<depth, bool>> repeatInfoPair;
- for (auto it = proto.repeats.begin(), ite = proto.repeats.end();
- it != ite; ++it, ++i) {
- const u32 top = it->first;
- const PureRepeat &pr = it->second;
- assert(pr.reach == cr);
- assert(pr.reports.size() == 1);
-
- if (top != i) {
- // Tops have not been remapped?
- assert(0);
- throw std::logic_error("Tops not remapped");
- }
-
- minWidth = min(minWidth, pr.bounds.min);
- maxWidth = max(maxWidth, pr.bounds.max);
-
- bool is_reset = false;
- depth min_period = depth::infinity();
-
- // If we've got a top in the castle without any trigger information, it
- // possibly means that we've got a repeat that we can't trigger. We do
- // need to cope with it though.
- if (contains(triggers, top)) {
+ vector<u32> candidateRepeats;
+ vector<pair<depth, bool>> repeatInfoPair;
+ for (auto it = proto.repeats.begin(), ite = proto.repeats.end();
+ it != ite; ++it, ++i) {
+ const u32 top = it->first;
+ const PureRepeat &pr = it->second;
+ assert(pr.reach == cr);
+ assert(pr.reports.size() == 1);
+
+ if (top != i) {
+ // Tops have not been remapped?
+ assert(0);
+ throw std::logic_error("Tops not remapped");
+ }
+
+ minWidth = min(minWidth, pr.bounds.min);
+ maxWidth = max(maxWidth, pr.bounds.max);
+
+ bool is_reset = false;
+ depth min_period = depth::infinity();
+
+ // If we've got a top in the castle without any trigger information, it
+ // possibly means that we've got a repeat that we can't trigger. We do
+ // need to cope with it though.
+ if (contains(triggers, top)) {
min_period = depth(minPeriod(triggers.at(top), cr, &is_reset));
- }
-
- if (min_period > pr.bounds.max) {
- DEBUG_PRINTF("trigger is longer than repeat; only need one offset\n");
- is_reset = true;
- }
-
- repeatInfoPair.push_back(make_pair(min_period, is_reset));
-
+ }
+
+ if (min_period > pr.bounds.max) {
+ DEBUG_PRINTF("trigger is longer than repeat; only need one offset\n");
+ is_reset = true;
+ }
+
+ repeatInfoPair.push_back(make_pair(min_period, is_reset));
+
candidateTriggers.push_back(triggers.at(top));
candidateRepeats.push_back(i);
- }
-
- // Case 1: exclusive repeats
+ }
+
+ // Case 1: exclusive repeats
enum ExclusiveType exclusive = NOT_EXCLUSIVE;
- u32 activeIdxSize = 0;
+ u32 activeIdxSize = 0;
u32 groupIterOffset = 0;
- if (cc.grey.castleExclusive) {
+ if (cc.grey.castleExclusive) {
auto cliqueGroups =
checkExclusion(streamStateSize, cr, candidateTriggers,
exclusive, numRepeats);
for (const auto &group : cliqueGroups) {
// mutual exclusive repeats group found,
// update state sizes
- activeIdxSize = calcPackedBytes(numRepeats + 1);
- streamStateSize += activeIdxSize;
-
- // replace with top values
+ activeIdxSize = calcPackedBytes(numRepeats + 1);
+ streamStateSize += activeIdxSize;
+
+ // replace with top values
for (const auto &val : group) {
const u32 top = candidateRepeats[val];
exclusiveInfo.groupId[top] = exclusiveInfo.numGroups;
- }
+ }
exclusiveInfo.numGroups++;
- }
+ }
if (exclusive) {
groupIterOffset = streamStateSize;
@@ -543,20 +543,20 @@ buildCastle(const CastleProto &proto,
}
DEBUG_PRINTF("num of groups:%u\n", exclusiveInfo.numGroups);
- }
+ }
candidateRepeats.clear();
-
- DEBUG_PRINTF("reach %s exclusive %u\n", describeClass(cr).c_str(),
- exclusive);
-
- u32 tableSize = 0;
- u32 sparseRepeats = 0;
+
+ DEBUG_PRINTF("reach %s exclusive %u\n", describeClass(cr).c_str(),
+ exclusive);
+
+ u32 tableSize = 0;
+ u32 sparseRepeats = 0;
vector<u32> may_stale; /* sub castles that may go stale */
- buildSubcastles(proto, subs, infos, patchSize, repeatInfoPair,
- scratchStateSize, streamStateSize, tableSize,
+ buildSubcastles(proto, subs, infos, patchSize, repeatInfoPair,
+ scratchStateSize, streamStateSize, tableSize,
tables, sparseRepeats, exclusiveInfo, may_stale, rm);
-
+
DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size());
vector<mmbit_sparse_iter> stale_iter;
if (!may_stale.empty()) {
@@ -565,75 +565,75 @@ buildCastle(const CastleProto &proto,
size_t total_size =
- sizeof(NFA) + // initial NFA structure
- sizeof(Castle) + // Castle structure
- sizeof(SubCastle) * subs.size() + // SubCastles themselves
- sizeof(RepeatInfo) * subs.size() + // RepeatInfo structure
- sizeof(u64a) * tableSize + // table size for
- // REPEAT_SPARSE_OPTIMAL_P
- sizeof(u64a) * sparseRepeats; // paddings for
- // REPEAT_SPARSE_OPTIMAL_P tables
-
+ sizeof(NFA) + // initial NFA structure
+ sizeof(Castle) + // Castle structure
+ sizeof(SubCastle) * subs.size() + // SubCastles themselves
+ sizeof(RepeatInfo) * subs.size() + // RepeatInfo structure
+ sizeof(u64a) * tableSize + // table size for
+ // REPEAT_SPARSE_OPTIMAL_P
+ sizeof(u64a) * sparseRepeats; // paddings for
+ // REPEAT_SPARSE_OPTIMAL_P tables
+
total_size = ROUNDUP_N(total_size, alignof(mmbit_sparse_iter));
total_size += byte_length(stale_iter); // stale sparse iter
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
nfa->type = verify_u8(CASTLE_NFA);
- nfa->length = verify_u32(total_size);
- nfa->nPositions = verify_u32(subs.size());
- nfa->streamStateSize = streamStateSize;
- nfa->scratchStateSize = scratchStateSize;
- nfa->minWidth = verify_u32(minWidth);
- nfa->maxWidth = maxWidth.is_finite() ? verify_u32(maxWidth) : 0;
-
+ nfa->length = verify_u32(total_size);
+ nfa->nPositions = verify_u32(subs.size());
+ nfa->streamStateSize = streamStateSize;
+ nfa->scratchStateSize = scratchStateSize;
+ nfa->minWidth = verify_u32(minWidth);
+ nfa->maxWidth = maxWidth.is_finite() ? verify_u32(maxWidth) : 0;
+
char * const base_ptr = (char *)nfa.get() + sizeof(NFA);
char *ptr = base_ptr;
- Castle *c = (Castle *)ptr;
- c->numRepeats = verify_u32(subs.size());
+ Castle *c = (Castle *)ptr;
+ c->numRepeats = verify_u32(subs.size());
c->numGroups = exclusiveInfo.numGroups;
c->exclusive = verify_s8(exclusive);
- c->activeIdxSize = verify_u8(activeIdxSize);
+ c->activeIdxSize = verify_u8(activeIdxSize);
c->activeOffset = verify_u32(c->numGroups * activeIdxSize);
c->groupIterOffset = groupIterOffset;
-
- writeCastleScanEngine(cr, c);
-
- ptr += sizeof(Castle);
- SubCastle *subCastles = ((SubCastle *)(ROUNDUP_PTR(ptr, alignof(u32))));
- copy(subs.begin(), subs.end(), subCastles);
-
- u32 length = 0;
- u32 tableIdx = 0;
- for (i = 0; i < numRepeats; i++) {
- u32 offset = sizeof(SubCastle) * (numRepeats - i) + length;
- SubCastle *sub = &subCastles[i];
- sub->repeatInfoOffset = offset;
-
- ptr = (char *)sub + offset;
- memcpy(ptr, &infos[i], sizeof(RepeatInfo));
-
- if (patchSize[i]) {
- RepeatInfo *info = (RepeatInfo *)ptr;
- u64a *table = ((u64a *)(ROUNDUP_PTR(((char *)(info) +
- sizeof(*info)), alignof(u64a))));
- copy(tables.begin() + tableIdx,
- tables.begin() + tableIdx + patchSize[i], table);
- u32 diff = (char *)table - (char *)info +
- sizeof(u64a) * patchSize[i];
- info->length = diff;
- length += diff;
- tableIdx += patchSize[i];
- } else {
- length += sizeof(RepeatInfo);
- }
-
- // set exclusive group info
+
+ writeCastleScanEngine(cr, c);
+
+ ptr += sizeof(Castle);
+ SubCastle *subCastles = ((SubCastle *)(ROUNDUP_PTR(ptr, alignof(u32))));
+ copy(subs.begin(), subs.end(), subCastles);
+
+ u32 length = 0;
+ u32 tableIdx = 0;
+ for (i = 0; i < numRepeats; i++) {
+ u32 offset = sizeof(SubCastle) * (numRepeats - i) + length;
+ SubCastle *sub = &subCastles[i];
+ sub->repeatInfoOffset = offset;
+
+ ptr = (char *)sub + offset;
+ memcpy(ptr, &infos[i], sizeof(RepeatInfo));
+
+ if (patchSize[i]) {
+ RepeatInfo *info = (RepeatInfo *)ptr;
+ u64a *table = ((u64a *)(ROUNDUP_PTR(((char *)(info) +
+ sizeof(*info)), alignof(u64a))));
+ copy(tables.begin() + tableIdx,
+ tables.begin() + tableIdx + patchSize[i], table);
+ u32 diff = (char *)table - (char *)info +
+ sizeof(u64a) * patchSize[i];
+ info->length = diff;
+ length += diff;
+ tableIdx += patchSize[i];
+ } else {
+ length += sizeof(RepeatInfo);
+ }
+
+ // set exclusive group info
if (contains(exclusiveInfo.groupId, i)) {
sub->exclusiveId = exclusiveInfo.groupId[i];
- } else {
+ } else {
sub->exclusiveId = numRepeats;
- }
- }
+ }
+ }
ptr = base_ptr + total_size - sizeof(NFA) - byte_length(stale_iter);
@@ -644,356 +644,356 @@ buildCastle(const CastleProto &proto,
ptr += byte_length(stale_iter);
}
- return nfa;
-}
-
-set<ReportID> all_reports(const CastleProto &proto) {
- set<ReportID> reports;
- for (const ReportID &report : proto.report_map | map_keys) {
- reports.insert(report);
- }
- return reports;
-}
-
-depth findMinWidth(const CastleProto &proto) {
- depth min_width(depth::infinity());
- for (const PureRepeat &pr : proto.repeats | map_values) {
- min_width = min(min_width, pr.bounds.min);
- }
- return min_width;
-}
-
-depth findMaxWidth(const CastleProto &proto) {
- depth max_width(0);
- for (const PureRepeat &pr : proto.repeats | map_values) {
- max_width = max(max_width, pr.bounds.max);
- }
- return max_width;
-}
-
-depth findMinWidth(const CastleProto &proto, u32 top) {
- if (!contains(proto.repeats, top)) {
- assert(0); // should not happen
- return depth::infinity();
- }
- return proto.repeats.at(top).bounds.min;
-}
-
-depth findMaxWidth(const CastleProto &proto, u32 top) {
- if (!contains(proto.repeats, top)) {
- assert(0); // should not happen
- return depth(0);
- }
- return proto.repeats.at(top).bounds.max;
-}
-
+ return nfa;
+}
+
+set<ReportID> all_reports(const CastleProto &proto) {
+ set<ReportID> reports;
+ for (const ReportID &report : proto.report_map | map_keys) {
+ reports.insert(report);
+ }
+ return reports;
+}
+
+depth findMinWidth(const CastleProto &proto) {
+ depth min_width(depth::infinity());
+ for (const PureRepeat &pr : proto.repeats | map_values) {
+ min_width = min(min_width, pr.bounds.min);
+ }
+ return min_width;
+}
+
+depth findMaxWidth(const CastleProto &proto) {
+ depth max_width(0);
+ for (const PureRepeat &pr : proto.repeats | map_values) {
+ max_width = max(max_width, pr.bounds.max);
+ }
+ return max_width;
+}
+
+depth findMinWidth(const CastleProto &proto, u32 top) {
+ if (!contains(proto.repeats, top)) {
+ assert(0); // should not happen
+ return depth::infinity();
+ }
+ return proto.repeats.at(top).bounds.min;
+}
+
+depth findMaxWidth(const CastleProto &proto, u32 top) {
+ if (!contains(proto.repeats, top)) {
+ assert(0); // should not happen
+ return depth(0);
+ }
+ return proto.repeats.at(top).bounds.max;
+}
+
CastleProto::CastleProto(nfa_kind k, const PureRepeat &pr) : kind(k) {
- assert(pr.reach.any());
- assert(pr.reports.size() == 1);
- u32 top = 0;
- repeats.emplace(top, pr);
- for (const auto &report : pr.reports) {
- report_map[report].insert(top);
- }
-}
-
-const CharReach &CastleProto::reach() const {
- assert(!repeats.empty());
- return repeats.begin()->second.reach;
-}
-
-u32 CastleProto::add(const PureRepeat &pr) {
- assert(repeats.size() < max_occupancy);
- assert(pr.reach == reach());
- assert(pr.reports.size() == 1);
- u32 top = next_top++;
- DEBUG_PRINTF("selected unused top %u\n", top);
- assert(!contains(repeats, top));
- repeats.emplace(top, pr);
- for (const auto &report : pr.reports) {
- report_map[report].insert(top);
- }
- return top;
-}
-
-void CastleProto::erase(u32 top) {
- DEBUG_PRINTF("erase top %u\n", top);
- assert(contains(repeats, top));
- repeats.erase(top);
- for (auto &m : report_map) {
- m.second.erase(top);
- }
-}
-
-u32 CastleProto::merge(const PureRepeat &pr) {
- assert(repeats.size() <= max_occupancy);
- assert(pr.reach == reach());
- assert(pr.reports.size() == 1);
-
- // First, see if this repeat is already in this castle.
- for (const auto &m : repeats) {
- if (m.second == pr) {
- DEBUG_PRINTF("repeat already present, with top %u\n", m.first);
- return m.first;
- }
- }
-
- if (repeats.size() == max_occupancy) {
- DEBUG_PRINTF("this castle is full\n");
- return max_occupancy;
- }
-
- return add(pr);
-}
-
-bool mergeCastle(CastleProto &c1, const CastleProto &c2,
- map<u32, u32> &top_map) {
- assert(&c1 != &c2);
+ assert(pr.reach.any());
+ assert(pr.reports.size() == 1);
+ u32 top = 0;
+ repeats.emplace(top, pr);
+ for (const auto &report : pr.reports) {
+ report_map[report].insert(top);
+ }
+}
+
+const CharReach &CastleProto::reach() const {
+ assert(!repeats.empty());
+ return repeats.begin()->second.reach;
+}
+
+u32 CastleProto::add(const PureRepeat &pr) {
+ assert(repeats.size() < max_occupancy);
+ assert(pr.reach == reach());
+ assert(pr.reports.size() == 1);
+ u32 top = next_top++;
+ DEBUG_PRINTF("selected unused top %u\n", top);
+ assert(!contains(repeats, top));
+ repeats.emplace(top, pr);
+ for (const auto &report : pr.reports) {
+ report_map[report].insert(top);
+ }
+ return top;
+}
+
+void CastleProto::erase(u32 top) {
+ DEBUG_PRINTF("erase top %u\n", top);
+ assert(contains(repeats, top));
+ repeats.erase(top);
+ for (auto &m : report_map) {
+ m.second.erase(top);
+ }
+}
+
+u32 CastleProto::merge(const PureRepeat &pr) {
+ assert(repeats.size() <= max_occupancy);
+ assert(pr.reach == reach());
+ assert(pr.reports.size() == 1);
+
+ // First, see if this repeat is already in this castle.
+ for (const auto &m : repeats) {
+ if (m.second == pr) {
+ DEBUG_PRINTF("repeat already present, with top %u\n", m.first);
+ return m.first;
+ }
+ }
+
+ if (repeats.size() == max_occupancy) {
+ DEBUG_PRINTF("this castle is full\n");
+ return max_occupancy;
+ }
+
+ return add(pr);
+}
+
+bool mergeCastle(CastleProto &c1, const CastleProto &c2,
+ map<u32, u32> &top_map) {
+ assert(&c1 != &c2);
assert(c1.kind == c2.kind);
-
- DEBUG_PRINTF("c1 has %zu repeats, c2 has %zu repeats\n", c1.repeats.size(),
- c2.repeats.size());
-
- if (c1.reach() != c2.reach()) {
- DEBUG_PRINTF("different reach!\n");
- return false;
- }
-
- if (c1.repeats.size() + c2.repeats.size() > c1.max_occupancy) {
- DEBUG_PRINTF("too many repeats to merge\n");
- return false;
- }
-
- top_map.clear();
-
- for (const auto &m : c2.repeats) {
- const u32 top = m.first;
- const PureRepeat &pr = m.second;
- DEBUG_PRINTF("top %u\n", top);
+
+ DEBUG_PRINTF("c1 has %zu repeats, c2 has %zu repeats\n", c1.repeats.size(),
+ c2.repeats.size());
+
+ if (c1.reach() != c2.reach()) {
+ DEBUG_PRINTF("different reach!\n");
+ return false;
+ }
+
+ if (c1.repeats.size() + c2.repeats.size() > c1.max_occupancy) {
+ DEBUG_PRINTF("too many repeats to merge\n");
+ return false;
+ }
+
+ top_map.clear();
+
+ for (const auto &m : c2.repeats) {
+ const u32 top = m.first;
+ const PureRepeat &pr = m.second;
+ DEBUG_PRINTF("top %u\n", top);
u32 new_top = c1.merge(pr);
- top_map[top] = new_top;
- DEBUG_PRINTF("adding repeat: map %u->%u\n", top, new_top);
- }
-
- assert(c1.repeats.size() <= c1.max_occupancy);
- return true;
-}
-
-void remapCastleTops(CastleProto &proto, map<u32, u32> &top_map) {
- map<u32, PureRepeat> out;
- top_map.clear();
-
- for (const auto &m : proto.repeats) {
- const u32 top = m.first;
- const PureRepeat &pr = m.second;
- u32 new_top = out.size();
- out.emplace(new_top, pr);
- top_map[top] = new_top;
- }
-
- proto.repeats.swap(out);
-
- // Remap report map.
- proto.report_map.clear();
- for (const auto &m : proto.repeats) {
- const u32 top = m.first;
- const PureRepeat &pr = m.second;
- for (const auto &report : pr.reports) {
- proto.report_map[report].insert(top);
- }
- }
-
- assert(proto.repeats.size() <= proto.max_occupancy);
-}
-
-namespace {
-struct HasReport {
- explicit HasReport(ReportID r) : report(r) {}
-
- bool operator()(const pair<u32, PureRepeat> &a) const {
- return contains(a.second.reports, report);
- }
-
-private:
- ReportID report;
-};
-}
-
-bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2,
- ReportID report2) {
- assert(!c1.repeats.empty());
- assert(!c2.repeats.empty());
+ top_map[top] = new_top;
+ DEBUG_PRINTF("adding repeat: map %u->%u\n", top, new_top);
+ }
+
+ assert(c1.repeats.size() <= c1.max_occupancy);
+ return true;
+}
+
+void remapCastleTops(CastleProto &proto, map<u32, u32> &top_map) {
+ map<u32, PureRepeat> out;
+ top_map.clear();
+
+ for (const auto &m : proto.repeats) {
+ const u32 top = m.first;
+ const PureRepeat &pr = m.second;
+ u32 new_top = out.size();
+ out.emplace(new_top, pr);
+ top_map[top] = new_top;
+ }
+
+ proto.repeats.swap(out);
+
+ // Remap report map.
+ proto.report_map.clear();
+ for (const auto &m : proto.repeats) {
+ const u32 top = m.first;
+ const PureRepeat &pr = m.second;
+ for (const auto &report : pr.reports) {
+ proto.report_map[report].insert(top);
+ }
+ }
+
+ assert(proto.repeats.size() <= proto.max_occupancy);
+}
+
+namespace {
+struct HasReport {
+ explicit HasReport(ReportID r) : report(r) {}
+
+ bool operator()(const pair<u32, PureRepeat> &a) const {
+ return contains(a.second.reports, report);
+ }
+
+private:
+ ReportID report;
+};
+}
+
+bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2,
+ ReportID report2) {
+ assert(!c1.repeats.empty());
+ assert(!c2.repeats.empty());
assert(c1.kind == c2.kind);
-
- if (c1.reach() != c2.reach()) {
- DEBUG_PRINTF("different reach\n");
- return false;
- }
-
- map<u32, PureRepeat>::const_iterator it = c1.repeats.begin(),
- ite = c1.repeats.end(),
- jt = c2.repeats.begin(),
- jte = c2.repeats.end();
-
- for (;; ++it, ++jt) {
- it = find_if(it, ite, HasReport(report1));
- jt = find_if(jt, jte, HasReport(report2));
-
- if (it == ite && jt == jte) {
- DEBUG_PRINTF("success, cases are equivalent!\n");
- return true;
- }
-
- if (it == ite || jt == jte) {
- DEBUG_PRINTF("no match for one repeat\n");
- break;
- }
-
- if (it->first != jt->first) {
- DEBUG_PRINTF("different tops\n");
- break;
- }
-
- const PureRepeat &r1 = it->second;
- const PureRepeat &r2 = jt->second;
- assert(r1.reach == c1.reach());
- assert(r2.reach == c1.reach());
- if (r1.bounds != r2.bounds) {
- DEBUG_PRINTF("different bounds\n");
- break;
- }
- }
-
- return false;
-}
-
-bool is_equal(const CastleProto &c1, const CastleProto &c2) {
- assert(!c1.repeats.empty());
- assert(!c2.repeats.empty());
+
+ if (c1.reach() != c2.reach()) {
+ DEBUG_PRINTF("different reach\n");
+ return false;
+ }
+
+ map<u32, PureRepeat>::const_iterator it = c1.repeats.begin(),
+ ite = c1.repeats.end(),
+ jt = c2.repeats.begin(),
+ jte = c2.repeats.end();
+
+ for (;; ++it, ++jt) {
+ it = find_if(it, ite, HasReport(report1));
+ jt = find_if(jt, jte, HasReport(report2));
+
+ if (it == ite && jt == jte) {
+ DEBUG_PRINTF("success, cases are equivalent!\n");
+ return true;
+ }
+
+ if (it == ite || jt == jte) {
+ DEBUG_PRINTF("no match for one repeat\n");
+ break;
+ }
+
+ if (it->first != jt->first) {
+ DEBUG_PRINTF("different tops\n");
+ break;
+ }
+
+ const PureRepeat &r1 = it->second;
+ const PureRepeat &r2 = jt->second;
+ assert(r1.reach == c1.reach());
+ assert(r2.reach == c1.reach());
+ if (r1.bounds != r2.bounds) {
+ DEBUG_PRINTF("different bounds\n");
+ break;
+ }
+ }
+
+ return false;
+}
+
+bool is_equal(const CastleProto &c1, const CastleProto &c2) {
+ assert(!c1.repeats.empty());
+ assert(!c2.repeats.empty());
assert(c1.kind == c2.kind);
-
- if (c1.reach() != c2.reach()) {
- DEBUG_PRINTF("different reach\n");
- return false;
- }
-
- return c1.repeats == c2.repeats;
-}
-
-bool requiresDedupe(const CastleProto &proto,
+
+ if (c1.reach() != c2.reach()) {
+ DEBUG_PRINTF("different reach\n");
+ return false;
+ }
+
+ return c1.repeats == c2.repeats;
+}
+
+bool requiresDedupe(const CastleProto &proto,
const flat_set<ReportID> &reports) {
- for (const auto &report : reports) {
- auto it = proto.report_map.find(report);
- if (it == end(proto.report_map)) {
- continue;
- }
- if (it->second.size() > 1) {
- DEBUG_PRINTF("castle proto %p has dupe report %u\n", &proto,
- report);
- return true;
- }
- }
- return false;
-}
-
-static
-void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) {
- DEBUG_PRINTF("top %u -> repeat %s\n", top, pr.bounds.str().c_str());
- NFAVertex u = g.start;
-
- // Mandatory repeats to min bound.
- u32 min_bound = pr.bounds.min; // always finite
- if (min_bound == 0) { // Vacuous case, we can only do this once.
- assert(!edge(g.start, g.accept, g).second);
+ for (const auto &report : reports) {
+ auto it = proto.report_map.find(report);
+ if (it == end(proto.report_map)) {
+ continue;
+ }
+ if (it->second.size() > 1) {
+ DEBUG_PRINTF("castle proto %p has dupe report %u\n", &proto,
+ report);
+ return true;
+ }
+ }
+ return false;
+}
+
+static
+void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) {
+ DEBUG_PRINTF("top %u -> repeat %s\n", top, pr.bounds.str().c_str());
+ NFAVertex u = g.start;
+
+ // Mandatory repeats to min bound.
+ u32 min_bound = pr.bounds.min; // always finite
+ if (min_bound == 0) { // Vacuous case, we can only do this once.
+ assert(!edge(g.start, g.accept, g).second);
NFAEdge e = add_edge(g.start, g.accept, g);
g[e].tops.insert(top);
- g[u].reports.insert(pr.reports.begin(), pr.reports.end());
- min_bound = 1;
- }
-
- for (u32 i = 0; i < min_bound; i++) {
- NFAVertex v = add_vertex(g);
- g[v].char_reach = pr.reach;
+ g[u].reports.insert(pr.reports.begin(), pr.reports.end());
+ min_bound = 1;
+ }
+
+ for (u32 i = 0; i < min_bound; i++) {
+ NFAVertex v = add_vertex(g);
+ g[v].char_reach = pr.reach;
NFAEdge e = add_edge(u, v, g);
- if (u == g.start) {
+ if (u == g.start) {
g[e].tops.insert(top);
- }
- u = v;
- }
-
- NFAVertex head = u;
-
- // Optional repeats to max bound.
- if (pr.bounds.max.is_finite()) {
- assert(pr.bounds.max > depth(0));
- const u32 max_bound = pr.bounds.max;
- for (u32 i = 0; i < max_bound - min_bound; i++) {
- NFAVertex v = add_vertex(g);
- g[v].char_reach = pr.reach;
- if (head != u) {
- add_edge(head, v, g);
- }
+ }
+ u = v;
+ }
+
+ NFAVertex head = u;
+
+ // Optional repeats to max bound.
+ if (pr.bounds.max.is_finite()) {
+ assert(pr.bounds.max > depth(0));
+ const u32 max_bound = pr.bounds.max;
+ for (u32 i = 0; i < max_bound - min_bound; i++) {
+ NFAVertex v = add_vertex(g);
+ g[v].char_reach = pr.reach;
+ if (head != u) {
+ add_edge(head, v, g);
+ }
NFAEdge e = add_edge(u, v, g);
- if (u == g.start) {
+ if (u == g.start) {
g[e].tops.insert(top);
- }
- u = v;
- }
- } else {
- assert(pr.bounds.max.is_infinite());
- add_edge(u, u, g);
- }
-
- // Connect to accept.
- add_edge(u, g.accept, g);
- g[u].reports.insert(pr.reports.begin(), pr.reports.end());
- if (u != head) {
- add_edge(head, g.accept, g);
- g[head].reports.insert(pr.reports.begin(), pr.reports.end());
- }
-}
-
-static
-bool hasZeroMinBound(const CastleProto &proto) {
- const depth zero(0);
- for (const PureRepeat &pr : proto.repeats | map_values) {
- if (pr.bounds.min == zero) {
- return true;
- }
- }
- return false;
-}
-
+ }
+ u = v;
+ }
+ } else {
+ assert(pr.bounds.max.is_infinite());
+ add_edge(u, u, g);
+ }
+
+ // Connect to accept.
+ add_edge(u, g.accept, g);
+ g[u].reports.insert(pr.reports.begin(), pr.reports.end());
+ if (u != head) {
+ add_edge(head, g.accept, g);
+ g[head].reports.insert(pr.reports.begin(), pr.reports.end());
+ }
+}
+
+static
+bool hasZeroMinBound(const CastleProto &proto) {
+ const depth zero(0);
+ for (const PureRepeat &pr : proto.repeats | map_values) {
+ if (pr.bounds.min == zero) {
+ return true;
+ }
+ }
+ return false;
+}
+
unique_ptr<NGHolder> makeHolder(const CastleProto &proto,
- const CompileContext &cc) {
- assert(!proto.repeats.empty());
-
- // Vacuous edges are only doable in the NGHolder if we are a single-top
- // Castle.
- if (hasZeroMinBound(proto)) {
- if (proto.repeats.size() != 1 || proto.repeats.begin()->first != 0) {
- DEBUG_PRINTF("can't build multi-top vacuous holder\n");
- return nullptr;
- }
- }
-
+ const CompileContext &cc) {
+ assert(!proto.repeats.empty());
+
+ // Vacuous edges are only doable in the NGHolder if we are a single-top
+ // Castle.
+ if (hasZeroMinBound(proto)) {
+ if (proto.repeats.size() != 1 || proto.repeats.begin()->first != 0) {
+ DEBUG_PRINTF("can't build multi-top vacuous holder\n");
+ return nullptr;
+ }
+ }
+
auto g = ue2::make_unique<NGHolder>(proto.kind);
-
- for (const auto &m : proto.repeats) {
- addToHolder(*g, m.first, m.second);
- }
-
+
+ for (const auto &m : proto.repeats) {
+ addToHolder(*g, m.first, m.second);
+ }
+
//dumpGraph("castle_holder.dot", *g);
-
- // Sanity checks.
- assert(allMatchStatesHaveReports(*g));
- assert(!has_parallel_edge(*g));
-
- reduceGraphEquivalences(*g, cc);
-
- removeRedundancy(*g, SOM_NONE);
-
- return g;
-}
-
-} // namespace ue2
+
+ // Sanity checks.
+ assert(allMatchStatesHaveReports(*g));
+ assert(!has_parallel_edge(*g));
+
+ reduceGraphEquivalences(*g, cc);
+
+ removeRedundancy(*g, SOM_NONE);
+
+ return g;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfa/castlecompile.h b/contrib/libs/hyperscan/src/nfa/castlecompile.h
index 1a0ef2421c..ea5f06dabc 100644
--- a/contrib/libs/hyperscan/src/nfa/castlecompile.h
+++ b/contrib/libs/hyperscan/src/nfa/castlecompile.h
@@ -1,171 +1,171 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Castle: multi-tenant repeat engine, compiler code.
- */
-
-#ifndef NFA_CASTLECOMPILE_H
-#define NFA_CASTLECOMPILE_H
-
-#include "nfa_kind.h"
-#include "ue2common.h"
-#include "nfagraph/ng_repeat.h"
+ * \brief Castle: multi-tenant repeat engine, compiler code.
+ */
+
+#ifndef NFA_CASTLECOMPILE_H
+#define NFA_CASTLECOMPILE_H
+
+#include "nfa_kind.h"
+#include "ue2common.h"
+#include "nfagraph/ng_repeat.h"
#include "util/bytecode_ptr.h"
-#include "util/depth.h"
+#include "util/depth.h"
#include "util/flat_containers.h"
-
-#include <map>
-#include <memory>
-#include <set>
+
+#include <map>
+#include <memory>
+#include <set>
#include <unordered_map>
-#include <vector>
-
-struct NFA;
-
-namespace ue2 {
-
-class CharReach;
-class NGHolder;
+#include <vector>
+
+struct NFA;
+
+namespace ue2 {
+
+class CharReach;
+class NGHolder;
class ReportManager;
-struct CompileContext;
-
-/**
- * \brief Prototype for a Castle engine: contains at least one CastleRepeat.
- *
- * Currently, all repeats in a Castle must have the same character
- * reachability.
- *
- * A CastleProto is converted into a single NFA, with each top triggering a
- * unique repeat. A CastleProto can contain at most CastleProto::max_occupancy
- * elements.
- */
-struct CastleProto {
- static constexpr size_t max_occupancy = 65536; // arbitrary limit
+struct CompileContext;
+
+/**
+ * \brief Prototype for a Castle engine: contains at least one CastleRepeat.
+ *
+ * Currently, all repeats in a Castle must have the same character
+ * reachability.
+ *
+ * A CastleProto is converted into a single NFA, with each top triggering a
+ * unique repeat. A CastleProto can contain at most CastleProto::max_occupancy
+ * elements.
+ */
+struct CastleProto {
+ static constexpr size_t max_occupancy = 65536; // arbitrary limit
CastleProto(nfa_kind k, const PureRepeat &pr);
- const CharReach &reach() const;
-
- /** \brief Add a new repeat. */
- u32 add(const PureRepeat &pr);
-
- /** \brief Remove a repeat. */
- void erase(u32 top);
-
- /**
- * \brief Merge in the given repeat, returning the top used.
- *
- * If the repeat already exists in this castle, we will re-use (and return)
- * the old top. If it doesn't, it will be added and assigned a new top.
- * Returns \ref max_occupancy if capacity would be exceeded.
- */
- u32 merge(const PureRepeat &pr);
-
- /** \brief Mapping from unique top id to repeat. */
- std::map<u32, PureRepeat> repeats;
-
- /** \brief Mapping from report to associated tops. */
+ const CharReach &reach() const;
+
+ /** \brief Add a new repeat. */
+ u32 add(const PureRepeat &pr);
+
+ /** \brief Remove a repeat. */
+ void erase(u32 top);
+
+ /**
+ * \brief Merge in the given repeat, returning the top used.
+ *
+ * If the repeat already exists in this castle, we will re-use (and return)
+ * the old top. If it doesn't, it will be added and assigned a new top.
+ * Returns \ref max_occupancy if capacity would be exceeded.
+ */
+ u32 merge(const PureRepeat &pr);
+
+ /** \brief Mapping from unique top id to repeat. */
+ std::map<u32, PureRepeat> repeats;
+
+ /** \brief Mapping from report to associated tops. */
std::unordered_map<ReportID, flat_set<u32>> report_map;
-
- /**
- * \brief Next top id to use. Repeats may be removed without top remapping,
- * so we track this explicitly instead of using repeats.size().
- */
- u32 next_top = 1;
+
+ /**
+ * \brief Next top id to use. Repeats may be removed without top remapping,
+ * so we track this explicitly instead of using repeats.size().
+ */
+ u32 next_top = 1;
/** \brief Kind for this engine. */
nfa_kind kind;
-};
-
-std::set<ReportID> all_reports(const CastleProto &proto);
-depth findMinWidth(const CastleProto &proto);
-depth findMaxWidth(const CastleProto &proto);
-depth findMinWidth(const CastleProto &proto, u32 top);
-depth findMaxWidth(const CastleProto &proto, u32 top);
-
-/**
- * \brief Remap tops to be contiguous.
- *
- * Remap the tops in the given CastleProto so that they're contiguous in the
- * range [0 .. N-1].
- */
-void remapCastleTops(CastleProto &proto, std::map<u32, u32> &top_map);
-
-/**
- * \brief Construct an NFA from a CastleProto.
- *
- * NOTE: Tops must be contiguous, i.e. \ref remapCastleTops must have been run
- * first.
- */
+};
+
+std::set<ReportID> all_reports(const CastleProto &proto);
+depth findMinWidth(const CastleProto &proto);
+depth findMaxWidth(const CastleProto &proto);
+depth findMinWidth(const CastleProto &proto, u32 top);
+depth findMaxWidth(const CastleProto &proto, u32 top);
+
+/**
+ * \brief Remap tops to be contiguous.
+ *
+ * Remap the tops in the given CastleProto so that they're contiguous in the
+ * range [0 .. N-1].
+ */
+void remapCastleTops(CastleProto &proto, std::map<u32, u32> &top_map);
+
+/**
+ * \brief Construct an NFA from a CastleProto.
+ *
+ * NOTE: Tops must be contiguous, i.e. \ref remapCastleTops must have been run
+ * first.
+ */
bytecode_ptr<NFA>
-buildCastle(const CastleProto &proto,
- const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
+buildCastle(const CastleProto &proto,
+ const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
const CompileContext &cc, const ReportManager &rm);
-
-/**
+
+/**
* \brief Merge two CastleProto prototypes together, if possible. If a
* particular repeat from c2 is already in c1, then it will be reused rather
* than adding a duplicate repeat.
- *
- * Returns true if merge of all repeats in c2 into c1 succeeds, and fills
- * mapping with the repeat indices.
- */
-bool mergeCastle(CastleProto &c1, const CastleProto &c2,
- std::map<u32, u32> &top_map);
-
-/**
- * \brief True if the two castles are identical with respect to the reports
- * given; i.e. the same tops lead to the same repeats, just with report1 in c1
- * and report2 in c2.
- *
- * Repeats leading to other reports are ignored.
- */
-bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2,
- ReportID report2);
-
-/**
- * \brief True if the two castles given are identical.
- */
-bool is_equal(const CastleProto &c1, const CastleProto &c2);
-
-/**
- * \brief True if the given castle contains more than a single instance of any
- * of the reports in the given set.
- */
-bool requiresDedupe(const CastleProto &proto,
+ *
+ * Returns true if merge of all repeats in c2 into c1 succeeds, and fills
+ * mapping with the repeat indices.
+ */
+bool mergeCastle(CastleProto &c1, const CastleProto &c2,
+ std::map<u32, u32> &top_map);
+
+/**
+ * \brief True if the two castles are identical with respect to the reports
+ * given; i.e. the same tops lead to the same repeats, just with report1 in c1
+ * and report2 in c2.
+ *
+ * Repeats leading to other reports are ignored.
+ */
+bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2,
+ ReportID report2);
+
+/**
+ * \brief True if the two castles given are identical.
+ */
+bool is_equal(const CastleProto &c1, const CastleProto &c2);
+
+/**
+ * \brief True if the given castle contains more than a single instance of any
+ * of the reports in the given set.
+ */
+bool requiresDedupe(const CastleProto &proto,
const flat_set<ReportID> &reports);
-
-/**
- * \brief Build an NGHolder from a CastleProto.
- */
+
+/**
+ * \brief Build an NGHolder from a CastleProto.
+ */
std::unique_ptr<NGHolder> makeHolder(const CastleProto &castle,
- const CompileContext &cc);
-
-} // namespace ue2
-
-#endif // NFA_CASTLECOMPILE_H
+ const CompileContext &cc);
+
+} // namespace ue2
+
+#endif // NFA_CASTLECOMPILE_H
diff --git a/contrib/libs/hyperscan/src/nfa/dfa_min.cpp b/contrib/libs/hyperscan/src/nfa/dfa_min.cpp
index 8c0fc09ff5..1a07e8a7d3 100644
--- a/contrib/libs/hyperscan/src/nfa/dfa_min.cpp
+++ b/contrib/libs/hyperscan/src/nfa/dfa_min.cpp
@@ -1,111 +1,111 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
* \brief Build code for DFA minimization.
*/
-
-/**
+
+/**
* /Summary of the Hopcroft minimisation algorithm/
*
- * partition := {F, Q \ F};
- * work_queue := {F};
- * while (work_queue is not empty) do
- * choose and remove a set A from work_queue
- * for each c in . do
- * let X be the set of states for which a transition on c
- * leads to a state in A
- * for each set Y in partition for which X . Y is nonempty and
- * Y \ X is nonempty do
- * replace Y in partition by the two sets X . Y and Y \ X
- * if Y is in work_queue
- * replace Y in work_queue by the same two sets
- * else
- * if |X . Y| <= |Y \ X|
- * add X . Y to work_queue
- * else
- * add Y \ X to work_queue
- * end;
- * end;
- * end;
- */
-
-#include "dfa_min.h"
-
-#include "grey.h"
+ * partition := {F, Q \ F};
+ * work_queue := {F};
+ * while (work_queue is not empty) do
+ * choose and remove a set A from work_queue
+ * for each c in . do
+ * let X be the set of states for which a transition on c
+ * leads to a state in A
+ * for each set Y in partition for which X . Y is nonempty and
+ * Y \ X is nonempty do
+ * replace Y in partition by the two sets X . Y and Y \ X
+ * if Y is in work_queue
+ * replace Y in work_queue by the same two sets
+ * else
+ * if |X . Y| <= |Y \ X|
+ * add X . Y to work_queue
+ * else
+ * add Y \ X to work_queue
+ * end;
+ * end;
+ * end;
+ */
+
+#include "dfa_min.h"
+
+#include "grey.h"
#include "mcclellancompile_util.h"
#include "rdfa.h"
-#include "ue2common.h"
+#include "ue2common.h"
#include "util/container.h"
#include "util/flat_containers.h"
#include "util/noncopyable.h"
-#include "util/partitioned_set.h"
-
-#include <algorithm>
-#include <functional>
+#include "util/partitioned_set.h"
+
+#include <algorithm>
+#include <functional>
#include <iterator>
-#include <map>
+#include <map>
#include <queue>
-#include <set>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-namespace {
-
-struct hopcroft_state_info {
+#include <set>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+namespace {
+
+struct hopcroft_state_info {
explicit hopcroft_state_info(size_t alpha_size) : prev(alpha_size) {}
/** \brief Mapping from symbol to a list of predecessors that transition to
* this state on that symbol. */
vector<vector<dstate_id_t>> prev;
-};
-
+};
+
struct HopcroftInfo : noncopyable {
size_t alpha_size; //!< Size of DFA alphabet.
queue<size_t> work_queue; //!< Hopcroft work queue of partition indices.
partitioned_set<dstate_id_t> partition; //!< Partition set of DFA states.
vector<hopcroft_state_info> states; //!< Pre-calculated state info (preds)
-
+
explicit HopcroftInfo(const raw_dfa &rdfa);
-};
-
+};
+
} // namespace
-
-/**
+
+/**
* \brief Create an initial partitioning and work_queue.
- *
+ *
* Initial partition contains {accepting states..., Non-accepting states}
* Initial work_queue contains accepting state subsets
- *
+ *
* The initial partitioning needs to distinguish between the different
* reporting behaviours (unlike standard Hopcroft) --> more than one subset
* possible for the accepting states.
@@ -115,36 +115,36 @@ struct HopcroftInfo : noncopyable {
* Reports of each state are searched against the map and
* added to the corresponding id -> partition[id] and work_queue[id].
* Non Accept states are added to partition[id+1].
- */
-static
+ */
+static
vector<size_t> create_map(const raw_dfa &rdfa, queue<size_t> &work_queue) {
- using ReportKey = pair<flat_set<ReportID>, flat_set<ReportID>>;
- map<ReportKey, size_t> subset_map;
- vector<size_t> state_to_subset(rdfa.states.size(), INVALID_SUBSET);
-
- for (size_t i = 0; i < rdfa.states.size(); i++) {
+ using ReportKey = pair<flat_set<ReportID>, flat_set<ReportID>>;
+ map<ReportKey, size_t> subset_map;
+ vector<size_t> state_to_subset(rdfa.states.size(), INVALID_SUBSET);
+
+ for (size_t i = 0; i < rdfa.states.size(); i++) {
const auto &ds = rdfa.states[i];
if (!ds.reports.empty() || !ds.reports_eod.empty()) {
ReportKey key(ds.reports, ds.reports_eod);
- if (contains(subset_map, key)) {
- state_to_subset[i] = subset_map[key];
- } else {
- size_t sub = subset_map.size();
+ if (contains(subset_map, key)) {
+ state_to_subset[i] = subset_map[key];
+ } else {
+ size_t sub = subset_map.size();
subset_map.emplace(std::move(key), sub);
- state_to_subset[i] = sub;
+ state_to_subset[i] = sub;
work_queue.push(sub);
- }
- }
- }
-
+ }
+ }
+ }
+
/* Give non-accept states their own subset. */
- size_t non_accept_sub = subset_map.size();
+ size_t non_accept_sub = subset_map.size();
replace(state_to_subset.begin(), state_to_subset.end(), INVALID_SUBSET,
non_accept_sub);
-
- return state_to_subset;
-}
-
+
+ return state_to_subset;
+}
+
HopcroftInfo::HopcroftInfo(const raw_dfa &rdfa)
: alpha_size(rdfa.alpha_size), partition(create_map(rdfa, work_queue)),
states(rdfa.states.size(), hopcroft_state_info(alpha_size)) {
@@ -153,51 +153,51 @@ HopcroftInfo::HopcroftInfo(const raw_dfa &rdfa)
for (size_t sym = 0; sym < alpha_size; sym++) {
dstate_id_t present_state = rdfa.states[i].next[sym];
states[present_state].prev[sym].push_back(i);
- }
- }
-}
-
-/**
- * For a split set X, each subset S (given by part_index) in the partition, two
- * sets are created: v_inter (X intersection S) and v_sub (S - X).
- *
- * For each subset S in the partition that could be split (v_inter is nonempty
- * and v_sub is nonempty):
- * - replace S in partition by the two sets v_inter and v_sub.
- * - if S is in work_queue:
- * - replace S in work_queue by the two subsets.
- * - else:
- * - replace S in work_queue by the smaller of the two sets.
- */
-static
+ }
+ }
+}
+
+/**
+ * For a split set X, each subset S (given by part_index) in the partition, two
+ * sets are created: v_inter (X intersection S) and v_sub (S - X).
+ *
+ * For each subset S in the partition that could be split (v_inter is nonempty
+ * and v_sub is nonempty):
+ * - replace S in partition by the two sets v_inter and v_sub.
+ * - if S is in work_queue:
+ * - replace S in work_queue by the two subsets.
+ * - else:
+ * - replace S in work_queue by the smaller of the two sets.
+ */
+static
void split_and_replace_set(const size_t part_index, HopcroftInfo &info,
const flat_set<dstate_id_t> &splitter) {
- /* singleton sets cannot be split */
+ /* singleton sets cannot be split */
if (info.partition[part_index].size() == 1) {
- return;
- }
-
+ return;
+ }
+
size_t small_index = info.partition.split(part_index, splitter);
-
- if (small_index == INVALID_SUBSET) {
- /* the set could not be split */
- return;
- }
-
- /* larger subset remains at the input subset index, if the input subset was
- * already in the work queue then the larger subset will remain there. */
-
+
+ if (small_index == INVALID_SUBSET) {
+ /* the set could not be split */
+ return;
+ }
+
+ /* larger subset remains at the input subset index, if the input subset was
+ * already in the work queue then the larger subset will remain there. */
+
info.work_queue.push(small_index);
-}
-
-/**
+}
+
+/**
* \brief Core of the Hopcroft minimisation algorithm.
- */
-static
+ */
+static
void dfa_min(HopcroftInfo &info) {
flat_set<dstate_id_t> curr, sym_preds;
- vector<size_t> cand_subsets;
-
+ vector<size_t> cand_subsets;
+
while (!info.work_queue.empty()) {
/* Choose and remove a set of states (curr, or A in the description
* above) from the work queue. Note that we copy the set because the
@@ -205,7 +205,7 @@ void dfa_min(HopcroftInfo &info) {
curr.clear();
insert(&curr, info.partition[info.work_queue.front()]);
info.work_queue.pop();
-
+
for (size_t sym = 0; sym < info.alpha_size; sym++) {
/* Find the set of states sym_preds for which a transition on the
* given symbol leads to a state in curr. */
@@ -215,104 +215,104 @@ void dfa_min(HopcroftInfo &info) {
}
if (sym_preds.empty()) {
- continue;
- }
-
+ continue;
+ }
+
/* we only need to consider subsets with at least one member in
* sym_preds for splitting */
- cand_subsets.clear();
+ cand_subsets.clear();
info.partition.find_overlapping(sym_preds, &cand_subsets);
-
- for (size_t sub : cand_subsets) {
+
+ for (size_t sub : cand_subsets) {
split_and_replace_set(sub, info, sym_preds);
- }
- }
- }
-}
-
-/**
+ }
+ }
+ }
+}
+
+/**
* \brief Build the new DFA state table.
- */
-static
+ */
+static
void mapping_new_states(const HopcroftInfo &info,
vector<dstate_id_t> &old_to_new, raw_dfa &rdfa) {
const size_t num_partitions = info.partition.size();
-
- // Mapping from equiv class's first state to equiv class index.
- map<dstate_id_t, size_t> ordering;
-
- // New state id for each equiv class.
- vector<dstate_id_t> eq_state(num_partitions);
-
- for (size_t i = 0; i < num_partitions; i++) {
+
+ // Mapping from equiv class's first state to equiv class index.
+ map<dstate_id_t, size_t> ordering;
+
+ // New state id for each equiv class.
+ vector<dstate_id_t> eq_state(num_partitions);
+
+ for (size_t i = 0; i < num_partitions; i++) {
ordering[*info.partition[i].begin()] = i;
- }
-
- dstate_id_t new_id = 0;
- for (const auto &m : ordering) {
- eq_state[m.second] = new_id++;
- }
-
+ }
+
+ dstate_id_t new_id = 0;
+ for (const auto &m : ordering) {
+ eq_state[m.second] = new_id++;
+ }
+
for (size_t t = 0; t < info.partition.size(); t++) {
for (dstate_id_t id : info.partition[t]) {
- old_to_new[id] = eq_state[t];
- }
- }
-
- vector<dstate> new_states;
- new_states.reserve(num_partitions);
+ old_to_new[id] = eq_state[t];
+ }
+ }
+
+ vector<dstate> new_states;
+ new_states.reserve(num_partitions);
for (const auto &m : ordering) {
new_states.push_back(rdfa.states[m.first]);
- }
+ }
rdfa.states = std::move(new_states);
-}
-
-static
+}
+
+static
void renumber_new_states(const HopcroftInfo &info,
const vector<dstate_id_t> &old_to_new, raw_dfa &rdfa) {
for (size_t i = 0; i < info.partition.size(); i++) {
for (size_t sym = 0; sym < info.alpha_size; sym++) {
dstate_id_t output = rdfa.states[i].next[sym];
rdfa.states[i].next[sym] = old_to_new[output];
- }
- dstate_id_t dad = rdfa.states[i].daddy;
- rdfa.states[i].daddy = old_to_new[dad];
- }
-
- rdfa.start_floating = old_to_new[rdfa.start_floating];
- rdfa.start_anchored = old_to_new[rdfa.start_anchored];
-}
-
-static
+ }
+ dstate_id_t dad = rdfa.states[i].daddy;
+ rdfa.states[i].daddy = old_to_new[dad];
+ }
+
+ rdfa.start_floating = old_to_new[rdfa.start_floating];
+ rdfa.start_anchored = old_to_new[rdfa.start_anchored];
+}
+
+static
void new_dfa(raw_dfa &rdfa, const HopcroftInfo &info) {
if (info.partition.size() == info.states.size()) {
return;
- }
+ }
vector<dstate_id_t> old_to_new(info.states.size());
mapping_new_states(info, old_to_new, rdfa);
renumber_new_states(info, old_to_new, rdfa);
-}
-
-void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) {
- if (!grey.minimizeDFA) {
- return;
- }
-
+}
+
+void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) {
+ if (!grey.minimizeDFA) {
+ return;
+ }
+
if (is_dead(rdfa)) {
DEBUG_PRINTF("dfa is empty\n");
}
- UNUSED const size_t states_before = rdfa.states.size();
-
+ UNUSED const size_t states_before = rdfa.states.size();
+
HopcroftInfo info(rdfa);
-
+
dfa_min(info);
new_dfa(rdfa, info);
-
- DEBUG_PRINTF("reduced from %zu to %zu states\n", states_before,
- rdfa.states.size());
-}
-
-} // namespace ue2
+
+ DEBUG_PRINTF("reduced from %zu to %zu states\n", states_before,
+ rdfa.states.size());
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfa/dfa_min.h b/contrib/libs/hyperscan/src/nfa/dfa_min.h
index 7ccd59e590..61ca6c21a4 100644
--- a/contrib/libs/hyperscan/src/nfa/dfa_min.h
+++ b/contrib/libs/hyperscan/src/nfa/dfa_min.h
@@ -1,46 +1,46 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
* \brief Build code for DFA minimization.
- */
-
-#ifndef DFA_MIN_H
-#define DFA_MIN_H
-
-namespace ue2 {
-
-struct raw_dfa;
-struct Grey;
-
-void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey);
-
-} // namespace ue2
-
-#endif
+ */
+
+#ifndef DFA_MIN_H
+#define DFA_MIN_H
+
+namespace ue2 {
+
+struct raw_dfa;
+struct Grey;
+
+void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/gough.c b/contrib/libs/hyperscan/src/nfa/gough.c
index 176fd22e82..44acd4c286 100644
--- a/contrib/libs/hyperscan/src/nfa/gough.c
+++ b/contrib/libs/hyperscan/src/nfa/gough.c
@@ -1,1036 +1,1036 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "gough.h"
-
-#include "accel.h"
-#include "gough_internal.h"
-#include "mcclellan.h"
-#include "nfa_api.h"
-#include "nfa_api_queue.h"
-#include "nfa_internal.h"
-#include "util/bitutils.h"
-#include "util/compare.h"
-#include "util/simd_utils.h"
-#include "util/unaligned.h"
-#include "ue2common.h"
-#include <string.h>
-
-#include "mcclellan_common_impl.h"
-
-#define GOUGH_SOM_EARLY (~0ULL)
-
-static really_inline
-void compressSomValue(u32 comp_slot_width, u64a curr_offset,
- void *dest_som_base, u32 i, u64a val) {
- void *dest_som = (u8 *)dest_som_base + i * comp_slot_width;
- /* gough does not initialise all slots, so may contain garbage */
- u64a delta = curr_offset - val;
- switch (comp_slot_width) {
- case 2:
- if (delta >= (u16)~0U) {
- delta = GOUGH_SOM_EARLY;
- }
- unaligned_store_u16(dest_som, delta);
- break;
- case 4:
- if (delta >= (u32)~0U) {
- delta = GOUGH_SOM_EARLY;
- }
- unaligned_store_u32(dest_som, delta);
- break;
- case 8:
- if (delta >= ~0ULL) {
- delta = GOUGH_SOM_EARLY;
- }
- unaligned_store_u64a(dest_som, delta);
- break;
- default:
- assert(0);
- }
-}
-
-static really_inline
-u64a expandSomValue(u32 comp_slot_width, u64a curr_offset,
- const void *src_som_base, u32 i) {
- /* Note: gough does not initialise all slots, so we may end up decompressing
- * garbage */
-
- const void *src_som = (const u8 *)src_som_base + i * comp_slot_width;
- u64a val = 0;
- switch (comp_slot_width) {
- case 2:
- val = unaligned_load_u16(src_som);
- if (val == (u16)~0U) {
- return GOUGH_SOM_EARLY;
- }
- break;
- case 4:
- val = unaligned_load_u32(src_som);
- if (val == (u32)~0U) {
- return GOUGH_SOM_EARLY;
- }
- break;
- case 8:
- val = unaligned_load_u64a(src_som);
- if (val == ~0ULL) {
- return GOUGH_SOM_EARLY;
- }
- break;
-
- default:
- assert(0);
- }
- return curr_offset - val;
-}
-
-static really_inline
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "gough.h"
+
+#include "accel.h"
+#include "gough_internal.h"
+#include "mcclellan.h"
+#include "nfa_api.h"
+#include "nfa_api_queue.h"
+#include "nfa_internal.h"
+#include "util/bitutils.h"
+#include "util/compare.h"
+#include "util/simd_utils.h"
+#include "util/unaligned.h"
+#include "ue2common.h"
+#include <string.h>
+
+#include "mcclellan_common_impl.h"
+
+#define GOUGH_SOM_EARLY (~0ULL)
+
+static really_inline
+void compressSomValue(u32 comp_slot_width, u64a curr_offset,
+ void *dest_som_base, u32 i, u64a val) {
+ void *dest_som = (u8 *)dest_som_base + i * comp_slot_width;
+ /* gough does not initialise all slots, so may contain garbage */
+ u64a delta = curr_offset - val;
+ switch (comp_slot_width) {
+ case 2:
+ if (delta >= (u16)~0U) {
+ delta = GOUGH_SOM_EARLY;
+ }
+ unaligned_store_u16(dest_som, delta);
+ break;
+ case 4:
+ if (delta >= (u32)~0U) {
+ delta = GOUGH_SOM_EARLY;
+ }
+ unaligned_store_u32(dest_som, delta);
+ break;
+ case 8:
+ if (delta >= ~0ULL) {
+ delta = GOUGH_SOM_EARLY;
+ }
+ unaligned_store_u64a(dest_som, delta);
+ break;
+ default:
+ assert(0);
+ }
+}
+
+static really_inline
+u64a expandSomValue(u32 comp_slot_width, u64a curr_offset,
+ const void *src_som_base, u32 i) {
+ /* Note: gough does not initialise all slots, so we may end up decompressing
+ * garbage */
+
+ const void *src_som = (const u8 *)src_som_base + i * comp_slot_width;
+ u64a val = 0;
+ switch (comp_slot_width) {
+ case 2:
+ val = unaligned_load_u16(src_som);
+ if (val == (u16)~0U) {
+ return GOUGH_SOM_EARLY;
+ }
+ break;
+ case 4:
+ val = unaligned_load_u32(src_som);
+ if (val == (u32)~0U) {
+ return GOUGH_SOM_EARLY;
+ }
+ break;
+ case 8:
+ val = unaligned_load_u64a(src_som);
+ if (val == ~0ULL) {
+ return GOUGH_SOM_EARLY;
+ }
+ break;
+
+ default:
+ assert(0);
+ }
+ return curr_offset - val;
+}
+
+static really_inline
char doReports(NfaCallback cb, void *ctxt, const struct mcclellan *m,
- const struct gough_som_info *som, u16 s, u64a loc,
- char eod, u16 * const cached_accept_state,
- u32 * const cached_accept_id, u32 * const cached_accept_som) {
- DEBUG_PRINTF("reporting state = %hu, loc=%llu, eod %hhu\n",
- (u16)(s & STATE_MASK), loc, eod);
-
- if (!eod && s == *cached_accept_state) {
- u64a from = *cached_accept_som == INVALID_SLOT ? loc
- : som->slots[*cached_accept_som];
- if (cb(from, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING; /* termination requested */
- }
-
- return MO_CONTINUE_MATCHING; /* continue execution */
- }
-
- const struct mstate_aux *aux = get_aux(m, s);
- size_t offset = eod ? aux->accept_eod : aux->accept;
-
- assert(offset);
- const struct gough_report_list *rl
- = (const void *)((const char *)m + offset - sizeof(struct NFA));
- assert(ISALIGNED(rl));
-
- DEBUG_PRINTF("report list size %u\n", rl->count);
- u32 count = rl->count;
-
- if (!eod && count == 1) {
- *cached_accept_state = s;
- *cached_accept_id = rl->report[0].r;
- *cached_accept_som = rl->report[0].som;
-
- u64a from = *cached_accept_som == INVALID_SLOT ? loc
- : som->slots[*cached_accept_som];
- DEBUG_PRINTF("reporting %u, using som[%u]=%llu\n", rl->report[0].r,
- *cached_accept_som, from);
- if (cb(from, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING; /* termination requested */
- }
-
- return MO_CONTINUE_MATCHING; /* continue execution */
- }
-
- for (u32 i = 0; i < count; i++) {
- u32 slot = rl->report[i].som;
- u64a from = slot == INVALID_SLOT ? loc : som->slots[slot];
- DEBUG_PRINTF("reporting %u, using som[%u] = %llu\n",
- rl->report[i].r, slot, from);
- if (cb(from, loc, rl->report[i].r, ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING; /* termination requested */
- }
- }
-
- return MO_CONTINUE_MATCHING; /* continue execution */
-}
-
-#ifdef DUMP_SUPPORT
-static UNUSED
-const char *dump_op(u8 op) {
- switch (op) {
- case GOUGH_INS_END:
- return "END";
- case GOUGH_INS_MOV:
- return "MOV";
- case GOUGH_INS_NEW:
- return "NEW";
- case GOUGH_INS_MIN:
- return "MIN";
- default:
- return "???";
- }
-}
-#endif
-
-static really_inline
-void run_prog_i(UNUSED const struct NFA *nfa,
- const struct gough_ins *pc, u64a som_offset,
- struct gough_som_info *som) {
- DEBUG_PRINTF("run prog at som_offset of %llu\n", som_offset);
- while (1) {
- assert((const u8 *)pc >= (const u8 *)nfa);
- assert((const u8 *)pc < (const u8 *)nfa + nfa->length);
- u32 dest = pc->dest;
- u32 src = pc->src;
- assert(pc->op == GOUGH_INS_END
- || dest < (nfa->scratchStateSize - 16) / 8);
- DEBUG_PRINTF("%s %u %u\n", dump_op(pc->op), dest, src);
- switch (pc->op) {
- case GOUGH_INS_END:
- return;
- case GOUGH_INS_MOV:
- som->slots[dest] = som->slots[src];
- break;
- case GOUGH_INS_NEW:
- /* note: c has already been advanced */
- DEBUG_PRINTF("current offset %llu; adjust %u\n", som_offset,
- pc->src);
- assert(som_offset >= pc->src);
- som->slots[dest] = som_offset - pc->src;
- break;
- case GOUGH_INS_MIN:
- /* TODO: shift all values along by one so that a normal min works
- */
- if (som->slots[src] == GOUGH_SOM_EARLY) {
- som->slots[dest] = som->slots[src];
- } else if (som->slots[dest] != GOUGH_SOM_EARLY) {
- LIMIT_TO_AT_MOST(&som->slots[dest], som->slots[src]);
- }
- break;
- default:
- assert(0);
- return;
- }
- DEBUG_PRINTF("dest slot[%u] = %llu\n", dest, som->slots[dest]);
- ++pc;
- }
-}
-
-static really_inline
-void run_prog(const struct NFA *nfa, const u32 *edge_prog_table,
- const u8 *buf, u64a offAdj, const u8 *c, u32 edge_num,
- struct gough_som_info *som) {
- DEBUG_PRINTF("taking edge %u\n", edge_num);
- u32 prog_offset = edge_prog_table[edge_num];
- if (!prog_offset) {
- DEBUG_PRINTF("no prog on edge\n");
- return;
- }
-
- const struct gough_ins *pc = (const void *)((const u8 *)nfa + prog_offset);
- u64a curr_offset = (u64a)(c - buf) + offAdj - 1;
- run_prog_i(nfa, pc, curr_offset, som);
-}
-
-static never_inline
-void run_accel_prog(const struct NFA *nfa, const struct gough_accel *gacc,
- const u8 *buf, u64a offAdj, const u8 *c, const u8 *c2,
- struct gough_som_info *som) {
- assert(gacc->prog_offset);
- assert(c2 > c);
-
- const struct gough_ins *pc
- = (const void *)((const u8 *)nfa + gacc->prog_offset);
- s64a margin_dist = gacc->margin_dist;
-
- DEBUG_PRINTF("run accel after skip %lld margin; advanced %zd\n",
- margin_dist, c2 - c);
-
- if (c2 - c <= 2 * margin_dist) {
- while (c < c2) {
- u64a curr_offset = (u64a)(c - buf) + offAdj;
- run_prog_i(nfa, pc, curr_offset, som);
- c++;
- }
- } else {
- u64a curr_offset = (u64a)(c - buf) + offAdj;
- for (s64a i = 0; i < margin_dist; i++) {
- run_prog_i(nfa, pc, curr_offset + i, som);
- }
-
- curr_offset = (u64a)(c2 - buf) + offAdj - margin_dist;
- for (s64a i = 0; i < margin_dist; i++) {
- run_prog_i(nfa, pc, curr_offset + i, som);
- }
- }
-}
-
-static never_inline
-u16 goughEnableStarts(const struct mcclellan *m, u16 s, u64a som_offset,
- struct gough_som_info *som) {
- DEBUG_PRINTF("top triggered while at %hu\n", s);
- const struct mstate_aux *aux = get_aux(m, s);
- DEBUG_PRINTF("now going to state %hu\n", aux->top);
-
- const u32 *top_offsets = get_gough_top_offsets(m);
- if (!top_offsets) {
- return aux->top;
- }
-
- u32 prog_offset = top_offsets[s];
- if (!prog_offset) {
- return aux->top;
- }
-
- DEBUG_PRINTF("doing som for top\n");
- const struct NFA *nfa
- = (const struct NFA *)((const char *)m - sizeof(struct NFA));
- const struct gough_ins *pc = (const void *)((const u8 *)nfa
- + prog_offset);
- run_prog_i(nfa, pc, som_offset, som);
- return aux->top;
-}
-
-static really_inline
-char goughExec16_i(const struct mcclellan *m, struct gough_som_info *som,
- u16 *state, const u8 *buf, size_t len, u64a offAdj,
+ const struct gough_som_info *som, u16 s, u64a loc,
+ char eod, u16 * const cached_accept_state,
+ u32 * const cached_accept_id, u32 * const cached_accept_som) {
+ DEBUG_PRINTF("reporting state = %hu, loc=%llu, eod %hhu\n",
+ (u16)(s & STATE_MASK), loc, eod);
+
+ if (!eod && s == *cached_accept_state) {
+ u64a from = *cached_accept_som == INVALID_SLOT ? loc
+ : som->slots[*cached_accept_som];
+ if (cb(from, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING; /* termination requested */
+ }
+
+ return MO_CONTINUE_MATCHING; /* continue execution */
+ }
+
+ const struct mstate_aux *aux = get_aux(m, s);
+ size_t offset = eod ? aux->accept_eod : aux->accept;
+
+ assert(offset);
+ const struct gough_report_list *rl
+ = (const void *)((const char *)m + offset - sizeof(struct NFA));
+ assert(ISALIGNED(rl));
+
+ DEBUG_PRINTF("report list size %u\n", rl->count);
+ u32 count = rl->count;
+
+ if (!eod && count == 1) {
+ *cached_accept_state = s;
+ *cached_accept_id = rl->report[0].r;
+ *cached_accept_som = rl->report[0].som;
+
+ u64a from = *cached_accept_som == INVALID_SLOT ? loc
+ : som->slots[*cached_accept_som];
+ DEBUG_PRINTF("reporting %u, using som[%u]=%llu\n", rl->report[0].r,
+ *cached_accept_som, from);
+ if (cb(from, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING; /* termination requested */
+ }
+
+ return MO_CONTINUE_MATCHING; /* continue execution */
+ }
+
+ for (u32 i = 0; i < count; i++) {
+ u32 slot = rl->report[i].som;
+ u64a from = slot == INVALID_SLOT ? loc : som->slots[slot];
+ DEBUG_PRINTF("reporting %u, using som[%u] = %llu\n",
+ rl->report[i].r, slot, from);
+ if (cb(from, loc, rl->report[i].r, ctxt) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING; /* termination requested */
+ }
+ }
+
+ return MO_CONTINUE_MATCHING; /* continue execution */
+}
+
+#ifdef DUMP_SUPPORT
+static UNUSED
+const char *dump_op(u8 op) {
+ switch (op) {
+ case GOUGH_INS_END:
+ return "END";
+ case GOUGH_INS_MOV:
+ return "MOV";
+ case GOUGH_INS_NEW:
+ return "NEW";
+ case GOUGH_INS_MIN:
+ return "MIN";
+ default:
+ return "???";
+ }
+}
+#endif
+
+static really_inline
+void run_prog_i(UNUSED const struct NFA *nfa,
+ const struct gough_ins *pc, u64a som_offset,
+ struct gough_som_info *som) {
+ DEBUG_PRINTF("run prog at som_offset of %llu\n", som_offset);
+ while (1) {
+ assert((const u8 *)pc >= (const u8 *)nfa);
+ assert((const u8 *)pc < (const u8 *)nfa + nfa->length);
+ u32 dest = pc->dest;
+ u32 src = pc->src;
+ assert(pc->op == GOUGH_INS_END
+ || dest < (nfa->scratchStateSize - 16) / 8);
+ DEBUG_PRINTF("%s %u %u\n", dump_op(pc->op), dest, src);
+ switch (pc->op) {
+ case GOUGH_INS_END:
+ return;
+ case GOUGH_INS_MOV:
+ som->slots[dest] = som->slots[src];
+ break;
+ case GOUGH_INS_NEW:
+ /* note: c has already been advanced */
+ DEBUG_PRINTF("current offset %llu; adjust %u\n", som_offset,
+ pc->src);
+ assert(som_offset >= pc->src);
+ som->slots[dest] = som_offset - pc->src;
+ break;
+ case GOUGH_INS_MIN:
+ /* TODO: shift all values along by one so that a normal min works
+ */
+ if (som->slots[src] == GOUGH_SOM_EARLY) {
+ som->slots[dest] = som->slots[src];
+ } else if (som->slots[dest] != GOUGH_SOM_EARLY) {
+ LIMIT_TO_AT_MOST(&som->slots[dest], som->slots[src]);
+ }
+ break;
+ default:
+ assert(0);
+ return;
+ }
+ DEBUG_PRINTF("dest slot[%u] = %llu\n", dest, som->slots[dest]);
+ ++pc;
+ }
+}
+
+static really_inline
+void run_prog(const struct NFA *nfa, const u32 *edge_prog_table,
+ const u8 *buf, u64a offAdj, const u8 *c, u32 edge_num,
+ struct gough_som_info *som) {
+ DEBUG_PRINTF("taking edge %u\n", edge_num);
+ u32 prog_offset = edge_prog_table[edge_num];
+ if (!prog_offset) {
+ DEBUG_PRINTF("no prog on edge\n");
+ return;
+ }
+
+ const struct gough_ins *pc = (const void *)((const u8 *)nfa + prog_offset);
+ u64a curr_offset = (u64a)(c - buf) + offAdj - 1;
+ run_prog_i(nfa, pc, curr_offset, som);
+}
+
+static never_inline
+void run_accel_prog(const struct NFA *nfa, const struct gough_accel *gacc,
+ const u8 *buf, u64a offAdj, const u8 *c, const u8 *c2,
+ struct gough_som_info *som) {
+ assert(gacc->prog_offset);
+ assert(c2 > c);
+
+ const struct gough_ins *pc
+ = (const void *)((const u8 *)nfa + gacc->prog_offset);
+ s64a margin_dist = gacc->margin_dist;
+
+ DEBUG_PRINTF("run accel after skip %lld margin; advanced %zd\n",
+ margin_dist, c2 - c);
+
+ if (c2 - c <= 2 * margin_dist) {
+ while (c < c2) {
+ u64a curr_offset = (u64a)(c - buf) + offAdj;
+ run_prog_i(nfa, pc, curr_offset, som);
+ c++;
+ }
+ } else {
+ u64a curr_offset = (u64a)(c - buf) + offAdj;
+ for (s64a i = 0; i < margin_dist; i++) {
+ run_prog_i(nfa, pc, curr_offset + i, som);
+ }
+
+ curr_offset = (u64a)(c2 - buf) + offAdj - margin_dist;
+ for (s64a i = 0; i < margin_dist; i++) {
+ run_prog_i(nfa, pc, curr_offset + i, som);
+ }
+ }
+}
+
+static never_inline
+u16 goughEnableStarts(const struct mcclellan *m, u16 s, u64a som_offset,
+ struct gough_som_info *som) {
+ DEBUG_PRINTF("top triggered while at %hu\n", s);
+ const struct mstate_aux *aux = get_aux(m, s);
+ DEBUG_PRINTF("now going to state %hu\n", aux->top);
+
+ const u32 *top_offsets = get_gough_top_offsets(m);
+ if (!top_offsets) {
+ return aux->top;
+ }
+
+ u32 prog_offset = top_offsets[s];
+ if (!prog_offset) {
+ return aux->top;
+ }
+
+ DEBUG_PRINTF("doing som for top\n");
+ const struct NFA *nfa
+ = (const struct NFA *)((const char *)m - sizeof(struct NFA));
+ const struct gough_ins *pc = (const void *)((const u8 *)nfa
+ + prog_offset);
+ run_prog_i(nfa, pc, som_offset, som);
+ return aux->top;
+}
+
+static really_inline
+char goughExec16_i(const struct mcclellan *m, struct gough_som_info *som,
+ u16 *state, const u8 *buf, size_t len, u64a offAdj,
NfaCallback cb, void *ctxt, const u8 **c_final,
- enum MatchMode mode) {
- assert(ISALIGNED_N(state, 2));
-
- u16 s = *state;
- const struct NFA *nfa
- = (const struct NFA *)((const char *)m - sizeof(struct NFA));
- const u8 *c = buf, *c_end = buf + len;
- const u16 *succ_table = (const u16 *)((const char *)m
- + sizeof(struct mcclellan));
- assert(ISALIGNED_N(succ_table, 2));
- const u16 sherman_base = m->sherman_limit;
- const char *sherman_base_offset
- = (const char *)nfa + m->sherman_offset;
- const u32 as = m->alphaShift;
-
- s &= STATE_MASK;
-
- u32 cached_accept_id = 0;
- u16 cached_accept_state = 0;
- u32 cached_accept_som = 0;
-
- const u32 *edge_prog_table = (const u32 *)(get_gough(m) + 1);
-
- DEBUG_PRINTF("s: %hu, len %zu\n", s, len);
-
- const u8 *min_accel_offset = c;
- if (!m->has_accel || len < ACCEL_MIN_LEN) {
- min_accel_offset = c_end;
- goto without_accel;
- }
-
- goto with_accel;
-
-without_accel:
- while (c < min_accel_offset && s) {
- u8 cprime = m->remap[*(c++)];
- DEBUG_PRINTF("c: %02hhx cp:%02hhx (s=%hu)\n", *(c-1), cprime, s);
-
- u32 edge_num = ((u32)s << as) + cprime;
- run_prog(nfa, edge_prog_table, buf, offAdj, c, edge_num, som);
- if (s < sherman_base) {
- DEBUG_PRINTF("doing normal\n");
- assert(s < m->state_count);
- s = succ_table[edge_num];
- } else {
- const char *sherman_state
- = findShermanState(m, sherman_base_offset, sherman_base, s);
- DEBUG_PRINTF("doing sherman\n");
- s = doSherman16(sherman_state, cprime, succ_table, as);
- }
- DEBUG_PRINTF("s: %hu (%hu)\n", s, (u16)(s & STATE_MASK));
-
- if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) {
- if (mode == STOP_AT_MATCH) {
- *state = s & STATE_MASK;
- *c_final = c - 1;
- return MO_CONTINUE_MATCHING;
- }
-
- u64a loc = (c - 1) - buf + offAdj + 1;
- if (doReports(cb, ctxt, m, som, s & STATE_MASK, loc, 0,
- &cached_accept_state, &cached_accept_id,
- &cached_accept_som) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
-
- s &= STATE_MASK;
- }
-
-with_accel:
- while (c < c_end && s) {
- u8 cprime = m->remap[*(c++)];
- DEBUG_PRINTF("c: %02hhx cp:%02hhx (s=%hu)\n", *(c-1), cprime, s);
-
- u32 edge_num = ((u32)s << as) + cprime;
- run_prog(nfa, edge_prog_table, buf, offAdj, c, edge_num, som);
- if (s < sherman_base) {
- DEBUG_PRINTF("doing normal\n");
- assert(s < m->state_count);
- s = succ_table[edge_num];
- } else {
- const char *sherman_state
- = findShermanState(m, sherman_base_offset, sherman_base, s);
- DEBUG_PRINTF("doing sherman\n");
- s = doSherman16(sherman_state, cprime, succ_table, as);
- }
- DEBUG_PRINTF("s: %hu (%hu)\n", s, (u16)(s & STATE_MASK));
-
- if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) {
- if (mode == STOP_AT_MATCH) {
- *state = s & STATE_MASK;
- *c_final = c - 1;
- return MO_CONTINUE_MATCHING;
- }
-
- u64a loc = (c - 1) - buf + offAdj + 1;
-
- if (doReports(cb, ctxt, m, som, s & STATE_MASK, loc, 0,
- &cached_accept_state, &cached_accept_id,
- &cached_accept_som)
- == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- } else if (s & ACCEL_FLAG) {
- DEBUG_PRINTF("skipping\n");
- const struct mstate_aux *this_aux = get_aux(m, s & STATE_MASK);
- u32 accel_offset = this_aux->accel_offset;
-
- assert(accel_offset >= m->aux_offset);
- assert(accel_offset < m->sherman_offset);
-
- const struct gough_accel *gacc
- = (const void *)((const char *)m + accel_offset);
- assert(!gacc->prog_offset == !gacc->margin_dist);
- const u8 *c2 = run_accel(&gacc->accel, c, c_end);
-
- if (c2 != c && gacc->prog_offset) {
- run_accel_prog(nfa, gacc, buf, offAdj, c, c2, som);
- }
-
- if (c2 < min_accel_offset + BAD_ACCEL_DIST) {
- min_accel_offset = c2 + BIG_ACCEL_PENALTY;
- } else {
- min_accel_offset = c2 + SMALL_ACCEL_PENALTY;
- }
-
- if (min_accel_offset >= c_end - ACCEL_MIN_LEN) {
- min_accel_offset = c_end;
- }
-
- DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n",
- c2 - c, min_accel_offset - c2, c_end - c2);
-
- c = c2;
- s &= STATE_MASK;
- goto without_accel;
- }
-
- s &= STATE_MASK;
- }
-
- if (mode == STOP_AT_MATCH) {
- *c_final = c_end;
- }
- *state = s;
-
- return MO_CONTINUE_MATCHING;
-}
-
-static really_inline
-char goughExec8_i(const struct mcclellan *m, struct gough_som_info *som,
- u8 *state, const u8 *buf, size_t len, u64a offAdj,
+ enum MatchMode mode) {
+ assert(ISALIGNED_N(state, 2));
+
+ u16 s = *state;
+ const struct NFA *nfa
+ = (const struct NFA *)((const char *)m - sizeof(struct NFA));
+ const u8 *c = buf, *c_end = buf + len;
+ const u16 *succ_table = (const u16 *)((const char *)m
+ + sizeof(struct mcclellan));
+ assert(ISALIGNED_N(succ_table, 2));
+ const u16 sherman_base = m->sherman_limit;
+ const char *sherman_base_offset
+ = (const char *)nfa + m->sherman_offset;
+ const u32 as = m->alphaShift;
+
+ s &= STATE_MASK;
+
+ u32 cached_accept_id = 0;
+ u16 cached_accept_state = 0;
+ u32 cached_accept_som = 0;
+
+ const u32 *edge_prog_table = (const u32 *)(get_gough(m) + 1);
+
+ DEBUG_PRINTF("s: %hu, len %zu\n", s, len);
+
+ const u8 *min_accel_offset = c;
+ if (!m->has_accel || len < ACCEL_MIN_LEN) {
+ min_accel_offset = c_end;
+ goto without_accel;
+ }
+
+ goto with_accel;
+
+without_accel:
+ while (c < min_accel_offset && s) {
+ u8 cprime = m->remap[*(c++)];
+ DEBUG_PRINTF("c: %02hhx cp:%02hhx (s=%hu)\n", *(c-1), cprime, s);
+
+ u32 edge_num = ((u32)s << as) + cprime;
+ run_prog(nfa, edge_prog_table, buf, offAdj, c, edge_num, som);
+ if (s < sherman_base) {
+ DEBUG_PRINTF("doing normal\n");
+ assert(s < m->state_count);
+ s = succ_table[edge_num];
+ } else {
+ const char *sherman_state
+ = findShermanState(m, sherman_base_offset, sherman_base, s);
+ DEBUG_PRINTF("doing sherman\n");
+ s = doSherman16(sherman_state, cprime, succ_table, as);
+ }
+ DEBUG_PRINTF("s: %hu (%hu)\n", s, (u16)(s & STATE_MASK));
+
+ if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) {
+ if (mode == STOP_AT_MATCH) {
+ *state = s & STATE_MASK;
+ *c_final = c - 1;
+ return MO_CONTINUE_MATCHING;
+ }
+
+ u64a loc = (c - 1) - buf + offAdj + 1;
+ if (doReports(cb, ctxt, m, som, s & STATE_MASK, loc, 0,
+ &cached_accept_state, &cached_accept_id,
+ &cached_accept_som) == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+
+ s &= STATE_MASK;
+ }
+
+with_accel:
+ while (c < c_end && s) {
+ u8 cprime = m->remap[*(c++)];
+ DEBUG_PRINTF("c: %02hhx cp:%02hhx (s=%hu)\n", *(c-1), cprime, s);
+
+ u32 edge_num = ((u32)s << as) + cprime;
+ run_prog(nfa, edge_prog_table, buf, offAdj, c, edge_num, som);
+ if (s < sherman_base) {
+ DEBUG_PRINTF("doing normal\n");
+ assert(s < m->state_count);
+ s = succ_table[edge_num];
+ } else {
+ const char *sherman_state
+ = findShermanState(m, sherman_base_offset, sherman_base, s);
+ DEBUG_PRINTF("doing sherman\n");
+ s = doSherman16(sherman_state, cprime, succ_table, as);
+ }
+ DEBUG_PRINTF("s: %hu (%hu)\n", s, (u16)(s & STATE_MASK));
+
+ if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) {
+ if (mode == STOP_AT_MATCH) {
+ *state = s & STATE_MASK;
+ *c_final = c - 1;
+ return MO_CONTINUE_MATCHING;
+ }
+
+ u64a loc = (c - 1) - buf + offAdj + 1;
+
+ if (doReports(cb, ctxt, m, som, s & STATE_MASK, loc, 0,
+ &cached_accept_state, &cached_accept_id,
+ &cached_accept_som)
+ == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ } else if (s & ACCEL_FLAG) {
+ DEBUG_PRINTF("skipping\n");
+ const struct mstate_aux *this_aux = get_aux(m, s & STATE_MASK);
+ u32 accel_offset = this_aux->accel_offset;
+
+ assert(accel_offset >= m->aux_offset);
+ assert(accel_offset < m->sherman_offset);
+
+ const struct gough_accel *gacc
+ = (const void *)((const char *)m + accel_offset);
+ assert(!gacc->prog_offset == !gacc->margin_dist);
+ const u8 *c2 = run_accel(&gacc->accel, c, c_end);
+
+ if (c2 != c && gacc->prog_offset) {
+ run_accel_prog(nfa, gacc, buf, offAdj, c, c2, som);
+ }
+
+ if (c2 < min_accel_offset + BAD_ACCEL_DIST) {
+ min_accel_offset = c2 + BIG_ACCEL_PENALTY;
+ } else {
+ min_accel_offset = c2 + SMALL_ACCEL_PENALTY;
+ }
+
+ if (min_accel_offset >= c_end - ACCEL_MIN_LEN) {
+ min_accel_offset = c_end;
+ }
+
+ DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n",
+ c2 - c, min_accel_offset - c2, c_end - c2);
+
+ c = c2;
+ s &= STATE_MASK;
+ goto without_accel;
+ }
+
+ s &= STATE_MASK;
+ }
+
+ if (mode == STOP_AT_MATCH) {
+ *c_final = c_end;
+ }
+ *state = s;
+
+ return MO_CONTINUE_MATCHING;
+}
+
+static really_inline
+char goughExec8_i(const struct mcclellan *m, struct gough_som_info *som,
+ u8 *state, const u8 *buf, size_t len, u64a offAdj,
NfaCallback cb, void *ctxt, const u8 **c_final,
- enum MatchMode mode) {
- u8 s = *state;
- const u8 *c = buf, *c_end = buf + len;
- const u8 *succ_table = (const u8 *)((const char *)m
- + sizeof(struct mcclellan));
- const u32 as = m->alphaShift;
- const struct mstate_aux *aux;
-
- const struct NFA *nfa
- = (const struct NFA *)((const char *)m - sizeof(struct NFA));
- aux = (const struct mstate_aux *)((const char *)nfa + m->aux_offset);
-
- const u32 *edge_prog_table = (const u32 *)(get_gough(m) + 1);
-
- u16 accel_limit = m->accel_limit_8;
- u16 accept_limit = m->accept_limit_8;
-
- u32 cached_accept_id = 0;
- u16 cached_accept_state = 0;
- u32 cached_accept_som = 0;
-
- DEBUG_PRINTF("accel %hu, accept %hu\n", accel_limit, accept_limit);
-
- DEBUG_PRINTF("s: %hhu, len %zu\n", s, len);
-
- const u8 *min_accel_offset = c;
- if (!m->has_accel || len < ACCEL_MIN_LEN) {
- min_accel_offset = c_end;
- goto without_accel;
- }
-
- goto with_accel;
-
-without_accel:
- while (c < min_accel_offset && s) {
- u8 cprime = m->remap[*(c++)];
- DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *(c-1),
- ourisprint(*(c-1)) ? *(c-1) : '?', cprime);
-
- u32 edge_num = ((u32)s << as) + cprime;
-
- run_prog(nfa, edge_prog_table, buf, offAdj, c, edge_num, som);
-
- s = succ_table[edge_num];
- DEBUG_PRINTF("s: %hhu\n", s);
-
- if (mode != NO_MATCHES && s >= accept_limit) {
- if (mode == STOP_AT_MATCH) {
- DEBUG_PRINTF("match - pausing\n");
- *state = s;
- *c_final = c - 1;
- return MO_CONTINUE_MATCHING;
- }
-
- u64a loc = (c - 1) - buf + offAdj + 1;
- if (doReports(cb, ctxt, m, som, s, loc, 0,
- &cached_accept_state, &cached_accept_id,
- &cached_accept_som)
- == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
- }
-
-with_accel:
- while (c < c_end && s) {
- u8 cprime = m->remap[*(c++)];
- DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *(c-1),
- ourisprint(*(c-1)) ? *(c-1) : '?', cprime);
-
- u32 edge_num = ((u32)s << as) + cprime;
-
- run_prog(nfa, edge_prog_table, buf, offAdj, c, edge_num, som);
-
- s = succ_table[edge_num];
- DEBUG_PRINTF("s: %hhu\n", s);
-
- if (s >= accel_limit) { /* accept_limit >= accel_limit */
- if (mode != NO_MATCHES && s >= accept_limit) {
- if (mode == STOP_AT_MATCH) {
- DEBUG_PRINTF("match - pausing\n");
- *state = s;
- *c_final = c - 1;
- return MO_CONTINUE_MATCHING;
- }
-
- u64a loc = (c - 1) - buf + offAdj + 1;
- if (doReports(cb, ctxt, m, som, s, loc, 0,
- &cached_accept_state, &cached_accept_id,
- &cached_accept_som)
- == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- } else if (aux[s].accel_offset) {
- DEBUG_PRINTF("skipping\n");
-
- const struct gough_accel *gacc
- = (const void *)((const char *)m + aux[s].accel_offset);
- const u8 *c2 = run_accel(&gacc->accel, c, c_end);
-
- if (c2 != c && gacc->prog_offset) {
- run_accel_prog(nfa, gacc, buf, offAdj, c, c2, som);
- }
-
- if (c2 < min_accel_offset + BAD_ACCEL_DIST) {
- min_accel_offset = c2 + BIG_ACCEL_PENALTY;
- } else {
- min_accel_offset = c2 + SMALL_ACCEL_PENALTY;
- }
-
- if (min_accel_offset >= c_end - ACCEL_MIN_LEN) {
- min_accel_offset = c_end;
- }
-
- DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n",
- c2 - c, min_accel_offset - c2, c_end - c2);
-
- c = c2;
- goto without_accel;
- }
- }
- }
-
- *state = s;
- if (mode == STOP_AT_MATCH) {
- *c_final = c_end;
- }
- return MO_CONTINUE_MATCHING;
-}
-
-static never_inline
-char goughExec8_i_ni(const struct mcclellan *m, struct gough_som_info *som,
- u8 *state, const u8 *buf, size_t len, u64a offAdj,
+ enum MatchMode mode) {
+ u8 s = *state;
+ const u8 *c = buf, *c_end = buf + len;
+ const u8 *succ_table = (const u8 *)((const char *)m
+ + sizeof(struct mcclellan));
+ const u32 as = m->alphaShift;
+ const struct mstate_aux *aux;
+
+ const struct NFA *nfa
+ = (const struct NFA *)((const char *)m - sizeof(struct NFA));
+ aux = (const struct mstate_aux *)((const char *)nfa + m->aux_offset);
+
+ const u32 *edge_prog_table = (const u32 *)(get_gough(m) + 1);
+
+ u16 accel_limit = m->accel_limit_8;
+ u16 accept_limit = m->accept_limit_8;
+
+ u32 cached_accept_id = 0;
+ u16 cached_accept_state = 0;
+ u32 cached_accept_som = 0;
+
+ DEBUG_PRINTF("accel %hu, accept %hu\n", accel_limit, accept_limit);
+
+ DEBUG_PRINTF("s: %hhu, len %zu\n", s, len);
+
+ const u8 *min_accel_offset = c;
+ if (!m->has_accel || len < ACCEL_MIN_LEN) {
+ min_accel_offset = c_end;
+ goto without_accel;
+ }
+
+ goto with_accel;
+
+without_accel:
+ while (c < min_accel_offset && s) {
+ u8 cprime = m->remap[*(c++)];
+ DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *(c-1),
+ ourisprint(*(c-1)) ? *(c-1) : '?', cprime);
+
+ u32 edge_num = ((u32)s << as) + cprime;
+
+ run_prog(nfa, edge_prog_table, buf, offAdj, c, edge_num, som);
+
+ s = succ_table[edge_num];
+ DEBUG_PRINTF("s: %hhu\n", s);
+
+ if (mode != NO_MATCHES && s >= accept_limit) {
+ if (mode == STOP_AT_MATCH) {
+ DEBUG_PRINTF("match - pausing\n");
+ *state = s;
+ *c_final = c - 1;
+ return MO_CONTINUE_MATCHING;
+ }
+
+ u64a loc = (c - 1) - buf + offAdj + 1;
+ if (doReports(cb, ctxt, m, som, s, loc, 0,
+ &cached_accept_state, &cached_accept_id,
+ &cached_accept_som)
+ == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+ }
+
+with_accel:
+ while (c < c_end && s) {
+ u8 cprime = m->remap[*(c++)];
+ DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *(c-1),
+ ourisprint(*(c-1)) ? *(c-1) : '?', cprime);
+
+ u32 edge_num = ((u32)s << as) + cprime;
+
+ run_prog(nfa, edge_prog_table, buf, offAdj, c, edge_num, som);
+
+ s = succ_table[edge_num];
+ DEBUG_PRINTF("s: %hhu\n", s);
+
+ if (s >= accel_limit) { /* accept_limit >= accel_limit */
+ if (mode != NO_MATCHES && s >= accept_limit) {
+ if (mode == STOP_AT_MATCH) {
+ DEBUG_PRINTF("match - pausing\n");
+ *state = s;
+ *c_final = c - 1;
+ return MO_CONTINUE_MATCHING;
+ }
+
+ u64a loc = (c - 1) - buf + offAdj + 1;
+ if (doReports(cb, ctxt, m, som, s, loc, 0,
+ &cached_accept_state, &cached_accept_id,
+ &cached_accept_som)
+ == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ } else if (aux[s].accel_offset) {
+ DEBUG_PRINTF("skipping\n");
+
+ const struct gough_accel *gacc
+ = (const void *)((const char *)m + aux[s].accel_offset);
+ const u8 *c2 = run_accel(&gacc->accel, c, c_end);
+
+ if (c2 != c && gacc->prog_offset) {
+ run_accel_prog(nfa, gacc, buf, offAdj, c, c2, som);
+ }
+
+ if (c2 < min_accel_offset + BAD_ACCEL_DIST) {
+ min_accel_offset = c2 + BIG_ACCEL_PENALTY;
+ } else {
+ min_accel_offset = c2 + SMALL_ACCEL_PENALTY;
+ }
+
+ if (min_accel_offset >= c_end - ACCEL_MIN_LEN) {
+ min_accel_offset = c_end;
+ }
+
+ DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n",
+ c2 - c, min_accel_offset - c2, c_end - c2);
+
+ c = c2;
+ goto without_accel;
+ }
+ }
+ }
+
+ *state = s;
+ if (mode == STOP_AT_MATCH) {
+ *c_final = c_end;
+ }
+ return MO_CONTINUE_MATCHING;
+}
+
+static never_inline
+char goughExec8_i_ni(const struct mcclellan *m, struct gough_som_info *som,
+ u8 *state, const u8 *buf, size_t len, u64a offAdj,
NfaCallback cb, void *ctxt, const u8 **final_point,
- enum MatchMode mode) {
- return goughExec8_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point,
- mode);
-}
-
-static never_inline
-char goughExec16_i_ni(const struct mcclellan *m, struct gough_som_info *som,
- u16 *state, const u8 *buf, size_t len, u64a offAdj,
+ enum MatchMode mode) {
+ return goughExec8_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point,
+ mode);
+}
+
+static never_inline
+char goughExec16_i_ni(const struct mcclellan *m, struct gough_som_info *som,
+ u16 *state, const u8 *buf, size_t len, u64a offAdj,
NfaCallback cb, void *ctxt, const u8 **final_point,
- enum MatchMode mode) {
- return goughExec16_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point,
- mode);
-}
-
-static really_inline
-struct gough_som_info *getSomInfo(char *state_base) {
- return (struct gough_som_info *)(state_base + 16);
-}
-
-static really_inline
-const struct gough_som_info *getSomInfoConst(const char *state_base) {
- return (const struct gough_som_info *)(state_base + 16);
-}
-
-static really_inline
-char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
+ enum MatchMode mode) {
+ return goughExec16_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point,
+ mode);
+}
+
+static really_inline
+struct gough_som_info *getSomInfo(char *state_base) {
+ return (struct gough_som_info *)(state_base + 16);
+}
+
+static really_inline
+const struct gough_som_info *getSomInfoConst(const char *state_base) {
+ return (const struct gough_som_info *)(state_base + 16);
+}
+
+static really_inline
+char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
const u8 *hend, NfaCallback cb, void *context,
- struct mq *q, s64a end, enum MatchMode mode) {
- DEBUG_PRINTF("enter\n");
- struct gough_som_info *som = getSomInfo(q->state);
- assert(n->type == GOUGH_NFA_8);
- const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
- s64a sp;
- u8 s = *(u8 *)q->state;
-
- if (q->report_current) {
- assert(s);
- assert(s >= m->accept_limit_8);
-
- u32 cached_accept_id = 0;
- u16 cached_accept_state = 0;
- u32 cached_accept_som = 0;
-
- int rv = doReports(cb, context, m, som, s, q_cur_offset(q), 0,
- &cached_accept_state, &cached_accept_id,
- &cached_accept_som);
-
- q->report_current = 0;
-
- if (rv == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
-
- sp = q_cur_loc(q);
- q->cur++;
-
- const u8 *cur_buf = sp < 0 ? hend : buffer;
-
- if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
- /* this is as far as we go */
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = end;
- *(u8 *)q->state = s;
- return MO_ALIVE;
- }
-
- while (1) {
- DEBUG_PRINTF("%s @ %llu [som %llu]\n",
- q->items[q->cur].type == MQE_TOP ? "TOP" :
- q->items[q->cur].type == MQE_END ? "END" : "???",
- q->items[q->cur].location + offset, q->items[q->cur].som);
- assert(q->cur < q->end);
- s64a ep = q->items[q->cur].location;
- if (mode != NO_MATCHES) {
- ep = MIN(ep, end);
- }
-
- assert(ep >= sp);
- DEBUG_PRINTF("run to %lld from %lld\n", ep, sp);
-
- s64a local_ep = ep;
- if (sp < 0) {
- local_ep = MIN(0, ep);
- }
-
- const u8 *final_look;
- if (goughExec8_i_ni(m, som, &s, cur_buf + sp, local_ep - sp,
+ struct mq *q, s64a end, enum MatchMode mode) {
+ DEBUG_PRINTF("enter\n");
+ struct gough_som_info *som = getSomInfo(q->state);
+ assert(n->type == GOUGH_NFA_8);
+ const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+ s64a sp;
+ u8 s = *(u8 *)q->state;
+
+ if (q->report_current) {
+ assert(s);
+ assert(s >= m->accept_limit_8);
+
+ u32 cached_accept_id = 0;
+ u16 cached_accept_state = 0;
+ u32 cached_accept_som = 0;
+
+ int rv = doReports(cb, context, m, som, s, q_cur_offset(q), 0,
+ &cached_accept_state, &cached_accept_id,
+ &cached_accept_som);
+
+ q->report_current = 0;
+
+ if (rv == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+
+ sp = q_cur_loc(q);
+ q->cur++;
+
+ const u8 *cur_buf = sp < 0 ? hend : buffer;
+
+ if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
+ /* this is as far as we go */
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ *(u8 *)q->state = s;
+ return MO_ALIVE;
+ }
+
+ while (1) {
+ DEBUG_PRINTF("%s @ %llu [som %llu]\n",
+ q->items[q->cur].type == MQE_TOP ? "TOP" :
+ q->items[q->cur].type == MQE_END ? "END" : "???",
+ q->items[q->cur].location + offset, q->items[q->cur].som);
+ assert(q->cur < q->end);
+ s64a ep = q->items[q->cur].location;
+ if (mode != NO_MATCHES) {
+ ep = MIN(ep, end);
+ }
+
+ assert(ep >= sp);
+ DEBUG_PRINTF("run to %lld from %lld\n", ep, sp);
+
+ s64a local_ep = ep;
+ if (sp < 0) {
+ local_ep = MIN(0, ep);
+ }
+
+ const u8 *final_look;
+ if (goughExec8_i_ni(m, som, &s, cur_buf + sp, local_ep - sp,
offset + sp, cb, context, &final_look, mode)
- == MO_HALT_MATCHING) {
- *(u8 *)q->state = 0;
- return 0;
- }
- if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) {
- /* found a match */
- DEBUG_PRINTF("found a match\n");
- assert(q->cur);
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = final_look - cur_buf + 1; /* due to
- * early -1 */
- *(u8 *)q->state = s;
- return MO_MATCHES_PENDING;
- }
-
- assert(q->cur);
- if (mode != NO_MATCHES && q->items[q->cur].location > end) {
- /* this is as far as we go */
- assert(q->cur);
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = end;
- *(u8 *)q->state = s;
- return MO_ALIVE;
- }
-
- sp = local_ep;
-
- if (sp == 0) {
- cur_buf = buffer;
- }
-
- if (sp != ep) {
- continue;
- }
-
- switch (q->items[q->cur].type) {
- case MQE_TOP:
- assert(!s || sp + offset > 0);
- if (sp + offset == 0) {
- s = (u8)m->start_anchored;
- break;
- }
- s = goughEnableStarts(m, s, q->items[q->cur].som, som);
- break;
- case MQE_END:
- *(u8 *)q->state = s;
- q->cur++;
- return s ? MO_ALIVE : 0;
- default:
- assert(!"invalid queue event");
- }
-
- q->cur++;
- }
-}
-
-
-static really_inline
-char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
+ == MO_HALT_MATCHING) {
+ *(u8 *)q->state = 0;
+ return 0;
+ }
+ if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) {
+ /* found a match */
+ DEBUG_PRINTF("found a match\n");
+ assert(q->cur);
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = final_look - cur_buf + 1; /* due to
+ * early -1 */
+ *(u8 *)q->state = s;
+ return MO_MATCHES_PENDING;
+ }
+
+ assert(q->cur);
+ if (mode != NO_MATCHES && q->items[q->cur].location > end) {
+ /* this is as far as we go */
+ assert(q->cur);
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ *(u8 *)q->state = s;
+ return MO_ALIVE;
+ }
+
+ sp = local_ep;
+
+ if (sp == 0) {
+ cur_buf = buffer;
+ }
+
+ if (sp != ep) {
+ continue;
+ }
+
+ switch (q->items[q->cur].type) {
+ case MQE_TOP:
+ assert(!s || sp + offset > 0);
+ if (sp + offset == 0) {
+ s = (u8)m->start_anchored;
+ break;
+ }
+ s = goughEnableStarts(m, s, q->items[q->cur].som, som);
+ break;
+ case MQE_END:
+ *(u8 *)q->state = s;
+ q->cur++;
+ return s ? MO_ALIVE : 0;
+ default:
+ assert(!"invalid queue event");
+ }
+
+ q->cur++;
+ }
+}
+
+
+static really_inline
+char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
const u8 *hend, NfaCallback cb, void *context,
- struct mq *q, s64a end, enum MatchMode mode) {
- struct gough_som_info *som = getSomInfo(q->state);
- assert(n->type == GOUGH_NFA_16);
- const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
- s64a sp;
-
- assert(ISALIGNED_N(q->state, 2));
- u16 s = *(u16 *)q->state;
-
- if (q->report_current) {
- assert(s);
- assert(get_aux(m, s)->accept);
-
- u32 cached_accept_id = 0;
- u16 cached_accept_state = 0;
- u32 cached_accept_som = 0;
-
- int rv = doReports(cb, context, m, som, s, q_cur_offset(q), 0,
- &cached_accept_state, &cached_accept_id,
- &cached_accept_som);
-
- q->report_current = 0;
-
- if (rv == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
-
- sp = q_cur_loc(q);
- q->cur++;
-
- const u8 *cur_buf = sp < 0 ? hend : buffer;
-
- assert(q->cur);
- if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
- /* this is as far as we go */
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = end;
- *(u16 *)q->state = s;
- return MO_ALIVE;
- }
-
- while (1) {
- assert(q->cur < q->end);
- s64a ep = q->items[q->cur].location;
- if (mode != NO_MATCHES) {
- ep = MIN(ep, end);
- }
-
- assert(ep >= sp);
-
- s64a local_ep = ep;
- if (sp < 0) {
- local_ep = MIN(0, ep);
- }
-
- /* do main buffer region */
- const u8 *final_look;
- if (goughExec16_i_ni(m, som, &s, cur_buf + sp, local_ep - sp,
+ struct mq *q, s64a end, enum MatchMode mode) {
+ struct gough_som_info *som = getSomInfo(q->state);
+ assert(n->type == GOUGH_NFA_16);
+ const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+ s64a sp;
+
+ assert(ISALIGNED_N(q->state, 2));
+ u16 s = *(u16 *)q->state;
+
+ if (q->report_current) {
+ assert(s);
+ assert(get_aux(m, s)->accept);
+
+ u32 cached_accept_id = 0;
+ u16 cached_accept_state = 0;
+ u32 cached_accept_som = 0;
+
+ int rv = doReports(cb, context, m, som, s, q_cur_offset(q), 0,
+ &cached_accept_state, &cached_accept_id,
+ &cached_accept_som);
+
+ q->report_current = 0;
+
+ if (rv == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+
+ sp = q_cur_loc(q);
+ q->cur++;
+
+ const u8 *cur_buf = sp < 0 ? hend : buffer;
+
+ assert(q->cur);
+ if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
+ /* this is as far as we go */
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ *(u16 *)q->state = s;
+ return MO_ALIVE;
+ }
+
+ while (1) {
+ assert(q->cur < q->end);
+ s64a ep = q->items[q->cur].location;
+ if (mode != NO_MATCHES) {
+ ep = MIN(ep, end);
+ }
+
+ assert(ep >= sp);
+
+ s64a local_ep = ep;
+ if (sp < 0) {
+ local_ep = MIN(0, ep);
+ }
+
+ /* do main buffer region */
+ const u8 *final_look;
+ if (goughExec16_i_ni(m, som, &s, cur_buf + sp, local_ep - sp,
offset + sp, cb, context, &final_look, mode)
- == MO_HALT_MATCHING) {
- *(u16 *)q->state = 0;
- return 0;
- }
- if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) {
- /* this is as far as we go */
- assert(q->cur);
- DEBUG_PRINTF("state %hu final_look %zd\n", s,
- final_look - cur_buf);
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = final_look - cur_buf + 1; /* due to
- * early -1 */
- *(u16 *)q->state = s;
- return MO_MATCHES_PENDING;
- }
-
- assert(q->cur);
- if (mode != NO_MATCHES && q->items[q->cur].location > end) {
- /* this is as far as we go */
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = end;
- *(u16 *)q->state = s;
- return MO_ALIVE;
- }
-
- sp = local_ep;
-
- if (sp == 0) {
- cur_buf = buffer;
- }
-
- if (sp != ep) {
- continue;
- }
-
- switch (q->items[q->cur].type) {
- case MQE_TOP:
- assert(!s || sp + offset > 0);
- if (sp + offset == 0) {
- s = m->start_anchored;
- break;
- }
- s = goughEnableStarts(m, s, q->items[q->cur].som, som);
- break;
- case MQE_END:
- *(u16 *)q->state = s;
- q->cur++;
- return s ? MO_ALIVE : 0;
- default:
- assert(!"invalid queue event");
- }
-
- q->cur++;
- }
-}
-
-char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end) {
- u64a offset = q->offset;
- const u8 *buffer = q->buffer;
+ == MO_HALT_MATCHING) {
+ *(u16 *)q->state = 0;
+ return 0;
+ }
+ if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) {
+ /* this is as far as we go */
+ assert(q->cur);
+ DEBUG_PRINTF("state %hu final_look %zd\n", s,
+ final_look - cur_buf);
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = final_look - cur_buf + 1; /* due to
+ * early -1 */
+ *(u16 *)q->state = s;
+ return MO_MATCHES_PENDING;
+ }
+
+ assert(q->cur);
+ if (mode != NO_MATCHES && q->items[q->cur].location > end) {
+ /* this is as far as we go */
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ *(u16 *)q->state = s;
+ return MO_ALIVE;
+ }
+
+ sp = local_ep;
+
+ if (sp == 0) {
+ cur_buf = buffer;
+ }
+
+ if (sp != ep) {
+ continue;
+ }
+
+ switch (q->items[q->cur].type) {
+ case MQE_TOP:
+ assert(!s || sp + offset > 0);
+ if (sp + offset == 0) {
+ s = m->start_anchored;
+ break;
+ }
+ s = goughEnableStarts(m, s, q->items[q->cur].som, som);
+ break;
+ case MQE_END:
+ *(u16 *)q->state = s;
+ q->cur++;
+ return s ? MO_ALIVE : 0;
+ default:
+ assert(!"invalid queue event");
+ }
+
+ q->cur++;
+ }
+}
+
+char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end) {
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
NfaCallback cb = q->cb;
- void *context = q->context;
- assert(n->type == GOUGH_NFA_8);
- const u8 *hend = q->history + q->hlength;
-
- return nfaExecGough8_Q2i(n, offset, buffer, hend, cb, context, q, end,
- CALLBACK_OUTPUT);
-}
-
-char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end) {
- u64a offset = q->offset;
- const u8 *buffer = q->buffer;
+ void *context = q->context;
+ assert(n->type == GOUGH_NFA_8);
+ const u8 *hend = q->history + q->hlength;
+
+ return nfaExecGough8_Q2i(n, offset, buffer, hend, cb, context, q, end,
+ CALLBACK_OUTPUT);
+}
+
+char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end) {
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
NfaCallback cb = q->cb;
- void *context = q->context;
- assert(n->type == GOUGH_NFA_16);
- const u8 *hend = q->history + q->hlength;
-
- return nfaExecGough16_Q2i(n, offset, buffer, hend, cb, context, q, end,
- CALLBACK_OUTPUT);
-}
-
-char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end) {
- u64a offset = q->offset;
- const u8 *buffer = q->buffer;
+ void *context = q->context;
+ assert(n->type == GOUGH_NFA_16);
+ const u8 *hend = q->history + q->hlength;
+
+ return nfaExecGough16_Q2i(n, offset, buffer, hend, cb, context, q, end,
+ CALLBACK_OUTPUT);
+}
+
+char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end) {
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
NfaCallback cb = q->cb;
- void *context = q->context;
- assert(n->type == GOUGH_NFA_8);
- const u8 *hend = q->history + q->hlength;
-
- return nfaExecGough8_Q2i(n, offset, buffer, hend, cb, context, q, end,
- STOP_AT_MATCH);
-}
-
-char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end) {
- u64a offset = q->offset;
- const u8 *buffer = q->buffer;
+ void *context = q->context;
+ assert(n->type == GOUGH_NFA_8);
+ const u8 *hend = q->history + q->hlength;
+
+ return nfaExecGough8_Q2i(n, offset, buffer, hend, cb, context, q, end,
+ STOP_AT_MATCH);
+}
+
+char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end) {
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
NfaCallback cb = q->cb;
- void *context = q->context;
- assert(n->type == GOUGH_NFA_16);
- const u8 *hend = q->history + q->hlength;
-
- return nfaExecGough16_Q2i(n, offset, buffer, hend, cb, context, q, end,
- STOP_AT_MATCH);
-}
-
-char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report) {
- u64a offset = q->offset;
- const u8 *buffer = q->buffer;
+ void *context = q->context;
+ assert(n->type == GOUGH_NFA_16);
+ const u8 *hend = q->history + q->hlength;
+
+ return nfaExecGough16_Q2i(n, offset, buffer, hend, cb, context, q, end,
+ STOP_AT_MATCH);
+}
+
+char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report) {
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
NfaCallback cb = q->cb;
- void *context = q->context;
- assert(n->type == GOUGH_NFA_8);
- const u8 *hend = q->history + q->hlength;
-
- char rv = nfaExecGough8_Q2i(n, offset, buffer, hend, cb, context, q,
- 0 /* end */, NO_MATCHES);
- if (rv && nfaExecMcClellan8_inAccept(n, report, q)) {
- return MO_MATCHES_PENDING;
- } else {
- return rv;
- }
-}
-
-char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report) {
- u64a offset = q->offset;
- const u8 *buffer = q->buffer;
+ void *context = q->context;
+ assert(n->type == GOUGH_NFA_8);
+ const u8 *hend = q->history + q->hlength;
+
+ char rv = nfaExecGough8_Q2i(n, offset, buffer, hend, cb, context, q,
+ 0 /* end */, NO_MATCHES);
+ if (rv && nfaExecMcClellan8_inAccept(n, report, q)) {
+ return MO_MATCHES_PENDING;
+ } else {
+ return rv;
+ }
+}
+
+char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report) {
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
NfaCallback cb = q->cb;
- void *context = q->context;
- assert(n->type == GOUGH_NFA_16);
- const u8 *hend = q->history + q->hlength;
-
- char rv = nfaExecGough16_Q2i(n, offset, buffer, hend, cb, context, q,
- 0 /* end */, NO_MATCHES);
-
- if (rv && nfaExecMcClellan16_inAccept(n, report, q)) {
- return MO_MATCHES_PENDING;
- } else {
- return rv;
- }
-}
-
-char nfaExecGough8_initCompressedState(const struct NFA *nfa, u64a offset,
- void *state, UNUSED u8 key) {
- const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
- memset(state, 0, nfa->streamStateSize);
- u8 s = offset ? m->start_floating : m->start_anchored;
- if (s) {
- *(u8 *)state = s;
- return 1;
- }
- return 0;
-}
-
-char nfaExecGough16_initCompressedState(const struct NFA *nfa, u64a offset,
- void *state, UNUSED u8 key) {
- const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
- memset(state, 0, nfa->streamStateSize);
- u16 s = offset ? m->start_floating : m->start_anchored;
- if (s) {
- unaligned_store_u16(state, s);
- return 1;
- }
- return 0;
-}
-
-
-char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) {
- const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+ void *context = q->context;
+ assert(n->type == GOUGH_NFA_16);
+ const u8 *hend = q->history + q->hlength;
+
+ char rv = nfaExecGough16_Q2i(n, offset, buffer, hend, cb, context, q,
+ 0 /* end */, NO_MATCHES);
+
+ if (rv && nfaExecMcClellan16_inAccept(n, report, q)) {
+ return MO_MATCHES_PENDING;
+ } else {
+ return rv;
+ }
+}
+
+char nfaExecGough8_initCompressedState(const struct NFA *nfa, u64a offset,
+ void *state, UNUSED u8 key) {
+ const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
+ memset(state, 0, nfa->streamStateSize);
+ u8 s = offset ? m->start_floating : m->start_anchored;
+ if (s) {
+ *(u8 *)state = s;
+ return 1;
+ }
+ return 0;
+}
+
+char nfaExecGough16_initCompressedState(const struct NFA *nfa, u64a offset,
+ void *state, UNUSED u8 key) {
+ const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
+ memset(state, 0, nfa->streamStateSize);
+ u16 s = offset ? m->start_floating : m->start_anchored;
+ if (s) {
+ unaligned_store_u16(state, s);
+ return 1;
+ }
+ return 0;
+}
+
+
+char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) {
+ const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
NfaCallback cb = q->cb;
- void *ctxt = q->context;
- u8 s = *(u8 *)q->state;
- u64a offset = q_cur_offset(q);
- struct gough_som_info *som = getSomInfo(q->state);
- assert(q_cur_type(q) == MQE_START);
- assert(s);
-
- if (s >= m->accept_limit_8) {
- u32 cached_accept_id = 0;
- u16 cached_accept_state = 0;
- u32 cached_accept_som = 0;
-
- doReports(cb, ctxt, m, som, s, offset, 0, &cached_accept_state,
- &cached_accept_id, &cached_accept_som);
- }
-
- return 0;
-}
-
-char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q) {
- const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+ void *ctxt = q->context;
+ u8 s = *(u8 *)q->state;
+ u64a offset = q_cur_offset(q);
+ struct gough_som_info *som = getSomInfo(q->state);
+ assert(q_cur_type(q) == MQE_START);
+ assert(s);
+
+ if (s >= m->accept_limit_8) {
+ u32 cached_accept_id = 0;
+ u16 cached_accept_state = 0;
+ u32 cached_accept_som = 0;
+
+ doReports(cb, ctxt, m, som, s, offset, 0, &cached_accept_state,
+ &cached_accept_id, &cached_accept_som);
+ }
+
+ return 0;
+}
+
+char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q) {
+ const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
NfaCallback cb = q->cb;
- void *ctxt = q->context;
- u16 s = *(u16 *)q->state;
- const struct mstate_aux *aux = get_aux(m, s);
- u64a offset = q_cur_offset(q);
- struct gough_som_info *som = getSomInfo(q->state);
- assert(q_cur_type(q) == MQE_START);
- DEBUG_PRINTF("state %hu\n", s);
- assert(s);
-
- if (aux->accept) {
- u32 cached_accept_id = 0;
- u16 cached_accept_state = 0;
- u32 cached_accept_som = 0;
-
- doReports(cb, ctxt, m, som, s, offset, 0, &cached_accept_state,
- &cached_accept_id, &cached_accept_som);
- }
-
- return 0;
-}
-
-char nfaExecGough8_inAccept(const struct NFA *n, ReportID report,
- struct mq *q) {
- return nfaExecMcClellan8_inAccept(n, report, q);
-}
-
-char nfaExecGough16_inAccept(const struct NFA *n, ReportID report,
- struct mq *q) {
- return nfaExecMcClellan16_inAccept(n, report, q);
-}
-
+ void *ctxt = q->context;
+ u16 s = *(u16 *)q->state;
+ const struct mstate_aux *aux = get_aux(m, s);
+ u64a offset = q_cur_offset(q);
+ struct gough_som_info *som = getSomInfo(q->state);
+ assert(q_cur_type(q) == MQE_START);
+ DEBUG_PRINTF("state %hu\n", s);
+ assert(s);
+
+ if (aux->accept) {
+ u32 cached_accept_id = 0;
+ u16 cached_accept_state = 0;
+ u32 cached_accept_som = 0;
+
+ doReports(cb, ctxt, m, som, s, offset, 0, &cached_accept_state,
+ &cached_accept_id, &cached_accept_som);
+ }
+
+ return 0;
+}
+
+char nfaExecGough8_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q) {
+ return nfaExecMcClellan8_inAccept(n, report, q);
+}
+
+char nfaExecGough16_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q) {
+ return nfaExecMcClellan16_inAccept(n, report, q);
+}
+
char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q) {
return nfaExecMcClellan8_inAnyAccept(n, q);
}
@@ -1039,105 +1039,105 @@ char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q) {
return nfaExecMcClellan16_inAnyAccept(n, q);
}
-static
+static
char goughCheckEOD(const struct NFA *nfa, u16 s,
- const struct gough_som_info *som,
+ const struct gough_som_info *som,
u64a offset, NfaCallback cb, void *ctxt) {
- const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
- const struct mstate_aux *aux = get_aux(m, s);
-
+ const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
+ const struct mstate_aux *aux = get_aux(m, s);
+
if (!aux->accept_eod) {
return MO_CONTINUE_MATCHING;
- }
+ }
return doReports(cb, ctxt, m, som, s, offset, 1, NULL, NULL, NULL);
-}
-
-char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state,
+}
+
+char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state,
UNUSED const char *streamState, u64a offset,
NfaCallback callback, void *context) {
- const struct gough_som_info *som = getSomInfoConst(state);
+ const struct gough_som_info *som = getSomInfoConst(state);
return goughCheckEOD(nfa, *(const u8 *)state, som, offset, callback,
context);
-}
-
-char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state,
+}
+
+char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state,
UNUSED const char *streamState, u64a offset,
NfaCallback callback, void *context) {
- assert(ISALIGNED_N(state, 8));
- const struct gough_som_info *som = getSomInfoConst(state);
+ assert(ISALIGNED_N(state, 8));
+ const struct gough_som_info *som = getSomInfoConst(state);
return goughCheckEOD(nfa, *(const u16 *)state, som, offset, callback,
context);
-}
-
-char nfaExecGough8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) {
- memset(q->state, 0, nfa->scratchStateSize);
- return 0;
-}
-
-char nfaExecGough16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) {
- memset(q->state, 0, nfa->scratchStateSize);
- assert(ISALIGNED_N(q->state, 2));
- return 0;
-}
-
-static really_inline
-void compSomSpace(const struct NFA *nfa, u8 *dest_som_base,
- const struct gough_som_info *src, u64a curr_offset) {
- const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
- const struct gough_info *gi = get_gough(m);
- u32 count = gi->stream_som_loc_count;
- u32 width = gi->stream_som_loc_width;
-
- for (u32 i = 0; i < count; i++) {
- compressSomValue(width, curr_offset, dest_som_base, i, src->slots[i]);
- }
-}
-
-static really_inline
-void expandSomSpace(const struct NFA *nfa, struct gough_som_info *som,
- const u8 *src_som_base, u64a curr_offset) {
- const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
- const struct gough_info *gi = get_gough(m);
- u32 count = gi->stream_som_loc_count;
- u32 width = gi->stream_som_loc_width;
-
- for (u32 i = 0; i < count; i++) {
- som->slots[i] = expandSomValue(width, curr_offset, src_som_base, i);
- }
-}
-
-char nfaExecGough8_queueCompressState(const struct NFA *nfa, const struct mq *q,
- s64a loc) {
- void *dest = q->streamState;
- const void *src = q->state;
-
- *(u8 *)dest = *(const u8 *)src;
- compSomSpace(nfa, (u8 *)dest + 1, getSomInfoConst(src), q->offset + loc);
- return 0;
-}
-
-char nfaExecGough8_expandState(const struct NFA *nfa, void *dest,
- const void *src, u64a offset, UNUSED u8 key) {
- *(u8 *)dest = *(const u8 *)src;
- expandSomSpace(nfa, getSomInfo(dest), (const u8 *)src + 1, offset);
- return 0;
-}
-
-char nfaExecGough16_queueCompressState(const struct NFA *nfa,
- const struct mq *q, s64a loc) {
- void *dest = q->streamState;
- const void *src = q->state;
-
- assert(ISALIGNED_N(src, 2));
- unaligned_store_u16(dest, *(const u16 *)(src));
- compSomSpace(nfa, (u8 *)dest + 2, getSomInfoConst(src), q->offset + loc);
- return 0;
-}
-
-char nfaExecGough16_expandState(const struct NFA *nfa, void *dest,
- const void *src, u64a offset, UNUSED u8 key) {
- assert(ISALIGNED_N(dest, 2));
- *(u16 *)dest = unaligned_load_u16(src);
- expandSomSpace(nfa, getSomInfo(dest), (const u8 *)src + 2, offset);
- return 0;
-}
+}
+
+char nfaExecGough8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) {
+ memset(q->state, 0, nfa->scratchStateSize);
+ return 0;
+}
+
+char nfaExecGough16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) {
+ memset(q->state, 0, nfa->scratchStateSize);
+ assert(ISALIGNED_N(q->state, 2));
+ return 0;
+}
+
+static really_inline
+void compSomSpace(const struct NFA *nfa, u8 *dest_som_base,
+ const struct gough_som_info *src, u64a curr_offset) {
+ const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
+ const struct gough_info *gi = get_gough(m);
+ u32 count = gi->stream_som_loc_count;
+ u32 width = gi->stream_som_loc_width;
+
+ for (u32 i = 0; i < count; i++) {
+ compressSomValue(width, curr_offset, dest_som_base, i, src->slots[i]);
+ }
+}
+
+static really_inline
+void expandSomSpace(const struct NFA *nfa, struct gough_som_info *som,
+ const u8 *src_som_base, u64a curr_offset) {
+ const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
+ const struct gough_info *gi = get_gough(m);
+ u32 count = gi->stream_som_loc_count;
+ u32 width = gi->stream_som_loc_width;
+
+ for (u32 i = 0; i < count; i++) {
+ som->slots[i] = expandSomValue(width, curr_offset, src_som_base, i);
+ }
+}
+
+char nfaExecGough8_queueCompressState(const struct NFA *nfa, const struct mq *q,
+ s64a loc) {
+ void *dest = q->streamState;
+ const void *src = q->state;
+
+ *(u8 *)dest = *(const u8 *)src;
+ compSomSpace(nfa, (u8 *)dest + 1, getSomInfoConst(src), q->offset + loc);
+ return 0;
+}
+
+char nfaExecGough8_expandState(const struct NFA *nfa, void *dest,
+ const void *src, u64a offset, UNUSED u8 key) {
+ *(u8 *)dest = *(const u8 *)src;
+ expandSomSpace(nfa, getSomInfo(dest), (const u8 *)src + 1, offset);
+ return 0;
+}
+
+char nfaExecGough16_queueCompressState(const struct NFA *nfa,
+ const struct mq *q, s64a loc) {
+ void *dest = q->streamState;
+ const void *src = q->state;
+
+ assert(ISALIGNED_N(src, 2));
+ unaligned_store_u16(dest, *(const u16 *)(src));
+ compSomSpace(nfa, (u8 *)dest + 2, getSomInfoConst(src), q->offset + loc);
+ return 0;
+}
+
+char nfaExecGough16_expandState(const struct NFA *nfa, void *dest,
+ const void *src, u64a offset, UNUSED u8 key) {
+ assert(ISALIGNED_N(dest, 2));
+ *(u16 *)dest = unaligned_load_u16(src);
+ expandSomSpace(nfa, getSomInfo(dest), (const u8 *)src + 2, offset);
+ return 0;
+}
diff --git a/contrib/libs/hyperscan/src/nfa/gough.h b/contrib/libs/hyperscan/src/nfa/gough.h
index 9f32818ef8..a7f4889232 100644
--- a/contrib/libs/hyperscan/src/nfa/gough.h
+++ b/contrib/libs/hyperscan/src/nfa/gough.h
@@ -1,82 +1,82 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef GOUGH_H
-#define GOUGH_H
-
-#include "callback.h"
-#include "ue2common.h"
-
-struct NFA;
-struct mq;
-
-// 8-bit Gough
-
-char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state,
- const char *streamState, u64a offset,
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef GOUGH_H
+#define GOUGH_H
+
+#include "callback.h"
+#include "ue2common.h"
+
+struct NFA;
+struct mq;
+
+// 8-bit Gough
+
+char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state,
+ const char *streamState, u64a offset,
NfaCallback callback, void *context);
-char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report);
-char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q);
-char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, struct mq *q);
+char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report);
+char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q);
+char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, struct mq *q);
char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q);
-char nfaExecGough8_queueInitState(const struct NFA *n, struct mq *q);
-char nfaExecGough8_initCompressedState(const struct NFA *n, u64a offset,
- void *state, u8 key);
-char nfaExecGough8_queueCompressState(const struct NFA *nfa, const struct mq *q,
- s64a loc);
-char nfaExecGough8_expandState(const struct NFA *nfa, void *dest,
- const void *src, u64a offset, u8 key);
-
-#define nfaExecGough8_B_Reverse NFA_API_NO_IMPL
-#define nfaExecGough8_zombie_status NFA_API_ZOMBIE_NO_IMPL
-
-// 16-bit Gough
-
-char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state,
- const char *streamState, u64a offset,
+char nfaExecGough8_queueInitState(const struct NFA *n, struct mq *q);
+char nfaExecGough8_initCompressedState(const struct NFA *n, u64a offset,
+ void *state, u8 key);
+char nfaExecGough8_queueCompressState(const struct NFA *nfa, const struct mq *q,
+ s64a loc);
+char nfaExecGough8_expandState(const struct NFA *nfa, void *dest,
+ const void *src, u64a offset, u8 key);
+
+#define nfaExecGough8_B_Reverse NFA_API_NO_IMPL
+#define nfaExecGough8_zombie_status NFA_API_ZOMBIE_NO_IMPL
+
+// 16-bit Gough
+
+char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state,
+ const char *streamState, u64a offset,
NfaCallback callback, void *context);
-char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report);
-char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q);
-char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, struct mq *q);
+char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report);
+char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q);
+char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, struct mq *q);
char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q);
-char nfaExecGough16_queueInitState(const struct NFA *n, struct mq *q);
-char nfaExecGough16_initCompressedState(const struct NFA *n, u64a offset,
- void *state, u8 key);
-char nfaExecGough16_queueCompressState(const struct NFA *nfa,
- const struct mq *q, s64a loc);
-char nfaExecGough16_expandState(const struct NFA *nfa, void *dest,
- const void *src, u64a offset, u8 key);
-
-#define nfaExecGough16_B_Reverse NFA_API_NO_IMPL
-#define nfaExecGough16_zombie_status NFA_API_ZOMBIE_NO_IMPL
-
-#endif
+char nfaExecGough16_queueInitState(const struct NFA *n, struct mq *q);
+char nfaExecGough16_initCompressedState(const struct NFA *n, u64a offset,
+ void *state, u8 key);
+char nfaExecGough16_queueCompressState(const struct NFA *nfa,
+ const struct mq *q, s64a loc);
+char nfaExecGough16_expandState(const struct NFA *nfa, void *dest,
+ const void *src, u64a offset, u8 key);
+
+#define nfaExecGough16_B_Reverse NFA_API_NO_IMPL
+#define nfaExecGough16_zombie_status NFA_API_ZOMBIE_NO_IMPL
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/gough_internal.h b/contrib/libs/hyperscan/src/nfa/gough_internal.h
index 42d73970cf..8bf06e0f7f 100644
--- a/contrib/libs/hyperscan/src/nfa/gough_internal.h
+++ b/contrib/libs/hyperscan/src/nfa/gough_internal.h
@@ -1,134 +1,134 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef GOUGH_INTERNAL_H
-#define GOUGH_INTERNAL_H
-
-#include "accel.h"
-#include "mcclellan_internal.h"
-#include "ue2common.h"
-
-#define INVALID_SLOT (~0U)
-
-#define GOUGH_INS_END 0
-#define GOUGH_INS_MOV 1
-#define GOUGH_INS_NEW 2
-#define GOUGH_INS_MIN 3
-/* todo: add instructions targeting acc reg? */
-
-struct gough_ins {
- u32 op; /* u32 to avoid padding */
- u32 dest;
- u32 src; /* for GOUGH_INS_NEW, this specifies the adjustment to apply to the
- * current offset */
-};
-
-/*
- * HAPPY FUN ASCII ART TIME
- *
- * ----
- * | | struct NFA
- * ----
- * ~~~~ normal(ish) mcclellan engine
- * ~~~~
- * ~~~~
- * ~~~~
- * ~~~~
- * ~~~~
- * ~~~~
- * ~~~~
- * ---- = m->haig_offset
- * | | } struct gough_info
- * ----
- * | | }
- * | | } edge prog table -> provides the offset of the start of the program
- * | | } to run when the edge is taken. 0 indicates no
- * | | } work to do
- * ---- = h->top_prog_offset
- * | | }
- * | | } top prog table -> provides the offset of the start of the program
- * | | } to run when a top is taken from this state. 0
- * | | } indicates nothing to do
- * ---- = h->prog_base_offset
- * | | }
- * | | } programs to run
- * | | }
- * | | }
- * ----
- */
-
-struct gough_info {
- u32 top_prog_offset; /**< offset to the base of the top prog table */
- u32 prog_base_offset; /**< not used at runtime */
- u32 stream_som_loc_count; /**< number of som locs in the stream state */
- u8 stream_som_loc_width; /**< number of bytes per som loc */
-};
-
-static really_inline
-const struct gough_info *get_gough(const struct mcclellan *m) {
- assert(m->haig_offset);
- const char *n = (const char *)m - sizeof(struct NFA);
- return (const struct gough_info *)(n + m->haig_offset);
-}
-
-static really_inline
-const u32 *get_gough_top_offsets(const struct mcclellan *m) {
- const struct gough_info *g = get_gough(m);
- if (!g->top_prog_offset) {
- return NULL;
- }
- const char *n = (const char *)m - sizeof(struct NFA);
- return (const u32 *)(n + g->top_prog_offset);
-}
-
-/* Gough state representation in scratch.
- *
- * During execution, gough tracks a number of variables containing potential
- * starts of match. These are all stored in a large array of u64a slots.
- */
-struct gough_som_info {
- u64a slots[1]; /* 'flexible' member array */
-};
-
-struct gough_report {
- ReportID r;
- u32 som; /* som slot to report */
-};
-
-struct gough_report_list {
- u32 count;
- struct gough_report report[];
-};
-
-struct gough_accel {
- union AccelAux accel;
- u8 margin_dist;
- u32 prog_offset;
-};
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef GOUGH_INTERNAL_H
+#define GOUGH_INTERNAL_H
+
+#include "accel.h"
+#include "mcclellan_internal.h"
+#include "ue2common.h"
+
+#define INVALID_SLOT (~0U)
+
+#define GOUGH_INS_END 0
+#define GOUGH_INS_MOV 1
+#define GOUGH_INS_NEW 2
+#define GOUGH_INS_MIN 3
+/* todo: add instructions targeting acc reg? */
+
+struct gough_ins {
+ u32 op; /* u32 to avoid padding */
+ u32 dest;
+ u32 src; /* for GOUGH_INS_NEW, this specifies the adjustment to apply to the
+ * current offset */
+};
+
+/*
+ * HAPPY FUN ASCII ART TIME
+ *
+ * ----
+ * | | struct NFA
+ * ----
+ * ~~~~ normal(ish) mcclellan engine
+ * ~~~~
+ * ~~~~
+ * ~~~~
+ * ~~~~
+ * ~~~~
+ * ~~~~
+ * ~~~~
+ * ---- = m->haig_offset
+ * | | } struct gough_info
+ * ----
+ * | | }
+ * | | } edge prog table -> provides the offset of the start of the program
+ * | | } to run when the edge is taken. 0 indicates no
+ * | | } work to do
+ * ---- = h->top_prog_offset
+ * | | }
+ * | | } top prog table -> provides the offset of the start of the program
+ * | | } to run when a top is taken from this state. 0
+ * | | } indicates nothing to do
+ * ---- = h->prog_base_offset
+ * | | }
+ * | | } programs to run
+ * | | }
+ * | | }
+ * ----
+ */
+
+struct gough_info {
+ u32 top_prog_offset; /**< offset to the base of the top prog table */
+ u32 prog_base_offset; /**< not used at runtime */
+ u32 stream_som_loc_count; /**< number of som locs in the stream state */
+ u8 stream_som_loc_width; /**< number of bytes per som loc */
+};
+
+static really_inline
+const struct gough_info *get_gough(const struct mcclellan *m) {
+ assert(m->haig_offset);
+ const char *n = (const char *)m - sizeof(struct NFA);
+ return (const struct gough_info *)(n + m->haig_offset);
+}
+
+static really_inline
+const u32 *get_gough_top_offsets(const struct mcclellan *m) {
+ const struct gough_info *g = get_gough(m);
+ if (!g->top_prog_offset) {
+ return NULL;
+ }
+ const char *n = (const char *)m - sizeof(struct NFA);
+ return (const u32 *)(n + g->top_prog_offset);
+}
+
+/* Gough state representation in scratch.
+ *
+ * During execution, gough tracks a number of variables containing potential
+ * starts of match. These are all stored in a large array of u64a slots.
+ */
+struct gough_som_info {
+ u64a slots[1]; /* 'flexible' member array */
+};
+
+struct gough_report {
+ ReportID r;
+ u32 som; /* som slot to report */
+};
+
+struct gough_report_list {
+ u32 count;
+ struct gough_report report[];
+};
+
+struct gough_accel {
+ union AccelAux accel;
+ u8 margin_dist;
+ u32 prog_offset;
+};
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/goughcompile.cpp b/contrib/libs/hyperscan/src/nfa/goughcompile.cpp
index 0fd64bf126..d41c6f4235 100644
--- a/contrib/libs/hyperscan/src/nfa/goughcompile.cpp
+++ b/contrib/libs/hyperscan/src/nfa/goughcompile.cpp
@@ -1,1170 +1,1170 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "goughcompile.h"
-
-#include "accel.h"
-#include "goughcompile_dump.h"
-#include "goughcompile_internal.h"
-#include "gough_internal.h"
-#include "grey.h"
-#include "mcclellancompile.h"
-#include "nfa_internal.h"
-#include "util/compile_context.h"
-#include "util/container.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "goughcompile.h"
+
+#include "accel.h"
+#include "goughcompile_dump.h"
+#include "goughcompile_internal.h"
+#include "gough_internal.h"
+#include "grey.h"
+#include "mcclellancompile.h"
+#include "nfa_internal.h"
+#include "util/compile_context.h"
+#include "util/container.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
-#include "util/make_unique.h"
-#include "util/order_check.h"
+#include "util/graph_range.h"
+#include "util/make_unique.h"
+#include "util/order_check.h"
#include "util/report_manager.h"
-#include "util/verify_types.h"
-
-#include "ue2common.h"
-
-#include <algorithm>
-#include <boost/dynamic_bitset.hpp>
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_keys;
-using boost::adaptors::map_values;
-using boost::vertex_index;
-
-namespace ue2 {
-
-void raw_som_dfa::stripExtraEodReports(void) {
- /* if a state generates a given report as a normal accept - then it does
- * not also need to generate an eod report for it */
- for (vector<dstate_som>::iterator it = state_som.begin();
- it != state_som.end(); ++it) {
- for (const som_report &sr : it->reports) {
- it->reports_eod.erase(sr);
- }
- dstate &norm = states[it - state_som.begin()];
- norm.reports_eod.clear();
- for (const som_report &sr : it->reports_eod) {
- norm.reports_eod.insert(sr.report);
- }
- }
-}
-
-namespace {
-
-class gough_build_strat : public mcclellan_build_strat {
-public:
+#include "util/verify_types.h"
+
+#include "ue2common.h"
+
+#include <algorithm>
+#include <boost/dynamic_bitset.hpp>
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_keys;
+using boost::adaptors::map_values;
+using boost::vertex_index;
+
+namespace ue2 {
+
+void raw_som_dfa::stripExtraEodReports(void) {
+ /* if a state generates a given report as a normal accept - then it does
+ * not also need to generate an eod report for it */
+ for (vector<dstate_som>::iterator it = state_som.begin();
+ it != state_som.end(); ++it) {
+ for (const som_report &sr : it->reports) {
+ it->reports_eod.erase(sr);
+ }
+ dstate &norm = states[it - state_som.begin()];
+ norm.reports_eod.clear();
+ for (const som_report &sr : it->reports_eod) {
+ norm.reports_eod.insert(sr.report);
+ }
+ }
+}
+
+namespace {
+
+class gough_build_strat : public mcclellan_build_strat {
+public:
gough_build_strat(
raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm_in,
const map<dstate_id_t, gough_accel_state_info> &accel_info)
: mcclellan_build_strat(r, rm_in, false), rdfa(r), gg(g),
- accel_gough_info(accel_info) {}
- unique_ptr<raw_report_info> gatherReports(vector<u32> &reports /* out */,
- vector<u32> &reports_eod /* out */,
- u8 *isSingleReport /* out */,
- ReportID *arbReport /* out */) const override;
+ accel_gough_info(accel_info) {}
+ unique_ptr<raw_report_info> gatherReports(vector<u32> &reports /* out */,
+ vector<u32> &reports_eod /* out */,
+ u8 *isSingleReport /* out */,
+ ReportID *arbReport /* out */) const override;
AccelScheme find_escape_strings(dstate_id_t this_idx) const override;
- size_t accelSize(void) const override { return sizeof(gough_accel); }
+ size_t accelSize(void) const override { return sizeof(gough_accel); }
void buildAccel(dstate_id_t this_idx, const AccelScheme &info,
void *accel_out) override;
u32 max_allowed_offset_accel() const override { return 0; }
DfaType getType() const override { return Gough; }
-
- raw_som_dfa &rdfa;
- const GoughGraph &gg;
- map<dstate_id_t, gough_accel_state_info> accel_gough_info;
- map<gough_accel *, dstate_id_t> built_accel;
-};
-
-}
-
-GoughSSAVar::~GoughSSAVar() {
-}
-
-void GoughSSAVar::clear_outputs() {
- for (GoughSSAVarWithInputs *var : outputs) {
- var->remove_input_raw(this);
- }
- outputs.clear();
-}
-
-void GoughSSAVarWithInputs::clear_all() {
- clear_inputs();
- clear_outputs();
-}
-
-void GoughSSAVarMin::clear_inputs() {
- for (GoughSSAVar *var : inputs) {
- assert(contains(var->outputs, this));
- var->outputs.erase(this);
- }
- inputs.clear();
-}
-
-void GoughSSAVarMin::replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) {
- assert(contains(inputs, old_v));
- inputs.erase(old_v);
- old_v->outputs.erase(this);
- inputs.insert(new_v);
- new_v->outputs.insert(this);
-}
-
-static
-void translateRawReports(UNUSED GoughGraph &cfg, UNUSED const raw_som_dfa &raw,
- const flat_map<u32, GoughSSAVarJoin *> &joins_at_s,
- UNUSED GoughVertex s,
- const set<som_report> &reports_in,
- vector<pair<ReportID, GoughSSAVar *> > *reports_out) {
- for (const som_report &sr : reports_in) {
- DEBUG_PRINTF("state %u: report %u slot %d\n", cfg[s].state_id,
- sr.report, sr.slot);
- GoughSSAVar *var = nullptr;
- if (sr.slot == CREATE_NEW_SOM) {
- assert(!generates_callbacks(raw.kind));
- } else {
- var = joins_at_s.at(sr.slot);
- }
- reports_out->push_back(make_pair(sr.report, var));
- }
-}
-
-static
-void makeCFG_reports(GoughGraph &cfg, const raw_som_dfa &raw,
- const vector<flat_map<u32, GoughSSAVarJoin *> > &joins,
- const vector<GoughVertex> &vertices) {
- for (u32 i = 1; i < raw.states.size(); ++i) {
- GoughVertex s = vertices[i];
- const flat_map<u32, GoughSSAVarJoin *> &joins_at_s
- = joins[get(vertex_index, cfg, s)];
- translateRawReports(cfg, raw, joins_at_s, s,
- raw.state_som[i].reports, &cfg[s].reports);
- translateRawReports(cfg, raw, joins_at_s, s,
- raw.state_som[i].reports_eod, &cfg[s].reports_eod);
- }
-}
-
-static never_inline
-void makeCFG_top_edge(GoughGraph &cfg, const vector<GoughVertex> &vertices,
- const vector<flat_map<u32, GoughSSAVarJoin *> > &joins,
- u32 trigger_slot, const som_tran_info &src_slots,
- const som_tran_info &dest_slot_pred,
- dstate_id_t i, dstate_id_t n, const GoughEdge &e) {
- GoughVertex s = vertices[i];
- GoughVertex t = vertices[n];
- const flat_map<u32, GoughSSAVarJoin *> &joins_at_s
- = joins[get(vertex_index, cfg, s)];
- const flat_map<u32, GoughSSAVarJoin *> &joins_at_t
- = joins[get(vertex_index, cfg, t)];
-
- DEBUG_PRINTF("top for %u -> %u\n", i, n);
-
- for (som_tran_info::const_iterator it = dest_slot_pred.begin();
- it != dest_slot_pred.end(); ++it) {
- /* for ordering, need to ensure that new values feeding directly
- * into mins come first */
- u32 slot_id = it->first;
-
- shared_ptr<GoughSSAVarNew> vnew;
- if (slot_id == trigger_slot) {
- vnew = make_shared<GoughSSAVarNew>(0U);
- cfg[e].vars.push_back(vnew);
- } else {
- assert(contains(src_slots, slot_id));
- }
-
- GoughSSAVar *final_var;
- if (vnew && !contains(src_slots, slot_id)) {
- final_var = vnew.get();
- DEBUG_PRINTF("bypassing min on join %u\n", slot_id);
- } else if (!vnew) {
- final_var = joins_at_s.at(slot_id);
- DEBUG_PRINTF("bypassing min on join %u\n", slot_id);
- } else {
- assert(vnew);
- assert(contains(src_slots, slot_id));
-
- shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
- cfg[e].vars.push_back(vmin);
- final_var = vmin.get();
-
- DEBUG_PRINTF("slot %u gets a new value\n", slot_id);
- vmin->add_input(vnew.get());
-
- DEBUG_PRINTF("slot %u is constant\n", slot_id);
- vmin->add_input(joins_at_s.at(slot_id));
- }
-
- /* wire to destination target */
- GoughSSAVarJoin *vk = joins_at_t.at(slot_id);
- vk->add_input(final_var, e);
- }
-}
-
-static never_inline
-void makeCFG_edge(GoughGraph &cfg, const map<u32, u32> &som_creators,
- const vector<GoughVertex> &vertices,
- const vector<flat_map<u32, GoughSSAVarJoin *> > &joins,
- const som_tran_info &src_slots,
- const som_tran_info &dest_slot_pred, dstate_id_t i,
- dstate_id_t n, const GoughEdge &e) {
- GoughVertex s = vertices[i];
- GoughVertex t = vertices[n];
- const flat_map<u32, GoughSSAVarJoin *> &joins_at_s
- = joins[get(vertex_index, cfg, s)];
- const flat_map<u32, GoughSSAVarJoin *> &joins_at_t
- = joins[get(vertex_index, cfg, t)];
-
- map<u32, shared_ptr<GoughSSAVarNew> > vnew_by_adj;
- for (som_tran_info::const_iterator it = dest_slot_pred.begin();
- it != dest_slot_pred.end(); ++it) {
- /* for ordering, need to ensure that new values feeding directly
- * into mins come first */
- u32 slot_id = it->first;
-
- if (contains(som_creators, slot_id) && !som_creators.at(slot_id)) {
- continue;
- }
-
- shared_ptr<GoughSSAVarNew> vnew;
- const vector<u32> &inputs = it->second;
- u32 useful_input_count = 0;
- u32 first_useful_input = ~0U;
-
- for (const u32 &input_slot : inputs) {
- if (!contains(src_slots, input_slot)) {
- continue;
- }
- DEBUG_PRINTF("%u is useful\n", input_slot);
-
- if (!vnew || !contains(som_creators, input_slot)) {
- useful_input_count++;
- if (useful_input_count == 1) {
- first_useful_input = input_slot;
- }
- }
-
- if (contains(som_creators, input_slot)) {
- u32 adjust = som_creators.at(input_slot);
-
- if (vnew && vnew->adjust >= adjust) {
- DEBUG_PRINTF("skipping %u as domininated by adj%u\n",
- adjust, vnew->adjust);
- continue; /* deeper starts can be seen to statically
- dominate */
- }
-
- if (contains(vnew_by_adj, adjust)) {
- vnew = vnew_by_adj[adjust];
- } else {
- vnew = make_shared<GoughSSAVarNew>(adjust);
- cfg[e].vars.push_back(vnew);
- vnew_by_adj[adjust] = vnew;
- }
- assert(vnew);
- }
- }
-
- /* If we have a new start of match (with no offset or 1 byte offset) and
- * other variables coming in, the new will always be dominated by the
- * existing variables (as they must be at least one byte into the match)
- * -- and so can be dropped. */
- if (vnew && vnew->adjust < 2 && useful_input_count > 1) {
- useful_input_count--;
- vnew.reset();
-
- /* need to reestablish the first useful input */
- for (const u32 &input_slot : inputs) {
- if (!contains(src_slots, input_slot)) {
- continue;
- }
- if (!contains(som_creators, input_slot)) {
- first_useful_input = input_slot;
- }
- }
-
- }
-
- GoughSSAVar *final_var;
- if (useful_input_count == 1) {
- if (vnew) {
- final_var = vnew.get();
- } else {
- assert(first_useful_input != ~0U);
- final_var = joins_at_s.at(first_useful_input);
- }
- DEBUG_PRINTF("bypassing min on join %u\n", slot_id);
- } else {
- shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
- cfg[e].vars.push_back(vmin);
- final_var = vmin.get();
-
- if (vnew) {
- vmin->add_input(vnew.get());
- }
-
- /* wire the normal inputs to the min */
- for (const u32 &input_slot : inputs) {
- if (!contains(src_slots, input_slot)) {
- continue;
- }
- if (!contains(som_creators, input_slot)) {
- vmin->add_input(joins_at_s.at(input_slot));
- }
- }
- assert(vmin->get_inputs().size() > 1);
- DEBUG_PRINTF("wire min to join %u\n", slot_id);
- }
-
- GoughSSAVarJoin *vk = joins_at_t.at(slot_id);
- assert(final_var);
- vk->add_input(final_var, e);
- }
-}
-
-static never_inline
-unique_ptr<GoughGraph> makeCFG(const raw_som_dfa &raw) {
- vector<GoughVertex> vertices;
- vertices.reserve(raw.states.size());
- unique_ptr<GoughGraph> cfg = ue2::make_unique<GoughGraph>();
- u32 min_state = !is_triggered(raw.kind);
-
- if (min_state) {
- vertices.push_back(GoughGraph::null_vertex()); /* skip dead state */
- }
-
- vector<flat_map<u32, GoughSSAVarJoin *> > joins(raw.states.size());
- for (u32 i = min_state; i < raw.states.size(); ++i) {
- GoughVertex v = add_vertex(GoughVertexProps(i), *cfg);
- vertices.push_back(v);
-
- /* create JOIN variables */
- for (som_tran_info::const_iterator it = raw.state_som[i].preds.begin();
- it != raw.state_som[i].preds.end(); ++it) {
- u32 slot_id = it->first;
- if (!contains(raw.new_som_nfa_states, slot_id)
- || raw.new_som_nfa_states.at(slot_id)) {
- (*cfg)[v].vars.push_back(make_shared<GoughSSAVarJoin>());
- joins[get(vertex_index, *cfg, v)][slot_id]
- = (*cfg)[v].vars.back().get();
- DEBUG_PRINTF("dfa %u:: slot %u\n", i, slot_id);
- }
- }
- }
-
- u16 top_sym = raw.alpha_remap[TOP];
+
+ raw_som_dfa &rdfa;
+ const GoughGraph &gg;
+ map<dstate_id_t, gough_accel_state_info> accel_gough_info;
+ map<gough_accel *, dstate_id_t> built_accel;
+};
+
+}
+
+GoughSSAVar::~GoughSSAVar() {
+}
+
+void GoughSSAVar::clear_outputs() {
+ for (GoughSSAVarWithInputs *var : outputs) {
+ var->remove_input_raw(this);
+ }
+ outputs.clear();
+}
+
+void GoughSSAVarWithInputs::clear_all() {
+ clear_inputs();
+ clear_outputs();
+}
+
+void GoughSSAVarMin::clear_inputs() {
+ for (GoughSSAVar *var : inputs) {
+ assert(contains(var->outputs, this));
+ var->outputs.erase(this);
+ }
+ inputs.clear();
+}
+
+void GoughSSAVarMin::replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) {
+ assert(contains(inputs, old_v));
+ inputs.erase(old_v);
+ old_v->outputs.erase(this);
+ inputs.insert(new_v);
+ new_v->outputs.insert(this);
+}
+
+static
+void translateRawReports(UNUSED GoughGraph &cfg, UNUSED const raw_som_dfa &raw,
+ const flat_map<u32, GoughSSAVarJoin *> &joins_at_s,
+ UNUSED GoughVertex s,
+ const set<som_report> &reports_in,
+ vector<pair<ReportID, GoughSSAVar *> > *reports_out) {
+ for (const som_report &sr : reports_in) {
+ DEBUG_PRINTF("state %u: report %u slot %d\n", cfg[s].state_id,
+ sr.report, sr.slot);
+ GoughSSAVar *var = nullptr;
+ if (sr.slot == CREATE_NEW_SOM) {
+ assert(!generates_callbacks(raw.kind));
+ } else {
+ var = joins_at_s.at(sr.slot);
+ }
+ reports_out->push_back(make_pair(sr.report, var));
+ }
+}
+
+static
+void makeCFG_reports(GoughGraph &cfg, const raw_som_dfa &raw,
+ const vector<flat_map<u32, GoughSSAVarJoin *> > &joins,
+ const vector<GoughVertex> &vertices) {
+ for (u32 i = 1; i < raw.states.size(); ++i) {
+ GoughVertex s = vertices[i];
+ const flat_map<u32, GoughSSAVarJoin *> &joins_at_s
+ = joins[get(vertex_index, cfg, s)];
+ translateRawReports(cfg, raw, joins_at_s, s,
+ raw.state_som[i].reports, &cfg[s].reports);
+ translateRawReports(cfg, raw, joins_at_s, s,
+ raw.state_som[i].reports_eod, &cfg[s].reports_eod);
+ }
+}
+
+static never_inline
+void makeCFG_top_edge(GoughGraph &cfg, const vector<GoughVertex> &vertices,
+ const vector<flat_map<u32, GoughSSAVarJoin *> > &joins,
+ u32 trigger_slot, const som_tran_info &src_slots,
+ const som_tran_info &dest_slot_pred,
+ dstate_id_t i, dstate_id_t n, const GoughEdge &e) {
+ GoughVertex s = vertices[i];
+ GoughVertex t = vertices[n];
+ const flat_map<u32, GoughSSAVarJoin *> &joins_at_s
+ = joins[get(vertex_index, cfg, s)];
+ const flat_map<u32, GoughSSAVarJoin *> &joins_at_t
+ = joins[get(vertex_index, cfg, t)];
+
+ DEBUG_PRINTF("top for %u -> %u\n", i, n);
+
+ for (som_tran_info::const_iterator it = dest_slot_pred.begin();
+ it != dest_slot_pred.end(); ++it) {
+ /* for ordering, need to ensure that new values feeding directly
+ * into mins come first */
+ u32 slot_id = it->first;
+
+ shared_ptr<GoughSSAVarNew> vnew;
+ if (slot_id == trigger_slot) {
+ vnew = make_shared<GoughSSAVarNew>(0U);
+ cfg[e].vars.push_back(vnew);
+ } else {
+ assert(contains(src_slots, slot_id));
+ }
+
+ GoughSSAVar *final_var;
+ if (vnew && !contains(src_slots, slot_id)) {
+ final_var = vnew.get();
+ DEBUG_PRINTF("bypassing min on join %u\n", slot_id);
+ } else if (!vnew) {
+ final_var = joins_at_s.at(slot_id);
+ DEBUG_PRINTF("bypassing min on join %u\n", slot_id);
+ } else {
+ assert(vnew);
+ assert(contains(src_slots, slot_id));
+
+ shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
+ cfg[e].vars.push_back(vmin);
+ final_var = vmin.get();
+
+ DEBUG_PRINTF("slot %u gets a new value\n", slot_id);
+ vmin->add_input(vnew.get());
+
+ DEBUG_PRINTF("slot %u is constant\n", slot_id);
+ vmin->add_input(joins_at_s.at(slot_id));
+ }
+
+ /* wire to destination target */
+ GoughSSAVarJoin *vk = joins_at_t.at(slot_id);
+ vk->add_input(final_var, e);
+ }
+}
+
+static never_inline
+void makeCFG_edge(GoughGraph &cfg, const map<u32, u32> &som_creators,
+ const vector<GoughVertex> &vertices,
+ const vector<flat_map<u32, GoughSSAVarJoin *> > &joins,
+ const som_tran_info &src_slots,
+ const som_tran_info &dest_slot_pred, dstate_id_t i,
+ dstate_id_t n, const GoughEdge &e) {
+ GoughVertex s = vertices[i];
+ GoughVertex t = vertices[n];
+ const flat_map<u32, GoughSSAVarJoin *> &joins_at_s
+ = joins[get(vertex_index, cfg, s)];
+ const flat_map<u32, GoughSSAVarJoin *> &joins_at_t
+ = joins[get(vertex_index, cfg, t)];
+
+ map<u32, shared_ptr<GoughSSAVarNew> > vnew_by_adj;
+ for (som_tran_info::const_iterator it = dest_slot_pred.begin();
+ it != dest_slot_pred.end(); ++it) {
+ /* for ordering, need to ensure that new values feeding directly
+ * into mins come first */
+ u32 slot_id = it->first;
+
+ if (contains(som_creators, slot_id) && !som_creators.at(slot_id)) {
+ continue;
+ }
+
+ shared_ptr<GoughSSAVarNew> vnew;
+ const vector<u32> &inputs = it->second;
+ u32 useful_input_count = 0;
+ u32 first_useful_input = ~0U;
+
+ for (const u32 &input_slot : inputs) {
+ if (!contains(src_slots, input_slot)) {
+ continue;
+ }
+ DEBUG_PRINTF("%u is useful\n", input_slot);
+
+ if (!vnew || !contains(som_creators, input_slot)) {
+ useful_input_count++;
+ if (useful_input_count == 1) {
+ first_useful_input = input_slot;
+ }
+ }
+
+ if (contains(som_creators, input_slot)) {
+ u32 adjust = som_creators.at(input_slot);
+
+ if (vnew && vnew->adjust >= adjust) {
+ DEBUG_PRINTF("skipping %u as domininated by adj%u\n",
+ adjust, vnew->adjust);
+ continue; /* deeper starts can be seen to statically
+ dominate */
+ }
+
+ if (contains(vnew_by_adj, adjust)) {
+ vnew = vnew_by_adj[adjust];
+ } else {
+ vnew = make_shared<GoughSSAVarNew>(adjust);
+ cfg[e].vars.push_back(vnew);
+ vnew_by_adj[adjust] = vnew;
+ }
+ assert(vnew);
+ }
+ }
+
+ /* If we have a new start of match (with no offset or 1 byte offset) and
+ * other variables coming in, the new will always be dominated by the
+ * existing variables (as they must be at least one byte into the match)
+ * -- and so can be dropped. */
+ if (vnew && vnew->adjust < 2 && useful_input_count > 1) {
+ useful_input_count--;
+ vnew.reset();
+
+ /* need to reestablish the first useful input */
+ for (const u32 &input_slot : inputs) {
+ if (!contains(src_slots, input_slot)) {
+ continue;
+ }
+ if (!contains(som_creators, input_slot)) {
+ first_useful_input = input_slot;
+ }
+ }
+
+ }
+
+ GoughSSAVar *final_var;
+ if (useful_input_count == 1) {
+ if (vnew) {
+ final_var = vnew.get();
+ } else {
+ assert(first_useful_input != ~0U);
+ final_var = joins_at_s.at(first_useful_input);
+ }
+ DEBUG_PRINTF("bypassing min on join %u\n", slot_id);
+ } else {
+ shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>();
+ cfg[e].vars.push_back(vmin);
+ final_var = vmin.get();
+
+ if (vnew) {
+ vmin->add_input(vnew.get());
+ }
+
+ /* wire the normal inputs to the min */
+ for (const u32 &input_slot : inputs) {
+ if (!contains(src_slots, input_slot)) {
+ continue;
+ }
+ if (!contains(som_creators, input_slot)) {
+ vmin->add_input(joins_at_s.at(input_slot));
+ }
+ }
+ assert(vmin->get_inputs().size() > 1);
+ DEBUG_PRINTF("wire min to join %u\n", slot_id);
+ }
+
+ GoughSSAVarJoin *vk = joins_at_t.at(slot_id);
+ assert(final_var);
+ vk->add_input(final_var, e);
+ }
+}
+
+static never_inline
+unique_ptr<GoughGraph> makeCFG(const raw_som_dfa &raw) {
+ vector<GoughVertex> vertices;
+ vertices.reserve(raw.states.size());
+ unique_ptr<GoughGraph> cfg = ue2::make_unique<GoughGraph>();
+ u32 min_state = !is_triggered(raw.kind);
+
+ if (min_state) {
+ vertices.push_back(GoughGraph::null_vertex()); /* skip dead state */
+ }
+
+ vector<flat_map<u32, GoughSSAVarJoin *> > joins(raw.states.size());
+ for (u32 i = min_state; i < raw.states.size(); ++i) {
+ GoughVertex v = add_vertex(GoughVertexProps(i), *cfg);
+ vertices.push_back(v);
+
+ /* create JOIN variables */
+ for (som_tran_info::const_iterator it = raw.state_som[i].preds.begin();
+ it != raw.state_som[i].preds.end(); ++it) {
+ u32 slot_id = it->first;
+ if (!contains(raw.new_som_nfa_states, slot_id)
+ || raw.new_som_nfa_states.at(slot_id)) {
+ (*cfg)[v].vars.push_back(make_shared<GoughSSAVarJoin>());
+ joins[get(vertex_index, *cfg, v)][slot_id]
+ = (*cfg)[v].vars.back().get();
+ DEBUG_PRINTF("dfa %u:: slot %u\n", i, slot_id);
+ }
+ }
+ }
+
+ u16 top_sym = raw.alpha_remap[TOP];
DEBUG_PRINTF("top: %hu, kind %s\n", top_sym, to_string(raw.kind).c_str());
-
- /* create edges, JOIN variables (on edge targets) */
- map<dstate_id_t, GoughEdge> seen;
- for (u32 i = min_state; i < raw.states.size(); ++i) {
- seen.clear(); /* seen is really local to each state */
-
- DEBUG_PRINTF("creating edges out of %u/%zu\n", i, raw.states.size());
- GoughVertex s = vertices[i];
- const vector<dstate_id_t> &next = raw.states[i].next;
- for (u32 j = 0; j < next.size(); ++j) {
- if (!is_triggered(raw.kind) && j == top_sym) {
- continue;
- }
-
- dstate_id_t n = next[j];
- DEBUG_PRINTF(" edge to %hu out on %u\n", n, j);
- assert(n < raw.states.size());
- GoughVertex t = vertices[n];
-
- if (j == top_sym) {
- GoughEdge e = add_edge(s, t, *cfg).first;
- (*cfg)[e].top = true;
- makeCFG_top_edge(*cfg, vertices, joins, raw.trigger_nfa_state,
- raw.state_som[i].preds, raw.state_som[n].preds,
- i, n, e);
- } else {
- if (contains(seen, n)) {
- const GoughEdge &e = seen[n];
- (*cfg)[e].reach.set(j);
- continue;
- }
-
- GoughEdge e = add_edge(s, t, *cfg).first;
- (*cfg)[e].reach.set(j);
-
- seen[n] = e;
-
- makeCFG_edge(*cfg, raw.new_som_nfa_states, vertices, joins,
- raw.state_som[i].preds, raw.state_som[n].preds,
- i, n, e);
- }
- }
- }
-
- /* populate reports */
- makeCFG_reports(*cfg, raw, joins, vertices);
-
- using boost::graph_bundle;
- if (is_triggered(raw.kind)) {
- (*cfg)[graph_bundle].initial_vertex = vertices[DEAD_STATE];
- } else {
- (*cfg)[graph_bundle].initial_vertex = vertices[raw.start_anchored];
- }
-
- return cfg;
-}
-
-static
-void copy_propagate_report_set(vector<pair<ReportID, GoughSSAVar *> > &rep) {
- vector<pair<ReportID, GoughSSAVar *> >::iterator it = rep.begin();
- while (it != rep.end()) {
- GoughSSAVar *var = it->second;
- if (!var) {
- ++it;
- continue;
- }
- const flat_set<GoughSSAVar *> &inputs = var->get_inputs();
- if (inputs.size() != 1) {
- ++it;
- continue;
- }
- it->second = *inputs.begin(); /* note may result in dupes,
- filter later */
- }
-}
-
-template<typename VarP>
-void copy_propagate_update_vars(vector<VarP> &vars, bool *changes) {
- for (u32 i = 0; i < vars.size(); i++) {
- GoughSSAVar *vp = vars[i].get();
- const flat_set<GoughSSAVar *> &inputs = vp->get_inputs();
-
- /* no need to worry about data coming from self; ignore self loops */
- GoughSSAVar *new_input = nullptr;
-
- if (inputs.size() == 1) {
- new_input = *inputs.begin();
- } else if (inputs.size() == 2) {
- flat_set<GoughSSAVar *>::const_iterator jt = inputs.begin();
- GoughSSAVar *i_0 = *jt;
- GoughSSAVar *i_1 = *++jt;
-
- if (i_0 == vp) {
- new_input = i_1;
- } else if (i_1 == vp) {
- new_input = i_0;
- }
- }
-
- if (!new_input) {
- continue;
- }
-
- assert(new_input != vp);
-
- /* copy set as it will be modified by iteration */
- const flat_set<GoughSSAVarWithInputs *> outputs = vp->get_outputs();
-
- for (GoughSSAVar *curr : outputs) {
- curr->replace_input(vp, new_input);
- *changes = true;
- }
- }
-}
-
-static
-void copy_propagation(GoughGraph &g, const Grey &grey) {
- if (!grey.goughCopyPropagate) {
- return;
- }
- /* TODO order visit of variables sensibly */
- bool changes = false;
- do {
- DEBUG_PRINTF("new iteration\n");
- changes = false;
- for (auto v : vertices_range(g)) {
- copy_propagate_update_vars(g[v].vars, &changes);
- }
- for (const auto &e : edges_range(g)) {
- copy_propagate_update_vars(g[e].vars, &changes);
- }
- } while(changes);
-
- /* see if any reports can also be moved along */
- for (auto v : vertices_range(g)) {
- copy_propagate_report_set(g[v].reports);
- copy_propagate_report_set(g[v].reports_eod);
- }
-}
-
-static
-void mark_live_reports(const vector<pair<ReportID, GoughSSAVar *> > &reps,
- vector<GoughSSAVar *> *queue) {
- for (const auto &r : reps) {
- GoughSSAVar *var = r.second;
- if (!var || var->seen) {
- continue;
- }
- var->seen = true;
- queue->push_back(var);
- }
-}
-
-static
-void remove_dead(GoughGraph &g) {
- vector<GoughSSAVar *> queue;
-
- for (auto v : vertices_range(g)) {
- mark_live_reports(g[v].reports, &queue);
- mark_live_reports(g[v].reports_eod, &queue);
- }
-
- while (!queue.empty()) {
- GoughSSAVar *v = queue.back();
- queue.pop_back();
- for (GoughSSAVar *var : v->get_inputs()) {
- if (var->seen) {
- continue;
- }
- var->seen = true;
- queue.push_back(var);
- }
- }
-
- /* remove unused variables */
- for (auto v : vertices_range(g)) {
- for (u32 i = 0; i < g[v].vars.size(); i++) {
- GoughSSAVar *var = g[v].vars[i].get();
- if (var->seen) {
- continue;
- }
- var->clear_all();
- g[v].vars.erase(g[v].vars.begin() + i);
- i--;
- }
- }
- for (const auto &e : edges_range(g)) {
- for (u32 i = 0; i < g[e].vars.size(); i++) {
- GoughSSAVar *var = g[e].vars[i].get();
- if (var->seen) {
- continue;
- }
- var->clear_all();
- g[e].vars.erase(g[e].vars.begin() + i);
- i--;
- }
- }
-}
-
-static
-gough_ins make_gough_ins(u8 op, u32 dest = INVALID_SLOT,
- u32 src = INVALID_SLOT) {
- assert(dest != INVALID_SLOT || op == GOUGH_INS_END);
- assert(src != INVALID_SLOT || op == GOUGH_INS_END || op == GOUGH_INS_NEW);
- gough_ins rv;
- rv.op = op;
- rv.dest = dest;
- rv.src = src;
- return rv;
-}
-
-void GoughSSAVarNew::generate(vector<gough_ins> *out) const {
- assert(slot != INVALID_SLOT);
- out->push_back(make_gough_ins(GOUGH_INS_NEW, slot, adjust));
-}
-
-#ifndef NDEBUG
-template<typename C, typename K>
-bool contains_loose(const C &container, const K &key) {
- for (const auto &elem : container) {
- if (elem == key) {
- return true;
- }
- }
- return false;
-}
-#endif
-
-void GoughSSAVarMin::generate(vector<gough_ins> *out) const {
- assert(slot != INVALID_SLOT);
- assert(!inputs.empty());
- // assert(inputs.size() > 1);
- vector<u32> input_slots; /* for determinism */
- bool first = true;
- for (const GoughSSAVar *var : inputs) {
- assert(contains_loose(var->outputs, this));
- if (var->slot == slot) {
- /* if the destination is one of the sources, no need to move it */
- first = false;
- } else {
- input_slots.push_back(var->slot);
- }
- }
-
- sort(input_slots.begin(), input_slots.end());
-
- for (const u32 &input_slot : input_slots) {
- if (first) {
- out->push_back(make_gough_ins(GOUGH_INS_MOV, slot, input_slot));
- first = false;
- } else {
- out->push_back(make_gough_ins(GOUGH_INS_MIN, slot, input_slot));
- }
- }
-}
-
-void GoughSSAVarMin::remove_input_raw(GoughSSAVar *v) {
- assert(contains(inputs, v));
- inputs.erase(v);
-}
-
-void GoughSSAVarJoin::generate(UNUSED vector<gough_ins> *out) const {
- assert(0);
-}
-
-GoughSSAVar *GoughSSAVarJoin::get_input(const GoughEdge &prev) const {
- for (const auto &var_edge : input_map) {
- if (contains(var_edge.second, prev)) {
- return var_edge.first;
- }
- }
- assert(0);
- return nullptr;
-}
-
-const flat_set<GoughEdge> &GoughSSAVarJoin::get_edges_for_input(
- GoughSSAVar *input) const {
- return input_map.at(input);
-}
-
-const map<GoughSSAVar *, flat_set<GoughEdge> > &GoughSSAVarJoin::get_input_map()
- const {
- return input_map;
-}
-
-void GoughSSAVarJoin::clear_inputs() {
- for (GoughSSAVar *var : input_map | map_keys) {
- assert(contains(var->outputs, this));
- var->outputs.erase(this);
- }
- input_map.clear();
- inputs.clear();
-}
-
-void GoughSSAVarJoin::replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) {
- assert(contains(input_map, old_v));
- assert(contains(inputs, old_v));
- if (old_v == new_v) {
- assert(0);
- return;
- }
- insert(&input_map[new_v], input_map[old_v]);
- input_map.erase(old_v);
- inputs.erase(old_v);
- inputs.insert(new_v);
- old_v->outputs.erase(this);
- new_v->outputs.insert(this);
-}
-
-void GoughSSAVarJoin::add_input(GoughSSAVar *v, GoughEdge prev) {
- input_map[v].insert(prev);
- inputs.insert(v);
- v->outputs.insert(this);
-}
-
-void GoughSSAVarJoin::remove_input_raw(GoughSSAVar *v) {
- assert(contains(inputs, v));
- assert(contains(input_map, v));
- input_map.erase(v);
- inputs.erase(v);
-}
-
-static
-u32 highest_slot_used(const vector<gough_ins> &program) {
- u32 rv = INVALID_SLOT;
- for (const gough_ins &ins : program) {
- if (rv == INVALID_SLOT) {
- rv = ins.dest;
- } else if (ins.dest != INVALID_SLOT) {
- ENSURE_AT_LEAST(&rv, ins.dest);
- }
- if (rv == INVALID_SLOT) {
- rv = ins.src;
- } else if (ins.src != INVALID_SLOT) {
- ENSURE_AT_LEAST(&rv, ins.src);
- }
- }
- assert(rv != INVALID_SLOT);
- return rv;
-}
-
-static
-u32 highest_slot_used(const map<gough_edge_id, vector<gough_ins> > &blocks) {
- u32 rv = INVALID_SLOT;
- for (const vector<gough_ins> &ins_list : blocks | map_values) {
- u32 used = highest_slot_used(ins_list);
- if (rv == INVALID_SLOT) {
- rv = used;
- } else if (used != INVALID_SLOT) {
- ENSURE_AT_LEAST(&rv, used);
- }
- }
- return rv;
-}
-
-static
-void add_to_block(const vector<shared_ptr<GoughSSAVar> > &vars,
- vector<gough_ins> *out) {
- for (const auto &var : vars) {
- var->generate(out);
- }
-}
-
-namespace {
-struct edge_join_info {
- bool empty() const { return dest_to_src.empty(); }
-
- void insert(u32 src, u32 dest) {
- assert(!contains(dest_to_src, dest));
- assert(src != dest);
- dest_to_src[dest] = src;
- src_to_dest[src].insert(dest);
- }
-
- void erase(u32 src, u32 dest) {
- assert(dest_to_src.at(dest) == src);
- dest_to_src.erase(dest);
- src_to_dest[src].erase(dest);
-
- if (src_to_dest[src].empty()) {
- src_to_dest.erase(src);
- }
- }
-
- bool is_src(u32 v) const {
- bool rv = contains(src_to_dest, v);
- assert(!rv || !src_to_dest.at(v).empty());
- return rv;
- }
-
- bool is_dest(u32 v) const {
- return contains(dest_to_src, v);
- }
-
- void remap_src(u32 old_src, u32 new_src) {
- assert(is_src(old_src));
- assert(!is_src(new_src));
-
- for (const u32 &e : src_to_dest[old_src]) {
- assert(e != new_src);
- dest_to_src[e] = new_src;
- }
- src_to_dest[new_src].swap(src_to_dest[old_src]);
- src_to_dest.erase(old_src);
-
- assert(!is_src(old_src));
- assert(is_src(new_src));
- }
-
- /* returns an arbitrary unresolved entry */
- void get_pending(u32 *src, u32 *dest) {
- assert(!empty());
- *dest = dest_to_src.begin()->first;
- *src = dest_to_src.begin()->second;
- }
-
- const map<u32, u32> &get_dest_mapping() const { return dest_to_src; }
-
-private:
- map<u32, set<u32> > src_to_dest;
- map<u32, u32> dest_to_src;
-};
-
-}
-
-static
-void prep_joins_for_generation(const GoughGraph &g, GoughVertex v,
- map<GoughEdge, edge_join_info> *edge_info) {
- DEBUG_PRINTF("writing out joins for %u\n", g[v].state_id);
- for (const auto &var : g[v].vars) {
- u32 dest_slot = var->slot;
- for (const auto &var_edges : var->get_input_map()) {
- u32 input = var_edges.first->slot;
- if (dest_slot == input) {
- continue;
- }
-
- for (const GoughEdge &incoming_edge : var_edges.second) {
- (*edge_info)[incoming_edge].insert(input, dest_slot);
- DEBUG_PRINTF("need %u<-%u\n", dest_slot, input);
- }
- }
- }
-}
-
-static
-void add_simple_joins(edge_join_info &eji, vector<gough_ins> *out) {
- /* any slot whose value we don't need can be written to immediately */
- const map<u32, u32> &dest_to_src = eji.get_dest_mapping();
-
- bool changed;
- do {
- changed = false;
- for (map<u32, u32>::const_iterator it = dest_to_src.begin();
- it != dest_to_src.end();) {
- u32 src = it->second;
- u32 dest = it->first;
- ++it; /* avoid iterator being invalidated */
-
- if (eji.is_src(dest)) {
- continue; /* conflict; not simple (yet) */
- }
-
- /* value of destination slot is not used by any remaining joins;
- * we can output this join immediately */
- DEBUG_PRINTF("out %u<-%u\n", dest, src);
- out->push_back(make_gough_ins(GOUGH_INS_MOV, dest, src));
-
- eji.erase(src, dest);
-
- if (eji.is_dest(src) && eji.is_src(src)) {
- /* we can unblock src being used as an output by shifting
- * across everybody using src as input to using dest (as == src
- * now) */
- eji.remap_src(src, dest);
- }
- changed = true;
- }
- } while (changed);
-}
-
-static
-void add_joins_to_block(edge_join_info &eji, vector<gough_ins> *out,
- u32 base_temp_slot) {
- /* joins happen concurrently: none of them should see the outputs of another
- * join happening due to the same entry of the vertex. If there are
- * conflicts we may have to handle things by using a temp output slot for
- * each join and then copying into the final slot.
- */
-
- add_simple_joins(eji, out);
- while (!eji.empty()) {
- u32 split;
- u32 input_for_split;
- eji.get_pending(&input_for_split, &split);
-
- assert(eji.is_src(split)); /* otherwise should be handled by simple */
-
- /* stash the initial value of the split register in a temp register */
- u32 temp = base_temp_slot++;
- DEBUG_PRINTF("out %u<-%u\n", temp, split);
- out->push_back(make_gough_ins(GOUGH_INS_MOV, temp, split));
- eji.remap_src(split, temp); /* update maps */
-
- /* split can now be safely written out to as all the uses of it as an
- * input now refer to temp instead */
-
- DEBUG_PRINTF("out %u<-%u\n", split, input_for_split);
- out->push_back(make_gough_ins(GOUGH_INS_MOV, split, input_for_split));
- eji.erase(input_for_split, split);
-
- /* handle any uncovered simple cases */
- add_simple_joins(eji, out);
- }
-}
-
-static
-void build_blocks(const GoughGraph &g,
- map<gough_edge_id, vector<gough_ins> > *blocks,
- u32 base_temp_slot) {
- for (const auto &e : edges_range(g)) {
- if (g[e].vars.empty()) {
- continue;
- }
-
- vector<gough_ins> &block = (*blocks)[gough_edge_id(g, e)];
- add_to_block(g[e].vars, &block);
- assert(!block.empty());
- }
-
- for (const auto t : vertices_range(g)) {
- if (g[t].vars.empty()) {
- continue;
- }
-
- map<GoughEdge, edge_join_info> eji;
- prep_joins_for_generation(g, t, &eji);
-
- for (auto &m : eji) {
- vector<gough_ins> &block = (*blocks)[gough_edge_id(g, m.first)];
- u32 cur_base = base_temp_slot;
- if (!block.empty()) {
- /* some temp slots may already be in use by short-lived vars */
- ENSURE_AT_LEAST(&cur_base, highest_slot_used(block) + 1);
- }
-
- add_joins_to_block(m.second, &block, cur_base);
- if (block.empty()) {
- blocks->erase(gough_edge_id(g, m.first));
- }
- }
- }
-
- for (vector<gough_ins> &ins_list : *blocks | map_values) {
- assert(!ins_list.empty());
- ins_list.push_back(make_gough_ins(GOUGH_INS_END));
- }
-}
-
-static
-void copy_in_blocks(raw_som_dfa &raw, u8 alphaShift, const GoughGraph &cfg,
- const map<gough_edge_id, vector<gough_ins> > &blocks,
- u32 *edge_blocks, u32 *top_blocks, u32 base_offset,
- map<vector<gough_ins>, u32> *prog_offsets,
- vector<gough_ins> *out) {
- u32 impl_alpha_size = 1U << alphaShift;
- UNUSED u32 top_sym = raw.alpha_remap[TOP];
- assert(top_sym == raw.alpha_size - 1U);
- map<vector<gough_ins>, u32> &processed = *prog_offsets;
-
- for (const auto &e : edges_range(cfg)) {
- if (!contains(blocks, gough_edge_id(cfg, e))) {
- continue;
- }
- const vector<gough_ins> &block = blocks.at(gough_edge_id(cfg, e));
- u32 prog_offset;
- if (!contains(processed, block)) {
- prog_offset = base_offset + byte_length(*out);
- insert(out, out->end(), block);
- processed[block] = prog_offset;
- } else {
- prog_offset = processed[block];
- }
-
- /* update edges */
- u32 s_id = cfg[source(e, cfg)].state_id;
- UNUSED u32 t_id = cfg[target(e, cfg)].state_id;
- u32 impl_src_id = raw.states[s_id].impl_id;
- DEBUG_PRINTF("%u: writing out block for edge_%u_%u at %u:\n",
- impl_src_id, s_id, t_id,prog_offset);
-
- for (u32 j = cfg[e].reach.find_first(); j != CharReach::npos;
- j = cfg[e].reach.find_next(j)) {
- assert(raw.states[s_id].next[j] == t_id);
- u32 edge_index = impl_src_id * impl_alpha_size + j;
- DEBUG_PRINTF("\tsetting on %u, %u\n", j, edge_index);
- edge_blocks[edge_index] = prog_offset;
- }
-
- if (cfg[e].top) {
- assert(raw.states[s_id].next[top_sym] == t_id);
- DEBUG_PRINTF("\tsetting top on %u to block at %u\n", impl_src_id,
- prog_offset);
- top_blocks[impl_src_id] = prog_offset;
- }
- }
-}
-
-bool find_normal_self_loop(GoughVertex v, const GoughGraph &g, GoughEdge *out) {
- for (const auto &e : out_edges_range(v, g)) {
- if (target(e, g) != v) {
- continue;
- }
- if (g[e].top) {
- assert(g[e].reach.find_first() == CharReach::npos);
- continue; /* corresponds to a top, not a normal transition */
- }
-
- *out = e;
- return true;
- }
-
- return false;
-}
-
-static never_inline
-void update_accel_prog_offset(const gough_build_strat &gbs,
- const map<gough_edge_id, vector<gough_ins> > &blocks,
- const map<vector<gough_ins>, u32> &prog_offsets) {
- map<dstate_id_t, GoughVertex> verts;
- for (auto v : vertices_range(gbs.gg)) {
- verts[gbs.gg[v].state_id] = v;
- }
-
- for (auto &m : gbs.built_accel) {
- gough_accel *ga = m.first;
- assert(!ga->prog_offset);
- GoughVertex v = verts[m.second];
- GoughEdge e;
- UNUSED bool rv = find_normal_self_loop(v, gbs.gg, &e);
- assert(rv);
-
- if (!rv) {
- continue;
- }
-
- DEBUG_PRINTF("updating state %u accel with margin %hhu\n",
- gbs.gg[v].state_id, ga->margin_dist);
- if (contains(blocks, gough_edge_id(gbs.gg, e))) {
- const vector<gough_ins> &block
- = blocks.at(gough_edge_id(gbs.gg, e));
- ga->prog_offset = prog_offsets.at(block);
- DEBUG_PRINTF("prog offset %u\n", ga->prog_offset);
- } else {
- ga->margin_dist = 0;
- DEBUG_PRINTF("removing margin as no som\n");
- }
- }
-}
-
+
+ /* create edges, JOIN variables (on edge targets) */
+ map<dstate_id_t, GoughEdge> seen;
+ for (u32 i = min_state; i < raw.states.size(); ++i) {
+ seen.clear(); /* seen is really local to each state */
+
+ DEBUG_PRINTF("creating edges out of %u/%zu\n", i, raw.states.size());
+ GoughVertex s = vertices[i];
+ const vector<dstate_id_t> &next = raw.states[i].next;
+ for (u32 j = 0; j < next.size(); ++j) {
+ if (!is_triggered(raw.kind) && j == top_sym) {
+ continue;
+ }
+
+ dstate_id_t n = next[j];
+ DEBUG_PRINTF(" edge to %hu out on %u\n", n, j);
+ assert(n < raw.states.size());
+ GoughVertex t = vertices[n];
+
+ if (j == top_sym) {
+ GoughEdge e = add_edge(s, t, *cfg).first;
+ (*cfg)[e].top = true;
+ makeCFG_top_edge(*cfg, vertices, joins, raw.trigger_nfa_state,
+ raw.state_som[i].preds, raw.state_som[n].preds,
+ i, n, e);
+ } else {
+ if (contains(seen, n)) {
+ const GoughEdge &e = seen[n];
+ (*cfg)[e].reach.set(j);
+ continue;
+ }
+
+ GoughEdge e = add_edge(s, t, *cfg).first;
+ (*cfg)[e].reach.set(j);
+
+ seen[n] = e;
+
+ makeCFG_edge(*cfg, raw.new_som_nfa_states, vertices, joins,
+ raw.state_som[i].preds, raw.state_som[n].preds,
+ i, n, e);
+ }
+ }
+ }
+
+ /* populate reports */
+ makeCFG_reports(*cfg, raw, joins, vertices);
+
+ using boost::graph_bundle;
+ if (is_triggered(raw.kind)) {
+ (*cfg)[graph_bundle].initial_vertex = vertices[DEAD_STATE];
+ } else {
+ (*cfg)[graph_bundle].initial_vertex = vertices[raw.start_anchored];
+ }
+
+ return cfg;
+}
+
+static
+void copy_propagate_report_set(vector<pair<ReportID, GoughSSAVar *> > &rep) {
+ vector<pair<ReportID, GoughSSAVar *> >::iterator it = rep.begin();
+ while (it != rep.end()) {
+ GoughSSAVar *var = it->second;
+ if (!var) {
+ ++it;
+ continue;
+ }
+ const flat_set<GoughSSAVar *> &inputs = var->get_inputs();
+ if (inputs.size() != 1) {
+ ++it;
+ continue;
+ }
+ it->second = *inputs.begin(); /* note may result in dupes,
+ filter later */
+ }
+}
+
+template<typename VarP>
+void copy_propagate_update_vars(vector<VarP> &vars, bool *changes) {
+ for (u32 i = 0; i < vars.size(); i++) {
+ GoughSSAVar *vp = vars[i].get();
+ const flat_set<GoughSSAVar *> &inputs = vp->get_inputs();
+
+ /* no need to worry about data coming from self; ignore self loops */
+ GoughSSAVar *new_input = nullptr;
+
+ if (inputs.size() == 1) {
+ new_input = *inputs.begin();
+ } else if (inputs.size() == 2) {
+ flat_set<GoughSSAVar *>::const_iterator jt = inputs.begin();
+ GoughSSAVar *i_0 = *jt;
+ GoughSSAVar *i_1 = *++jt;
+
+ if (i_0 == vp) {
+ new_input = i_1;
+ } else if (i_1 == vp) {
+ new_input = i_0;
+ }
+ }
+
+ if (!new_input) {
+ continue;
+ }
+
+ assert(new_input != vp);
+
+ /* copy set as it will be modified by iteration */
+ const flat_set<GoughSSAVarWithInputs *> outputs = vp->get_outputs();
+
+ for (GoughSSAVar *curr : outputs) {
+ curr->replace_input(vp, new_input);
+ *changes = true;
+ }
+ }
+}
+
+static
+void copy_propagation(GoughGraph &g, const Grey &grey) {
+ if (!grey.goughCopyPropagate) {
+ return;
+ }
+ /* TODO order visit of variables sensibly */
+ bool changes = false;
+ do {
+ DEBUG_PRINTF("new iteration\n");
+ changes = false;
+ for (auto v : vertices_range(g)) {
+ copy_propagate_update_vars(g[v].vars, &changes);
+ }
+ for (const auto &e : edges_range(g)) {
+ copy_propagate_update_vars(g[e].vars, &changes);
+ }
+ } while(changes);
+
+ /* see if any reports can also be moved along */
+ for (auto v : vertices_range(g)) {
+ copy_propagate_report_set(g[v].reports);
+ copy_propagate_report_set(g[v].reports_eod);
+ }
+}
+
+static
+void mark_live_reports(const vector<pair<ReportID, GoughSSAVar *> > &reps,
+ vector<GoughSSAVar *> *queue) {
+ for (const auto &r : reps) {
+ GoughSSAVar *var = r.second;
+ if (!var || var->seen) {
+ continue;
+ }
+ var->seen = true;
+ queue->push_back(var);
+ }
+}
+
+static
+void remove_dead(GoughGraph &g) {
+ vector<GoughSSAVar *> queue;
+
+ for (auto v : vertices_range(g)) {
+ mark_live_reports(g[v].reports, &queue);
+ mark_live_reports(g[v].reports_eod, &queue);
+ }
+
+ while (!queue.empty()) {
+ GoughSSAVar *v = queue.back();
+ queue.pop_back();
+ for (GoughSSAVar *var : v->get_inputs()) {
+ if (var->seen) {
+ continue;
+ }
+ var->seen = true;
+ queue.push_back(var);
+ }
+ }
+
+ /* remove unused variables */
+ for (auto v : vertices_range(g)) {
+ for (u32 i = 0; i < g[v].vars.size(); i++) {
+ GoughSSAVar *var = g[v].vars[i].get();
+ if (var->seen) {
+ continue;
+ }
+ var->clear_all();
+ g[v].vars.erase(g[v].vars.begin() + i);
+ i--;
+ }
+ }
+ for (const auto &e : edges_range(g)) {
+ for (u32 i = 0; i < g[e].vars.size(); i++) {
+ GoughSSAVar *var = g[e].vars[i].get();
+ if (var->seen) {
+ continue;
+ }
+ var->clear_all();
+ g[e].vars.erase(g[e].vars.begin() + i);
+ i--;
+ }
+ }
+}
+
+static
+gough_ins make_gough_ins(u8 op, u32 dest = INVALID_SLOT,
+ u32 src = INVALID_SLOT) {
+ assert(dest != INVALID_SLOT || op == GOUGH_INS_END);
+ assert(src != INVALID_SLOT || op == GOUGH_INS_END || op == GOUGH_INS_NEW);
+ gough_ins rv;
+ rv.op = op;
+ rv.dest = dest;
+ rv.src = src;
+ return rv;
+}
+
+void GoughSSAVarNew::generate(vector<gough_ins> *out) const {
+ assert(slot != INVALID_SLOT);
+ out->push_back(make_gough_ins(GOUGH_INS_NEW, slot, adjust));
+}
+
+#ifndef NDEBUG
+template<typename C, typename K>
+bool contains_loose(const C &container, const K &key) {
+ for (const auto &elem : container) {
+ if (elem == key) {
+ return true;
+ }
+ }
+ return false;
+}
+#endif
+
+void GoughSSAVarMin::generate(vector<gough_ins> *out) const {
+ assert(slot != INVALID_SLOT);
+ assert(!inputs.empty());
+ // assert(inputs.size() > 1);
+ vector<u32> input_slots; /* for determinism */
+ bool first = true;
+ for (const GoughSSAVar *var : inputs) {
+ assert(contains_loose(var->outputs, this));
+ if (var->slot == slot) {
+ /* if the destination is one of the sources, no need to move it */
+ first = false;
+ } else {
+ input_slots.push_back(var->slot);
+ }
+ }
+
+ sort(input_slots.begin(), input_slots.end());
+
+ for (const u32 &input_slot : input_slots) {
+ if (first) {
+ out->push_back(make_gough_ins(GOUGH_INS_MOV, slot, input_slot));
+ first = false;
+ } else {
+ out->push_back(make_gough_ins(GOUGH_INS_MIN, slot, input_slot));
+ }
+ }
+}
+
+void GoughSSAVarMin::remove_input_raw(GoughSSAVar *v) {
+ assert(contains(inputs, v));
+ inputs.erase(v);
+}
+
+void GoughSSAVarJoin::generate(UNUSED vector<gough_ins> *out) const {
+ assert(0);
+}
+
+GoughSSAVar *GoughSSAVarJoin::get_input(const GoughEdge &prev) const {
+ for (const auto &var_edge : input_map) {
+ if (contains(var_edge.second, prev)) {
+ return var_edge.first;
+ }
+ }
+ assert(0);
+ return nullptr;
+}
+
+const flat_set<GoughEdge> &GoughSSAVarJoin::get_edges_for_input(
+ GoughSSAVar *input) const {
+ return input_map.at(input);
+}
+
+const map<GoughSSAVar *, flat_set<GoughEdge> > &GoughSSAVarJoin::get_input_map()
+ const {
+ return input_map;
+}
+
+void GoughSSAVarJoin::clear_inputs() {
+ for (GoughSSAVar *var : input_map | map_keys) {
+ assert(contains(var->outputs, this));
+ var->outputs.erase(this);
+ }
+ input_map.clear();
+ inputs.clear();
+}
+
+void GoughSSAVarJoin::replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) {
+ assert(contains(input_map, old_v));
+ assert(contains(inputs, old_v));
+ if (old_v == new_v) {
+ assert(0);
+ return;
+ }
+ insert(&input_map[new_v], input_map[old_v]);
+ input_map.erase(old_v);
+ inputs.erase(old_v);
+ inputs.insert(new_v);
+ old_v->outputs.erase(this);
+ new_v->outputs.insert(this);
+}
+
+void GoughSSAVarJoin::add_input(GoughSSAVar *v, GoughEdge prev) {
+ input_map[v].insert(prev);
+ inputs.insert(v);
+ v->outputs.insert(this);
+}
+
+void GoughSSAVarJoin::remove_input_raw(GoughSSAVar *v) {
+ assert(contains(inputs, v));
+ assert(contains(input_map, v));
+ input_map.erase(v);
+ inputs.erase(v);
+}
+
+static
+u32 highest_slot_used(const vector<gough_ins> &program) {
+ u32 rv = INVALID_SLOT;
+ for (const gough_ins &ins : program) {
+ if (rv == INVALID_SLOT) {
+ rv = ins.dest;
+ } else if (ins.dest != INVALID_SLOT) {
+ ENSURE_AT_LEAST(&rv, ins.dest);
+ }
+ if (rv == INVALID_SLOT) {
+ rv = ins.src;
+ } else if (ins.src != INVALID_SLOT) {
+ ENSURE_AT_LEAST(&rv, ins.src);
+ }
+ }
+ assert(rv != INVALID_SLOT);
+ return rv;
+}
+
+static
+u32 highest_slot_used(const map<gough_edge_id, vector<gough_ins> > &blocks) {
+ u32 rv = INVALID_SLOT;
+ for (const vector<gough_ins> &ins_list : blocks | map_values) {
+ u32 used = highest_slot_used(ins_list);
+ if (rv == INVALID_SLOT) {
+ rv = used;
+ } else if (used != INVALID_SLOT) {
+ ENSURE_AT_LEAST(&rv, used);
+ }
+ }
+ return rv;
+}
+
+static
+void add_to_block(const vector<shared_ptr<GoughSSAVar> > &vars,
+ vector<gough_ins> *out) {
+ for (const auto &var : vars) {
+ var->generate(out);
+ }
+}
+
+namespace {
+struct edge_join_info {
+ bool empty() const { return dest_to_src.empty(); }
+
+ void insert(u32 src, u32 dest) {
+ assert(!contains(dest_to_src, dest));
+ assert(src != dest);
+ dest_to_src[dest] = src;
+ src_to_dest[src].insert(dest);
+ }
+
+ void erase(u32 src, u32 dest) {
+ assert(dest_to_src.at(dest) == src);
+ dest_to_src.erase(dest);
+ src_to_dest[src].erase(dest);
+
+ if (src_to_dest[src].empty()) {
+ src_to_dest.erase(src);
+ }
+ }
+
+ bool is_src(u32 v) const {
+ bool rv = contains(src_to_dest, v);
+ assert(!rv || !src_to_dest.at(v).empty());
+ return rv;
+ }
+
+ bool is_dest(u32 v) const {
+ return contains(dest_to_src, v);
+ }
+
+ void remap_src(u32 old_src, u32 new_src) {
+ assert(is_src(old_src));
+ assert(!is_src(new_src));
+
+ for (const u32 &e : src_to_dest[old_src]) {
+ assert(e != new_src);
+ dest_to_src[e] = new_src;
+ }
+ src_to_dest[new_src].swap(src_to_dest[old_src]);
+ src_to_dest.erase(old_src);
+
+ assert(!is_src(old_src));
+ assert(is_src(new_src));
+ }
+
+ /* returns an arbitrary unresolved entry */
+ void get_pending(u32 *src, u32 *dest) {
+ assert(!empty());
+ *dest = dest_to_src.begin()->first;
+ *src = dest_to_src.begin()->second;
+ }
+
+ const map<u32, u32> &get_dest_mapping() const { return dest_to_src; }
+
+private:
+ map<u32, set<u32> > src_to_dest;
+ map<u32, u32> dest_to_src;
+};
+
+}
+
+static
+void prep_joins_for_generation(const GoughGraph &g, GoughVertex v,
+ map<GoughEdge, edge_join_info> *edge_info) {
+ DEBUG_PRINTF("writing out joins for %u\n", g[v].state_id);
+ for (const auto &var : g[v].vars) {
+ u32 dest_slot = var->slot;
+ for (const auto &var_edges : var->get_input_map()) {
+ u32 input = var_edges.first->slot;
+ if (dest_slot == input) {
+ continue;
+ }
+
+ for (const GoughEdge &incoming_edge : var_edges.second) {
+ (*edge_info)[incoming_edge].insert(input, dest_slot);
+ DEBUG_PRINTF("need %u<-%u\n", dest_slot, input);
+ }
+ }
+ }
+}
+
+static
+void add_simple_joins(edge_join_info &eji, vector<gough_ins> *out) {
+ /* any slot whose value we don't need can be written to immediately */
+ const map<u32, u32> &dest_to_src = eji.get_dest_mapping();
+
+ bool changed;
+ do {
+ changed = false;
+ for (map<u32, u32>::const_iterator it = dest_to_src.begin();
+ it != dest_to_src.end();) {
+ u32 src = it->second;
+ u32 dest = it->first;
+ ++it; /* avoid iterator being invalidated */
+
+ if (eji.is_src(dest)) {
+ continue; /* conflict; not simple (yet) */
+ }
+
+ /* value of destination slot is not used by any remaining joins;
+ * we can output this join immediately */
+ DEBUG_PRINTF("out %u<-%u\n", dest, src);
+ out->push_back(make_gough_ins(GOUGH_INS_MOV, dest, src));
+
+ eji.erase(src, dest);
+
+ if (eji.is_dest(src) && eji.is_src(src)) {
+ /* we can unblock src being used as an output by shifting
+ * across everybody using src as input to using dest (as == src
+ * now) */
+ eji.remap_src(src, dest);
+ }
+ changed = true;
+ }
+ } while (changed);
+}
+
+static
+void add_joins_to_block(edge_join_info &eji, vector<gough_ins> *out,
+ u32 base_temp_slot) {
+ /* joins happen concurrently: none of them should see the outputs of another
+ * join happening due to the same entry of the vertex. If there are
+ * conflicts we may have to handle things by using a temp output slot for
+ * each join and then copying into the final slot.
+ */
+
+ add_simple_joins(eji, out);
+ while (!eji.empty()) {
+ u32 split;
+ u32 input_for_split;
+ eji.get_pending(&input_for_split, &split);
+
+ assert(eji.is_src(split)); /* otherwise should be handled by simple */
+
+ /* stash the initial value of the split register in a temp register */
+ u32 temp = base_temp_slot++;
+ DEBUG_PRINTF("out %u<-%u\n", temp, split);
+ out->push_back(make_gough_ins(GOUGH_INS_MOV, temp, split));
+ eji.remap_src(split, temp); /* update maps */
+
+ /* split can now be safely written out to as all the uses of it as an
+ * input now refer to temp instead */
+
+ DEBUG_PRINTF("out %u<-%u\n", split, input_for_split);
+ out->push_back(make_gough_ins(GOUGH_INS_MOV, split, input_for_split));
+ eji.erase(input_for_split, split);
+
+ /* handle any uncovered simple cases */
+ add_simple_joins(eji, out);
+ }
+}
+
+static
+void build_blocks(const GoughGraph &g,
+ map<gough_edge_id, vector<gough_ins> > *blocks,
+ u32 base_temp_slot) {
+ for (const auto &e : edges_range(g)) {
+ if (g[e].vars.empty()) {
+ continue;
+ }
+
+ vector<gough_ins> &block = (*blocks)[gough_edge_id(g, e)];
+ add_to_block(g[e].vars, &block);
+ assert(!block.empty());
+ }
+
+ for (const auto t : vertices_range(g)) {
+ if (g[t].vars.empty()) {
+ continue;
+ }
+
+ map<GoughEdge, edge_join_info> eji;
+ prep_joins_for_generation(g, t, &eji);
+
+ for (auto &m : eji) {
+ vector<gough_ins> &block = (*blocks)[gough_edge_id(g, m.first)];
+ u32 cur_base = base_temp_slot;
+ if (!block.empty()) {
+ /* some temp slots may already be in use by short-lived vars */
+ ENSURE_AT_LEAST(&cur_base, highest_slot_used(block) + 1);
+ }
+
+ add_joins_to_block(m.second, &block, cur_base);
+ if (block.empty()) {
+ blocks->erase(gough_edge_id(g, m.first));
+ }
+ }
+ }
+
+ for (vector<gough_ins> &ins_list : *blocks | map_values) {
+ assert(!ins_list.empty());
+ ins_list.push_back(make_gough_ins(GOUGH_INS_END));
+ }
+}
+
+static
+void copy_in_blocks(raw_som_dfa &raw, u8 alphaShift, const GoughGraph &cfg,
+ const map<gough_edge_id, vector<gough_ins> > &blocks,
+ u32 *edge_blocks, u32 *top_blocks, u32 base_offset,
+ map<vector<gough_ins>, u32> *prog_offsets,
+ vector<gough_ins> *out) {
+ u32 impl_alpha_size = 1U << alphaShift;
+ UNUSED u32 top_sym = raw.alpha_remap[TOP];
+ assert(top_sym == raw.alpha_size - 1U);
+ map<vector<gough_ins>, u32> &processed = *prog_offsets;
+
+ for (const auto &e : edges_range(cfg)) {
+ if (!contains(blocks, gough_edge_id(cfg, e))) {
+ continue;
+ }
+ const vector<gough_ins> &block = blocks.at(gough_edge_id(cfg, e));
+ u32 prog_offset;
+ if (!contains(processed, block)) {
+ prog_offset = base_offset + byte_length(*out);
+ insert(out, out->end(), block);
+ processed[block] = prog_offset;
+ } else {
+ prog_offset = processed[block];
+ }
+
+ /* update edges */
+ u32 s_id = cfg[source(e, cfg)].state_id;
+ UNUSED u32 t_id = cfg[target(e, cfg)].state_id;
+ u32 impl_src_id = raw.states[s_id].impl_id;
+ DEBUG_PRINTF("%u: writing out block for edge_%u_%u at %u:\n",
+ impl_src_id, s_id, t_id,prog_offset);
+
+ for (u32 j = cfg[e].reach.find_first(); j != CharReach::npos;
+ j = cfg[e].reach.find_next(j)) {
+ assert(raw.states[s_id].next[j] == t_id);
+ u32 edge_index = impl_src_id * impl_alpha_size + j;
+ DEBUG_PRINTF("\tsetting on %u, %u\n", j, edge_index);
+ edge_blocks[edge_index] = prog_offset;
+ }
+
+ if (cfg[e].top) {
+ assert(raw.states[s_id].next[top_sym] == t_id);
+ DEBUG_PRINTF("\tsetting top on %u to block at %u\n", impl_src_id,
+ prog_offset);
+ top_blocks[impl_src_id] = prog_offset;
+ }
+ }
+}
+
+bool find_normal_self_loop(GoughVertex v, const GoughGraph &g, GoughEdge *out) {
+ for (const auto &e : out_edges_range(v, g)) {
+ if (target(e, g) != v) {
+ continue;
+ }
+ if (g[e].top) {
+ assert(g[e].reach.find_first() == CharReach::npos);
+ continue; /* corresponds to a top, not a normal transition */
+ }
+
+ *out = e;
+ return true;
+ }
+
+ return false;
+}
+
+static never_inline
+void update_accel_prog_offset(const gough_build_strat &gbs,
+ const map<gough_edge_id, vector<gough_ins> > &blocks,
+ const map<vector<gough_ins>, u32> &prog_offsets) {
+ map<dstate_id_t, GoughVertex> verts;
+ for (auto v : vertices_range(gbs.gg)) {
+ verts[gbs.gg[v].state_id] = v;
+ }
+
+ for (auto &m : gbs.built_accel) {
+ gough_accel *ga = m.first;
+ assert(!ga->prog_offset);
+ GoughVertex v = verts[m.second];
+ GoughEdge e;
+ UNUSED bool rv = find_normal_self_loop(v, gbs.gg, &e);
+ assert(rv);
+
+ if (!rv) {
+ continue;
+ }
+
+ DEBUG_PRINTF("updating state %u accel with margin %hhu\n",
+ gbs.gg[v].state_id, ga->margin_dist);
+ if (contains(blocks, gough_edge_id(gbs.gg, e))) {
+ const vector<gough_ins> &block
+ = blocks.at(gough_edge_id(gbs.gg, e));
+ ga->prog_offset = prog_offsets.at(block);
+ DEBUG_PRINTF("prog offset %u\n", ga->prog_offset);
+ } else {
+ ga->margin_dist = 0;
+ DEBUG_PRINTF("removing margin as no som\n");
+ }
+ }
+}
+
bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
const CompileContext &cc,
const ReportManager &rm) {
- assert(somPrecision == 2 || somPrecision == 4 || somPrecision == 8
- || !cc.streaming);
-
- if (!cc.grey.allowGough) {
- return nullptr;
- }
-
- DEBUG_PRINTF("hello world\n");
- unique_ptr<GoughGraph> cfg = makeCFG(raw);
- dump(*cfg, "init", cc.grey);
- copy_propagation(*cfg, cc.grey);
- remove_dead(*cfg);
- dump(*cfg, "prop", cc.grey);
- u32 slot_count = assign_slots(*cfg, cc.grey);
- dump(*cfg, "slots", cc.grey);
-
- map<gough_edge_id, vector<gough_ins> > blocks;
- build_blocks(*cfg, &blocks, slot_count);
- DEBUG_PRINTF("%u slots\n", highest_slot_used(blocks) + 1);
-
- u32 scratch_slot_count = highest_slot_used(blocks) + 1;
- assert(slot_count <= scratch_slot_count);
-
- dump(*cfg, "final", cc.grey);
- dump_blocks(blocks, "final", cc.grey);
-
- gough_info gi;
- memset(&gi, 0, sizeof(gi));
-
- map<dstate_id_t, gough_accel_state_info> accel_allowed;
- find_allowed_accel_states(*cfg, blocks, &accel_allowed);
+ assert(somPrecision == 2 || somPrecision == 4 || somPrecision == 8
+ || !cc.streaming);
+
+ if (!cc.grey.allowGough) {
+ return nullptr;
+ }
+
+ DEBUG_PRINTF("hello world\n");
+ unique_ptr<GoughGraph> cfg = makeCFG(raw);
+ dump(*cfg, "init", cc.grey);
+ copy_propagation(*cfg, cc.grey);
+ remove_dead(*cfg);
+ dump(*cfg, "prop", cc.grey);
+ u32 slot_count = assign_slots(*cfg, cc.grey);
+ dump(*cfg, "slots", cc.grey);
+
+ map<gough_edge_id, vector<gough_ins> > blocks;
+ build_blocks(*cfg, &blocks, slot_count);
+ DEBUG_PRINTF("%u slots\n", highest_slot_used(blocks) + 1);
+
+ u32 scratch_slot_count = highest_slot_used(blocks) + 1;
+ assert(slot_count <= scratch_slot_count);
+
+ dump(*cfg, "final", cc.grey);
+ dump_blocks(blocks, "final", cc.grey);
+
+ gough_info gi;
+ memset(&gi, 0, sizeof(gi));
+
+ map<dstate_id_t, gough_accel_state_info> accel_allowed;
+ find_allowed_accel_states(*cfg, blocks, &accel_allowed);
gough_build_strat gbs(raw, *cfg, rm, accel_allowed);
auto basic_dfa = mcclellanCompile_i(raw, gbs, cc);
- assert(basic_dfa);
- if (!basic_dfa) {
- return nullptr;
- }
-
- u8 alphaShift
- = ((const mcclellan *)getImplNfa(basic_dfa.get()))->alphaShift;
- u32 edge_count = (1U << alphaShift) * raw.states.size();
-
- u32 curr_offset = ROUNDUP_N(basic_dfa->length, 4);
-
- u32 haig_offset = curr_offset;
- curr_offset += sizeof(gi);
- /* reserve space for edge->program mapping */
- u32 edge_prog_offset = curr_offset;
- curr_offset += sizeof(u32) * edge_count;
- vector<u32> edge_blocks(edge_count);
-
- u32 top_prog_offset = 0;
- if (is_triggered(raw.kind)) {
- /* reserve space for edge->program mapping */
- top_prog_offset = curr_offset;
- curr_offset += sizeof(u32) * raw.states.size();
- }
- gi.top_prog_offset = top_prog_offset;
- vector<u32> top_blocks(raw.states.size());
-
- /* reserve space for blocks */
- u32 prog_base_offset = curr_offset;
- gi.prog_base_offset = prog_base_offset;
-
- vector<gough_ins> temp_blocks;
- map<vector<gough_ins>, u32> prog_offsets;
- copy_in_blocks(raw, alphaShift, *cfg, blocks, &edge_blocks[0],
- &top_blocks[0], prog_base_offset, &prog_offsets,
- &temp_blocks);
- update_accel_prog_offset(gbs, blocks, prog_offsets);
-
- u32 total_prog_size = byte_length(temp_blocks);
- curr_offset += total_prog_size;
-
- gi.stream_som_loc_count = slot_count;
- gi.stream_som_loc_width = somPrecision;
-
- u32 gough_size = ROUNDUP_N(curr_offset, 16);
+ assert(basic_dfa);
+ if (!basic_dfa) {
+ return nullptr;
+ }
+
+ u8 alphaShift
+ = ((const mcclellan *)getImplNfa(basic_dfa.get()))->alphaShift;
+ u32 edge_count = (1U << alphaShift) * raw.states.size();
+
+ u32 curr_offset = ROUNDUP_N(basic_dfa->length, 4);
+
+ u32 haig_offset = curr_offset;
+ curr_offset += sizeof(gi);
+ /* reserve space for edge->program mapping */
+ u32 edge_prog_offset = curr_offset;
+ curr_offset += sizeof(u32) * edge_count;
+ vector<u32> edge_blocks(edge_count);
+
+ u32 top_prog_offset = 0;
+ if (is_triggered(raw.kind)) {
+ /* reserve space for edge->program mapping */
+ top_prog_offset = curr_offset;
+ curr_offset += sizeof(u32) * raw.states.size();
+ }
+ gi.top_prog_offset = top_prog_offset;
+ vector<u32> top_blocks(raw.states.size());
+
+ /* reserve space for blocks */
+ u32 prog_base_offset = curr_offset;
+ gi.prog_base_offset = prog_base_offset;
+
+ vector<gough_ins> temp_blocks;
+ map<vector<gough_ins>, u32> prog_offsets;
+ copy_in_blocks(raw, alphaShift, *cfg, blocks, &edge_blocks[0],
+ &top_blocks[0], prog_base_offset, &prog_offsets,
+ &temp_blocks);
+ update_accel_prog_offset(gbs, blocks, prog_offsets);
+
+ u32 total_prog_size = byte_length(temp_blocks);
+ curr_offset += total_prog_size;
+
+ gi.stream_som_loc_count = slot_count;
+ gi.stream_som_loc_width = somPrecision;
+
+ u32 gough_size = ROUNDUP_N(curr_offset, 16);
auto gough_dfa = make_zeroed_bytecode_ptr<NFA>(gough_size);
-
- memcpy(gough_dfa.get(), basic_dfa.get(), basic_dfa->length);
- memcpy((char *)gough_dfa.get() + haig_offset, &gi, sizeof(gi));
- if (gough_dfa->type == MCCLELLAN_NFA_16) {
- gough_dfa->type = GOUGH_NFA_16;
- } else {
- assert(gough_dfa->type == MCCLELLAN_NFA_8);
- gough_dfa->type = GOUGH_NFA_8;
- }
-
- /* update stream state requirements */
- u32 base_state_size = gough_dfa->type == GOUGH_NFA_8 ? 1 : 2;
- gough_dfa->streamStateSize = base_state_size + slot_count * somPrecision;
- gough_dfa->scratchStateSize = (u32)(16 + scratch_slot_count * sizeof(u64a));
-
- mcclellan *m = (mcclellan *)getMutableImplNfa(gough_dfa.get());
- m->haig_offset = haig_offset;
-
- /* update nfa length, haig_info offset (leave mcclellan length alone) */
- gough_dfa->length = gough_size;
-
- /* copy in blocks */
- copy_bytes((u8 *)gough_dfa.get() + edge_prog_offset, edge_blocks);
- if (top_prog_offset) {
- copy_bytes((u8 *)gough_dfa.get() + top_prog_offset, top_blocks);
- }
- copy_bytes((u8 *)gough_dfa.get() + prog_base_offset, temp_blocks);
-
- return gough_dfa;
-}
-
+
+ memcpy(gough_dfa.get(), basic_dfa.get(), basic_dfa->length);
+ memcpy((char *)gough_dfa.get() + haig_offset, &gi, sizeof(gi));
+ if (gough_dfa->type == MCCLELLAN_NFA_16) {
+ gough_dfa->type = GOUGH_NFA_16;
+ } else {
+ assert(gough_dfa->type == MCCLELLAN_NFA_8);
+ gough_dfa->type = GOUGH_NFA_8;
+ }
+
+ /* update stream state requirements */
+ u32 base_state_size = gough_dfa->type == GOUGH_NFA_8 ? 1 : 2;
+ gough_dfa->streamStateSize = base_state_size + slot_count * somPrecision;
+ gough_dfa->scratchStateSize = (u32)(16 + scratch_slot_count * sizeof(u64a));
+
+ mcclellan *m = (mcclellan *)getMutableImplNfa(gough_dfa.get());
+ m->haig_offset = haig_offset;
+
+ /* update nfa length, haig_info offset (leave mcclellan length alone) */
+ gough_dfa->length = gough_size;
+
+ /* copy in blocks */
+ copy_bytes((u8 *)gough_dfa.get() + edge_prog_offset, edge_blocks);
+ if (top_prog_offset) {
+ copy_bytes((u8 *)gough_dfa.get() + top_prog_offset, top_blocks);
+ }
+ copy_bytes((u8 *)gough_dfa.get() + prog_base_offset, temp_blocks);
+
+ return gough_dfa;
+}
+
AccelScheme gough_build_strat::find_escape_strings(dstate_id_t this_idx) const {
AccelScheme rv;
- if (!contains(accel_gough_info, this_idx)) {
+ if (!contains(accel_gough_info, this_idx)) {
rv.cr = CharReach::dot();
rv.double_byte.clear();
return rv;
- }
-
+ }
+
rv = mcclellan_build_strat::find_escape_strings(this_idx);
-
+
assert(!rv.offset || rv.cr.all()); /* should have been limited by strat */
if (rv.offset) {
rv.cr = CharReach::dot();
rv.double_byte.clear();
return rv;
- }
+ }
if (rv.double_offset
|| !accel_gough_info.at(this_idx).two_byte) {
@@ -1172,163 +1172,163 @@ AccelScheme gough_build_strat::find_escape_strings(dstate_id_t this_idx) const {
}
return rv;
-}
-
+}
+
void gough_build_strat::buildAccel(dstate_id_t this_idx, const AccelScheme &info,
void *accel_out) {
- assert(mcclellan_build_strat::accelSize() == sizeof(AccelAux));
- gough_accel *accel = (gough_accel *)accel_out;
- /* build a plain accelaux so we can work out where we can get to */
+ assert(mcclellan_build_strat::accelSize() == sizeof(AccelAux));
+ gough_accel *accel = (gough_accel *)accel_out;
+ /* build a plain accelaux so we can work out where we can get to */
mcclellan_build_strat::buildAccel(this_idx, info, &accel->accel);
- DEBUG_PRINTF("state %hu is accel with type %hhu\n", this_idx,
- accel->accel.accel_type);
- if (accel->accel.accel_type == ACCEL_NONE) {
- return;
- }
-
+ DEBUG_PRINTF("state %hu is accel with type %hhu\n", this_idx,
+ accel->accel.accel_type);
+ if (accel->accel.accel_type == ACCEL_NONE) {
+ return;
+ }
+
assert(!accel->accel.generic.offset);
- assert(contains(accel_gough_info, this_idx));
- accel->margin_dist = verify_u8(accel_gough_info.at(this_idx).margin);
- built_accel[accel] = this_idx;
- DEBUG_PRINTF("state %hu is accel with margin %hhu\n", this_idx,
- accel->margin_dist);
-}
-
-namespace {
-struct raw_gough_report_list {
- set<som_report> reports;
-
+ assert(contains(accel_gough_info, this_idx));
+ accel->margin_dist = verify_u8(accel_gough_info.at(this_idx).margin);
+ built_accel[accel] = this_idx;
+ DEBUG_PRINTF("state %hu is accel with margin %hhu\n", this_idx,
+ accel->margin_dist);
+}
+
+namespace {
+struct raw_gough_report_list {
+ set<som_report> reports;
+
raw_gough_report_list(
const vector<pair<ReportID, GoughSSAVar *>> &raw_reports,
const ReportManager &rm, bool do_remap) {
- for (const auto &m : raw_reports) {
+ for (const auto &m : raw_reports) {
ReportID r = do_remap ? rm.getProgramOffset(m.first) : m.first;
- u32 impl_slot = INVALID_SLOT;
- if (m.second) {
- impl_slot = m.second->slot;
- assert(impl_slot != INVALID_SLOT);
- }
- reports.emplace(r, impl_slot);
- }
- }
-
- bool operator<(const raw_gough_report_list &b) const {
- return reports < b.reports;
- }
-};
-
-struct raw_gough_report_info_impl : public raw_report_info {
- vector<raw_gough_report_list> rl;
- u32 getReportListSize() const override;
- size_t size() const override;
- void fillReportLists(NFA *n, size_t base_offset,
- vector<u32> &ro /* out */) const override;
-};
-}
-
-unique_ptr<raw_report_info> gough_build_strat::gatherReports(
- vector<u32> &reports,
- vector<u32> &reports_eod,
- u8 *isSingleReport,
- ReportID *arbReport) const {
- DEBUG_PRINTF("gathering reports\n");
-
+ u32 impl_slot = INVALID_SLOT;
+ if (m.second) {
+ impl_slot = m.second->slot;
+ assert(impl_slot != INVALID_SLOT);
+ }
+ reports.emplace(r, impl_slot);
+ }
+ }
+
+ bool operator<(const raw_gough_report_list &b) const {
+ return reports < b.reports;
+ }
+};
+
+struct raw_gough_report_info_impl : public raw_report_info {
+ vector<raw_gough_report_list> rl;
+ u32 getReportListSize() const override;
+ size_t size() const override;
+ void fillReportLists(NFA *n, size_t base_offset,
+ vector<u32> &ro /* out */) const override;
+};
+}
+
+unique_ptr<raw_report_info> gough_build_strat::gatherReports(
+ vector<u32> &reports,
+ vector<u32> &reports_eod,
+ u8 *isSingleReport,
+ ReportID *arbReport) const {
+ DEBUG_PRINTF("gathering reports\n");
+
const bool remap_reports = has_managed_reports(rdfa.kind);
auto ri = ue2::make_unique<raw_gough_report_info_impl>();
map<raw_gough_report_list, u32> rev;
- assert(!rdfa.states.empty());
-
- vector<GoughVertex> verts(rdfa.states.size());
- for (auto v : vertices_range(gg)) {
- verts[gg[v].state_id] = v;
- }
-
- for (u32 state_id = 0; state_id < verts.size(); state_id++) {
- assert(state_id < rdfa.states.size());
- GoughVertex v = verts[state_id];
- assert(v != GoughGraph::null_vertex() || !state_id);
-
- DEBUG_PRINTF("i = %zu [%zu]\n", reports.size(), gg[v].reports.size());
- if (v == GoughGraph::null_vertex() || gg[v].reports.empty()) {
- reports.push_back(MO_INVALID_IDX);
- continue;
- }
-
+ assert(!rdfa.states.empty());
+
+ vector<GoughVertex> verts(rdfa.states.size());
+ for (auto v : vertices_range(gg)) {
+ verts[gg[v].state_id] = v;
+ }
+
+ for (u32 state_id = 0; state_id < verts.size(); state_id++) {
+ assert(state_id < rdfa.states.size());
+ GoughVertex v = verts[state_id];
+ assert(v != GoughGraph::null_vertex() || !state_id);
+
+ DEBUG_PRINTF("i = %zu [%zu]\n", reports.size(), gg[v].reports.size());
+ if (v == GoughGraph::null_vertex() || gg[v].reports.empty()) {
+ reports.push_back(MO_INVALID_IDX);
+ continue;
+ }
+
raw_gough_report_list rrl(gg[v].reports, rm, remap_reports);
- DEBUG_PRINTF("non empty r %zu\n", reports.size());
- if (rev.find(rrl) != rev.end()) {
- reports.push_back(rev[rrl]);
- } else {
- DEBUG_PRINTF("adding to rl\n");
- rev[rrl] = ri->size();
- reports.push_back(ri->size());
- ri->rl.push_back(rrl);
- }
- }
-
- for (auto v : verts) {
- if (v == GoughGraph::null_vertex() || gg[v].reports_eod.empty()) {
- reports_eod.push_back(MO_INVALID_IDX);
- continue;
- }
-
- DEBUG_PRINTF("non empty r eod\n");
+ DEBUG_PRINTF("non empty r %zu\n", reports.size());
+ if (rev.find(rrl) != rev.end()) {
+ reports.push_back(rev[rrl]);
+ } else {
+ DEBUG_PRINTF("adding to rl\n");
+ rev[rrl] = ri->size();
+ reports.push_back(ri->size());
+ ri->rl.push_back(rrl);
+ }
+ }
+
+ for (auto v : verts) {
+ if (v == GoughGraph::null_vertex() || gg[v].reports_eod.empty()) {
+ reports_eod.push_back(MO_INVALID_IDX);
+ continue;
+ }
+
+ DEBUG_PRINTF("non empty r eod\n");
raw_gough_report_list rrl(gg[v].reports_eod, rm, remap_reports);
- if (rev.find(rrl) != rev.end()) {
- reports_eod.push_back(rev[rrl]);
- continue;
- }
-
- DEBUG_PRINTF("adding to rl eod %zu\n", gg[v].reports_eod.size());
- rev[rrl] = ri->size();
- reports_eod.push_back(ri->size());
- ri->rl.push_back(rrl);
- }
-
- /* TODO: support single report in gough */
- *isSingleReport = 0;
- *arbReport = MO_INVALID_IDX;
- assert(!ri->rl.empty()); /* all components should be able to generate
- reports */
- return move(ri);
-}
-
-u32 raw_gough_report_info_impl::getReportListSize() const {
- u32 sz = 0;
-
- for (const raw_gough_report_list &r : rl) {
- sz += sizeof(gough_report_list);
- sz += sizeof(gough_report) * r.reports.size();
- }
-
- return sz;
-}
-
-size_t raw_gough_report_info_impl::size() const {
- return rl.size();
-}
-
-void raw_gough_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
- vector<u32> &ro) const {
- for (const raw_gough_report_list &r : rl) {
- ro.push_back(base_offset);
-
- gough_report_list *p = (gough_report_list *)((char *)n + base_offset);
- u32 i = 0;
-
- for (const som_report &sr : r.reports) {
- p->report[i].r = sr.report;
- p->report[i].som = sr.slot;
- i++;
- }
-
- p->count = verify_u32(r.reports.size());
-
- base_offset += sizeof(gough_report_list);
- base_offset += sizeof(gough_report) * r.reports.size();
- }
-}
-
-} // namespace ue2
+ if (rev.find(rrl) != rev.end()) {
+ reports_eod.push_back(rev[rrl]);
+ continue;
+ }
+
+ DEBUG_PRINTF("adding to rl eod %zu\n", gg[v].reports_eod.size());
+ rev[rrl] = ri->size();
+ reports_eod.push_back(ri->size());
+ ri->rl.push_back(rrl);
+ }
+
+ /* TODO: support single report in gough */
+ *isSingleReport = 0;
+ *arbReport = MO_INVALID_IDX;
+ assert(!ri->rl.empty()); /* all components should be able to generate
+ reports */
+ return move(ri);
+}
+
+/* Sums, over every entry of rl, the size of one gough_report_list plus one
+ * gough_report per report in that entry. The running total is kept in a u32. */
+u32 raw_gough_report_info_impl::getReportListSize() const {
+    u32 total = 0;
+
+    for (const auto &list : rl) {
+        total += sizeof(gough_report_list)
+               + sizeof(gough_report) * list.reports.size();
+    }
+
+    return total;
+}
+
+/* Number of report lists currently held in rl. */
+size_t raw_gough_report_info_impl::size() const {
+ return rl.size();
+}
+
+/* Writes every report list in rl into the memory starting at
+ * (char *)n + base_offset, one after another, and appends the offset at which
+ * each list begins to ro. Each list occupies sizeof(gough_report_list) plus
+ * sizeof(gough_report) per report. */
+void raw_gough_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
+                                                 vector<u32> &ro) const {
+    size_t offset = base_offset;
+
+    for (const auto &list : rl) {
+        ro.push_back(offset);
+
+        auto *out = (gough_report_list *)((char *)n + offset);
+
+        u32 idx = 0;
+        for (const auto &sr : list.reports) {
+            out->report[idx].r = sr.report;
+            out->report[idx].som = sr.slot;
+            ++idx;
+        }
+
+        out->count = verify_u32(list.reports.size());
+
+        /* advance past the list that was just written */
+        offset += sizeof(gough_report_list)
+                + sizeof(gough_report) * list.reports.size();
+    }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfa/goughcompile.h b/contrib/libs/hyperscan/src/nfa/goughcompile.h
index 4d03eb6450..00da1891ec 100644
--- a/contrib/libs/hyperscan/src/nfa/goughcompile.h
+++ b/contrib/libs/hyperscan/src/nfa/goughcompile.h
@@ -1,97 +1,97 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef GOUGHCOMPILE_H
-#define GOUGHCOMPILE_H
-
-#include "mcclellancompile.h"
-#include "nfa_kind.h"
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef GOUGHCOMPILE_H
+#define GOUGHCOMPILE_H
+
+#include "mcclellancompile.h"
+#include "nfa_kind.h"
+#include "ue2common.h"
#include "util/bytecode_ptr.h"
#include "util/flat_containers.h"
-#include "util/order_check.h"
-
-#include <map>
-#include <memory>
-#include <set>
-#include <vector>
-
-namespace ue2 {
-
-#define CREATE_NEW_SOM (~0U)
-
-/* dest nfa state -> som info for dest state is min of provided loc idx som
- * info */
-typedef flat_map<u32, std::vector<u32>> som_tran_info;
-
-struct som_report {
- som_report(ReportID r, u32 s) : report(r), slot(s) {}
-
- ReportID report;
- u32 slot;
-
- bool operator<(const som_report &b) const {
- const som_report &a = *this;
- ORDER_CHECK(report);
- ORDER_CHECK(slot);
- return false;
- }
-};
-
-struct dstate_som {
- std::set<som_report> reports;
- std::set<som_report> reports_eod;
- som_tran_info preds; /* live nfa states mapped back to pred states */
-};
-
-struct raw_som_dfa : public raw_dfa {
- raw_som_dfa(nfa_kind k, bool unordered_som_triggers_in, u32 trigger,
- u32 stream_som_loc_width_in)
- : raw_dfa(k), stream_som_loc_width(stream_som_loc_width_in),
- unordered_som_triggers(unordered_som_triggers_in),
- trigger_nfa_state(trigger) {
- assert(!unordered_som_triggers || is_triggered(kind));
- }
-
- std::vector<dstate_som> state_som;
- u32 stream_som_loc_width;
- bool unordered_som_triggers;
- void stripExtraEodReports(void) override;
-
- std::map<u32, u32> new_som_nfa_states; /* map nfa vertex id -> offset */
- u32 trigger_nfa_state; /* for triggered cases, slot_id that contains a new
- * som */
-};
-
+#include "util/order_check.h"
+
+#include <map>
+#include <memory>
+#include <set>
+#include <vector>
+
+namespace ue2 {
+
+#define CREATE_NEW_SOM (~0U)
+
+/* dest nfa state -> som info for dest state is min of provided loc idx som
+ * info */
+typedef flat_map<u32, std::vector<u32>> som_tran_info;
+
+/* A report id paired with a slot value. Provides operator< so that instances
+ * can be kept in ordered containers (dstate_som stores them in std::set). */
+struct som_report {
+ som_report(ReportID r, u32 s) : report(r), slot(s) {}
+
+ ReportID report;
+ u32 slot;
+
+ /* Compares on report first, then slot, via ORDER_CHECK from
+ * util/order_check.h; falls through to false when neither check decides.
+ * NOTE(review): presumably a lexicographic compare - confirm against the
+ * macro definition. */
+ bool operator<(const som_report &b) const {
+ const som_report &a = *this;
+ ORDER_CHECK(report);
+ ORDER_CHECK(slot);
+ return false;
+ }
+};
+
+/* SOM data kept per entry of raw_som_dfa::state_som: two sets of som_report
+ * (reports and reports_eod) and the som_tran_info mapping in preds. */
+struct dstate_som {
+ std::set<som_report> reports;
+ std::set<som_report> reports_eod;
+ som_tran_info preds; /* live nfa states mapped back to pred states */
+};
+
+/* raw_dfa extended with the SOM-related fields declared below. */
+struct raw_som_dfa : public raw_dfa {
+ /* Forwards k to raw_dfa and stores the remaining arguments in the matching
+ * members. Asserts that unordered_som_triggers_in is only set when
+ * is_triggered(kind) holds. */
+ raw_som_dfa(nfa_kind k, bool unordered_som_triggers_in, u32 trigger,
+ u32 stream_som_loc_width_in)
+ : raw_dfa(k), stream_som_loc_width(stream_som_loc_width_in),
+ unordered_som_triggers(unordered_som_triggers_in),
+ trigger_nfa_state(trigger) {
+ assert(!unordered_som_triggers || is_triggered(kind));
+ }
+
+ std::vector<dstate_som> state_som;
+ u32 stream_som_loc_width;
+ bool unordered_som_triggers;
+ /* overrides the raw_dfa virtual; defined outside this header */
+ void stripExtraEodReports(void) override;
+
+ std::map<u32, u32> new_som_nfa_states; /* map nfa vertex id -> offset */
+ u32 trigger_nfa_state; /* for triggered cases, slot_id that contains a new
+ * som */
+};
+
bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
const CompileContext &cc,
const ReportManager &rm);
-
-} // namespace ue2
-
+
+} // namespace ue2
+
#endif // GOUGHCOMPILE_H
diff --git a/contrib/libs/hyperscan/src/nfa/goughcompile_accel.cpp b/contrib/libs/hyperscan/src/nfa/goughcompile_accel.cpp
index 3a3a44498e..849202a192 100644
--- a/contrib/libs/hyperscan/src/nfa/goughcompile_accel.cpp
+++ b/contrib/libs/hyperscan/src/nfa/goughcompile_accel.cpp
@@ -1,281 +1,281 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "goughcompile_internal.h"
-#include "gough_internal.h"
-#include "grey.h"
-#include "mcclellancompile.h"
-#include "util/container.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-
-#include "ue2common.h"
-
-#include <map>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-template<typename Graph>
-void add_edge_if_not_selfloop(const typename Graph::vertex_descriptor &u,
- const typename Graph::vertex_descriptor &v,
- Graph &g) {
- if (u != v) {
- add_edge(u, v, g);
- }
-}
-
-static
-bool can_accel_over_selfloop(const GoughVertexProps &vp, const GoughEdge &e,
- const GoughEdgeProps &ep, u32 *margin) {
- if (vp.vars.empty() && ep.vars.empty()) {
- /* if we update no som information, then it is trivial to accelerate */
- *margin = 0;
- return true;
- }
-
- /* if the effect of running a self loop stabilises after a small number of
- * iterations, it is possible to accelerate over the state and only then run
- * the block N times. To model this we create a graph which shows how the
- * value for a variable at the end of a self loop block is related to values
- * at the start */
-
- typedef boost::adjacency_list<boost::vecS, boost::vecS,
- boost::bidirectionalS> basic_graph;
- typedef basic_graph::vertex_descriptor basic_vertex;
- basic_graph bg;
-
- map<const GoughSSAVar *, basic_vertex> verts;
-
- /* create verts */
- for (const auto &var : ep.vars) {
- verts[var.get()] = add_vertex(bg);
- }
-
- for (const auto &var : vp.vars) {
- verts[var.get()] = add_vertex(bg);
- }
-
- /* wire edges */
- set<basic_vertex> done;
- for (const auto &var : ep.vars) {
- assert(contains(verts, var.get()));
- basic_vertex v = verts[var.get()];
- for (GoughSSAVar *pred : var->get_inputs()) {
- if (!contains(verts, pred)) {
- continue;
- }
- basic_vertex u = verts[pred];
- if (contains(done, u)) { /* u has already taken on new values this
- * iteration */
- for (auto p : inv_adjacent_vertices_range(u, bg)) {
- add_edge_if_not_selfloop(p, v, bg);
- }
- } else {
- add_edge_if_not_selfloop(u, v, bg);
- }
- }
- done.insert(v);
- }
-
- for (const auto &var : vp.vars) {
- GoughSSAVar *pred = var->get_input(e);
- assert(contains(verts, var.get()));
- basic_vertex v = verts[var.get()];
- if (!contains(verts, pred)) {
- continue;
- }
-
- basic_vertex u = verts[pred];
- if (contains(done, u)) { /* u has already taken on new values this
- * iteration */
- for (auto p : inv_adjacent_vertices_range(u, bg)) {
- add_edge_if_not_selfloop(p, v, bg);
- }
- } else {
- add_edge_if_not_selfloop(u, v, bg);
- }
- /* do not add v to done as all joins happen in parallel */
- }
-
- /* check for loops - non self loops may prevent settling */
-
- if (!is_dag(bg)) {
- DEBUG_PRINTF("can not %u accel as large loops\n", vp.state_id);
- return false;
- }
-
- *margin = num_vertices(bg); /* TODO: be less conservative */
-
- if (*margin > 50) {
- return false;
- }
-
- return true;
-}
-
-static
-bool verify_neighbour(const GoughGraph &g, GoughVertex u,
- const map<gough_edge_id, vector<gough_ins> > &blocks,
- const set<GoughVertex> &succs,
- const vector<gough_ins> &block_sl) {
- for (const auto &e : out_edges_range(u, g)) {
- if (!g[e].reach.any()) { /* ignore top edges */
- continue;
- }
-
- GoughVertex t = target(e, g);
- if (!contains(succs, t)) { /* must be an escape string */
- continue;
- }
-
- if (!contains(blocks, gough_edge_id(g, e))) {
- return false;
- }
-
- if (blocks.at(gough_edge_id(g, e)) != block_sl) {
- return false;
- }
- }
-
- return true;
-}
-
-static
-bool verify_neighbour_no_block(const GoughGraph &g, GoughVertex u,
- const map<gough_edge_id, vector<gough_ins> > &blocks,
- const set<GoughVertex> &succs) {
- for (const auto &e : out_edges_range(u, g)) {
- if (!g[e].reach.any()) { /* ignore top edges */
- continue;
- }
-
- GoughVertex t = target(e, g);
- if (!contains(succs, t)) { /* must be an escape string */
- continue;
- }
-
- if (contains(blocks, gough_edge_id(g, e))) {
- return false;
- }
- }
-
- return true;
-}
-
-/* Checks the som aspects of allowing two byte accel - it is expected that the
- * mcclellan logic will identify escape strings.
- *
- * For 2 byte acceleration to be correct we require that any non-escape sequence
- * characters xy from the accel state has the same effect as just the character
- * of y.
- *
- * The current way of ensuring this is to require:
- * (a) all edges out of the cyclic state behave identically to the cyclic self
- * loop edge
- * (b) edges out of the neighbouring state which do not correspond to escape
- * string behave identical to the cyclic state edges.
- *
- * TODO: these restrictions could be relaxed by looking at the effect on
- * relevant (live?) vars only, allowing additions to the escape string set, and
- * considering one byte escapes.
- */
-static
-bool allow_two_byte_accel(const GoughGraph &g,
- const map<gough_edge_id, vector<gough_ins> > &blocks,
- GoughVertex v, const GoughEdge &self_loop) {
- if (contains(blocks, gough_edge_id(g, self_loop))) {
- DEBUG_PRINTF("edge plan on self loop\n");
- const auto &block_sl = blocks.at(gough_edge_id(g, self_loop));
-
- set<GoughVertex> succs;
- for (const auto &e : out_edges_range(v, g)) {
- if (g[e].reach.none()) { /* ignore top edges */
- continue;
- }
-
- gough_edge_id ged(g, e);
- if (!contains(blocks, ged) || blocks.at(ged) != block_sl) {
- DEBUG_PRINTF("different out-edge behaviour\n");
- return false;
- }
- succs.insert(target(e, g));
- }
-
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w != v && !verify_neighbour(g, w, blocks, succs, block_sl)) {
- return false;
- }
- }
- } else {
- DEBUG_PRINTF("no edge plan on self loop\n");
- set<GoughVertex> succs;
- for (const auto &e : out_edges_range(v, g)) {
- if (g[e].reach.none()) { /* ignore top edges */
- continue;
- }
-
- gough_edge_id ged(g, e);
- if (contains(blocks, ged)) {
- DEBUG_PRINTF("different out-edge behaviour\n");
- return false;
- }
- succs.insert(target(e, g));
-
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w != v && !verify_neighbour_no_block(g, w, blocks, succs)) {
- return false;
- }
- }
- }
- }
-
- DEBUG_PRINTF("allowing two byte accel for %u\n", g[v].state_id);
- return true;
-}
-
-void find_allowed_accel_states(const GoughGraph &g,
- const map<gough_edge_id, vector<gough_ins> > &blocks,
- map<dstate_id_t, gough_accel_state_info> *out) {
- for (auto v : vertices_range(g)) {
- GoughEdge e;
- if (!find_normal_self_loop(v, g, &e)) {
- continue; /* not accelerable */
- }
- u32 margin = 0;
- if (!can_accel_over_selfloop(g[v], e, g[e], &margin)) {
- continue; /* not accelerable */
- }
- bool tba = allow_two_byte_accel(g, blocks, v, e);
- out->emplace(g[v].state_id, gough_accel_state_info(margin, tba));
- }
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "goughcompile_internal.h"
+#include "gough_internal.h"
+#include "grey.h"
+#include "mcclellancompile.h"
+#include "util/container.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+
+#include "ue2common.h"
+
+#include <map>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+/* Adds the edge u -> v to g unless u and v are the same vertex. */
+template<typename Graph>
+void add_edge_if_not_selfloop(const typename Graph::vertex_descriptor &u,
+                              const typename Graph::vertex_descriptor &v,
+                              Graph &g) {
+    if (u != v) add_edge(u, v, g);
+}
+
+/* Decides whether the self loop e (edge properties ep) on a vertex with
+ * properties vp may be accelerated over. On success returns true and stores
+ * the margin in *margin. Returns false if the variable dependency graph built
+ * below is not a DAG, or if the margin exceeds 50; in the latter case *margin
+ * has already been overwritten with the rejected value. */
+static
+bool can_accel_over_selfloop(const GoughVertexProps &vp, const GoughEdge &e,
+ const GoughEdgeProps &ep, u32 *margin) {
+ if (vp.vars.empty() && ep.vars.empty()) {
+ /* if we update no som information, then it is trivial to accelerate */
+ *margin = 0;
+ return true;
+ }
+
+ /* if the effect of running a self loop stabilises after a small number of
+ * iterations, it is possible to accelerate over the state and only then run
+ * the block N times. To model this we create a graph which shows how the
+ * value for a variable at the end of a self loop block is related to values
+ * at the start */
+
+ typedef boost::adjacency_list<boost::vecS, boost::vecS,
+ boost::bidirectionalS> basic_graph;
+ typedef basic_graph::vertex_descriptor basic_vertex;
+ basic_graph bg;
+
+ map<const GoughSSAVar *, basic_vertex> verts;
+
+ /* create verts */
+ for (const auto &var : ep.vars) {
+ verts[var.get()] = add_vertex(bg);
+ }
+
+ for (const auto &var : vp.vars) {
+ verts[var.get()] = add_vertex(bg);
+ }
+
+ /* wire edges */
+ /* done holds the edge vars already processed in this pass; an input that is
+ * in done is replaced by its own predecessors in bg */
+ set<basic_vertex> done;
+ for (const auto &var : ep.vars) {
+ assert(contains(verts, var.get()));
+ basic_vertex v = verts[var.get()];
+ for (GoughSSAVar *pred : var->get_inputs()) {
+ if (!contains(verts, pred)) {
+ continue;
+ }
+ basic_vertex u = verts[pred];
+ if (contains(done, u)) { /* u has already taken on new values this
+ * iteration */
+ for (auto p : inv_adjacent_vertices_range(u, bg)) {
+ add_edge_if_not_selfloop(p, v, bg);
+ }
+ } else {
+ add_edge_if_not_selfloop(u, v, bg);
+ }
+ }
+ done.insert(v);
+ }
+
+ /* vertex vars: only the input selected for edge e is considered */
+ for (const auto &var : vp.vars) {
+ GoughSSAVar *pred = var->get_input(e);
+ assert(contains(verts, var.get()));
+ basic_vertex v = verts[var.get()];
+ if (!contains(verts, pred)) {
+ continue;
+ }
+
+ basic_vertex u = verts[pred];
+ if (contains(done, u)) { /* u has already taken on new values this
+ * iteration */
+ for (auto p : inv_adjacent_vertices_range(u, bg)) {
+ add_edge_if_not_selfloop(p, v, bg);
+ }
+ } else {
+ add_edge_if_not_selfloop(u, v, bg);
+ }
+ /* do not add v to done as all joins happen in parallel */
+ }
+
+ /* check for loops - non self loops may prevent settling */
+
+ if (!is_dag(bg)) {
+ DEBUG_PRINTF("can not %u accel as large loops\n", vp.state_id);
+ return false;
+ }
+
+ /* margin is one per variable vertex in bg */
+ *margin = num_vertices(bg); /* TODO: be less conservative */
+
+ if (*margin > 50) {
+ return false;
+ }
+
+ return true;
+}
+
+/* Returns true when every out-edge of u that has non-empty reach and whose
+ * target is in succs has an entry in blocks equal to block_sl. Edges with
+ * empty reach (top edges) and edges whose target is outside succs (escape
+ * strings) are not constrained. */
+static
+bool verify_neighbour(const GoughGraph &g, GoughVertex u,
+                      const map<gough_edge_id, vector<gough_ins> > &blocks,
+                      const set<GoughVertex> &succs,
+                      const vector<gough_ins> &block_sl) {
+    for (const auto &e : out_edges_range(u, g)) {
+        /* skip top edges, then skip escapes */
+        if (!g[e].reach.any() || !contains(succs, target(e, g))) {
+            continue;
+        }
+
+        const auto it = blocks.find(gough_edge_id(g, e));
+        if (it == blocks.end() || it->second != block_sl) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/* Returns true when no out-edge of u that has non-empty reach and whose target
+ * is in succs has an entry in blocks. Edges with empty reach (top edges) and
+ * edges whose target is outside succs (escape strings) are ignored. */
+static
+bool verify_neighbour_no_block(const GoughGraph &g, GoughVertex u,
+                      const map<gough_edge_id, vector<gough_ins> > &blocks,
+                      const set<GoughVertex> &succs) {
+    for (const auto &e : out_edges_range(u, g)) {
+        /* skip top edges, then skip escapes */
+        if (!g[e].reach.any() || !contains(succs, target(e, g))) {
+            continue;
+        }
+
+        if (contains(blocks, gough_edge_id(g, e))) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/* Checks the som aspects of allowing two byte accel - it is expected that the
+ * mcclellan logic will identify escape strings.
+ *
+ * For 2 byte acceleration to be correct we require that any non-escape
+ * sequence of characters xy from the accel state has the same effect as just
+ * the character y.
+ *
+ * The current way of ensuring this is to require:
+ * (a) all edges out of the cyclic state behave identically to the cyclic self
+ *     loop edge
+ * (b) edges out of the neighbouring states which do not correspond to an
+ *     escape string behave identically to the cyclic state edges.
+ *
+ * "Behave identically" is decided from blocks: either every relevant edge has
+ * an entry equal to the self loop's entry, or (when the self loop has no
+ * entry) none of the relevant edges has an entry.
+ *
+ * Returns true if two byte acceleration is permitted for v.
+ *
+ * TODO: these restrictions could be relaxed by looking at the effect on
+ * relevant (live?) vars only, allowing additions to the escape string set, and
+ * considering one byte escapes.
+ */
+static
+bool allow_two_byte_accel(const GoughGraph &g,
+        const map<gough_edge_id, vector<gough_ins> > &blocks,
+        GoughVertex v, const GoughEdge &self_loop) {
+    if (contains(blocks, gough_edge_id(g, self_loop))) {
+        DEBUG_PRINTF("edge plan on self loop\n");
+        const auto &block_sl = blocks.at(gough_edge_id(g, self_loop));
+
+        set<GoughVertex> succs;
+        for (const auto &e : out_edges_range(v, g)) {
+            if (g[e].reach.none()) { /* ignore top edges */
+                continue;
+            }
+
+            gough_edge_id ged(g, e);
+            if (!contains(blocks, ged) || blocks.at(ged) != block_sl) {
+                DEBUG_PRINTF("different out-edge behaviour\n");
+                return false;
+            }
+            succs.insert(target(e, g));
+        }
+
+        for (auto w : adjacent_vertices_range(v, g)) {
+            if (w != v && !verify_neighbour(g, w, blocks, succs, block_sl)) {
+                return false;
+            }
+        }
+    } else {
+        DEBUG_PRINTF("no edge plan on self loop\n");
+        set<GoughVertex> succs;
+        for (const auto &e : out_edges_range(v, g)) {
+            if (g[e].reach.none()) { /* ignore top edges */
+                continue;
+            }
+
+            gough_edge_id ged(g, e);
+            if (contains(blocks, ged)) {
+                DEBUG_PRINTF("different out-edge behaviour\n");
+                return false;
+            }
+            succs.insert(target(e, g));
+        }
+
+        /* Verify the neighbours once, after succs is complete, mirroring the
+         * branch above. Checking against a partially built succs can only
+         * reject edges that the full set also rejects, so running the check
+         * per out-edge added work without changing the answer. */
+        for (auto w : adjacent_vertices_range(v, g)) {
+            if (w != v && !verify_neighbour_no_block(g, w, blocks, succs)) {
+                return false;
+            }
+        }
+    }
+
+    DEBUG_PRINTF("allowing two byte accel for %u\n", g[v].state_id);
+    return true;
+}
+
+/* For every vertex of g that has a normal self loop which can be accelerated
+ * over, records in *out (keyed by the vertex's state_id) the acceleration
+ * margin and whether two byte acceleration is allowed. Vertices failing either
+ * test are left out. */
+void find_allowed_accel_states(const GoughGraph &g,
+        const map<gough_edge_id, vector<gough_ins> > &blocks,
+        map<dstate_id_t, gough_accel_state_info> *out) {
+    for (auto v : vertices_range(g)) {
+        GoughEdge self_loop;
+        u32 margin = 0;
+
+        /* short-circuit keeps g[self_loop] from being read unless a self
+         * loop was actually found */
+        if (!find_normal_self_loop(v, g, &self_loop)
+            || !can_accel_over_selfloop(g[v], self_loop, g[self_loop],
+                                        &margin)) {
+            continue; /* not accelerable */
+        }
+
+        const bool two_byte_ok = allow_two_byte_accel(g, blocks, v, self_loop);
+        out->emplace(g[v].state_id,
+                     gough_accel_state_info(margin, two_byte_ok));
+    }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfa/goughcompile_dump.h b/contrib/libs/hyperscan/src/nfa/goughcompile_dump.h
index 25db6b2a39..f63983a791 100644
--- a/contrib/libs/hyperscan/src/nfa/goughcompile_dump.h
+++ b/contrib/libs/hyperscan/src/nfa/goughcompile_dump.h
@@ -1,63 +1,63 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef GOUGHCOMPILE_DUMP_H
-#define GOUGHCOMPILE_DUMP_H
-
-#include "goughcompile_internal.h"
-
-#include <map>
-#include <string>
-
-namespace ue2 {
-
-struct Grey;
-#ifdef DUMP_SUPPORT
-
-std::string dump_name(const GoughVertexProps &vp);
-std::string dump_name(const gough_edge_id &e);
-void dump(const GoughGraph &g, const std::string &base, const Grey &grey);
-void dump_blocks(const std::map<gough_edge_id, std::vector<gough_ins> > &blocks,
- const std::string &base, const Grey &grey);
-#else
-
-static UNUSED
-void dump(UNUSED const GoughGraph &g, UNUSED const std::string &base,
- UNUSED const Grey &grey) {
-}
-static UNUSED
-void dump_blocks(
- UNUSED const std::map<gough_edge_id, std::vector<gough_ins> > &blocks,
- UNUSED const std::string &base, UNUSED const Grey &grey) {
-}
-
-#endif
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef GOUGHCOMPILE_DUMP_H
+#define GOUGHCOMPILE_DUMP_H
+
+#include "goughcompile_internal.h"
+
+#include <map>
+#include <string>
+
+namespace ue2 {
+
+struct Grey;
+/* With DUMP_SUPPORT defined, the dump helpers are only declared in this
+ * header. Without it, dump() and dump_blocks() become empty inline stubs so
+ * call sites compile unchanged; dump_name() has no stub in that case. */
+#ifdef DUMP_SUPPORT
+
+std::string dump_name(const GoughVertexProps &vp);
+std::string dump_name(const gough_edge_id &e);
+void dump(const GoughGraph &g, const std::string &base, const Grey &grey);
+void dump_blocks(const std::map<gough_edge_id, std::vector<gough_ins> > &blocks,
+ const std::string &base, const Grey &grey);
+#else
+
+/* no-op when dump support is compiled out */
+static UNUSED
+void dump(UNUSED const GoughGraph &g, UNUSED const std::string &base,
+ UNUSED const Grey &grey) {
+}
+/* no-op when dump support is compiled out */
+static UNUSED
+void dump_blocks(
+ UNUSED const std::map<gough_edge_id, std::vector<gough_ins> > &blocks,
+ UNUSED const std::string &base, UNUSED const Grey &grey) {
+}
+
+#endif
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/goughcompile_internal.h b/contrib/libs/hyperscan/src/nfa/goughcompile_internal.h
index dbf2d1e3f7..e64540523b 100644
--- a/contrib/libs/hyperscan/src/nfa/goughcompile_internal.h
+++ b/contrib/libs/hyperscan/src/nfa/goughcompile_internal.h
@@ -1,225 +1,225 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef GOUGHCOMPILE_INTERNAL_H
-#define GOUGHCOMPILE_INTERNAL_H
-
-#include "gough_internal.h"
-#include "mcclellancompile.h"
-#include "ue2common.h"
-#include "util/charreach.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef GOUGHCOMPILE_INTERNAL_H
+#define GOUGHCOMPILE_INTERNAL_H
+
+#include "gough_internal.h"
+#include "mcclellancompile.h"
+#include "ue2common.h"
+#include "util/charreach.h"
#include "util/flat_containers.h"
#include "util/noncopyable.h"
-#include "util/order_check.h"
-
-#include <map>
-#include <memory>
-#include <set>
-#include <vector>
-
-#include <boost/graph/adjacency_list.hpp>
-
-namespace ue2 {
-
-struct Grey;
-struct GoughSSAVar;
-struct GoughSSAVarJoin;
-
-struct GoughVertexProps {
- GoughVertexProps() {}
- explicit GoughVertexProps(u32 state_in) : state_id(state_in) {}
- u32 state_id = ~0U;
-
- std::vector<std::shared_ptr<GoughSSAVarJoin> > vars; /* owns variables */
-
- std::vector<std::pair<ReportID, GoughSSAVar *> > reports; /**< report som,
- som variable */
- std::vector<std::pair<ReportID, GoughSSAVar *> > reports_eod;
-};
-
-struct GoughEdgeProps {
- GoughEdgeProps(void) : top(false) {}
- bool top;
- CharReach reach;
-
- std::vector<std::shared_ptr<GoughSSAVar> > vars; /* owns variables */
-};
-
-struct GoughGraphProps {
- boost::adjacency_list_traits<boost::vecS, boost::vecS>::vertex_descriptor
- initial_vertex; /* for triggered nfas, dead state;
- * for others start anchored or start floating
- */
-};
-
-typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS,
- GoughVertexProps, GoughEdgeProps, GoughGraphProps> GoughGraph;
-
-typedef GoughGraph::vertex_descriptor GoughVertex;
-typedef GoughGraph::edge_descriptor GoughEdge;
-
-struct gough_edge_id {
- gough_edge_id(const GoughGraph &g, const GoughEdge &e)
- : src(g[source(e, g)].state_id), dest(g[target(e, g)].state_id),
- first_char(g[e].reach.find_first()) {}
- bool operator<(const gough_edge_id &b) const {
- const gough_edge_id &a = *this;
- ORDER_CHECK(src);
- ORDER_CHECK(dest);
- ORDER_CHECK(first_char);
- return false;
- }
- const u32 src;
- const u32 dest;
- const u32 first_char; /* ~0U if only top */
-};
-
-struct GoughSSAVarWithInputs;
-struct GoughSSAVarMin;
-struct GoughSSAVarJoin;
-
+#include "util/order_check.h"
+
+#include <map>
+#include <memory>
+#include <set>
+#include <vector>
+
+#include <boost/graph/adjacency_list.hpp>
+
+namespace ue2 {
+
+struct Grey;
+struct GoughSSAVar;
+struct GoughSSAVarJoin;
+
+struct GoughVertexProps {
+ GoughVertexProps() {}
+ explicit GoughVertexProps(u32 state_in) : state_id(state_in) {}
+ u32 state_id = ~0U;
+
+ std::vector<std::shared_ptr<GoughSSAVarJoin> > vars; /* owns variables */
+
+ std::vector<std::pair<ReportID, GoughSSAVar *> > reports; /**< report som,
+ som variable */
+ std::vector<std::pair<ReportID, GoughSSAVar *> > reports_eod;
+};
+
+struct GoughEdgeProps {
+ GoughEdgeProps(void) : top(false) {}
+ bool top;
+ CharReach reach;
+
+ std::vector<std::shared_ptr<GoughSSAVar> > vars; /* owns variables */
+};
+
+struct GoughGraphProps {
+ boost::adjacency_list_traits<boost::vecS, boost::vecS>::vertex_descriptor
+ initial_vertex; /* for triggered nfas, dead state;
+ * for others start anchored or start floating
+ */
+};
+
+typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS,
+ GoughVertexProps, GoughEdgeProps, GoughGraphProps> GoughGraph;
+
+typedef GoughGraph::vertex_descriptor GoughVertex;
+typedef GoughGraph::edge_descriptor GoughEdge;
+
+struct gough_edge_id {
+ gough_edge_id(const GoughGraph &g, const GoughEdge &e)
+ : src(g[source(e, g)].state_id), dest(g[target(e, g)].state_id),
+ first_char(g[e].reach.find_first()) {}
+ bool operator<(const gough_edge_id &b) const {
+ const gough_edge_id &a = *this;
+ ORDER_CHECK(src);
+ ORDER_CHECK(dest);
+ ORDER_CHECK(first_char);
+ return false;
+ }
+ const u32 src;
+ const u32 dest;
+ const u32 first_char; /* ~0U if only top */
+};
+
+struct GoughSSAVarWithInputs;
+struct GoughSSAVarMin;
+struct GoughSSAVarJoin;
+
struct GoughSSAVar : noncopyable {
- GoughSSAVar(void) : seen(false), slot(INVALID_SLOT) {}
- virtual ~GoughSSAVar();
+ GoughSSAVar(void) : seen(false), slot(INVALID_SLOT) {}
+ virtual ~GoughSSAVar();
const flat_set<GoughSSAVar *> &get_inputs() const {
- return inputs;
- }
+ return inputs;
+ }
const flat_set<GoughSSAVarWithInputs *> &get_outputs() const {
- return outputs;
- }
- virtual void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) = 0;
-
- virtual void generate(std::vector<gough_ins> *out) const = 0;
-
- bool seen; /* for temp use by remove_dead alg */
- u32 slot;
-
- void clear_outputs();
-
- /** remove all inputs and outputs of the vertex, call before
- * removing vertex */
- virtual void clear_all() {
- clear_outputs();
- }
-protected:
+ return outputs;
+ }
+ virtual void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) = 0;
+
+ virtual void generate(std::vector<gough_ins> *out) const = 0;
+
+ bool seen; /* for temp use by remove_dead alg */
+ u32 slot;
+
+ void clear_outputs();
+
+ /** remove all inputs and outputs of the vertex, call before
+ * removing vertex */
+ virtual void clear_all() {
+ clear_outputs();
+ }
+protected:
flat_set<GoughSSAVar *> inputs;
flat_set<GoughSSAVarWithInputs *> outputs;
- friend struct GoughSSAVarWithInputs;
- friend struct GoughSSAVarMin;
- friend struct GoughSSAVarJoin;
-};
-
-struct GoughSSAVarNew : public GoughSSAVar {
- explicit GoughSSAVarNew(u32 adjust_in) : adjust(adjust_in) {}
-
- void replace_input(GoughSSAVar *, GoughSSAVar *) override {
- assert(0);
- }
-
- void generate(std::vector<gough_ins> *out) const override;
-
- const u32 adjust;
-};
-
-struct GoughSSAVarWithInputs : public GoughSSAVar {
- GoughSSAVarWithInputs(void) {}
- void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) override = 0;
- virtual void clear_inputs() = 0;
- void clear_all() override;
-protected:
- virtual void remove_input_raw(GoughSSAVar *v) = 0;
- friend struct GoughSSAVar;
-};
-
-struct GoughSSAVarMin : public GoughSSAVarWithInputs {
- GoughSSAVarMin(void) {}
- void generate(std::vector<gough_ins> *out) const override;
-
- void clear_inputs() override;
- void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) override;
-
- virtual void add_input(GoughSSAVar *v) {
- inputs.insert(v);
- v->outputs.insert(this);
- }
-
-protected:
- void remove_input_raw(GoughSSAVar *v) override;
-};
-
-struct GoughSSAVarJoin : public GoughSSAVarWithInputs {
- GoughSSAVarJoin(void) {}
-
- /* dummy; all joins at a point must be generated simultaneously */
- void generate(std::vector<gough_ins> *out) const override;
- GoughSSAVar *get_input(const GoughEdge &prev) const;
-
- void clear_inputs() override;
- void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) override;
-
- void add_input(GoughSSAVar *v, GoughEdge prev);
-
+ friend struct GoughSSAVarWithInputs;
+ friend struct GoughSSAVarMin;
+ friend struct GoughSSAVarJoin;
+};
+
+struct GoughSSAVarNew : public GoughSSAVar {
+ explicit GoughSSAVarNew(u32 adjust_in) : adjust(adjust_in) {}
+
+ void replace_input(GoughSSAVar *, GoughSSAVar *) override {
+ assert(0);
+ }
+
+ void generate(std::vector<gough_ins> *out) const override;
+
+ const u32 adjust;
+};
+
+struct GoughSSAVarWithInputs : public GoughSSAVar {
+ GoughSSAVarWithInputs(void) {}
+ void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) override = 0;
+ virtual void clear_inputs() = 0;
+ void clear_all() override;
+protected:
+ virtual void remove_input_raw(GoughSSAVar *v) = 0;
+ friend struct GoughSSAVar;
+};
+
+struct GoughSSAVarMin : public GoughSSAVarWithInputs {
+ GoughSSAVarMin(void) {}
+ void generate(std::vector<gough_ins> *out) const override;
+
+ void clear_inputs() override;
+ void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) override;
+
+ virtual void add_input(GoughSSAVar *v) {
+ inputs.insert(v);
+ v->outputs.insert(this);
+ }
+
+protected:
+ void remove_input_raw(GoughSSAVar *v) override;
+};
+
+struct GoughSSAVarJoin : public GoughSSAVarWithInputs {
+ GoughSSAVarJoin(void) {}
+
+ /* dummy; all joins at a point must be generated simultaneously */
+ void generate(std::vector<gough_ins> *out) const override;
+ GoughSSAVar *get_input(const GoughEdge &prev) const;
+
+ void clear_inputs() override;
+ void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) override;
+
+ void add_input(GoughSSAVar *v, GoughEdge prev);
+
const flat_set<GoughEdge> &get_edges_for_input(GoughSSAVar *input) const;
const std::map<GoughSSAVar *, flat_set<GoughEdge>> &get_input_map() const;
-
-protected:
- void remove_input_raw(GoughSSAVar *v) override;
-
-private:
+
+protected:
+ void remove_input_raw(GoughSSAVar *v) override;
+
+private:
std::map<GoughSSAVar *, flat_set<GoughEdge>> input_map;
-};
-
-struct gough_accel_state_info {
- u32 margin;
- bool two_byte;
-
- gough_accel_state_info(u32 margin_in, bool two_byte_in)
- : margin(margin_in), two_byte(two_byte_in) {
- }
-};
-
-u32 assign_slots(GoughGraph &g, const Grey &grey);
-void find_allowed_accel_states(const GoughGraph &g,
- const std::map<gough_edge_id, std::vector<gough_ins> > &blocks,
- std::map<dstate_id_t, gough_accel_state_info> *out);
-bool find_normal_self_loop(GoughVertex v, const GoughGraph &g, GoughEdge *out);
-
-} // namespace ue2
-
-// Note: C structure, can't be in namespace ue2
-static inline
-bool operator==(const gough_ins &a, const gough_ins &b) {
- return a.op == b.op && a.dest == b.dest && a.src == b.src;
-}
-
-static inline
-bool operator<(const gough_ins &a, const gough_ins &b) {
- return std::tie(a.op, a.src, a.dest) < std::tie(b.op, b.src, b.dest);
-}
-
-#endif
+};
+
+struct gough_accel_state_info {
+ u32 margin;
+ bool two_byte;
+
+ gough_accel_state_info(u32 margin_in, bool two_byte_in)
+ : margin(margin_in), two_byte(two_byte_in) {
+ }
+};
+
+u32 assign_slots(GoughGraph &g, const Grey &grey);
+void find_allowed_accel_states(const GoughGraph &g,
+ const std::map<gough_edge_id, std::vector<gough_ins> > &blocks,
+ std::map<dstate_id_t, gough_accel_state_info> *out);
+bool find_normal_self_loop(GoughVertex v, const GoughGraph &g, GoughEdge *out);
+
+} // namespace ue2
+
+// Note: C structure, can't be in namespace ue2
+static inline
+bool operator==(const gough_ins &a, const gough_ins &b) {
+ return a.op == b.op && a.dest == b.dest && a.src == b.src;
+}
+
+static inline
+bool operator<(const gough_ins &a, const gough_ins &b) {
+ return std::tie(a.op, a.src, a.dest) < std::tie(b.op, b.src, b.dest);
+}
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/goughcompile_reg.cpp b/contrib/libs/hyperscan/src/nfa/goughcompile_reg.cpp
index 7d9e2e4b1c..48e515b9ad 100644
--- a/contrib/libs/hyperscan/src/nfa/goughcompile_reg.cpp
+++ b/contrib/libs/hyperscan/src/nfa/goughcompile_reg.cpp
@@ -1,502 +1,502 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "goughcompile.h"
-#include "goughcompile_dump.h"
-#include "goughcompile_internal.h"
-#include "gough_internal.h"
-#include "grey.h"
-#include "util/container.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "goughcompile.h"
+#include "goughcompile_dump.h"
+#include "goughcompile_internal.h"
+#include "gough_internal.h"
+#include "grey.h"
+#include "util/container.h"
#include "util/flat_containers.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-#include "util/order_check.h"
-
-#include "ue2common.h"
-
-#include <algorithm>
-#include <boost/graph/depth_first_search.hpp>
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_values;
-
-namespace ue2 {
-
-template<typename VarP, typename VarQ>
-void push_back_all_raw(vector<VarP> *out, const vector<VarQ> &in) {
- for (const auto &var : in) {
- out->push_back(var.get());
- }
-}
-
-static
-void all_vars(const GoughGraph &g, vector<GoughSSAVar *> *out) {
- for (auto v : vertices_range(g)) {
- push_back_all_raw(out, g[v].vars);
- }
- for (const auto &e : edges_range(g)) {
- push_back_all_raw(out, g[e].vars);
- }
-}
-
-namespace {
-struct GoughGraphAux {
- map<const GoughSSAVar *, GoughVertex> containing_v;
- map<const GoughSSAVar *, GoughEdge> containing_e;
- map<const GoughSSAVar *, set<GoughVertex> > reporters;
-};
-}
-
-static never_inline
-void fill_aux(const GoughGraph &g, GoughGraphAux *aux) {
- for (auto v : vertices_range(g)) {
- for (const auto &var : g[v].vars) {
- aux->containing_v[var.get()] = v;
- DEBUG_PRINTF("%u is on vertex %u\n", var->slot, g[v].state_id);
- }
-
- for (GoughSSAVar *var : g[v].reports | map_values) {
- aux->reporters[var].insert(v);
- }
-
- for (GoughSSAVar *var : g[v].reports_eod | map_values) {
- aux->reporters[var].insert(v);
- }
- }
- for (const auto &e : edges_range(g)) {
- for (const auto &var : g[e].vars) {
- aux->containing_e[var.get()] = e;
- DEBUG_PRINTF("%u is on edge %u->%u\n", var->slot,
- g[source(e, g)].state_id, g[target(e, g)].state_id);
- }
- }
-}
-
-static
-bool is_block_local(const GoughGraph &cfg, GoughSSAVar *var,
- const GoughGraphAux &aux) {
- /* if var used as a report, it cannot be considered block local */
- if (contains(aux.reporters, var)) {
- return false;
- }
-
- /* (useful) vertex/join vars never local - they are terminal in blocks
- * and so should be read by another block. */
- if (!contains(aux.containing_e, var)) {
- return false;
- }
-
- /* for other cases, require that all uses of var are later in the same edge
- * or on the target AND if on target it is sole on flow coming from the
- * edge in question. */
- const GoughEdge &e = aux.containing_e.at(var);
- GoughVertex t = target(e, cfg);
-
- size_t seen_outputs = 0;
- const flat_set<GoughSSAVarWithInputs *> &out = var->get_outputs();
- bool seen_var = false;
- for (const auto &e_var : cfg[e].vars) {
- if (seen_var) {
- GoughSSAVarWithInputs *w
- = dynamic_cast<GoughSSAVarWithInputs *>(e_var.get());
- if (contains(out, w)) {
- seen_outputs++;
- }
- } else {
- seen_var = var == e_var.get();
- }
- }
- assert(seen_var);
-
- for (const auto &t_var : cfg[t].vars) {
- if (contains(out, t_var.get())) {
- seen_outputs++;
- const flat_set<GoughEdge> &flow = t_var->get_edges_for_input(var);
- if (flow.size() != 1 || *flow.begin() != e) {
- /* this var is used by the target join var BUT on a different
- * flow, so this is not a block local variable */
- return false;
- }
- }
- }
-
- assert(seen_outputs <= out.size());
- return seen_outputs == out.size();
-}
-
-static
-void handle_pending_edge(const GoughGraph &g, const GoughEdge &e,
- GoughSSAVar *start, set<GoughVertex> &pending_vertex,
- set<const GoughSSAVar *> &rv) {
- const vector<shared_ptr<GoughSSAVar> > &vars = g[e].vars;
- bool marking = !start;
- DEBUG_PRINTF(" ---checking edge %u->%u %s %zu\n", g[source(e, g)].state_id,
- g[target(e, g)].state_id, marking ? "full" : "partial",
- vars.size());
- for (auto it = vars.rbegin(); it != vars.rend(); ++it) {
- GoughSSAVar *var = it->get();
- if (contains(rv, var)) {
- DEBUG_PRINTF("somebody has already processed this vertex [%u]\n",
- var->slot);
- return;
- }
- if (var == start) {
- assert(!marking);
- marking = true;
- continue;
- }
- if (marking) {
- rv.insert(var);
- }
- }
- assert(marking);
- GoughVertex s = source(e, g);
- for (const auto &var : g[s].vars) {
- DEBUG_PRINTF("interferes %u\n", var->slot);
- rv.insert(var.get());
- }
- pending_vertex.insert(s);
-}
-
-static
-void handle_pending_vars(GoughSSAVar *def, const GoughGraph &g,
- const GoughGraphAux &aux,
- const flat_set<GoughSSAVarWithInputs *> &pending_var,
- set<GoughVertex> &pending_vertex,
- set<const GoughSSAVar *> &rv) {
- for (GoughSSAVarWithInputs *var : pending_var) {
- if (contains(aux.containing_v, var)) {
- /* def is used by join vertex, value only needs to be live on some
- * incoming edges */
- GoughSSAVarJoin *vj = (GoughSSAVarJoin *)var;
- const flat_set<GoughEdge> &live_edges
- = vj->get_edges_for_input(def);
- for (const auto &e : live_edges) {
- handle_pending_edge(g, e, nullptr, pending_vertex, rv);
- }
- continue;
- }
- const GoughEdge &e = aux.containing_e.at(var);
- handle_pending_edge(g, e, var, pending_vertex, rv);
- }
-}
-
-static
-void handle_pending_vertex(GoughVertex def_v, const GoughGraph &g,
- GoughVertex current,
- set<GoughVertex> &pending_vertex,
- set<const GoughSSAVar *> &rv) {
- DEBUG_PRINTF("---checking vertex %u\n", g[current].state_id);
- if (def_v == current) {
- DEBUG_PRINTF("contains target vertex\n");
- return; /* we have reached def */
- }
- for (const auto &e : in_edges_range(current, g)) {
- handle_pending_edge(g, e, nullptr, pending_vertex, rv);
- }
-}
-
-static
-void handle_pending_vertices(GoughSSAVar *def, const GoughGraph &g,
- const GoughGraphAux &aux,
- set<GoughVertex> &pending_vertex,
- set<const GoughSSAVar *> &rv) {
- if (pending_vertex.empty()) {
- return;
- }
-
- GoughVertex def_v = GoughGraph::null_vertex();
- if (contains(aux.containing_v, def)) {
- def_v = aux.containing_v.at(def);
- }
+#include "util/graph.h"
+#include "util/graph_range.h"
+#include "util/order_check.h"
+
+#include "ue2common.h"
+
+#include <algorithm>
+#include <boost/graph/depth_first_search.hpp>
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_values;
+
+namespace ue2 {
+
+template<typename VarP, typename VarQ>
+void push_back_all_raw(vector<VarP> *out, const vector<VarQ> &in) {
+ for (const auto &var : in) {
+ out->push_back(var.get());
+ }
+}
+
+static
+void all_vars(const GoughGraph &g, vector<GoughSSAVar *> *out) {
+ for (auto v : vertices_range(g)) {
+ push_back_all_raw(out, g[v].vars);
+ }
+ for (const auto &e : edges_range(g)) {
+ push_back_all_raw(out, g[e].vars);
+ }
+}
+
+namespace {
+struct GoughGraphAux {
+ map<const GoughSSAVar *, GoughVertex> containing_v;
+ map<const GoughSSAVar *, GoughEdge> containing_e;
+ map<const GoughSSAVar *, set<GoughVertex> > reporters;
+};
+}
+
+static never_inline
+void fill_aux(const GoughGraph &g, GoughGraphAux *aux) {
+ for (auto v : vertices_range(g)) {
+ for (const auto &var : g[v].vars) {
+ aux->containing_v[var.get()] = v;
+ DEBUG_PRINTF("%u is on vertex %u\n", var->slot, g[v].state_id);
+ }
+
+ for (GoughSSAVar *var : g[v].reports | map_values) {
+ aux->reporters[var].insert(v);
+ }
+
+ for (GoughSSAVar *var : g[v].reports_eod | map_values) {
+ aux->reporters[var].insert(v);
+ }
+ }
+ for (const auto &e : edges_range(g)) {
+ for (const auto &var : g[e].vars) {
+ aux->containing_e[var.get()] = e;
+ DEBUG_PRINTF("%u is on edge %u->%u\n", var->slot,
+ g[source(e, g)].state_id, g[target(e, g)].state_id);
+ }
+ }
+}
+
+static
+bool is_block_local(const GoughGraph &cfg, GoughSSAVar *var,
+ const GoughGraphAux &aux) {
+ /* if var used as a report, it cannot be considered block local */
+ if (contains(aux.reporters, var)) {
+ return false;
+ }
+
+ /* (useful) vertex/join vars never local - they are terminal in blocks
+ * and so should be read by another block. */
+ if (!contains(aux.containing_e, var)) {
+ return false;
+ }
+
+ /* for other cases, require that all uses of var are later in the same edge
+ * or on the target AND if on target it is sole on flow coming from the
+ * edge in question. */
+ const GoughEdge &e = aux.containing_e.at(var);
+ GoughVertex t = target(e, cfg);
+
+ size_t seen_outputs = 0;
+ const flat_set<GoughSSAVarWithInputs *> &out = var->get_outputs();
+ bool seen_var = false;
+ for (const auto &e_var : cfg[e].vars) {
+ if (seen_var) {
+ GoughSSAVarWithInputs *w
+ = dynamic_cast<GoughSSAVarWithInputs *>(e_var.get());
+ if (contains(out, w)) {
+ seen_outputs++;
+ }
+ } else {
+ seen_var = var == e_var.get();
+ }
+ }
+ assert(seen_var);
+
+ for (const auto &t_var : cfg[t].vars) {
+ if (contains(out, t_var.get())) {
+ seen_outputs++;
+ const flat_set<GoughEdge> &flow = t_var->get_edges_for_input(var);
+ if (flow.size() != 1 || *flow.begin() != e) {
+ /* this var is used by the target join var BUT on a different
+ * flow, so this is not a block local variable */
+ return false;
+ }
+ }
+ }
+
+ assert(seen_outputs <= out.size());
+ return seen_outputs == out.size();
+}
+
+static
+void handle_pending_edge(const GoughGraph &g, const GoughEdge &e,
+ GoughSSAVar *start, set<GoughVertex> &pending_vertex,
+ set<const GoughSSAVar *> &rv) {
+ const vector<shared_ptr<GoughSSAVar> > &vars = g[e].vars;
+ bool marking = !start;
+ DEBUG_PRINTF(" ---checking edge %u->%u %s %zu\n", g[source(e, g)].state_id,
+ g[target(e, g)].state_id, marking ? "full" : "partial",
+ vars.size());
+ for (auto it = vars.rbegin(); it != vars.rend(); ++it) {
+ GoughSSAVar *var = it->get();
+ if (contains(rv, var)) {
+ DEBUG_PRINTF("somebody has already processed this vertex [%u]\n",
+ var->slot);
+ return;
+ }
+ if (var == start) {
+ assert(!marking);
+ marking = true;
+ continue;
+ }
+ if (marking) {
+ rv.insert(var);
+ }
+ }
+ assert(marking);
+ GoughVertex s = source(e, g);
+ for (const auto &var : g[s].vars) {
+ DEBUG_PRINTF("interferes %u\n", var->slot);
+ rv.insert(var.get());
+ }
+ pending_vertex.insert(s);
+}
+
+static
+void handle_pending_vars(GoughSSAVar *def, const GoughGraph &g,
+ const GoughGraphAux &aux,
+ const flat_set<GoughSSAVarWithInputs *> &pending_var,
+ set<GoughVertex> &pending_vertex,
+ set<const GoughSSAVar *> &rv) {
+ for (GoughSSAVarWithInputs *var : pending_var) {
+ if (contains(aux.containing_v, var)) {
+ /* def is used by join vertex, value only needs to be live on some
+ * incoming edges */
+ GoughSSAVarJoin *vj = (GoughSSAVarJoin *)var;
+ const flat_set<GoughEdge> &live_edges
+ = vj->get_edges_for_input(def);
+ for (const auto &e : live_edges) {
+ handle_pending_edge(g, e, nullptr, pending_vertex, rv);
+ }
+ continue;
+ }
+ const GoughEdge &e = aux.containing_e.at(var);
+ handle_pending_edge(g, e, var, pending_vertex, rv);
+ }
+}
+
+static
+void handle_pending_vertex(GoughVertex def_v, const GoughGraph &g,
+ GoughVertex current,
+ set<GoughVertex> &pending_vertex,
+ set<const GoughSSAVar *> &rv) {
+ DEBUG_PRINTF("---checking vertex %u\n", g[current].state_id);
+ if (def_v == current) {
+ DEBUG_PRINTF("contains target vertex\n");
+ return; /* we have reached def */
+ }
+ for (const auto &e : in_edges_range(current, g)) {
+ handle_pending_edge(g, e, nullptr, pending_vertex, rv);
+ }
+}
+
+static
+void handle_pending_vertices(GoughSSAVar *def, const GoughGraph &g,
+ const GoughGraphAux &aux,
+ set<GoughVertex> &pending_vertex,
+ set<const GoughSSAVar *> &rv) {
+ if (pending_vertex.empty()) {
+ return;
+ }
+
+ GoughVertex def_v = GoughGraph::null_vertex();
+ if (contains(aux.containing_v, def)) {
+ def_v = aux.containing_v.at(def);
+ }
unordered_set<GoughVertex> done;
- while (!pending_vertex.empty()) {
- GoughVertex current = *pending_vertex.begin();
- pending_vertex.erase(current);
- if (contains(done, current)) {
- continue;
- }
- done.insert(current);
- handle_pending_vertex(def_v, g, current, pending_vertex, rv);
- }
-}
-
-/* returns set of labels that the given def is live at */
-static never_inline
-set<const GoughSSAVar *> live_during(GoughSSAVar *def, const GoughGraph &g,
- const GoughGraphAux &aux) {
- DEBUG_PRINTF("checking who is defined during %u lifetime\n", def->slot);
- set<GoughVertex> pending_vertex;
-
- set<const GoughSSAVar *> rv;
- rv.insert(def);
-
- if (contains(aux.reporters, def)) {
- DEBUG_PRINTF("--> gets reported\n");
- const set<GoughVertex> &reporters = aux.reporters.at(def);
- for (auto v : reporters) {
- pending_vertex.insert(v);
- for (const auto &var : g[v].vars) {
- DEBUG_PRINTF("interferes %u\n", var->slot);
- rv.insert(var.get());
- }
- }
- }
-
- handle_pending_vars(def, g, aux, def->get_outputs(), pending_vertex, rv);
- handle_pending_vertices(def, g, aux, pending_vertex, rv);
-
- rv.erase(def);
- return rv;
-}
-
-template<typename VarP>
-void set_initial_slots(const vector<VarP> &vars, u32 *next_slot) {
- for (auto &var : vars) {
- assert(var->slot == INVALID_SLOT);
- var->slot = (*next_slot)++;
- }
-}
-
-/* crude, deterministic assignment of symbolic register slots.
- * returns number of slots given out
- */
-static
-u32 initial_slots(const GoughGraph &g) {
- u32 next_slot = 0;
- for (auto v : vertices_range(g)) {
- set_initial_slots(g[v].vars, &next_slot);
- }
- for (const auto &e : edges_range(g)) {
- set_initial_slots(g[e].vars, &next_slot);
- }
-
- return next_slot;
-}
-
-#define NO_COLOUR (~0U)
-
-static
-u32 available_colour(const flat_set<u32> &bad_colours) {
- u32 rv = 0;
- for (const u32 &colour : bad_colours) {
- if (colour != rv) {
- assert(colour > rv);
- break;
- }
- rv = colour + 1;
- }
-
- assert(rv != NO_COLOUR);
- return rv;
-}
-
-static
-void poison_colours(const set<const GoughSSAVar *> &live, u32 c,
- const vector<u32> &colour_map,
- vector<flat_set<u32> > *bad_colour) {
- for (const GoughSSAVar *var : live) {
- u32 var_index = var->slot;
- if (colour_map[var_index] != NO_COLOUR) {
- assert(c != colour_map[var_index]);
- } else {
- (*bad_colour)[var_index].insert(c);
- }
- }
-}
-
-static
-void find_bad_due_to_live(const set<const GoughSSAVar *> &live,
- const vector<u32> &colour_map, flat_set<u32> *out) {
- for (const GoughSSAVar *var : live) {
- u32 var_index = var->slot;
- if (colour_map[var_index] != NO_COLOUR) {
- out->insert(colour_map[var_index]);
- }
- }
-}
-
-static
-void sequential_vertex_colouring(const GoughGraph &g, const GoughGraphAux &aux,
- const vector<GoughSSAVar *> &order,
- vector<u32> &colour_map) {
- assert(order.size() < NO_COLOUR);
- colour_map.clear();
- colour_map.resize(order.size(), NO_COLOUR);
- vector<u32> temp(order.size(), ~0U);
- vector<flat_set<u32> > bad_colour(order.size());
-
- for (GoughSSAVar *var : order) {
- u32 var_index = var->slot;
- if (is_block_local(g, var, aux)) {
- DEBUG_PRINTF("%u is block local\n", var_index);
- /* ignore variable whose lifetime is limited to their local block
- * there is no need to assign stream state to these variables */
- continue;
- }
- assert(colour_map[var_index] == NO_COLOUR);
- set<const GoughSSAVar *> live = live_during(var, g, aux);
- flat_set<u32> &local_bad = bad_colour[var_index];
- find_bad_due_to_live(live, colour_map, &local_bad);
- DEBUG_PRINTF("colouring %u\n", var_index);
- u32 c = available_colour(local_bad);
- colour_map[var_index] = c;
- assert(!contains(bad_colour[var_index], c));
- poison_colours(live, c, colour_map, &bad_colour);
-
- flat_set<u32> temp_set;
- local_bad.swap(temp_set);
- DEBUG_PRINTF(" %u coloured %u\n", var_index, c);
- }
-}
-
-template<typename VarP>
-void add_to_dom_ordering(const vector<VarP> &vars,
- vector<GoughSSAVar *> *out) {
- for (const auto &var : vars) {
- out->push_back(var.get());
- }
-}
-
-namespace {
-class FinishVisitor : public boost::default_dfs_visitor {
-public:
- explicit FinishVisitor(vector<GoughVertex> *o) : out(o) {}
- void finish_vertex(const GoughVertex v, const GoughGraph &) {
- out->push_back(v);
- }
- vector<GoughVertex> *out;
-};
-}
-
-static
-void find_dom_ordering(const GoughGraph &cfg, vector<GoughSSAVar *> *out) {
- vector<GoughVertex> g_order;
-
- /* due to construction quirks, default vertex order provides entry points */
- depth_first_search(cfg, visitor(FinishVisitor(&g_order))
- .root_vertex(cfg[boost::graph_bundle].initial_vertex));
-
- for (auto it = g_order.rbegin(); it != g_order.rend(); ++it) {
- add_to_dom_ordering(cfg[*it].vars, out);
- for (const auto &e : out_edges_range(*it, cfg)) {
- add_to_dom_ordering(cfg[e].vars, out);
- }
- }
-}
-
-static
-void create_slot_mapping(const GoughGraph &cfg, UNUSED u32 old_slot_count,
- vector<u32> *old_new) {
- /* Interference graphs from SSA form are chordal -> optimally colourable in
- * poly time.
- *
- * Chordal graphs can be coloured by walking in perfect elimination order.
- * If the SSA CFG is iterated over in a way that respects dominance
- * relationship, the interference graph will be iterated in a perfect
- * elimination order.
- *
- * We can avoid creating the full interference graph and use liveness
- * information as we iterate over the definitions to perform the colouring.
- *
- * See S Hack various 2006-
- */
- vector<GoughSSAVar *> dom_order;
-
- GoughGraphAux aux;
- fill_aux(cfg, &aux);
-
- find_dom_ordering(cfg, &dom_order);
- assert(dom_order.size() == old_slot_count);
- sequential_vertex_colouring(cfg, aux, dom_order, *old_new);
-}
-
-static
-void update_local_slots(GoughGraph &g, set<GoughSSAVar *> &locals,
- u32 local_base) {
- DEBUG_PRINTF("%zu local variables\n", locals.size());
- /* local variables only occur on edges (joins are never local) */
-
- u32 allocated_count = 0;
- for (const auto &e : edges_range(g)) {
- u32 next_slot = local_base;
- for (auto &var : g[e].vars) {
- if (contains(locals, var.get())) {
- DEBUG_PRINTF("updating slot %u using local %u\n", var->slot,
- next_slot);
- var->slot = next_slot++;
- allocated_count++;
- }
- }
- }
-
- assert(allocated_count == locals.size());
-}
-
-static never_inline
-u32 update_slots(GoughGraph &g, const vector<u32> &old_new,
- UNUSED u32 old_slot_count) {
- vector<GoughSSAVar *> vars;
- set<GoughSSAVar *> locals;
- all_vars(g, &vars);
- u32 slot_count = 0;
- for (GoughSSAVar *v : vars) {
- assert(v->slot < old_new.size());
- DEBUG_PRINTF("updating slot %u to %u\n", v->slot, old_new[v->slot]);
- if (old_new[v->slot] != NO_COLOUR) { /* not local, assign final slot */
- v->slot = old_new[v->slot];
- ENSURE_AT_LEAST(&slot_count, v->slot + 1);
- } else {
- locals.insert(v);
- }
- }
- assert(slot_count <= old_slot_count);
- DEBUG_PRINTF("reduce stream slots from %u to %u\n", old_slot_count,
- slot_count);
- update_local_slots(g, locals, slot_count);
-
- return slot_count;
-}
-
-u32 assign_slots(GoughGraph &cfg, const Grey &grey) {
- u32 slot_count = initial_slots(cfg);
-
- if (!grey.goughRegisterAllocate) {
- return slot_count;
- }
- dump(cfg, "slots_pre", grey);
-
- vector<u32> old_new;
- create_slot_mapping(cfg, slot_count, &old_new);
- slot_count = update_slots(cfg, old_new, slot_count);
-
- return slot_count;
-}
-
-} // namespace ue2
+ while (!pending_vertex.empty()) {
+ GoughVertex current = *pending_vertex.begin();
+ pending_vertex.erase(current);
+ if (contains(done, current)) {
+ continue;
+ }
+ done.insert(current);
+ handle_pending_vertex(def_v, g, current, pending_vertex, rv);
+ }
+}
+
+/* returns set of labels that the given def is live at */
+static never_inline
+set<const GoughSSAVar *> live_during(GoughSSAVar *def, const GoughGraph &g,
+ const GoughGraphAux &aux) {
+ DEBUG_PRINTF("checking who is defined during %u lifetime\n", def->slot);
+ set<GoughVertex> pending_vertex;
+
+ set<const GoughSSAVar *> rv;
+ rv.insert(def);
+
+ if (contains(aux.reporters, def)) {
+ DEBUG_PRINTF("--> gets reported\n");
+ const set<GoughVertex> &reporters = aux.reporters.at(def);
+ for (auto v : reporters) {
+ pending_vertex.insert(v);
+ for (const auto &var : g[v].vars) {
+ DEBUG_PRINTF("interferes %u\n", var->slot);
+ rv.insert(var.get());
+ }
+ }
+ }
+
+ handle_pending_vars(def, g, aux, def->get_outputs(), pending_vertex, rv);
+ handle_pending_vertices(def, g, aux, pending_vertex, rv);
+
+ rv.erase(def);
+ return rv;
+}
+
+template<typename VarP>
+void set_initial_slots(const vector<VarP> &vars, u32 *next_slot) {
+ for (auto &var : vars) {
+ assert(var->slot == INVALID_SLOT);
+ var->slot = (*next_slot)++;
+ }
+}
+
+/* crude, deterministic assignment of symbolic register slots.
+ * returns number of slots given out
+ */
+static
+u32 initial_slots(const GoughGraph &g) {
+ u32 next_slot = 0;
+ for (auto v : vertices_range(g)) {
+ set_initial_slots(g[v].vars, &next_slot);
+ }
+ for (const auto &e : edges_range(g)) {
+ set_initial_slots(g[e].vars, &next_slot);
+ }
+
+ return next_slot;
+}
+
+#define NO_COLOUR (~0U)
+
+static
+u32 available_colour(const flat_set<u32> &bad_colours) {
+ u32 rv = 0;
+ for (const u32 &colour : bad_colours) {
+ if (colour != rv) {
+ assert(colour > rv);
+ break;
+ }
+ rv = colour + 1;
+ }
+
+ assert(rv != NO_COLOUR);
+ return rv;
+}
+
+static
+void poison_colours(const set<const GoughSSAVar *> &live, u32 c,
+ const vector<u32> &colour_map,
+ vector<flat_set<u32> > *bad_colour) {
+ for (const GoughSSAVar *var : live) {
+ u32 var_index = var->slot;
+ if (colour_map[var_index] != NO_COLOUR) {
+ assert(c != colour_map[var_index]);
+ } else {
+ (*bad_colour)[var_index].insert(c);
+ }
+ }
+}
+
+static
+void find_bad_due_to_live(const set<const GoughSSAVar *> &live,
+ const vector<u32> &colour_map, flat_set<u32> *out) {
+ for (const GoughSSAVar *var : live) {
+ u32 var_index = var->slot;
+ if (colour_map[var_index] != NO_COLOUR) {
+ out->insert(colour_map[var_index]);
+ }
+ }
+}
+
+static
+void sequential_vertex_colouring(const GoughGraph &g, const GoughGraphAux &aux,
+ const vector<GoughSSAVar *> &order,
+ vector<u32> &colour_map) {
+ assert(order.size() < NO_COLOUR);
+ colour_map.clear();
+ colour_map.resize(order.size(), NO_COLOUR);
+ vector<u32> temp(order.size(), ~0U);
+ vector<flat_set<u32> > bad_colour(order.size());
+
+ for (GoughSSAVar *var : order) {
+ u32 var_index = var->slot;
+ if (is_block_local(g, var, aux)) {
+ DEBUG_PRINTF("%u is block local\n", var_index);
+ /* ignore variable whose lifetime is limited to their local block
+ * there is no need to assign stream state to these variables */
+ continue;
+ }
+ assert(colour_map[var_index] == NO_COLOUR);
+ set<const GoughSSAVar *> live = live_during(var, g, aux);
+ flat_set<u32> &local_bad = bad_colour[var_index];
+ find_bad_due_to_live(live, colour_map, &local_bad);
+ DEBUG_PRINTF("colouring %u\n", var_index);
+ u32 c = available_colour(local_bad);
+ colour_map[var_index] = c;
+ assert(!contains(bad_colour[var_index], c));
+ poison_colours(live, c, colour_map, &bad_colour);
+
+ flat_set<u32> temp_set;
+ local_bad.swap(temp_set);
+ DEBUG_PRINTF(" %u coloured %u\n", var_index, c);
+ }
+}
+
+template<typename VarP>
+void add_to_dom_ordering(const vector<VarP> &vars,
+ vector<GoughSSAVar *> *out) {
+ for (const auto &var : vars) {
+ out->push_back(var.get());
+ }
+}
+
+namespace {
+class FinishVisitor : public boost::default_dfs_visitor {
+public:
+ explicit FinishVisitor(vector<GoughVertex> *o) : out(o) {}
+ void finish_vertex(const GoughVertex v, const GoughGraph &) {
+ out->push_back(v);
+ }
+ vector<GoughVertex> *out;
+};
+}
+
+static
+void find_dom_ordering(const GoughGraph &cfg, vector<GoughSSAVar *> *out) {
+ vector<GoughVertex> g_order;
+
+ /* due to construction quirks, default vertex order provides entry points */
+ depth_first_search(cfg, visitor(FinishVisitor(&g_order))
+ .root_vertex(cfg[boost::graph_bundle].initial_vertex));
+
+ for (auto it = g_order.rbegin(); it != g_order.rend(); ++it) {
+ add_to_dom_ordering(cfg[*it].vars, out);
+ for (const auto &e : out_edges_range(*it, cfg)) {
+ add_to_dom_ordering(cfg[e].vars, out);
+ }
+ }
+}
+
+static
+void create_slot_mapping(const GoughGraph &cfg, UNUSED u32 old_slot_count,
+ vector<u32> *old_new) {
+ /* Interference graphs from SSA form are chordal -> optimally colourable in
+ * poly time.
+ *
+ * Chordal graphs can be coloured by walking in perfect elimination order.
+ * If the SSA CFG is iterated over in a way that respects dominance
+ * relationship, the interference graph will be iterated in a perfect
+ * elimination order.
+ *
+ * We can avoid creating the full interference graph and use liveness
+ * information as we iterate over the definitions to perform the colouring.
+ *
+ * See S Hack various 2006-
+ */
+ vector<GoughSSAVar *> dom_order;
+
+ GoughGraphAux aux;
+ fill_aux(cfg, &aux);
+
+ find_dom_ordering(cfg, &dom_order);
+ assert(dom_order.size() == old_slot_count);
+ sequential_vertex_colouring(cfg, aux, dom_order, *old_new);
+}
+
+static
+void update_local_slots(GoughGraph &g, set<GoughSSAVar *> &locals,
+ u32 local_base) {
+ DEBUG_PRINTF("%zu local variables\n", locals.size());
+ /* local variables only occur on edges (joins are never local) */
+
+ u32 allocated_count = 0;
+ for (const auto &e : edges_range(g)) {
+ u32 next_slot = local_base;
+ for (auto &var : g[e].vars) {
+ if (contains(locals, var.get())) {
+ DEBUG_PRINTF("updating slot %u using local %u\n", var->slot,
+ next_slot);
+ var->slot = next_slot++;
+ allocated_count++;
+ }
+ }
+ }
+
+ assert(allocated_count == locals.size());
+}
+
+static never_inline
+u32 update_slots(GoughGraph &g, const vector<u32> &old_new,
+ UNUSED u32 old_slot_count) {
+ vector<GoughSSAVar *> vars;
+ set<GoughSSAVar *> locals;
+ all_vars(g, &vars);
+ u32 slot_count = 0;
+ for (GoughSSAVar *v : vars) {
+ assert(v->slot < old_new.size());
+ DEBUG_PRINTF("updating slot %u to %u\n", v->slot, old_new[v->slot]);
+ if (old_new[v->slot] != NO_COLOUR) { /* not local, assign final slot */
+ v->slot = old_new[v->slot];
+ ENSURE_AT_LEAST(&slot_count, v->slot + 1);
+ } else {
+ locals.insert(v);
+ }
+ }
+ assert(slot_count <= old_slot_count);
+ DEBUG_PRINTF("reduce stream slots from %u to %u\n", old_slot_count,
+ slot_count);
+ update_local_slots(g, locals, slot_count);
+
+ return slot_count;
+}
+
+u32 assign_slots(GoughGraph &cfg, const Grey &grey) {
+ u32 slot_count = initial_slots(cfg);
+
+ if (!grey.goughRegisterAllocate) {
+ return slot_count;
+ }
+ dump(cfg, "slots_pre", grey);
+
+ vector<u32> old_new;
+ create_slot_mapping(cfg, slot_count, &old_new);
+ slot_count = update_slots(cfg, old_new, slot_count);
+
+ return slot_count;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfa/lbr.c b/contrib/libs/hyperscan/src/nfa/lbr.c
index c0433f87ca..d403733a65 100644
--- a/contrib/libs/hyperscan/src/nfa/lbr.c
+++ b/contrib/libs/hyperscan/src/nfa/lbr.c
@@ -1,531 +1,531 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Large Bounded Repeat (LBR) engine: runtime code.
- */
-#include "lbr.h"
-
-#include "lbr_internal.h"
-#include "nfa_api.h"
-#include "nfa_api_queue.h"
-#include "nfa_internal.h"
-#include "repeat.h"
-#include "repeat_internal.h"
-#include "shufti.h"
-#include "truffle.h"
-#include "vermicelli.h"
-#include "util/partial_store.h"
-#include "util/unaligned.h"
-
-/** \brief Sentinel value used to indicate that a repeat is dead/empty/unused.
- * * */
-#define REPEAT_DEAD 0xffffffffffffffffull
-
-enum MatchMode {
- CALLBACK_OUTPUT,
- STOP_AT_MATCH,
-};
-
-static really_inline
-const struct RepeatInfo *getRepeatInfo(const struct lbr_common *l) {
- const struct RepeatInfo *repeatInfo =
- (const struct RepeatInfo *)((const char *)l + l->repeatInfoOffset);
- return repeatInfo;
-}
-
-static really_inline
-void lbrCompressState(const struct lbr_common *l, u64a offset,
- const struct lbr_state *lstate, char *stream_state) {
- assert(l && lstate && stream_state);
- assert(ISALIGNED(lstate));
-
- const struct RepeatInfo *info = getRepeatInfo(l);
- repeatPack(stream_state, info, &lstate->ctrl, offset);
-}
-
-static really_inline
-void lbrExpandState(const struct lbr_common *l, u64a offset,
- const char *stream_state, struct lbr_state *lstate) {
- assert(l && stream_state && lstate);
- assert(ISALIGNED(lstate));
-
- const struct RepeatInfo *info = getRepeatInfo(l);
- repeatUnpack(stream_state, info, offset, &lstate->ctrl);
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Large Bounded Repeat (LBR) engine: runtime code.
+ */
+#include "lbr.h"
+
+#include "lbr_internal.h"
+#include "nfa_api.h"
+#include "nfa_api_queue.h"
+#include "nfa_internal.h"
+#include "repeat.h"
+#include "repeat_internal.h"
+#include "shufti.h"
+#include "truffle.h"
+#include "vermicelli.h"
+#include "util/partial_store.h"
+#include "util/unaligned.h"
+
+/** \brief Sentinel value used to indicate that a repeat is dead/empty/unused.
+ * * */
+#define REPEAT_DEAD 0xffffffffffffffffull
+
+enum MatchMode {
+ CALLBACK_OUTPUT,
+ STOP_AT_MATCH,
+};
+
+static really_inline
+const struct RepeatInfo *getRepeatInfo(const struct lbr_common *l) {
+ const struct RepeatInfo *repeatInfo =
+ (const struct RepeatInfo *)((const char *)l + l->repeatInfoOffset);
+ return repeatInfo;
+}
+
+static really_inline
+void lbrCompressState(const struct lbr_common *l, u64a offset,
+ const struct lbr_state *lstate, char *stream_state) {
+ assert(l && lstate && stream_state);
+ assert(ISALIGNED(lstate));
+
+ const struct RepeatInfo *info = getRepeatInfo(l);
+ repeatPack(stream_state, info, &lstate->ctrl, offset);
+}
+
+static really_inline
+void lbrExpandState(const struct lbr_common *l, u64a offset,
+ const char *stream_state, struct lbr_state *lstate) {
+ assert(l && stream_state && lstate);
+ assert(ISALIGNED(lstate));
+
+ const struct RepeatInfo *info = getRepeatInfo(l);
+ repeatUnpack(stream_state, info, offset, &lstate->ctrl);
lstate->lastEscape = 0;
-}
-
-static really_inline
-void clearRepeat(const struct RepeatInfo *info, struct lbr_state *lstate) {
- assert(info && lstate);
-
- DEBUG_PRINTF("clear repeat at %p\n", lstate);
-
- switch ((enum RepeatType)info->type) {
- case REPEAT_RING:
- lstate->ctrl.ring.offset = REPEAT_DEAD;
- break;
- case REPEAT_RANGE:
- lstate->ctrl.range.offset = REPEAT_DEAD;
- break;
- case REPEAT_FIRST:
- case REPEAT_LAST:
- lstate->ctrl.offset.offset = REPEAT_DEAD;
- break;
- case REPEAT_BITMAP:
- lstate->ctrl.bitmap.offset = REPEAT_DEAD;
- break;
- case REPEAT_SPARSE_OPTIMAL_P:
- lstate->ctrl.ring.offset = REPEAT_DEAD;
- break;
- case REPEAT_TRAILER:
- lstate->ctrl.trailer.offset = REPEAT_DEAD;
- break;
- default:
- assert(0);
- break;
- }
-}
-
-static really_inline
-char repeatIsDead(const struct RepeatInfo *info,
- const struct lbr_state *lstate) {
- assert(info && lstate);
-
- switch ((enum RepeatType)info->type) {
- case REPEAT_RING:
- return lstate->ctrl.ring.offset == REPEAT_DEAD;
- case REPEAT_RANGE:
- return lstate->ctrl.range.offset == REPEAT_DEAD;
- case REPEAT_FIRST:
- case REPEAT_LAST:
- return lstate->ctrl.offset.offset == REPEAT_DEAD;
- case REPEAT_BITMAP:
- return lstate->ctrl.bitmap.offset == REPEAT_DEAD;
- case REPEAT_SPARSE_OPTIMAL_P:
- return lstate->ctrl.ring.offset == REPEAT_DEAD;
- case REPEAT_TRAILER:
- return lstate->ctrl.trailer.offset == REPEAT_DEAD;
+}
+
+static really_inline
+void clearRepeat(const struct RepeatInfo *info, struct lbr_state *lstate) {
+ assert(info && lstate);
+
+ DEBUG_PRINTF("clear repeat at %p\n", lstate);
+
+ switch ((enum RepeatType)info->type) {
+ case REPEAT_RING:
+ lstate->ctrl.ring.offset = REPEAT_DEAD;
+ break;
+ case REPEAT_RANGE:
+ lstate->ctrl.range.offset = REPEAT_DEAD;
+ break;
+ case REPEAT_FIRST:
+ case REPEAT_LAST:
+ lstate->ctrl.offset.offset = REPEAT_DEAD;
+ break;
+ case REPEAT_BITMAP:
+ lstate->ctrl.bitmap.offset = REPEAT_DEAD;
+ break;
+ case REPEAT_SPARSE_OPTIMAL_P:
+ lstate->ctrl.ring.offset = REPEAT_DEAD;
+ break;
+ case REPEAT_TRAILER:
+ lstate->ctrl.trailer.offset = REPEAT_DEAD;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static really_inline
+char repeatIsDead(const struct RepeatInfo *info,
+ const struct lbr_state *lstate) {
+ assert(info && lstate);
+
+ switch ((enum RepeatType)info->type) {
+ case REPEAT_RING:
+ return lstate->ctrl.ring.offset == REPEAT_DEAD;
+ case REPEAT_RANGE:
+ return lstate->ctrl.range.offset == REPEAT_DEAD;
+ case REPEAT_FIRST:
+ case REPEAT_LAST:
+ return lstate->ctrl.offset.offset == REPEAT_DEAD;
+ case REPEAT_BITMAP:
+ return lstate->ctrl.bitmap.offset == REPEAT_DEAD;
+ case REPEAT_SPARSE_OPTIMAL_P:
+ return lstate->ctrl.ring.offset == REPEAT_DEAD;
+ case REPEAT_TRAILER:
+ return lstate->ctrl.trailer.offset == REPEAT_DEAD;
case REPEAT_ALWAYS:
assert(!"REPEAT_ALWAYS should only be used by Castle");
return 0;
- }
-
- assert(0);
- return 1;
-}
-
-/** Returns true if the LBR can produce matches at offsets greater than the
- * given one. TODO: can this be combined with lbrIsActive? */
-static really_inline
-char lbrIsAlive(const struct lbr_common *l, const struct lbr_state *lstate,
- const char *state, u64a offset) {
- assert(l && lstate && state);
-
- const struct RepeatInfo *info = getRepeatInfo(l);
- if (repeatIsDead(info, lstate)) {
- DEBUG_PRINTF("repeat is dead\n");
- return 0;
- }
-
- if (info->repeatMax == REPEAT_INF) {
- DEBUG_PRINTF("active repeat with inf max bound, alive\n");
- return 1;
- }
-
- assert(info->repeatMax < REPEAT_INF);
- const char *repeatState = state + info->packedCtrlSize;
- u64a lastTop = repeatLastTop(info, &lstate->ctrl, repeatState);
- if (offset < lastTop + info->repeatMax) {
- DEBUG_PRINTF("alive, as we can still produce matches after %llu\n",
- offset);
- return 1;
- }
-
- DEBUG_PRINTF("dead\n");
- return 0;
-}
-
-/** Returns true if the LBR is matching at the given offset or it could produce
- * a match in the future. */
-static really_inline
-char lbrIsActive(const struct lbr_common *l, const struct lbr_state *lstate,
- const char *state, u64a offset) {
- assert(l && lstate && state);
- const struct RepeatInfo *info = getRepeatInfo(l);
- assert(!repeatIsDead(info, lstate)); // Guaranteed by caller.
-
- const char *repeatState = state + info->packedCtrlSize;
- if (repeatHasMatch(info, &lstate->ctrl, repeatState, offset) ==
- REPEAT_MATCH) {
- DEBUG_PRINTF("currently matching\n");
- return 1;
- }
-
- u64a i = repeatNextMatch(info, &lstate->ctrl, repeatState, offset);
- if (i != 0) {
- DEBUG_PRINTF("active, next match is at %llu\n", i);
- return 1;
- }
-
- DEBUG_PRINTF("no more matches\n");
- return 0;
-}
-
-static really_inline
-void lbrTop(const struct lbr_common *l, struct lbr_state *lstate, char *state,
- u64a offset) {
- assert(l && lstate && state);
- DEBUG_PRINTF("top at %llu\n", offset);
-
- const struct RepeatInfo *info = getRepeatInfo(l);
- char *repeatState = state + info->packedCtrlSize;
-
- char is_alive = !repeatIsDead(info, lstate);
- if (is_alive) {
- // Ignore duplicate TOPs.
- u64a last = repeatLastTop(info, &lstate->ctrl, repeatState);
- assert(last <= offset);
- if (last == offset) {
- return;
- }
- }
-
- repeatStore(info, &lstate->ctrl, repeatState, offset, is_alive);
-}
-
-static really_inline
-char lbrInAccept(const struct lbr_common *l, const struct lbr_state *lstate,
- const char *state, u64a offset, ReportID report) {
- assert(l && lstate && state);
- DEBUG_PRINTF("offset=%llu, report=%u\n", offset, report);
-
- if (report != l->report) {
- DEBUG_PRINTF("report=%u is not LBR report %u\n", report, l->report);
- return 0;
- }
-
- const struct RepeatInfo *info = getRepeatInfo(l);
- assert(!repeatIsDead(info, lstate)); // Guaranteed by caller.
-
- const char *repeatState = state + info->packedCtrlSize;
- return repeatHasMatch(info, &lstate->ctrl, repeatState, offset) ==
- REPEAT_MATCH;
-}
-
-static really_inline
-char lbrFindMatch(const struct lbr_common *l, const u64a begin, const u64a end,
- const struct lbr_state *lstate, const char *state,
- size_t *mloc) {
- DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end);
- assert(begin <= end);
-
- if (begin == end) {
- return 0;
- }
-
- const struct RepeatInfo *info = getRepeatInfo(l);
- const char *repeatState = state + info->packedCtrlSize;
- u64a i = repeatNextMatch(info, &lstate->ctrl, repeatState, begin);
- if (i == 0) {
- DEBUG_PRINTF("no more matches\n");
- return 0;
- }
- if (i > end) {
- DEBUG_PRINTF("next match at %llu is beyond the horizon\n", i);
- return 0;
- }
-
- DEBUG_PRINTF("stop at match at %llu\n", i);
- assert(mloc);
- *mloc = i - begin;
- return 1;
-}
-
-static really_inline
-char lbrMatchLoop(const struct lbr_common *l, const u64a begin, const u64a end,
- const struct lbr_state *lstate, const char *state,
- NfaCallback cb, void *ctx) {
- DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end);
- assert(begin <= end);
-
- if (begin == end) {
- return MO_CONTINUE_MATCHING;
- }
-
- const struct RepeatInfo *info = getRepeatInfo(l);
- const char *repeatState = state + info->packedCtrlSize;
-
- u64a i = begin;
- for (;;) {
- i = repeatNextMatch(info, &lstate->ctrl, repeatState, i);
- if (i == 0) {
- DEBUG_PRINTF("no more matches\n");
- return MO_CONTINUE_MATCHING;
- }
- if (i > end) {
- DEBUG_PRINTF("next match at %llu is beyond the horizon\n", i);
- return MO_CONTINUE_MATCHING;
- }
-
- DEBUG_PRINTF("firing match at %llu\n", i);
+ }
+
+ assert(0);
+ return 1;
+}
+
+/** Returns true if the LBR can produce matches at offsets greater than the
+ * given one. TODO: can this be combined with lbrIsActive? */
+static really_inline
+char lbrIsAlive(const struct lbr_common *l, const struct lbr_state *lstate,
+ const char *state, u64a offset) {
+ assert(l && lstate && state);
+
+ const struct RepeatInfo *info = getRepeatInfo(l);
+ if (repeatIsDead(info, lstate)) {
+ DEBUG_PRINTF("repeat is dead\n");
+ return 0;
+ }
+
+ if (info->repeatMax == REPEAT_INF) {
+ DEBUG_PRINTF("active repeat with inf max bound, alive\n");
+ return 1;
+ }
+
+ assert(info->repeatMax < REPEAT_INF);
+ const char *repeatState = state + info->packedCtrlSize;
+ u64a lastTop = repeatLastTop(info, &lstate->ctrl, repeatState);
+ if (offset < lastTop + info->repeatMax) {
+ DEBUG_PRINTF("alive, as we can still produce matches after %llu\n",
+ offset);
+ return 1;
+ }
+
+ DEBUG_PRINTF("dead\n");
+ return 0;
+}
+
+/** Returns true if the LBR is matching at the given offset or it could produce
+ * a match in the future. */
+static really_inline
+char lbrIsActive(const struct lbr_common *l, const struct lbr_state *lstate,
+ const char *state, u64a offset) {
+ assert(l && lstate && state);
+ const struct RepeatInfo *info = getRepeatInfo(l);
+ assert(!repeatIsDead(info, lstate)); // Guaranteed by caller.
+
+ const char *repeatState = state + info->packedCtrlSize;
+ if (repeatHasMatch(info, &lstate->ctrl, repeatState, offset) ==
+ REPEAT_MATCH) {
+ DEBUG_PRINTF("currently matching\n");
+ return 1;
+ }
+
+ u64a i = repeatNextMatch(info, &lstate->ctrl, repeatState, offset);
+ if (i != 0) {
+ DEBUG_PRINTF("active, next match is at %llu\n", i);
+ return 1;
+ }
+
+ DEBUG_PRINTF("no more matches\n");
+ return 0;
+}
+
+static really_inline
+void lbrTop(const struct lbr_common *l, struct lbr_state *lstate, char *state,
+ u64a offset) {
+ assert(l && lstate && state);
+ DEBUG_PRINTF("top at %llu\n", offset);
+
+ const struct RepeatInfo *info = getRepeatInfo(l);
+ char *repeatState = state + info->packedCtrlSize;
+
+ char is_alive = !repeatIsDead(info, lstate);
+ if (is_alive) {
+ // Ignore duplicate TOPs.
+ u64a last = repeatLastTop(info, &lstate->ctrl, repeatState);
+ assert(last <= offset);
+ if (last == offset) {
+ return;
+ }
+ }
+
+ repeatStore(info, &lstate->ctrl, repeatState, offset, is_alive);
+}
+
+static really_inline
+char lbrInAccept(const struct lbr_common *l, const struct lbr_state *lstate,
+ const char *state, u64a offset, ReportID report) {
+ assert(l && lstate && state);
+ DEBUG_PRINTF("offset=%llu, report=%u\n", offset, report);
+
+ if (report != l->report) {
+ DEBUG_PRINTF("report=%u is not LBR report %u\n", report, l->report);
+ return 0;
+ }
+
+ const struct RepeatInfo *info = getRepeatInfo(l);
+ assert(!repeatIsDead(info, lstate)); // Guaranteed by caller.
+
+ const char *repeatState = state + info->packedCtrlSize;
+ return repeatHasMatch(info, &lstate->ctrl, repeatState, offset) ==
+ REPEAT_MATCH;
+}
+
+static really_inline
+char lbrFindMatch(const struct lbr_common *l, const u64a begin, const u64a end,
+ const struct lbr_state *lstate, const char *state,
+ size_t *mloc) {
+ DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end);
+ assert(begin <= end);
+
+ if (begin == end) {
+ return 0;
+ }
+
+ const struct RepeatInfo *info = getRepeatInfo(l);
+ const char *repeatState = state + info->packedCtrlSize;
+ u64a i = repeatNextMatch(info, &lstate->ctrl, repeatState, begin);
+ if (i == 0) {
+ DEBUG_PRINTF("no more matches\n");
+ return 0;
+ }
+ if (i > end) {
+ DEBUG_PRINTF("next match at %llu is beyond the horizon\n", i);
+ return 0;
+ }
+
+ DEBUG_PRINTF("stop at match at %llu\n", i);
+ assert(mloc);
+ *mloc = i - begin;
+ return 1;
+}
+
+static really_inline
+char lbrMatchLoop(const struct lbr_common *l, const u64a begin, const u64a end,
+ const struct lbr_state *lstate, const char *state,
+ NfaCallback cb, void *ctx) {
+ DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end);
+ assert(begin <= end);
+
+ if (begin == end) {
+ return MO_CONTINUE_MATCHING;
+ }
+
+ const struct RepeatInfo *info = getRepeatInfo(l);
+ const char *repeatState = state + info->packedCtrlSize;
+
+ u64a i = begin;
+ for (;;) {
+ i = repeatNextMatch(info, &lstate->ctrl, repeatState, i);
+ if (i == 0) {
+ DEBUG_PRINTF("no more matches\n");
+ return MO_CONTINUE_MATCHING;
+ }
+ if (i > end) {
+ DEBUG_PRINTF("next match at %llu is beyond the horizon\n", i);
+ return MO_CONTINUE_MATCHING;
+ }
+
+ DEBUG_PRINTF("firing match at %llu\n", i);
if (cb(0, i, l->report, ctx) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
-
- assert(0);
- return MO_CONTINUE_MATCHING;
-}
-
-static really_inline
-char lbrRevScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf,
- UNUSED size_t begin, UNUSED size_t end,
- UNUSED size_t *loc) {
- assert(begin <= end);
+ return MO_HALT_MATCHING;
+ }
+ }
+
+ assert(0);
+ return MO_CONTINUE_MATCHING;
+}
+
+static really_inline
+char lbrRevScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf,
+ UNUSED size_t begin, UNUSED size_t end,
+ UNUSED size_t *loc) {
+ assert(begin <= end);
assert(nfa->type == LBR_NFA_DOT);
- // Nothing can kill a dot!
- return 0;
-}
-
-static really_inline
-char lbrRevScanVerm(const struct NFA *nfa, const u8 *buf,
- size_t begin, size_t end, size_t *loc) {
- assert(begin <= end);
+ // Nothing can kill a dot!
+ return 0;
+}
+
+static really_inline
+char lbrRevScanVerm(const struct NFA *nfa, const u8 *buf,
+ size_t begin, size_t end, size_t *loc) {
+ assert(begin <= end);
assert(nfa->type == LBR_NFA_VERM);
- const struct lbr_verm *l = getImplNfa(nfa);
-
- if (begin == end) {
- return 0;
- }
-
- const u8 *ptr = rvermicelliExec(l->c, 0, buf + begin, buf + end);
- if (ptr == buf + begin - 1) {
- DEBUG_PRINTF("no escape found\n");
- return 0;
- }
-
- assert(loc);
- *loc = (size_t)(ptr - buf);
- DEBUG_PRINTF("escape found at offset %zu\n", *loc);
- assert((char)*ptr == l->c);
- return 1;
-}
-
-static really_inline
-char lbrRevScanNVerm(const struct NFA *nfa, const u8 *buf,
- size_t begin, size_t end, size_t *loc) {
- assert(begin <= end);
+ const struct lbr_verm *l = getImplNfa(nfa);
+
+ if (begin == end) {
+ return 0;
+ }
+
+ const u8 *ptr = rvermicelliExec(l->c, 0, buf + begin, buf + end);
+ if (ptr == buf + begin - 1) {
+ DEBUG_PRINTF("no escape found\n");
+ return 0;
+ }
+
+ assert(loc);
+ *loc = (size_t)(ptr - buf);
+ DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+ assert((char)*ptr == l->c);
+ return 1;
+}
+
+static really_inline
+char lbrRevScanNVerm(const struct NFA *nfa, const u8 *buf,
+ size_t begin, size_t end, size_t *loc) {
+ assert(begin <= end);
assert(nfa->type == LBR_NFA_NVERM);
- const struct lbr_verm *l = getImplNfa(nfa);
-
- if (begin == end) {
- return 0;
- }
-
- const u8 *ptr = rnvermicelliExec(l->c, 0, buf + begin, buf + end);
- if (ptr == buf + begin - 1) {
- DEBUG_PRINTF("no escape found\n");
- return 0;
- }
-
- assert(loc);
- *loc = (size_t)(ptr - buf);
- DEBUG_PRINTF("escape found at offset %zu\n", *loc);
- assert((char)*ptr != l->c);
- return 1;
-}
-
-static really_inline
-char lbrRevScanShuf(const struct NFA *nfa, const u8 *buf,
- size_t begin, size_t end,
- size_t *loc) {
- assert(begin <= end);
+ const struct lbr_verm *l = getImplNfa(nfa);
+
+ if (begin == end) {
+ return 0;
+ }
+
+ const u8 *ptr = rnvermicelliExec(l->c, 0, buf + begin, buf + end);
+ if (ptr == buf + begin - 1) {
+ DEBUG_PRINTF("no escape found\n");
+ return 0;
+ }
+
+ assert(loc);
+ *loc = (size_t)(ptr - buf);
+ DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+ assert((char)*ptr != l->c);
+ return 1;
+}
+
+static really_inline
+char lbrRevScanShuf(const struct NFA *nfa, const u8 *buf,
+ size_t begin, size_t end,
+ size_t *loc) {
+ assert(begin <= end);
assert(nfa->type == LBR_NFA_SHUF);
- const struct lbr_shuf *l = getImplNfa(nfa);
-
- if (begin == end) {
- return 0;
- }
-
- const u8 *ptr = rshuftiExec(l->mask_lo, l->mask_hi, buf + begin, buf + end);
- if (ptr == buf + begin - 1) {
- DEBUG_PRINTF("no escape found\n");
- return 0;
- }
-
- assert(loc);
- *loc = (size_t)(ptr - buf);
- DEBUG_PRINTF("escape found at offset %zu\n", *loc);
- return 1;
-}
-
-static really_inline
-char lbrRevScanTruf(const struct NFA *nfa, const u8 *buf,
- size_t begin, size_t end,
- size_t *loc) {
- assert(begin <= end);
+ const struct lbr_shuf *l = getImplNfa(nfa);
+
+ if (begin == end) {
+ return 0;
+ }
+
+ const u8 *ptr = rshuftiExec(l->mask_lo, l->mask_hi, buf + begin, buf + end);
+ if (ptr == buf + begin - 1) {
+ DEBUG_PRINTF("no escape found\n");
+ return 0;
+ }
+
+ assert(loc);
+ *loc = (size_t)(ptr - buf);
+ DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+ return 1;
+}
+
+static really_inline
+char lbrRevScanTruf(const struct NFA *nfa, const u8 *buf,
+ size_t begin, size_t end,
+ size_t *loc) {
+ assert(begin <= end);
assert(nfa->type == LBR_NFA_TRUF);
- const struct lbr_truf *l = getImplNfa(nfa);
-
- if (begin == end) {
- return 0;
- }
-
- const u8 *ptr = rtruffleExec(l->mask1, l->mask2, buf + begin, buf + end);
- if (ptr == buf + begin - 1) {
- DEBUG_PRINTF("no escape found\n");
- return 0;
- }
-
- assert(loc);
- *loc = (size_t)(ptr - buf);
- DEBUG_PRINTF("escape found at offset %zu\n", *loc);
- return 1;
-}
-
-static really_inline
-char lbrFwdScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf,
- UNUSED size_t begin, UNUSED size_t end,
- UNUSED size_t *loc) {
- assert(begin <= end);
+ const struct lbr_truf *l = getImplNfa(nfa);
+
+ if (begin == end) {
+ return 0;
+ }
+
+ const u8 *ptr = rtruffleExec(l->mask1, l->mask2, buf + begin, buf + end);
+ if (ptr == buf + begin - 1) {
+ DEBUG_PRINTF("no escape found\n");
+ return 0;
+ }
+
+ assert(loc);
+ *loc = (size_t)(ptr - buf);
+ DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+ return 1;
+}
+
+static really_inline
+char lbrFwdScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf,
+ UNUSED size_t begin, UNUSED size_t end,
+ UNUSED size_t *loc) {
+ assert(begin <= end);
assert(nfa->type == LBR_NFA_DOT);
- // Nothing can kill a dot!
- return 0;
-}
-
-static really_inline
-char lbrFwdScanVerm(const struct NFA *nfa, const u8 *buf,
- size_t begin, size_t end, size_t *loc) {
- assert(begin <= end);
+ // Nothing can kill a dot!
+ return 0;
+}
+
+static really_inline
+char lbrFwdScanVerm(const struct NFA *nfa, const u8 *buf,
+ size_t begin, size_t end, size_t *loc) {
+ assert(begin <= end);
assert(nfa->type == LBR_NFA_VERM);
- const struct lbr_verm *l = getImplNfa(nfa);
-
- if (begin == end) {
- return 0;
- }
-
- const u8 *ptr = vermicelliExec(l->c, 0, buf + begin, buf + end);
- if (ptr == buf + end) {
- DEBUG_PRINTF("no escape found\n");
- return 0;
- }
-
- assert(loc);
- *loc = (size_t)(ptr - buf);
- DEBUG_PRINTF("escape found at offset %zu\n", *loc);
- assert((char)*ptr == l->c);
- return 1;
-}
-
-static really_inline
-char lbrFwdScanNVerm(const struct NFA *nfa, const u8 *buf,
- size_t begin, size_t end, size_t *loc) {
- assert(begin <= end);
+ const struct lbr_verm *l = getImplNfa(nfa);
+
+ if (begin == end) {
+ return 0;
+ }
+
+ const u8 *ptr = vermicelliExec(l->c, 0, buf + begin, buf + end);
+ if (ptr == buf + end) {
+ DEBUG_PRINTF("no escape found\n");
+ return 0;
+ }
+
+ assert(loc);
+ *loc = (size_t)(ptr - buf);
+ DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+ assert((char)*ptr == l->c);
+ return 1;
+}
+
+static really_inline
+char lbrFwdScanNVerm(const struct NFA *nfa, const u8 *buf,
+ size_t begin, size_t end, size_t *loc) {
+ assert(begin <= end);
assert(nfa->type == LBR_NFA_NVERM);
- const struct lbr_verm *l = getImplNfa(nfa);
-
- if (begin == end) {
- return 0;
- }
-
- const u8 *ptr = nvermicelliExec(l->c, 0, buf + begin, buf + end);
- if (ptr == buf + end) {
- DEBUG_PRINTF("no escape found\n");
- return 0;
- }
-
- assert(loc);
- *loc = (size_t)(ptr - buf);
- DEBUG_PRINTF("escape found at offset %zu\n", *loc);
- assert((char)*ptr != l->c);
- return 1;
-}
-
-static really_inline
-char lbrFwdScanShuf(const struct NFA *nfa, const u8 *buf,
- size_t begin, size_t end,
- size_t *loc) {
- assert(begin <= end);
+ const struct lbr_verm *l = getImplNfa(nfa);
+
+ if (begin == end) {
+ return 0;
+ }
+
+ const u8 *ptr = nvermicelliExec(l->c, 0, buf + begin, buf + end);
+ if (ptr == buf + end) {
+ DEBUG_PRINTF("no escape found\n");
+ return 0;
+ }
+
+ assert(loc);
+ *loc = (size_t)(ptr - buf);
+ DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+ assert((char)*ptr != l->c);
+ return 1;
+}
+
+static really_inline
+char lbrFwdScanShuf(const struct NFA *nfa, const u8 *buf,
+ size_t begin, size_t end,
+ size_t *loc) {
+ assert(begin <= end);
assert(nfa->type == LBR_NFA_SHUF);
- const struct lbr_shuf *l = getImplNfa(nfa);
-
- if (begin == end) {
- return 0;
- }
-
- const u8 *ptr = shuftiExec(l->mask_lo, l->mask_hi, buf + begin, buf + end);
- if (ptr == buf + end) {
- DEBUG_PRINTF("no escape found\n");
- return 0;
- }
-
- assert(loc);
- *loc = (size_t)(ptr - buf);
- DEBUG_PRINTF("escape found at offset %zu\n", *loc);
- return 1;
-}
-
-static really_inline
-char lbrFwdScanTruf(const struct NFA *nfa, const u8 *buf,
- size_t begin, size_t end,
- size_t *loc) {
- assert(begin <= end);
+ const struct lbr_shuf *l = getImplNfa(nfa);
+
+ if (begin == end) {
+ return 0;
+ }
+
+ const u8 *ptr = shuftiExec(l->mask_lo, l->mask_hi, buf + begin, buf + end);
+ if (ptr == buf + end) {
+ DEBUG_PRINTF("no escape found\n");
+ return 0;
+ }
+
+ assert(loc);
+ *loc = (size_t)(ptr - buf);
+ DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+ return 1;
+}
+
+static really_inline
+char lbrFwdScanTruf(const struct NFA *nfa, const u8 *buf,
+ size_t begin, size_t end,
+ size_t *loc) {
+ assert(begin <= end);
assert(nfa->type == LBR_NFA_TRUF);
- const struct lbr_truf *l = getImplNfa(nfa);
-
- if (begin == end) {
- return 0;
- }
-
- const u8 *ptr = truffleExec(l->mask1, l->mask2, buf + begin, buf + end);
- if (ptr == buf + end) {
- DEBUG_PRINTF("no escape found\n");
- return 0;
- }
-
- assert(loc);
- *loc = (size_t)(ptr - buf);
- DEBUG_PRINTF("escape found at offset %zu\n", *loc);
- return 1;
-}
-
-#define ENGINE_ROOT_NAME Dot
-#include "lbr_common_impl.h"
-
-#define ENGINE_ROOT_NAME Verm
-#include "lbr_common_impl.h"
-
-#define ENGINE_ROOT_NAME NVerm
-#include "lbr_common_impl.h"
-
-#define ENGINE_ROOT_NAME Shuf
-#include "lbr_common_impl.h"
-
-#define ENGINE_ROOT_NAME Truf
-#include "lbr_common_impl.h"
+ const struct lbr_truf *l = getImplNfa(nfa);
+
+ if (begin == end) {
+ return 0;
+ }
+
+ const u8 *ptr = truffleExec(l->mask1, l->mask2, buf + begin, buf + end);
+ if (ptr == buf + end) {
+ DEBUG_PRINTF("no escape found\n");
+ return 0;
+ }
+
+ assert(loc);
+ *loc = (size_t)(ptr - buf);
+ DEBUG_PRINTF("escape found at offset %zu\n", *loc);
+ return 1;
+}
+
+#define ENGINE_ROOT_NAME Dot
+#include "lbr_common_impl.h"
+
+#define ENGINE_ROOT_NAME Verm
+#include "lbr_common_impl.h"
+
+#define ENGINE_ROOT_NAME NVerm
+#include "lbr_common_impl.h"
+
+#define ENGINE_ROOT_NAME Shuf
+#include "lbr_common_impl.h"
+
+#define ENGINE_ROOT_NAME Truf
+#include "lbr_common_impl.h"
diff --git a/contrib/libs/hyperscan/src/nfa/lbr.h b/contrib/libs/hyperscan/src/nfa/lbr.h
index 237bf8f4cb..a9e42046db 100644
--- a/contrib/libs/hyperscan/src/nfa/lbr.h
+++ b/contrib/libs/hyperscan/src/nfa/lbr.h
@@ -1,150 +1,150 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef LBR_H
-#define LBR_H
-
-#include "ue2common.h"
-
-struct mq;
-struct NFA;
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-// LBR Dot
-
-char nfaExecLbrDot_Q(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecLbrDot_Q2(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecLbrDot_QR(const struct NFA *n, struct mq *q, ReportID report);
-char nfaExecLbrDot_reportCurrent(const struct NFA *n, struct mq *q);
-char nfaExecLbrDot_inAccept(const struct NFA *n, ReportID report, struct mq *q);
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef LBR_H
+#define LBR_H
+
+#include "ue2common.h"
+
+struct mq;
+struct NFA;
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+// LBR Dot
+
+char nfaExecLbrDot_Q(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecLbrDot_Q2(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecLbrDot_QR(const struct NFA *n, struct mq *q, ReportID report);
+char nfaExecLbrDot_reportCurrent(const struct NFA *n, struct mq *q);
+char nfaExecLbrDot_inAccept(const struct NFA *n, ReportID report, struct mq *q);
char nfaExecLbrDot_inAnyAccept(const struct NFA *n, struct mq *q);
-char nfaExecLbrDot_queueInitState(const struct NFA *n, struct mq *q);
-char nfaExecLbrDot_initCompressedState(const struct NFA *n, u64a offset,
- void *state, u8 key);
-char nfaExecLbrDot_queueCompressState(const struct NFA *nfa, const struct mq *q,
- s64a loc);
-char nfaExecLbrDot_expandState(const struct NFA *nfa, void *dest,
- const void *src, u64a offset, u8 key);
-
-#define nfaExecLbrDot_testEOD NFA_API_NO_IMPL
-#define nfaExecLbrDot_B_Reverse NFA_API_NO_IMPL
-#define nfaExecLbrDot_zombie_status NFA_API_ZOMBIE_NO_IMPL
-
-// LBR Verm
-
-char nfaExecLbrVerm_Q(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecLbrVerm_Q2(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecLbrVerm_QR(const struct NFA *n, struct mq *q, ReportID report);
-char nfaExecLbrVerm_reportCurrent(const struct NFA *n, struct mq *q);
-char nfaExecLbrVerm_inAccept(const struct NFA *n, ReportID report,
- struct mq *q);
+char nfaExecLbrDot_queueInitState(const struct NFA *n, struct mq *q);
+char nfaExecLbrDot_initCompressedState(const struct NFA *n, u64a offset,
+ void *state, u8 key);
+char nfaExecLbrDot_queueCompressState(const struct NFA *nfa, const struct mq *q,
+ s64a loc);
+char nfaExecLbrDot_expandState(const struct NFA *nfa, void *dest,
+ const void *src, u64a offset, u8 key);
+
+#define nfaExecLbrDot_testEOD NFA_API_NO_IMPL
+#define nfaExecLbrDot_B_Reverse NFA_API_NO_IMPL
+#define nfaExecLbrDot_zombie_status NFA_API_ZOMBIE_NO_IMPL
+
+// LBR Verm
+
+char nfaExecLbrVerm_Q(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecLbrVerm_Q2(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecLbrVerm_QR(const struct NFA *n, struct mq *q, ReportID report);
+char nfaExecLbrVerm_reportCurrent(const struct NFA *n, struct mq *q);
+char nfaExecLbrVerm_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q);
char nfaExecLbrVerm_inAnyAccept(const struct NFA *n, struct mq *q);
-char nfaExecLbrVerm_queueInitState(const struct NFA *n, struct mq *q);
-char nfaExecLbrVerm_initCompressedState(const struct NFA *n, u64a offset,
- void *state, u8 key);
-char nfaExecLbrVerm_queueCompressState(const struct NFA *nfa,
- const struct mq *q, s64a loc);
-char nfaExecLbrVerm_expandState(const struct NFA *nfa, void *dest,
- const void *src, u64a offset, u8 key);
-
-#define nfaExecLbrVerm_testEOD NFA_API_NO_IMPL
-#define nfaExecLbrVerm_B_Reverse NFA_API_NO_IMPL
-#define nfaExecLbrVerm_zombie_status NFA_API_ZOMBIE_NO_IMPL
-
-// LBR Negated Verm
-
-char nfaExecLbrNVerm_Q(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecLbrNVerm_Q2(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecLbrNVerm_QR(const struct NFA *n, struct mq *q, ReportID report);
-char nfaExecLbrNVerm_reportCurrent(const struct NFA *n, struct mq *q);
-char nfaExecLbrNVerm_inAccept(const struct NFA *n, ReportID report,
- struct mq *q);
+char nfaExecLbrVerm_queueInitState(const struct NFA *n, struct mq *q);
+char nfaExecLbrVerm_initCompressedState(const struct NFA *n, u64a offset,
+ void *state, u8 key);
+char nfaExecLbrVerm_queueCompressState(const struct NFA *nfa,
+ const struct mq *q, s64a loc);
+char nfaExecLbrVerm_expandState(const struct NFA *nfa, void *dest,
+ const void *src, u64a offset, u8 key);
+
+#define nfaExecLbrVerm_testEOD NFA_API_NO_IMPL
+#define nfaExecLbrVerm_B_Reverse NFA_API_NO_IMPL
+#define nfaExecLbrVerm_zombie_status NFA_API_ZOMBIE_NO_IMPL
+
+// LBR Negated Verm
+
+char nfaExecLbrNVerm_Q(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecLbrNVerm_Q2(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecLbrNVerm_QR(const struct NFA *n, struct mq *q, ReportID report);
+char nfaExecLbrNVerm_reportCurrent(const struct NFA *n, struct mq *q);
+char nfaExecLbrNVerm_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q);
char nfaExecLbrNVerm_inAnyAccept(const struct NFA *n, struct mq *q);
-char nfaExecLbrNVerm_queueInitState(const struct NFA *n, struct mq *q);
-char nfaExecLbrNVerm_initCompressedState(const struct NFA *n, u64a offset,
- void *state, u8 key);
-char nfaExecLbrNVerm_queueCompressState(const struct NFA *nfa,
- const struct mq *q, s64a loc);
-char nfaExecLbrNVerm_expandState(const struct NFA *nfa, void *dest,
- const void *src, u64a offset, u8 key);
-
-#define nfaExecLbrNVerm_testEOD NFA_API_NO_IMPL
-#define nfaExecLbrNVerm_B_Reverse NFA_API_NO_IMPL
-#define nfaExecLbrNVerm_zombie_status NFA_API_ZOMBIE_NO_IMPL
-
-// LBR Shuf
-
-char nfaExecLbrShuf_Q(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecLbrShuf_Q2(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecLbrShuf_QR(const struct NFA *n, struct mq *q, ReportID report);
-char nfaExecLbrShuf_reportCurrent(const struct NFA *n, struct mq *q);
-char nfaExecLbrShuf_inAccept(const struct NFA *n, ReportID report,
- struct mq *q);
+char nfaExecLbrNVerm_queueInitState(const struct NFA *n, struct mq *q);
+char nfaExecLbrNVerm_initCompressedState(const struct NFA *n, u64a offset,
+ void *state, u8 key);
+char nfaExecLbrNVerm_queueCompressState(const struct NFA *nfa,
+ const struct mq *q, s64a loc);
+char nfaExecLbrNVerm_expandState(const struct NFA *nfa, void *dest,
+ const void *src, u64a offset, u8 key);
+
+#define nfaExecLbrNVerm_testEOD NFA_API_NO_IMPL
+#define nfaExecLbrNVerm_B_Reverse NFA_API_NO_IMPL
+#define nfaExecLbrNVerm_zombie_status NFA_API_ZOMBIE_NO_IMPL
+
+// LBR Shuf
+
+char nfaExecLbrShuf_Q(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecLbrShuf_Q2(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecLbrShuf_QR(const struct NFA *n, struct mq *q, ReportID report);
+char nfaExecLbrShuf_reportCurrent(const struct NFA *n, struct mq *q);
+char nfaExecLbrShuf_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q);
char nfaExecLbrShuf_inAnyAccept(const struct NFA *n, struct mq *q);
-char nfaExecLbrShuf_queueInitState(const struct NFA *n, struct mq *q);
-char nfaExecLbrShuf_initCompressedState(const struct NFA *n, u64a offset,
- void *state, u8 key);
-char nfaExecLbrShuf_queueCompressState(const struct NFA *nfa,
- const struct mq *q, s64a loc);
-char nfaExecLbrShuf_expandState(const struct NFA *nfa, void *dest,
- const void *src, u64a offset, u8 key);
-
-#define nfaExecLbrShuf_testEOD NFA_API_NO_IMPL
-#define nfaExecLbrShuf_B_Reverse NFA_API_NO_IMPL
-#define nfaExecLbrShuf_zombie_status NFA_API_ZOMBIE_NO_IMPL
-
-// LBR Truffle
-
-char nfaExecLbrTruf_Q(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecLbrTruf_Q2(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecLbrTruf_QR(const struct NFA *n, struct mq *q, ReportID report);
-char nfaExecLbrTruf_reportCurrent(const struct NFA *n, struct mq *q);
-char nfaExecLbrTruf_inAccept(const struct NFA *n, ReportID report,
- struct mq *q);
+char nfaExecLbrShuf_queueInitState(const struct NFA *n, struct mq *q);
+char nfaExecLbrShuf_initCompressedState(const struct NFA *n, u64a offset,
+ void *state, u8 key);
+char nfaExecLbrShuf_queueCompressState(const struct NFA *nfa,
+ const struct mq *q, s64a loc);
+char nfaExecLbrShuf_expandState(const struct NFA *nfa, void *dest,
+ const void *src, u64a offset, u8 key);
+
+#define nfaExecLbrShuf_testEOD NFA_API_NO_IMPL
+#define nfaExecLbrShuf_B_Reverse NFA_API_NO_IMPL
+#define nfaExecLbrShuf_zombie_status NFA_API_ZOMBIE_NO_IMPL
+
+// LBR Truffle
+
+char nfaExecLbrTruf_Q(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecLbrTruf_Q2(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecLbrTruf_QR(const struct NFA *n, struct mq *q, ReportID report);
+char nfaExecLbrTruf_reportCurrent(const struct NFA *n, struct mq *q);
+char nfaExecLbrTruf_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q);
char nfaExecLbrTruf_inAnyAccept(const struct NFA *n, struct mq *q);
-char nfaExecLbrTruf_queueInitState(const struct NFA *n, struct mq *q);
-char nfaExecLbrTruf_initCompressedState(const struct NFA *n, u64a offset,
- void *state, u8 key);
-char nfaExecLbrTruf_queueCompressState(const struct NFA *nfa,
- const struct mq *q, s64a loc);
-char nfaExecLbrTruf_expandState(const struct NFA *nfa, void *dest,
- const void *src, u64a offset, u8 key);
-
-#define nfaExecLbrTruf_testEOD NFA_API_NO_IMPL
-#define nfaExecLbrTruf_B_Reverse NFA_API_NO_IMPL
-#define nfaExecLbrTruf_zombie_status NFA_API_ZOMBIE_NO_IMPL
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+char nfaExecLbrTruf_queueInitState(const struct NFA *n, struct mq *q);
+char nfaExecLbrTruf_initCompressedState(const struct NFA *n, u64a offset,
+ void *state, u8 key);
+char nfaExecLbrTruf_queueCompressState(const struct NFA *nfa,
+ const struct mq *q, s64a loc);
+char nfaExecLbrTruf_expandState(const struct NFA *nfa, void *dest,
+ const void *src, u64a offset, u8 key);
+
+#define nfaExecLbrTruf_testEOD NFA_API_NO_IMPL
+#define nfaExecLbrTruf_B_Reverse NFA_API_NO_IMPL
+#define nfaExecLbrTruf_zombie_status NFA_API_ZOMBIE_NO_IMPL
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/lbr_common_impl.h b/contrib/libs/hyperscan/src/nfa/lbr_common_impl.h
index 40516c4988..5ae35431e4 100644
--- a/contrib/libs/hyperscan/src/nfa/lbr_common_impl.h
+++ b/contrib/libs/hyperscan/src/nfa/lbr_common_impl.h
@@ -1,99 +1,99 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Large Bounded Repeat (LBR) engine: runtime impl X-macros.
- */
-
-#include "util/join.h"
-
-#define ENGINE_EXEC_NAME JOIN(nfaExecLbr, ENGINE_ROOT_NAME)
-#define EXEC_FN JOIN(lbrExec, ENGINE_ROOT_NAME)
-#define FWDSCAN_FN JOIN(lbrFwdScan, ENGINE_ROOT_NAME)
-#define REVSCAN_FN JOIN(lbrRevScan, ENGINE_ROOT_NAME)
-
-char JOIN(ENGINE_EXEC_NAME, _queueCompressState)(const struct NFA *nfa,
- const struct mq *q, s64a loc) {
- assert(nfa && q);
- assert(isLbrType(nfa->type));
- DEBUG_PRINTF("entry, q->offset=%llu, loc=%lld\n", q->offset, loc);
-
- const struct lbr_common *l = getImplNfa(nfa);
- const struct lbr_state *lstate = (const struct lbr_state *)q->state;
-
- u64a offset = q->offset + loc;
- lbrCompressState(l, offset, lstate, q->streamState);
- return 0;
-}
-
-char JOIN(ENGINE_EXEC_NAME, _expandState)(const struct NFA *nfa, void *dest,
- const void *src, u64a offset,
- UNUSED u8 key) {
- assert(nfa);
- assert(isLbrType(nfa->type));
- DEBUG_PRINTF("entry, offset=%llu\n", offset);
-
- const struct lbr_common *l = getImplNfa(nfa);
- struct lbr_state *lstate = (struct lbr_state *)dest;
- lbrExpandState(l, offset, src, lstate);
- return 0;
-}
-
-char JOIN(ENGINE_EXEC_NAME, _reportCurrent)(const struct NFA *nfa,
- struct mq *q) {
- assert(nfa && q);
- assert(isLbrType(nfa->type));
-
- const struct lbr_common *l = getImplNfa(nfa);
- u64a offset = q_cur_offset(q);
- DEBUG_PRINTF("firing match %u at %llu\n", l->report, offset);
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Large Bounded Repeat (LBR) engine: runtime impl X-macros.
+ */
+
+#include "util/join.h"
+
+#define ENGINE_EXEC_NAME JOIN(nfaExecLbr, ENGINE_ROOT_NAME)
+#define EXEC_FN JOIN(lbrExec, ENGINE_ROOT_NAME)
+#define FWDSCAN_FN JOIN(lbrFwdScan, ENGINE_ROOT_NAME)
+#define REVSCAN_FN JOIN(lbrRevScan, ENGINE_ROOT_NAME)
+
+char JOIN(ENGINE_EXEC_NAME, _queueCompressState)(const struct NFA *nfa,
+ const struct mq *q, s64a loc) {
+ assert(nfa && q);
+ assert(isLbrType(nfa->type));
+ DEBUG_PRINTF("entry, q->offset=%llu, loc=%lld\n", q->offset, loc);
+
+ const struct lbr_common *l = getImplNfa(nfa);
+ const struct lbr_state *lstate = (const struct lbr_state *)q->state;
+
+ u64a offset = q->offset + loc;
+ lbrCompressState(l, offset, lstate, q->streamState);
+ return 0;
+}
+
+char JOIN(ENGINE_EXEC_NAME, _expandState)(const struct NFA *nfa, void *dest,
+ const void *src, u64a offset,
+ UNUSED u8 key) {
+ assert(nfa);
+ assert(isLbrType(nfa->type));
+ DEBUG_PRINTF("entry, offset=%llu\n", offset);
+
+ const struct lbr_common *l = getImplNfa(nfa);
+ struct lbr_state *lstate = (struct lbr_state *)dest;
+ lbrExpandState(l, offset, src, lstate);
+ return 0;
+}
+
+char JOIN(ENGINE_EXEC_NAME, _reportCurrent)(const struct NFA *nfa,
+ struct mq *q) {
+ assert(nfa && q);
+ assert(isLbrType(nfa->type));
+
+ const struct lbr_common *l = getImplNfa(nfa);
+ u64a offset = q_cur_offset(q);
+ DEBUG_PRINTF("firing match %u at %llu\n", l->report, offset);
q->cb(0, offset, l->report, q->context);
- return 0;
-}
-
-char JOIN(ENGINE_EXEC_NAME, _inAccept)(const struct NFA *nfa,
- ReportID report, struct mq *q) {
- assert(nfa && q);
- assert(isLbrType(nfa->type));
- DEBUG_PRINTF("entry\n");
-
- const struct lbr_common *l = getImplNfa(nfa);
- const struct RepeatInfo *info = getRepeatInfo(l);
- const struct lbr_state *lstate = (const struct lbr_state *)q->state;
- if (repeatIsDead(info, lstate)) {
- DEBUG_PRINTF("repeat is dead\n");
- return 0;
- }
-
- u64a offset = q->offset + q_last_loc(q);
- return lbrInAccept(l, lstate, q->streamState, offset, report);
-}
-
+ return 0;
+}
+
+char JOIN(ENGINE_EXEC_NAME, _inAccept)(const struct NFA *nfa,
+ ReportID report, struct mq *q) {
+ assert(nfa && q);
+ assert(isLbrType(nfa->type));
+ DEBUG_PRINTF("entry\n");
+
+ const struct lbr_common *l = getImplNfa(nfa);
+ const struct RepeatInfo *info = getRepeatInfo(l);
+ const struct lbr_state *lstate = (const struct lbr_state *)q->state;
+ if (repeatIsDead(info, lstate)) {
+ DEBUG_PRINTF("repeat is dead\n");
+ return 0;
+ }
+
+ u64a offset = q->offset + q_last_loc(q);
+ return lbrInAccept(l, lstate, q->streamState, offset, report);
+}
+
char JOIN(ENGINE_EXEC_NAME, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
assert(nfa && q);
assert(isLbrType(nfa->type));
@@ -103,360 +103,360 @@ char JOIN(ENGINE_EXEC_NAME, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
return JOIN(ENGINE_EXEC_NAME, _inAccept)(nfa, l->report, q);
}
-char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA *nfa,
- struct mq *q) {
- assert(nfa && q);
- assert(isLbrType(nfa->type));
- DEBUG_PRINTF("entry\n");
-
- const struct lbr_common *l = getImplNfa(nfa);
- const struct RepeatInfo *info = getRepeatInfo(l);
-
- assert(q->state);
- struct lbr_state *lstate = (struct lbr_state *)q->state;
- assert(ISALIGNED(lstate));
-
- lstate->lastEscape = 0;
- clearRepeat(info, lstate);
-
- return 0;
-}
-
-char JOIN(ENGINE_EXEC_NAME, _initCompressedState)(const struct NFA *nfa,
- u64a offset,
- void *state, UNUSED u8 key) {
- assert(nfa && state);
- assert(isLbrType(nfa->type));
- DEBUG_PRINTF("entry\n");
-
- const struct lbr_common *l = getImplNfa(nfa);
- const struct RepeatInfo *info = getRepeatInfo(l);
- struct lbr_state lstate; // temp control block on stack.
- clearRepeat(info, &lstate);
- lbrTop(l, &lstate, state, offset);
- lbrCompressState(l, offset, &lstate, state);
-
- return 1; // LBR is alive
-}
-
-// FIXME: this function could be much simpler for a Dot LBR, as all it needs to
-// do is find the next top.
-static really_inline
-char JOIN(ENGINE_EXEC_NAME, _TopScan)(const struct NFA *nfa, struct mq *q,
- s64a end) {
- const struct lbr_common *l = getImplNfa(nfa);
- const struct RepeatInfo *info = getRepeatInfo(l);
-
- const u64a offset = q->offset;
- struct lbr_state *lstate = (struct lbr_state *)q->state;
- assert(ISALIGNED(lstate));
-
- assert(repeatIsDead(info, lstate));
- assert(q->cur < q->end);
-
- DEBUG_PRINTF("entry, end=%lld, offset=%llu, lastEscape=%llu\n", end,
- offset, lstate->lastEscape);
-
- while (1) {
- // Find the next top with location >= the last escape we saw.
- for (; q->cur < q->end && q_cur_loc(q) <= end; q->cur++) {
- u32 event = q_cur_type(q);
- if ((event == MQE_TOP || event == MQE_TOP_FIRST) &&
- q_cur_offset(q) >= lstate->lastEscape) {
- goto found_top;
- }
- DEBUG_PRINTF("skip event type=%u offset=%lld\n", event, q_cur_offset(q));
- }
-
- // No more tops, we're done.
- break;
-
-found_top:;
- assert(q->cur < q->end);
-
- u64a sp = q_cur_offset(q);
- u64a first_match = sp + info->repeatMin;
- DEBUG_PRINTF("first possible match is at %llu\n", first_match);
-
- u64a ep = MIN(MIN(end, (s64a)q->length) + offset, first_match);
- if (ep > sp && sp >= offset) {
- size_t eloc;
- DEBUG_PRINTF("rev b%llu e%llu/%zu\n", sp - offset, ep - offset,
- q->length);
- assert(ep - offset <= q->length);
- if (REVSCAN_FN(nfa, q->buffer, sp - offset, ep - offset, &eloc)) {
- DEBUG_PRINTF("escape found at %llu\n", offset + eloc);
- lstate->lastEscape = eloc;
- q->cur++;
- continue;
- }
- }
-
- lbrTop(l, lstate, q->streamState, sp);
- return 1;
- }
-
- DEBUG_PRINTF("exhausted queue\n");
- return 0;
-}
-
-static really_inline
-char JOIN(ENGINE_EXEC_NAME, _Q_i)(const struct NFA *nfa, struct mq *q,
- s64a end, enum MatchMode mode) {
- assert(nfa && q);
- assert(isLbrType(nfa->type));
-
- const struct lbr_common *l = getImplNfa(nfa);
- const struct RepeatInfo *info = getRepeatInfo(l);
-
- struct lbr_state *lstate = (struct lbr_state *)q->state;
- assert(ISALIGNED(lstate));
-
-
- if (q->report_current) {
- DEBUG_PRINTF("report_current: fire match at %llu\n", q_cur_offset(q));
+char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA *nfa,
+ struct mq *q) {
+ assert(nfa && q);
+ assert(isLbrType(nfa->type));
+ DEBUG_PRINTF("entry\n");
+
+ const struct lbr_common *l = getImplNfa(nfa);
+ const struct RepeatInfo *info = getRepeatInfo(l);
+
+ assert(q->state);
+ struct lbr_state *lstate = (struct lbr_state *)q->state;
+ assert(ISALIGNED(lstate));
+
+ lstate->lastEscape = 0;
+ clearRepeat(info, lstate);
+
+ return 0;
+}
+
+char JOIN(ENGINE_EXEC_NAME, _initCompressedState)(const struct NFA *nfa,
+ u64a offset,
+ void *state, UNUSED u8 key) {
+ assert(nfa && state);
+ assert(isLbrType(nfa->type));
+ DEBUG_PRINTF("entry\n");
+
+ const struct lbr_common *l = getImplNfa(nfa);
+ const struct RepeatInfo *info = getRepeatInfo(l);
+ struct lbr_state lstate; // temp control block on stack.
+ clearRepeat(info, &lstate);
+ lbrTop(l, &lstate, state, offset);
+ lbrCompressState(l, offset, &lstate, state);
+
+ return 1; // LBR is alive
+}
+
+// FIXME: this function could be much simpler for a Dot LBR, as all it needs to
+// do is find the next top.
+static really_inline
+char JOIN(ENGINE_EXEC_NAME, _TopScan)(const struct NFA *nfa, struct mq *q,
+ s64a end) {
+ const struct lbr_common *l = getImplNfa(nfa);
+ const struct RepeatInfo *info = getRepeatInfo(l);
+
+ const u64a offset = q->offset;
+ struct lbr_state *lstate = (struct lbr_state *)q->state;
+ assert(ISALIGNED(lstate));
+
+ assert(repeatIsDead(info, lstate));
+ assert(q->cur < q->end);
+
+ DEBUG_PRINTF("entry, end=%lld, offset=%llu, lastEscape=%llu\n", end,
+ offset, lstate->lastEscape);
+
+ while (1) {
+ // Find the next top with location >= the last escape we saw.
+ for (; q->cur < q->end && q_cur_loc(q) <= end; q->cur++) {
+ u32 event = q_cur_type(q);
+ if ((event == MQE_TOP || event == MQE_TOP_FIRST) &&
+ q_cur_offset(q) >= lstate->lastEscape) {
+ goto found_top;
+ }
+ DEBUG_PRINTF("skip event type=%u offset=%lld\n", event, q_cur_offset(q));
+ }
+
+ // No more tops, we're done.
+ break;
+
+found_top:;
+ assert(q->cur < q->end);
+
+ u64a sp = q_cur_offset(q);
+ u64a first_match = sp + info->repeatMin;
+ DEBUG_PRINTF("first possible match is at %llu\n", first_match);
+
+ u64a ep = MIN(MIN(end, (s64a)q->length) + offset, first_match);
+ if (ep > sp && sp >= offset) {
+ size_t eloc;
+ DEBUG_PRINTF("rev b%llu e%llu/%zu\n", sp - offset, ep - offset,
+ q->length);
+ assert(ep - offset <= q->length);
+ if (REVSCAN_FN(nfa, q->buffer, sp - offset, ep - offset, &eloc)) {
+ DEBUG_PRINTF("escape found at %llu\n", offset + eloc);
+ lstate->lastEscape = eloc;
+ q->cur++;
+ continue;
+ }
+ }
+
+ lbrTop(l, lstate, q->streamState, sp);
+ return 1;
+ }
+
+ DEBUG_PRINTF("exhausted queue\n");
+ return 0;
+}
+
+static really_inline
+char JOIN(ENGINE_EXEC_NAME, _Q_i)(const struct NFA *nfa, struct mq *q,
+ s64a end, enum MatchMode mode) {
+ assert(nfa && q);
+ assert(isLbrType(nfa->type));
+
+ const struct lbr_common *l = getImplNfa(nfa);
+ const struct RepeatInfo *info = getRepeatInfo(l);
+
+ struct lbr_state *lstate = (struct lbr_state *)q->state;
+ assert(ISALIGNED(lstate));
+
+
+ if (q->report_current) {
+ DEBUG_PRINTF("report_current: fire match at %llu\n", q_cur_offset(q));
int rv = q->cb(0, q_cur_offset(q), l->report, q->context);
- q->report_current = 0;
- if (rv == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
-
- if (q->cur == q->end) {
- return 1;
- }
-
- assert(q->cur + 1 < q->end); /* require at least two items */
- assert(q_cur_type(q) == MQE_START);
- u64a sp = q_cur_offset(q);
- q->cur++;
- DEBUG_PRINTF("sp=%llu, abs_end=%llu\n", sp, end + q->offset);
-
- while (q->cur < q->end) {
- DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
- q_cur_offset(q));
-
- assert(sp >= q->offset); // not in history
-
- if (repeatIsDead(info, lstate)) {
- DEBUG_PRINTF("repeat is currently dead, skipping scan\n");
- goto scan_done;
- }
-
- u64a ep = q_cur_offset(q);
- ep = MIN(ep, q->offset + end);
- if (sp < ep) {
- size_t eloc = 0;
- char escape_found = 0;
- DEBUG_PRINTF("scanning from sp=%llu to ep=%llu\n", sp, ep);
- assert(sp >= q->offset && ep >= q->offset);
- if (FWDSCAN_FN(nfa, q->buffer, sp - q->offset, ep - q->offset, &eloc)) {
- escape_found = 1;
- ep = q->offset + eloc;
- DEBUG_PRINTF("escape found at %llu\n", ep);
- assert(ep >= sp);
- }
-
- assert(sp <= ep);
-
- if (mode == STOP_AT_MATCH) {
- size_t mloc;
- if (lbrFindMatch(l, sp, ep, lstate, q->streamState, &mloc)) {
- DEBUG_PRINTF("storing match at %llu\n", sp + mloc);
- q->cur--;
- assert(q->cur < MAX_MQE_LEN);
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = (s64a)(sp - q->offset) + mloc;
- return MO_MATCHES_PENDING;
- }
- } else {
- assert(mode == CALLBACK_OUTPUT);
- char rv = lbrMatchLoop(l, sp, ep, lstate, q->streamState, q->cb,
- q->context);
- if (rv == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- assert(rv == MO_CONTINUE_MATCHING);
- }
-
- if (escape_found) {
- DEBUG_PRINTF("clearing repeat due to escape\n");
- clearRepeat(info, lstate);
- }
- }
-
- scan_done:
- if (q_cur_loc(q) > end) {
- q->cur--;
- assert(q->cur < MAX_MQE_LEN);
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = end;
- return MO_ALIVE;
- }
-
- if (repeatIsDead(info, lstate)) {
- if (!JOIN(ENGINE_EXEC_NAME, _TopScan)(nfa, q, end)) {
- assert(repeatIsDead(info, lstate));
- if (q->cur < q->end && q_cur_loc(q) > end) {
- q->cur--;
- assert(q->cur < MAX_MQE_LEN);
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = end;
- return MO_ALIVE;
- }
- return 0;
- }
- DEBUG_PRINTF("cur offset = %llu\n", q_cur_offset(q));
- } else {
- switch (q_cur_type(q)) {
- case MQE_TOP:
- case MQE_TOP_FIRST:
- lbrTop(l, lstate, q->streamState, q_cur_offset(q));
- break;
- case MQE_START:
- case MQE_END:
- break;
- default:
- DEBUG_PRINTF("unhandled event %d!\n", q_cur_type(q));
- assert(0);
- break;
- }
- }
-
- sp = q_cur_offset(q);
- q->cur++;
- }
-
- return lbrIsAlive(l, lstate, q->streamState, sp);
-}
-
-char JOIN(ENGINE_EXEC_NAME, _Q)(const struct NFA *nfa, struct mq *q, s64a end) {
- DEBUG_PRINTF("entry, offset=%llu, end=%lld\n", q->offset, end);
- return JOIN(ENGINE_EXEC_NAME, _Q_i)(nfa, q, end, CALLBACK_OUTPUT);
-}
-
-char JOIN(ENGINE_EXEC_NAME, _Q2)(const struct NFA *nfa, struct mq *q, s64a end) {
- DEBUG_PRINTF("entry, offset=%llu, end=%lld\n", q->offset, end);
- return JOIN(ENGINE_EXEC_NAME, _Q_i)(nfa, q, end, STOP_AT_MATCH);
-}
-
-static really_inline
-void JOIN(ENGINE_EXEC_NAME, _StreamSilent)(const struct NFA *nfa, struct mq *q,
- const u8 *buf, size_t length) {
- const struct lbr_common *l = getImplNfa(nfa);
- const struct RepeatInfo *info = getRepeatInfo(l);
- struct lbr_state *lstate = (struct lbr_state *)q->state;
- assert(ISALIGNED(lstate));
-
- assert(!repeatIsDead(info, lstate));
-
- // This call doesn't produce matches, so we elide the lbrMatchLoop call
- // entirely and just do escape scans to maintain the repeat.
-
- size_t eloc = 0;
- char escaped = FWDSCAN_FN(nfa, buf, 0, length, &eloc);
- if (escaped) {
- assert(eloc < length);
- DEBUG_PRINTF("escape found at %zu, clearing repeat\n", eloc);
- clearRepeat(info, lstate);
- }
-}
-
-// Rose infix path.
-char JOIN(ENGINE_EXEC_NAME, _QR)(const struct NFA *nfa, struct mq *q,
- ReportID report) {
- assert(nfa && q);
- assert(isLbrType(nfa->type));
-
- if (q->cur == q->end) {
- return 1;
- }
-
- assert(q->cur + 1 < q->end); /* require at least two items */
- assert(q_cur_type(q) == MQE_START);
- u64a sp = q_cur_offset(q);
- q->cur++;
- DEBUG_PRINTF("sp=%llu\n", sp);
-
- const struct lbr_common *l = getImplNfa(nfa);
- const struct RepeatInfo *info = getRepeatInfo(l);
- struct lbr_state *lstate = (struct lbr_state *)q->state;
- assert(ISALIGNED(lstate));
- const s64a lastLoc = q_last_loc(q);
-
- while (q->cur < q->end) {
- DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
- q_cur_offset(q));
-
- if (repeatIsDead(info, lstate)) {
- DEBUG_PRINTF("repeat is dead\n");
- goto scan_done;
- }
-
- u64a ep = q_cur_offset(q);
-
- if (sp < q->offset) {
- DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
- assert(q->offset - sp <= q->hlength);
- u64a local_ep = MIN(q->offset, ep);
- const u8 *ptr = q->history + q->hlength + sp - q->offset;
- JOIN(ENGINE_EXEC_NAME, _StreamSilent)(nfa, q, ptr, local_ep - sp);
- sp = local_ep;
- }
-
- if (repeatIsDead(info, lstate)) {
- DEBUG_PRINTF("repeat is dead\n");
- goto scan_done;
- }
-
- if (sp < ep) {
- DEBUG_PRINTF("MAIN BUFFER SCAN\n");
- assert(ep - q->offset <= q->length);
- const u8 *ptr = q->buffer + sp - q->offset;
- JOIN(ENGINE_EXEC_NAME, _StreamSilent)(nfa, q, ptr, ep - sp);
- }
-
- if (repeatIsDead(info, lstate)) {
-scan_done:
- if (!JOIN(ENGINE_EXEC_NAME, _TopScan)(nfa, q, lastLoc)) {
- assert(repeatIsDead(info, lstate));
- assert(q->cur == q->end);
- return 0;
- }
- } else {
- switch (q_cur_type(q)) {
- case MQE_TOP:
- case MQE_TOP_FIRST:
- lbrTop(l, lstate, q->streamState, q_cur_offset(q));
- break;
- case MQE_START:
- case MQE_END:
- break;
- default:
- DEBUG_PRINTF("unhandled event %d!\n", q_cur_type(q));
- assert(0);
- break;
- }
- }
-
- sp = q_cur_offset(q);
- q->cur++;
- }
-
- if (repeatIsDead(info, lstate)) {
- DEBUG_PRINTF("repeat is dead\n");
- return 0;
- }
-
- if (lbrInAccept(l, lstate, q->streamState, sp, report)) {
- return MO_MATCHES_PENDING;
- }
-
- return lbrIsActive(l, lstate, q->streamState, sp);
-}
-
-#undef ENGINE_EXEC_NAME
-#undef EXEC_FN
-#undef FWDSCAN_FN
-#undef REVSCAN_FN
-#undef ENGINE_ROOT_NAME
+ q->report_current = 0;
+ if (rv == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+
+ if (q->cur == q->end) {
+ return 1;
+ }
+
+ assert(q->cur + 1 < q->end); /* require at least two items */
+ assert(q_cur_type(q) == MQE_START);
+ u64a sp = q_cur_offset(q);
+ q->cur++;
+ DEBUG_PRINTF("sp=%llu, abs_end=%llu\n", sp, end + q->offset);
+
+ while (q->cur < q->end) {
+ DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
+ q_cur_offset(q));
+
+ assert(sp >= q->offset); // not in history
+
+ if (repeatIsDead(info, lstate)) {
+ DEBUG_PRINTF("repeat is currently dead, skipping scan\n");
+ goto scan_done;
+ }
+
+ u64a ep = q_cur_offset(q);
+ ep = MIN(ep, q->offset + end);
+ if (sp < ep) {
+ size_t eloc = 0;
+ char escape_found = 0;
+ DEBUG_PRINTF("scanning from sp=%llu to ep=%llu\n", sp, ep);
+ assert(sp >= q->offset && ep >= q->offset);
+ if (FWDSCAN_FN(nfa, q->buffer, sp - q->offset, ep - q->offset, &eloc)) {
+ escape_found = 1;
+ ep = q->offset + eloc;
+ DEBUG_PRINTF("escape found at %llu\n", ep);
+ assert(ep >= sp);
+ }
+
+ assert(sp <= ep);
+
+ if (mode == STOP_AT_MATCH) {
+ size_t mloc;
+ if (lbrFindMatch(l, sp, ep, lstate, q->streamState, &mloc)) {
+ DEBUG_PRINTF("storing match at %llu\n", sp + mloc);
+ q->cur--;
+ assert(q->cur < MAX_MQE_LEN);
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = (s64a)(sp - q->offset) + mloc;
+ return MO_MATCHES_PENDING;
+ }
+ } else {
+ assert(mode == CALLBACK_OUTPUT);
+ char rv = lbrMatchLoop(l, sp, ep, lstate, q->streamState, q->cb,
+ q->context);
+ if (rv == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ assert(rv == MO_CONTINUE_MATCHING);
+ }
+
+ if (escape_found) {
+ DEBUG_PRINTF("clearing repeat due to escape\n");
+ clearRepeat(info, lstate);
+ }
+ }
+
+ scan_done:
+ if (q_cur_loc(q) > end) {
+ q->cur--;
+ assert(q->cur < MAX_MQE_LEN);
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ return MO_ALIVE;
+ }
+
+ if (repeatIsDead(info, lstate)) {
+ if (!JOIN(ENGINE_EXEC_NAME, _TopScan)(nfa, q, end)) {
+ assert(repeatIsDead(info, lstate));
+ if (q->cur < q->end && q_cur_loc(q) > end) {
+ q->cur--;
+ assert(q->cur < MAX_MQE_LEN);
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ return MO_ALIVE;
+ }
+ return 0;
+ }
+ DEBUG_PRINTF("cur offset = %llu\n", q_cur_offset(q));
+ } else {
+ switch (q_cur_type(q)) {
+ case MQE_TOP:
+ case MQE_TOP_FIRST:
+ lbrTop(l, lstate, q->streamState, q_cur_offset(q));
+ break;
+ case MQE_START:
+ case MQE_END:
+ break;
+ default:
+ DEBUG_PRINTF("unhandled event %d!\n", q_cur_type(q));
+ assert(0);
+ break;
+ }
+ }
+
+ sp = q_cur_offset(q);
+ q->cur++;
+ }
+
+ return lbrIsAlive(l, lstate, q->streamState, sp);
+}
+
+char JOIN(ENGINE_EXEC_NAME, _Q)(const struct NFA *nfa, struct mq *q, s64a end) {
+ DEBUG_PRINTF("entry, offset=%llu, end=%lld\n", q->offset, end);
+ return JOIN(ENGINE_EXEC_NAME, _Q_i)(nfa, q, end, CALLBACK_OUTPUT);
+}
+
+char JOIN(ENGINE_EXEC_NAME, _Q2)(const struct NFA *nfa, struct mq *q, s64a end) {
+ DEBUG_PRINTF("entry, offset=%llu, end=%lld\n", q->offset, end);
+ return JOIN(ENGINE_EXEC_NAME, _Q_i)(nfa, q, end, STOP_AT_MATCH);
+}
+
+static really_inline
+void JOIN(ENGINE_EXEC_NAME, _StreamSilent)(const struct NFA *nfa, struct mq *q,
+ const u8 *buf, size_t length) {
+ const struct lbr_common *l = getImplNfa(nfa);
+ const struct RepeatInfo *info = getRepeatInfo(l);
+ struct lbr_state *lstate = (struct lbr_state *)q->state;
+ assert(ISALIGNED(lstate));
+
+ assert(!repeatIsDead(info, lstate));
+
+ // This call doesn't produce matches, so we elide the lbrMatchLoop call
+ // entirely and just do escape scans to maintain the repeat.
+
+ size_t eloc = 0;
+ char escaped = FWDSCAN_FN(nfa, buf, 0, length, &eloc);
+ if (escaped) {
+ assert(eloc < length);
+ DEBUG_PRINTF("escape found at %zu, clearing repeat\n", eloc);
+ clearRepeat(info, lstate);
+ }
+}
+
+// Rose infix path.
+char JOIN(ENGINE_EXEC_NAME, _QR)(const struct NFA *nfa, struct mq *q,
+ ReportID report) {
+ assert(nfa && q);
+ assert(isLbrType(nfa->type));
+
+ if (q->cur == q->end) {
+ return 1;
+ }
+
+ assert(q->cur + 1 < q->end); /* require at least two items */
+ assert(q_cur_type(q) == MQE_START);
+ u64a sp = q_cur_offset(q);
+ q->cur++;
+ DEBUG_PRINTF("sp=%llu\n", sp);
+
+ const struct lbr_common *l = getImplNfa(nfa);
+ const struct RepeatInfo *info = getRepeatInfo(l);
+ struct lbr_state *lstate = (struct lbr_state *)q->state;
+ assert(ISALIGNED(lstate));
+ const s64a lastLoc = q_last_loc(q);
+
+ while (q->cur < q->end) {
+ DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
+ q_cur_offset(q));
+
+ if (repeatIsDead(info, lstate)) {
+ DEBUG_PRINTF("repeat is dead\n");
+ goto scan_done;
+ }
+
+ u64a ep = q_cur_offset(q);
+
+ if (sp < q->offset) {
+ DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
+ assert(q->offset - sp <= q->hlength);
+ u64a local_ep = MIN(q->offset, ep);
+ const u8 *ptr = q->history + q->hlength + sp - q->offset;
+ JOIN(ENGINE_EXEC_NAME, _StreamSilent)(nfa, q, ptr, local_ep - sp);
+ sp = local_ep;
+ }
+
+ if (repeatIsDead(info, lstate)) {
+ DEBUG_PRINTF("repeat is dead\n");
+ goto scan_done;
+ }
+
+ if (sp < ep) {
+ DEBUG_PRINTF("MAIN BUFFER SCAN\n");
+ assert(ep - q->offset <= q->length);
+ const u8 *ptr = q->buffer + sp - q->offset;
+ JOIN(ENGINE_EXEC_NAME, _StreamSilent)(nfa, q, ptr, ep - sp);
+ }
+
+ if (repeatIsDead(info, lstate)) {
+scan_done:
+ if (!JOIN(ENGINE_EXEC_NAME, _TopScan)(nfa, q, lastLoc)) {
+ assert(repeatIsDead(info, lstate));
+ assert(q->cur == q->end);
+ return 0;
+ }
+ } else {
+ switch (q_cur_type(q)) {
+ case MQE_TOP:
+ case MQE_TOP_FIRST:
+ lbrTop(l, lstate, q->streamState, q_cur_offset(q));
+ break;
+ case MQE_START:
+ case MQE_END:
+ break;
+ default:
+ DEBUG_PRINTF("unhandled event %d!\n", q_cur_type(q));
+ assert(0);
+ break;
+ }
+ }
+
+ sp = q_cur_offset(q);
+ q->cur++;
+ }
+
+ if (repeatIsDead(info, lstate)) {
+ DEBUG_PRINTF("repeat is dead\n");
+ return 0;
+ }
+
+ if (lbrInAccept(l, lstate, q->streamState, sp, report)) {
+ return MO_MATCHES_PENDING;
+ }
+
+ return lbrIsActive(l, lstate, q->streamState, sp);
+}
+
+#undef ENGINE_EXEC_NAME
+#undef EXEC_FN
+#undef FWDSCAN_FN
+#undef REVSCAN_FN
+#undef ENGINE_ROOT_NAME
diff --git a/contrib/libs/hyperscan/src/nfa/lbr_internal.h b/contrib/libs/hyperscan/src/nfa/lbr_internal.h
index 51bf42865c..8ba11dd4d2 100644
--- a/contrib/libs/hyperscan/src/nfa/lbr_internal.h
+++ b/contrib/libs/hyperscan/src/nfa/lbr_internal.h
@@ -1,82 +1,82 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Large Bounded Repeat (LBR): data structures.
- */
-
-#ifndef LBR_INTERNAL_H
-#define LBR_INTERNAL_H
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-#include "repeat_internal.h"
-
-/** \brief Common LBR header. */
-struct lbr_common {
- u32 repeatInfoOffset; //!< offset of RepeatInfo structure relative
- // to the start of lbr_common
- ReportID report; //!< report to raise on match
-};
-
-struct lbr_dot {
- struct lbr_common common;
-};
-
-struct lbr_verm {
- struct lbr_common common;
- char c; //!< escape char
-};
-
-struct lbr_shuf {
- struct lbr_common common;
- m128 mask_lo; //!< shufti lo mask for escape chars
- m128 mask_hi; //!< shufti hi mask for escape chars
-};
-
-struct lbr_truf {
- struct lbr_common common;
- m128 mask1;
- m128 mask2;
-};
-
-/** \brief Uncompressed ("full") state structure used by the LBR. This is
- * stored in scratch, not in stream state. */
-struct lbr_state {
- u64a lastEscape; //!< \brief offset of last escape seen.
- union RepeatControl ctrl; //!< \brief repeat control block. */
-};
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // LBR_INTERNAL_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Large Bounded Repeat (LBR): data structures.
+ */
+
+#ifndef LBR_INTERNAL_H
+#define LBR_INTERNAL_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "repeat_internal.h"
+
+/** \brief Common LBR header. */
+struct lbr_common {
+ u32 repeatInfoOffset; //!< offset of RepeatInfo structure relative
+ // to the start of lbr_common
+ ReportID report; //!< report to raise on match
+};
+
+struct lbr_dot {
+ struct lbr_common common;
+};
+
+struct lbr_verm {
+ struct lbr_common common;
+ char c; //!< escape char
+};
+
+struct lbr_shuf {
+ struct lbr_common common;
+ m128 mask_lo; //!< shufti lo mask for escape chars
+ m128 mask_hi; //!< shufti hi mask for escape chars
+};
+
+struct lbr_truf {
+ struct lbr_common common;
+ m128 mask1;
+ m128 mask2;
+};
+
+/** \brief Uncompressed ("full") state structure used by the LBR. This is
+ * stored in scratch, not in stream state. */
+struct lbr_state {
+ u64a lastEscape; //!< \brief offset of last escape seen.
+ union RepeatControl ctrl; //!< \brief repeat control block. */
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // LBR_INTERNAL_H
diff --git a/contrib/libs/hyperscan/src/nfa/limex.h b/contrib/libs/hyperscan/src/nfa/limex.h
index 0c9e276816..0223604dae 100644
--- a/contrib/libs/hyperscan/src/nfa/limex.h
+++ b/contrib/libs/hyperscan/src/nfa/limex.h
@@ -1,91 +1,91 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef LIMEX_H
-#define LIMEX_H
-
-#ifdef __cplusplus
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef LIMEX_H
+#define LIMEX_H
+
+#ifdef __cplusplus
#include <string>
-extern "C"
-{
-#endif
-
-#include "nfa_api.h"
-
-#if defined(DUMP_SUPPORT) && defined(__cplusplus)
-#define GENERATE_NFA_DUMP_DECL(gf_name) \
- } /* extern "C" */ \
- namespace ue2 { \
+extern "C"
+{
+#endif
+
+#include "nfa_api.h"
+
+#if defined(DUMP_SUPPORT) && defined(__cplusplus)
+#define GENERATE_NFA_DUMP_DECL(gf_name) \
+ } /* extern "C" */ \
+ namespace ue2 { \
void gf_name##_dump(const struct NFA *nfa, const std::string &base); \
- } /* namespace ue2 */ \
- extern "C" {
-
-#else
-#define GENERATE_NFA_DUMP_DECL(gf_name)
-#endif
-
-#define GENERATE_NFA_DECL(gf_name) \
- char gf_name##_testEOD(const struct NFA *nfa, const char *state, \
- const char *streamState, u64a offset, \
+ } /* namespace ue2 */ \
+ extern "C" {
+
+#else
+#define GENERATE_NFA_DUMP_DECL(gf_name)
+#endif
+
+#define GENERATE_NFA_DECL(gf_name) \
+ char gf_name##_testEOD(const struct NFA *nfa, const char *state, \
+ const char *streamState, u64a offset, \
NfaCallback callback, void *context); \
- char gf_name##_Q(const struct NFA *n, struct mq *q, s64a end); \
- char gf_name##_Q2(const struct NFA *n, struct mq *q, s64a end); \
- char gf_name##_QR(const struct NFA *n, struct mq *q, ReportID report); \
- char gf_name##_reportCurrent(const struct NFA *n, struct mq *q); \
- char gf_name##_inAccept(const struct NFA *n, ReportID report, \
- struct mq *q); \
+ char gf_name##_Q(const struct NFA *n, struct mq *q, s64a end); \
+ char gf_name##_Q2(const struct NFA *n, struct mq *q, s64a end); \
+ char gf_name##_QR(const struct NFA *n, struct mq *q, ReportID report); \
+ char gf_name##_reportCurrent(const struct NFA *n, struct mq *q); \
+ char gf_name##_inAccept(const struct NFA *n, ReportID report, \
+ struct mq *q); \
char gf_name##_inAnyAccept(const struct NFA *n, struct mq *q); \
- char gf_name##_queueInitState(const struct NFA *n, struct mq *q); \
- char gf_name##_initCompressedState(const struct NFA *n, u64a offset, \
- void *state, u8 key); \
- char gf_name##_B_Reverse(const struct NFA *n, u64a offset, const u8 *buf, \
- size_t buflen, const u8 *hbuf, size_t hlen, \
+ char gf_name##_queueInitState(const struct NFA *n, struct mq *q); \
+ char gf_name##_initCompressedState(const struct NFA *n, u64a offset, \
+ void *state, u8 key); \
+ char gf_name##_B_Reverse(const struct NFA *n, u64a offset, const u8 *buf, \
+ size_t buflen, const u8 *hbuf, size_t hlen, \
NfaCallback cb, void *context); \
- char gf_name##_queueCompressState(const struct NFA *nfa, \
- const struct mq *q, s64a loc); \
- char gf_name##_expandState(const struct NFA *nfa, void *dest, \
- const void *src, u64a offset, u8 key); \
+ char gf_name##_queueCompressState(const struct NFA *nfa, \
+ const struct mq *q, s64a loc); \
+ char gf_name##_expandState(const struct NFA *nfa, void *dest, \
+ const void *src, u64a offset, u8 key); \
enum nfa_zombie_status gf_name##_zombie_status(const struct NFA *nfa, \
struct mq *q, s64a loc); \
- GENERATE_NFA_DUMP_DECL(gf_name)
-
+ GENERATE_NFA_DUMP_DECL(gf_name)
+
GENERATE_NFA_DECL(nfaExecLimEx32)
GENERATE_NFA_DECL(nfaExecLimEx64)
GENERATE_NFA_DECL(nfaExecLimEx128)
GENERATE_NFA_DECL(nfaExecLimEx256)
GENERATE_NFA_DECL(nfaExecLimEx384)
GENERATE_NFA_DECL(nfaExecLimEx512)
-
-#undef GENERATE_NFA_DECL
-#undef GENERATE_NFA_DUMP_DECL
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+
+#undef GENERATE_NFA_DECL
+#undef GENERATE_NFA_DUMP_DECL
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/limex_accel.c b/contrib/libs/hyperscan/src/nfa/limex_accel.c
index 8553044d1a..4834b6a547 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_accel.c
+++ b/contrib/libs/hyperscan/src/nfa/limex_accel.c
@@ -1,85 +1,85 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Limex NFA: acceleration runtime.
- */
-
-#include "limex_accel.h"
-
-#include "accel.h"
-#include "limex_internal.h"
-#include "limex_limits.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Limex NFA: acceleration runtime.
+ */
+
+#include "limex_accel.h"
+
+#include "accel.h"
+#include "limex_internal.h"
+#include "limex_limits.h"
#include "limex_shuffle.h"
-#include "nfa_internal.h"
-#include "shufti.h"
-#include "truffle.h"
-#include "ue2common.h"
-#include "vermicelli.h"
+#include "nfa_internal.h"
+#include "shufti.h"
+#include "truffle.h"
+#include "ue2common.h"
+#include "vermicelli.h"
#include "util/arch.h"
-#include "util/bitutils.h"
-#include "util/simd_utils.h"
-
-static really_inline
-size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
- const u8 *input, u32 idx, size_t i, size_t end) {
- assert(accelTable);
- assert(aux);
-
- DEBUG_PRINTF("shuffle returned %u -> aux %u\n", idx, accelTable[idx]);
- assert(idx < (1 << NFA_MAX_ACCEL_STATES));
- if (!idx) {
- return end;
- }
-
- u8 aux_idx = accelTable[idx];
- if (!aux_idx) {
- assert(aux[0].accel_type == ACCEL_NONE);
- DEBUG_PRINTF("no accel, bailing\n");
- return i;
- }
-
- aux = aux + aux_idx;
+#include "util/bitutils.h"
+#include "util/simd_utils.h"
+
+static really_inline
+size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux,
+ const u8 *input, u32 idx, size_t i, size_t end) {
+ assert(accelTable);
+ assert(aux);
+
+ DEBUG_PRINTF("shuffle returned %u -> aux %u\n", idx, accelTable[idx]);
+ assert(idx < (1 << NFA_MAX_ACCEL_STATES));
+ if (!idx) {
+ return end;
+ }
+
+ u8 aux_idx = accelTable[idx];
+ if (!aux_idx) {
+ assert(aux[0].accel_type == ACCEL_NONE);
+ DEBUG_PRINTF("no accel, bailing\n");
+ return i;
+ }
+
+ aux = aux + aux_idx;
const u8 *ptr = run_accel(aux, &input[i], &input[end]);
- assert(ptr >= &input[i]);
- size_t j = (size_t)(ptr - input);
- DEBUG_PRINTF("accel skipped %zu of %zu chars\n", (j - i), (end - i));
- DEBUG_PRINTF("returning j=%zu (i=%zu, end=%zu)\n", j, i, end);
- return j;
-}
-
-size_t doAccel32(u32 s, u32 accel, const u8 *accelTable,
- const union AccelAux *aux, const u8 *input, size_t i,
- size_t end) {
+ assert(ptr >= &input[i]);
+ size_t j = (size_t)(ptr - input);
+ DEBUG_PRINTF("accel skipped %zu of %zu chars\n", (j - i), (end - i));
+ DEBUG_PRINTF("returning j=%zu (i=%zu, end=%zu)\n", j, i, end);
+ return j;
+}
+
+size_t doAccel32(u32 s, u32 accel, const u8 *accelTable,
+ const union AccelAux *aux, const u8 *input, size_t i,
+ size_t end) {
u32 idx = pext32(s, accel);
- return accelScanWrapper(accelTable, aux, input, idx, i, end);
-}
-
+ return accelScanWrapper(accelTable, aux, input, idx, i, end);
+}
+
#ifdef ARCH_64_BIT
size_t doAccel64(u64a s, u64a accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
@@ -96,61 +96,61 @@ size_t doAccel64(m128 s, m128 accel, const u8 *accelTable,
}
#endif
-size_t doAccel128(const m128 *state, const struct LimExNFA128 *limex,
- const u8 *accelTable, const union AccelAux *aux,
- const u8 *input, size_t i, size_t end) {
- u32 idx;
- m128 s = *state;
- DEBUG_PRINTF("using PSHUFB for 128-bit shuffle\n");
- m128 accelPerm = limex->accelPermute;
- m128 accelComp = limex->accelCompare;
+size_t doAccel128(const m128 *state, const struct LimExNFA128 *limex,
+ const u8 *accelTable, const union AccelAux *aux,
+ const u8 *input, size_t i, size_t end) {
+ u32 idx;
+ m128 s = *state;
+ DEBUG_PRINTF("using PSHUFB for 128-bit shuffle\n");
+ m128 accelPerm = limex->accelPermute;
+ m128 accelComp = limex->accelCompare;
idx = packedExtract128(s, accelPerm, accelComp);
- return accelScanWrapper(accelTable, aux, input, idx, i, end);
-}
-
-size_t doAccel256(const m256 *state, const struct LimExNFA256 *limex,
- const u8 *accelTable, const union AccelAux *aux,
- const u8 *input, size_t i, size_t end) {
- u32 idx;
- m256 s = *state;
- DEBUG_PRINTF("using PSHUFB for 256-bit shuffle\n");
- m256 accelPerm = limex->accelPermute;
- m256 accelComp = limex->accelCompare;
+ return accelScanWrapper(accelTable, aux, input, idx, i, end);
+}
+
+size_t doAccel256(const m256 *state, const struct LimExNFA256 *limex,
+ const u8 *accelTable, const union AccelAux *aux,
+ const u8 *input, size_t i, size_t end) {
+ u32 idx;
+ m256 s = *state;
+ DEBUG_PRINTF("using PSHUFB for 256-bit shuffle\n");
+ m256 accelPerm = limex->accelPermute;
+ m256 accelComp = limex->accelCompare;
#if !defined(HAVE_AVX2)
u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo);
u32 idx2 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi);
assert((idx1 & idx2) == 0); // should be no shared bits
idx = idx1 | idx2;
-#else
+#else
idx = packedExtract256(s, accelPerm, accelComp);
-#endif
- return accelScanWrapper(accelTable, aux, input, idx, i, end);
-}
-
-size_t doAccel384(const m384 *state, const struct LimExNFA384 *limex,
- const u8 *accelTable, const union AccelAux *aux,
- const u8 *input, size_t i, size_t end) {
- u32 idx;
- m384 s = *state;
- DEBUG_PRINTF("using PSHUFB for 384-bit shuffle\n");
- m384 accelPerm = limex->accelPermute;
- m384 accelComp = limex->accelCompare;
+#endif
+ return accelScanWrapper(accelTable, aux, input, idx, i, end);
+}
+
+size_t doAccel384(const m384 *state, const struct LimExNFA384 *limex,
+ const u8 *accelTable, const union AccelAux *aux,
+ const u8 *input, size_t i, size_t end) {
+ u32 idx;
+ m384 s = *state;
+ DEBUG_PRINTF("using PSHUFB for 384-bit shuffle\n");
+ m384 accelPerm = limex->accelPermute;
+ m384 accelComp = limex->accelCompare;
u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo);
u32 idx2 = packedExtract128(s.mid, accelPerm.mid, accelComp.mid);
u32 idx3 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi);
- assert((idx1 & idx2 & idx3) == 0); // should be no shared bits
- idx = idx1 | idx2 | idx3;
- return accelScanWrapper(accelTable, aux, input, idx, i, end);
-}
-
-size_t doAccel512(const m512 *state, const struct LimExNFA512 *limex,
- const u8 *accelTable, const union AccelAux *aux,
- const u8 *input, size_t i, size_t end) {
- u32 idx;
- m512 s = *state;
- DEBUG_PRINTF("using PSHUFB for 512-bit shuffle\n");
- m512 accelPerm = limex->accelPermute;
- m512 accelComp = limex->accelCompare;
+ assert((idx1 & idx2 & idx3) == 0); // should be no shared bits
+ idx = idx1 | idx2 | idx3;
+ return accelScanWrapper(accelTable, aux, input, idx, i, end);
+}
+
+size_t doAccel512(const m512 *state, const struct LimExNFA512 *limex,
+ const u8 *accelTable, const union AccelAux *aux,
+ const u8 *input, size_t i, size_t end) {
+ u32 idx;
+ m512 s = *state;
+ DEBUG_PRINTF("using PSHUFB for 512-bit shuffle\n");
+ m512 accelPerm = limex->accelPermute;
+ m512 accelComp = limex->accelCompare;
#if defined(HAVE_AVX512)
idx = packedExtract512(s, accelPerm, accelComp);
#elif defined(HAVE_AVX2)
@@ -158,13 +158,13 @@ size_t doAccel512(const m512 *state, const struct LimExNFA512 *limex,
u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi);
assert((idx1 & idx2) == 0); // should be no shared bits
idx = idx1 | idx2;
-#else
+#else
u32 idx1 = packedExtract128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo);
u32 idx2 = packedExtract128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi);
u32 idx3 = packedExtract128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo);
u32 idx4 = packedExtract128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi);
- assert((idx1 & idx2 & idx3 & idx4) == 0); // should be no shared bits
- idx = idx1 | idx2 | idx3 | idx4;
+ assert((idx1 & idx2 & idx3 & idx4) == 0); // should be no shared bits
+ idx = idx1 | idx2 | idx3 | idx4;
#endif
- return accelScanWrapper(accelTable, aux, input, idx, i, end);
-}
+ return accelScanWrapper(accelTable, aux, input, idx, i, end);
+}
diff --git a/contrib/libs/hyperscan/src/nfa/limex_accel.h b/contrib/libs/hyperscan/src/nfa/limex_accel.h
index 5c3f379cbf..e5c94e82ad 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_accel.h
+++ b/contrib/libs/hyperscan/src/nfa/limex_accel.h
@@ -1,55 +1,55 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Limex NFA: acceleration runtime.
- *
- * For the SIMD types (128 bits and above), we pass a pointer to the
- * implementation NFA structure instead of three masks: otherwise we spend all
- * our time building stack frames.
- */
-
-#ifndef LIMEX_ACCEL_H
-#define LIMEX_ACCEL_H
-
-#include "util/simd_utils.h" // for m128 etc
-
-union AccelAux;
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Limex NFA: acceleration runtime.
+ *
+ * For the SIMD types (128 bits and above), we pass a pointer to the
+ * implementation NFA structure instead of three masks: otherwise we spend all
+ * our time building stack frames.
+ */
+
+#ifndef LIMEX_ACCEL_H
+#define LIMEX_ACCEL_H
+
+#include "util/simd_utils.h" // for m128 etc
+
+union AccelAux;
struct LimExNFA64;
-struct LimExNFA128;
-struct LimExNFA256;
-struct LimExNFA384;
-struct LimExNFA512;
-
-size_t doAccel32(u32 s, u32 accel, const u8 *accelTable,
- const union AccelAux *aux, const u8 *input, size_t i,
- size_t end);
-
+struct LimExNFA128;
+struct LimExNFA256;
+struct LimExNFA384;
+struct LimExNFA512;
+
+size_t doAccel32(u32 s, u32 accel, const u8 *accelTable,
+ const union AccelAux *aux, const u8 *input, size_t i,
+ size_t end);
+
#ifdef ARCH_64_BIT
size_t doAccel64(u64a s, u64a accel, const u8 *accelTable,
const union AccelAux *aux, const u8 *input, size_t i,
@@ -60,20 +60,20 @@ size_t doAccel64(m128 s, m128 accel, const u8 *accelTable,
size_t end);
#endif
-size_t doAccel128(const m128 *s, const struct LimExNFA128 *limex,
- const u8 *accelTable, const union AccelAux *aux,
- const u8 *input, size_t i, size_t end);
-
-size_t doAccel256(const m256 *s, const struct LimExNFA256 *limex,
- const u8 *accelTable, const union AccelAux *aux,
- const u8 *input, size_t i, size_t end);
-
-size_t doAccel384(const m384 *s, const struct LimExNFA384 *limex,
- const u8 *accelTable, const union AccelAux *aux,
- const u8 *input, size_t i, size_t end);
-
-size_t doAccel512(const m512 *s, const struct LimExNFA512 *limex,
- const u8 *accelTable, const union AccelAux *aux,
- const u8 *input, size_t i, size_t end);
-
-#endif
+size_t doAccel128(const m128 *s, const struct LimExNFA128 *limex,
+ const u8 *accelTable, const union AccelAux *aux,
+ const u8 *input, size_t i, size_t end);
+
+size_t doAccel256(const m256 *s, const struct LimExNFA256 *limex,
+ const u8 *accelTable, const union AccelAux *aux,
+ const u8 *input, size_t i, size_t end);
+
+size_t doAccel384(const m384 *s, const struct LimExNFA384 *limex,
+ const u8 *accelTable, const union AccelAux *aux,
+ const u8 *input, size_t i, size_t end);
+
+size_t doAccel512(const m512 *s, const struct LimExNFA512 *limex,
+ const u8 *accelTable, const union AccelAux *aux,
+ const u8 *input, size_t i, size_t end);
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/limex_common_impl.h b/contrib/libs/hyperscan/src/nfa/limex_common_impl.h
index 2b429aeb04..e441945d70 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_common_impl.h
+++ b/contrib/libs/hyperscan/src/nfa/limex_common_impl.h
@@ -1,66 +1,66 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "repeat.h"
-#include "util/join.h"
-
-/* impl of limex functions which depend only on state size */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "repeat.h"
+#include "util/join.h"
+
+/* impl of limex functions which depend only on state size */
+
#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) \
|| !defined(INLINE_ATTR)
# error Must define SIZE, STATE_T, LOAD_FROM_ENG and INLINE_ATTR in includer.
-#endif
-
-#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE)
-
-#define TESTEOD_FN JOIN(moNfaTestEod, SIZE)
-#define LIMEX_INACCEPT_FN JOIN(limexInAccept, SIZE)
+#endif
+
+#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE)
+
+#define TESTEOD_FN JOIN(moNfaTestEod, SIZE)
+#define LIMEX_INACCEPT_FN JOIN(limexInAccept, SIZE)
#define LIMEX_INANYACCEPT_FN JOIN(limexInAnyAccept, SIZE)
-#define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE)
-#define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE)
-#define INITIAL_FN JOIN(moNfaInitial, SIZE)
-#define TOP_FN JOIN(moNfaTop, SIZE)
-#define TOPN_FN JOIN(moNfaTopN, SIZE)
+#define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE)
+#define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE)
+#define INITIAL_FN JOIN(moNfaInitial, SIZE)
+#define TOP_FN JOIN(moNfaTop, SIZE)
+#define TOPN_FN JOIN(moNfaTopN, SIZE)
#define PROCESS_ACCEPTS_IMPL_FN JOIN(moProcessAcceptsImpl, SIZE)
-#define PROCESS_ACCEPTS_FN JOIN(moProcessAccepts, SIZE)
-#define PROCESS_ACCEPTS_NOSQUASH_FN JOIN(moProcessAcceptsNoSquash, SIZE)
-#define CONTEXT_T JOIN(NFAContext, SIZE)
-#define ONES_STATE JOIN(ones_, STATE_T)
-#define AND_STATE JOIN(and_, STATE_T)
-#define OR_STATE JOIN(or_, STATE_T)
-#define ANDNOT_STATE JOIN(andnot_, STATE_T)
-#define CLEARBIT_STATE JOIN(clearbit_, STATE_T)
-#define TESTBIT_STATE JOIN(testbit_, STATE_T)
-#define ISNONZERO_STATE JOIN(isNonZero_, STATE_T)
-#define ISZERO_STATE JOIN(isZero_, STATE_T)
-#define SQUASH_UNTUG_BR_FN JOIN(lazyTug, SIZE)
-#define GET_NFA_REPEAT_INFO_FN JOIN(getNfaRepeatInfo, SIZE)
-
+#define PROCESS_ACCEPTS_FN JOIN(moProcessAccepts, SIZE)
+#define PROCESS_ACCEPTS_NOSQUASH_FN JOIN(moProcessAcceptsNoSquash, SIZE)
+#define CONTEXT_T JOIN(NFAContext, SIZE)
+#define ONES_STATE JOIN(ones_, STATE_T)
+#define AND_STATE JOIN(and_, STATE_T)
+#define OR_STATE JOIN(or_, STATE_T)
+#define ANDNOT_STATE JOIN(andnot_, STATE_T)
+#define CLEARBIT_STATE JOIN(clearbit_, STATE_T)
+#define TESTBIT_STATE JOIN(testbit_, STATE_T)
+#define ISNONZERO_STATE JOIN(isNonZero_, STATE_T)
+#define ISZERO_STATE JOIN(isZero_, STATE_T)
+#define SQUASH_UNTUG_BR_FN JOIN(lazyTug, SIZE)
+#define GET_NFA_REPEAT_INFO_FN JOIN(getNfaRepeatInfo, SIZE)
+
#if defined(ARCH_64_BIT) && (SIZE >= 64)
#define CHUNK_T u64a
#define FIND_AND_CLEAR_FN findAndClearLSB_64
@@ -75,56 +75,56 @@
#define NUM_STATE_CHUNKS (sizeof(STATE_T) / sizeof(CHUNK_T))
-static really_inline
-void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex,
- const union RepeatControl *repeat_ctrl,
- const char *repeat_state, u64a offset,
- STATE_T *accstate) {
- // switch off cyclic tug-accepts which aren't tuggable right now.
-
- /* TODO: might be nice to work which br to examine based on accstate rather
- * than iterating overall br */
-
- if (!limex->repeatCount) {
- return;
- }
-
- assert(repeat_ctrl);
- assert(repeat_state);
-
- for (u32 i = 0; i < limex->repeatCount; i++) {
- const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
-
- u32 cyclicState = info->cyclicState;
+static really_inline
+void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex,
+ const union RepeatControl *repeat_ctrl,
+ const char *repeat_state, u64a offset,
+ STATE_T *accstate) {
+ // switch off cyclic tug-accepts which aren't tuggable right now.
+
+ /* TODO: might be nice to work which br to examine based on accstate rather
+ * than iterating overall br */
+
+ if (!limex->repeatCount) {
+ return;
+ }
+
+ assert(repeat_ctrl);
+ assert(repeat_state);
+
+ for (u32 i = 0; i < limex->repeatCount; i++) {
+ const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
+
+ u32 cyclicState = info->cyclicState;
if (!TESTBIT_STATE(*accstate, cyclicState)) {
- continue;
- }
-
- DEBUG_PRINTF("repeat %u (cyclic state %u) is active\n", i, cyclicState);
- DEBUG_PRINTF("checking if offset %llu would match\n", offset);
-
- const union RepeatControl *ctrl = repeat_ctrl + i;
- const char *state = repeat_state + info->stateOffset;
- const struct RepeatInfo *repeat = getRepeatInfo(info);
- if (repeatHasMatch(repeat, ctrl, state, offset) != REPEAT_MATCH) {
- DEBUG_PRINTF("not ready to accept yet\n");
- CLEARBIT_STATE(accstate, cyclicState);
- }
- }
-}
-
+ continue;
+ }
+
+ DEBUG_PRINTF("repeat %u (cyclic state %u) is active\n", i, cyclicState);
+ DEBUG_PRINTF("checking if offset %llu would match\n", offset);
+
+ const union RepeatControl *ctrl = repeat_ctrl + i;
+ const char *state = repeat_state + info->stateOffset;
+ const struct RepeatInfo *repeat = getRepeatInfo(info);
+ if (repeatHasMatch(repeat, ctrl, state, offset) != REPEAT_MATCH) {
+ DEBUG_PRINTF("not ready to accept yet\n");
+ CLEARBIT_STATE(accstate, cyclicState);
+ }
+ }
+}
+
static really_inline
char PROCESS_ACCEPTS_IMPL_FN(const IMPL_NFA_T *limex, const STATE_T *s,
STATE_T *squash, const STATE_T *acceptMask,
const struct NFAAccept *acceptTable, u64a offset,
NfaCallback callback, void *context) {
- assert(s);
- assert(limex);
- assert(callback);
-
+ assert(s);
+ assert(limex);
+ assert(callback);
+
const STATE_T accept_mask = *acceptMask;
STATE_T accepts = AND_STATE(*s, accept_mask);
-
+
// Caller must ensure that we have at least one accept state on.
assert(ISNONZERO_STATE(accepts));
@@ -146,23 +146,23 @@ char PROCESS_ACCEPTS_IMPL_FN(const IMPL_NFA_T *limex, const STATE_T *s,
bit + i * (u32)sizeof(chunk) * 8, a->reports, offset);
int rv = limexRunAccept((const char *)limex, a, callback, context,
offset);
- if (unlikely(rv == MO_HALT_MATCHING)) {
- return 1;
- }
+ if (unlikely(rv == MO_HALT_MATCHING)) {
+ return 1;
+ }
if (squash != NULL && a->squash != MO_INVALID_IDX) {
DEBUG_PRINTF("applying squash mask at offset %u\n", a->squash);
const ENG_STATE_T *sq =
(const ENG_STATE_T *)((const char *)limex + a->squash);
*squash = AND_STATE(*squash, LOAD_FROM_ENG(sq));
- }
- }
+ }
+ }
base_index += POPCOUNT_FN(mask_chunks[i]);
- }
-
- return 0;
-}
-
-static never_inline
+ }
+
+ return 0;
+}
+
+static never_inline
char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s,
const STATE_T *acceptMask,
const struct NFAAccept *acceptTable, u64a offset,
@@ -171,10 +171,10 @@ char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s,
STATE_T squash = ONES_STATE;
return PROCESS_ACCEPTS_IMPL_FN(limex, s, &squash, acceptMask, acceptTable,
offset, callback, context);
-
+
*s = AND_STATE(*s, squash);
-}
-
+}
+
static never_inline
char PROCESS_ACCEPTS_NOSQUASH_FN(const IMPL_NFA_T *limex, const STATE_T *s,
const STATE_T *acceptMask,
@@ -188,169 +188,169 @@ char PROCESS_ACCEPTS_NOSQUASH_FN(const IMPL_NFA_T *limex, const STATE_T *s,
// Run EOD accepts. Note that repeat_ctrl and repeat_state may be NULL if this
// LimEx contains no repeat structures.
-static really_inline
-char TESTEOD_FN(const IMPL_NFA_T *limex, const STATE_T *s,
- const union RepeatControl *repeat_ctrl,
+static really_inline
+char TESTEOD_FN(const IMPL_NFA_T *limex, const STATE_T *s,
+ const union RepeatControl *repeat_ctrl,
const char *repeat_state, u64a offset,
- NfaCallback callback, void *context) {
- assert(limex && s);
-
- // There may not be any EOD accepts in this NFA.
- if (!limex->acceptEodCount) {
- return MO_CONTINUE_MATCHING;
- }
-
+ NfaCallback callback, void *context) {
+ assert(limex && s);
+
+ // There may not be any EOD accepts in this NFA.
+ if (!limex->acceptEodCount) {
+ return MO_CONTINUE_MATCHING;
+ }
+
const STATE_T acceptEodMask = LOAD_FROM_ENG(&limex->acceptAtEOD);
STATE_T foundAccepts = AND_STATE(*s, acceptEodMask);
-
+
SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state,
offset + 1 /* EOD 'symbol' */, &foundAccepts);
-
- if (unlikely(ISNONZERO_STATE(foundAccepts))) {
- const struct NFAAccept *acceptEodTable = getAcceptEodTable(limex);
+
+ if (unlikely(ISNONZERO_STATE(foundAccepts))) {
+ const struct NFAAccept *acceptEodTable = getAcceptEodTable(limex);
if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &foundAccepts, &acceptEodMask,
acceptEodTable, offset, callback,
- context)) {
- return MO_HALT_MATCHING;
- }
- }
-
- return MO_CONTINUE_MATCHING;
-}
-
-// Run accepts corresponding to current state.
-static really_inline
-char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) {
- assert(limex && q);
- assert(q->state);
- assert(q_cur_type(q) == MQE_START);
-
+ context)) {
+ return MO_HALT_MATCHING;
+ }
+ }
+
+ return MO_CONTINUE_MATCHING;
+}
+
+// Run accepts corresponding to current state.
+static really_inline
+char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) {
+ assert(limex && q);
+ assert(q->state);
+ assert(q_cur_type(q) == MQE_START);
+
STATE_T s = *(STATE_T *)q->state;
STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept);
- STATE_T foundAccepts = AND_STATE(s, acceptMask);
-
- if (unlikely(ISNONZERO_STATE(foundAccepts))) {
- DEBUG_PRINTF("found accepts\n");
- DEBUG_PRINTF("for nfa %p\n", limex);
- const struct NFAAccept *acceptTable = getAcceptTable(limex);
- u64a offset = q_cur_offset(q);
-
+ STATE_T foundAccepts = AND_STATE(s, acceptMask);
+
+ if (unlikely(ISNONZERO_STATE(foundAccepts))) {
+ DEBUG_PRINTF("found accepts\n");
+ DEBUG_PRINTF("for nfa %p\n", limex);
+ const struct NFAAccept *acceptTable = getAcceptTable(limex);
+ u64a offset = q_cur_offset(q);
+
if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &foundAccepts, &acceptMask,
acceptTable, offset, q->cb,
- q->context)) {
- return MO_HALT_MATCHING;
- }
- }
-
- return MO_CONTINUE_MATCHING;
-}
-
-static really_inline
-STATE_T INITIAL_FN(const IMPL_NFA_T *impl, char onlyDs) {
+ q->context)) {
+ return MO_HALT_MATCHING;
+ }
+ }
+
+ return MO_CONTINUE_MATCHING;
+}
+
+static really_inline
+STATE_T INITIAL_FN(const IMPL_NFA_T *impl, char onlyDs) {
return LOAD_FROM_ENG(onlyDs ? &impl->initDS : &impl->init);
-}
-
-static really_inline
-STATE_T TOP_FN(const IMPL_NFA_T *impl, char onlyDs, STATE_T state) {
- return OR_STATE(INITIAL_FN(impl, onlyDs), state);
-}
-
-static really_inline
-STATE_T TOPN_FN(const IMPL_NFA_T *limex, STATE_T state, u32 n) {
- assert(n < limex->topCount);
+}
+
+static really_inline
+STATE_T TOP_FN(const IMPL_NFA_T *impl, char onlyDs, STATE_T state) {
+ return OR_STATE(INITIAL_FN(impl, onlyDs), state);
+}
+
+static really_inline
+STATE_T TOPN_FN(const IMPL_NFA_T *limex, STATE_T state, u32 n) {
+ assert(n < limex->topCount);
const ENG_STATE_T *topsptr =
(const ENG_STATE_T *)((const char *)limex + limex->topOffset);
STATE_T top = LOAD_FROM_ENG(&topsptr[n]);
- return OR_STATE(top, state);
-}
-
-static really_inline
-void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx,
- u64a offset) {
- assert(limex);
- assert(ctx);
-
- if (!limex->repeatCount) {
- return;
- }
-
- DEBUG_PRINTF("expire estate at offset %llu\n", offset);
-
+ return OR_STATE(top, state);
+}
+
+static really_inline
+void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx,
+ u64a offset) {
+ assert(limex);
+ assert(ctx);
+
+ if (!limex->repeatCount) {
+ return;
+ }
+
+ DEBUG_PRINTF("expire estate at offset %llu\n", offset);
+
const STATE_T cyclics
= AND_STATE(ctx->s, LOAD_FROM_ENG(&limex->repeatCyclicMask));
- if (ISZERO_STATE(cyclics)) {
- DEBUG_PRINTF("no cyclic states are on\n");
- return;
- }
-
- for (u32 i = 0; i < limex->repeatCount; i++) {
- const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
-
- u32 cyclicState = info->cyclicState;
+ if (ISZERO_STATE(cyclics)) {
+ DEBUG_PRINTF("no cyclic states are on\n");
+ return;
+ }
+
+ for (u32 i = 0; i < limex->repeatCount; i++) {
+ const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
+
+ u32 cyclicState = info->cyclicState;
if (!TESTBIT_STATE(cyclics, cyclicState)) {
- continue;
- }
-
- DEBUG_PRINTF("repeat %u (cyclic state %u) is active\n", i,
- cyclicState);
-
- const struct RepeatInfo *repeat = getRepeatInfo(info);
- if (repeat->repeatMax == REPEAT_INF) {
- continue; // can't expire
- }
-
- const union RepeatControl *repeat_ctrl = ctx->repeat_ctrl + i;
- const char *repeat_state = ctx->repeat_state + info->stateOffset;
- u64a last_top = repeatLastTop(repeat, repeat_ctrl, repeat_state);
- assert(repeat->repeatMax < REPEAT_INF);
- DEBUG_PRINTF("offset %llu, last_top %llu repeatMax %u\n", offset,
- last_top, repeat->repeatMax);
- u64a adj = 0;
- /* if the cycle's tugs are active at repeat max, it is still alive */
+ continue;
+ }
+
+ DEBUG_PRINTF("repeat %u (cyclic state %u) is active\n", i,
+ cyclicState);
+
+ const struct RepeatInfo *repeat = getRepeatInfo(info);
+ if (repeat->repeatMax == REPEAT_INF) {
+ continue; // can't expire
+ }
+
+ const union RepeatControl *repeat_ctrl = ctx->repeat_ctrl + i;
+ const char *repeat_state = ctx->repeat_state + info->stateOffset;
+ u64a last_top = repeatLastTop(repeat, repeat_ctrl, repeat_state);
+ assert(repeat->repeatMax < REPEAT_INF);
+ DEBUG_PRINTF("offset %llu, last_top %llu repeatMax %u\n", offset,
+ last_top, repeat->repeatMax);
+ u64a adj = 0;
+ /* if the cycle's tugs are active at repeat max, it is still alive */
if (TESTBIT_STATE(LOAD_FROM_ENG(&limex->accept), cyclicState) ||
TESTBIT_STATE(LOAD_FROM_ENG(&limex->acceptAtEOD), cyclicState)) {
- DEBUG_PRINTF("lazy tug possible - may still be inspected\n");
- adj = 1;
- } else {
+ DEBUG_PRINTF("lazy tug possible - may still be inspected\n");
+ adj = 1;
+ } else {
const ENG_STATE_T *tug_mask =
(const ENG_STATE_T *)((const char *)info + info->tugMaskOffset);
if (ISNONZERO_STATE(AND_STATE(ctx->s, LOAD_FROM_ENG(tug_mask)))) {
- DEBUG_PRINTF("tug possible - may still be inspected\n");
- adj = 1;
- }
- }
-
- if (offset >= last_top + repeat->repeatMax + adj) {
- DEBUG_PRINTF("repeat state is stale, squashing state %u\n",
- cyclicState);
- CLEARBIT_STATE(&ctx->s, cyclicState);
- }
- }
-}
-
-// Specialised inAccept call: LimEx NFAs with the "lazy tug" optimisation (see
-// UE-1636) need to guard cyclic tug-accepts as well.
-static really_inline
-char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
- union RepeatControl *repeat_ctrl, char *repeat_state,
- u64a offset, ReportID report) {
- assert(limex);
-
+ DEBUG_PRINTF("tug possible - may still be inspected\n");
+ adj = 1;
+ }
+ }
+
+ if (offset >= last_top + repeat->repeatMax + adj) {
+ DEBUG_PRINTF("repeat state is stale, squashing state %u\n",
+ cyclicState);
+ CLEARBIT_STATE(&ctx->s, cyclicState);
+ }
+ }
+}
+
+// Specialised inAccept call: LimEx NFAs with the "lazy tug" optimisation (see
+// UE-1636) need to guard cyclic tug-accepts as well.
+static really_inline
+char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
+ union RepeatControl *repeat_ctrl, char *repeat_state,
+ u64a offset, ReportID report) {
+ assert(limex);
+
const STATE_T accept_mask = LOAD_FROM_ENG(&limex->accept);
STATE_T accepts = AND_STATE(state, accept_mask);
-
- // Are we in an accept state?
+
+ // Are we in an accept state?
if (ISZERO_STATE(accepts)) {
- DEBUG_PRINTF("no accept states are on\n");
- return 0;
- }
-
+ DEBUG_PRINTF("no accept states are on\n");
+ return 0;
+ }
+
SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accepts);
-
- DEBUG_PRINTF("looking for report %u\n", report);
-
+
+ DEBUG_PRINTF("looking for report %u\n", report);
+
const struct NFAAccept *acceptTable = getAcceptTable(limex);
-
+
CHUNK_T chunks[NUM_STATE_CHUNKS];
memcpy(chunks, &accepts, sizeof(accepts));
@@ -373,13 +373,13 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
DEBUG_PRINTF("report %u is on\n", report);
return 1;
}
- }
+ }
base_index += POPCOUNT_FN(mask_chunks[i]);
- }
-
- return 0;
-}
-
+ }
+
+ return 0;
+}
+
static really_inline
char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
union RepeatControl *repeat_ctrl, char *repeat_state,
@@ -400,30 +400,30 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
return ISNONZERO_STATE(accstate);
}
-#undef TESTEOD_FN
-#undef REPORTCURRENT_FN
-#undef EXPIRE_ESTATE_FN
-#undef LIMEX_INACCEPT_FN
+#undef TESTEOD_FN
+#undef REPORTCURRENT_FN
+#undef EXPIRE_ESTATE_FN
+#undef LIMEX_INACCEPT_FN
#undef LIMEX_INANYACCEPT_FN
-#undef INITIAL_FN
-#undef TOP_FN
-#undef TOPN_FN
-#undef CONTEXT_T
-#undef IMPL_NFA_T
-#undef ONES_STATE
-#undef AND_STATE
-#undef OR_STATE
-#undef ANDNOT_STATE
-#undef CLEARBIT_STATE
-#undef TESTBIT_STATE
-#undef ISNONZERO_STATE
-#undef ISZERO_STATE
+#undef INITIAL_FN
+#undef TOP_FN
+#undef TOPN_FN
+#undef CONTEXT_T
+#undef IMPL_NFA_T
+#undef ONES_STATE
+#undef AND_STATE
+#undef OR_STATE
+#undef ANDNOT_STATE
+#undef CLEARBIT_STATE
+#undef TESTBIT_STATE
+#undef ISNONZERO_STATE
+#undef ISZERO_STATE
#undef PROCESS_ACCEPTS_IMPL_FN
-#undef PROCESS_ACCEPTS_FN
-#undef PROCESS_ACCEPTS_NOSQUASH_FN
-#undef SQUASH_UNTUG_BR_FN
-#undef GET_NFA_REPEAT_INFO_FN
-
+#undef PROCESS_ACCEPTS_FN
+#undef PROCESS_ACCEPTS_NOSQUASH_FN
+#undef SQUASH_UNTUG_BR_FN
+#undef GET_NFA_REPEAT_INFO_FN
+
#undef CHUNK_T
#undef FIND_AND_CLEAR_FN
#undef POPCOUNT_FN
diff --git a/contrib/libs/hyperscan/src/nfa/limex_compile.cpp b/contrib/libs/hyperscan/src/nfa/limex_compile.cpp
index fcf90538b0..9233ae515e 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_compile.cpp
+++ b/contrib/libs/hyperscan/src/nfa/limex_compile.cpp
@@ -1,84 +1,84 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Main NFA build code.
- */
-
-#include "limex_compile.h"
-
-#include "accel.h"
-#include "accelcompile.h"
-#include "grey.h"
-#include "limex_internal.h"
-#include "limex_limits.h"
-#include "nfa_build_util.h"
+ * \brief Main NFA build code.
+ */
+
+#include "limex_compile.h"
+
+#include "accel.h"
+#include "accelcompile.h"
+#include "grey.h"
+#include "limex_internal.h"
+#include "limex_limits.h"
+#include "nfa_build_util.h"
#include "nfagraph/ng_dominators.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_limex_accel.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_squash.h"
-#include "nfagraph/ng_util.h"
-#include "ue2common.h"
-#include "repeatcompile.h"
-#include "util/alloc.h"
-#include "util/bitutils.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_limex_accel.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_squash.h"
+#include "nfagraph/ng_util.h"
+#include "ue2common.h"
+#include "repeatcompile.h"
+#include "util/alloc.h"
+#include "util/bitutils.h"
#include "util/bytecode_ptr.h"
-#include "util/charreach.h"
-#include "util/compile_context.h"
-#include "util/container.h"
+#include "util/charreach.h"
+#include "util/compile_context.h"
+#include "util/container.h"
#include "util/flat_containers.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
-#include "util/order_check.h"
+#include "util/order_check.h"
#include "util/unordered.h"
-#include "util/verify_types.h"
-
-#include <algorithm>
-#include <cassert>
-#include <cstddef>
-#include <cstdlib>
-#include <cstring>
-#include <map>
-#include <set>
-#include <vector>
-
-#include <boost/graph/breadth_first_search.hpp>
+#include "util/verify_types.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdlib>
+#include <cstring>
+#include <map>
+#include <set>
+#include <vector>
+
+#include <boost/graph/breadth_first_search.hpp>
#include <boost/graph/depth_first_search.hpp>
#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
+
+using namespace std;
using boost::adaptors::map_values;
-
-namespace ue2 {
-
+
+namespace ue2 {
+
/**
* \brief Special state index value meaning that the vertex will not
* participate in an (NFA/DFA/etc) implementation.
@@ -97,197 +97,197 @@ static constexpr u32 MAX_REPEAT_CHAR_REACH = 26;
/* Minimum bounded repeat trigger distance to consider as a fast NFA */
static constexpr u8 MIN_REPEAT_TRIGGER_DISTANCE = 6;
-namespace {
-
-struct precalcAccel {
- precalcAccel() : single_offset(0), double_offset(0) {}
- CharReach single_cr;
- u32 single_offset;
-
- CharReach double_cr;
- flat_set<pair<u8, u8>> double_lits; /* double-byte accel stop literals */
- u32 double_offset;
-};
-
+namespace {
+
+struct precalcAccel {
+ precalcAccel() : single_offset(0), double_offset(0) {}
+ CharReach single_cr;
+ u32 single_offset;
+
+ CharReach double_cr;
+ flat_set<pair<u8, u8>> double_lits; /* double-byte accel stop literals */
+ u32 double_offset;
+};
+
struct limex_accel_info {
unordered_set<NFAVertex> accelerable;
- map<NFAStateSet, precalcAccel> precalc;
+ map<NFAStateSet, precalcAccel> precalc;
unordered_map<NFAVertex, flat_set<NFAVertex>> friends;
unordered_map<NFAVertex, AccelScheme> accel_map;
-};
-
-static
+};
+
+static
unordered_map<NFAVertex, NFAStateSet>
reindexByStateId(const unordered_map<NFAVertex, NFAStateSet> &in,
const NGHolder &g,
const unordered_map<NFAVertex, u32> &state_ids,
- const u32 num_states) {
+ const u32 num_states) {
unordered_map<NFAVertex, NFAStateSet> out;
out.reserve(in.size());
-
- vector<u32> indexToState(num_vertices(g), NO_STATE);
- for (const auto &m : state_ids) {
- u32 vert_id = g[m.first].index;
- assert(vert_id < indexToState.size());
- indexToState[vert_id] = m.second;
- }
-
- for (const auto &m : in) {
- NFAVertex v = m.first;
- assert(m.second.size() <= indexToState.size());
-
- NFAStateSet mask(num_states);
- for (size_t i = m.second.find_first(); i != m.second.npos;
- i = m.second.find_next(i)) {
- u32 state_id = indexToState[i];
- if (state_id == NO_STATE) {
- continue;
- }
- mask.set(state_id);
- }
- out.emplace(v, mask);
- }
-
- return out;
-}
-
-struct build_info {
- build_info(NGHolder &hi,
+
+ vector<u32> indexToState(num_vertices(g), NO_STATE);
+ for (const auto &m : state_ids) {
+ u32 vert_id = g[m.first].index;
+ assert(vert_id < indexToState.size());
+ indexToState[vert_id] = m.second;
+ }
+
+ for (const auto &m : in) {
+ NFAVertex v = m.first;
+ assert(m.second.size() <= indexToState.size());
+
+ NFAStateSet mask(num_states);
+ for (size_t i = m.second.find_first(); i != m.second.npos;
+ i = m.second.find_next(i)) {
+ u32 state_id = indexToState[i];
+ if (state_id == NO_STATE) {
+ continue;
+ }
+ mask.set(state_id);
+ }
+ out.emplace(v, mask);
+ }
+
+ return out;
+}
+
+struct build_info {
+ build_info(NGHolder &hi,
const unordered_map<NFAVertex, u32> &states_in,
- const vector<BoundedRepeatData> &ri,
+ const vector<BoundedRepeatData> &ri,
const unordered_map<NFAVertex, NFAStateSet> &rsmi,
const unordered_map<NFAVertex, NFAStateSet> &smi,
const map<u32, set<NFAVertex>> &ti, const set<NFAVertex> &zi,
bool dai, bool sci, const CompileContext &cci, u32 nsi)
: h(hi), state_ids(states_in), repeats(ri), tops(ti), tugs(nsi),
zombies(zi), do_accel(dai), stateCompression(sci), cc(cci),
- num_states(nsi) {
- for (const auto &br : repeats) {
+ num_states(nsi) {
+ for (const auto &br : repeats) {
for (auto v : br.tug_triggers) {
assert(state_ids.at(v) != NO_STATE);
tugs.set(state_ids.at(v));
}
- br_cyclic[br.cyclic] =
- BoundedRepeatSummary(br.repeatMin, br.repeatMax);
- }
-
- // Convert squash maps to be indexed by state index rather than
- // vertex_index.
- squashMap = reindexByStateId(smi, h, state_ids, num_states);
- reportSquashMap = reindexByStateId(rsmi, h, state_ids, num_states);
- }
-
- NGHolder &h;
+ br_cyclic[br.cyclic] =
+ BoundedRepeatSummary(br.repeatMin, br.repeatMax);
+ }
+
+ // Convert squash maps to be indexed by state index rather than
+ // vertex_index.
+ squashMap = reindexByStateId(smi, h, state_ids, num_states);
+ reportSquashMap = reindexByStateId(rsmi, h, state_ids, num_states);
+ }
+
+ NGHolder &h;
const unordered_map<NFAVertex, u32> &state_ids;
- const vector<BoundedRepeatData> &repeats;
-
- // Squash maps; state sets are indexed by state_id.
+ const vector<BoundedRepeatData> &repeats;
+
+ // Squash maps; state sets are indexed by state_id.
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
unordered_map<NFAVertex, NFAStateSet> squashMap;
-
+
const map<u32, set<NFAVertex>> &tops;
NFAStateSet tugs;
- map<NFAVertex, BoundedRepeatSummary> br_cyclic;
- const set<NFAVertex> &zombies;
- bool do_accel;
- bool stateCompression;
- const CompileContext &cc;
- u32 num_states;
+ map<NFAVertex, BoundedRepeatSummary> br_cyclic;
+ const set<NFAVertex> &zombies;
+ bool do_accel;
+ bool stateCompression;
+ const CompileContext &cc;
+ u32 num_states;
limex_accel_info accel;
-};
-
+};
+
#define LAST_LIMEX_NFA LIMEX_NFA_512
-// Constants for scoring mechanism
+// Constants for scoring mechanism
const int SHIFT_COST = 10; // limex: cost per shift mask
-const int EXCEPTION_COST = 4; // limex: per exception
-
-template<NFAEngineType t> struct NFATraits { };
-
-template<template<NFAEngineType t> class sfunc, typename rv_t, typename arg_t,
- NFAEngineType lb>
-struct DISPATCH_BY_LIMEX_TYPE_INT {
- static rv_t doOp(NFAEngineType i, const arg_t &arg) {
- if (i == lb) {
- return sfunc<lb>::call(arg);
- } else {
- return DISPATCH_BY_LIMEX_TYPE_INT<sfunc, rv_t, arg_t,
- (NFAEngineType)(lb + 1)>
- ::doOp(i, arg);
- }
- }
-};
-
-template<template<NFAEngineType t> class sfunc, typename rv_t, typename arg_t>
-struct DISPATCH_BY_LIMEX_TYPE_INT<sfunc, rv_t, arg_t,
- (NFAEngineType)(LAST_LIMEX_NFA + 1)> {
- // dummy
- static rv_t doOp(NFAEngineType, const arg_t &) {
- assert(0);
- throw std::logic_error("Unreachable");
- }
-};
-
-#define DISPATCH_BY_LIMEX_TYPE(i, op, arg) \
- DISPATCH_BY_LIMEX_TYPE_INT<op, decltype(op<(NFAEngineType)0>::call(arg)), \
- decltype(arg), (NFAEngineType)0>::doOp(i, arg)
-
-// Given a number of states, find the size of the smallest container NFA it
-// will fit in. We support NFAs of the following sizes: 32, 64, 128, 256, 384,
-// 512.
-size_t findContainerSize(size_t states) {
- if (states > 256 && states <= 384) {
- return 384;
- }
- return 1ULL << (lg2(states - 1) + 1);
-}
-
-bool isLimitedTransition(int from, int to, int maxshift) {
- int diff = to - from;
-
- // within our shift?
- if (diff < 0 || diff > maxshift) {
- return false;
- }
-
- // can't jump over a bollard
- return (from & ~63) == (to & ~63);
-}
-
-// Fill a bit mask
-template<class Mask>
+const int EXCEPTION_COST = 4; // limex: per exception
+
+template<NFAEngineType t> struct NFATraits { };
+
+template<template<NFAEngineType t> class sfunc, typename rv_t, typename arg_t,
+ NFAEngineType lb>
+struct DISPATCH_BY_LIMEX_TYPE_INT {
+ static rv_t doOp(NFAEngineType i, const arg_t &arg) {
+ if (i == lb) {
+ return sfunc<lb>::call(arg);
+ } else {
+ return DISPATCH_BY_LIMEX_TYPE_INT<sfunc, rv_t, arg_t,
+ (NFAEngineType)(lb + 1)>
+ ::doOp(i, arg);
+ }
+ }
+};
+
+template<template<NFAEngineType t> class sfunc, typename rv_t, typename arg_t>
+struct DISPATCH_BY_LIMEX_TYPE_INT<sfunc, rv_t, arg_t,
+ (NFAEngineType)(LAST_LIMEX_NFA + 1)> {
+ // dummy
+ static rv_t doOp(NFAEngineType, const arg_t &) {
+ assert(0);
+ throw std::logic_error("Unreachable");
+ }
+};
+
+#define DISPATCH_BY_LIMEX_TYPE(i, op, arg) \
+ DISPATCH_BY_LIMEX_TYPE_INT<op, decltype(op<(NFAEngineType)0>::call(arg)), \
+ decltype(arg), (NFAEngineType)0>::doOp(i, arg)
+
+// Given a number of states, find the size of the smallest container NFA it
+// will fit in. We support NFAs of the following sizes: 32, 64, 128, 256, 384,
+// 512.
+size_t findContainerSize(size_t states) {
+ if (states > 256 && states <= 384) {
+ return 384;
+ }
+ return 1ULL << (lg2(states - 1) + 1);
+}
+
+bool isLimitedTransition(int from, int to, int maxshift) {
+ int diff = to - from;
+
+ // within our shift?
+ if (diff < 0 || diff > maxshift) {
+ return false;
+ }
+
+ // can't jump over a bollard
+ return (from & ~63) == (to & ~63);
+}
+
+// Fill a bit mask
+template<class Mask>
void maskFill(Mask &m, u8 c) {
- memset(&m, c, sizeof(m));
-}
-
-// Clear a bit mask.
-template<class Mask>
-void maskClear(Mask &m) {
- memset(&m, 0, sizeof(m));
-}
-
-template<class Mask>
-u8 *maskGetByte(Mask &m, u32 bit) {
- assert(bit < sizeof(m)*8);
- u8 *m8 = (u8 *)&m;
-
- return m8 + bit/8;
-}
-
-// Set a bit in a mask, starting from the little end.
-template<class Mask>
-void maskSetBit(Mask &m, const unsigned int bit) {
- u8 *byte = maskGetByte(m, bit);
- *byte |= 1U << (bit % 8);
-}
-
-template<class Mask>
-void maskSetBits(Mask &m, const NFAStateSet &bits) {
- for (size_t i = bits.find_first(); i != bits.npos; i = bits.find_next(i)) {
- maskSetBit(m, i);
- }
-}
-
+ memset(&m, c, sizeof(m));
+}
+
+// Clear a bit mask.
+template<class Mask>
+void maskClear(Mask &m) {
+ memset(&m, 0, sizeof(m));
+}
+
+template<class Mask>
+u8 *maskGetByte(Mask &m, u32 bit) {
+ assert(bit < sizeof(m)*8);
+ u8 *m8 = (u8 *)&m;
+
+ return m8 + bit/8;
+}
+
+// Set a bit in a mask, starting from the little end.
+template<class Mask>
+void maskSetBit(Mask &m, const unsigned int bit) {
+ u8 *byte = maskGetByte(m, bit);
+ *byte |= 1U << (bit % 8);
+}
+
+template<class Mask>
+void maskSetBits(Mask &m, const NFAStateSet &bits) {
+ for (size_t i = bits.find_first(); i != bits.npos; i = bits.find_next(i)) {
+ maskSetBit(m, i);
+ }
+}
+
template<class Mask>
bool isMaskZero(Mask &m) {
u8 *m8 = (u8 *)&m;
@@ -299,251 +299,251 @@ bool isMaskZero(Mask &m) {
return true;
}
-// Sets an entire byte in a mask to the given value
-template<class Mask>
-void maskSetByte(Mask &m, const unsigned int idx, const char val) {
- assert(idx < sizeof(m));
- char *m8 = (char *)&m;
- char &byte = m8[idx];
- byte = val;
-}
-
-// Clear a bit in the mask, starting from the little end.
-template<class Mask>
-void maskClearBit(Mask &m, const u32 bit) {
- u8 *byte = maskGetByte(m, bit);
- *byte &= ~(1U << (bit % 8));
-}
-
-/*
- * Common code: the following code operates on parts of the NFA that are common
- * to both the (defunct) General and the LimEx models.
- */
-
-static
-void buildReachMapping(const build_info &args, vector<NFAStateSet> &reach,
- vector<u8> &reachMap) {
- const NGHolder &h = args.h;
- const auto &state_ids = args.state_ids;
-
- // Build a list of vertices with a state index assigned.
- vector<NFAVertex> verts;
- verts.reserve(args.num_states);
- for (auto v : vertices_range(h)) {
- if (state_ids.at(v) != NO_STATE) {
- verts.push_back(v);
- }
- }
-
- // Build a mapping from set-of-states -> reachability.
- map<NFAStateSet, CharReach> mapping;
- NFAStateSet states(args.num_states);
- for (size_t i = 0; i < N_CHARS; i++) {
- states.reset();
- for (auto v : verts) {
- const CharReach &cr = h[v].char_reach;
- if (cr.test(i)) {
- u32 state_id = state_ids.at(v);
- states.set(state_id);
- }
- }
- mapping[states].set(i);
- }
-
- DEBUG_PRINTF("%zu distinct reachability entries\n", mapping.size());
- assert(!mapping.empty());
-
- // Build a vector of distinct reachability entries and a mapping from every
- // character to one of those entries.
-
- reach.reserve(mapping.size());
- reachMap.assign(N_CHARS, 0);
-
- u8 num = 0;
- for (auto mi = mapping.begin(), me = mapping.end(); mi != me; ++mi, ++num) {
- // Reach entry.
- reach.push_back(mi->first);
-
- // Character mapping.
- const CharReach &cr = mi->second;
- for (size_t i = cr.find_first(); i != CharReach::npos;
- i = cr.find_next(i)) {
- reachMap[i] = num;
- }
- }
-}
-
-struct AccelBuild {
+// Sets an entire byte in a mask to the given value
+template<class Mask>
+void maskSetByte(Mask &m, const unsigned int idx, const char val) {
+ assert(idx < sizeof(m));
+ char *m8 = (char *)&m;
+ char &byte = m8[idx];
+ byte = val;
+}
+
+// Clear a bit in the mask, starting from the little end.
+template<class Mask>
+void maskClearBit(Mask &m, const u32 bit) {
+ u8 *byte = maskGetByte(m, bit);
+ *byte &= ~(1U << (bit % 8));
+}
+
+/*
+ * Common code: the following code operates on parts of the NFA that are common
+ * to both the (defunct) General and the LimEx models.
+ */
+
+static
+void buildReachMapping(const build_info &args, vector<NFAStateSet> &reach,
+ vector<u8> &reachMap) {
+ const NGHolder &h = args.h;
+ const auto &state_ids = args.state_ids;
+
+ // Build a list of vertices with a state index assigned.
+ vector<NFAVertex> verts;
+ verts.reserve(args.num_states);
+ for (auto v : vertices_range(h)) {
+ if (state_ids.at(v) != NO_STATE) {
+ verts.push_back(v);
+ }
+ }
+
+ // Build a mapping from set-of-states -> reachability.
+ map<NFAStateSet, CharReach> mapping;
+ NFAStateSet states(args.num_states);
+ for (size_t i = 0; i < N_CHARS; i++) {
+ states.reset();
+ for (auto v : verts) {
+ const CharReach &cr = h[v].char_reach;
+ if (cr.test(i)) {
+ u32 state_id = state_ids.at(v);
+ states.set(state_id);
+ }
+ }
+ mapping[states].set(i);
+ }
+
+ DEBUG_PRINTF("%zu distinct reachability entries\n", mapping.size());
+ assert(!mapping.empty());
+
+ // Build a vector of distinct reachability entries and a mapping from every
+ // character to one of those entries.
+
+ reach.reserve(mapping.size());
+ reachMap.assign(N_CHARS, 0);
+
+ u8 num = 0;
+ for (auto mi = mapping.begin(), me = mapping.end(); mi != me; ++mi, ++num) {
+ // Reach entry.
+ reach.push_back(mi->first);
+
+ // Character mapping.
+ const CharReach &cr = mi->second;
+ for (size_t i = cr.find_first(); i != CharReach::npos;
+ i = cr.find_next(i)) {
+ reachMap[i] = num;
+ }
+ }
+}
+
+struct AccelBuild {
AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0) {}
- NFAVertex v;
- u32 state;
- u32 offset; // offset correction to apply
- CharReach stop1; // single-byte accel stop literals
- flat_set<pair<u8, u8>> stop2; // double-byte accel stop literals
-};
-
-static
-void findStopLiterals(const build_info &bi, NFAVertex v, AccelBuild &build) {
- u32 state = bi.state_ids.at(v);
- build.v = v;
- build.state = state;
- NFAStateSet ss(bi.num_states);
- ss.set(state);
-
- if (!contains(bi.accel.precalc, ss)) {
- build.stop1 = CharReach::dot();
- } else {
- const precalcAccel &precalc = bi.accel.precalc.at(ss);
- if (precalc.double_lits.empty()) {
- build.stop1 = precalc.single_cr;
- build.offset = precalc.single_offset;
- } else {
- build.stop1 = precalc.double_cr;
- build.stop2 = precalc.double_lits;
- build.offset = precalc.double_offset;
- }
- }
-
-#ifdef DEBUG
- printf("state %u stop1:", state);
- for (size_t j = build.stop1.find_first(); j != build.stop1.npos;
- j = build.stop1.find_next(j)) {
- printf(" 0x%02x", (u32)j);
- }
- printf("\n");
- printf("state %u stop2:", state);
- for (auto it = build.stop2.begin(); it != build.stop2.end(); ++it) {
- printf(" 0x%02hhx%02hhx", it->first, it->second);
- }
- printf("\n");
-#endif
-}
-
-// Generate all the data we need for at most NFA_MAX_ACCEL_STATES accelerable
-// states.
-static
-void gatherAccelStates(const build_info &bi, vector<AccelBuild> &accelStates) {
- for (auto v : bi.accel.accelerable) {
- DEBUG_PRINTF("state %u is accelerable\n", bi.state_ids.at(v));
- AccelBuild a;
- findStopLiterals(bi, v, a);
- accelStates.push_back(a);
- }
-
- // AccelStates should be sorted by state number, so that we build our accel
- // masks correctly.
- sort(accelStates.begin(), accelStates.end(),
- [](const AccelBuild &a, const AccelBuild &b) {
- return a.state < b.state;
- });
-
- // Our caller shouldn't have fed us too many accel states.
- assert(accelStates.size() <= NFA_MAX_ACCEL_STATES);
- if (accelStates.size() > NFA_MAX_ACCEL_STATES) {
- accelStates.resize(NFA_MAX_ACCEL_STATES);
- }
-}
-
-static
-void combineAccel(const AccelBuild &in, AccelBuild &out) {
- // stop1 and stop2 union
- out.stop1 |= in.stop1;
- out.stop2.insert(in.stop2.begin(), in.stop2.end());
- // offset is maximum of the two
- out.offset = max(out.offset, in.offset);
-}
-
-static
-void minimiseAccel(AccelBuild &build) {
- flat_set<pair<u8, u8>> new_stop2;
- // Any two-byte accels beginning with a one-byte accel should be removed
- for (const auto &si : build.stop2) {
- if (!build.stop1.test(si.first)) {
- new_stop2.insert(si);
- }
- }
- build.stop2 = new_stop2;
-}
-
-struct AccelAuxCmp {
- explicit AccelAuxCmp(const AccelAux &aux_in) : aux(aux_in) {}
- bool operator()(const AccelAux &a) const {
- return !memcmp(&a, &aux, sizeof(AccelAux));
- }
-private:
- const AccelAux &aux;
-};
-
-static
-bool allow_wide_accel(NFAVertex v, const NGHolder &g, NFAVertex sds_or_proxy) {
- return v == sds_or_proxy || edge(g.start, v, g).second;
-}
-
-static
-bool allow_wide_accel(const vector<NFAVertex> &vv, const NGHolder &g,
- NFAVertex sds_or_proxy) {
- for (auto v : vv) {
- if (allow_wide_accel(v, g, sds_or_proxy)) {
- return true;
- }
- }
-
- return false;
-}
-
-// identify and mark states that we feel are accelerable (for a limex NFA)
-/* Note: leftfix nfas allow accepts to be accelerated */
-static
-void nfaFindAccelSchemes(const NGHolder &g,
- const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
+ NFAVertex v;
+ u32 state;
+ u32 offset; // offset correction to apply
+ CharReach stop1; // single-byte accel stop literals
+ flat_set<pair<u8, u8>> stop2; // double-byte accel stop literals
+};
+
+static
+void findStopLiterals(const build_info &bi, NFAVertex v, AccelBuild &build) {
+ u32 state = bi.state_ids.at(v);
+ build.v = v;
+ build.state = state;
+ NFAStateSet ss(bi.num_states);
+ ss.set(state);
+
+ if (!contains(bi.accel.precalc, ss)) {
+ build.stop1 = CharReach::dot();
+ } else {
+ const precalcAccel &precalc = bi.accel.precalc.at(ss);
+ if (precalc.double_lits.empty()) {
+ build.stop1 = precalc.single_cr;
+ build.offset = precalc.single_offset;
+ } else {
+ build.stop1 = precalc.double_cr;
+ build.stop2 = precalc.double_lits;
+ build.offset = precalc.double_offset;
+ }
+ }
+
+#ifdef DEBUG
+ printf("state %u stop1:", state);
+ for (size_t j = build.stop1.find_first(); j != build.stop1.npos;
+ j = build.stop1.find_next(j)) {
+ printf(" 0x%02x", (u32)j);
+ }
+ printf("\n");
+ printf("state %u stop2:", state);
+ for (auto it = build.stop2.begin(); it != build.stop2.end(); ++it) {
+ printf(" 0x%02hhx%02hhx", it->first, it->second);
+ }
+ printf("\n");
+#endif
+}
+
+// Generate all the data we need for at most NFA_MAX_ACCEL_STATES accelerable
+// states.
+static
+void gatherAccelStates(const build_info &bi, vector<AccelBuild> &accelStates) {
+ for (auto v : bi.accel.accelerable) {
+ DEBUG_PRINTF("state %u is accelerable\n", bi.state_ids.at(v));
+ AccelBuild a;
+ findStopLiterals(bi, v, a);
+ accelStates.push_back(a);
+ }
+
+ // AccelStates should be sorted by state number, so that we build our accel
+ // masks correctly.
+ sort(accelStates.begin(), accelStates.end(),
+ [](const AccelBuild &a, const AccelBuild &b) {
+ return a.state < b.state;
+ });
+
+ // Our caller shouldn't have fed us too many accel states.
+ assert(accelStates.size() <= NFA_MAX_ACCEL_STATES);
+ if (accelStates.size() > NFA_MAX_ACCEL_STATES) {
+ accelStates.resize(NFA_MAX_ACCEL_STATES);
+ }
+}
+
+static
+void combineAccel(const AccelBuild &in, AccelBuild &out) {
+ // stop1 and stop2 union
+ out.stop1 |= in.stop1;
+ out.stop2.insert(in.stop2.begin(), in.stop2.end());
+ // offset is maximum of the two
+ out.offset = max(out.offset, in.offset);
+}
+
+static
+void minimiseAccel(AccelBuild &build) {
+ flat_set<pair<u8, u8>> new_stop2;
+ // Any two-byte accels beginning with a one-byte accel should be removed
+ for (const auto &si : build.stop2) {
+ if (!build.stop1.test(si.first)) {
+ new_stop2.insert(si);
+ }
+ }
+ build.stop2 = new_stop2;
+}
+
+struct AccelAuxCmp {
+ explicit AccelAuxCmp(const AccelAux &aux_in) : aux(aux_in) {}
+ bool operator()(const AccelAux &a) const {
+ return !memcmp(&a, &aux, sizeof(AccelAux));
+ }
+private:
+ const AccelAux &aux;
+};
+
+static
+bool allow_wide_accel(NFAVertex v, const NGHolder &g, NFAVertex sds_or_proxy) {
+ return v == sds_or_proxy || edge(g.start, v, g).second;
+}
+
+static
+bool allow_wide_accel(const vector<NFAVertex> &vv, const NGHolder &g,
+ NFAVertex sds_or_proxy) {
+ for (auto v : vv) {
+ if (allow_wide_accel(v, g, sds_or_proxy)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// identify and mark states that we feel are accelerable (for a limex NFA)
+/* Note: leftfix nfas allow accepts to be accelerated */
+static
+void nfaFindAccelSchemes(const NGHolder &g,
+ const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
unordered_map<NFAVertex, AccelScheme> *out) {
- vector<CharReach> refined_cr = reduced_cr(g, br_cyclic);
-
- NFAVertex sds_or_proxy = get_sds_or_proxy(g);
-
- for (auto v : vertices_range(g)) {
- // We want to skip any vertices that don't lead to at least one other
- // (self-loops don't count) vertex.
- if (!has_proper_successor(v, g)) {
+ vector<CharReach> refined_cr = reduced_cr(g, br_cyclic);
+
+ NFAVertex sds_or_proxy = get_sds_or_proxy(g);
+
+ for (auto v : vertices_range(g)) {
+ // We want to skip any vertices that don't lead to at least one other
+ // (self-loops don't count) vertex.
+ if (!has_proper_successor(v, g)) {
DEBUG_PRINTF("skipping vertex %zu\n", g[v].index);
- continue;
- }
-
- bool allow_wide = allow_wide_accel(v, g, sds_or_proxy);
-
- AccelScheme as;
- if (nfaCheckAccel(g, v, refined_cr, br_cyclic, &as, allow_wide)) {
+ continue;
+ }
+
+ bool allow_wide = allow_wide_accel(v, g, sds_or_proxy);
+
+ AccelScheme as;
+ if (nfaCheckAccel(g, v, refined_cr, br_cyclic, &as, allow_wide)) {
DEBUG_PRINTF("graph vertex %zu is accelerable with offset %u.\n",
- g[v].index, as.offset);
- (*out)[v] = as;
- }
- }
-}
-
-struct fas_visitor : public boost::default_bfs_visitor {
+ g[v].index, as.offset);
+ (*out)[v] = as;
+ }
+ }
+}
+
+struct fas_visitor : public boost::default_bfs_visitor {
fas_visitor(const unordered_map<NFAVertex, AccelScheme> &am_in,
unordered_map<NFAVertex, AccelScheme> *out_in)
- : accel_map(am_in), out(out_in) {}
-
+ : accel_map(am_in), out(out_in) {}
+
void discover_vertex(NFAVertex v, const NGHolder &) {
- if (accel_map.find(v) != accel_map.end()) {
- (*out)[v] = accel_map.find(v)->second;
- }
- if (out->size() >= NFA_MAX_ACCEL_STATES) {
- throw this; /* done */
- }
- }
+ if (accel_map.find(v) != accel_map.end()) {
+ (*out)[v] = accel_map.find(v)->second;
+ }
+ if (out->size() >= NFA_MAX_ACCEL_STATES) {
+ throw this; /* done */
+ }
+ }
const unordered_map<NFAVertex, AccelScheme> &accel_map;
unordered_map<NFAVertex, AccelScheme> *out;
-};
-
-static
+};
+
+static
void filterAccelStates(NGHolder &g, const map<u32, set<NFAVertex>> &tops,
unordered_map<NFAVertex, AccelScheme> *accel_map) {
- /* We want the NFA_MAX_ACCEL_STATES best acceleration states, everything
- * else should be ditched. We use a simple BFS to choose accel states near
- * the start. */
-
+ /* We want the NFA_MAX_ACCEL_STATES best acceleration states, everything
+ * else should be ditched. We use a simple BFS to choose accel states near
+ * the start. */
+
vector<NFAEdge> tempEdges;
for (const auto &vv : tops | map_values) {
for (NFAVertex v : vv) {
@@ -552,51 +552,51 @@ void filterAccelStates(NGHolder &g, const map<u32, set<NFAVertex>> &tops,
}
}
}
-
- // Similarly, connect (start, startDs) if necessary.
- if (!edge(g.start, g.startDs, g).second) {
+
+ // Similarly, connect (start, startDs) if necessary.
+ if (!edge(g.start, g.startDs, g).second) {
NFAEdge e = add_edge(g.start, g.startDs, g);
tempEdges.push_back(e); // Remove edge later.
- }
-
+ }
+
unordered_map<NFAVertex, AccelScheme> out;
-
- try {
+
+ try {
boost::breadth_first_search(g, g.start,
visitor(fas_visitor(*accel_map, &out))
.color_map(make_small_color_map(g)));
- } catch (fas_visitor *) {
- ; /* found max accel_states */
- }
-
+ } catch (fas_visitor *) {
+ ; /* found max accel_states */
+ }
+
remove_edges(tempEdges, g);
-
- assert(out.size() <= NFA_MAX_ACCEL_STATES);
- accel_map->swap(out);
-}
-
-static
+
+ assert(out.size() <= NFA_MAX_ACCEL_STATES);
+ accel_map->swap(out);
+}
+
+static
bool containsBadSubset(const limex_accel_info &accel,
- const NFAStateSet &state_set, const u32 effective_sds) {
- NFAStateSet subset(state_set.size());
- for (size_t j = state_set.find_first(); j != state_set.npos;
- j = state_set.find_next(j)) {
- subset = state_set;
- subset.reset(j);
-
- if (effective_sds != NO_STATE && subset.count() == 1 &&
- subset.test(effective_sds)) {
- continue;
- }
-
- if (subset.any() && !contains(accel.precalc, subset)) {
- return true;
- }
- }
- return false;
-}
-
-static
+ const NFAStateSet &state_set, const u32 effective_sds) {
+ NFAStateSet subset(state_set.size());
+ for (size_t j = state_set.find_first(); j != state_set.npos;
+ j = state_set.find_next(j)) {
+ subset = state_set;
+ subset.reset(j);
+
+ if (effective_sds != NO_STATE && subset.count() == 1 &&
+ subset.test(effective_sds)) {
+ continue;
+ }
+
+ if (subset.any() && !contains(accel.precalc, subset)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static
bool is_too_wide(const AccelScheme &as) {
return as.cr.count() > MAX_MERGED_ACCEL_STOPS;
}
@@ -619,86 +619,86 @@ void fillAccelInfo(build_info &bi) {
assert(accel_map.size() <= NFA_MAX_ACCEL_STATES);
- vector<CharReach> refined_cr = reduced_cr(g, br_cyclic);
-
- vector<NFAVertex> astates;
- for (const auto &m : accel_map) {
- astates.push_back(m.first);
- }
-
- NFAStateSet useful(num_states);
- NFAStateSet state_set(num_states);
- vector<NFAVertex> states;
-
- NFAVertex sds_or_proxy = get_sds_or_proxy(g);
- const u32 effective_sds = state_ids.at(sds_or_proxy);
-
- /* for each subset of the accel keys need to find an accel scheme */
- assert(astates.size() < 32);
+ vector<CharReach> refined_cr = reduced_cr(g, br_cyclic);
+
+ vector<NFAVertex> astates;
+ for (const auto &m : accel_map) {
+ astates.push_back(m.first);
+ }
+
+ NFAStateSet useful(num_states);
+ NFAStateSet state_set(num_states);
+ vector<NFAVertex> states;
+
+ NFAVertex sds_or_proxy = get_sds_or_proxy(g);
+ const u32 effective_sds = state_ids.at(sds_or_proxy);
+
+ /* for each subset of the accel keys need to find an accel scheme */
+ assert(astates.size() < 32);
sort(astates.begin(), astates.end());
-
- for (u32 i = 1, i_end = 1U << astates.size(); i < i_end; i++) {
- DEBUG_PRINTF("saving info for accel %u\n", i);
- states.clear();
- state_set.reset();
- for (u32 j = 0, j_end = astates.size(); j < j_end; j++) {
- if (i & (1U << j)) {
- NFAVertex v = astates[j];
- states.push_back(v);
- state_set.set(state_ids.at(v));
- }
- }
-
+
+ for (u32 i = 1, i_end = 1U << astates.size(); i < i_end; i++) {
+ DEBUG_PRINTF("saving info for accel %u\n", i);
+ states.clear();
+ state_set.reset();
+ for (u32 j = 0, j_end = astates.size(); j < j_end; j++) {
+ if (i & (1U << j)) {
+ NFAVertex v = astates[j];
+ states.push_back(v);
+ state_set.set(state_ids.at(v));
+ }
+ }
+
if (containsBadSubset(accel, state_set, effective_sds)) {
- DEBUG_PRINTF("accel %u has bad subset\n", i);
- continue; /* if a subset failed to build we would too */
- }
-
- const bool allow_wide = allow_wide_accel(states, g, sds_or_proxy);
-
- AccelScheme as = nfaFindAccel(g, states, refined_cr, br_cyclic,
+ DEBUG_PRINTF("accel %u has bad subset\n", i);
+ continue; /* if a subset failed to build we would too */
+ }
+
+ const bool allow_wide = allow_wide_accel(states, g, sds_or_proxy);
+
+ AccelScheme as = nfaFindAccel(g, states, refined_cr, br_cyclic,
allow_wide, true);
if (is_too_wide(as)) {
- DEBUG_PRINTF("accel %u too wide (%zu, %d)\n", i,
- as.cr.count(), MAX_MERGED_ACCEL_STOPS);
- continue;
- }
-
+ DEBUG_PRINTF("accel %u too wide (%zu, %d)\n", i,
+ as.cr.count(), MAX_MERGED_ACCEL_STOPS);
+ continue;
+ }
+
DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset,
as.double_offset);
-
+
precalcAccel &pa = accel.precalc[state_set];
- pa.single_offset = as.offset;
- pa.single_cr = as.cr;
-
+ pa.single_offset = as.offset;
+ pa.single_cr = as.cr;
+
if (as.double_byte.size() != 0) {
pa.double_offset = as.double_offset;
pa.double_lits = as.double_byte;
pa.double_cr = as.double_cr;
- }
+ }
useful |= state_set;
- }
-
- for (const auto &m : accel_map) {
- NFAVertex v = m.first;
- const u32 state_id = state_ids.at(v);
-
- /* if we we unable to make a scheme out of the state in any context,
- * there is not point marking it as accelerable */
- if (!useful.test(state_id)) {
- continue;
- }
-
- u32 offset = 0;
- state_set.reset();
- state_set.set(state_id);
-
+ }
+
+ for (const auto &m : accel_map) {
+ NFAVertex v = m.first;
+ const u32 state_id = state_ids.at(v);
+
+ /* if we we unable to make a scheme out of the state in any context,
+ * there is not point marking it as accelerable */
+ if (!useful.test(state_id)) {
+ continue;
+ }
+
+ u32 offset = 0;
+ state_set.reset();
+ state_set.set(state_id);
+
accel.accelerable.insert(v);
findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]);
- }
-}
-
+ }
+}
+
/** The AccelAux structure has large alignment specified, and this makes some
* compilers do odd things unless we specify a custom allocator. */
typedef vector<AccelAux, AlignedAllocator<AccelAux, alignof(AccelAux)>>
@@ -706,7 +706,7 @@ typedef vector<AccelAux, AlignedAllocator<AccelAux, alignof(AccelAux)>>
#define IMPOSSIBLE_ACCEL_MASK (~0U)
-static
+static
u32 getEffectiveAccelStates(const build_info &args,
const unordered_map<NFAVertex, NFAVertex> &dom_map,
u32 active_accel_mask,
@@ -752,8 +752,8 @@ u32 getEffectiveAccelStates(const build_info &args,
for (u32 accel_id = 0; accel_id < accelStates.size(); accel_id++) {
NFAVertex v = accelStates[accel_id].v;
accel_id_map[v] = accel_id;
- }
-
+ }
+
/* Note: we want a slightly less strict defn of dominate as skip edges
* prevent .* 'truly' dominating */
for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) {
@@ -850,45 +850,45 @@ u32 getEffectiveAccelStates(const build_info &args,
}
return active_accel_mask & ~ignored;
-}
-
-static
-void buildAccel(const build_info &args, NFAStateSet &accelMask,
- NFAStateSet &accelFriendsMask, AccelAuxVector &auxvec,
- vector<u8> &accelTable) {
+}
+
+static
+void buildAccel(const build_info &args, NFAStateSet &accelMask,
+ NFAStateSet &accelFriendsMask, AccelAuxVector &auxvec,
+ vector<u8> &accelTable) {
const limex_accel_info &accel = args.accel;
-
- // Init, all zeroes.
- accelMask.resize(args.num_states);
- accelFriendsMask.resize(args.num_states);
-
- if (!args.do_accel) {
- return;
- }
-
- vector<AccelBuild> accelStates;
- gatherAccelStates(args, accelStates);
-
- if (accelStates.empty()) {
- DEBUG_PRINTF("no accelerable states\n");
- return;
- }
-
+
+ // Init, all zeroes.
+ accelMask.resize(args.num_states);
+ accelFriendsMask.resize(args.num_states);
+
+ if (!args.do_accel) {
+ return;
+ }
+
+ vector<AccelBuild> accelStates;
+ gatherAccelStates(args, accelStates);
+
+ if (accelStates.empty()) {
+ DEBUG_PRINTF("no accelerable states\n");
+ return;
+ }
+
const auto dom_map = findDominators(args.h);
- // We have 2^n different accel entries, one for each possible
- // combination of accelerable states.
- assert(accelStates.size() < 32);
- const u32 accelCount = 1U << accelStates.size();
- assert(accelCount <= 256);
-
- // Set up a unioned AccelBuild for every possible combination of the set
- // bits in accelStates.
- vector<AccelBuild> accelOuts(accelCount);
+ // We have 2^n different accel entries, one for each possible
+ // combination of accelerable states.
+ assert(accelStates.size() < 32);
+ const u32 accelCount = 1U << accelStates.size();
+ assert(accelCount <= 256);
+
+ // Set up a unioned AccelBuild for every possible combination of the set
+ // bits in accelStates.
+ vector<AccelBuild> accelOuts(accelCount);
vector<u32> effective_accel_set;
effective_accel_set.push_back(0); /* empty is effectively empty */
- for (u32 i = 1; i < accelCount; i++) {
+ for (u32 i = 1; i < accelCount; i++) {
u32 effective_i = getEffectiveAccelStates(args, dom_map, i,
accelStates);
effective_accel_set.push_back(effective_i);
@@ -897,38 +897,38 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask,
DEBUG_PRINTF("this combination of accel states is not possible\n");
accelOuts[i].stop1 = CharReach::dot();
continue;
- }
+ }
while (effective_i) {
u32 base_accel_state = findAndClearLSB_32(&effective_i);
combineAccel(accelStates[base_accel_state], accelOuts[i]);
}
- minimiseAccel(accelOuts[i]);
- }
-
- accelTable.resize(accelCount);
-
- // We dedupe our AccelAux structures here, so that we only write one copy
- // of each unique accel scheme into the bytecode, using the accelTable as
- // an index.
-
- // Start with the NONE case.
- auxvec.push_back(AccelAux());
- memset(&auxvec[0], 0, sizeof(AccelAux));
- auxvec[0].accel_type = ACCEL_NONE; // no states on.
-
- AccelAux aux;
- for (u32 i = 1; i < accelCount; i++) {
- memset(&aux, 0, sizeof(aux));
-
+ minimiseAccel(accelOuts[i]);
+ }
+
+ accelTable.resize(accelCount);
+
+ // We dedupe our AccelAux structures here, so that we only write one copy
+ // of each unique accel scheme into the bytecode, using the accelTable as
+ // an index.
+
+ // Start with the NONE case.
+ auxvec.push_back(AccelAux());
+ memset(&auxvec[0], 0, sizeof(AccelAux));
+ auxvec[0].accel_type = ACCEL_NONE; // no states on.
+
+ AccelAux aux;
+ for (u32 i = 1; i < accelCount; i++) {
+ memset(&aux, 0, sizeof(aux));
+
NFAStateSet effective_states(args.num_states);
u32 effective_i = effective_accel_set[i];
-
- AccelInfo ainfo;
- ainfo.double_offset = accelOuts[i].offset;
- ainfo.double_stop1 = accelOuts[i].stop1;
- ainfo.double_stop2 = accelOuts[i].stop2;
-
+
+ AccelInfo ainfo;
+ ainfo.double_offset = accelOuts[i].offset;
+ ainfo.double_stop1 = accelOuts[i].stop1;
+ ainfo.double_stop2 = accelOuts[i].stop2;
+
if (effective_i != IMPOSSIBLE_ACCEL_MASK) {
while (effective_i) {
u32 base_accel_id = findAndClearLSB_32(&effective_i);
@@ -940,47 +940,47 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask,
ainfo.single_offset = precalc.single_offset;
ainfo.single_stops = precalc.single_cr;
}
- }
-
- buildAccelAux(ainfo, &aux);
-
- // FIXME: We may want a faster way to find AccelAux structures that
- // we've already built before.
- auto it = find_if(auxvec.begin(), auxvec.end(), AccelAuxCmp(aux));
- if (it == auxvec.end()) {
- accelTable[i] = verify_u8(auxvec.size());
- auxvec.push_back(aux);
- } else {
- accelTable[i] = verify_u8(it - auxvec.begin());
- }
- }
-
- DEBUG_PRINTF("%zu unique accel schemes (of max %u)\n", auxvec.size(),
- accelCount);
-
- // XXX: ACCEL_NONE?
- for (const auto &as : accelStates) {
- NFAVertex v = as.v;
- assert(v && args.state_ids.at(v) == as.state);
-
- accelMask.set(as.state);
- accelFriendsMask.set(as.state);
-
- if (!contains(accel.friends, v)) {
- continue;
- }
- // Add the friends of this state to the friends mask.
- const flat_set<NFAVertex> &friends = accel.friends.at(v);
- DEBUG_PRINTF("%u has %zu friends\n", as.state, friends.size());
- for (auto friend_v : friends) {
- u32 state_id = args.state_ids.at(friend_v);
- DEBUG_PRINTF("--> %u\n", state_id);
- accelFriendsMask.set(state_id);
- }
- }
-}
-
-static
+ }
+
+ buildAccelAux(ainfo, &aux);
+
+ // FIXME: We may want a faster way to find AccelAux structures that
+ // we've already built before.
+ auto it = find_if(auxvec.begin(), auxvec.end(), AccelAuxCmp(aux));
+ if (it == auxvec.end()) {
+ accelTable[i] = verify_u8(auxvec.size());
+ auxvec.push_back(aux);
+ } else {
+ accelTable[i] = verify_u8(it - auxvec.begin());
+ }
+ }
+
+ DEBUG_PRINTF("%zu unique accel schemes (of max %u)\n", auxvec.size(),
+ accelCount);
+
+ // XXX: ACCEL_NONE?
+ for (const auto &as : accelStates) {
+ NFAVertex v = as.v;
+ assert(v && args.state_ids.at(v) == as.state);
+
+ accelMask.set(as.state);
+ accelFriendsMask.set(as.state);
+
+ if (!contains(accel.friends, v)) {
+ continue;
+ }
+ // Add the friends of this state to the friends mask.
+ const flat_set<NFAVertex> &friends = accel.friends.at(v);
+ DEBUG_PRINTF("%u has %zu friends\n", as.state, friends.size());
+ for (auto friend_v : friends) {
+ u32 state_id = args.state_ids.at(friend_v);
+ DEBUG_PRINTF("--> %u\n", state_id);
+ accelFriendsMask.set(state_id);
+ }
+ }
+}
+
+static
u32 addSquashMask(const build_info &args, const NFAVertex &v,
vector<NFAStateSet> &squash) {
auto sit = args.reportSquashMap.find(v);
@@ -1049,7 +1049,7 @@ void buildAcceptsList(const build_info &args, ReportListCache &reports_cache,
sort(begin(verts), end(verts), cmp_state_id);
- const NGHolder &h = args.h;
+ const NGHolder &h = args.h;
for (const auto &v : verts) {
DEBUG_PRINTF("state=%u, reports: [%s]\n", args.state_ids.at(v),
as_string_list(h[v].reports).c_str());
@@ -1067,7 +1067,7 @@ void buildAcceptsList(const build_info &args, ReportListCache &reports_cache,
accepts.push_back(move(a));
}
}
-
+
static
void buildAccepts(const build_info &args, ReportListCache &reports_cache,
NFAStateSet &acceptMask, NFAStateSet &acceptEodMask,
@@ -1075,463 +1075,463 @@ void buildAccepts(const build_info &args, ReportListCache &reports_cache,
vector<ReportID> &reports, vector<NFAStateSet> &squash) {
const NGHolder &h = args.h;
- acceptMask.resize(args.num_states);
- acceptEodMask.resize(args.num_states);
-
+ acceptMask.resize(args.num_states);
+ acceptEodMask.resize(args.num_states);
+
vector<NFAVertex> verts_accept, verts_accept_eod;
- for (auto v : vertices_range(h)) {
- u32 state_id = args.state_ids.at(v);
-
- if (state_id == NO_STATE || !is_match_vertex(v, h)) {
- continue;
- }
-
- if (edge(v, h.accept, h).second) {
- acceptMask.set(state_id);
+ for (auto v : vertices_range(h)) {
+ u32 state_id = args.state_ids.at(v);
+
+ if (state_id == NO_STATE || !is_match_vertex(v, h)) {
+ continue;
+ }
+
+ if (edge(v, h.accept, h).second) {
+ acceptMask.set(state_id);
verts_accept.push_back(v);
- } else {
- assert(edge(v, h.acceptEod, h).second);
- acceptEodMask.set(state_id);
+ } else {
+ assert(edge(v, h.acceptEod, h).second);
+ acceptEodMask.set(state_id);
verts_accept_eod.push_back(v);
- }
+ }
}
-
+
buildAcceptsList(args, reports_cache, verts_accept, accepts, reports,
squash);
buildAcceptsList(args, reports_cache, verts_accept_eod, acceptsEod, reports,
squash);
-}
-
-static
-void buildTopMasks(const build_info &args, vector<NFAStateSet> &topMasks) {
- if (args.tops.empty()) {
- return; // No tops, probably an outfix NFA.
- }
-
- u32 numMasks = args.tops.rbegin()->first + 1; // max mask index
- DEBUG_PRINTF("we have %u top masks\n", numMasks);
-
- topMasks.assign(numMasks, NFAStateSet(args.num_states)); // all zeroes
-
- for (const auto &m : args.tops) {
- u32 mask_idx = m.first;
+}
+
+static
+void buildTopMasks(const build_info &args, vector<NFAStateSet> &topMasks) {
+ if (args.tops.empty()) {
+ return; // No tops, probably an outfix NFA.
+ }
+
+ u32 numMasks = args.tops.rbegin()->first + 1; // max mask index
+ DEBUG_PRINTF("we have %u top masks\n", numMasks);
+
+ topMasks.assign(numMasks, NFAStateSet(args.num_states)); // all zeroes
+
+ for (const auto &m : args.tops) {
+ u32 mask_idx = m.first;
for (NFAVertex v : m.second) {
u32 state_id = args.state_ids.at(v);
DEBUG_PRINTF("state %u is in top mask %u\n", state_id, mask_idx);
-
+
assert(mask_idx < numMasks);
assert(state_id != NO_STATE);
-
+
topMasks[mask_idx].set(state_id);
}
- }
-}
-
-static
-u32 uncompressedStateSize(u32 num_states) {
- // Number of bytes required to store all our states.
- return ROUNDUP_N(num_states, 8)/8;
-}
-
-static
-u32 compressedStateSize(const NGHolder &h, const NFAStateSet &maskedStates,
+ }
+}
+
+static
+u32 uncompressedStateSize(u32 num_states) {
+ // Number of bytes required to store all our states.
+ return ROUNDUP_N(num_states, 8)/8;
+}
+
+static
+u32 compressedStateSize(const NGHolder &h, const NFAStateSet &maskedStates,
const unordered_map<NFAVertex, u32> &state_ids) {
- // Shrink state requirement to enough to fit the compressed largest reach.
- vector<u32> allreach(N_CHARS, 0);
-
- for (auto v : vertices_range(h)) {
- u32 i = state_ids.at(v);
- if (i == NO_STATE || maskedStates.test(i)) {
- continue;
- }
- const CharReach &cr = h[v].char_reach;
- for (size_t j = cr.find_first(); j != cr.npos; j = cr.find_next(j)) {
- allreach[j]++; // state 'i' can reach character 'j'.
- }
- }
-
- u32 maxreach = *max_element(allreach.begin(), allreach.end());
- DEBUG_PRINTF("max reach is %u\n", maxreach);
- return (maxreach + 7) / 8;
-}
-
-static
-bool hasSquashableInitDs(const build_info &args) {
- const NGHolder &h = args.h;
-
- if (args.squashMap.empty()) {
- DEBUG_PRINTF("squash map is empty\n");
- return false;
- }
-
- NFAStateSet initDs(args.num_states);
- u32 sds_state = args.state_ids.at(h.startDs);
- if (sds_state == NO_STATE) {
- DEBUG_PRINTF("no states in initds\n");
- return false;
- }
-
- initDs.set(sds_state);
-
- /* TODO: simplify */
-
- // Check normal squash map.
- for (const auto &m : args.squashMap) {
- DEBUG_PRINTF("checking squash mask for state %u\n",
- args.state_ids.at(m.first));
- NFAStateSet squashed = ~(m.second); // flip mask
- assert(squashed.size() == initDs.size());
- if (squashed.intersects(initDs)) {
- DEBUG_PRINTF("state %u squashes initds states\n",
- args.state_ids.at(m.first));
- return true;
- }
- }
-
- // Check report squash map.
- for (const auto &m : args.reportSquashMap) {
- DEBUG_PRINTF("checking report squash mask for state %u\n",
- args.state_ids.at(m.first));
- NFAStateSet squashed = ~(m.second); // flip mask
- assert(squashed.size() == initDs.size());
- if (squashed.intersects(initDs)) {
- DEBUG_PRINTF("state %u squashes initds states\n",
- args.state_ids.at(m.first));
- return true;
- }
- }
-
- return false;
-}
-
-static
-bool hasInitDsStates(const NGHolder &h,
+ // Shrink state requirement to enough to fit the compressed largest reach.
+ vector<u32> allreach(N_CHARS, 0);
+
+ for (auto v : vertices_range(h)) {
+ u32 i = state_ids.at(v);
+ if (i == NO_STATE || maskedStates.test(i)) {
+ continue;
+ }
+ const CharReach &cr = h[v].char_reach;
+ for (size_t j = cr.find_first(); j != cr.npos; j = cr.find_next(j)) {
+ allreach[j]++; // state 'i' can reach character 'j'.
+ }
+ }
+
+ u32 maxreach = *max_element(allreach.begin(), allreach.end());
+ DEBUG_PRINTF("max reach is %u\n", maxreach);
+ return (maxreach + 7) / 8;
+}
+
+static
+bool hasSquashableInitDs(const build_info &args) {
+ const NGHolder &h = args.h;
+
+ if (args.squashMap.empty()) {
+ DEBUG_PRINTF("squash map is empty\n");
+ return false;
+ }
+
+ NFAStateSet initDs(args.num_states);
+ u32 sds_state = args.state_ids.at(h.startDs);
+ if (sds_state == NO_STATE) {
+ DEBUG_PRINTF("no states in initds\n");
+ return false;
+ }
+
+ initDs.set(sds_state);
+
+ /* TODO: simplify */
+
+ // Check normal squash map.
+ for (const auto &m : args.squashMap) {
+ DEBUG_PRINTF("checking squash mask for state %u\n",
+ args.state_ids.at(m.first));
+ NFAStateSet squashed = ~(m.second); // flip mask
+ assert(squashed.size() == initDs.size());
+ if (squashed.intersects(initDs)) {
+ DEBUG_PRINTF("state %u squashes initds states\n",
+ args.state_ids.at(m.first));
+ return true;
+ }
+ }
+
+ // Check report squash map.
+ for (const auto &m : args.reportSquashMap) {
+ DEBUG_PRINTF("checking report squash mask for state %u\n",
+ args.state_ids.at(m.first));
+ NFAStateSet squashed = ~(m.second); // flip mask
+ assert(squashed.size() == initDs.size());
+ if (squashed.intersects(initDs)) {
+ DEBUG_PRINTF("state %u squashes initds states\n",
+ args.state_ids.at(m.first));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static
+bool hasInitDsStates(const NGHolder &h,
const unordered_map<NFAVertex, u32> &state_ids) {
- if (state_ids.at(h.startDs) != NO_STATE) {
- return true;
- }
-
- if (is_triggered(h) && state_ids.at(h.start) != NO_STATE) {
- return true;
- }
-
- return false;
-}
-
-static
-void findMaskedCompressionStates(const build_info &args,
- NFAStateSet &maskedStates) {
- const NGHolder &h = args.h;
- if (!generates_callbacks(h)) {
- // Rose leftfixes can mask out initds, which is worth doing if it will
- // stay on forever (i.e. it's not squashable).
- u32 sds_i = args.state_ids.at(h.startDs);
- if (sds_i != NO_STATE && !hasSquashableInitDs(args)) {
- maskedStates.set(sds_i);
- DEBUG_PRINTF("masking out initds state\n");
- }
- }
-
- // Suffixes and outfixes can mask out leaf states, which should all be
- // accepts. Right now we can only do this when there is nothing in initDs,
- // as we switch that on unconditionally in the expand call.
+ if (state_ids.at(h.startDs) != NO_STATE) {
+ return true;
+ }
+
+ if (is_triggered(h) && state_ids.at(h.start) != NO_STATE) {
+ return true;
+ }
+
+ return false;
+}
+
+static
+void findMaskedCompressionStates(const build_info &args,
+ NFAStateSet &maskedStates) {
+ const NGHolder &h = args.h;
+ if (!generates_callbacks(h)) {
+ // Rose leftfixes can mask out initds, which is worth doing if it will
+ // stay on forever (i.e. it's not squashable).
+ u32 sds_i = args.state_ids.at(h.startDs);
+ if (sds_i != NO_STATE && !hasSquashableInitDs(args)) {
+ maskedStates.set(sds_i);
+ DEBUG_PRINTF("masking out initds state\n");
+ }
+ }
+
+ // Suffixes and outfixes can mask out leaf states, which should all be
+ // accepts. Right now we can only do this when there is nothing in initDs,
+ // as we switch that on unconditionally in the expand call.
if (!inspects_states_for_accepts(h)
&& !hasInitDsStates(h, args.state_ids)) {
- NFAStateSet nonleaf(args.num_states);
- for (const auto &e : edges_range(h)) {
- u32 from = args.state_ids.at(source(e, h));
- u32 to = args.state_ids.at(target(e, h));
- if (from == NO_STATE) {
- continue;
- }
-
- // We cannot mask out EOD accepts, as they have to perform an
- // action after they're switched on that may be delayed until the
- // next stream write.
- if (to == NO_STATE && target(e, h) != h.acceptEod) {
- continue;
- }
-
- nonleaf.set(from);
- }
-
- for (u32 i = 0; i < args.num_states; i++) {
- if (!nonleaf.test(i)) {
- maskedStates.set(i);
- }
- }
-
- DEBUG_PRINTF("masking out %zu leaf states\n", maskedStates.count());
- }
-}
-
-/** \brief Sets a given flag in the LimEx structure. */
-template<class implNFA_t>
-static
-void setLimexFlag(implNFA_t *limex, u32 flag) {
- assert(flag);
- assert((flag & (flag - 1)) == 0);
- limex->flags |= flag;
-}
-
-/** \brief Sets a given flag in the NFA structure */
-static
-void setNfaFlag(NFA *nfa, u32 flag) {
- assert(flag);
- assert((flag & (flag - 1)) == 0);
- nfa->flags |= flag;
-}
-
-// Some of our NFA types support compressing the state down if we're not using
-// all of it.
-template<class implNFA_t>
-static
-void findStateSize(const build_info &args, implNFA_t *limex) {
- // Nothing is masked off by default.
- maskFill(limex->compressMask, 0xff);
-
- u32 sizeUncompressed = uncompressedStateSize(args.num_states);
- assert(sizeUncompressed <= sizeof(limex->compressMask));
-
- if (!args.stateCompression) {
- DEBUG_PRINTF("compression disabled, uncompressed state size %u\n",
- sizeUncompressed);
- limex->stateSize = sizeUncompressed;
- return;
- }
-
- NFAStateSet maskedStates(args.num_states);
- findMaskedCompressionStates(args, maskedStates);
-
- u32 sizeCompressed = compressedStateSize(args.h, maskedStates, args.state_ids);
- assert(sizeCompressed <= sizeof(limex->compressMask));
-
- DEBUG_PRINTF("compressed=%u, uncompressed=%u\n", sizeCompressed,
- sizeUncompressed);
-
- // Must be at least a 10% saving.
- if ((sizeCompressed * 100) <= (sizeUncompressed * 90)) {
- DEBUG_PRINTF("using compression, state size %u\n",
- sizeCompressed);
- setLimexFlag(limex, LIMEX_FLAG_COMPRESS_STATE);
- limex->stateSize = sizeCompressed;
-
- if (maskedStates.any()) {
- DEBUG_PRINTF("masking %zu states\n", maskedStates.count());
- setLimexFlag(limex, LIMEX_FLAG_COMPRESS_MASKED);
- for (size_t i = maskedStates.find_first(); i != NFAStateSet::npos;
- i = maskedStates.find_next(i)) {
- maskClearBit(limex->compressMask, i);
- }
- }
- } else {
- DEBUG_PRINTF("not using compression, state size %u\n",
- sizeUncompressed);
- limex->stateSize = sizeUncompressed;
- }
-}
-
-/*
- * LimEx NFA: code for building NFAs in the Limited+Exceptional model. Most
- * transitions are limited, with transitions outside the constraints of our
- * shifts taken care of as 'exceptions'. Exceptions are also used to handle
- * accepts and squash behaviour.
- */
-
-/**
- * \brief Prototype exception class.
- *
- * Used to build up the map of exceptions before being converted to real
- * NFAException32 (etc) structures.
- */
-struct ExceptionProto {
- u32 reports_index = MO_INVALID_IDX;
- NFAStateSet succ_states;
- NFAStateSet squash_states;
- u32 repeat_index = MO_INVALID_IDX;
- enum LimExTrigger trigger = LIMEX_TRIGGER_NONE;
- enum LimExSquash squash = LIMEX_SQUASH_NONE;
-
- explicit ExceptionProto(u32 num_states)
- : succ_states(num_states), squash_states(num_states) {
- // Squash states are represented as the set of states to leave on,
- // so we start with all-ones.
- squash_states.set();
- }
-
- bool operator<(const ExceptionProto &b) const {
- const ExceptionProto &a = *this;
-
- ORDER_CHECK(reports_index);
- ORDER_CHECK(repeat_index);
- ORDER_CHECK(trigger);
- ORDER_CHECK(squash);
- ORDER_CHECK(succ_states);
- ORDER_CHECK(squash_states);
-
- return false;
- }
-};
-
-static
+ NFAStateSet nonleaf(args.num_states);
+ for (const auto &e : edges_range(h)) {
+ u32 from = args.state_ids.at(source(e, h));
+ u32 to = args.state_ids.at(target(e, h));
+ if (from == NO_STATE) {
+ continue;
+ }
+
+ // We cannot mask out EOD accepts, as they have to perform an
+ // action after they're switched on that may be delayed until the
+ // next stream write.
+ if (to == NO_STATE && target(e, h) != h.acceptEod) {
+ continue;
+ }
+
+ nonleaf.set(from);
+ }
+
+ for (u32 i = 0; i < args.num_states; i++) {
+ if (!nonleaf.test(i)) {
+ maskedStates.set(i);
+ }
+ }
+
+ DEBUG_PRINTF("masking out %zu leaf states\n", maskedStates.count());
+ }
+}
+
+/** \brief Sets a given flag in the LimEx structure. */
+template<class implNFA_t>
+static
+void setLimexFlag(implNFA_t *limex, u32 flag) {
+ assert(flag);
+ assert((flag & (flag - 1)) == 0);
+ limex->flags |= flag;
+}
+
+/** \brief Sets a given flag in the NFA structure */
+static
+void setNfaFlag(NFA *nfa, u32 flag) {
+ assert(flag);
+ assert((flag & (flag - 1)) == 0);
+ nfa->flags |= flag;
+}
+
+// Some of our NFA types support compressing the state down if we're not using
+// all of it.
+template<class implNFA_t>
+static
+void findStateSize(const build_info &args, implNFA_t *limex) {
+ // Nothing is masked off by default.
+ maskFill(limex->compressMask, 0xff);
+
+ u32 sizeUncompressed = uncompressedStateSize(args.num_states);
+ assert(sizeUncompressed <= sizeof(limex->compressMask));
+
+ if (!args.stateCompression) {
+ DEBUG_PRINTF("compression disabled, uncompressed state size %u\n",
+ sizeUncompressed);
+ limex->stateSize = sizeUncompressed;
+ return;
+ }
+
+ NFAStateSet maskedStates(args.num_states);
+ findMaskedCompressionStates(args, maskedStates);
+
+ u32 sizeCompressed = compressedStateSize(args.h, maskedStates, args.state_ids);
+ assert(sizeCompressed <= sizeof(limex->compressMask));
+
+ DEBUG_PRINTF("compressed=%u, uncompressed=%u\n", sizeCompressed,
+ sizeUncompressed);
+
+ // Must be at least a 10% saving.
+ if ((sizeCompressed * 100) <= (sizeUncompressed * 90)) {
+ DEBUG_PRINTF("using compression, state size %u\n",
+ sizeCompressed);
+ setLimexFlag(limex, LIMEX_FLAG_COMPRESS_STATE);
+ limex->stateSize = sizeCompressed;
+
+ if (maskedStates.any()) {
+ DEBUG_PRINTF("masking %zu states\n", maskedStates.count());
+ setLimexFlag(limex, LIMEX_FLAG_COMPRESS_MASKED);
+ for (size_t i = maskedStates.find_first(); i != NFAStateSet::npos;
+ i = maskedStates.find_next(i)) {
+ maskClearBit(limex->compressMask, i);
+ }
+ }
+ } else {
+ DEBUG_PRINTF("not using compression, state size %u\n",
+ sizeUncompressed);
+ limex->stateSize = sizeUncompressed;
+ }
+}
+
+/*
+ * LimEx NFA: code for building NFAs in the Limited+Exceptional model. Most
+ * transitions are limited, with transitions outside the constraints of our
+ * shifts taken care of as 'exceptions'. Exceptions are also used to handle
+ * accepts and squash behaviour.
+ */
+
+/**
+ * \brief Prototype exception class.
+ *
+ * Used to build up the map of exceptions before being converted to real
+ * NFAException32 (etc) structures.
+ */
+struct ExceptionProto {
+ u32 reports_index = MO_INVALID_IDX;
+ NFAStateSet succ_states;
+ NFAStateSet squash_states;
+ u32 repeat_index = MO_INVALID_IDX;
+ enum LimExTrigger trigger = LIMEX_TRIGGER_NONE;
+ enum LimExSquash squash = LIMEX_SQUASH_NONE;
+
+ explicit ExceptionProto(u32 num_states)
+ : succ_states(num_states), squash_states(num_states) {
+ // Squash states are represented as the set of states to leave on,
+ // so we start with all-ones.
+ squash_states.set();
+ }
+
+ bool operator<(const ExceptionProto &b) const {
+ const ExceptionProto &a = *this;
+
+ ORDER_CHECK(reports_index);
+ ORDER_CHECK(repeat_index);
+ ORDER_CHECK(trigger);
+ ORDER_CHECK(squash);
+ ORDER_CHECK(succ_states);
+ ORDER_CHECK(squash_states);
+
+ return false;
+ }
+};
+
+static
u32 buildExceptionMap(const build_info &args, ReportListCache &reports_cache,
const unordered_set<NFAEdge> &exceptional,
map<ExceptionProto, vector<u32>> &exceptionMap,
vector<ReportID> &reportList) {
- const NGHolder &h = args.h;
- const u32 num_states = args.num_states;
+ const NGHolder &h = args.h;
+ const u32 num_states = args.num_states;
u32 exceptionCount = 0;
-
+
unordered_map<NFAVertex, u32> pos_trigger;
unordered_map<NFAVertex, u32> tug_trigger;
-
- for (u32 i = 0; i < args.repeats.size(); i++) {
- const BoundedRepeatData &br = args.repeats[i];
- assert(!contains(pos_trigger, br.pos_trigger));
- pos_trigger[br.pos_trigger] = i;
- for (auto v : br.tug_triggers) {
- assert(!contains(tug_trigger, v));
- tug_trigger[v] = i;
- }
- }
-
- for (auto v : vertices_range(h)) {
- const u32 i = args.state_ids.at(v);
-
- if (i == NO_STATE) {
- continue;
- }
-
- bool addMe = false;
- ExceptionProto e(num_states);
-
- if (edge(v, h.accept, h).second && generates_callbacks(h)) {
- /* if nfa is never used to produce callbacks, no need to mark
- * states as exceptional */
- const auto &reports = h[v].reports;
-
- DEBUG_PRINTF("state %u is exceptional due to accept "
- "(%zu reports)\n", i, reports.size());
-
+
+ for (u32 i = 0; i < args.repeats.size(); i++) {
+ const BoundedRepeatData &br = args.repeats[i];
+ assert(!contains(pos_trigger, br.pos_trigger));
+ pos_trigger[br.pos_trigger] = i;
+ for (auto v : br.tug_triggers) {
+ assert(!contains(tug_trigger, v));
+ tug_trigger[v] = i;
+ }
+ }
+
+ for (auto v : vertices_range(h)) {
+ const u32 i = args.state_ids.at(v);
+
+ if (i == NO_STATE) {
+ continue;
+ }
+
+ bool addMe = false;
+ ExceptionProto e(num_states);
+
+ if (edge(v, h.accept, h).second && generates_callbacks(h)) {
+ /* if nfa is never used to produce callbacks, no need to mark
+ * states as exceptional */
+ const auto &reports = h[v].reports;
+
+ DEBUG_PRINTF("state %u is exceptional due to accept "
+ "(%zu reports)\n", i, reports.size());
+
if (reports.empty()) {
e.reports_index = MO_INVALID_IDX;
} else {
e.reports_index =
addReports(reports, reportList, reports_cache);
}
-
- // We may be applying a report squash too.
- auto mi = args.reportSquashMap.find(v);
- if (mi != args.reportSquashMap.end()) {
- DEBUG_PRINTF("report squashes states\n");
- assert(e.squash_states.size() == mi->second.size());
- e.squash_states = mi->second;
- e.squash = LIMEX_SQUASH_REPORT;
- }
-
- addMe = true;
- }
-
- if (contains(pos_trigger, v)) {
- u32 repeat_index = pos_trigger[v];
- assert(e.trigger == LIMEX_TRIGGER_NONE);
- e.trigger = LIMEX_TRIGGER_POS;
- e.repeat_index = repeat_index;
- DEBUG_PRINTF("state %u has pos trigger for repeat %u\n", i,
- repeat_index);
- addMe = true;
- }
-
- if (contains(tug_trigger, v)) {
- u32 repeat_index = tug_trigger[v];
- assert(e.trigger == LIMEX_TRIGGER_NONE);
- e.trigger = LIMEX_TRIGGER_TUG;
- e.repeat_index = repeat_index;
-
- // TUG triggers can squash the preceding cyclic state.
- u32 cyclic = args.state_ids.at(args.repeats[repeat_index].cyclic);
- e.squash_states.reset(cyclic);
- e.squash = LIMEX_SQUASH_TUG;
- DEBUG_PRINTF("state %u has tug trigger for repeat %u, can squash "
- "state %u\n", i, repeat_index, cyclic);
- addMe = true;
- }
-
- // are we a non-limited transition?
- for (const auto &oe : out_edges_range(v, h)) {
- if (contains(exceptional, oe)) {
- NFAVertex w = target(oe, h);
- u32 w_idx = args.state_ids.at(w);
- assert(w_idx != NO_STATE);
- e.succ_states.set(w_idx);
- DEBUG_PRINTF("exceptional transition %u->%u\n", i, w_idx);
- addMe = true;
- }
- }
-
- // do we lead SOLELY to a squasher state? (we use the successors as
- // a proxy for the out-edge here, so there must be only one for us
- // to do this safely)
- /* The above comment is IMHO bogus and would result in all squashing
- * being disabled around stars */
- if (e.trigger != LIMEX_TRIGGER_TUG) {
- for (auto w : adjacent_vertices_range(v, h)) {
- if (w == v) {
- continue;
- }
- u32 j = args.state_ids.at(w);
- if (j == NO_STATE) {
- continue;
- }
- DEBUG_PRINTF("we are checking if succ %u is a squasher\n", j);
- auto mi = args.squashMap.find(w);
- if (mi != args.squashMap.end()) {
- DEBUG_PRINTF("squasher edge (%u, %u)\n", i, j);
- DEBUG_PRINTF("e.squash_states.size() == %zu, "
- "mi->second.size() = %zu\n",
- e.squash_states.size(), mi->second.size());
- assert(e.squash_states.size() == mi->second.size());
- e.squash_states = mi->second;
-
- // NOTE: this might be being combined with the report
- // squashing above.
-
- e.squash = LIMEX_SQUASH_CYCLIC;
- DEBUG_PRINTF("squashing succ %u (turns off %zu states)\n",
- j, mi->second.size() - mi->second.count());
- addMe = true;
- }
- }
- }
-
- if (addMe) {
- // Add 'e' if it isn't in the map, and push state i on to its list
- // of states.
- assert(e.succ_states.size() == num_states);
- assert(e.squash_states.size() == num_states);
- exceptionMap[e].push_back(i);
+
+ // We may be applying a report squash too.
+ auto mi = args.reportSquashMap.find(v);
+ if (mi != args.reportSquashMap.end()) {
+ DEBUG_PRINTF("report squashes states\n");
+ assert(e.squash_states.size() == mi->second.size());
+ e.squash_states = mi->second;
+ e.squash = LIMEX_SQUASH_REPORT;
+ }
+
+ addMe = true;
+ }
+
+ if (contains(pos_trigger, v)) {
+ u32 repeat_index = pos_trigger[v];
+ assert(e.trigger == LIMEX_TRIGGER_NONE);
+ e.trigger = LIMEX_TRIGGER_POS;
+ e.repeat_index = repeat_index;
+ DEBUG_PRINTF("state %u has pos trigger for repeat %u\n", i,
+ repeat_index);
+ addMe = true;
+ }
+
+ if (contains(tug_trigger, v)) {
+ u32 repeat_index = tug_trigger[v];
+ assert(e.trigger == LIMEX_TRIGGER_NONE);
+ e.trigger = LIMEX_TRIGGER_TUG;
+ e.repeat_index = repeat_index;
+
+ // TUG triggers can squash the preceding cyclic state.
+ u32 cyclic = args.state_ids.at(args.repeats[repeat_index].cyclic);
+ e.squash_states.reset(cyclic);
+ e.squash = LIMEX_SQUASH_TUG;
+ DEBUG_PRINTF("state %u has tug trigger for repeat %u, can squash "
+ "state %u\n", i, repeat_index, cyclic);
+ addMe = true;
+ }
+
+ // are we a non-limited transition?
+ for (const auto &oe : out_edges_range(v, h)) {
+ if (contains(exceptional, oe)) {
+ NFAVertex w = target(oe, h);
+ u32 w_idx = args.state_ids.at(w);
+ assert(w_idx != NO_STATE);
+ e.succ_states.set(w_idx);
+ DEBUG_PRINTF("exceptional transition %u->%u\n", i, w_idx);
+ addMe = true;
+ }
+ }
+
+ // do we lead SOLELY to a squasher state? (we use the successors as
+ // a proxy for the out-edge here, so there must be only one for us
+ // to do this safely)
+ /* The above comment is IMHO bogus and would result in all squashing
+ * being disabled around stars */
+ if (e.trigger != LIMEX_TRIGGER_TUG) {
+ for (auto w : adjacent_vertices_range(v, h)) {
+ if (w == v) {
+ continue;
+ }
+ u32 j = args.state_ids.at(w);
+ if (j == NO_STATE) {
+ continue;
+ }
+ DEBUG_PRINTF("we are checking if succ %u is a squasher\n", j);
+ auto mi = args.squashMap.find(w);
+ if (mi != args.squashMap.end()) {
+ DEBUG_PRINTF("squasher edge (%u, %u)\n", i, j);
+ DEBUG_PRINTF("e.squash_states.size() == %zu, "
+ "mi->second.size() = %zu\n",
+ e.squash_states.size(), mi->second.size());
+ assert(e.squash_states.size() == mi->second.size());
+ e.squash_states = mi->second;
+
+ // NOTE: this might be being combined with the report
+ // squashing above.
+
+ e.squash = LIMEX_SQUASH_CYCLIC;
+ DEBUG_PRINTF("squashing succ %u (turns off %zu states)\n",
+ j, mi->second.size() - mi->second.count());
+ addMe = true;
+ }
+ }
+ }
+
+ if (addMe) {
+ // Add 'e' if it isn't in the map, and push state i on to its list
+ // of states.
+ assert(e.succ_states.size() == num_states);
+ assert(e.squash_states.size() == num_states);
+ exceptionMap[e].push_back(i);
exceptionCount++;
- }
- }
-
+ }
+ }
+
DEBUG_PRINTF("%u exceptions found (%zu unique)\n", exceptionCount,
exceptionMap.size());
return exceptionCount;
-}
-
-static
-u32 depth_to_u32(const depth &d) {
- assert(d.is_reachable());
- if (d.is_infinite()) {
- return REPEAT_INF;
- }
-
- u32 d_val = d;
- assert(d_val < REPEAT_INF);
- return d_val;
-}
-
+}
+
+static
+u32 depth_to_u32(const depth &d) {
+ assert(d.is_reachable());
+ if (d.is_infinite()) {
+ return REPEAT_INF;
+ }
+
+ u32 d_val = d;
+ assert(d_val < REPEAT_INF);
+ return d_val;
+}
+
static
bool isExceptionalTransition(u32 from, u32 to, const build_info &args,
u32 maxShift) {
@@ -1690,196 +1690,196 @@ bool cannotDie(const build_info &args) {
});
}
-template<NFAEngineType dtype>
-struct Factory {
- // typedefs for readability, for types derived from traits
- typedef typename NFATraits<dtype>::exception_t exception_t;
- typedef typename NFATraits<dtype>::implNFA_t implNFA_t;
- typedef typename NFATraits<dtype>::tableRow_t tableRow_t;
-
- static
- void allocState(NFA *nfa, u32 repeatscratchStateSize,
- u32 repeatStreamState) {
- implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa);
-
- // LimEx NFAs now store the following in state:
- // 1. state bitvector (always present)
- // 2. space associated with repeats
- // This function just needs to size these correctly.
-
- u32 stateSize = limex->stateSize;
-
- DEBUG_PRINTF("bitvector=%zu/%u, repeat full=%u, stream=%u\n",
- sizeof(limex->init), stateSize, repeatscratchStateSize,
- repeatStreamState);
-
+template<NFAEngineType dtype>
+struct Factory {
+ // typedefs for readability, for types derived from traits
+ typedef typename NFATraits<dtype>::exception_t exception_t;
+ typedef typename NFATraits<dtype>::implNFA_t implNFA_t;
+ typedef typename NFATraits<dtype>::tableRow_t tableRow_t;
+
+ static
+ void allocState(NFA *nfa, u32 repeatscratchStateSize,
+ u32 repeatStreamState) {
+ implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa);
+
+ // LimEx NFAs now store the following in state:
+ // 1. state bitvector (always present)
+ // 2. space associated with repeats
+ // This function just needs to size these correctly.
+
+ u32 stateSize = limex->stateSize;
+
+ DEBUG_PRINTF("bitvector=%zu/%u, repeat full=%u, stream=%u\n",
+ sizeof(limex->init), stateSize, repeatscratchStateSize,
+ repeatStreamState);
+
size_t scratchStateSize = NFATraits<dtype>::scratch_state_size;
- if (repeatscratchStateSize) {
- scratchStateSize
- = ROUNDUP_N(scratchStateSize, alignof(RepeatControl));
- scratchStateSize += repeatscratchStateSize;
- }
- size_t streamStateSize = stateSize + repeatStreamState;
-
- nfa->scratchStateSize = verify_u32(scratchStateSize);
- nfa->streamStateSize = verify_u32(streamStateSize);
- }
-
- static
- size_t repeatAllocSize(const BoundedRepeatData &br, u32 *tableOffset,
- u32 *tugMaskOffset) {
- size_t len = sizeof(NFARepeatInfo) + sizeof(RepeatInfo);
-
- // sparse lookup table.
- if (br.type == REPEAT_SPARSE_OPTIMAL_P) {
- len = ROUNDUP_N(len, alignof(u64a));
- *tableOffset = verify_u32(len);
- len += sizeof(u64a) * (br.repeatMax + 1);
- } else {
- *tableOffset = 0;
- }
-
- // tug mask.
- len = ROUNDUP_N(len, alignof(tableRow_t));
- *tugMaskOffset = verify_u32(len);
- len += sizeof(tableRow_t);
-
- // to simplify layout.
- len = ROUNDUP_CL(len);
-
- return len;
- }
-
- static
- void buildRepeats(const build_info &args,
+ if (repeatscratchStateSize) {
+ scratchStateSize
+ = ROUNDUP_N(scratchStateSize, alignof(RepeatControl));
+ scratchStateSize += repeatscratchStateSize;
+ }
+ size_t streamStateSize = stateSize + repeatStreamState;
+
+ nfa->scratchStateSize = verify_u32(scratchStateSize);
+ nfa->streamStateSize = verify_u32(streamStateSize);
+ }
+
+ static
+ size_t repeatAllocSize(const BoundedRepeatData &br, u32 *tableOffset,
+ u32 *tugMaskOffset) {
+ size_t len = sizeof(NFARepeatInfo) + sizeof(RepeatInfo);
+
+ // sparse lookup table.
+ if (br.type == REPEAT_SPARSE_OPTIMAL_P) {
+ len = ROUNDUP_N(len, alignof(u64a));
+ *tableOffset = verify_u32(len);
+ len += sizeof(u64a) * (br.repeatMax + 1);
+ } else {
+ *tableOffset = 0;
+ }
+
+ // tug mask.
+ len = ROUNDUP_N(len, alignof(tableRow_t));
+ *tugMaskOffset = verify_u32(len);
+ len += sizeof(tableRow_t);
+
+ // to simplify layout.
+ len = ROUNDUP_CL(len);
+
+ return len;
+ }
+
+ static
+ void buildRepeats(const build_info &args,
vector<bytecode_ptr<NFARepeatInfo>> &out,
u32 *scratchStateSize, u32 *streamState) {
- out.reserve(args.repeats.size());
-
- u32 repeat_idx = 0;
- for (auto it = args.repeats.begin(), ite = args.repeats.end();
- it != ite; ++it, ++repeat_idx) {
- const BoundedRepeatData &br = *it;
- assert(args.state_ids.at(br.cyclic) != NO_STATE);
-
- u32 tableOffset, tugMaskOffset;
- size_t len = repeatAllocSize(br, &tableOffset, &tugMaskOffset);
+ out.reserve(args.repeats.size());
+
+ u32 repeat_idx = 0;
+ for (auto it = args.repeats.begin(), ite = args.repeats.end();
+ it != ite; ++it, ++repeat_idx) {
+ const BoundedRepeatData &br = *it;
+ assert(args.state_ids.at(br.cyclic) != NO_STATE);
+
+ u32 tableOffset, tugMaskOffset;
+ size_t len = repeatAllocSize(br, &tableOffset, &tugMaskOffset);
auto info = make_zeroed_bytecode_ptr<NFARepeatInfo>(len);
- char *info_ptr = (char *)info.get();
-
- // Collect state space info.
- RepeatStateInfo rsi(br.type, br.repeatMin, br.repeatMax, br.minPeriod);
- u32 streamStateLen = rsi.packedCtrlSize + rsi.stateSize;
-
- // Fill the NFARepeatInfo structure.
- info->cyclicState = args.state_ids.at(br.cyclic);
- info->ctrlIndex = repeat_idx;
- info->packedCtrlOffset = *streamState;
- info->stateOffset = *streamState + rsi.packedCtrlSize;
- info->stateSize = streamStateLen;
- info->tugMaskOffset = tugMaskOffset;
-
- // Fill the RepeatInfo structure.
- RepeatInfo *repeat =
- (RepeatInfo *)(info_ptr + sizeof(NFARepeatInfo));
- repeat->type = br.type;
- repeat->repeatMin = depth_to_u32(br.repeatMin);
- repeat->repeatMax = depth_to_u32(br.repeatMax);
- repeat->horizon = rsi.horizon;
- repeat->packedCtrlSize = rsi.packedCtrlSize;
- repeat->stateSize = rsi.stateSize;
- copy_bytes(repeat->packedFieldSizes, rsi.packedFieldSizes);
- repeat->patchCount = rsi.patchCount;
- repeat->patchSize = rsi.patchSize;
- repeat->encodingSize = rsi.encodingSize;
- repeat->patchesOffset = rsi.patchesOffset;
-
- u32 repeat_len = sizeof(RepeatInfo);
- if (br.type == REPEAT_SPARSE_OPTIMAL_P) {
- repeat_len += sizeof(u64a) * (rsi.patchSize + 1);
- }
- repeat->length = repeat_len;
-
- // Copy in the sparse lookup table.
- if (br.type == REPEAT_SPARSE_OPTIMAL_P) {
- assert(!rsi.table.empty());
- copy_bytes(info_ptr + tableOffset, rsi.table);
- }
-
- // Fill the tug mask.
- tableRow_t *tugMask = (tableRow_t *)(info_ptr + tugMaskOffset);
- for (auto v : br.tug_triggers) {
- u32 state_id = args.state_ids.at(v);
- assert(state_id != NO_STATE);
- maskSetBit(*tugMask, state_id);
- }
-
- assert(streamStateLen);
- *streamState += streamStateLen;
- *scratchStateSize += sizeof(RepeatControl);
-
+ char *info_ptr = (char *)info.get();
+
+ // Collect state space info.
+ RepeatStateInfo rsi(br.type, br.repeatMin, br.repeatMax, br.minPeriod);
+ u32 streamStateLen = rsi.packedCtrlSize + rsi.stateSize;
+
+ // Fill the NFARepeatInfo structure.
+ info->cyclicState = args.state_ids.at(br.cyclic);
+ info->ctrlIndex = repeat_idx;
+ info->packedCtrlOffset = *streamState;
+ info->stateOffset = *streamState + rsi.packedCtrlSize;
+ info->stateSize = streamStateLen;
+ info->tugMaskOffset = tugMaskOffset;
+
+ // Fill the RepeatInfo structure.
+ RepeatInfo *repeat =
+ (RepeatInfo *)(info_ptr + sizeof(NFARepeatInfo));
+ repeat->type = br.type;
+ repeat->repeatMin = depth_to_u32(br.repeatMin);
+ repeat->repeatMax = depth_to_u32(br.repeatMax);
+ repeat->horizon = rsi.horizon;
+ repeat->packedCtrlSize = rsi.packedCtrlSize;
+ repeat->stateSize = rsi.stateSize;
+ copy_bytes(repeat->packedFieldSizes, rsi.packedFieldSizes);
+ repeat->patchCount = rsi.patchCount;
+ repeat->patchSize = rsi.patchSize;
+ repeat->encodingSize = rsi.encodingSize;
+ repeat->patchesOffset = rsi.patchesOffset;
+
+ u32 repeat_len = sizeof(RepeatInfo);
+ if (br.type == REPEAT_SPARSE_OPTIMAL_P) {
+ repeat_len += sizeof(u64a) * (rsi.patchSize + 1);
+ }
+ repeat->length = repeat_len;
+
+ // Copy in the sparse lookup table.
+ if (br.type == REPEAT_SPARSE_OPTIMAL_P) {
+ assert(!rsi.table.empty());
+ copy_bytes(info_ptr + tableOffset, rsi.table);
+ }
+
+ // Fill the tug mask.
+ tableRow_t *tugMask = (tableRow_t *)(info_ptr + tugMaskOffset);
+ for (auto v : br.tug_triggers) {
+ u32 state_id = args.state_ids.at(v);
+ assert(state_id != NO_STATE);
+ maskSetBit(*tugMask, state_id);
+ }
+
+ assert(streamStateLen);
+ *streamState += streamStateLen;
+ *scratchStateSize += sizeof(RepeatControl);
+
out.emplace_back(move(info));
- }
- }
-
- static
- void writeLimexMasks(const build_info &args, implNFA_t *limex) {
- const NGHolder &h = args.h;
-
- // Init masks.
- u32 s_i = args.state_ids.at(h.start);
- u32 sds_i = args.state_ids.at(h.startDs);
-
- if (s_i != NO_STATE) {
- maskSetBit(limex->init, s_i);
- if (is_triggered(h)) {
- maskSetBit(limex->initDS, s_i);
- }
- }
-
- if (sds_i != NO_STATE) {
- maskSetBit(limex->init, sds_i);
- maskSetBit(limex->initDS, sds_i);
- }
-
- // Zombie mask.
- for (auto v : args.zombies) {
- u32 state_id = args.state_ids.at(v);
- assert(state_id != NO_STATE);
- maskSetBit(limex->zombieMask, state_id);
- }
-
- // Repeat cyclic mask.
- for (const auto &br : args.repeats) {
- u32 cyclic = args.state_ids.at(br.cyclic);
- assert(cyclic != NO_STATE);
- maskSetBit(limex->repeatCyclicMask, cyclic);
- }
+ }
+ }
+
+ static
+ void writeLimexMasks(const build_info &args, implNFA_t *limex) {
+ const NGHolder &h = args.h;
+
+ // Init masks.
+ u32 s_i = args.state_ids.at(h.start);
+ u32 sds_i = args.state_ids.at(h.startDs);
+
+ if (s_i != NO_STATE) {
+ maskSetBit(limex->init, s_i);
+ if (is_triggered(h)) {
+ maskSetBit(limex->initDS, s_i);
+ }
+ }
+
+ if (sds_i != NO_STATE) {
+ maskSetBit(limex->init, sds_i);
+ maskSetBit(limex->initDS, sds_i);
+ }
+
+ // Zombie mask.
+ for (auto v : args.zombies) {
+ u32 state_id = args.state_ids.at(v);
+ assert(state_id != NO_STATE);
+ maskSetBit(limex->zombieMask, state_id);
+ }
+
+ // Repeat cyclic mask.
+ for (const auto &br : args.repeats) {
+ u32 cyclic = args.state_ids.at(br.cyclic);
+ assert(cyclic != NO_STATE);
+ maskSetBit(limex->repeatCyclicMask, cyclic);
+ }
/* also include tugs in repeat cyclic mask */
for (size_t i = args.tugs.find_first(); i != args.tugs.npos;
i = args.tugs.find_next(i)) {
maskSetBit(limex->repeatCyclicMask, i);
}
- }
-
- static
- void writeShiftMasks(const build_info &args, implNFA_t *limex) {
- const NGHolder &h = args.h;
+ }
+
+ static
+ void writeShiftMasks(const build_info &args, implNFA_t *limex) {
+ const NGHolder &h = args.h;
u32 maxShift = findMaxVarShift(args, limex->shiftCount);
u32 shiftMask = 0;
int shiftMaskIdx = 0;
-
- for (const auto &e : edges_range(h)) {
- u32 from = args.state_ids.at(source(e, h));
- u32 to = args.state_ids.at(target(e, h));
- if (from == NO_STATE || to == NO_STATE) {
- continue;
- }
-
- // We check for exceptional transitions here, as we don't want tug
- // trigger transitions emitted as limited transitions (even if they
- // could be in this model).
+
+ for (const auto &e : edges_range(h)) {
+ u32 from = args.state_ids.at(source(e, h));
+ u32 to = args.state_ids.at(target(e, h));
+ if (from == NO_STATE || to == NO_STATE) {
+ continue;
+ }
+
+ // We check for exceptional transitions here, as we don't want tug
+ // trigger transitions emitted as limited transitions (even if they
+ // could be in this model).
if (!isExceptionalTransition(from, to, args, maxShift)) {
u32 shift = to - from;
if ((shiftMask & (1UL << shift)) == 0UL) {
@@ -1893,55 +1893,55 @@ struct Factory {
break;
}
}
- }
- }
+ }
+ }
if (maxShift && limex->shiftCount > 1) {
for (u32 i = 0; i < limex->shiftCount; i++) {
assert(!isMaskZero(limex->shift[i]));
}
}
- }
-
- static
- void findExceptionalTransitions(const build_info &args,
+ }
+
+ static
+ void findExceptionalTransitions(const build_info &args,
unordered_set<NFAEdge> &exceptional,
u32 maxShift) {
- const NGHolder &h = args.h;
-
- for (const auto &e : edges_range(h)) {
- u32 from = args.state_ids.at(source(e, h));
- u32 to = args.state_ids.at(target(e, h));
- if (from == NO_STATE || to == NO_STATE) {
- continue;
- }
-
+ const NGHolder &h = args.h;
+
+ for (const auto &e : edges_range(h)) {
+ u32 from = args.state_ids.at(source(e, h));
+ u32 to = args.state_ids.at(target(e, h));
+ if (from == NO_STATE || to == NO_STATE) {
+ continue;
+ }
+
if (isExceptionalTransition(from, to, args, maxShift)) {
- exceptional.insert(e);
- }
- }
- }
-
- static
+ exceptional.insert(e);
+ }
+ }
+ }
+
+ static
void writeExceptions(const build_info &args,
const map<ExceptionProto, vector<u32>> &exceptionMap,
const vector<u32> &repeatOffsets, implNFA_t *limex,
const u32 exceptionsOffset,
const u32 reportListOffset) {
- DEBUG_PRINTF("exceptionsOffset=%u\n", exceptionsOffset);
-
- exception_t *etable = (exception_t *)((char *)limex + exceptionsOffset);
- assert(ISALIGNED(etable));
-
+ DEBUG_PRINTF("exceptionsOffset=%u\n", exceptionsOffset);
+
+ exception_t *etable = (exception_t *)((char *)limex + exceptionsOffset);
+ assert(ISALIGNED(etable));
+
map<u32, ExceptionProto> exception_by_state;
- for (const auto &m : exceptionMap) {
- const ExceptionProto &proto = m.first;
- const vector<u32> &states = m.second;
+ for (const auto &m : exceptionMap) {
+ const ExceptionProto &proto = m.first;
+ const vector<u32> &states = m.second;
for (u32 i : states) {
assert(!contains(exception_by_state, i));
exception_by_state.emplace(i, proto);
}
}
-
+
u32 ecount = 0;
for (const auto &m : exception_by_state) {
const ExceptionProto &proto = m.second;
@@ -1949,32 +1949,32 @@ struct Factory {
DEBUG_PRINTF("exception %u, triggered by state %u\n", ecount,
state_id);
- // Write the exception entry.
- exception_t &e = etable[ecount];
- maskSetBits(e.squash, proto.squash_states);
- maskSetBits(e.successors, proto.succ_states);
+ // Write the exception entry.
+ exception_t &e = etable[ecount];
+ maskSetBits(e.squash, proto.squash_states);
+ maskSetBits(e.successors, proto.succ_states);
if (proto.reports_index == MO_INVALID_IDX) {
e.reports = MO_INVALID_IDX;
} else {
e.reports = reportListOffset +
proto.reports_index * sizeof(ReportID);
}
- e.hasSquash = verify_u8(proto.squash);
- e.trigger = verify_u8(proto.trigger);
- u32 repeat_offset = proto.repeat_index == MO_INVALID_IDX
- ? MO_INVALID_IDX
- : repeatOffsets[proto.repeat_index];
- e.repeatOffset = repeat_offset;
-
+ e.hasSquash = verify_u8(proto.squash);
+ e.trigger = verify_u8(proto.trigger);
+ u32 repeat_offset = proto.repeat_index == MO_INVALID_IDX
+ ? MO_INVALID_IDX
+ : repeatOffsets[proto.repeat_index];
+ e.repeatOffset = repeat_offset;
+
// for the state that can switch it on
// set this bit in the exception mask
maskSetBit(limex->exceptionMask, state_id);
- ecount++;
- }
-
- limex->exceptionOffset = exceptionsOffset;
- limex->exceptionCount = ecount;
+ ecount++;
+ }
+
+ limex->exceptionOffset = exceptionsOffset;
+ limex->exceptionCount = ecount;
if (args.num_states > 64 && args.cc.target_info.has_avx512vbmi()) {
const u8 *exceptionMask = (const u8 *)(&limex->exceptionMask);
@@ -2028,118 +2028,118 @@ struct Factory {
setLimexFlag(limex, LIMEX_FLAG_EXTRACT_EXP);
}
}
- }
-
- static
- void writeReachMapping(const vector<NFAStateSet> &reach,
- const vector<u8> &reachMap, implNFA_t *limex,
- const u32 reachOffset) {
- DEBUG_PRINTF("reachOffset=%u\n", reachOffset);
-
- // Reach mapping is inside the LimEx structure.
- copy(reachMap.begin(), reachMap.end(), &limex->reachMap[0]);
-
- // Reach table is right after the LimEx structure.
- tableRow_t *reachMask = (tableRow_t *)((char *)limex + reachOffset);
- assert(ISALIGNED(reachMask));
- for (size_t i = 0, end = reach.size(); i < end; i++) {
- maskSetBits(reachMask[i], reach[i]);
- }
- limex->reachSize = verify_u32(reach.size());
- }
-
- static
- void writeTopMasks(const vector<NFAStateSet> &tops, implNFA_t *limex,
- const u32 topsOffset) {
- DEBUG_PRINTF("topsOffset=%u\n", topsOffset);
-
- limex->topOffset = topsOffset;
- tableRow_t *topMasks = (tableRow_t *)((char *)limex + topsOffset);
- assert(ISALIGNED(topMasks));
-
- for (size_t i = 0, end = tops.size(); i < end; i++) {
- maskSetBits(topMasks[i], tops[i]);
- }
-
- limex->topCount = verify_u32(tops.size());
- }
-
- static
- void writeAccelSsse3Masks(const NFAStateSet &accelMask, implNFA_t *limex) {
- char *perm_base = (char *)&limex->accelPermute;
- char *comp_base = (char *)&limex->accelCompare;
-
- u32 num = 0; // index in accel table.
- for (size_t i = accelMask.find_first(); i != accelMask.npos;
- i = accelMask.find_next(i), ++num) {
- u32 state_id = verify_u32(i);
- DEBUG_PRINTF("accel num=%u, state=%u\n", num, state_id);
-
- // PSHUFB permute and compare masks
- size_t mask_idx = sizeof(u_128) * (state_id / 128U);
- DEBUG_PRINTF("mask_idx=%zu\n", mask_idx);
- u_128 *perm = (u_128 *)(perm_base + mask_idx);
- u_128 *comp = (u_128 *)(comp_base + mask_idx);
- maskSetByte(*perm, num, ((state_id % 128U) / 8U));
- maskSetByte(*comp, num, ~(1U << (state_id % 8U)));
- }
- }
-
- static
- void writeAccel(const NFAStateSet &accelMask,
- const NFAStateSet &accelFriendsMask,
- const AccelAuxVector &accelAux,
- const vector<u8> &accelTable, implNFA_t *limex,
- const u32 accelTableOffset, const u32 accelAuxOffset) {
- DEBUG_PRINTF("accelTableOffset=%u, accelAuxOffset=%u\n",
- accelTableOffset, accelAuxOffset);
-
- // Write accel lookup table.
- limex->accelTableOffset = accelTableOffset;
- copy(accelTable.begin(), accelTable.end(),
- (u8 *)((char *)limex + accelTableOffset));
-
- // Write accel aux structures.
- limex->accelAuxOffset = accelAuxOffset;
- AccelAux *auxTable = (AccelAux *)((char *)limex + accelAuxOffset);
- assert(ISALIGNED(auxTable));
- copy(accelAux.begin(), accelAux.end(), auxTable);
-
- // Write LimEx structure members.
- limex->accelCount = verify_u32(accelTable.size());
- // FIXME: accelAuxCount is unused?
- limex->accelAuxCount = verify_u32(accelAux.size());
-
- // Write LimEx masks.
- maskSetBits(limex->accel, accelMask);
- maskSetBits(limex->accel_and_friends, accelFriendsMask);
-
- // We can use PSHUFB-based shuffles for models >= 128 states. These
- // require some additional masks in the bytecode.
- maskClear(limex->accelCompare);
- maskFill(limex->accelPermute, (char)0x80);
- if (NFATraits<dtype>::maxStates >= 128) {
- writeAccelSsse3Masks(accelMask, limex);
- }
- }
-
- static
- void writeAccepts(const NFAStateSet &acceptMask,
- const NFAStateSet &acceptEodMask,
- const vector<NFAAccept> &accepts,
- const vector<NFAAccept> &acceptsEod,
- const vector<NFAStateSet> &squash, implNFA_t *limex,
- const u32 acceptsOffset, const u32 acceptsEodOffset,
+ }
+
+ static
+ void writeReachMapping(const vector<NFAStateSet> &reach,
+ const vector<u8> &reachMap, implNFA_t *limex,
+ const u32 reachOffset) {
+ DEBUG_PRINTF("reachOffset=%u\n", reachOffset);
+
+ // Reach mapping is inside the LimEx structure.
+ copy(reachMap.begin(), reachMap.end(), &limex->reachMap[0]);
+
+ // Reach table is right after the LimEx structure.
+ tableRow_t *reachMask = (tableRow_t *)((char *)limex + reachOffset);
+ assert(ISALIGNED(reachMask));
+ for (size_t i = 0, end = reach.size(); i < end; i++) {
+ maskSetBits(reachMask[i], reach[i]);
+ }
+ limex->reachSize = verify_u32(reach.size());
+ }
+
+ static
+ void writeTopMasks(const vector<NFAStateSet> &tops, implNFA_t *limex,
+ const u32 topsOffset) {
+ DEBUG_PRINTF("topsOffset=%u\n", topsOffset);
+
+ limex->topOffset = topsOffset;
+ tableRow_t *topMasks = (tableRow_t *)((char *)limex + topsOffset);
+ assert(ISALIGNED(topMasks));
+
+ for (size_t i = 0, end = tops.size(); i < end; i++) {
+ maskSetBits(topMasks[i], tops[i]);
+ }
+
+ limex->topCount = verify_u32(tops.size());
+ }
+
+ static
+ void writeAccelSsse3Masks(const NFAStateSet &accelMask, implNFA_t *limex) {
+ char *perm_base = (char *)&limex->accelPermute;
+ char *comp_base = (char *)&limex->accelCompare;
+
+ u32 num = 0; // index in accel table.
+ for (size_t i = accelMask.find_first(); i != accelMask.npos;
+ i = accelMask.find_next(i), ++num) {
+ u32 state_id = verify_u32(i);
+ DEBUG_PRINTF("accel num=%u, state=%u\n", num, state_id);
+
+ // PSHUFB permute and compare masks
+ size_t mask_idx = sizeof(u_128) * (state_id / 128U);
+ DEBUG_PRINTF("mask_idx=%zu\n", mask_idx);
+ u_128 *perm = (u_128 *)(perm_base + mask_idx);
+ u_128 *comp = (u_128 *)(comp_base + mask_idx);
+ maskSetByte(*perm, num, ((state_id % 128U) / 8U));
+ maskSetByte(*comp, num, ~(1U << (state_id % 8U)));
+ }
+ }
+
+ static
+ void writeAccel(const NFAStateSet &accelMask,
+ const NFAStateSet &accelFriendsMask,
+ const AccelAuxVector &accelAux,
+ const vector<u8> &accelTable, implNFA_t *limex,
+ const u32 accelTableOffset, const u32 accelAuxOffset) {
+ DEBUG_PRINTF("accelTableOffset=%u, accelAuxOffset=%u\n",
+ accelTableOffset, accelAuxOffset);
+
+ // Write accel lookup table.
+ limex->accelTableOffset = accelTableOffset;
+ copy(accelTable.begin(), accelTable.end(),
+ (u8 *)((char *)limex + accelTableOffset));
+
+ // Write accel aux structures.
+ limex->accelAuxOffset = accelAuxOffset;
+ AccelAux *auxTable = (AccelAux *)((char *)limex + accelAuxOffset);
+ assert(ISALIGNED(auxTable));
+ copy(accelAux.begin(), accelAux.end(), auxTable);
+
+ // Write LimEx structure members.
+ limex->accelCount = verify_u32(accelTable.size());
+ // FIXME: accelAuxCount is unused?
+ limex->accelAuxCount = verify_u32(accelAux.size());
+
+ // Write LimEx masks.
+ maskSetBits(limex->accel, accelMask);
+ maskSetBits(limex->accel_and_friends, accelFriendsMask);
+
+ // We can use PSHUFB-based shuffles for models >= 128 states. These
+ // require some additional masks in the bytecode.
+ maskClear(limex->accelCompare);
+ maskFill(limex->accelPermute, (char)0x80);
+ if (NFATraits<dtype>::maxStates >= 128) {
+ writeAccelSsse3Masks(accelMask, limex);
+ }
+ }
+
+ static
+ void writeAccepts(const NFAStateSet &acceptMask,
+ const NFAStateSet &acceptEodMask,
+ const vector<NFAAccept> &accepts,
+ const vector<NFAAccept> &acceptsEod,
+ const vector<NFAStateSet> &squash, implNFA_t *limex,
+ const u32 acceptsOffset, const u32 acceptsEodOffset,
const u32 squashOffset, const u32 reportListOffset) {
char *limex_base = (char *)limex;
- DEBUG_PRINTF("acceptsOffset=%u, acceptsEodOffset=%u, squashOffset=%u\n",
- acceptsOffset, acceptsEodOffset, squashOffset);
-
- // LimEx masks (in structure)
- maskSetBits(limex->accept, acceptMask);
- maskSetBits(limex->acceptAtEOD, acceptEodMask);
-
+ DEBUG_PRINTF("acceptsOffset=%u, acceptsEodOffset=%u, squashOffset=%u\n",
+ acceptsOffset, acceptsEodOffset, squashOffset);
+
+ // LimEx masks (in structure)
+ maskSetBits(limex->accept, acceptMask);
+ maskSetBits(limex->acceptAtEOD, acceptEodMask);
+
// Transforms the indices (report list, squash mask) into offsets
// relative to the base of the limex.
auto transform_offset_fn = [&](NFAAccept a) {
@@ -2150,272 +2150,272 @@ struct Factory {
return a;
};
- // Write accept table.
- limex->acceptOffset = acceptsOffset;
- limex->acceptCount = verify_u32(accepts.size());
- DEBUG_PRINTF("NFA has %zu accepts\n", accepts.size());
+ // Write accept table.
+ limex->acceptOffset = acceptsOffset;
+ limex->acceptCount = verify_u32(accepts.size());
+ DEBUG_PRINTF("NFA has %zu accepts\n", accepts.size());
NFAAccept *acceptsTable = (NFAAccept *)(limex_base + acceptsOffset);
- assert(ISALIGNED(acceptsTable));
+ assert(ISALIGNED(acceptsTable));
transform(accepts.begin(), accepts.end(), acceptsTable,
transform_offset_fn);
-
- // Write eod accept table.
- limex->acceptEodOffset = acceptsEodOffset;
- limex->acceptEodCount = verify_u32(acceptsEod.size());
- DEBUG_PRINTF("NFA has %zu EOD accepts\n", acceptsEod.size());
+
+ // Write eod accept table.
+ limex->acceptEodOffset = acceptsEodOffset;
+ limex->acceptEodCount = verify_u32(acceptsEod.size());
+ DEBUG_PRINTF("NFA has %zu EOD accepts\n", acceptsEod.size());
NFAAccept *acceptsEodTable = (NFAAccept *)(limex_base + acceptsEodOffset);
- assert(ISALIGNED(acceptsEodTable));
+ assert(ISALIGNED(acceptsEodTable));
transform(acceptsEod.begin(), acceptsEod.end(), acceptsEodTable,
transform_offset_fn);
-
- // Write squash mask table.
- limex->squashCount = verify_u32(squash.size());
- limex->squashOffset = squashOffset;
- DEBUG_PRINTF("NFA has %zu report squash masks\n", squash.size());
+
+ // Write squash mask table.
+ limex->squashCount = verify_u32(squash.size());
+ limex->squashOffset = squashOffset;
+ DEBUG_PRINTF("NFA has %zu report squash masks\n", squash.size());
tableRow_t *mask = (tableRow_t *)(limex_base + squashOffset);
- assert(ISALIGNED(mask));
- for (size_t i = 0, end = squash.size(); i < end; i++) {
- maskSetBits(mask[i], squash[i]);
- }
- }
-
- static
+ assert(ISALIGNED(mask));
+ for (size_t i = 0, end = squash.size(); i < end; i++) {
+ maskSetBits(mask[i], squash[i]);
+ }
+ }
+
+ static
void writeRepeats(const vector<bytecode_ptr<NFARepeatInfo>> &repeats,
- vector<u32> &repeatOffsets, implNFA_t *limex,
- const u32 repeatOffsetsOffset, const u32 repeatOffset) {
- const u32 num_repeats = verify_u32(repeats.size());
-
- DEBUG_PRINTF("repeatOffsetsOffset=%u, repeatOffset=%u\n",
- repeatOffsetsOffset, repeatOffset);
-
- repeatOffsets.resize(num_repeats);
- u32 offset = repeatOffset;
-
- for (u32 i = 0; i < num_repeats; i++) {
- repeatOffsets[i] = offset;
+ vector<u32> &repeatOffsets, implNFA_t *limex,
+ const u32 repeatOffsetsOffset, const u32 repeatOffset) {
+ const u32 num_repeats = verify_u32(repeats.size());
+
+ DEBUG_PRINTF("repeatOffsetsOffset=%u, repeatOffset=%u\n",
+ repeatOffsetsOffset, repeatOffset);
+
+ repeatOffsets.resize(num_repeats);
+ u32 offset = repeatOffset;
+
+ for (u32 i = 0; i < num_repeats; i++) {
+ repeatOffsets[i] = offset;
assert(repeats[i]);
memcpy((char *)limex + offset, repeats[i].get(), repeats[i].size());
offset += repeats[i].size();
- }
-
- // Write repeat offset lookup table.
- assert(ISALIGNED_N((char *)limex + repeatOffsetsOffset, alignof(u32)));
- copy_bytes((char *)limex + repeatOffsetsOffset, repeatOffsets);
-
- limex->repeatOffset = repeatOffsetsOffset;
- limex->repeatCount = num_repeats;
- }
-
- static
+ }
+
+ // Write repeat offset lookup table.
+ assert(ISALIGNED_N((char *)limex + repeatOffsetsOffset, alignof(u32)));
+ copy_bytes((char *)limex + repeatOffsetsOffset, repeatOffsets);
+
+ limex->repeatOffset = repeatOffsetsOffset;
+ limex->repeatCount = num_repeats;
+ }
+
+ static
void writeReportList(const vector<ReportID> &reports, implNFA_t *limex,
const u32 reportListOffset) {
DEBUG_PRINTF("reportListOffset=%u\n", reportListOffset);
assert(ISALIGNED_N((char *)limex + reportListOffset,
- alignof(ReportID)));
+ alignof(ReportID)));
copy_bytes((char *)limex + reportListOffset, reports);
- }
-
- static
+ }
+
+ static
bytecode_ptr<NFA> generateNfa(const build_info &args) {
- if (args.num_states > NFATraits<dtype>::maxStates) {
- return nullptr;
- }
-
- // Build bounded repeat structures.
+ if (args.num_states > NFATraits<dtype>::maxStates) {
+ return nullptr;
+ }
+
+ // Build bounded repeat structures.
vector<bytecode_ptr<NFARepeatInfo>> repeats;
- u32 repeats_full_state = 0;
- u32 repeats_stream_state = 0;
- buildRepeats(args, repeats, &repeats_full_state, &repeats_stream_state);
- size_t repeatSize = 0;
- for (size_t i = 0; i < repeats.size(); i++) {
+ u32 repeats_full_state = 0;
+ u32 repeats_stream_state = 0;
+ buildRepeats(args, repeats, &repeats_full_state, &repeats_stream_state);
+ size_t repeatSize = 0;
+ for (size_t i = 0; i < repeats.size(); i++) {
repeatSize += repeats[i].size();
- }
-
+ }
+
// We track report lists that have already been written into the global
// list in case we can reuse them.
ReportListCache reports_cache;
-
+
unordered_set<NFAEdge> exceptional;
u32 shiftCount = findBestNumOfVarShifts(args);
assert(shiftCount);
u32 maxShift = findMaxVarShift(args, shiftCount);
findExceptionalTransitions(args, exceptional, maxShift);
-
+
map<ExceptionProto, vector<u32>> exceptionMap;
vector<ReportID> reportList;
-
+
u32 exceptionCount = buildExceptionMap(args, reports_cache, exceptional,
exceptionMap, reportList);
assert(exceptionCount <= args.num_states);
- // Build reach table and character mapping.
- vector<NFAStateSet> reach;
- vector<u8> reachMap;
- buildReachMapping(args, reach, reachMap);
-
- // Build top masks.
- vector<NFAStateSet> tops;
- buildTopMasks(args, tops);
-
- // Build all our accept info.
- NFAStateSet acceptMask, acceptEodMask;
- vector<NFAAccept> accepts, acceptsEod;
- vector<NFAStateSet> squash;
+ // Build reach table and character mapping.
+ vector<NFAStateSet> reach;
+ vector<u8> reachMap;
+ buildReachMapping(args, reach, reachMap);
+
+ // Build top masks.
+ vector<NFAStateSet> tops;
+ buildTopMasks(args, tops);
+
+ // Build all our accept info.
+ NFAStateSet acceptMask, acceptEodMask;
+ vector<NFAAccept> accepts, acceptsEod;
+ vector<NFAStateSet> squash;
buildAccepts(args, reports_cache, acceptMask, acceptEodMask, accepts,
acceptsEod, reportList, squash);
-
- // Build all our accel info.
- NFAStateSet accelMask, accelFriendsMask;
- AccelAuxVector accelAux;
- vector<u8> accelTable;
- buildAccel(args, accelMask, accelFriendsMask, accelAux, accelTable);
-
- // Compute the offsets in the bytecode for this LimEx NFA for all of
- // our structures. First, the NFA and LimEx structures. All other
- // offsets are relative to the start of the LimEx struct, starting with
- // the reach table.
- u32 offset = sizeof(implNFA_t);
-
- const u32 reachOffset = offset;
- offset += sizeof(tableRow_t) * reach.size();
-
- const u32 topsOffset = offset;
- offset += sizeof(tableRow_t) * tops.size();
-
- const u32 accelTableOffset = offset;
- offset += sizeof(u8) * accelTable.size();
-
- offset = ROUNDUP_N(offset, alignof(AccelAux));
- const u32 accelAuxOffset = offset;
- offset += sizeof(AccelAux) * accelAux.size();
-
- offset = ROUNDUP_N(offset, alignof(NFAAccept));
- const u32 acceptsOffset = offset;
- offset += sizeof(NFAAccept) * accepts.size();
- const u32 acceptsEodOffset = offset;
- offset += sizeof(NFAAccept) * acceptsEod.size();
-
- offset = ROUNDUP_CL(offset);
- const u32 squashOffset = offset;
- offset += sizeof(tableRow_t) * squash.size();
-
- offset = ROUNDUP_CL(offset);
- const u32 exceptionsOffset = offset;
+
+ // Build all our accel info.
+ NFAStateSet accelMask, accelFriendsMask;
+ AccelAuxVector accelAux;
+ vector<u8> accelTable;
+ buildAccel(args, accelMask, accelFriendsMask, accelAux, accelTable);
+
+ // Compute the offsets in the bytecode for this LimEx NFA for all of
+ // our structures. First, the NFA and LimEx structures. All other
+ // offsets are relative to the start of the LimEx struct, starting with
+ // the reach table.
+ u32 offset = sizeof(implNFA_t);
+
+ const u32 reachOffset = offset;
+ offset += sizeof(tableRow_t) * reach.size();
+
+ const u32 topsOffset = offset;
+ offset += sizeof(tableRow_t) * tops.size();
+
+ const u32 accelTableOffset = offset;
+ offset += sizeof(u8) * accelTable.size();
+
+ offset = ROUNDUP_N(offset, alignof(AccelAux));
+ const u32 accelAuxOffset = offset;
+ offset += sizeof(AccelAux) * accelAux.size();
+
+ offset = ROUNDUP_N(offset, alignof(NFAAccept));
+ const u32 acceptsOffset = offset;
+ offset += sizeof(NFAAccept) * accepts.size();
+ const u32 acceptsEodOffset = offset;
+ offset += sizeof(NFAAccept) * acceptsEod.size();
+
+ offset = ROUNDUP_CL(offset);
+ const u32 squashOffset = offset;
+ offset += sizeof(tableRow_t) * squash.size();
+
+ offset = ROUNDUP_CL(offset);
+ const u32 exceptionsOffset = offset;
offset += sizeof(exception_t) * exceptionCount;
-
+
const u32 reportListOffset = offset;
offset += sizeof(ReportID) * reportList.size();
-
- const u32 repeatOffsetsOffset = offset;
- offset += sizeof(u32) * args.repeats.size();
-
- offset = ROUNDUP_CL(offset);
- const u32 repeatsOffset = offset;
- offset += repeatSize;
-
- // Now we can allocate space for the NFA and get to work on layout.
-
- size_t nfaSize = sizeof(NFA) + offset;
- DEBUG_PRINTF("nfa size %zu\n", nfaSize);
+
+ const u32 repeatOffsetsOffset = offset;
+ offset += sizeof(u32) * args.repeats.size();
+
+ offset = ROUNDUP_CL(offset);
+ const u32 repeatsOffset = offset;
+ offset += repeatSize;
+
+ // Now we can allocate space for the NFA and get to work on layout.
+
+ size_t nfaSize = sizeof(NFA) + offset;
+ DEBUG_PRINTF("nfa size %zu\n", nfaSize);
auto nfa = make_zeroed_bytecode_ptr<NFA>(nfaSize);
- assert(nfa); // otherwise we would have thrown std::bad_alloc
-
- implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa.get());
- assert(ISALIGNED(limex));
-
- writeReachMapping(reach, reachMap, limex, reachOffset);
-
- writeTopMasks(tops, limex, topsOffset);
-
- writeAccel(accelMask, accelFriendsMask, accelAux, accelTable,
- limex, accelTableOffset, accelAuxOffset);
-
- writeAccepts(acceptMask, acceptEodMask, accepts, acceptsEod, squash,
+ assert(nfa); // otherwise we would have thrown std::bad_alloc
+
+ implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa.get());
+ assert(ISALIGNED(limex));
+
+ writeReachMapping(reach, reachMap, limex, reachOffset);
+
+ writeTopMasks(tops, limex, topsOffset);
+
+ writeAccel(accelMask, accelFriendsMask, accelAux, accelTable,
+ limex, accelTableOffset, accelAuxOffset);
+
+ writeAccepts(acceptMask, acceptEodMask, accepts, acceptsEod, squash,
limex, acceptsOffset, acceptsEodOffset, squashOffset,
reportListOffset);
-
+
limex->shiftCount = shiftCount;
- writeShiftMasks(args, limex);
-
+ writeShiftMasks(args, limex);
+
if (cannotDie(args)) {
DEBUG_PRINTF("nfa cannot die\n");
setLimexFlag(limex, LIMEX_FLAG_CANNOT_DIE);
}
- // Determine the state required for our state vector.
- findStateSize(args, limex);
-
+ // Determine the state required for our state vector.
+ findStateSize(args, limex);
+
writeReportList(reportList, limex, reportListOffset);
-
- // Repeat structures and offset table.
- vector<u32> repeatOffsets;
- writeRepeats(repeats, repeatOffsets, limex, repeatOffsetsOffset,
- repeatsOffset);
-
+
+ // Repeat structures and offset table.
+ vector<u32> repeatOffsets;
+ writeRepeats(repeats, repeatOffsets, limex, repeatOffsetsOffset,
+ repeatsOffset);
+
writeExceptions(args, exceptionMap, repeatOffsets, limex, exceptionsOffset,
reportListOffset);
-
- writeLimexMasks(args, limex);
-
- allocState(nfa.get(), repeats_full_state, repeats_stream_state);
-
- nfa->type = dtype;
- nfa->length = verify_u32(nfaSize);
- nfa->nPositions = args.num_states;
-
- if (!args.zombies.empty()) {
- setNfaFlag(nfa.get(), NFA_ZOMBIE);
- }
- if (!acceptsEod.empty()) {
- setNfaFlag(nfa.get(), NFA_ACCEPTS_EOD);
- }
-
- return nfa;
- }
-
- static int score(const build_info &args) {
- // LimEx NFAs are available in sizes from 32 to 512-bit.
- size_t num_states = args.num_states;
-
- size_t sz = findContainerSize(num_states);
- if (sz < 32) {
- sz = 32;
- }
-
- if (args.cc.grey.nfaForceSize) {
- sz = args.cc.grey.nfaForceSize;
- }
-
- if (sz != NFATraits<dtype>::maxStates) {
- return -1; // fail, size not appropriate
- }
-
- // We are of the right size, calculate a score based on the number
- // of exceptions and the number of shifts used by this LimEx.
+
+ writeLimexMasks(args, limex);
+
+ allocState(nfa.get(), repeats_full_state, repeats_stream_state);
+
+ nfa->type = dtype;
+ nfa->length = verify_u32(nfaSize);
+ nfa->nPositions = args.num_states;
+
+ if (!args.zombies.empty()) {
+ setNfaFlag(nfa.get(), NFA_ZOMBIE);
+ }
+ if (!acceptsEod.empty()) {
+ setNfaFlag(nfa.get(), NFA_ACCEPTS_EOD);
+ }
+
+ return nfa;
+ }
+
+ static int score(const build_info &args) {
+ // LimEx NFAs are available in sizes from 32 to 512-bit.
+ size_t num_states = args.num_states;
+
+ size_t sz = findContainerSize(num_states);
+ if (sz < 32) {
+ sz = 32;
+ }
+
+ if (args.cc.grey.nfaForceSize) {
+ sz = args.cc.grey.nfaForceSize;
+ }
+
+ if (sz != NFATraits<dtype>::maxStates) {
+ return -1; // fail, size not appropriate
+ }
+
+ // We are of the right size, calculate a score based on the number
+ // of exceptions and the number of shifts used by this LimEx.
int score;
u32 shiftCount = findBestNumOfVarShifts(args, &score);
if (shiftCount == 0) {
return -1;
- }
- return score;
- }
-};
-
-template<NFAEngineType dtype>
-struct generateNfa {
+ }
+ return score;
+ }
+};
+
+template<NFAEngineType dtype>
+struct generateNfa {
static bytecode_ptr<NFA> call(const build_info &args) {
- return Factory<dtype>::generateNfa(args);
- }
-};
-
-template<NFAEngineType dtype>
-struct scoreNfa {
- static int call(const build_info &args) {
- return Factory<dtype>::score(args);
- }
-};
-
+ return Factory<dtype>::generateNfa(args);
+ }
+};
+
+template<NFAEngineType dtype>
+struct scoreNfa {
+ static int call(const build_info &args) {
+ return Factory<dtype>::score(args);
+ }
+};
+
#define MAKE_LIMEX_TRAITS(mlt_size) \
template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \
typedef LimExNFA##mlt_size implNFA_t; \
@@ -2425,70 +2425,70 @@ struct scoreNfa {
static const size_t scratch_state_size = mlt_size == 64 ? sizeof(m128) \
: sizeof(tableRow_t); \
};
-
+
MAKE_LIMEX_TRAITS(32)
MAKE_LIMEX_TRAITS(64)
MAKE_LIMEX_TRAITS(128)
MAKE_LIMEX_TRAITS(256)
MAKE_LIMEX_TRAITS(384)
MAKE_LIMEX_TRAITS(512)
-
-} // namespace
-
-#ifndef NDEBUG
-// Some sanity tests, called by an assertion in generate().
-static UNUSED
+
+} // namespace
+
+#ifndef NDEBUG
+// Some sanity tests, called by an assertion in generate().
+static UNUSED
bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops,
const unordered_map<NFAVertex, u32> &state_ids,
- u32 num_states) {
+ u32 num_states) {
unordered_set<u32> seen;
unordered_set<NFAVertex> top_starts;
for (const auto &vv : tops | map_values) {
insert(&top_starts, vv);
- }
-
- for (auto v : vertices_range(h)) {
- if (!contains(state_ids, v)) {
+ }
+
+ for (auto v : vertices_range(h)) {
+ if (!contains(state_ids, v)) {
DEBUG_PRINTF("no entry for vertex %zu in state map\n", h[v].index);
- return false;
- }
- const u32 i = state_ids.at(v);
- if (i == NO_STATE) {
- continue;
- }
-
+ return false;
+ }
+ const u32 i = state_ids.at(v);
+ if (i == NO_STATE) {
+ continue;
+ }
+
DEBUG_PRINTF("checking vertex %zu (state %u)\n", h[v].index, i);
-
- if (i >= num_states || contains(seen, i)) {
- DEBUG_PRINTF("vertex %u/%u has invalid state\n", i, num_states);
- return false;
- }
- seen.insert(i);
-
- // All our states should be reachable and have a state assigned.
- if (h[v].char_reach.none()) {
+
+ if (i >= num_states || contains(seen, i)) {
+ DEBUG_PRINTF("vertex %u/%u has invalid state\n", i, num_states);
+ return false;
+ }
+ seen.insert(i);
+
+ // All our states should be reachable and have a state assigned.
+ if (h[v].char_reach.none()) {
DEBUG_PRINTF("vertex %zu has empty reachability\n", h[v].index);
- return false;
- }
-
- // Every state that isn't a start state (or top, in triggered NFAs)
- // must have at least one predecessor that is not itself.
- if (v != h.start && v != h.startDs && !contains(top_starts, v)
- && !proper_in_degree(v, h)) {
+ return false;
+ }
+
+ // Every state that isn't a start state (or top, in triggered NFAs)
+ // must have at least one predecessor that is not itself.
+ if (v != h.start && v != h.startDs && !contains(top_starts, v)
+ && !proper_in_degree(v, h)) {
DEBUG_PRINTF("vertex %zu has no pred\n", h[v].index);
- return false;
- }
- }
-
- if (seen.size() != num_states) {
- return false;
- }
-
- return true;
-}
-#endif // NDEBUG
-
-static
+ return false;
+ }
+ }
+
+ if (seen.size() != num_states) {
+ return false;
+ }
+
+ return true;
+}
+#endif // NDEBUG
+
+static
bool isFast(const build_info &args) {
const NGHolder &h = args.h;
const u32 num_states = args.num_states;
@@ -2552,17 +2552,17 @@ bool isFast(const build_info &args) {
static
u32 max_state(const unordered_map<NFAVertex, u32> &state_ids) {
- u32 rv = 0;
- for (const auto &m : state_ids) {
- DEBUG_PRINTF("state %u\n", m.second);
- if (m.second != NO_STATE) {
- rv = max(m.second, rv);
- }
- }
- DEBUG_PRINTF("max %u\n", rv);
- return rv;
-}
-
+ u32 rv = 0;
+ for (const auto &m : state_ids) {
+ DEBUG_PRINTF("state %u\n", m.second);
+ if (m.second != NO_STATE) {
+ rv = max(m.second, rv);
+ }
+ }
+ DEBUG_PRINTF("max %u\n", rv);
+ return rv;
+}
+
bytecode_ptr<NFA> generate(NGHolder &h,
const unordered_map<NFAVertex, u32> &states,
const vector<BoundedRepeatData> &repeats,
@@ -2572,52 +2572,52 @@ bytecode_ptr<NFA> generate(NGHolder &h,
const set<NFAVertex> &zombies, bool do_accel,
bool stateCompression, bool &fast, u32 hint,
const CompileContext &cc) {
- const u32 num_states = max_state(states) + 1;
- DEBUG_PRINTF("total states: %u\n", num_states);
-
- if (!cc.grey.allowLimExNFA) {
- DEBUG_PRINTF("limex not allowed\n");
- return nullptr;
- }
-
- // If you ask for a particular type, it had better be an NFA.
- assert(hint == INVALID_NFA || hint <= LAST_LIMEX_NFA);
- DEBUG_PRINTF("hint=%u\n", hint);
-
- // Sanity check the input data.
- assert(isSane(h, tops, states, num_states));
-
- // Build arguments used in the rest of this file.
- build_info arg(h, states, repeats, reportSquashMap, squashMap, tops,
- zombies, do_accel, stateCompression, cc, num_states);
-
- // Acceleration analysis.
- fillAccelInfo(arg);
-
+ const u32 num_states = max_state(states) + 1;
+ DEBUG_PRINTF("total states: %u\n", num_states);
+
+ if (!cc.grey.allowLimExNFA) {
+ DEBUG_PRINTF("limex not allowed\n");
+ return nullptr;
+ }
+
+ // If you ask for a particular type, it had better be an NFA.
+ assert(hint == INVALID_NFA || hint <= LAST_LIMEX_NFA);
+ DEBUG_PRINTF("hint=%u\n", hint);
+
+ // Sanity check the input data.
+ assert(isSane(h, tops, states, num_states));
+
+ // Build arguments used in the rest of this file.
+ build_info arg(h, states, repeats, reportSquashMap, squashMap, tops,
+ zombies, do_accel, stateCompression, cc, num_states);
+
+ // Acceleration analysis.
+ fillAccelInfo(arg);
+
vector<pair<int, NFAEngineType>> scores;
-
- if (hint != INVALID_NFA) {
- // The caller has told us what to (attempt to) build.
+
+ if (hint != INVALID_NFA) {
+ // The caller has told us what to (attempt to) build.
scores.emplace_back(0, (NFAEngineType)hint);
- } else {
- for (size_t i = 0; i <= LAST_LIMEX_NFA; i++) {
- NFAEngineType ntype = (NFAEngineType)i;
- int score = DISPATCH_BY_LIMEX_TYPE(ntype, scoreNfa, arg);
- if (score >= 0) {
- DEBUG_PRINTF("%s scores %d\n", nfa_type_name(ntype), score);
+ } else {
+ for (size_t i = 0; i <= LAST_LIMEX_NFA; i++) {
+ NFAEngineType ntype = (NFAEngineType)i;
+ int score = DISPATCH_BY_LIMEX_TYPE(ntype, scoreNfa, arg);
+ if (score >= 0) {
+ DEBUG_PRINTF("%s scores %d\n", nfa_type_name(ntype), score);
scores.emplace_back(score, ntype);
- }
- }
- }
-
- if (scores.empty()) {
- DEBUG_PRINTF("No NFA returned a valid score for this case.\n");
- return nullptr;
- }
-
+ }
+ }
+ }
+
+ if (scores.empty()) {
+ DEBUG_PRINTF("No NFA returned a valid score for this case.\n");
+ return nullptr;
+ }
+
// Sort acceptable models in priority order, lowest score first.
sort(scores.begin(), scores.end());
-
+
for (const auto &elem : scores) {
assert(elem.first >= 0);
NFAEngineType limex_model = elem.second;
@@ -2628,13 +2628,13 @@ bytecode_ptr<NFA> generate(NGHolder &h,
fast = isFast(arg);
return nfa;
}
- }
-
+ }
+
DEBUG_PRINTF("NFA build failed.\n");
return nullptr;
-}
-
-u32 countAccelStates(NGHolder &h,
+}
+
+u32 countAccelStates(NGHolder &h,
const unordered_map<NFAVertex, u32> &states,
const vector<BoundedRepeatData> &repeats,
const unordered_map<NFAVertex, NFAStateSet> &reportSquashMap,
@@ -2642,30 +2642,30 @@ u32 countAccelStates(NGHolder &h,
const map<u32, set<NFAVertex>> &tops,
const set<NFAVertex> &zombies,
const CompileContext &cc) {
- const u32 num_states = max_state(states) + 1;
- DEBUG_PRINTF("total states: %u\n", num_states);
-
- if (!cc.grey.allowLimExNFA) {
- DEBUG_PRINTF("limex not allowed\n");
+ const u32 num_states = max_state(states) + 1;
+ DEBUG_PRINTF("total states: %u\n", num_states);
+
+ if (!cc.grey.allowLimExNFA) {
+ DEBUG_PRINTF("limex not allowed\n");
return 0;
- }
-
- // Sanity check the input data.
- assert(isSane(h, tops, states, num_states));
-
- const bool do_accel = true;
- const bool state_compression = false;
-
- // Build arguments used in the rest of this file.
- build_info bi(h, states, repeats, reportSquashMap, squashMap, tops, zombies,
- do_accel, state_compression, cc, num_states);
-
- // Acceleration analysis.
+ }
+
+ // Sanity check the input data.
+ assert(isSane(h, tops, states, num_states));
+
+ const bool do_accel = true;
+ const bool state_compression = false;
+
+ // Build arguments used in the rest of this file.
+ build_info bi(h, states, repeats, reportSquashMap, squashMap, tops, zombies,
+ do_accel, state_compression, cc, num_states);
+
+ // Acceleration analysis.
nfaFindAccelSchemes(bi.h, bi.br_cyclic, &bi.accel.accel_map);
-
+
u32 num_accel = verify_u32(bi.accel.accel_map.size());
- DEBUG_PRINTF("found %u accel states\n", num_accel);
+ DEBUG_PRINTF("found %u accel states\n", num_accel);
return num_accel;
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfa/limex_compile.h b/contrib/libs/hyperscan/src/nfa/limex_compile.h
index fc3f461fbf..4afdcdb3e4 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_compile.h
+++ b/contrib/libs/hyperscan/src/nfa/limex_compile.h
@@ -1,74 +1,74 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Main NFA build code.
- */
-
-#ifndef LIMEX_COMPILE_H
-#define LIMEX_COMPILE_H
-
+ * \brief Main NFA build code.
+ */
+
+#ifndef LIMEX_COMPILE_H
+#define LIMEX_COMPILE_H
+
#include "nfagraph/ng_holder.h"
#include "nfagraph/ng_squash.h" // for NFAStateSet
#include "ue2common.h"
#include "util/bytecode_ptr.h"
#include <set>
-#include <map>
-#include <memory>
+#include <map>
+#include <memory>
#include <unordered_map>
-#include <vector>
-
-struct NFA;
-
-namespace ue2 {
-
-struct BoundedRepeatData;
-struct CompileContext;
-
+#include <vector>
+
+struct NFA;
+
+namespace ue2 {
+
+struct BoundedRepeatData;
+struct CompileContext;
+
/**
* \brief Construct a LimEx NFA from an NGHolder.
- *
- * \param g Input NFA graph. Must have state IDs assigned.
- * \param repeats Bounded repeat information, if any.
- * \param reportSquashMap Single-match mode squash map.
- * \param squashMap More general squash map.
- * \param tops Tops and their start vertices,
- * \param zombies The set of zombifying states.
- * \param do_accel Calculate acceleration schemes.
- * \param stateCompression Allow (and calculate masks for) state compression.
- * \param hint If not INVALID_NFA, this allows a particular LimEx NFA model
- to be requested.
- * \param cc Compile context.
- * \return a built NFA, or nullptr if no NFA could be constructed for this
- * graph.
- */
+ *
+ * \param g Input NFA graph. Must have state IDs assigned.
+ * \param repeats Bounded repeat information, if any.
+ * \param reportSquashMap Single-match mode squash map.
+ * \param squashMap More general squash map.
+ * \param tops Tops and their start vertices,
+ * \param zombies The set of zombifying states.
+ * \param do_accel Calculate acceleration schemes.
+ * \param stateCompression Allow (and calculate masks for) state compression.
+ * \param hint If not INVALID_NFA, this allows a particular LimEx NFA model
+ to be requested.
+ * \param cc Compile context.
+ * \return a built NFA, or nullptr if no NFA could be constructed for this
+ * graph.
+ */
bytecode_ptr<NFA> generate(NGHolder &g,
const std::unordered_map<NFAVertex, u32> &states,
const std::vector<BoundedRepeatData> &repeats,
@@ -81,14 +81,14 @@ bytecode_ptr<NFA> generate(NGHolder &g,
bool &fast,
u32 hint,
const CompileContext &cc);
-
-/**
+
+/**
* \brief For a given graph, count the number of accelerable states it has.
- *
+ *
* Note that this number may be greater than the number that are actually
* implementable.
- */
-u32 countAccelStates(NGHolder &h,
+ */
+u32 countAccelStates(NGHolder &h,
const std::unordered_map<NFAVertex, u32> &states,
const std::vector<BoundedRepeatData> &repeats,
const std::unordered_map<NFAVertex, NFAStateSet> &reportSquashMap,
@@ -96,7 +96,7 @@ u32 countAccelStates(NGHolder &h,
const std::map<u32, std::set<NFAVertex>> &tops,
const std::set<NFAVertex> &zombies,
const CompileContext &cc);
-
-} // namespace ue2
-
-#endif
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/limex_context.h b/contrib/libs/hyperscan/src/nfa/limex_context.h
index 06be3c9461..60d2087935 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_context.h
+++ b/contrib/libs/hyperscan/src/nfa/limex_context.h
@@ -1,44 +1,44 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Runtime context structures (NFAContext128 and friends) for the NFA.
- */
-
-#ifndef LIMEX_CONTEXT_H
-#define LIMEX_CONTEXT_H
-
-#include "ue2common.h"
-#include "callback.h"
-#include "util/simd_utils.h" // for m128 etc
-
-// Runtime context structures.
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Runtime context structures (NFAContext128 and friends) for the NFA.
+ */
+
+#ifndef LIMEX_CONTEXT_H
+#define LIMEX_CONTEXT_H
+
+#include "ue2common.h"
+#include "callback.h"
+#include "util/simd_utils.h" // for m128 etc
+
+// Runtime context structures.
+
/* Note: The size of the context structures may vary from platform to platform
* (notably, for the Limex64 structure). As a result, information based on the
* size and other detail of these structures should not be written into the
@@ -49,43 +49,43 @@
#error ue2 runtime only file
#endif
-/* cached_estate/esucc etc...
- *
- * If the exception state matches the cached_estate we will apply
- * the or in the cached_esucc to the successor states rather than processing
- * the exceptions.
- *
- * If the current exception state is a superset of the cached_estate, the
- * cache is NOT used at all.
- *
- * The cache is updated when we see a different cacheable estate.
- */
-
-#define GEN_CONTEXT_STRUCT(nsize, ntype) \
-struct ALIGN_CL_DIRECTIVE NFAContext##nsize { \
- ntype s; /**< state bitvector (on entry/exit) */ \
- ntype local_succ; /**< used by exception handling for large models */ \
- ntype cached_estate; /* inited to 0 */ \
- ntype cached_esucc; \
- char cached_br; /**< cached_estate contains a br state */ \
- const ReportID *cached_reports; \
- union RepeatControl *repeat_ctrl; \
- char *repeat_state; \
- NfaCallback callback; \
- void *context; \
-};
-
-GEN_CONTEXT_STRUCT(32, u32)
+/* cached_estate/esucc etc...
+ *
+ * If the exception state matches the cached_estate we will apply
+ * the or in the cached_esucc to the successor states rather than processing
+ * the exceptions.
+ *
+ * If the current exception state is a superset of the cached_estate, the
+ * cache is NOT used at all.
+ *
+ * The cache is updated when we see a different cacheable estate.
+ */
+
+#define GEN_CONTEXT_STRUCT(nsize, ntype) \
+struct ALIGN_CL_DIRECTIVE NFAContext##nsize { \
+ ntype s; /**< state bitvector (on entry/exit) */ \
+ ntype local_succ; /**< used by exception handling for large models */ \
+ ntype cached_estate; /* inited to 0 */ \
+ ntype cached_esucc; \
+ char cached_br; /**< cached_estate contains a br state */ \
+ const ReportID *cached_reports; \
+ union RepeatControl *repeat_ctrl; \
+ char *repeat_state; \
+ NfaCallback callback; \
+ void *context; \
+};
+
+GEN_CONTEXT_STRUCT(32, u32)
#ifdef ARCH_64_BIT
GEN_CONTEXT_STRUCT(64, u64a)
#else
GEN_CONTEXT_STRUCT(64, m128)
#endif
-GEN_CONTEXT_STRUCT(128, m128)
-GEN_CONTEXT_STRUCT(256, m256)
-GEN_CONTEXT_STRUCT(384, m384)
-GEN_CONTEXT_STRUCT(512, m512)
-
-#undef GEN_CONTEXT_STRUCT
-
-#endif
+GEN_CONTEXT_STRUCT(128, m128)
+GEN_CONTEXT_STRUCT(256, m256)
+GEN_CONTEXT_STRUCT(384, m384)
+GEN_CONTEXT_STRUCT(512, m512)
+
+#undef GEN_CONTEXT_STRUCT
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/limex_exceptional.h b/contrib/libs/hyperscan/src/nfa/limex_exceptional.h
index 2d1123dca0..6c7335f1b9 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_exceptional.h
+++ b/contrib/libs/hyperscan/src/nfa/limex_exceptional.h
@@ -1,240 +1,240 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief LimEx NFA: runtime exception processing code.
- *
- * X-macro generic impl, included into the various LimEx model implementations.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief LimEx NFA: runtime exception processing code.
+ *
+ * X-macro generic impl, included into the various LimEx model implementations.
+ */
+
#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG)
# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer.
-#endif
-
-#include "config.h"
-#include "limex_ring.h"
-#include "util/join.h"
-#include "util/uniform_ops.h"
-
-#define PE_FN JOIN(processExceptional, SIZE)
-#define RUN_EXCEPTION_FN JOIN(runException, SIZE)
-#define ZERO_STATE JOIN(zero_, STATE_T)
-#define AND_STATE JOIN(and_, STATE_T)
-#define EQ_STATE(a, b) (!JOIN(noteq_, STATE_T)((a), (b)))
-#define OR_STATE JOIN(or_, STATE_T)
+#endif
+
+#include "config.h"
+#include "limex_ring.h"
+#include "util/join.h"
+#include "util/uniform_ops.h"
+
+#define PE_FN JOIN(processExceptional, SIZE)
+#define RUN_EXCEPTION_FN JOIN(runException, SIZE)
+#define ZERO_STATE JOIN(zero_, STATE_T)
+#define AND_STATE JOIN(and_, STATE_T)
+#define EQ_STATE(a, b) (!JOIN(noteq_, STATE_T)((a), (b)))
+#define OR_STATE JOIN(or_, STATE_T)
#define EXPAND_STATE JOIN(expand_, STATE_T)
#define SHUFFLE_BYTE_STATE JOIN(shuffle_byte_, STATE_T)
-#define TESTBIT_STATE JOIN(testbit_, STATE_T)
-#define EXCEPTION_T JOIN(struct NFAException, SIZE)
-#define CONTEXT_T JOIN(NFAContext, SIZE)
-#define IMPL_NFA_T JOIN(LimExNFA, SIZE)
-#define GET_NFA_REPEAT_INFO_FN JOIN(getNfaRepeatInfo, SIZE)
-
-#ifdef ESTATE_ON_STACK
-#define ESTATE_ARG STATE_T estate
-#else
-#define ESTATE_ARG const STATE_T *estatep
+#define TESTBIT_STATE JOIN(testbit_, STATE_T)
+#define EXCEPTION_T JOIN(struct NFAException, SIZE)
+#define CONTEXT_T JOIN(NFAContext, SIZE)
+#define IMPL_NFA_T JOIN(LimExNFA, SIZE)
+#define GET_NFA_REPEAT_INFO_FN JOIN(getNfaRepeatInfo, SIZE)
+
+#ifdef ESTATE_ON_STACK
+#define ESTATE_ARG STATE_T estate
+#else
+#define ESTATE_ARG const STATE_T *estatep
#define estate (*estatep)
-#endif
-
-#ifdef STATE_ON_STACK
-#define STATE_ARG_NAME s
-#define STATE_ARG STATE_T STATE_ARG_NAME
-#define STATE_ARG_P &s
-#else
-#define STATE_ARG_NAME sp
-#define STATE_ARG const STATE_T *STATE_ARG_NAME
-#define STATE_ARG_P sp
-#endif
-
-#ifndef STATE_ON_STACK
-#define BIG_MODEL
-#endif
-
-#ifdef ARCH_64_BIT
-#define CHUNK_T u64a
-#define FIND_AND_CLEAR_FN findAndClearLSB_64
+#endif
+
+#ifdef STATE_ON_STACK
+#define STATE_ARG_NAME s
+#define STATE_ARG STATE_T STATE_ARG_NAME
+#define STATE_ARG_P &s
+#else
+#define STATE_ARG_NAME sp
+#define STATE_ARG const STATE_T *STATE_ARG_NAME
+#define STATE_ARG_P sp
+#endif
+
+#ifndef STATE_ON_STACK
+#define BIG_MODEL
+#endif
+
+#ifdef ARCH_64_BIT
+#define CHUNK_T u64a
+#define FIND_AND_CLEAR_FN findAndClearLSB_64
#define POPCOUNT_FN popcount64
#define RANK_IN_MASK_FN rank_in_mask64
-#else
-#define CHUNK_T u32
-#define FIND_AND_CLEAR_FN findAndClearLSB_32
+#else
+#define CHUNK_T u32
+#define FIND_AND_CLEAR_FN findAndClearLSB_32
#define POPCOUNT_FN popcount32
#define RANK_IN_MASK_FN rank_in_mask32
-#endif
-
-/** \brief Process a single exception. Returns 1 if exception handling should
- * continue, 0 if an accept callback has instructed us to halt. */
-static really_inline
-int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
- STATE_T *succ,
-#ifndef BIG_MODEL
- STATE_T *local_succ,
-#endif
- const struct IMPL_NFA_T *limex,
- u64a offset,
- struct CONTEXT_T *ctx,
- struct proto_cache *new_cache,
- enum CacheResult *cacheable,
- char in_rev,
- const char flags) {
- assert(e);
-
-#ifdef DEBUG_EXCEPTIONS
- printf("EXCEPTION e=%p reports=%u trigger=", e, e->reports);
- if (e->trigger == LIMEX_TRIGGER_NONE) {
- printf("none");
- } else if (e->trigger == LIMEX_TRIGGER_POS) {
- printf("pos");
- } else if (e->trigger == LIMEX_TRIGGER_TUG) {
- printf("tug");
- } else {
- printf("unknown!");
- }
- printf("\n");
-#endif
-
- // Trigger exceptions, used in bounded repeats.
- assert(!in_rev || e->trigger == LIMEX_TRIGGER_NONE);
- if (!in_rev && e->trigger != LIMEX_TRIGGER_NONE) {
- assert(e->repeatOffset != MO_INVALID_IDX);
- const struct NFARepeatInfo *info =
- (const struct NFARepeatInfo *)((const char *)limex +
- e->repeatOffset);
- const struct RepeatInfo *repeat = getRepeatInfo(info);
- assert(ctx->repeat_ctrl && ctx->repeat_state);
- union RepeatControl *repeat_ctrl = ctx->repeat_ctrl + info->ctrlIndex;
- char *repeat_state = ctx->repeat_state + info->stateOffset;
-
- if (e->trigger == LIMEX_TRIGGER_POS) {
+#endif
+
+/** \brief Process a single exception. Returns 1 if exception handling should
+ * continue, 0 if an accept callback has instructed us to halt. */
+static really_inline
+int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
+ STATE_T *succ,
+#ifndef BIG_MODEL
+ STATE_T *local_succ,
+#endif
+ const struct IMPL_NFA_T *limex,
+ u64a offset,
+ struct CONTEXT_T *ctx,
+ struct proto_cache *new_cache,
+ enum CacheResult *cacheable,
+ char in_rev,
+ const char flags) {
+ assert(e);
+
+#ifdef DEBUG_EXCEPTIONS
+ printf("EXCEPTION e=%p reports=%u trigger=", e, e->reports);
+ if (e->trigger == LIMEX_TRIGGER_NONE) {
+ printf("none");
+ } else if (e->trigger == LIMEX_TRIGGER_POS) {
+ printf("pos");
+ } else if (e->trigger == LIMEX_TRIGGER_TUG) {
+ printf("tug");
+ } else {
+ printf("unknown!");
+ }
+ printf("\n");
+#endif
+
+ // Trigger exceptions, used in bounded repeats.
+ assert(!in_rev || e->trigger == LIMEX_TRIGGER_NONE);
+ if (!in_rev && e->trigger != LIMEX_TRIGGER_NONE) {
+ assert(e->repeatOffset != MO_INVALID_IDX);
+ const struct NFARepeatInfo *info =
+ (const struct NFARepeatInfo *)((const char *)limex +
+ e->repeatOffset);
+ const struct RepeatInfo *repeat = getRepeatInfo(info);
+ assert(ctx->repeat_ctrl && ctx->repeat_state);
+ union RepeatControl *repeat_ctrl = ctx->repeat_ctrl + info->ctrlIndex;
+ char *repeat_state = ctx->repeat_state + info->stateOffset;
+
+ if (e->trigger == LIMEX_TRIGGER_POS) {
char cyclic_on = TESTBIT_STATE(*STATE_ARG_P, info->cyclicState);
- processPosTrigger(repeat, repeat_ctrl, repeat_state, offset,
- cyclic_on);
- *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES;
- } else {
- assert(e->trigger == LIMEX_TRIGGER_TUG);
- enum TriggerResult rv =
- processTugTrigger(repeat, repeat_ctrl, repeat_state, offset);
- if (rv == TRIGGER_FAIL) {
- *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES;
- DEBUG_PRINTF("tug found no valid matches in repeat state\n");
- return 1; // continue
- } else if (rv == TRIGGER_STALE) {
- *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES;
- DEBUG_PRINTF("stale history, squashing cyclic state\n");
- assert(e->hasSquash == LIMEX_SQUASH_TUG);
+ processPosTrigger(repeat, repeat_ctrl, repeat_state, offset,
+ cyclic_on);
+ *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES;
+ } else {
+ assert(e->trigger == LIMEX_TRIGGER_TUG);
+ enum TriggerResult rv =
+ processTugTrigger(repeat, repeat_ctrl, repeat_state, offset);
+ if (rv == TRIGGER_FAIL) {
+ *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES;
+ DEBUG_PRINTF("tug found no valid matches in repeat state\n");
+ return 1; // continue
+ } else if (rv == TRIGGER_STALE) {
+ *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES;
+ DEBUG_PRINTF("stale history, squashing cyclic state\n");
+ assert(e->hasSquash == LIMEX_SQUASH_TUG);
*succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash));
- return 1; // continue
- } else if (rv == TRIGGER_SUCCESS_CACHE) {
- new_cache->br = 1;
- } else {
- assert(rv == TRIGGER_SUCCESS);
- *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES;
- }
- }
- }
-
- // Some exceptions fire accepts.
- if (e->reports != MO_INVALID_IDX) {
- if (flags & CALLBACK_OUTPUT) {
+ return 1; // continue
+ } else if (rv == TRIGGER_SUCCESS_CACHE) {
+ new_cache->br = 1;
+ } else {
+ assert(rv == TRIGGER_SUCCESS);
+ *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES;
+ }
+ }
+ }
+
+ // Some exceptions fire accepts.
+ if (e->reports != MO_INVALID_IDX) {
+ if (flags & CALLBACK_OUTPUT) {
const ReportID *reports =
(const ReportID *)((const char *)limex + e->reports);
- if (unlikely(limexRunReports(reports, ctx->callback,
- ctx->context, offset)
- == MO_HALT_MATCHING)) {
- DEBUG_PRINTF("callback instructed us to stop\n");
- return 0; // halt
- }
- if (*cacheable == CACHE_RESULT) {
- if (!new_cache->reports || new_cache->reports == reports) {
- new_cache->reports = reports;
- } else {
- *cacheable = DO_NOT_CACHE_RESULT;
- }
- }
- } else {
- if ((flags & FIRST_BYTE) && *cacheable == CACHE_RESULT) {
- *cacheable = DO_NOT_CACHE_RESULT;
- } /* otherwise we can cache as we never care about accepts */
- }
- }
-
- // Most exceptions have a set of successors to switch on. `local_succ' is
- // ORed into `succ' at the end of the caller's loop.
-#ifndef BIG_MODEL
+ if (unlikely(limexRunReports(reports, ctx->callback,
+ ctx->context, offset)
+ == MO_HALT_MATCHING)) {
+ DEBUG_PRINTF("callback instructed us to stop\n");
+ return 0; // halt
+ }
+ if (*cacheable == CACHE_RESULT) {
+ if (!new_cache->reports || new_cache->reports == reports) {
+ new_cache->reports = reports;
+ } else {
+ *cacheable = DO_NOT_CACHE_RESULT;
+ }
+ }
+ } else {
+ if ((flags & FIRST_BYTE) && *cacheable == CACHE_RESULT) {
+ *cacheable = DO_NOT_CACHE_RESULT;
+ } /* otherwise we can cache as we never care about accepts */
+ }
+ }
+
+ // Most exceptions have a set of successors to switch on. `local_succ' is
+ // ORed into `succ' at the end of the caller's loop.
+#ifndef BIG_MODEL
*local_succ = OR_STATE(*local_succ, LOAD_FROM_ENG(&e->successors));
-#else
+#else
ctx->local_succ = OR_STATE(ctx->local_succ, LOAD_FROM_ENG(&e->successors));
-#endif
-
- // Some exceptions squash states behind them. Note that we squash states in
- // 'succ', not local_succ.
+#endif
+
+ // Some exceptions squash states behind them. Note that we squash states in
+ // 'succ', not local_succ.
if (e->hasSquash == LIMEX_SQUASH_CYCLIC
|| e->hasSquash == LIMEX_SQUASH_REPORT) {
*succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash));
- if (*cacheable == CACHE_RESULT) {
- *cacheable = DO_NOT_CACHE_RESULT;
- }
- }
-
- return 1; // continue
-}
-
-#ifndef RUN_EXCEPTION_FN_ONLY
-
+ if (*cacheable == CACHE_RESULT) {
+ *cacheable = DO_NOT_CACHE_RESULT;
+ }
+ }
+
+ return 1; // continue
+}
+
+#ifndef RUN_EXCEPTION_FN_ONLY
+
/** \brief Process all of the exceptions associated with the states in the \a
* estate. */
-static really_inline
+static really_inline
int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
- u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) {
- assert(diffmask > 0); // guaranteed by caller macro
-
+ u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) {
+ assert(diffmask > 0); // guaranteed by caller macro
+
if (EQ_STATE(estate, ctx->cached_estate)) {
- DEBUG_PRINTF("using cached succ from previous state\n");
+ DEBUG_PRINTF("using cached succ from previous state\n");
*succ = OR_STATE(*succ, ctx->cached_esucc);
- if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) {
- DEBUG_PRINTF("firing cached reports from previous state\n");
- if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback,
- ctx->context, offset)
- == MO_HALT_MATCHING)) {
- return PE_RV_HALT; // halt;
- }
- }
- return 0;
- }
-
-#ifndef BIG_MODEL
- STATE_T local_succ = ZERO_STATE;
-#else
+ if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) {
+ DEBUG_PRINTF("firing cached reports from previous state\n");
+ if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback,
+ ctx->context, offset)
+ == MO_HALT_MATCHING)) {
+ return PE_RV_HALT; // halt;
+ }
+ }
+ return 0;
+ }
+
+#ifndef BIG_MODEL
+ STATE_T local_succ = ZERO_STATE;
+#else
ctx->local_succ = ZERO_STATE;
-#endif
-
+#endif
+
struct proto_cache new_cache = {0, NULL};
enum CacheResult cacheable = CACHE_RESULT;
@@ -301,101 +301,101 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
} while (diffmask);
}
#else
- // A copy of the estate as an array of GPR-sized chunks.
- CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
+ // A copy of the estate as an array of GPR-sized chunks.
+ CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)];
-#ifdef ESTATE_ON_STACK
- memcpy(chunks, &estate, sizeof(STATE_T));
-#else
- memcpy(chunks, estatep, sizeof(STATE_T));
-#endif
+#ifdef ESTATE_ON_STACK
+ memcpy(chunks, &estate, sizeof(STATE_T));
+#else
+ memcpy(chunks, estatep, sizeof(STATE_T));
+#endif
memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T));
-
+
u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)];
base_index[0] = 0;
for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) {
base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]);
}
- do {
- u32 t = findAndClearLSB_32(&diffmask);
-#ifdef ARCH_64_BIT
- t >>= 1; // Due to diffmask64, which leaves holes in the bitmask.
-#endif
- assert(t < ARRAY_LENGTH(chunks));
- CHUNK_T word = chunks[t];
- assert(word != 0);
- do {
+ do {
+ u32 t = findAndClearLSB_32(&diffmask);
+#ifdef ARCH_64_BIT
+ t >>= 1; // Due to diffmask64, which leaves holes in the bitmask.
+#endif
+ assert(t < ARRAY_LENGTH(chunks));
+ CHUNK_T word = chunks[t];
+ assert(word != 0);
+ do {
u32 bit = FIND_AND_CLEAR_FN(&word);
u32 local_index = RANK_IN_MASK_FN(emask_chunks[t], bit);
u32 idx = local_index + base_index[t];
- const EXCEPTION_T *e = &exceptions[idx];
-
- if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,
-#ifndef BIG_MODEL
- &local_succ,
-#endif
+ const EXCEPTION_T *e = &exceptions[idx];
+
+ if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,
+#ifndef BIG_MODEL
+ &local_succ,
+#endif
limex, offset, ctx, &new_cache, &cacheable,
in_rev, flags)) {
- return PE_RV_HALT;
- }
- } while (word);
- } while (diffmask);
+ return PE_RV_HALT;
+ }
+ } while (word);
+ } while (diffmask);
#endif
-
-#ifndef BIG_MODEL
+
+#ifndef BIG_MODEL
*succ = OR_STATE(*succ, local_succ);
-#else
+#else
*succ = OR_STATE(*succ, ctx->local_succ);
-#endif
-
- if (cacheable == CACHE_RESULT) {
+#endif
+
+ if (cacheable == CACHE_RESULT) {
ctx->cached_estate = estate;
-#ifndef BIG_MODEL
- ctx->cached_esucc = local_succ;
-#else
+#ifndef BIG_MODEL
+ ctx->cached_esucc = local_succ;
+#else
ctx->cached_esucc = ctx->local_succ;
-#endif
- ctx->cached_reports = new_cache.reports;
- ctx->cached_br = new_cache.br;
- } else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) {
- if (ctx->cached_br) {
+#endif
+ ctx->cached_reports = new_cache.reports;
+ ctx->cached_br = new_cache.br;
+ } else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) {
+ if (ctx->cached_br) {
ctx->cached_estate = ZERO_STATE;
- }
- }
-
- return 0;
-}
-
-#endif
-
-#undef ZERO_STATE
-#undef AND_STATE
-#undef EQ_STATE
-#undef OR_STATE
+ }
+ }
+
+ return 0;
+}
+
+#endif
+
+#undef ZERO_STATE
+#undef AND_STATE
+#undef EQ_STATE
+#undef OR_STATE
#undef EXPAND_STATE
#undef SHUFFLE_BYTE_STATE
-#undef TESTBIT_STATE
-#undef PE_FN
-#undef RUN_EXCEPTION_FN
-#undef CONTEXT_T
-#undef EXCEPTION_T
-
-#ifdef estate
-#undef estate
-#endif
-
-#ifdef BIG_MODEL
-#undef BIG_MODEL
-#endif
-
-#undef STATE_ARG
-#undef STATE_ARG_NAME
-#undef STATE_ARG_P
-
+#undef TESTBIT_STATE
+#undef PE_FN
+#undef RUN_EXCEPTION_FN
+#undef CONTEXT_T
+#undef EXCEPTION_T
+
+#ifdef estate
+#undef estate
+#endif
+
+#ifdef BIG_MODEL
+#undef BIG_MODEL
+#endif
+
+#undef STATE_ARG
+#undef STATE_ARG_NAME
+#undef STATE_ARG_P
+
#undef IMPL_NFA_T
-#undef CHUNK_T
-#undef FIND_AND_CLEAR_FN
+#undef CHUNK_T
+#undef FIND_AND_CLEAR_FN
#undef POPCOUNT_FN
#undef RANK_IN_MASK_FN
diff --git a/contrib/libs/hyperscan/src/nfa/limex_internal.h b/contrib/libs/hyperscan/src/nfa/limex_internal.h
index 412f507d7c..23b1bd9707 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_internal.h
+++ b/contrib/libs/hyperscan/src/nfa/limex_internal.h
@@ -1,193 +1,193 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- This file provides the internal structures and definitions required for the
- real NFAs (aka limex NFAs );
-
- Limex NFAs now have variable length in memory. They look like this:
-
- LimExNFA structure
- Fixed length, e.g. LimExNFA256.
- Reachability table
- Variable length array of state bitvectors, mapped into by
- NFACommonXXX.reachMap.
- Tops
- Variable length array of state bitvectors, used for TOP_N events.
- Acceleration structures
- Variable length array of AccelAux structs.
- Accepts
- Variable length array of NFAAccept structs.
- EOD Accepts
- Variable length array of NFAAccept structs.
- Exceptions
- Variable length array of NFAExceptionXXX structs.
- Repeat Structure Offsets
- Array of u32 offsets that point at each "Repeat Structure" (below)
- Repeat Structures
- Variable length repeat structures, addressed via
- NFAException32::repeatOffset etc.
-
- The state associated with the NFA is split into:
-
- -# The "traditional" NFA state as a bitvector. This is stored in the
- first N bytes of the state space (length given in
- NFACommonXXX.stateSize), and may be stored shrunk to CEIL(stateSize/8)
- or compressed. If it is stored compressed, than the
- LIMEX_FLAG_COMPRESS_STATE flag is set in NFACommonXXX.flags.
- -# Extended NFA state, only used in some LimEx NFAs. This consists of a
- variable length array of LimExNFAExtendedState structures, each with
- pointers to a packed list of mmbit structures that follows them. Only
- present when used.
-
- The value of NFA.stateSize gives the total state size in bytes (the sum of
- all the above).
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ This file provides the internal structures and definitions required for the
+ real NFAs (aka limex NFAs );
+
+ Limex NFAs now have variable length in memory. They look like this:
+
+ LimExNFA structure
+ Fixed length, e.g. LimExNFA256.
+ Reachability table
+ Variable length array of state bitvectors, mapped into by
+ NFACommonXXX.reachMap.
+ Tops
+ Variable length array of state bitvectors, used for TOP_N events.
+ Acceleration structures
+ Variable length array of AccelAux structs.
+ Accepts
+ Variable length array of NFAAccept structs.
+ EOD Accepts
+ Variable length array of NFAAccept structs.
+ Exceptions
+ Variable length array of NFAExceptionXXX structs.
+ Repeat Structure Offsets
+ Array of u32 offsets that point at each "Repeat Structure" (below)
+ Repeat Structures
+ Variable length repeat structures, addressed via
+ NFAException32::repeatOffset etc.
+
+ The state associated with the NFA is split into:
+
+ -# The "traditional" NFA state as a bitvector. This is stored in the
+ first N bytes of the state space (length given in
+ NFACommonXXX.stateSize), and may be stored shrunk to CEIL(stateSize/8)
+ or compressed. If it is stored compressed, than the
+ LIMEX_FLAG_COMPRESS_STATE flag is set in NFACommonXXX.flags.
+ -# Extended NFA state, only used in some LimEx NFAs. This consists of a
+ variable length array of LimExNFAExtendedState structures, each with
+ pointers to a packed list of mmbit structures that follows them. Only
+ present when used.
+
+ The value of NFA.stateSize gives the total state size in bytes (the sum of
+ all the above).
+
Number of shifts should be always greater or equal to 1
Number of shifts 0 means that no appropriate NFA engine was found.
-*/
-
-#ifndef LIMEX_INTERNAL_H
-#define LIMEX_INTERNAL_H
-
-#include "nfa_internal.h"
-#include "repeat_internal.h"
-
-// Constants
+*/
+
+#ifndef LIMEX_INTERNAL_H
+#define LIMEX_INTERNAL_H
+
+#include "nfa_internal.h"
+#include "repeat_internal.h"
+
+// Constants
#define MAX_SHIFT_COUNT 8 /**< largest number of shifts used by a LimEx NFA */
#define MAX_SHIFT_AMOUNT 16 /**< largest shift amount used by a LimEx NFA */
-
-#define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */
-#define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */
+
+#define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */
+#define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */
#define LIMEX_FLAG_CANNOT_DIE 4 /**< limex cannot have no states on */
#define LIMEX_FLAG_EXTRACT_EXP 8 /**< use limex exception bit extraction */
-
-enum LimExTrigger {
- LIMEX_TRIGGER_NONE = 0,
- LIMEX_TRIGGER_POS = 1,
- LIMEX_TRIGGER_TUG = 2
-};
-
-enum LimExSquash {
- LIMEX_SQUASH_NONE = 0, //!< no squash for you!
- LIMEX_SQUASH_CYCLIC = 1, //!< squash due to cyclic state
- LIMEX_SQUASH_TUG = 2, //!< squash due to tug trigger with stale estate
- LIMEX_SQUASH_REPORT = 3 //!< squash when report is raised
-};
-
-/* uniform looking types for the macros */
-typedef u8 u_8;
-typedef u16 u_16;
-typedef u32 u_32;
-typedef u64a u_64;
-typedef m128 u_128;
-typedef m256 u_256;
-typedef m384 u_384;
-typedef m512 u_512;
-
-#define CREATE_NFA_LIMEX(size) \
-struct NFAException##size { \
- u_##size squash; /**< mask of states to leave on */ \
- u_##size successors; /**< mask of states to switch on */ \
- u32 reports; /**< offset to start of reports list, or MO_INVALID_IDX */ \
- u32 repeatOffset; /**< offset to NFARepeatInfo, or MO_INVALID_IDX */ \
- u8 hasSquash; /**< from enum LimExSquash */ \
- u8 trigger; /**< from enum LimExTrigger */ \
-}; \
- \
+
+enum LimExTrigger {
+ LIMEX_TRIGGER_NONE = 0,
+ LIMEX_TRIGGER_POS = 1,
+ LIMEX_TRIGGER_TUG = 2
+};
+
+enum LimExSquash {
+ LIMEX_SQUASH_NONE = 0, //!< no squash for you!
+ LIMEX_SQUASH_CYCLIC = 1, //!< squash due to cyclic state
+ LIMEX_SQUASH_TUG = 2, //!< squash due to tug trigger with stale estate
+ LIMEX_SQUASH_REPORT = 3 //!< squash when report is raised
+};
+
+/* uniform looking types for the macros */
+typedef u8 u_8;
+typedef u16 u_16;
+typedef u32 u_32;
+typedef u64a u_64;
+typedef m128 u_128;
+typedef m256 u_256;
+typedef m384 u_384;
+typedef m512 u_512;
+
+#define CREATE_NFA_LIMEX(size) \
+struct NFAException##size { \
+ u_##size squash; /**< mask of states to leave on */ \
+ u_##size successors; /**< mask of states to switch on */ \
+ u32 reports; /**< offset to start of reports list, or MO_INVALID_IDX */ \
+ u32 repeatOffset; /**< offset to NFARepeatInfo, or MO_INVALID_IDX */ \
+ u8 hasSquash; /**< from enum LimExSquash */ \
+ u8 trigger; /**< from enum LimExTrigger */ \
+}; \
+ \
struct LimExNFA##size { \
- u8 reachMap[N_CHARS]; /**< map of char -> entry in reach[] */ \
- u32 reachSize; /**< number of reach masks */ \
- u32 accelCount; /**< number of entries in accel table */ \
- u32 accelTableOffset; /* rel. to start of LimExNFA */ \
- u32 accelAuxCount; /**< number of entries in aux table */ \
- u32 accelAuxOffset; /* rel. to start of LimExNFA */ \
- u32 acceptCount; \
- u32 acceptOffset; /* rel. to start of LimExNFA */ \
- u32 acceptEodCount; \
- u32 acceptEodOffset; /* rel. to start of LimExNFA */ \
- u32 exceptionCount; \
- u32 exceptionOffset; /* rel. to start of LimExNFA */ \
- u32 repeatCount; \
- u32 repeatOffset; \
- u32 squashOffset; /* rel. to start of LimExNFA; for accept squashing */ \
- u32 squashCount; \
- u32 topCount; \
- u32 topOffset; /* rel. to start of LimExNFA */ \
- u32 stateSize; /**< not including extended history */ \
- u32 flags; \
- u_##size init; \
- u_##size initDS; \
- u_##size accept; /**< mask of accept states */ \
- u_##size acceptAtEOD; /**< mask of states that accept at EOD */ \
- u_##size accel; /**< mask of accelerable states */ \
- u_##size accelPermute; /**< pshufb permute mask (not GPR) */ \
- u_##size accelCompare; /**< pshufb compare mask (not GPR) */ \
- u_##size accel_and_friends; /**< mask of accelerable states + likely
- * followers */ \
- u_##size compressMask; /**< switch off before compress */ \
- u_##size exceptionMask; \
+ u8 reachMap[N_CHARS]; /**< map of char -> entry in reach[] */ \
+ u32 reachSize; /**< number of reach masks */ \
+ u32 accelCount; /**< number of entries in accel table */ \
+ u32 accelTableOffset; /* rel. to start of LimExNFA */ \
+ u32 accelAuxCount; /**< number of entries in aux table */ \
+ u32 accelAuxOffset; /* rel. to start of LimExNFA */ \
+ u32 acceptCount; \
+ u32 acceptOffset; /* rel. to start of LimExNFA */ \
+ u32 acceptEodCount; \
+ u32 acceptEodOffset; /* rel. to start of LimExNFA */ \
+ u32 exceptionCount; \
+ u32 exceptionOffset; /* rel. to start of LimExNFA */ \
+ u32 repeatCount; \
+ u32 repeatOffset; \
+ u32 squashOffset; /* rel. to start of LimExNFA; for accept squashing */ \
+ u32 squashCount; \
+ u32 topCount; \
+ u32 topOffset; /* rel. to start of LimExNFA */ \
+ u32 stateSize; /**< not including extended history */ \
+ u32 flags; \
+ u_##size init; \
+ u_##size initDS; \
+ u_##size accept; /**< mask of accept states */ \
+ u_##size acceptAtEOD; /**< mask of states that accept at EOD */ \
+ u_##size accel; /**< mask of accelerable states */ \
+ u_##size accelPermute; /**< pshufb permute mask (not GPR) */ \
+ u_##size accelCompare; /**< pshufb compare mask (not GPR) */ \
+ u_##size accel_and_friends; /**< mask of accelerable states + likely
+ * followers */ \
+ u_##size compressMask; /**< switch off before compress */ \
+ u_##size exceptionMask; \
u_##size repeatCyclicMask; /**< also includes tug states */ \
- u_##size zombieMask; /**< zombie if in any of the set states */ \
+ u_##size zombieMask; /**< zombie if in any of the set states */ \
u_##size shift[MAX_SHIFT_COUNT]; \
u32 shiftCount; /**< number of shift masks used */ \
u8 shiftAmount[MAX_SHIFT_COUNT]; /**< shift amount for each mask */ \
m512 exceptionShufMask; /**< exception byte shuffle mask */ \
m512 exceptionBitMask; /**< exception bit mask */ \
m512 exceptionAndMask; /**< exception and mask */ \
-};
-
-CREATE_NFA_LIMEX(32)
+};
+
+CREATE_NFA_LIMEX(32)
CREATE_NFA_LIMEX(64)
-CREATE_NFA_LIMEX(128)
-CREATE_NFA_LIMEX(256)
-CREATE_NFA_LIMEX(384)
-CREATE_NFA_LIMEX(512)
-
-/** \brief Structure describing a bounded repeat within the LimEx NFA.
- *
- * This struct is followed in memory by:
- *
- * -# a RepeatInfo structure
- * -# a variable-sized lookup table for REPEAT_SPARSE_OPTIMAL_P repeats
- * -# a TUG mask
- */
-struct NFARepeatInfo {
- u32 cyclicState; //!< index of this repeat's cyclic state
- u32 ctrlIndex; //!< index of this repeat's control block
- u32 packedCtrlOffset; //!< offset to packed control block in stream state
- u32 stateOffset; //!< offset to repeat state in stream state
- u32 stateSize; //!< total size of packed stream state for this repeat
- u32 tugMaskOffset; //!< offset to tug mask (rel. to NFARepeatInfo)
-};
-
-struct NFAAccept {
+CREATE_NFA_LIMEX(128)
+CREATE_NFA_LIMEX(256)
+CREATE_NFA_LIMEX(384)
+CREATE_NFA_LIMEX(512)
+
+/** \brief Structure describing a bounded repeat within the LimEx NFA.
+ *
+ * This struct is followed in memory by:
+ *
+ * -# a RepeatInfo structure
+ * -# a variable-sized lookup table for REPEAT_SPARSE_OPTIMAL_P repeats
+ * -# a TUG mask
+ */
+struct NFARepeatInfo {
+ u32 cyclicState; //!< index of this repeat's cyclic state
+ u32 ctrlIndex; //!< index of this repeat's control block
+ u32 packedCtrlOffset; //!< offset to packed control block in stream state
+ u32 stateOffset; //!< offset to repeat state in stream state
+ u32 stateSize; //!< total size of packed stream state for this repeat
+ u32 tugMaskOffset; //!< offset to tug mask (rel. to NFARepeatInfo)
+};
+
+struct NFAAccept {
u8 single_report; //!< If true, 'reports' is report id.
/**
@@ -198,6 +198,6 @@ struct NFAAccept {
u32 reports;
u32 squash; //!< Offset (from LimEx) into squash masks, or MO_INVALID_IDX.
-};
-
-#endif
+};
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/limex_limits.h b/contrib/libs/hyperscan/src/nfa/limex_limits.h
index 1ceaf6fa0b..f4df54a4b0 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_limits.h
+++ b/contrib/libs/hyperscan/src/nfa/limex_limits.h
@@ -1,35 +1,35 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef LIMEX_LIMITS_H
-#define LIMEX_LIMITS_H
-
-#define NFA_MAX_STATES 512 /**< max states in an NFA */
-#define NFA_MAX_ACCEL_STATES 8 /**< max accel states in a NFA */
-
-#endif
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef LIMEX_LIMITS_H
+#define LIMEX_LIMITS_H
+
+#define NFA_MAX_STATES 512 /**< max states in an NFA */
+#define NFA_MAX_ACCEL_STATES 8 /**< max accel states in a NFA */
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/limex_native.c b/contrib/libs/hyperscan/src/nfa/limex_native.c
index c66cef599e..f6f5809c36 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_native.c
+++ b/contrib/libs/hyperscan/src/nfa/limex_native.c
@@ -1,129 +1,129 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief LimEx NFA: native GPR runtime implementations.
- */
-
-//#define DEBUG
-//#define DEBUG_INPUT
-//#define DEBUG_EXCEPTIONS
-
-#include "limex.h"
-
-#include "accel.h"
-#include "limex_internal.h"
-#include "nfa_internal.h"
-#include "ue2common.h"
-#include "util/bitutils.h"
-
-// Common code
-#define STATE_ON_STACK
-#define ESTATE_ON_STACK
-
-#include "limex_runtime.h"
-
-// Other implementation code from X-Macro impl.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief LimEx NFA: native GPR runtime implementations.
+ */
+
+//#define DEBUG
+//#define DEBUG_INPUT
+//#define DEBUG_EXCEPTIONS
+
+#include "limex.h"
+
+#include "accel.h"
+#include "limex_internal.h"
+#include "nfa_internal.h"
+#include "ue2common.h"
+#include "util/bitutils.h"
+
+// Common code
+#define STATE_ON_STACK
+#define ESTATE_ON_STACK
+
+#include "limex_runtime.h"
+
+// Other implementation code from X-Macro impl.
#define SIZE 32
#define STATE_T u32
#define ENG_STATE_T u32
#define LOAD_FROM_ENG load_u32
-#include "limex_state_impl.h"
-
-#define INLINE_ATTR really_inline
-#include "limex_common_impl.h"
-
-////////////////////////////////////////////////////////////////////////////
-// LimEx NFA implementation code - general purpose registers
-////////////////////////////////////////////////////////////////////////////
-
-// Process exceptional states
-
-#define STATE_ON_STACK
-#define ESTATE_ON_STACK
-#define RUN_EXCEPTION_FN_ONLY
-#include "limex_exceptional.h"
-
-static really_inline
-int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
- const struct LimExNFA32 *limex,
+#include "limex_state_impl.h"
+
+#define INLINE_ATTR really_inline
+#include "limex_common_impl.h"
+
+////////////////////////////////////////////////////////////////////////////
+// LimEx NFA implementation code - general purpose registers
+////////////////////////////////////////////////////////////////////////////
+
+// Process exceptional states
+
+#define STATE_ON_STACK
+#define ESTATE_ON_STACK
+#define RUN_EXCEPTION_FN_ONLY
+#include "limex_exceptional.h"
+
+static really_inline
+int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
+ const struct LimExNFA32 *limex,
const struct NFAException32 *exceptions, u64a offset,
- struct NFAContext32 *ctx, char in_rev, char flags) {
- assert(estate != 0); // guaranteed by calling macro
-
- if (estate == ctx->cached_estate) {
- DEBUG_PRINTF("using cached succ from previous state\n");
- *succ |= ctx->cached_esucc;
- if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) {
- DEBUG_PRINTF("firing cached reports from previous state\n");
- if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback,
- ctx->context, offset)
- == MO_HALT_MATCHING)) {
- return PE_RV_HALT; // halt;
- }
- }
- return 0;
- }
-
- u32 orig_estate = estate; // for caching
- u32 local_succ = 0;
- struct proto_cache new_cache = {0, NULL};
- enum CacheResult cacheable = CACHE_RESULT;
-
- /* Note that only exception-states that consist of exceptions that _only_
- * set successors (not fire accepts or squash states) are cacheable. */
-
- do {
- u32 bit = findAndClearLSB_32(&estate);
+ struct NFAContext32 *ctx, char in_rev, char flags) {
+ assert(estate != 0); // guaranteed by calling macro
+
+ if (estate == ctx->cached_estate) {
+ DEBUG_PRINTF("using cached succ from previous state\n");
+ *succ |= ctx->cached_esucc;
+ if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) {
+ DEBUG_PRINTF("firing cached reports from previous state\n");
+ if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback,
+ ctx->context, offset)
+ == MO_HALT_MATCHING)) {
+ return PE_RV_HALT; // halt;
+ }
+ }
+ return 0;
+ }
+
+ u32 orig_estate = estate; // for caching
+ u32 local_succ = 0;
+ struct proto_cache new_cache = {0, NULL};
+ enum CacheResult cacheable = CACHE_RESULT;
+
+ /* Note that only exception-states that consist of exceptions that _only_
+ * set successors (not fire accepts or squash states) are cacheable. */
+
+ do {
+ u32 bit = findAndClearLSB_32(&estate);
u32 idx = rank_in_mask32(limex->exceptionMask, bit);
- const struct NFAException32 *e = &exceptions[idx];
+ const struct NFAException32 *e = &exceptions[idx];
if (!runException32(e, s, succ, &local_succ, limex, offset, ctx,
&new_cache, &cacheable, in_rev, flags)) {
- return PE_RV_HALT;
- }
- } while (estate != 0);
-
- *succ |= local_succ;
-
- if (cacheable == CACHE_RESULT) {
- ctx->cached_estate = orig_estate;
- ctx->cached_esucc = local_succ;
- ctx->cached_reports = new_cache.reports;
- ctx->cached_br = new_cache.br;
- } else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) {
- if (ctx->cached_br) {
- ctx->cached_estate = 0U;
- }
- }
-
- return 0;
-}
-
-// 32-bit models.
-#include "limex_runtime_impl.h"
+ return PE_RV_HALT;
+ }
+ } while (estate != 0);
+
+ *succ |= local_succ;
+
+ if (cacheable == CACHE_RESULT) {
+ ctx->cached_estate = orig_estate;
+ ctx->cached_esucc = local_succ;
+ ctx->cached_reports = new_cache.reports;
+ ctx->cached_br = new_cache.br;
+ } else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) {
+ if (ctx->cached_br) {
+ ctx->cached_estate = 0U;
+ }
+ }
+
+ return 0;
+}
+
+// 32-bit models.
+#include "limex_runtime_impl.h"
diff --git a/contrib/libs/hyperscan/src/nfa/limex_ring.h b/contrib/libs/hyperscan/src/nfa/limex_ring.h
index 4aac689a73..522cfa12bc 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_ring.h
+++ b/contrib/libs/hyperscan/src/nfa/limex_ring.h
@@ -1,106 +1,106 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Bounded Repeat implementation for the LimEx NFA.
- */
-
-#ifndef LIMEX_RING_H
-#define LIMEX_RING_H
-
-#include "ue2common.h"
-#include "repeat.h"
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-/** \brief Return values from \ref processTugTrigger, used to provide feedback
- * about a bounded repeat to the caller.
- *
- * TRIGGER_FAIL does not get cached as we prefer to use TRIGGER_STALE which
- * allows the exception to squash the cyclic state as well. */
-enum TriggerResult {
- TRIGGER_FAIL, /**< no valid matches, but history still valid */
- TRIGGER_SUCCESS, /**< valid match found */
- TRIGGER_STALE, /**< no valid matches and history is invalid (stale) */
- TRIGGER_SUCCESS_CACHE /**< valid match found; can cache as the repeat has no
- upper bound. */
-};
-
-/** \brief Handle a TUG trigger: given an \p offset, returns whether a repeat
- * matches or not. */
-static really_inline
-enum TriggerResult processTugTrigger(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const char *state, u64a offset) {
- DEBUG_PRINTF("tug trigger, %s history, repeat={%u,%u}, offset=%llu, "
- "ctrl=%p, state=%p\n",
- repeatTypeName(info->type), info->repeatMin, info->repeatMax,
- offset, ctrl, state);
-
- assert(ISALIGNED(ctrl));
-
- enum RepeatMatch rv = repeatHasMatch(info, ctrl, state, offset);
- switch (rv) {
- case REPEAT_NOMATCH:
- return TRIGGER_FAIL;
- case REPEAT_STALE:
- return TRIGGER_STALE;
- case REPEAT_MATCH:
- if (info->repeatMax == REPEAT_INF) {
- // {N,} repeats can be cached.
- return TRIGGER_SUCCESS_CACHE;
- } else {
- return TRIGGER_SUCCESS;
- }
- }
-
- assert(0); // unreachable
- return TRIGGER_FAIL;
-}
-
-/** \brief Handle a POS trigger: stores a top in the repeat. */
-static really_inline
-void processPosTrigger(const struct RepeatInfo *info, union RepeatControl *ctrl,
- char *state, u64a offset, char is_alive) {
- DEBUG_PRINTF("pos trigger, %s history, repeat={%u,%u}, offset=%llu, "
- "is_alive=%d\n", repeatTypeName(info->type),
- info->repeatMin, info->repeatMax, offset, is_alive);
-
- assert(ISALIGNED(ctrl));
-
- repeatStore(info, ctrl, state, offset, is_alive);
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Bounded Repeat implementation for the LimEx NFA.
+ */
+
+#ifndef LIMEX_RING_H
+#define LIMEX_RING_H
+
+#include "ue2common.h"
+#include "repeat.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/** \brief Return values from \ref processTugTrigger, used to provide feedback
+ * about a bounded repeat to the caller.
+ *
+ * TRIGGER_FAIL does not get cached as we prefer to use TRIGGER_STALE which
+ * allows the exception to squash the cyclic state as well. */
+enum TriggerResult {
+ TRIGGER_FAIL, /**< no valid matches, but history still valid */
+ TRIGGER_SUCCESS, /**< valid match found */
+ TRIGGER_STALE, /**< no valid matches and history is invalid (stale) */
+ TRIGGER_SUCCESS_CACHE /**< valid match found; can cache as the repeat has no
+ upper bound. */
+};
+
+/** \brief Handle a TUG trigger: given an \p offset, returns whether a repeat
+ * matches or not. */
+static really_inline
+enum TriggerResult processTugTrigger(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const char *state, u64a offset) {
+ DEBUG_PRINTF("tug trigger, %s history, repeat={%u,%u}, offset=%llu, "
+ "ctrl=%p, state=%p\n",
+ repeatTypeName(info->type), info->repeatMin, info->repeatMax,
+ offset, ctrl, state);
+
+ assert(ISALIGNED(ctrl));
+
+ enum RepeatMatch rv = repeatHasMatch(info, ctrl, state, offset);
+ switch (rv) {
+ case REPEAT_NOMATCH:
+ return TRIGGER_FAIL;
+ case REPEAT_STALE:
+ return TRIGGER_STALE;
+ case REPEAT_MATCH:
+ if (info->repeatMax == REPEAT_INF) {
+ // {N,} repeats can be cached.
+ return TRIGGER_SUCCESS_CACHE;
+ } else {
+ return TRIGGER_SUCCESS;
+ }
+ }
+
+ assert(0); // unreachable
+ return TRIGGER_FAIL;
+}
+
+/** \brief Handle a POS trigger: stores a top in the repeat. */
+static really_inline
+void processPosTrigger(const struct RepeatInfo *info, union RepeatControl *ctrl,
+ char *state, u64a offset, char is_alive) {
+ DEBUG_PRINTF("pos trigger, %s history, repeat={%u,%u}, offset=%llu, "
+ "is_alive=%d\n", repeatTypeName(info->type),
+ info->repeatMin, info->repeatMax, offset, is_alive);
+
+ assert(ISALIGNED(ctrl));
+
+ repeatStore(info, ctrl, state, offset, is_alive);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/limex_runtime.h b/contrib/libs/hyperscan/src/nfa/limex_runtime.h
index 3395a44830..6109d382d8 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_runtime.h
+++ b/contrib/libs/hyperscan/src/nfa/limex_runtime.h
@@ -1,108 +1,108 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- \brief Limex Execution Engine Or:
- How I Learned To Stop Worrying And Love The Preprocessor
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ \brief Limex Execution Engine Or:
+ How I Learned To Stop Worrying And Love The Preprocessor
+
This file includes utility functions which do not depend on the size of the
state or shift masks directly.
-*/
-
-#ifndef LIMEX_RUNTIME_H
-#define LIMEX_RUNTIME_H
-
-#include "limex_accel.h"
-#include "limex_context.h"
-#include "limex_internal.h"
-#include "nfa_api_util.h"
-#include "nfa_internal.h"
-#include "util/uniform_ops.h"
-
-////////////////////////////////////////////////////////////////////////////
-// LimEx NFA implementation code - common macros
-////////////////////////////////////////////////////////////////////////////
-
-#ifdef DEBUG_INPUT
-#include <ctype.h>
-#define DUMP_INPUT(index) DEBUG_PRINTF("input %p i=%zu: %02hhx (%c)\n", \
- &input[index], index, input[index], \
- isprint(input[index]) ? input[index] : ' ')
-#else
-#define DUMP_INPUT(index) do { } while(0)
-#endif
-
-#define NO_OUTPUT 0
-#define CALLBACK_OUTPUT 1
-#define FIRST_BYTE 16
-
-enum CacheResult {
- DO_NOT_CACHE_RESULT,
- CACHE_RESULT,
- DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES
-};
-
-struct proto_cache {
- char br;
- const ReportID *reports;
-};
-
-#define PE_RV_HALT 1
-
-#ifdef STATE_ON_STACK
-#define pass_state s
-#else
-#define pass_state &s
-#endif
-
-#ifdef ESTATE_ON_STACK
-#define pass_estate estate
-#else
-#define pass_estate &estate
-#endif
-
-static really_inline
-int limexRunReports(const ReportID *reports, NfaCallback callback,
- void *context, u64a offset) {
- assert(reports);
- assert(callback);
-
- for (; *reports != MO_INVALID_IDX; ++reports) {
- DEBUG_PRINTF("firing report for id %u at offset %llu\n",
- *reports, offset);
+*/
+
+#ifndef LIMEX_RUNTIME_H
+#define LIMEX_RUNTIME_H
+
+#include "limex_accel.h"
+#include "limex_context.h"
+#include "limex_internal.h"
+#include "nfa_api_util.h"
+#include "nfa_internal.h"
+#include "util/uniform_ops.h"
+
+////////////////////////////////////////////////////////////////////////////
+// LimEx NFA implementation code - common macros
+////////////////////////////////////////////////////////////////////////////
+
+#ifdef DEBUG_INPUT
+#include <ctype.h>
+#define DUMP_INPUT(index) DEBUG_PRINTF("input %p i=%zu: %02hhx (%c)\n", \
+ &input[index], index, input[index], \
+ isprint(input[index]) ? input[index] : ' ')
+#else
+#define DUMP_INPUT(index) do { } while(0)
+#endif
+
+#define NO_OUTPUT 0
+#define CALLBACK_OUTPUT 1
+#define FIRST_BYTE 16
+
+enum CacheResult {
+ DO_NOT_CACHE_RESULT,
+ CACHE_RESULT,
+ DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES
+};
+
+struct proto_cache {
+ char br;
+ const ReportID *reports;
+};
+
+#define PE_RV_HALT 1
+
+#ifdef STATE_ON_STACK
+#define pass_state s
+#else
+#define pass_state &s
+#endif
+
+#ifdef ESTATE_ON_STACK
+#define pass_estate estate
+#else
+#define pass_estate &estate
+#endif
+
+static really_inline
+int limexRunReports(const ReportID *reports, NfaCallback callback,
+ void *context, u64a offset) {
+ assert(reports);
+ assert(callback);
+
+ for (; *reports != MO_INVALID_IDX; ++reports) {
+ DEBUG_PRINTF("firing report for id %u at offset %llu\n",
+ *reports, offset);
int rv = callback(0, offset, *reports, context);
- if (rv == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
- return MO_CONTINUE_MATCHING; // continue
-}
-
+ if (rv == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+ return MO_CONTINUE_MATCHING; // continue
+}
+
static really_inline
int limexRunAccept(const char *limex_base, const struct NFAAccept *accept,
NfaCallback callback, void *context, u64a offset) {
@@ -135,67 +135,67 @@ int limexAcceptHasReport(const char *limex_base, const struct NFAAccept *accept,
return 0;
}
-/** \brief Return a (correctly typed) pointer to the exception table. */
-#define getExceptionTable(exc_type, lim) \
- ((const exc_type *)((const char *)(lim) + (lim)->exceptionOffset))
-
-/** \brief Return a pointer to the ordinary accepts table. */
-#define getAcceptTable(lim) \
- ((const struct NFAAccept *)((const char *)(lim) + (lim)->acceptOffset))
-
-/** \brief Return a pointer to the EOD accepts table. */
-#define getAcceptEodTable(lim) \
- ((const struct NFAAccept *)((const char *)(lim) + (lim)->acceptEodOffset))
-
-#define MAKE_GET_NFA_REPEAT_INFO(size) \
- static really_inline const struct NFARepeatInfo *getNfaRepeatInfo##size( \
- const struct LimExNFA##size *limex, unsigned num) { \
- assert(num < limex->repeatCount); \
- \
- const char *base = (const char *)limex; \
- const u32 *repeatOffset = (const u32 *)(base + limex->repeatOffset); \
- assert(ISALIGNED(repeatOffset)); \
- \
- const struct NFARepeatInfo *info = \
- (const struct NFARepeatInfo *)(base + repeatOffset[num]); \
- assert(ISALIGNED(info)); \
- return info; \
- }
-
-MAKE_GET_NFA_REPEAT_INFO(32)
+/** \brief Return a (correctly typed) pointer to the exception table. */
+#define getExceptionTable(exc_type, lim) \
+ ((const exc_type *)((const char *)(lim) + (lim)->exceptionOffset))
+
+/** \brief Return a pointer to the ordinary accepts table. */
+#define getAcceptTable(lim) \
+ ((const struct NFAAccept *)((const char *)(lim) + (lim)->acceptOffset))
+
+/** \brief Return a pointer to the EOD accepts table. */
+#define getAcceptEodTable(lim) \
+ ((const struct NFAAccept *)((const char *)(lim) + (lim)->acceptEodOffset))
+
+#define MAKE_GET_NFA_REPEAT_INFO(size) \
+ static really_inline const struct NFARepeatInfo *getNfaRepeatInfo##size( \
+ const struct LimExNFA##size *limex, unsigned num) { \
+ assert(num < limex->repeatCount); \
+ \
+ const char *base = (const char *)limex; \
+ const u32 *repeatOffset = (const u32 *)(base + limex->repeatOffset); \
+ assert(ISALIGNED(repeatOffset)); \
+ \
+ const struct NFARepeatInfo *info = \
+ (const struct NFARepeatInfo *)(base + repeatOffset[num]); \
+ assert(ISALIGNED(info)); \
+ return info; \
+ }
+
+MAKE_GET_NFA_REPEAT_INFO(32)
MAKE_GET_NFA_REPEAT_INFO(64)
-MAKE_GET_NFA_REPEAT_INFO(128)
-MAKE_GET_NFA_REPEAT_INFO(256)
-MAKE_GET_NFA_REPEAT_INFO(384)
-MAKE_GET_NFA_REPEAT_INFO(512)
-
-static really_inline
-const struct RepeatInfo *getRepeatInfo(const struct NFARepeatInfo *info) {
- const struct RepeatInfo *repeat =
- (const struct RepeatInfo *)((const char *)info + sizeof(*info));
- assert(ISALIGNED(repeat));
- return repeat;
-}
-
-static really_inline
-union RepeatControl *getRepeatControlBase(char *state, size_t nfa_state_size) {
- union RepeatControl *ctrl_base =
- (union RepeatControl *)(state +
- ROUNDUP_N(nfa_state_size,
- alignof(union RepeatControl)));
- assert(ISALIGNED(ctrl_base));
- return ctrl_base;
-}
-
-static really_inline
-const union RepeatControl *getRepeatControlBaseConst(const char *state,
- size_t nfa_state_size) {
- const union RepeatControl *ctrl_base =
- (const union RepeatControl *)(state +
- ROUNDUP_N(nfa_state_size,
- alignof(union RepeatControl)));
- assert(ISALIGNED(ctrl_base));
- return ctrl_base;
-}
-
-#endif
+MAKE_GET_NFA_REPEAT_INFO(128)
+MAKE_GET_NFA_REPEAT_INFO(256)
+MAKE_GET_NFA_REPEAT_INFO(384)
+MAKE_GET_NFA_REPEAT_INFO(512)
+
+static really_inline
+const struct RepeatInfo *getRepeatInfo(const struct NFARepeatInfo *info) {
+ const struct RepeatInfo *repeat =
+ (const struct RepeatInfo *)((const char *)info + sizeof(*info));
+ assert(ISALIGNED(repeat));
+ return repeat;
+}
+
+static really_inline
+union RepeatControl *getRepeatControlBase(char *state, size_t nfa_state_size) {
+ union RepeatControl *ctrl_base =
+ (union RepeatControl *)(state +
+ ROUNDUP_N(nfa_state_size,
+ alignof(union RepeatControl)));
+ assert(ISALIGNED(ctrl_base));
+ return ctrl_base;
+}
+
+static really_inline
+const union RepeatControl *getRepeatControlBaseConst(const char *state,
+ size_t nfa_state_size) {
+ const union RepeatControl *ctrl_base =
+ (const union RepeatControl *)(state +
+ ROUNDUP_N(nfa_state_size,
+ alignof(union RepeatControl)));
+ assert(ISALIGNED(ctrl_base));
+ return ctrl_base;
+}
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/limex_runtime_impl.h b/contrib/libs/hyperscan/src/nfa/limex_runtime_impl.h
index 541744cec0..7b89182bea 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_runtime_impl.h
+++ b/contrib/libs/hyperscan/src/nfa/limex_runtime_impl.h
@@ -1,164 +1,164 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "util/join.h"
-#include <string.h>
-
-/** \file
- * \brief Limex Execution Engine Or:
- * How I Learned To Stop Worrying And Love The Preprocessor
- *
- * Version 2.0: now with X-Macros, so you get line numbers in your debugger.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "util/join.h"
+#include <string.h>
+
+/** \file
+ * \brief Limex Execution Engine Or:
+ * How I Learned To Stop Worrying And Love The Preprocessor
+ *
+ * Version 2.0: now with X-Macros, so you get line numbers in your debugger.
+ */
+
#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG)
# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer.
-#endif
-
+#endif
+
#define LIMEX_API_ROOT JOIN(nfaExecLimEx, SIZE)
-
-#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE)
-
-#define TESTEOD_FN JOIN(moNfaTestEod, SIZE)
-#define INITIAL_FN JOIN(moNfaInitial, SIZE)
-#define TOP_FN JOIN(moNfaTop, SIZE)
-#define TOPN_FN JOIN(moNfaTopN, SIZE)
-#define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE)
-#define COMPRESS_FN JOIN(moNfaCompressState, SIZE)
-#define EXPAND_FN JOIN(moNfaExpandState, SIZE)
-#define COMPRESS_REPEATS_FN JOIN(LIMEX_API_ROOT, _Compress_Repeats)
-#define EXPAND_REPEATS_FN JOIN(LIMEX_API_ROOT, _Expand_Repeats)
-#define PROCESS_ACCEPTS_FN JOIN(moProcessAccepts, SIZE)
-#define PROCESS_ACCEPTS_NOSQUASH_FN JOIN(moProcessAcceptsNoSquash, SIZE)
-#define GET_NFA_REPEAT_INFO_FN JOIN(getNfaRepeatInfo, SIZE)
-#define RUN_ACCEL_FN JOIN(LIMEX_API_ROOT, _Run_Accel)
-#define RUN_EXCEPTIONS_FN JOIN(LIMEX_API_ROOT, _Run_Exceptions)
-#define REV_STREAM_FN JOIN(LIMEX_API_ROOT, _Rev_Stream)
+
+#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE)
+
+#define TESTEOD_FN JOIN(moNfaTestEod, SIZE)
+#define INITIAL_FN JOIN(moNfaInitial, SIZE)
+#define TOP_FN JOIN(moNfaTop, SIZE)
+#define TOPN_FN JOIN(moNfaTopN, SIZE)
+#define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE)
+#define COMPRESS_FN JOIN(moNfaCompressState, SIZE)
+#define EXPAND_FN JOIN(moNfaExpandState, SIZE)
+#define COMPRESS_REPEATS_FN JOIN(LIMEX_API_ROOT, _Compress_Repeats)
+#define EXPAND_REPEATS_FN JOIN(LIMEX_API_ROOT, _Expand_Repeats)
+#define PROCESS_ACCEPTS_FN JOIN(moProcessAccepts, SIZE)
+#define PROCESS_ACCEPTS_NOSQUASH_FN JOIN(moProcessAcceptsNoSquash, SIZE)
+#define GET_NFA_REPEAT_INFO_FN JOIN(getNfaRepeatInfo, SIZE)
+#define RUN_ACCEL_FN JOIN(LIMEX_API_ROOT, _Run_Accel)
+#define RUN_EXCEPTIONS_FN JOIN(LIMEX_API_ROOT, _Run_Exceptions)
+#define REV_STREAM_FN JOIN(LIMEX_API_ROOT, _Rev_Stream)
#define LOOP_NOACCEL_FN JOIN(LIMEX_API_ROOT, _Loop_No_Accel)
-#define STREAM_FN JOIN(LIMEX_API_ROOT, _Stream)
-#define STREAMCB_FN JOIN(LIMEX_API_ROOT, _Stream_CB)
-#define STREAMFIRST_FN JOIN(LIMEX_API_ROOT, _Stream_First)
-#define STREAMSILENT_FN JOIN(LIMEX_API_ROOT, _Stream_Silent)
-#define CONTEXT_T JOIN(NFAContext, SIZE)
-#define EXCEPTION_T JOIN(struct NFAException, SIZE)
-#define AND_STATE JOIN(and_, STATE_T)
-#define ANDNOT_STATE JOIN(andnot_, STATE_T)
-#define OR_STATE JOIN(or_, STATE_T)
+#define STREAM_FN JOIN(LIMEX_API_ROOT, _Stream)
+#define STREAMCB_FN JOIN(LIMEX_API_ROOT, _Stream_CB)
+#define STREAMFIRST_FN JOIN(LIMEX_API_ROOT, _Stream_First)
+#define STREAMSILENT_FN JOIN(LIMEX_API_ROOT, _Stream_Silent)
+#define CONTEXT_T JOIN(NFAContext, SIZE)
+#define EXCEPTION_T JOIN(struct NFAException, SIZE)
+#define AND_STATE JOIN(and_, STATE_T)
+#define ANDNOT_STATE JOIN(andnot_, STATE_T)
+#define OR_STATE JOIN(or_, STATE_T)
#define LSHIFT_STATE JOIN(lshift_, STATE_T)
-#define TESTBIT_STATE JOIN(testbit_, STATE_T)
+#define TESTBIT_STATE JOIN(testbit_, STATE_T)
#define CLEARBIT_STATE JOIN(clearbit_, STATE_T)
-#define ZERO_STATE JOIN(zero_, STATE_T)
-#define ISNONZERO_STATE JOIN(isNonZero_, STATE_T)
-#define ISZERO_STATE JOIN(isZero_, STATE_T)
-#define NOTEQ_STATE JOIN(noteq_, STATE_T)
-
-// Pick an appropriate diffrich function for this platform.
-#ifdef ARCH_64_BIT
-#define DIFFRICH_STATE JOIN(diffrich64_, STATE_T)
-#else
-#define DIFFRICH_STATE JOIN(diffrich_, STATE_T)
-#endif
-
-#define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE)
-#define SQUASH_UNTUG_BR_FN JOIN(lazyTug, SIZE)
-
-// Acceleration and exception masks: we load them on the fly for really big
-// models.
-#if SIZE < 256
-#define ACCEL_MASK accelMask
-#define ACCEL_AND_FRIENDS_MASK accel_and_friendsMask
-#define EXCEPTION_MASK exceptionMask
-#else
+#define ZERO_STATE JOIN(zero_, STATE_T)
+#define ISNONZERO_STATE JOIN(isNonZero_, STATE_T)
+#define ISZERO_STATE JOIN(isZero_, STATE_T)
+#define NOTEQ_STATE JOIN(noteq_, STATE_T)
+
+// Pick an appropriate diffrich function for this platform.
+#ifdef ARCH_64_BIT
+#define DIFFRICH_STATE JOIN(diffrich64_, STATE_T)
+#else
+#define DIFFRICH_STATE JOIN(diffrich_, STATE_T)
+#endif
+
+#define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE)
+#define SQUASH_UNTUG_BR_FN JOIN(lazyTug, SIZE)
+
+// Acceleration and exception masks: we load them on the fly for really big
+// models.
+#if SIZE < 256
+#define ACCEL_MASK accelMask
+#define ACCEL_AND_FRIENDS_MASK accel_and_friendsMask
+#define EXCEPTION_MASK exceptionMask
+#else
#define ACCEL_MASK LOAD_FROM_ENG(&limex->accel)
#define ACCEL_AND_FRIENDS_MASK LOAD_FROM_ENG(&limex->accel_and_friends)
#define EXCEPTION_MASK LOAD_FROM_ENG(&limex->exceptionMask)
-#endif
-
-// Run exception processing, if necessary. Returns 0 if scanning should
-// continue, 1 if an accept was fired and the user instructed us to halt.
-static really_inline
-char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
- STATE_T s, const STATE_T emask, size_t i, u64a offset,
- STATE_T *succ, u64a *final_loc, struct CONTEXT_T *ctx,
- const char flags, const char in_rev,
- const char first_match) {
- STATE_T estate = AND_STATE(s, emask);
- u32 diffmask = DIFFRICH_STATE(ZERO_STATE, estate);
- if (likely(!diffmask)) {
- return 0; // No exceptions to process.
- }
-
- if (first_match && i) {
+#endif
+
+// Run exception processing, if necessary. Returns 0 if scanning should
+// continue, 1 if an accept was fired and the user instructed us to halt.
+static really_inline
+char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions,
+ STATE_T s, const STATE_T emask, size_t i, u64a offset,
+ STATE_T *succ, u64a *final_loc, struct CONTEXT_T *ctx,
+ const char flags, const char in_rev,
+ const char first_match) {
+ STATE_T estate = AND_STATE(s, emask);
+ u32 diffmask = DIFFRICH_STATE(ZERO_STATE, estate);
+ if (likely(!diffmask)) {
+ return 0; // No exceptions to process.
+ }
+
+ if (first_match && i) {
STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept);
- STATE_T foundAccepts = AND_STATE(s, acceptMask);
- if (unlikely(ISNONZERO_STATE(foundAccepts))) {
- DEBUG_PRINTF("first match at %zu\n", i);
- DEBUG_PRINTF("for nfa %p\n", limex);
- assert(final_loc);
+ STATE_T foundAccepts = AND_STATE(s, acceptMask);
+ if (unlikely(ISNONZERO_STATE(foundAccepts))) {
+ DEBUG_PRINTF("first match at %zu\n", i);
+ DEBUG_PRINTF("for nfa %p\n", limex);
+ assert(final_loc);
ctx->s = s;
- *final_loc = i;
- return 1; // Halt matching.
- }
- }
-
- u64a callback_offset = i + offset;
- char localflags = (!i && !in_rev) ? NO_OUTPUT | FIRST_BYTE : flags;
-
- int rv = JOIN(processExceptional, SIZE)(
+ *final_loc = i;
+ return 1; // Halt matching.
+ }
+ }
+
+ u64a callback_offset = i + offset;
+ char localflags = (!i && !in_rev) ? NO_OUTPUT | FIRST_BYTE : flags;
+
+ int rv = JOIN(processExceptional, SIZE)(
pass_state, pass_estate, diffmask, succ, limex, exceptions,
callback_offset, ctx, in_rev, localflags);
- if (rv == PE_RV_HALT) {
- return 1; // Halt matching.
- }
-
- return 0;
-}
-
-static really_inline
-size_t RUN_ACCEL_FN(const STATE_T s, UNUSED const STATE_T accelMask,
- UNUSED const IMPL_NFA_T *limex, const u8 *accelTable,
- const union AccelAux *accelAux, const u8 *input, size_t i,
- size_t length) {
- size_t j;
-#if SIZE < 128
- // For small cases, we pass the state by value.
- j = JOIN(doAccel, SIZE)(s, accelMask, accelTable, accelAux, input, i,
- length);
-#else
- j = JOIN(doAccel, SIZE)(&s, limex, accelTable, accelAux, input, i, length);
-#endif
-
- assert(j >= i);
- assert(i <= length);
- return j;
-}
-
+ if (rv == PE_RV_HALT) {
+ return 1; // Halt matching.
+ }
+
+ return 0;
+}
+
+static really_inline
+size_t RUN_ACCEL_FN(const STATE_T s, UNUSED const STATE_T accelMask,
+ UNUSED const IMPL_NFA_T *limex, const u8 *accelTable,
+ const union AccelAux *accelAux, const u8 *input, size_t i,
+ size_t length) {
+ size_t j;
+#if SIZE < 128
+ // For small cases, we pass the state by value.
+ j = JOIN(doAccel, SIZE)(s, accelMask, accelTable, accelAux, input, i,
+ length);
+#else
+ j = JOIN(doAccel, SIZE)(&s, limex, accelTable, accelAux, input, i, length);
+#endif
+
+ assert(j >= i);
+ assert(i <= length);
+ return j;
+}
+
// Shift macros for Limited NFAs. Defined in terms of uniform ops.
// LimExNFAxxx ptr in 'limex' and the current state in 's'
#define NFA_EXEC_LIM_SHIFT(limex_m, curr_m, shift_idx) \
@@ -206,7 +206,7 @@ size_t RUN_ACCEL_FN(const STATE_T s, UNUSED const STATE_T accelMask,
* true.
*
*/
-static really_inline
+static really_inline
char LOOP_NOACCEL_FN(const IMPL_NFA_T *limex, const u8 *input, size_t *loc,
size_t length, STATE_T *s_ptr, struct CONTEXT_T *ctx,
u64a offset, const char flags, u64a *final_loc,
@@ -244,203 +244,203 @@ char LOOP_NOACCEL_FN(const IMPL_NFA_T *limex, const u8 *input, size_t *loc,
}
static really_inline
-char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
- struct CONTEXT_T *ctx, u64a offset, const char flags,
- u64a *final_loc, const char first_match) {
+char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
+ struct CONTEXT_T *ctx, u64a offset, const char flags,
+ u64a *final_loc, const char first_match) {
const ENG_STATE_T *reach = get_reach_table(limex);
-#if SIZE < 256
+#if SIZE < 256
const STATE_T accelMask = LOAD_FROM_ENG(&limex->accel);
const STATE_T accel_and_friendsMask
= LOAD_FROM_ENG(&limex->accel_and_friends);
const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask);
-#endif
+#endif
const u8 *accelTable =
(const u8 *)((const char *)limex + limex->accelTableOffset);
- const union AccelAux *accelAux =
- (const union AccelAux *)((const char *)limex + limex->accelAuxOffset);
- const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
+ const union AccelAux *accelAux =
+ (const union AccelAux *)((const char *)limex + limex->accelAuxOffset);
+ const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
STATE_T s = ctx->s;
-
- /* assert(ISALIGNED_16(exceptions)); */
- /* assert(ISALIGNED_16(reach)); */
-
- size_t i = 0;
- size_t min_accel_offset = 0;
- if (!limex->accelCount || length < ACCEL_MIN_LEN) {
- min_accel_offset = length;
- goto without_accel;
- } else {
- goto with_accel;
- }
-
-without_accel:
+
+ /* assert(ISALIGNED_16(exceptions)); */
+ /* assert(ISALIGNED_16(reach)); */
+
+ size_t i = 0;
+ size_t min_accel_offset = 0;
+ if (!limex->accelCount || length < ACCEL_MIN_LEN) {
+ min_accel_offset = length;
+ goto without_accel;
+ } else {
+ goto with_accel;
+ }
+
+without_accel:
if (limex->flags & LIMEX_FLAG_CANNOT_DIE) {
const char can_die = 0;
if (LOOP_NOACCEL_FN(limex, input, &i, min_accel_offset, &s, ctx, offset,
flags, final_loc, first_match,
can_die) == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
- }
+ }
} else {
const char can_die = 1;
if (LOOP_NOACCEL_FN(limex, input, &i, min_accel_offset, &s, ctx, offset,
flags, final_loc, first_match,
can_die) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
-
-with_accel:
- for (; i != length; i++) {
- DUMP_INPUT(i);
- if (i + 16 <= length &&
- ISZERO_STATE(ANDNOT_STATE(ACCEL_AND_FRIENDS_MASK, s))) {
- DEBUG_PRINTF("current states are all accelerable\n");
- assert(i + 16 <= length);
- size_t post_idx =
- RUN_ACCEL_FN(s, ACCEL_MASK, limex, accelTable, accelAux, input,
- i, length);
- if (post_idx != i) {
- /* squashing any friends as they may no longer be valid;
- * offset back off should ensure they weren't doing anything
- * important */
- s = AND_STATE(ACCEL_MASK, s);
- }
-
- if (i && post_idx < min_accel_offset + BAD_ACCEL_DIST) {
- min_accel_offset = post_idx + BIG_ACCEL_PENALTY;
- } else {
- min_accel_offset = post_idx + SMALL_ACCEL_PENALTY;
- }
-
- if (min_accel_offset >= length - ACCEL_MIN_LEN) {
- min_accel_offset = length;
- }
-
- DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n",
- post_idx - i, min_accel_offset - post_idx,
- length - post_idx);
-
- i = post_idx;
- if (i == length) {
- break; /* all chars eaten, break out of loop */
- }
- goto without_accel;
- }
-
- STATE_T succ;
+ return MO_HALT_MATCHING;
+ }
+ }
+
+with_accel:
+ for (; i != length; i++) {
+ DUMP_INPUT(i);
+ if (i + 16 <= length &&
+ ISZERO_STATE(ANDNOT_STATE(ACCEL_AND_FRIENDS_MASK, s))) {
+ DEBUG_PRINTF("current states are all accelerable\n");
+ assert(i + 16 <= length);
+ size_t post_idx =
+ RUN_ACCEL_FN(s, ACCEL_MASK, limex, accelTable, accelAux, input,
+ i, length);
+ if (post_idx != i) {
+ /* squashing any friends as they may no longer be valid;
+ * offset back off should ensure they weren't doing anything
+ * important */
+ s = AND_STATE(ACCEL_MASK, s);
+ }
+
+ if (i && post_idx < min_accel_offset + BAD_ACCEL_DIST) {
+ min_accel_offset = post_idx + BIG_ACCEL_PENALTY;
+ } else {
+ min_accel_offset = post_idx + SMALL_ACCEL_PENALTY;
+ }
+
+ if (min_accel_offset >= length - ACCEL_MIN_LEN) {
+ min_accel_offset = length;
+ }
+
+ DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n",
+ post_idx - i, min_accel_offset - post_idx,
+ length - post_idx);
+
+ i = post_idx;
+ if (i == length) {
+ break; /* all chars eaten, break out of loop */
+ }
+ goto without_accel;
+ }
+
+ STATE_T succ;
NFA_EXEC_GET_LIM_SUCC(limex, s, succ);
-
+
if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset,
&succ, final_loc, ctx, flags, 0, first_match)) {
- return MO_HALT_MATCHING;
- }
-
+ return MO_HALT_MATCHING;
+ }
+
u8 c = input[i];
s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]]));
- }
-
+ }
+
ctx->s = s;
-
- if ((first_match || (flags & CALLBACK_OUTPUT)) && limex->acceptCount) {
+
+ if ((first_match || (flags & CALLBACK_OUTPUT)) && limex->acceptCount) {
STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept);
- const struct NFAAccept *acceptTable = getAcceptTable(limex);
- STATE_T foundAccepts = AND_STATE(s, acceptMask);
- if (unlikely(ISNONZERO_STATE(foundAccepts))) {
- if (first_match) {
+ const struct NFAAccept *acceptTable = getAcceptTable(limex);
+ STATE_T foundAccepts = AND_STATE(s, acceptMask);
+ if (unlikely(ISNONZERO_STATE(foundAccepts))) {
+ if (first_match) {
ctx->s = s;
- assert(final_loc);
- *final_loc = length;
- return MO_HALT_MATCHING;
+ assert(final_loc);
+ *final_loc = length;
+ return MO_HALT_MATCHING;
} else if (PROCESS_ACCEPTS_FN(limex, &ctx->s, &acceptMask,
acceptTable, offset + length,
- ctx->callback, ctx->context)) {
- return MO_HALT_MATCHING;
- }
- }
- }
- if (first_match) {
- assert(final_loc);
- *final_loc = length;
- }
- return MO_CONTINUE_MATCHING;
-}
-
-static never_inline
-char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
- struct CONTEXT_T *ctx, u64a offset) {
+ ctx->callback, ctx->context)) {
+ return MO_HALT_MATCHING;
+ }
+ }
+ }
+ if (first_match) {
+ assert(final_loc);
+ *final_loc = length;
+ }
+ return MO_CONTINUE_MATCHING;
+}
+
+static never_inline
+char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
+ struct CONTEXT_T *ctx, u64a offset) {
const ENG_STATE_T *reach = get_reach_table(limex);
-#if SIZE < 256
+#if SIZE < 256
const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask);
-#endif
- const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
+#endif
+ const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex);
STATE_T s = ctx->s;
-
- /* assert(ISALIGNED_16(exceptions)); */
- /* assert(ISALIGNED_16(reach)); */
- const char flags = CALLBACK_OUTPUT;
- u64a *final_loc = NULL;
-
- for (size_t i = length; i != 0; i--) {
+
+ /* assert(ISALIGNED_16(exceptions)); */
+ /* assert(ISALIGNED_16(reach)); */
+ const char flags = CALLBACK_OUTPUT;
+ u64a *final_loc = NULL;
+
+ for (size_t i = length; i != 0; i--) {
DUMP_INPUT(i - 1);
- if (ISZERO_STATE(s)) {
- DEBUG_PRINTF("no states are switched on, early exit\n");
+ if (ISZERO_STATE(s)) {
+ DEBUG_PRINTF("no states are switched on, early exit\n");
ctx->s = s;
- return MO_CONTINUE_MATCHING;
- }
-
- STATE_T succ;
+ return MO_CONTINUE_MATCHING;
+ }
+
+ STATE_T succ;
NFA_EXEC_GET_LIM_SUCC(limex, s, succ);
-
+
if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset,
&succ, final_loc, ctx, flags, 1, 0)) {
- return MO_HALT_MATCHING;
- }
-
+ return MO_HALT_MATCHING;
+ }
+
u8 c = input[i - 1];
s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]]));
- }
-
+ }
+
ctx->s = s;
-
+
STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept);
- const struct NFAAccept *acceptTable = getAcceptTable(limex);
- const u32 acceptCount = limex->acceptCount;
- assert(flags & CALLBACK_OUTPUT);
- if (acceptCount) {
- STATE_T foundAccepts = AND_STATE(s, acceptMask);
- if (unlikely(ISNONZERO_STATE(foundAccepts))) {
+ const struct NFAAccept *acceptTable = getAcceptTable(limex);
+ const u32 acceptCount = limex->acceptCount;
+ assert(flags & CALLBACK_OUTPUT);
+ if (acceptCount) {
+ STATE_T foundAccepts = AND_STATE(s, acceptMask);
+ if (unlikely(ISNONZERO_STATE(foundAccepts))) {
if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &ctx->s, &acceptMask,
acceptTable, offset, ctx->callback,
- ctx->context)) {
- return MO_HALT_MATCHING;
- }
- }
- }
- return MO_CONTINUE_MATCHING;
-}
-
-static really_inline
+ ctx->context)) {
+ return MO_HALT_MATCHING;
+ }
+ }
+ }
+ return MO_CONTINUE_MATCHING;
+}
+
+static really_inline
void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src,
- u64a offset) {
- if (!limex->repeatCount) {
- return;
- }
-
+ u64a offset) {
+ if (!limex->repeatCount) {
+ return;
+ }
+
STATE_T s = *(STATE_T *)src;
-
+
if (ISZERO_STATE(AND_STATE(LOAD_FROM_ENG(&limex->repeatCyclicMask), s))) {
DEBUG_PRINTF("no cyclics are on\n");
return;
}
- const union RepeatControl *ctrl =
- getRepeatControlBaseConst((const char *)src, sizeof(STATE_T));
- char *state_base = (char *)dest + limex->stateSize;
-
- for (u32 i = 0; i < limex->repeatCount; i++) {
+ const union RepeatControl *ctrl =
+ getRepeatControlBaseConst((const char *)src, sizeof(STATE_T));
+ char *state_base = (char *)dest + limex->stateSize;
+
+ for (u32 i = 0; i < limex->repeatCount; i++) {
DEBUG_PRINTF("repeat %u\n", i);
- const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
+ const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
const ENG_STATE_T *tug_mask =
(const ENG_STATE_T *)((const char *)info + info->tugMaskOffset);
@@ -451,34 +451,34 @@ void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src,
continue;
}
- const struct RepeatInfo *repeat = getRepeatInfo(info);
+ const struct RepeatInfo *repeat = getRepeatInfo(info);
DEBUG_PRINTF("packing state (packedCtrlOffset=%u)\n",
info->packedCtrlOffset);
- repeatPack(state_base + info->packedCtrlOffset, repeat, &ctrl[i],
- offset);
- }
+ repeatPack(state_base + info->packedCtrlOffset, repeat, &ctrl[i],
+ offset);
+ }
*(STATE_T *)src = s;
-}
-
-char JOIN(LIMEX_API_ROOT, _queueCompressState)(const struct NFA *n,
+}
+
+char JOIN(LIMEX_API_ROOT, _queueCompressState)(const struct NFA *n,
const struct mq *q, s64a loc) {
- void *dest = q->streamState;
+ void *dest = q->streamState;
void *src = q->state;
- u8 key = queue_prev_byte(q, loc);
- const IMPL_NFA_T *limex = getImplNfa(n);
+ u8 key = queue_prev_byte(q, loc);
+ const IMPL_NFA_T *limex = getImplNfa(n);
COMPRESS_REPEATS_FN(limex, dest, src, q->offset + loc);
- COMPRESS_FN(limex, dest, src, key);
- return 0;
-}
-
-static really_inline
-void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src,
- u64a offset) {
- if (!limex->repeatCount) {
- return;
- }
-
+ COMPRESS_FN(limex, dest, src, key);
+ return 0;
+}
+
+static really_inline
+void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src,
+ u64a offset) {
+ if (!limex->repeatCount) {
+ return;
+ }
+
// Note: state has already been expanded into 'dest'.
const STATE_T cyclics =
AND_STATE(*(STATE_T *)dest, LOAD_FROM_ENG(&limex->repeatCyclicMask));
@@ -486,14 +486,14 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src,
DEBUG_PRINTF("no cyclics are on\n");
return;
}
-
- union RepeatControl *ctrl =
- getRepeatControlBase((char *)dest, sizeof(STATE_T));
- const char *state_base = (const char *)src + limex->stateSize;
-
- for (u32 i = 0; i < limex->repeatCount; i++) {
+
+ union RepeatControl *ctrl =
+ getRepeatControlBase((char *)dest, sizeof(STATE_T));
+ const char *state_base = (const char *)src + limex->stateSize;
+
+ for (u32 i = 0; i < limex->repeatCount; i++) {
DEBUG_PRINTF("repeat %u\n", i);
- const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
+ const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
const ENG_STATE_T *tug_mask =
(const ENG_STATE_T *)((const char *)info + info->tugMaskOffset);
@@ -505,137 +505,137 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src,
DEBUG_PRINTF("unpacking state (packedCtrlOffset=%u)\n",
info->packedCtrlOffset);
- const struct RepeatInfo *repeat = getRepeatInfo(info);
- repeatUnpack(state_base + info->packedCtrlOffset, repeat, offset,
- &ctrl[i]);
- }
-}
-
-char JOIN(LIMEX_API_ROOT, _expandState)(const struct NFA *n, void *dest,
- const void *src, u64a offset,
- u8 key) {
- const IMPL_NFA_T *limex = getImplNfa(n);
- EXPAND_FN(limex, dest, src, key);
- EXPAND_REPEATS_FN(limex, dest, src, offset);
- return 0;
-}
-
+ const struct RepeatInfo *repeat = getRepeatInfo(info);
+ repeatUnpack(state_base + info->packedCtrlOffset, repeat, offset,
+ &ctrl[i]);
+ }
+}
+
+char JOIN(LIMEX_API_ROOT, _expandState)(const struct NFA *n, void *dest,
+ const void *src, u64a offset,
+ u8 key) {
+ const IMPL_NFA_T *limex = getImplNfa(n);
+ EXPAND_FN(limex, dest, src, key);
+ EXPAND_REPEATS_FN(limex, dest, src, offset);
+ return 0;
+}
+
char JOIN(LIMEX_API_ROOT, _queueInitState)(const struct NFA *n, struct mq *q) {
*(STATE_T *)q->state = ZERO_STATE;
-
- // Zero every bounded repeat control block in state.
- const IMPL_NFA_T *limex = getImplNfa(n);
- union RepeatControl *ctrl = getRepeatControlBase(q->state, sizeof(STATE_T));
- for (u32 i = 0; i < limex->repeatCount; i++) {
- memset(&ctrl[i], 0, sizeof(*ctrl));
- }
-
- return 0;
-}
-
-char JOIN(LIMEX_API_ROOT, _initCompressedState)(const struct NFA *n,
- u64a offset, void *state,
- u8 key) {
- const IMPL_NFA_T *limex = getImplNfa(n);
-
- STATE_T s = INITIAL_FN(limex, !!offset);
- if (ISZERO_STATE(s)) {
- DEBUG_PRINTF("state went to zero\n");
- return 0;
- }
-
- // NFA is still active, compress its state and ship it out.
- COMPRESS_FN(limex, state, &s, key);
-
- // Zero every packed bounded repeat control block in stream state.
- char *repeat_region = (char *)state + limex->stateSize;
- for (u32 i = 0; i < limex->repeatCount; i++) {
- const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
- const struct RepeatInfo *repeat = getRepeatInfo(info);
-
- memset(repeat_region + info->packedCtrlOffset, 0,
- repeat->packedCtrlSize);
- }
-
- return 1;
-}
-
-// Helper for history buffer scans, which catch up the NFA state but don't emit
-// matches.
-static never_inline
-void STREAMSILENT_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
- struct CONTEXT_T *ctx, u64a offset) {
- const char first_match = 0;
-
- UNUSED char rv = STREAM_FN(limex, input, length, ctx, offset, NO_OUTPUT,
- NULL, first_match);
- assert(rv != MO_HALT_MATCHING);
-}
-
-static never_inline
-char STREAMCB_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
- struct CONTEXT_T *ctx, u64a offset) {
- const char first_match = 0;
- assert(ISALIGNED_CL(ctx));
- return STREAM_FN(limex, input, length, ctx, offset, CALLBACK_OUTPUT, NULL,
- first_match);
-}
-
-static never_inline
-char STREAMFIRST_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
- struct CONTEXT_T *ctx, u64a offset, u64a *final_loc) {
- const char first_match = 1; // Run to first match and stop, no callbacks.
- return STREAM_FN(limex, input, length, ctx, offset, NO_OUTPUT, final_loc,
- first_match);
-}
-
-// Common code for handling the current event on the queue.
-static really_inline
-void JOIN(LIMEX_API_ROOT, _HandleEvent)(const IMPL_NFA_T *limex,
- struct mq *q, struct CONTEXT_T *ctx,
- u64a sp) {
-#define DEFINE_CASE(ee) \
- case ee: \
- DEBUG_PRINTF(#ee "\n");
-
- u32 e = q->items[q->cur].type;
- switch (e) {
- DEFINE_CASE(MQE_TOP)
+
+ // Zero every bounded repeat control block in state.
+ const IMPL_NFA_T *limex = getImplNfa(n);
+ union RepeatControl *ctrl = getRepeatControlBase(q->state, sizeof(STATE_T));
+ for (u32 i = 0; i < limex->repeatCount; i++) {
+ memset(&ctrl[i], 0, sizeof(*ctrl));
+ }
+
+ return 0;
+}
+
+char JOIN(LIMEX_API_ROOT, _initCompressedState)(const struct NFA *n,
+ u64a offset, void *state,
+ u8 key) {
+ const IMPL_NFA_T *limex = getImplNfa(n);
+
+ STATE_T s = INITIAL_FN(limex, !!offset);
+ if (ISZERO_STATE(s)) {
+ DEBUG_PRINTF("state went to zero\n");
+ return 0;
+ }
+
+ // NFA is still active, compress its state and ship it out.
+ COMPRESS_FN(limex, state, &s, key);
+
+ // Zero every packed bounded repeat control block in stream state.
+ char *repeat_region = (char *)state + limex->stateSize;
+ for (u32 i = 0; i < limex->repeatCount; i++) {
+ const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i);
+ const struct RepeatInfo *repeat = getRepeatInfo(info);
+
+ memset(repeat_region + info->packedCtrlOffset, 0,
+ repeat->packedCtrlSize);
+ }
+
+ return 1;
+}
+
+// Helper for history buffer scans, which catch up the NFA state but don't emit
+// matches.
+static never_inline
+void STREAMSILENT_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
+ struct CONTEXT_T *ctx, u64a offset) {
+ const char first_match = 0;
+
+ UNUSED char rv = STREAM_FN(limex, input, length, ctx, offset, NO_OUTPUT,
+ NULL, first_match);
+ assert(rv != MO_HALT_MATCHING);
+}
+
+static never_inline
+char STREAMCB_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
+ struct CONTEXT_T *ctx, u64a offset) {
+ const char first_match = 0;
+ assert(ISALIGNED_CL(ctx));
+ return STREAM_FN(limex, input, length, ctx, offset, CALLBACK_OUTPUT, NULL,
+ first_match);
+}
+
+static never_inline
+char STREAMFIRST_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length,
+ struct CONTEXT_T *ctx, u64a offset, u64a *final_loc) {
+ const char first_match = 1; // Run to first match and stop, no callbacks.
+ return STREAM_FN(limex, input, length, ctx, offset, NO_OUTPUT, final_loc,
+ first_match);
+}
+
+// Common code for handling the current event on the queue.
+static really_inline
+void JOIN(LIMEX_API_ROOT, _HandleEvent)(const IMPL_NFA_T *limex,
+ struct mq *q, struct CONTEXT_T *ctx,
+ u64a sp) {
+#define DEFINE_CASE(ee) \
+ case ee: \
+ DEBUG_PRINTF(#ee "\n");
+
+ u32 e = q->items[q->cur].type;
+ switch (e) {
+ DEFINE_CASE(MQE_TOP)
ctx->s = TOP_FN(limex, !!sp, ctx->s);
- break;
- DEFINE_CASE(MQE_START)
- break;
- DEFINE_CASE(MQE_END)
- break;
- default:
- assert(e >= MQE_TOP_FIRST);
- assert(e < MQE_INVALID);
- DEBUG_PRINTF("MQE_TOP + %d\n", ((int)e - MQE_TOP_FIRST));
+ break;
+ DEFINE_CASE(MQE_START)
+ break;
+ DEFINE_CASE(MQE_END)
+ break;
+ default:
+ assert(e >= MQE_TOP_FIRST);
+ assert(e < MQE_INVALID);
+ DEBUG_PRINTF("MQE_TOP + %d\n", ((int)e - MQE_TOP_FIRST));
ctx->s = TOPN_FN(limex, ctx->s, e - MQE_TOP_FIRST);
- }
-#undef DEFINE_CASE
-}
-
-// "Classic" queue call, used by outfixes
-char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) {
- const IMPL_NFA_T *limex = getImplNfa(n);
-
- if (q->report_current) {
- char rv = REPORTCURRENT_FN(limex, q);
-
- q->report_current = 0;
-
- if (rv == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
-
- if (q->cur == q->end) {
- return 1;
- }
-
- assert(q->cur + 1 < q->end); /* require at least two items */
-
+ }
+#undef DEFINE_CASE
+}
+
+// "Classic" queue call, used by outfixes
+char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) {
+ const IMPL_NFA_T *limex = getImplNfa(n);
+
+ if (q->report_current) {
+ char rv = REPORTCURRENT_FN(limex, q);
+
+ q->report_current = 0;
+
+ if (rv == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+
+ if (q->cur == q->end) {
+ return 1;
+ }
+
+ assert(q->cur + 1 < q->end); /* require at least two items */
+
struct CONTEXT_T ctx;
ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T));
ctx.repeat_state = q->streamState + limex->stateSize;
@@ -643,94 +643,94 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) {
ctx.context = q->context;
ctx.cached_estate = ZERO_STATE;
ctx.cached_br = 0;
-
- assert(q->items[q->cur].location >= 0);
- DEBUG_PRINTF("LOAD STATE\n");
+
+ assert(q->items[q->cur].location >= 0);
+ DEBUG_PRINTF("LOAD STATE\n");
ctx.s = *(STATE_T *)q->state;
- assert(q->items[q->cur].type == MQE_START);
-
- u64a offset = q->offset;
- u64a sp = offset + q->items[q->cur].location;
- u64a end_abs = offset + end;
- q->cur++;
-
- while (q->cur < q->end && sp <= end_abs) {
- u64a ep = offset + q->items[q->cur].location;
- ep = MIN(ep, end_abs);
- assert(ep >= sp);
-
- assert(sp >= offset); // We no longer do history buffer scans here.
-
- if (sp >= ep) {
- goto scan_done;
- }
-
- /* do main buffer region */
- DEBUG_PRINTF("MAIN BUFFER SCAN\n");
- assert(ep - offset <= q->length);
+ assert(q->items[q->cur].type == MQE_START);
+
+ u64a offset = q->offset;
+ u64a sp = offset + q->items[q->cur].location;
+ u64a end_abs = offset + end;
+ q->cur++;
+
+ while (q->cur < q->end && sp <= end_abs) {
+ u64a ep = offset + q->items[q->cur].location;
+ ep = MIN(ep, end_abs);
+ assert(ep >= sp);
+
+ assert(sp >= offset); // We no longer do history buffer scans here.
+
+ if (sp >= ep) {
+ goto scan_done;
+ }
+
+ /* do main buffer region */
+ DEBUG_PRINTF("MAIN BUFFER SCAN\n");
+ assert(ep - offset <= q->length);
if (STREAMCB_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp)
- == MO_HALT_MATCHING) {
+ == MO_HALT_MATCHING) {
*(STATE_T *)q->state = ZERO_STATE;
- return 0;
- }
-
- DEBUG_PRINTF("SCAN DONE\n");
- scan_done:
- sp = ep;
-
- if (sp != offset + q->items[q->cur].location) {
- assert(q->cur);
- DEBUG_PRINTF("bail: sp = %llu end_abs == %llu offset == %llu\n",
- sp, end_abs, offset);
- assert(sp == end_abs);
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = sp - offset;
- DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end);
+ return 0;
+ }
+
+ DEBUG_PRINTF("SCAN DONE\n");
+ scan_done:
+ sp = ep;
+
+ if (sp != offset + q->items[q->cur].location) {
+ assert(q->cur);
+ DEBUG_PRINTF("bail: sp = %llu end_abs == %llu offset == %llu\n",
+ sp, end_abs, offset);
+ assert(sp == end_abs);
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = sp - offset;
+ DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end);
*(STATE_T *)q->state = ctx.s;
- return MO_ALIVE;
- }
-
+ return MO_ALIVE;
+ }
+
JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp);
-
- q->cur++;
- }
-
+
+ q->cur++;
+ }
+
EXPIRE_ESTATE_FN(limex, &ctx, sp);
-
- DEBUG_PRINTF("END\n");
+
+ DEBUG_PRINTF("END\n");
*(STATE_T *)q->state = ctx.s;
-
- if (q->cur != q->end) {
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = sp - offset;
- return MO_ALIVE;
- }
-
+
+ if (q->cur != q->end) {
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = sp - offset;
+ return MO_ALIVE;
+ }
+
return ISNONZERO_STATE(ctx.s);
-}
-
-/* used by suffix execution in Rose */
-char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
- const IMPL_NFA_T *limex = getImplNfa(n);
-
- if (q->report_current) {
- char rv = REPORTCURRENT_FN(limex, q);
-
- q->report_current = 0;
-
- if (rv == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- }
- }
-
- if (q->cur == q->end) {
- return 1;
- }
-
- assert(q->cur + 1 < q->end); /* require at least two items */
-
+}
+
+/* used by suffix execution in Rose */
+char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
+ const IMPL_NFA_T *limex = getImplNfa(n);
+
+ if (q->report_current) {
+ char rv = REPORTCURRENT_FN(limex, q);
+
+ q->report_current = 0;
+
+ if (rv == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ }
+ }
+
+ if (q->cur == q->end) {
+ return 1;
+ }
+
+ assert(q->cur + 1 < q->end); /* require at least two items */
+
struct CONTEXT_T ctx;
ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T));
ctx.repeat_state = q->streamState + limex->stateSize;
@@ -738,23 +738,23 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
ctx.context = q->context;
ctx.cached_estate = ZERO_STATE;
ctx.cached_br = 0;
-
- DEBUG_PRINTF("LOAD STATE\n");
+
+ DEBUG_PRINTF("LOAD STATE\n");
ctx.s = *(STATE_T *)q->state;
- assert(q->items[q->cur].type == MQE_START);
-
- u64a offset = q->offset;
- u64a sp = offset + q->items[q->cur].location;
- u64a end_abs = offset + end;
- q->cur++;
-
- while (q->cur < q->end && sp <= end_abs) {
- u64a ep = offset + q->items[q->cur].location;
- DEBUG_PRINTF("sp = %llu, ep = %llu, end_abs = %llu\n",
- sp, ep, end_abs);
- ep = MIN(ep, end_abs);
- assert(ep >= sp);
-
+ assert(q->items[q->cur].type == MQE_START);
+
+ u64a offset = q->offset;
+ u64a sp = offset + q->items[q->cur].location;
+ u64a end_abs = offset + end;
+ q->cur++;
+
+ while (q->cur < q->end && sp <= end_abs) {
+ u64a ep = offset + q->items[q->cur].location;
+ DEBUG_PRINTF("sp = %llu, ep = %llu, end_abs = %llu\n",
+ sp, ep, end_abs);
+ ep = MIN(ep, end_abs);
+ assert(ep >= sp);
+
if (sp < offset) {
DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
assert(offset - sp <= q->hlength);
@@ -773,76 +773,76 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
*(STATE_T *)q->state = ctx.s;
return MO_MATCHES_PENDING;
}
-
+
sp = local_ep;
}
- if (sp >= ep) {
- goto scan_done;
- }
-
- /* do main buffer region */
- u64a final_look = 0;
- assert(ep - offset <= q->length);
+ if (sp >= ep) {
+ goto scan_done;
+ }
+
+ /* do main buffer region */
+ u64a final_look = 0;
+ assert(ep - offset <= q->length);
if (STREAMFIRST_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp,
- &final_look) == MO_HALT_MATCHING) {
- DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu offset:%llu\n",
- final_look, sp, end_abs, offset);
- assert(q->cur);
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = sp + final_look - offset;
+ &final_look) == MO_HALT_MATCHING) {
+ DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu offset:%llu\n",
+ final_look, sp, end_abs, offset);
+ assert(q->cur);
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = sp + final_look - offset;
*(STATE_T *)q->state = ctx.s;
- return MO_MATCHES_PENDING;
- }
-
- scan_done:
- sp = ep;
-
- if (sp != offset + q->items[q->cur].location) {
- assert(q->cur);
- DEBUG_PRINTF("bail: sp = %llu end_abs == %llu offset == %llu\n",
- sp, end_abs, offset);
- assert(sp == end_abs);
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = sp - offset;
- DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end);
+ return MO_MATCHES_PENDING;
+ }
+
+ scan_done:
+ sp = ep;
+
+ if (sp != offset + q->items[q->cur].location) {
+ assert(q->cur);
+ DEBUG_PRINTF("bail: sp = %llu end_abs == %llu offset == %llu\n",
+ sp, end_abs, offset);
+ assert(sp == end_abs);
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = sp - offset;
+ DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end);
*(STATE_T *)q->state = ctx.s;
- return MO_ALIVE;
- }
-
+ return MO_ALIVE;
+ }
+
JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp);
-
- q->cur++;
- }
-
+
+ q->cur++;
+ }
+
EXPIRE_ESTATE_FN(limex, &ctx, sp);
-
- DEBUG_PRINTF("END\n");
+
+ DEBUG_PRINTF("END\n");
*(STATE_T *)q->state = ctx.s;
-
- if (q->cur != q->end) {
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = sp - offset;
- return MO_ALIVE;
- }
-
+
+ if (q->cur != q->end) {
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = sp - offset;
+ return MO_ALIVE;
+ }
+
return ISNONZERO_STATE(ctx.s);
-}
-
-// Used for execution Rose prefix/infixes.
-char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q,
- ReportID report) {
- const IMPL_NFA_T *limex = getImplNfa(n);
-
- if (q->cur == q->end) {
- return 1;
- }
-
- assert(q->cur + 1 < q->end); /* require at least two items */
-
+}
+
+// Used for execution Rose prefix/infixes.
+char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q,
+ ReportID report) {
+ const IMPL_NFA_T *limex = getImplNfa(n);
+
+ if (q->cur == q->end) {
+ return 1;
+ }
+
+ assert(q->cur + 1 < q->end); /* require at least two items */
+
struct CONTEXT_T ctx;
ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T));
ctx.repeat_state = q->streamState + limex->stateSize;
@@ -850,97 +850,97 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q,
ctx.context = NULL;
ctx.cached_estate = ZERO_STATE;
ctx.cached_br = 0;
-
- DEBUG_PRINTF("LOAD STATE\n");
+
+ DEBUG_PRINTF("LOAD STATE\n");
ctx.s = *(STATE_T *)q->state;
- assert(q->items[q->cur].type == MQE_START);
-
- u64a offset = q->offset;
- u64a sp = offset + q->items[q->cur].location;
- q->cur++;
-
- while (q->cur < q->end) {
- u64a ep = offset + q->items[q->cur].location;
- if (n->maxWidth) {
- if (ep - sp > n->maxWidth) {
- sp = ep - n->maxWidth;
+ assert(q->items[q->cur].type == MQE_START);
+
+ u64a offset = q->offset;
+ u64a sp = offset + q->items[q->cur].location;
+ q->cur++;
+
+ while (q->cur < q->end) {
+ u64a ep = offset + q->items[q->cur].location;
+ if (n->maxWidth) {
+ if (ep - sp > n->maxWidth) {
+ sp = ep - n->maxWidth;
ctx.s = INITIAL_FN(limex, !!sp);
- }
- }
- assert(ep >= sp);
-
- if (sp < offset) {
- DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
- assert(offset - sp <= q->hlength);
- u64a local_ep = MIN(offset, ep);
- /* we are starting inside the history buffer */
- STREAMSILENT_FN(limex, q->history + q->hlength + sp - offset,
+ }
+ }
+ assert(ep >= sp);
+
+ if (sp < offset) {
+ DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
+ assert(offset - sp <= q->hlength);
+ u64a local_ep = MIN(offset, ep);
+ /* we are starting inside the history buffer */
+ STREAMSILENT_FN(limex, q->history + q->hlength + sp - offset,
local_ep - sp, &ctx, sp);
-
- sp = local_ep;
- }
-
- if (sp >= ep) {
- goto scan_done;
- }
-
- /* do main buffer region */
- DEBUG_PRINTF("MAIN BUFFER SCAN\n");
- assert(ep - offset <= q->length);
+
+ sp = local_ep;
+ }
+
+ if (sp >= ep) {
+ goto scan_done;
+ }
+
+ /* do main buffer region */
+ DEBUG_PRINTF("MAIN BUFFER SCAN\n");
+ assert(ep - offset <= q->length);
STREAMSILENT_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp);
-
- DEBUG_PRINTF("SCAN DONE\n");
- scan_done:
- sp = ep;
-
+
+ DEBUG_PRINTF("SCAN DONE\n");
+ scan_done:
+ sp = ep;
+
JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp);
-
- q->cur++;
- }
-
+
+ q->cur++;
+ }
+
EXPIRE_ESTATE_FN(limex, &ctx, sp);
-
- DEBUG_PRINTF("END, nfa is %s\n",
+
+ DEBUG_PRINTF("END, nfa is %s\n",
ISNONZERO_STATE(ctx.s) ? "still alive" : "dead");
-
+
*(STATE_T *)q->state = ctx.s;
-
+
if (JOIN(limexInAccept, SIZE)(limex, ctx.s, ctx.repeat_ctrl,
ctx.repeat_state, sp + 1, report)) {
- return MO_MATCHES_PENDING;
- }
-
+ return MO_MATCHES_PENDING;
+ }
+
return ISNONZERO_STATE(ctx.s);
-}
-
-char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state,
+}
+
+char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, void *context) {
- assert(n && state);
-
- const IMPL_NFA_T *limex = getImplNfa(n);
- const STATE_T *sptr = (const STATE_T *)state;
- const union RepeatControl *repeat_ctrl =
- getRepeatControlBaseConst(state, sizeof(STATE_T));
- const char *repeat_state = streamState + limex->stateSize;
+ assert(n && state);
+
+ const IMPL_NFA_T *limex = getImplNfa(n);
+ const STATE_T *sptr = (const STATE_T *)state;
+ const union RepeatControl *repeat_ctrl =
+ getRepeatControlBaseConst(state, sizeof(STATE_T));
+ const char *repeat_state = streamState + limex->stateSize;
return TESTEOD_FN(limex, sptr, repeat_ctrl, repeat_state, offset, callback,
context);
-}
-
-char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, struct mq *q) {
- const IMPL_NFA_T *limex = getImplNfa(n);
- REPORTCURRENT_FN(limex, q);
- return 1;
-}
-
-// Block mode reverse scan.
-char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset,
+}
+
+char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, struct mq *q) {
+ const IMPL_NFA_T *limex = getImplNfa(n);
+ REPORTCURRENT_FN(limex, q);
+ return 1;
+}
+
+// Block mode reverse scan.
+char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset,
const u8 *buf, size_t buflen,
const u8 *hbuf, size_t hlen,
NfaCallback cb, void *context) {
- assert(buf || hbuf);
- assert(buflen || hlen);
-
+ assert(buf || hbuf);
+ assert(buflen || hlen);
+
struct CONTEXT_T ctx;
ctx.repeat_ctrl = NULL;
ctx.repeat_state = NULL;
@@ -948,52 +948,52 @@ char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset,
ctx.context = context;
ctx.cached_estate = ZERO_STATE;
ctx.cached_br = 0;
-
- const IMPL_NFA_T *limex = getImplNfa(n);
+
+ const IMPL_NFA_T *limex = getImplNfa(n);
ctx.s = INITIAL_FN(limex, 0); // always anchored
-
- // 'buf' may be null, for example when we're scanning at EOD time.
- if (buflen) {
- assert(buf);
- DEBUG_PRINTF("MAIN BUFFER SCAN, %zu bytes\n", buflen);
- offset -= buflen;
+
+ // 'buf' may be null, for example when we're scanning at EOD time.
+ if (buflen) {
+ assert(buf);
+ DEBUG_PRINTF("MAIN BUFFER SCAN, %zu bytes\n", buflen);
+ offset -= buflen;
REV_STREAM_FN(limex, buf, buflen, &ctx, offset);
- }
-
- if (hlen) {
- assert(hbuf);
- DEBUG_PRINTF("HISTORY BUFFER SCAN, %zu bytes\n", hlen);
- offset -= hlen;
+ }
+
+ if (hlen) {
+ assert(hbuf);
+ DEBUG_PRINTF("HISTORY BUFFER SCAN, %zu bytes\n", hlen);
+ offset -= hlen;
REV_STREAM_FN(limex, hbuf, hlen, &ctx, offset);
- }
-
+ }
+
if (offset == 0 && limex->acceptEodCount && ISNONZERO_STATE(ctx.s)) {
const union RepeatControl *repeat_ctrl = NULL;
const char *repeat_state = NULL;
TESTEOD_FN(limex, &ctx.s, repeat_ctrl, repeat_state, offset, cb,
context);
- }
-
- // NOTE: return value is unused.
- return 0;
-}
-
-char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa,
- ReportID report, struct mq *q) {
- assert(nfa && q);
- assert(q->state && q->streamState);
-
- const IMPL_NFA_T *limex = getImplNfa(nfa);
- union RepeatControl *repeat_ctrl =
- getRepeatControlBase(q->state, sizeof(STATE_T));
- char *repeat_state = q->streamState + limex->stateSize;
+ }
+
+ // NOTE: return value is unused.
+ return 0;
+}
+
+char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa,
+ ReportID report, struct mq *q) {
+ assert(nfa && q);
+ assert(q->state && q->streamState);
+
+ const IMPL_NFA_T *limex = getImplNfa(nfa);
+ union RepeatControl *repeat_ctrl =
+ getRepeatControlBase(q->state, sizeof(STATE_T));
+ char *repeat_state = q->streamState + limex->stateSize;
STATE_T state = *(STATE_T *)q->state;
- u64a offset = q->offset + q_last_loc(q) + 1;
-
- return JOIN(limexInAccept, SIZE)(limex, state, repeat_ctrl, repeat_state,
- offset, report);
-}
-
+ u64a offset = q->offset + q_last_loc(q) + 1;
+
+ return JOIN(limexInAccept, SIZE)(limex, state, repeat_ctrl, repeat_state,
+ offset, report);
+}
+
char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
assert(nfa && q);
assert(q->state && q->streamState);
@@ -1009,67 +1009,67 @@ char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
offset);
}
-enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
- const struct NFA *nfa,
- struct mq *q,
- s64a loc) {
- assert(nfa->flags & NFA_ZOMBIE);
- const IMPL_NFA_T *limex = getImplNfa(nfa);
+enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
+ const struct NFA *nfa,
+ struct mq *q,
+ s64a loc) {
+ assert(nfa->flags & NFA_ZOMBIE);
+ const IMPL_NFA_T *limex = getImplNfa(nfa);
STATE_T state = *(STATE_T *)q->state;
STATE_T zmask = LOAD_FROM_ENG(&limex->zombieMask);
-
- if (limex->repeatCount) {
- u64a offset = q->offset + loc + 1;
- union RepeatControl *repeat_ctrl =
- getRepeatControlBase(q->state, sizeof(STATE_T));
- char *repeat_state = q->streamState + limex->stateSize;
- SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &state);
- }
-
- if (ISNONZERO_STATE(AND_STATE(state, zmask))) {
- return NFA_ZOMBIE_ALWAYS_YES;
- }
-
- return NFA_ZOMBIE_NO;
-}
-
-#undef TESTEOD_FN
-#undef INITIAL_FN
-#undef TOP_FN
-#undef TOPN_FN
-#undef REPORTCURRENT_FN
-#undef COMPRESS_FN
-#undef EXPAND_FN
-#undef COMPRESS_REPEATS_FN
-#undef EXPAND_REPEATS_FN
-#undef PROCESS_ACCEPTS_FN
-#undef PROCESS_ACCEPTS_NOSQUASH_FN
-#undef GET_NFA_REPEAT_INFO_FN
-#undef RUN_ACCEL_FN
-#undef RUN_EXCEPTIONS_FN
-#undef REV_STREAM_FN
+
+ if (limex->repeatCount) {
+ u64a offset = q->offset + loc + 1;
+ union RepeatControl *repeat_ctrl =
+ getRepeatControlBase(q->state, sizeof(STATE_T));
+ char *repeat_state = q->streamState + limex->stateSize;
+ SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &state);
+ }
+
+ if (ISNONZERO_STATE(AND_STATE(state, zmask))) {
+ return NFA_ZOMBIE_ALWAYS_YES;
+ }
+
+ return NFA_ZOMBIE_NO;
+}
+
+#undef TESTEOD_FN
+#undef INITIAL_FN
+#undef TOP_FN
+#undef TOPN_FN
+#undef REPORTCURRENT_FN
+#undef COMPRESS_FN
+#undef EXPAND_FN
+#undef COMPRESS_REPEATS_FN
+#undef EXPAND_REPEATS_FN
+#undef PROCESS_ACCEPTS_FN
+#undef PROCESS_ACCEPTS_NOSQUASH_FN
+#undef GET_NFA_REPEAT_INFO_FN
+#undef RUN_ACCEL_FN
+#undef RUN_EXCEPTIONS_FN
+#undef REV_STREAM_FN
#undef LOOP_NOACCEL_FN
-#undef STREAM_FN
-#undef STREAMCB_FN
-#undef STREAMFIRST_FN
-#undef STREAMSILENT_FN
-#undef CONTEXT_T
-#undef EXCEPTION_T
-#undef AND_STATE
-#undef ANDNOT_STATE
-#undef OR_STATE
+#undef STREAM_FN
+#undef STREAMCB_FN
+#undef STREAMFIRST_FN
+#undef STREAMSILENT_FN
+#undef CONTEXT_T
+#undef EXCEPTION_T
+#undef AND_STATE
+#undef ANDNOT_STATE
+#undef OR_STATE
#undef LSHIFT_STATE
-#undef TESTBIT_STATE
+#undef TESTBIT_STATE
#undef CLEARBIT_STATE
-#undef ZERO_STATE
-#undef ISNONZERO_STATE
-#undef ISZERO_STATE
-#undef NOTEQ_STATE
-#undef DIFFRICH_STATE
-#undef INLINE_ATTR_INT
-#undef IMPL_NFA_T
-#undef SQUASH_UNTUG_BR_FN
-#undef ACCEL_MASK
-#undef ACCEL_AND_FRIENDS_MASK
-#undef EXCEPTION_MASK
-#undef LIMEX_API_ROOT
+#undef ZERO_STATE
+#undef ISNONZERO_STATE
+#undef ISZERO_STATE
+#undef NOTEQ_STATE
+#undef DIFFRICH_STATE
+#undef INLINE_ATTR_INT
+#undef IMPL_NFA_T
+#undef SQUASH_UNTUG_BR_FN
+#undef ACCEL_MASK
+#undef ACCEL_AND_FRIENDS_MASK
+#undef EXCEPTION_MASK
+#undef LIMEX_API_ROOT
diff --git a/contrib/libs/hyperscan/src/nfa/limex_simd128.c b/contrib/libs/hyperscan/src/nfa/limex_simd128.c
index 2076423172..c5f2b33e3e 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_simd128.c
+++ b/contrib/libs/hyperscan/src/nfa/limex_simd128.c
@@ -1,63 +1,63 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief LimEx NFA: 128-bit SIMD runtime implementations.
- */
-
-//#define DEBUG_INPUT
-//#define DEBUG_EXCEPTIONS
-
-#include "limex.h"
-
-#include "accel.h"
-#include "limex_internal.h"
-#include "nfa_internal.h"
-#include "ue2common.h"
-#include "util/bitutils.h"
-#include "util/simd_utils.h"
-
-// Common code
-#define STATE_ON_STACK
-#define ESTATE_ON_STACK
-
-#include "limex_runtime.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief LimEx NFA: 128-bit SIMD runtime implementations.
+ */
+
+//#define DEBUG_INPUT
+//#define DEBUG_EXCEPTIONS
+
+#include "limex.h"
+
+#include "accel.h"
+#include "limex_internal.h"
+#include "nfa_internal.h"
+#include "ue2common.h"
+#include "util/bitutils.h"
+#include "util/simd_utils.h"
+
+// Common code
+#define STATE_ON_STACK
+#define ESTATE_ON_STACK
+
+#include "limex_runtime.h"
+
#define SIZE 128
#define STATE_T m128
#define ENG_STATE_T m128
#define LOAD_FROM_ENG load_m128
-#include "limex_exceptional.h"
-
-#include "limex_state_impl.h"
-
-#define INLINE_ATTR really_inline
-#include "limex_common_impl.h"
-
-#include "limex_runtime_impl.h"
+#include "limex_exceptional.h"
+
+#include "limex_state_impl.h"
+
+#define INLINE_ATTR really_inline
+#include "limex_common_impl.h"
+
+#include "limex_runtime_impl.h"
diff --git a/contrib/libs/hyperscan/src/nfa/limex_simd256.c b/contrib/libs/hyperscan/src/nfa/limex_simd256.c
index ebe1e6bc81..cc23290810 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_simd256.c
+++ b/contrib/libs/hyperscan/src/nfa/limex_simd256.c
@@ -1,60 +1,60 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief LimEx NFA: 256-bit SIMD runtime implementations.
- */
-
-//#define DEBUG_INPUT
-//#define DEBUG_EXCEPTIONS
-
-#include "limex.h"
-
-#include "accel.h"
-#include "limex_internal.h"
-#include "nfa_internal.h"
-#include "ue2common.h"
-#include "util/bitutils.h"
-#include "util/simd_utils.h"
-
-// Common code
-#include "limex_runtime.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief LimEx NFA: 256-bit SIMD runtime implementations.
+ */
+
+//#define DEBUG_INPUT
+//#define DEBUG_EXCEPTIONS
+
+#include "limex.h"
+
+#include "accel.h"
+#include "limex_internal.h"
+#include "nfa_internal.h"
+#include "ue2common.h"
+#include "util/bitutils.h"
+#include "util/simd_utils.h"
+
+// Common code
+#include "limex_runtime.h"
+
#define SIZE 256
#define STATE_T m256
#define ENG_STATE_T m256
#define LOAD_FROM_ENG load_m256
-#include "limex_exceptional.h"
-
-#include "limex_state_impl.h"
-
-#define INLINE_ATTR really_inline
-#include "limex_common_impl.h"
-
-#include "limex_runtime_impl.h"
+#include "limex_exceptional.h"
+
+#include "limex_state_impl.h"
+
+#define INLINE_ATTR really_inline
+#include "limex_common_impl.h"
+
+#include "limex_runtime_impl.h"
diff --git a/contrib/libs/hyperscan/src/nfa/limex_simd384.c b/contrib/libs/hyperscan/src/nfa/limex_simd384.c
index 0474e0706a..7e596e48b0 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_simd384.c
+++ b/contrib/libs/hyperscan/src/nfa/limex_simd384.c
@@ -1,60 +1,60 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief LimEx NFA: 384-bit SIMD runtime implementations.
- */
-
-//#define DEBUG_INPUT
-//#define DEBUG_EXCEPTIONS
-
-#include "limex.h"
-
-#include "accel.h"
-#include "limex_internal.h"
-#include "nfa_internal.h"
-#include "ue2common.h"
-#include "util/bitutils.h"
-#include "util/simd_utils.h"
-
-// Common code
-#include "limex_runtime.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief LimEx NFA: 384-bit SIMD runtime implementations.
+ */
+
+//#define DEBUG_INPUT
+//#define DEBUG_EXCEPTIONS
+
+#include "limex.h"
+
+#include "accel.h"
+#include "limex_internal.h"
+#include "nfa_internal.h"
+#include "ue2common.h"
+#include "util/bitutils.h"
+#include "util/simd_utils.h"
+
+// Common code
+#include "limex_runtime.h"
+
#define SIZE 384
#define STATE_T m384
#define ENG_STATE_T m384
#define LOAD_FROM_ENG load_m384
-#include "limex_exceptional.h"
-
-#include "limex_state_impl.h"
-
-#define INLINE_ATTR really_inline
-#include "limex_common_impl.h"
-
-#include "limex_runtime_impl.h"
+#include "limex_exceptional.h"
+
+#include "limex_state_impl.h"
+
+#define INLINE_ATTR really_inline
+#include "limex_common_impl.h"
+
+#include "limex_runtime_impl.h"
diff --git a/contrib/libs/hyperscan/src/nfa/limex_state_impl.h b/contrib/libs/hyperscan/src/nfa/limex_state_impl.h
index 0a71678792..81153f7171 100644
--- a/contrib/libs/hyperscan/src/nfa/limex_state_impl.h
+++ b/contrib/libs/hyperscan/src/nfa/limex_state_impl.h
@@ -1,141 +1,141 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief NFA stream state handling.
- */
-
-#include "util/join.h"
-#include "util/partial_store.h"
-#include "util/state_compress.h"
-#include <string.h>
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief NFA stream state handling.
+ */
+
+#include "util/join.h"
+#include "util/partial_store.h"
+#include "util/state_compress.h"
+#include <string.h>
+
#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG)
# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer.
-#endif
-
-#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE)
-#define COMMON_T JOIN(NFACommon, SIZE)
-#define REACHMASK_FN JOIN(moNfaReachMask, SIZE)
-#define COMPRESS_FN JOIN(moNfaCompressState, SIZE)
-#define EXPAND_FN JOIN(moNfaExpandState, SIZE)
+#endif
+
+#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE)
+#define COMMON_T JOIN(NFACommon, SIZE)
+#define REACHMASK_FN JOIN(moNfaReachMask, SIZE)
+#define COMPRESS_FN JOIN(moNfaCompressState, SIZE)
+#define EXPAND_FN JOIN(moNfaExpandState, SIZE)
#define COMPRESSED_STORE_FN JOIN(store_compressed_, STATE_T)
#define COMPRESSED_LOAD_FN JOIN(load_compressed_, STATE_T)
-#define PARTIAL_STORE_FN JOIN(partial_store_, STATE_T)
-#define PARTIAL_LOAD_FN JOIN(partial_load_, STATE_T)
-#define OR_STATE JOIN(or_, STATE_T)
-#define AND_STATE JOIN(and_, STATE_T)
-#define ISZERO_STATE JOIN(isZero_, STATE_T)
-
-static really_inline
+#define PARTIAL_STORE_FN JOIN(partial_store_, STATE_T)
+#define PARTIAL_LOAD_FN JOIN(partial_load_, STATE_T)
+#define OR_STATE JOIN(or_, STATE_T)
+#define AND_STATE JOIN(and_, STATE_T)
+#define ISZERO_STATE JOIN(isZero_, STATE_T)
+
+static really_inline
const ENG_STATE_T *get_reach_table(const IMPL_NFA_T *limex) {
const ENG_STATE_T *reach
= (const ENG_STATE_T *)((const char *)limex + sizeof(*limex));
assert(ISALIGNED_N(reach, alignof(ENG_STATE_T)));
return reach;
-}
-
-static really_inline
+}
+
+static really_inline
STATE_T REACHMASK_FN(const IMPL_NFA_T *limex, const u8 key) {
const ENG_STATE_T *reach = get_reach_table(limex);
return LOAD_FROM_ENG(&reach[limex->reachMap[key]]);
}
static really_inline
-void COMPRESS_FN(const IMPL_NFA_T *limex, u8 *dest, const STATE_T *src,
- u8 key) {
- assert(ISALIGNED_N(src, alignof(STATE_T)));
+void COMPRESS_FN(const IMPL_NFA_T *limex, u8 *dest, const STATE_T *src,
+ u8 key) {
+ assert(ISALIGNED_N(src, alignof(STATE_T)));
STATE_T a_src = *src;
-
- DEBUG_PRINTF("compress state: %p -> %p\n", src, dest);
-
- if (!(limex->flags & LIMEX_FLAG_COMPRESS_STATE)) {
- // No key-based compression, just a partial store.
- DEBUG_PRINTF("store state into %u bytes\n", limex->stateSize);
- PARTIAL_STORE_FN(dest, a_src, limex->stateSize);
- } else {
- DEBUG_PRINTF("compress state, key=%hhx\n", key);
-
+
+ DEBUG_PRINTF("compress state: %p -> %p\n", src, dest);
+
+ if (!(limex->flags & LIMEX_FLAG_COMPRESS_STATE)) {
+ // No key-based compression, just a partial store.
+ DEBUG_PRINTF("store state into %u bytes\n", limex->stateSize);
+ PARTIAL_STORE_FN(dest, a_src, limex->stateSize);
+ } else {
+ DEBUG_PRINTF("compress state, key=%hhx\n", key);
+
STATE_T reachmask = REACHMASK_FN(limex, key);
-
- // Masked compression means that we mask off the initDs states and
- // provide a shortcut for the all-zeroes case. Note that these must be
- // switched on in the EXPAND call below.
- if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) {
+
+ // Masked compression means that we mask off the initDs states and
+ // provide a shortcut for the all-zeroes case. Note that these must be
+ // switched on in the EXPAND call below.
+ if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) {
STATE_T s = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), a_src);
- if (ISZERO_STATE(s)) {
- DEBUG_PRINTF("after compression mask, all states are zero\n");
- memset(dest, 0, limex->stateSize);
- return;
- }
-
+ if (ISZERO_STATE(s)) {
+ DEBUG_PRINTF("after compression mask, all states are zero\n");
+ memset(dest, 0, limex->stateSize);
+ return;
+ }
+
STATE_T mask = AND_STATE(LOAD_FROM_ENG(&limex->compressMask),
reachmask);
- COMPRESSED_STORE_FN(dest, &s, &mask, limex->stateSize);
- } else {
+ COMPRESSED_STORE_FN(dest, &s, &mask, limex->stateSize);
+ } else {
COMPRESSED_STORE_FN(dest, src, &reachmask, limex->stateSize);
- }
- }
-}
-
-static really_inline
+ }
+ }
+}
+
+static really_inline
void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, u8 key) {
- assert(ISALIGNED_N(dest, alignof(STATE_T)));
- DEBUG_PRINTF("expand state: %p -> %p\n", src, dest);
-
- if (!(limex->flags & LIMEX_FLAG_COMPRESS_STATE)) {
- // No key-based compression, just a partial load.
- DEBUG_PRINTF("load state from %u bytes\n", limex->stateSize);
- *dest = PARTIAL_LOAD_FN(src, limex->stateSize);
- } else {
- DEBUG_PRINTF("expand state, key=%hhx\n", key);
+ assert(ISALIGNED_N(dest, alignof(STATE_T)));
+ DEBUG_PRINTF("expand state: %p -> %p\n", src, dest);
+
+ if (!(limex->flags & LIMEX_FLAG_COMPRESS_STATE)) {
+ // No key-based compression, just a partial load.
+ DEBUG_PRINTF("load state from %u bytes\n", limex->stateSize);
+ *dest = PARTIAL_LOAD_FN(src, limex->stateSize);
+ } else {
+ DEBUG_PRINTF("expand state, key=%hhx\n", key);
STATE_T reachmask = REACHMASK_FN(limex, key);
-
- if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) {
+
+ if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) {
STATE_T mask = AND_STATE(LOAD_FROM_ENG(&limex->compressMask),
reachmask);
- COMPRESSED_LOAD_FN(dest, src, &mask, limex->stateSize);
+ COMPRESSED_LOAD_FN(dest, src, &mask, limex->stateSize);
*dest = OR_STATE(LOAD_FROM_ENG(&limex->initDS), *dest);
- } else {
+ } else {
COMPRESSED_LOAD_FN(dest, src, &reachmask, limex->stateSize);
- }
- }
-}
-
-#undef IMPL_NFA_T
-#undef COMMON_T
-#undef REACHMASK_FN
-#undef COMPRESS_FN
-#undef EXPAND_FN
-#undef COMPRESSED_STORE_FN
-#undef COMPRESSED_LOAD_FN
-#undef PARTIAL_STORE_FN
-#undef PARTIAL_LOAD_FN
-#undef OR_STATE
-#undef AND_STATE
-#undef ISZERO_STATE
+ }
+ }
+}
+
+#undef IMPL_NFA_T
+#undef COMMON_T
+#undef REACHMASK_FN
+#undef COMPRESS_FN
+#undef EXPAND_FN
+#undef COMPRESSED_STORE_FN
+#undef COMPRESSED_LOAD_FN
+#undef PARTIAL_STORE_FN
+#undef PARTIAL_LOAD_FN
+#undef OR_STATE
+#undef AND_STATE
+#undef ISZERO_STATE
diff --git a/contrib/libs/hyperscan/src/nfa/mcclellan.c b/contrib/libs/hyperscan/src/nfa/mcclellan.c
index d0b2f8bbbd..71f71e3275 100644
--- a/contrib/libs/hyperscan/src/nfa/mcclellan.c
+++ b/contrib/libs/hyperscan/src/nfa/mcclellan.c
@@ -1,101 +1,101 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "mcclellan.h"
-
-#include "accel.h"
-#include "mcclellan_internal.h"
-#include "nfa_api.h"
-#include "nfa_api_queue.h"
-#include "nfa_internal.h"
-#include "util/bitutils.h"
-#include "util/compare.h"
-#include "util/simd_utils.h"
-#include "ue2common.h"
-
-#include "mcclellan_common_impl.h"
-
-static really_inline
-char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m,
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "mcclellan.h"
+
+#include "accel.h"
+#include "mcclellan_internal.h"
+#include "nfa_api.h"
+#include "nfa_api_queue.h"
+#include "nfa_internal.h"
+#include "util/bitutils.h"
+#include "util/compare.h"
+#include "util/simd_utils.h"
+#include "ue2common.h"
+
+#include "mcclellan_common_impl.h"
+
+static really_inline
+char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m,
u32 s, u64a loc, char eod, u32 *cached_accept_state,
u32 *cached_accept_id) {
DEBUG_PRINTF("reporting state = %u, loc=%llu, eod %hhu\n",
s & STATE_MASK, loc, eod);
-
- if (!eod && s == *cached_accept_state) {
+
+ if (!eod && s == *cached_accept_state) {
if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING; /* termination requested */
- }
-
- return MO_CONTINUE_MATCHING; /* continue execution */
- }
-
- const struct mstate_aux *aux = get_aux(m, s);
- size_t offset = eod ? aux->accept_eod : aux->accept;
-
- assert(offset);
- const struct report_list *rl
- = (const void *)((const char *)m + offset - sizeof(struct NFA));
- assert(ISALIGNED(rl));
-
- DEBUG_PRINTF("report list size %u\n", rl->count);
- u32 count = rl->count;
-
- if (!eod && count == 1) {
- *cached_accept_state = s;
- *cached_accept_id = rl->report[0];
-
- DEBUG_PRINTF("reporting %u\n", rl->report[0]);
+ return MO_HALT_MATCHING; /* termination requested */
+ }
+
+ return MO_CONTINUE_MATCHING; /* continue execution */
+ }
+
+ const struct mstate_aux *aux = get_aux(m, s);
+ size_t offset = eod ? aux->accept_eod : aux->accept;
+
+ assert(offset);
+ const struct report_list *rl
+ = (const void *)((const char *)m + offset - sizeof(struct NFA));
+ assert(ISALIGNED(rl));
+
+ DEBUG_PRINTF("report list size %u\n", rl->count);
+ u32 count = rl->count;
+
+ if (!eod && count == 1) {
+ *cached_accept_state = s;
+ *cached_accept_id = rl->report[0];
+
+ DEBUG_PRINTF("reporting %u\n", rl->report[0]);
if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING; /* termination requested */
- }
-
- return MO_CONTINUE_MATCHING; /* continue execution */
- }
-
- for (u32 i = 0; i < count; i++) {
- DEBUG_PRINTF("reporting %u\n", rl->report[i]);
+ return MO_HALT_MATCHING; /* termination requested */
+ }
+
+ return MO_CONTINUE_MATCHING; /* continue execution */
+ }
+
+ for (u32 i = 0; i < count; i++) {
+ DEBUG_PRINTF("reporting %u\n", rl->report[i]);
if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING; /* termination requested */
- }
- }
-
- return MO_CONTINUE_MATCHING; /* continue execution */
-}
-
-static really_inline
+ return MO_HALT_MATCHING; /* termination requested */
+ }
+ }
+
+ return MO_CONTINUE_MATCHING; /* continue execution */
+}
+
+static really_inline
const u8 *run_mcclellan_accel(const struct mcclellan *m,
const struct mstate_aux *aux, u32 s,
const u8 **min_accel_offset,
const u8 *c, const u8 *c_end) {
DEBUG_PRINTF("skipping\n");
u32 accel_offset = aux[s].accel_offset;
-
+
assert(aux[s].accel_offset);
assert(accel_offset >= m->aux_offset);
assert(!m->sherman_offset || accel_offset < m->sherman_offset);
@@ -126,14 +126,14 @@ u32 doNormal16(const struct mcclellan *m, const u8 **c_inout, const u8 *end,
const u16 *succ_table
= (const u16 *)((const char *)m + sizeof(struct mcclellan));
- assert(ISALIGNED_N(succ_table, 2));
+ assert(ISALIGNED_N(succ_table, 2));
u32 sherman_base = m->sherman_limit;
- const char *sherman_base_offset
- = (const char *)m - sizeof(struct NFA) + m->sherman_offset;
+ const char *sherman_base_offset
+ = (const char *)m - sizeof(struct NFA) + m->sherman_offset;
u32 as = m->alphaShift;
-
- s &= STATE_MASK;
-
+
+ s &= STATE_MASK;
+
while (c < end && s) {
u8 cprime = m->remap[*c];
DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u)\n", *c,
@@ -247,26 +247,26 @@ char mcclellanExec16_i(const struct mcclellan *m, u32 *state, char *qstate,
s &= STATE_MASK;
- u32 cached_accept_id = 0;
+ u32 cached_accept_id = 0;
u32 cached_accept_state = 0;
-
+
DEBUG_PRINTF("s: %u, len %zu\n", s, len);
-
- const u8 *min_accel_offset = c;
- if (!m->has_accel || len < ACCEL_MIN_LEN) {
- min_accel_offset = c_end;
- goto without_accel;
- }
-
- goto with_accel;
-
-without_accel:
+
+ const u8 *min_accel_offset = c;
+ if (!m->has_accel || len < ACCEL_MIN_LEN) {
+ min_accel_offset = c_end;
+ goto without_accel;
+ }
+
+ goto with_accel;
+
+without_accel:
do {
assert(c < min_accel_offset);
if (!s) {
goto exit;
- }
-
+ }
+
if (unlikely(m->has_wide)) {
s = doNormalWide16(m, &c, min_accel_offset, s, qstate, &offset, 0,
mode);
@@ -274,27 +274,27 @@ without_accel:
s = doNormal16(m, &c, min_accel_offset, s, 0, mode);
}
- if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) {
- if (mode == STOP_AT_MATCH) {
- *state = s & STATE_MASK;
- *c_final = c - 1;
+ if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) {
+ if (mode == STOP_AT_MATCH) {
+ *state = s & STATE_MASK;
+ *c_final = c - 1;
return MO_MATCHES_PENDING;
- }
-
- u64a loc = (c - 1) - buf + offAdj + 1;
-
- if (single) {
- DEBUG_PRINTF("reporting %u\n", m->arb_report);
+ }
+
+ u64a loc = (c - 1) - buf + offAdj + 1;
+
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_DEAD; /* termination requested */
- }
- } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
+ }
+ } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
return MO_DEAD;
- }
- }
-
+ }
+ }
+
assert(c <= min_accel_offset);
} while (c < min_accel_offset);
@@ -304,15 +304,15 @@ without_accel:
goto exit;
} else {
goto with_accel;
- }
-
-with_accel:
+ }
+
+with_accel:
do {
assert(c < c_end);
if (!s) {
goto exit;
- }
-
+ }
+
if (s & ACCEL_FLAG) {
DEBUG_PRINTF("skipping\n");
s &= STATE_MASK;
@@ -330,87 +330,87 @@ with_accel:
s = doNormal16(m, &c, c_end, s, 1, mode);
}
- if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) {
- if (mode == STOP_AT_MATCH) {
- *state = s & STATE_MASK;
- *c_final = c - 1;
+ if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) {
+ if (mode == STOP_AT_MATCH) {
+ *state = s & STATE_MASK;
+ *c_final = c - 1;
return MO_MATCHES_PENDING;
- }
-
- u64a loc = (c - 1) - buf + offAdj + 1;
-
- if (single) {
- DEBUG_PRINTF("reporting %u\n", m->arb_report);
+ }
+
+ u64a loc = (c - 1) - buf + offAdj + 1;
+
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_DEAD; /* termination requested */
- }
- } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
+ }
+ } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
return MO_DEAD;
- }
+ }
}
-
+
assert(c <= c_end);
} while (c < c_end);
-
+
exit:
s &= STATE_MASK;
-
- if (mode == STOP_AT_MATCH) {
- *c_final = c_end;
- }
- *state = s;
-
+
+ if (mode == STOP_AT_MATCH) {
+ *c_final = c_end;
+ }
+ *state = s;
+
return MO_ALIVE;
-}
-
-static never_inline
+}
+
+static never_inline
char mcclellanExec16_i_cb(const struct mcclellan *m, u32 *state, char *qstate,
const u8 *buf, size_t len, u64a offAdj,
NfaCallback cb, void *ctxt, char single,
const u8 **final_point) {
return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt,
single, final_point, CALLBACK_OUTPUT);
-}
-
-static never_inline
+}
+
+static never_inline
char mcclellanExec16_i_sam(const struct mcclellan *m, u32 *state, char *qstate,
const u8 *buf, size_t len, u64a offAdj,
NfaCallback cb, void *ctxt, char single,
const u8 **final_point) {
return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt,
single, final_point, STOP_AT_MATCH);
-}
-
-static never_inline
+}
+
+static never_inline
char mcclellanExec16_i_nm(const struct mcclellan *m, u32 *state, char *qstate,
const u8 *buf, size_t len, u64a offAdj,
NfaCallback cb, void *ctxt, char single,
const u8 **final_point) {
return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt,
single, final_point, NO_MATCHES);
-}
-
-static really_inline
+}
+
+static really_inline
char mcclellanExec16_i_ni(const struct mcclellan *m, u32 *state, char *qstate,
const u8 *buf, size_t len, u64a offAdj,
NfaCallback cb, void *ctxt, char single,
const u8 **final_point, enum MatchMode mode) {
- if (mode == CALLBACK_OUTPUT) {
+ if (mode == CALLBACK_OUTPUT) {
return mcclellanExec16_i_cb(m, state, qstate, buf, len, offAdj, cb,
ctxt, single, final_point);
- } else if (mode == STOP_AT_MATCH) {
+ } else if (mode == STOP_AT_MATCH) {
return mcclellanExec16_i_sam(m, state, qstate, buf, len, offAdj, cb,
ctxt, single, final_point);
- } else {
+ } else {
assert(mode == NO_MATCHES);
return mcclellanExec16_i_nm(m, state, qstate, buf, len, offAdj, cb,
ctxt, single, final_point);
- }
-}
-
-static really_inline
+ }
+}
+
+static really_inline
u32 doNormal8(const struct mcclellan *m, const u8 **c_inout, const u8 *end,
u32 s, char do_accel, enum MatchMode mode) {
const u8 *c = *c_inout;
@@ -418,14 +418,14 @@ u32 doNormal8(const struct mcclellan *m, const u8 **c_inout, const u8 *end,
u32 accept_limit = m->accept_limit_8;
const u32 as = m->alphaShift;
- const u8 *succ_table = (const u8 *)((const char *)m
- + sizeof(struct mcclellan));
+ const u8 *succ_table = (const u8 *)((const char *)m
+ + sizeof(struct mcclellan));
while (c < end && s) {
u8 cprime = m->remap[*c];
DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c,
ourisprint(*c) ? *c : '?', cprime);
s = succ_table[(s << as) + cprime];
-
+
DEBUG_PRINTF("s: %u\n", s);
c++;
if (do_accel) {
@@ -458,70 +458,70 @@ char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf,
const struct mstate_aux *aux
= (const struct mstate_aux *)((const char *)m + m->aux_offset
- - sizeof(struct NFA));
+ - sizeof(struct NFA));
u32 accept_limit = m->accept_limit_8;
-
- u32 cached_accept_id = 0;
+
+ u32 cached_accept_id = 0;
u32 cached_accept_state = 0;
-
+
DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit);
-
+
DEBUG_PRINTF("s: %u, len %zu\n", s, len);
-
- const u8 *min_accel_offset = c;
- if (!m->has_accel || len < ACCEL_MIN_LEN) {
- min_accel_offset = c_end;
- goto without_accel;
- }
-
- goto with_accel;
-
-without_accel:
+
+ const u8 *min_accel_offset = c;
+ if (!m->has_accel || len < ACCEL_MIN_LEN) {
+ min_accel_offset = c_end;
+ goto without_accel;
+ }
+
+ goto with_accel;
+
+without_accel:
do {
assert(c < min_accel_offset);
if (!s) {
goto exit;
}
-
+
s = doNormal8(m, &c, min_accel_offset, s, 0, mode);
- if (mode != NO_MATCHES && s >= accept_limit) {
- if (mode == STOP_AT_MATCH) {
- DEBUG_PRINTF("match - pausing\n");
- *state = s;
- *c_final = c - 1;
+ if (mode != NO_MATCHES && s >= accept_limit) {
+ if (mode == STOP_AT_MATCH) {
+ DEBUG_PRINTF("match - pausing\n");
+ *state = s;
+ *c_final = c - 1;
return MO_MATCHES_PENDING;
- }
-
- u64a loc = (c - 1) - buf + offAdj + 1;
- if (single) {
- DEBUG_PRINTF("reporting %u\n", m->arb_report);
+ }
+
+ u64a loc = (c - 1) - buf + offAdj + 1;
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_DEAD;
- }
- } else if (doComplexReport(cb, ctxt, m, s, loc, 0,
+ }
+ } else if (doComplexReport(cb, ctxt, m, s, loc, 0,
&cached_accept_state, &cached_accept_id)
- == MO_HALT_MATCHING) {
+ == MO_HALT_MATCHING) {
return MO_DEAD;
- }
- }
+ }
+ }
assert(c <= min_accel_offset);
} while (c < min_accel_offset);
if (c == c_end) {
goto exit;
- }
-
-with_accel:
+ }
+
+with_accel:
do {
u32 accel_limit = m->accel_limit_8;
assert(c < c_end);
-
+
if (!s) {
goto exit;
}
-
+
if (s >= accel_limit && aux[s].accel_offset) {
c = run_mcclellan_accel(m, aux, s, &min_accel_offset, c, c_end);
if (c == c_end) {
@@ -531,7 +531,7 @@ with_accel:
}
}
s = doNormal8(m, &c, c_end, s, 1, mode);
-
+
if (mode != NO_MATCHES && s >= accept_limit) {
if (mode == STOP_AT_MATCH) {
DEBUG_PRINTF("match - pausing\n");
@@ -539,548 +539,548 @@ with_accel:
*c_final = c - 1;
return MO_MATCHES_PENDING;
}
-
+
u64a loc = (c - 1) - buf + offAdj + 1;
if (single) {
DEBUG_PRINTF("reporting %u\n", m->arb_report);
if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
return MO_DEAD;
- }
+ }
} else if (doComplexReport(cb, ctxt, m, s, loc, 0,
&cached_accept_state, &cached_accept_id)
== MO_HALT_MATCHING) {
return MO_DEAD;
- }
- }
-
+ }
+ }
+
assert(c <= c_end);
} while (c < c_end);
exit:
- *state = s;
- if (mode == STOP_AT_MATCH) {
- *c_final = c_end;
- }
+ *state = s;
+ if (mode == STOP_AT_MATCH) {
+ *c_final = c_end;
+ }
return MO_ALIVE;
-}
-
-static never_inline
+}
+
+static never_inline
char mcclellanExec8_i_cb(const struct mcclellan *m, u32 *state, const u8 *buf,
- size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
- char single, const u8 **final_point) {
- return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
+ size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+ char single, const u8 **final_point) {
+ return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
final_point, CALLBACK_OUTPUT);
-}
-
-static never_inline
+}
+
+static never_inline
char mcclellanExec8_i_sam(const struct mcclellan *m, u32 *state, const u8 *buf,
- size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
- char single, const u8 **final_point) {
- return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
+ size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+ char single, const u8 **final_point) {
+ return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
final_point, STOP_AT_MATCH);
-}
-
-static never_inline
+}
+
+static never_inline
char mcclellanExec8_i_nm(const struct mcclellan *m, u32 *state, const u8 *buf,
- size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
- char single, const u8 **final_point) {
- return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
+ size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+ char single, const u8 **final_point) {
+ return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
final_point, NO_MATCHES);
-}
-
-static really_inline
+}
+
+static really_inline
char mcclellanExec8_i_ni(const struct mcclellan *m, u32 *state, const u8 *buf,
- size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
- char single, const u8 **final_point,
- enum MatchMode mode) {
- if (mode == CALLBACK_OUTPUT) {
- return mcclellanExec8_i_cb(m, state, buf, len, offAdj, cb, ctxt, single,
- final_point);
- } else if (mode == STOP_AT_MATCH) {
- return mcclellanExec8_i_sam(m, state, buf, len, offAdj, cb, ctxt,
- single, final_point);
- } else {
- assert(mode == NO_MATCHES);
- return mcclellanExec8_i_nm(m, state, buf, len, offAdj, cb, ctxt, single,
- final_point);
- }
-}
-
-static really_inline
+ size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
+ char single, const u8 **final_point,
+ enum MatchMode mode) {
+ if (mode == CALLBACK_OUTPUT) {
+ return mcclellanExec8_i_cb(m, state, buf, len, offAdj, cb, ctxt, single,
+ final_point);
+ } else if (mode == STOP_AT_MATCH) {
+ return mcclellanExec8_i_sam(m, state, buf, len, offAdj, cb, ctxt,
+ single, final_point);
+ } else {
+ assert(mode == NO_MATCHES);
+ return mcclellanExec8_i_nm(m, state, buf, len, offAdj, cb, ctxt, single,
+ final_point);
+ }
+}
+
+static really_inline
char mcclellanCheckEOD(const struct NFA *nfa, u32 s, u64a offset,
- NfaCallback cb, void *ctxt) {
- const struct mcclellan *m = getImplNfa(nfa);
- const struct mstate_aux *aux = get_aux(m, s);
-
+ NfaCallback cb, void *ctxt) {
+ const struct mcclellan *m = getImplNfa(nfa);
+ const struct mstate_aux *aux = get_aux(m, s);
+
if (m->has_wide == 1 && s >= m->wide_limit) {
return MO_CONTINUE_MATCHING;
}
if (!aux->accept_eod) {
return MO_CONTINUE_MATCHING;
- }
+ }
return doComplexReport(cb, ctxt, m, s, offset, 1, NULL, NULL);
-}
-
-static really_inline
-char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
- const u8 *hend, NfaCallback cb, void *context,
- struct mq *q, char single, s64a end,
- enum MatchMode mode) {
- assert(n->type == MCCLELLAN_NFA_16);
- const struct mcclellan *m = getImplNfa(n);
- s64a sp;
-
- assert(ISALIGNED_N(q->state, 2));
+}
+
+static really_inline
+char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
+ const u8 *hend, NfaCallback cb, void *context,
+ struct mq *q, char single, s64a end,
+ enum MatchMode mode) {
+ assert(n->type == MCCLELLAN_NFA_16);
+ const struct mcclellan *m = getImplNfa(n);
+ s64a sp;
+
+ assert(ISALIGNED_N(q->state, 2));
u32 s = *(u16 *)q->state;
-
- if (q->report_current) {
- assert(s);
- assert(get_aux(m, s)->accept);
-
- int rv;
- if (single) {
- DEBUG_PRINTF("reporting %u\n", m->arb_report);
+
+ if (q->report_current) {
+ assert(s);
+ assert(get_aux(m, s)->accept);
+
+ int rv;
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
rv = cb(0, q_cur_offset(q), m->arb_report, context);
- } else {
- u32 cached_accept_id = 0;
+ } else {
+ u32 cached_accept_id = 0;
u32 cached_accept_state = 0;
-
- rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0,
- &cached_accept_state, &cached_accept_id);
- }
-
- q->report_current = 0;
-
- if (rv == MO_HALT_MATCHING) {
+
+ rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0,
+ &cached_accept_state, &cached_accept_id);
+ }
+
+ q->report_current = 0;
+
+ if (rv == MO_HALT_MATCHING) {
return MO_DEAD;
- }
- }
-
- sp = q_cur_loc(q);
- q->cur++;
-
- const u8 *cur_buf = sp < 0 ? hend : buffer;
-
- assert(q->cur);
- if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
- DEBUG_PRINTF("this is as far as we go\n");
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = end;
- *(u16 *)q->state = s;
- return MO_ALIVE;
- }
-
- while (1) {
- assert(q->cur < q->end);
- s64a ep = q->items[q->cur].location;
- if (mode != NO_MATCHES) {
- ep = MIN(ep, end);
- }
-
- assert(ep >= sp);
-
- s64a local_ep = ep;
- if (sp < 0) {
- local_ep = MIN(0, ep);
- }
-
- /* do main buffer region */
- const u8 *final_look;
+ }
+ }
+
+ sp = q_cur_loc(q);
+ q->cur++;
+
+ const u8 *cur_buf = sp < 0 ? hend : buffer;
+
+ assert(q->cur);
+ if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
+ DEBUG_PRINTF("this is as far as we go\n");
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ *(u16 *)q->state = s;
+ return MO_ALIVE;
+ }
+
+ while (1) {
+ assert(q->cur < q->end);
+ s64a ep = q->items[q->cur].location;
+ if (mode != NO_MATCHES) {
+ ep = MIN(ep, end);
+ }
+
+ assert(ep >= sp);
+
+ s64a local_ep = ep;
+ if (sp < 0) {
+ local_ep = MIN(0, ep);
+ }
+
+ /* do main buffer region */
+ const u8 *final_look;
char rv = mcclellanExec16_i_ni(m, &s, q->state, cur_buf + sp,
local_ep - sp, offset + sp, cb, context,
single, &final_look, mode);
if (rv == MO_DEAD) {
- *(u16 *)q->state = 0;
+ *(u16 *)q->state = 0;
return MO_DEAD;
- }
+ }
if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) {
- DEBUG_PRINTF("this is as far as we go\n");
+ DEBUG_PRINTF("this is as far as we go\n");
DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf);
- assert(q->cur);
+ assert(q->cur);
assert(final_look != cur_buf + local_ep);
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = final_look - cur_buf + 1; /* due to
- * early -1 */
- *(u16 *)q->state = s;
- return MO_MATCHES_PENDING;
- }
-
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = final_look - cur_buf + 1; /* due to
+ * early -1 */
+ *(u16 *)q->state = s;
+ return MO_MATCHES_PENDING;
+ }
+
assert(rv == MO_ALIVE);
- assert(q->cur);
- if (mode != NO_MATCHES && q->items[q->cur].location > end) {
- DEBUG_PRINTF("this is as far as we go\n");
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = end;
- *(u16 *)q->state = s;
- return MO_ALIVE;
- }
-
- sp = local_ep;
-
- if (sp == 0) {
- cur_buf = buffer;
- }
-
- if (sp != ep) {
- continue;
- }
-
- switch (q->items[q->cur].type) {
- case MQE_TOP:
- assert(sp + offset || !s);
- if (sp + offset == 0) {
- s = m->start_anchored;
- break;
- }
- s = mcclellanEnableStarts(m, s);
- break;
- case MQE_END:
- *(u16 *)q->state = s;
- q->cur++;
+ assert(q->cur);
+ if (mode != NO_MATCHES && q->items[q->cur].location > end) {
+ DEBUG_PRINTF("this is as far as we go\n");
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ *(u16 *)q->state = s;
+ return MO_ALIVE;
+ }
+
+ sp = local_ep;
+
+ if (sp == 0) {
+ cur_buf = buffer;
+ }
+
+ if (sp != ep) {
+ continue;
+ }
+
+ switch (q->items[q->cur].type) {
+ case MQE_TOP:
+ assert(sp + offset || !s);
+ if (sp + offset == 0) {
+ s = m->start_anchored;
+ break;
+ }
+ s = mcclellanEnableStarts(m, s);
+ break;
+ case MQE_END:
+ *(u16 *)q->state = s;
+ q->cur++;
return s ? MO_ALIVE : MO_DEAD;
- default:
- assert(!"invalid queue event");
- }
-
- q->cur++;
- }
-}
-
+ default:
+ assert(!"invalid queue event");
+ }
+
+ q->cur++;
+ }
+}
+
static really_inline
char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer,
size_t length, NfaCallback cb, void *context,
char single) {
- assert(n->type == MCCLELLAN_NFA_16);
- const struct mcclellan *m = getImplNfa(n);
+ assert(n->type == MCCLELLAN_NFA_16);
+ const struct mcclellan *m = getImplNfa(n);
u32 s = m->start_anchored;
-
+
if (mcclellanExec16_i(m, &s, NULL, buffer, length, offset, cb, context,
single, NULL, CALLBACK_OUTPUT)
== MO_DEAD) {
return s ? MO_ALIVE : MO_DEAD;
- }
-
+ }
+
if (m->has_wide == 1 && s >= m->wide_limit) {
return MO_ALIVE;
}
- const struct mstate_aux *aux = get_aux(m, s);
-
- if (aux->accept_eod) {
- doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL);
- }
-
+ const struct mstate_aux *aux = get_aux(m, s);
+
+ if (aux->accept_eod) {
+ doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL);
+ }
+
return MO_ALIVE;
-}
-
-static really_inline
-char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
+}
+
+static really_inline
+char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
const u8 *hend, NfaCallback cb, void *context,
struct mq *q, char single, s64a end,
enum MatchMode mode) {
- assert(n->type == MCCLELLAN_NFA_8);
- const struct mcclellan *m = getImplNfa(n);
- s64a sp;
-
+ assert(n->type == MCCLELLAN_NFA_8);
+ const struct mcclellan *m = getImplNfa(n);
+ s64a sp;
+
u32 s = *(u8 *)q->state;
-
- if (q->report_current) {
- assert(s);
- assert(s >= m->accept_limit_8);
-
- int rv;
- if (single) {
- DEBUG_PRINTF("reporting %u\n", m->arb_report);
+
+ if (q->report_current) {
+ assert(s);
+ assert(s >= m->accept_limit_8);
+
+ int rv;
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
rv = cb(0, q_cur_offset(q), m->arb_report, context);
- } else {
- u32 cached_accept_id = 0;
+ } else {
+ u32 cached_accept_id = 0;
u32 cached_accept_state = 0;
-
- rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0,
- &cached_accept_state, &cached_accept_id);
- }
-
- q->report_current = 0;
-
- if (rv == MO_HALT_MATCHING) {
+
+ rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0,
+ &cached_accept_state, &cached_accept_id);
+ }
+
+ q->report_current = 0;
+
+ if (rv == MO_HALT_MATCHING) {
return MO_DEAD;
- }
- }
-
- sp = q_cur_loc(q);
- q->cur++;
-
- const u8 *cur_buf = sp < 0 ? hend : buffer;
-
- if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
- DEBUG_PRINTF("this is as far as we go\n");
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = end;
- *(u8 *)q->state = s;
- return MO_ALIVE;
- }
-
- while (1) {
- DEBUG_PRINTF("%s @ %llu\n", q->items[q->cur].type == MQE_TOP ? "TOP" :
- q->items[q->cur].type == MQE_END ? "END" : "???",
- q->items[q->cur].location + offset);
- assert(q->cur < q->end);
- s64a ep = q->items[q->cur].location;
- if (mode != NO_MATCHES) {
- ep = MIN(ep, end);
- }
-
- assert(ep >= sp);
-
- s64a local_ep = ep;
- if (sp < 0) {
- local_ep = MIN(0, ep);
- }
-
- const u8 *final_look;
+ }
+ }
+
+ sp = q_cur_loc(q);
+ q->cur++;
+
+ const u8 *cur_buf = sp < 0 ? hend : buffer;
+
+ if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
+ DEBUG_PRINTF("this is as far as we go\n");
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ *(u8 *)q->state = s;
+ return MO_ALIVE;
+ }
+
+ while (1) {
+ DEBUG_PRINTF("%s @ %llu\n", q->items[q->cur].type == MQE_TOP ? "TOP" :
+ q->items[q->cur].type == MQE_END ? "END" : "???",
+ q->items[q->cur].location + offset);
+ assert(q->cur < q->end);
+ s64a ep = q->items[q->cur].location;
+ if (mode != NO_MATCHES) {
+ ep = MIN(ep, end);
+ }
+
+ assert(ep >= sp);
+
+ s64a local_ep = ep;
+ if (sp < 0) {
+ local_ep = MIN(0, ep);
+ }
+
+ const u8 *final_look;
char rv = mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp,
offset + sp, cb, context, single,
&final_look, mode);
if (rv == MO_HALT_MATCHING) {
- *(u8 *)q->state = 0;
+ *(u8 *)q->state = 0;
return MO_DEAD;
- }
+ }
if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) {
DEBUG_PRINTF("this is as far as we go\n");
DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf);
- assert(q->cur);
+ assert(q->cur);
assert(final_look != cur_buf + local_ep);
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = final_look - cur_buf + 1; /* due to
- * early -1 */
- *(u8 *)q->state = s;
- return MO_MATCHES_PENDING;
- }
-
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = final_look - cur_buf + 1; /* due to
+ * early -1 */
+ *(u8 *)q->state = s;
+ return MO_MATCHES_PENDING;
+ }
+
assert(rv == MO_ALIVE);
- assert(q->cur);
- if (mode != NO_MATCHES && q->items[q->cur].location > end) {
- DEBUG_PRINTF("this is as far as we go\n");
- assert(q->cur);
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = end;
- *(u8 *)q->state = s;
- return MO_ALIVE;
- }
-
- sp = local_ep;
-
- if (sp == 0) {
- cur_buf = buffer;
- }
-
- if (sp != ep) {
- continue;
- }
-
- switch (q->items[q->cur].type) {
- case MQE_TOP:
- assert(sp + offset || !s);
- if (sp + offset == 0) {
- s = (u8)m->start_anchored;
- break;
- }
- s = mcclellanEnableStarts(m, s);
- break;
- case MQE_END:
- *(u8 *)q->state = s;
- q->cur++;
+ assert(q->cur);
+ if (mode != NO_MATCHES && q->items[q->cur].location > end) {
+ DEBUG_PRINTF("this is as far as we go\n");
+ assert(q->cur);
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ *(u8 *)q->state = s;
+ return MO_ALIVE;
+ }
+
+ sp = local_ep;
+
+ if (sp == 0) {
+ cur_buf = buffer;
+ }
+
+ if (sp != ep) {
+ continue;
+ }
+
+ switch (q->items[q->cur].type) {
+ case MQE_TOP:
+ assert(sp + offset || !s);
+ if (sp + offset == 0) {
+ s = (u8)m->start_anchored;
+ break;
+ }
+ s = mcclellanEnableStarts(m, s);
+ break;
+ case MQE_END:
+ *(u8 *)q->state = s;
+ q->cur++;
return s ? MO_ALIVE : MO_DEAD;
- default:
- assert(!"invalid queue event");
- }
-
- q->cur++;
- }
-}
-
+ default:
+ assert(!"invalid queue event");
+ }
+
+ q->cur++;
+ }
+}
+
static really_inline
-char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer,
- size_t length, NfaCallback cb, void *context,
- char single) {
- assert(n->type == MCCLELLAN_NFA_8);
- const struct mcclellan *m = getImplNfa(n);
+char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer,
+ size_t length, NfaCallback cb, void *context,
+ char single) {
+ assert(n->type == MCCLELLAN_NFA_8);
+ const struct mcclellan *m = getImplNfa(n);
u32 s = m->start_anchored;
-
- if (mcclellanExec8_i(m, &s, buffer, length, offset, cb, context, single,
- NULL, CALLBACK_OUTPUT)
+
+ if (mcclellanExec8_i(m, &s, buffer, length, offset, cb, context, single,
+ NULL, CALLBACK_OUTPUT)
== MO_DEAD) {
return MO_DEAD;
- }
-
- const struct mstate_aux *aux = get_aux(m, s);
-
- if (aux->accept_eod) {
- doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL);
- }
-
+ }
+
+ const struct mstate_aux *aux = get_aux(m, s);
+
+ if (aux->accept_eod) {
+ doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL);
+ }
+
return s ? MO_ALIVE : MO_DEAD;
-}
-
-char nfaExecMcClellan8_B(const struct NFA *n, u64a offset, const u8 *buffer,
- size_t length, NfaCallback cb, void *context) {
- assert(n->type == MCCLELLAN_NFA_8);
- const struct mcclellan *m = getImplNfa(n);
-
- if (m->flags & MCCLELLAN_FLAG_SINGLE) {
- return nfaExecMcClellan8_Bi(n, offset, buffer, length, cb, context, 1);
- } else {
- return nfaExecMcClellan8_Bi(n, offset, buffer, length, cb, context, 0);
- }
-}
-
-char nfaExecMcClellan8_Q(const struct NFA *n, struct mq *q, s64a end) {
- u64a offset = q->offset;
- const u8 *buffer = q->buffer;
- NfaCallback cb = q->cb;
- void *context = q->context;
- assert(n->type == MCCLELLAN_NFA_8);
- const struct mcclellan *m = getImplNfa(n);
- const u8 *hend = q->history + q->hlength;
-
- return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
- m->flags & MCCLELLAN_FLAG_SINGLE, end,
- CALLBACK_OUTPUT);
-}
-
-char nfaExecMcClellan16_B(const struct NFA *n, u64a offset, const u8 *buffer,
- size_t length, NfaCallback cb, void *context) {
- assert(n->type == MCCLELLAN_NFA_16);
- const struct mcclellan *m = getImplNfa(n);
-
- if (m->flags & MCCLELLAN_FLAG_SINGLE) {
- return nfaExecMcClellan16_Bi(n, offset, buffer, length, cb, context, 1);
- } else {
- return nfaExecMcClellan16_Bi(n, offset, buffer, length, cb, context, 0);
- }
-}
-
-char nfaExecMcClellan16_Q(const struct NFA *n, struct mq *q, s64a end) {
- u64a offset = q->offset;
- const u8 *buffer = q->buffer;
- NfaCallback cb = q->cb;
- void *context = q->context;
- assert(n->type == MCCLELLAN_NFA_16);
- const struct mcclellan *m = getImplNfa(n);
- const u8 *hend = q->history + q->hlength;
-
- return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
- m->flags & MCCLELLAN_FLAG_SINGLE, end,
- CALLBACK_OUTPUT);
-}
-
-char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) {
- const struct mcclellan *m = getImplNfa(n);
- NfaCallback cb = q->cb;
- void *ctxt = q->context;
+}
+
+char nfaExecMcClellan8_B(const struct NFA *n, u64a offset, const u8 *buffer,
+ size_t length, NfaCallback cb, void *context) {
+ assert(n->type == MCCLELLAN_NFA_8);
+ const struct mcclellan *m = getImplNfa(n);
+
+ if (m->flags & MCCLELLAN_FLAG_SINGLE) {
+ return nfaExecMcClellan8_Bi(n, offset, buffer, length, cb, context, 1);
+ } else {
+ return nfaExecMcClellan8_Bi(n, offset, buffer, length, cb, context, 0);
+ }
+}
+
+char nfaExecMcClellan8_Q(const struct NFA *n, struct mq *q, s64a end) {
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
+ NfaCallback cb = q->cb;
+ void *context = q->context;
+ assert(n->type == MCCLELLAN_NFA_8);
+ const struct mcclellan *m = getImplNfa(n);
+ const u8 *hend = q->history + q->hlength;
+
+ return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
+ m->flags & MCCLELLAN_FLAG_SINGLE, end,
+ CALLBACK_OUTPUT);
+}
+
+char nfaExecMcClellan16_B(const struct NFA *n, u64a offset, const u8 *buffer,
+ size_t length, NfaCallback cb, void *context) {
+ assert(n->type == MCCLELLAN_NFA_16);
+ const struct mcclellan *m = getImplNfa(n);
+
+ if (m->flags & MCCLELLAN_FLAG_SINGLE) {
+ return nfaExecMcClellan16_Bi(n, offset, buffer, length, cb, context, 1);
+ } else {
+ return nfaExecMcClellan16_Bi(n, offset, buffer, length, cb, context, 0);
+ }
+}
+
+char nfaExecMcClellan16_Q(const struct NFA *n, struct mq *q, s64a end) {
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
+ NfaCallback cb = q->cb;
+ void *context = q->context;
+ assert(n->type == MCCLELLAN_NFA_16);
+ const struct mcclellan *m = getImplNfa(n);
+ const u8 *hend = q->history + q->hlength;
+
+ return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
+ m->flags & MCCLELLAN_FLAG_SINGLE, end,
+ CALLBACK_OUTPUT);
+}
+
+char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) {
+ const struct mcclellan *m = getImplNfa(n);
+ NfaCallback cb = q->cb;
+ void *ctxt = q->context;
u32 s = *(u8 *)q->state;
- u8 single = m->flags & MCCLELLAN_FLAG_SINGLE;
- u64a offset = q_cur_offset(q);
- assert(q_cur_type(q) == MQE_START);
- assert(s);
-
- if (s >= m->accept_limit_8) {
- if (single) {
- DEBUG_PRINTF("reporting %u\n", m->arb_report);
+ u8 single = m->flags & MCCLELLAN_FLAG_SINGLE;
+ u64a offset = q_cur_offset(q);
+ assert(q_cur_type(q) == MQE_START);
+ assert(s);
+
+ if (s >= m->accept_limit_8) {
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
cb(0, offset, m->arb_report, ctxt);
- } else {
- u32 cached_accept_id = 0;
+ } else {
+ u32 cached_accept_id = 0;
u32 cached_accept_state = 0;
-
- doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state,
- &cached_accept_id);
- }
- }
-
- return 0;
-}
-
-char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) {
+
+ doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state,
+ &cached_accept_id);
+ }
+ }
+
+ return 0;
+}
+
+char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) {
const struct mcclellan *m = getImplNfa(n);
- NfaCallback cb = q->cb;
- void *ctxt = q->context;
+ NfaCallback cb = q->cb;
+ void *ctxt = q->context;
u32 s = *(u16 *)q->state;
- const struct mstate_aux *aux = get_aux(m, s);
- u8 single = m->flags & MCCLELLAN_FLAG_SINGLE;
- u64a offset = q_cur_offset(q);
- assert(q_cur_type(q) == MQE_START);
+ const struct mstate_aux *aux = get_aux(m, s);
+ u8 single = m->flags & MCCLELLAN_FLAG_SINGLE;
+ u64a offset = q_cur_offset(q);
+ assert(q_cur_type(q) == MQE_START);
DEBUG_PRINTF("state %u\n", s);
- assert(s);
-
- if (aux->accept) {
- if (single) {
- DEBUG_PRINTF("reporting %u\n", m->arb_report);
+ assert(s);
+
+ if (aux->accept) {
+ if (single) {
+ DEBUG_PRINTF("reporting %u\n", m->arb_report);
cb(0, offset, m->arb_report, ctxt);
- } else {
- u32 cached_accept_id = 0;
+ } else {
+ u32 cached_accept_id = 0;
u32 cached_accept_state = 0;
-
- doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state,
- &cached_accept_id);
- }
- }
-
- return 0;
-}
-
-static
-char mcclellanHasAccept(const struct mcclellan *m, const struct mstate_aux *aux,
- ReportID report) {
- assert(m && aux);
-
- if (!aux->accept) {
- return 0;
- }
-
- const struct report_list *rl = (const struct report_list *)
- ((const char *)m + aux->accept - sizeof(struct NFA));
- assert(ISALIGNED_N(rl, 4));
-
- DEBUG_PRINTF("report list has %u entries\n", rl->count);
-
- for (u32 i = 0; i < rl->count; i++) {
- if (rl->report[i] == report) {
- return 1;
- }
- }
-
- return 0;
-}
-
-char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
- struct mq *q) {
- assert(n && q);
-
+
+ doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state,
+ &cached_accept_id);
+ }
+ }
+
+ return 0;
+}
+
+static
+char mcclellanHasAccept(const struct mcclellan *m, const struct mstate_aux *aux,
+ ReportID report) {
+ assert(m && aux);
+
+ if (!aux->accept) {
+ return 0;
+ }
+
+ const struct report_list *rl = (const struct report_list *)
+ ((const char *)m + aux->accept - sizeof(struct NFA));
+ assert(ISALIGNED_N(rl, 4));
+
+ DEBUG_PRINTF("report list has %u entries\n", rl->count);
+
+ for (u32 i = 0; i < rl->count; i++) {
+ if (rl->report[i] == report) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q) {
+ assert(n && q);
+
const struct mcclellan *m = getImplNfa(n);
- u8 s = *(u8 *)q->state;
- DEBUG_PRINTF("checking accepts for %hhu\n", s);
- if (s < m->accept_limit_8) {
- return 0;
- }
-
- return mcclellanHasAccept(m, get_aux(m, s), report);
-}
-
+ u8 s = *(u8 *)q->state;
+ DEBUG_PRINTF("checking accepts for %hhu\n", s);
+ if (s < m->accept_limit_8) {
+ return 0;
+ }
+
+ return mcclellanHasAccept(m, get_aux(m, s), report);
+}
+
char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q) {
assert(n && q);
-
+
const struct mcclellan *m = getImplNfa(n);
u8 s = *(u8 *)q->state;
DEBUG_PRINTF("checking accepts for %hhu\n", s);
@@ -1089,18 +1089,18 @@ char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q) {
return s >= m->accept_limit_8;
}
-char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
- struct mq *q) {
- assert(n && q);
-
+char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q) {
+ assert(n && q);
+
const struct mcclellan *m = getImplNfa(n);
- u16 s = *(u16 *)q->state;
- DEBUG_PRINTF("checking accepts for %hu\n", s);
-
+ u16 s = *(u16 *)q->state;
+ DEBUG_PRINTF("checking accepts for %hu\n", s);
+
return (m->has_wide == 1 && s >= m->wide_limit) ?
0 : mcclellanHasAccept(m, get_aux(m, s), report);
-}
-
+}
+
char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) {
assert(n && q);
@@ -1112,125 +1112,125 @@ char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) {
0 : !!get_aux(m, s)->accept;
}
-char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) {
- u64a offset = q->offset;
- const u8 *buffer = q->buffer;
- NfaCallback cb = q->cb;
- void *context = q->context;
- assert(n->type == MCCLELLAN_NFA_8);
+char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) {
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
+ NfaCallback cb = q->cb;
+ void *context = q->context;
+ assert(n->type == MCCLELLAN_NFA_8);
const struct mcclellan *m = getImplNfa(n);
- const u8 *hend = q->history + q->hlength;
-
- return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
- m->flags & MCCLELLAN_FLAG_SINGLE, end,
- STOP_AT_MATCH);
-}
-
-char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end) {
- u64a offset = q->offset;
- const u8 *buffer = q->buffer;
- NfaCallback cb = q->cb;
- void *context = q->context;
- assert(n->type == MCCLELLAN_NFA_16);
+ const u8 *hend = q->history + q->hlength;
+
+ return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
+ m->flags & MCCLELLAN_FLAG_SINGLE, end,
+ STOP_AT_MATCH);
+}
+
+char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end) {
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
+ NfaCallback cb = q->cb;
+ void *context = q->context;
+ assert(n->type == MCCLELLAN_NFA_16);
const struct mcclellan *m = getImplNfa(n);
- const u8 *hend = q->history + q->hlength;
-
- return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
- m->flags & MCCLELLAN_FLAG_SINGLE, end,
- STOP_AT_MATCH);
-}
-
-char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report) {
- u64a offset = q->offset;
- const u8 *buffer = q->buffer;
- NfaCallback cb = q->cb;
- void *context = q->context;
- assert(n->type == MCCLELLAN_NFA_8);
+ const u8 *hend = q->history + q->hlength;
+
+ return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
+ m->flags & MCCLELLAN_FLAG_SINGLE, end,
+ STOP_AT_MATCH);
+}
+
+char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report) {
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
+ NfaCallback cb = q->cb;
+ void *context = q->context;
+ assert(n->type == MCCLELLAN_NFA_8);
const struct mcclellan *m = getImplNfa(n);
- const u8 *hend = q->history + q->hlength;
-
- char rv = nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
- m->flags & MCCLELLAN_FLAG_SINGLE, 0 /* end */,
- NO_MATCHES);
- if (rv && nfaExecMcClellan8_inAccept(n, report, q)) {
- return MO_MATCHES_PENDING;
- } else {
- return rv;
- }
-}
-
-char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) {
- u64a offset = q->offset;
- const u8 *buffer = q->buffer;
- NfaCallback cb = q->cb;
- void *context = q->context;
- assert(n->type == MCCLELLAN_NFA_16);
+ const u8 *hend = q->history + q->hlength;
+
+ char rv = nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
+ m->flags & MCCLELLAN_FLAG_SINGLE, 0 /* end */,
+ NO_MATCHES);
+ if (rv && nfaExecMcClellan8_inAccept(n, report, q)) {
+ return MO_MATCHES_PENDING;
+ } else {
+ return rv;
+ }
+}
+
+char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) {
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
+ NfaCallback cb = q->cb;
+ void *context = q->context;
+ assert(n->type == MCCLELLAN_NFA_16);
const struct mcclellan *m = getImplNfa(n);
- const u8 *hend = q->history + q->hlength;
-
- char rv = nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
- m->flags & MCCLELLAN_FLAG_SINGLE,
- 0 /* end */, NO_MATCHES);
-
- if (rv && nfaExecMcClellan16_inAccept(n, report, q)) {
- return MO_MATCHES_PENDING;
- } else {
- return rv;
- }
-}
-
-char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset,
- void *state, UNUSED u8 key) {
+ const u8 *hend = q->history + q->hlength;
+
+ char rv = nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
+ m->flags & MCCLELLAN_FLAG_SINGLE,
+ 0 /* end */, NO_MATCHES);
+
+ if (rv && nfaExecMcClellan16_inAccept(n, report, q)) {
+ return MO_MATCHES_PENDING;
+ } else {
+ return rv;
+ }
+}
+
+char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset,
+ void *state, UNUSED u8 key) {
const struct mcclellan *m = getImplNfa(nfa);
- u8 s = offset ? m->start_floating : m->start_anchored;
- if (s) {
- *(u8 *)state = s;
- return 1;
- }
- return 0;
-}
-
-char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
- void *state, UNUSED u8 key) {
+ u8 s = offset ? m->start_floating : m->start_anchored;
+ if (s) {
+ *(u8 *)state = s;
+ return 1;
+ }
+ return 0;
+}
+
+char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
+ void *state, UNUSED u8 key) {
const struct mcclellan *m = getImplNfa(nfa);
- u16 s = offset ? m->start_floating : m->start_anchored;
+ u16 s = offset ? m->start_floating : m->start_anchored;
// new byte
if (m->has_wide) {
unaligned_store_u16((u16 *)state + 1, 0);
}
- if (s) {
- unaligned_store_u16(state, s);
- return 1;
- }
- return 0;
-}
-
-void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
- const u8 *buf, char top, size_t start_off,
- size_t len, NfaCallback cb, void *ctxt) {
+ if (s) {
+ unaligned_store_u16(state, s);
+ return 1;
+ }
+ return 0;
+}
+
+void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
+ const u8 *buf, char top, size_t start_off,
+ size_t len, NfaCallback cb, void *ctxt) {
const struct mcclellan *m = getImplNfa(nfa);
-
+
u32 s = top ? m->start_anchored : *(u8 *)state;
- if (m->flags & MCCLELLAN_FLAG_SINGLE) {
+ if (m->flags & MCCLELLAN_FLAG_SINGLE) {
mcclellanExec8_i(m, &s, buf + start_off, len - start_off,
- start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT);
- } else {
+ start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT);
+ } else {
mcclellanExec8_i(m, &s, buf + start_off, len - start_off,
- start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT);
- }
+ start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT);
+ }
*(u8 *)state = s;
-}
-
-void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
- const u8 *buf, char top, size_t start_off,
- size_t len, NfaCallback cb, void *ctxt) {
+}
+
+void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
+ const u8 *buf, char top, size_t start_off,
+ size_t len, NfaCallback cb, void *ctxt) {
const struct mcclellan *m = getImplNfa(nfa);
u32 s;
-
+
if (top) {
s = m->start_anchored;
@@ -1242,109 +1242,109 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
s = unaligned_load_u16(state);
}
- if (m->flags & MCCLELLAN_FLAG_SINGLE) {
+ if (m->flags & MCCLELLAN_FLAG_SINGLE) {
mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off,
start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT);
- } else {
+ } else {
mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off,
start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT);
- }
+ }
unaligned_store_u16(state, s);
-}
-
-char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state,
+}
+
+char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state,
UNUSED const char *streamState, u64a offset,
NfaCallback callback, void *context) {
return mcclellanCheckEOD(nfa, *(const u8 *)state, offset, callback,
context);
-}
-
-char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state,
+}
+
+char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state,
UNUSED const char *streamState, u64a offset,
NfaCallback callback, void *context) {
- assert(ISALIGNED_N(state, 2));
+ assert(ISALIGNED_N(state, 2));
return mcclellanCheckEOD(nfa, *(const u16 *)state, offset, callback,
context);
-}
-
+}
+
char nfaExecMcClellan8_queueInitState(UNUSED const struct NFA *nfa,
struct mq *q) {
- assert(nfa->scratchStateSize == 1);
- *(u8 *)q->state = 0;
- return 0;
-}
-
+ assert(nfa->scratchStateSize == 1);
+ *(u8 *)q->state = 0;
+ return 0;
+}
+
char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa,
struct mq *q) {
const struct mcclellan *m = getImplNfa(nfa);
assert(m->has_wide == 1 ? nfa->scratchStateSize == 4
: nfa->scratchStateSize == 2);
- assert(ISALIGNED_N(q->state, 2));
- *(u16 *)q->state = 0;
+ assert(ISALIGNED_N(q->state, 2));
+ *(u16 *)q->state = 0;
// new byte
if (m->has_wide) {
unaligned_store_u16((u16 *)q->state + 1, 0);
}
- return 0;
-}
-
-char nfaExecMcClellan8_queueCompressState(UNUSED const struct NFA *nfa,
- const struct mq *q, UNUSED s64a loc) {
- void *dest = q->streamState;
- const void *src = q->state;
- assert(nfa->scratchStateSize == 1);
- assert(nfa->streamStateSize == 1);
- *(u8 *)dest = *(const u8 *)src;
- return 0;
-}
-
-char nfaExecMcClellan8_expandState(UNUSED const struct NFA *nfa, void *dest,
- const void *src, UNUSED u64a offset,
- UNUSED u8 key) {
- assert(nfa->scratchStateSize == 1);
- assert(nfa->streamStateSize == 1);
- *(u8 *)dest = *(const u8 *)src;
- return 0;
-}
-
-char nfaExecMcClellan16_queueCompressState(UNUSED const struct NFA *nfa,
- const struct mq *q,
- UNUSED s64a loc) {
+ return 0;
+}
+
+char nfaExecMcClellan8_queueCompressState(UNUSED const struct NFA *nfa,
+ const struct mq *q, UNUSED s64a loc) {
+ void *dest = q->streamState;
+ const void *src = q->state;
+ assert(nfa->scratchStateSize == 1);
+ assert(nfa->streamStateSize == 1);
+ *(u8 *)dest = *(const u8 *)src;
+ return 0;
+}
+
+char nfaExecMcClellan8_expandState(UNUSED const struct NFA *nfa, void *dest,
+ const void *src, UNUSED u64a offset,
+ UNUSED u8 key) {
+ assert(nfa->scratchStateSize == 1);
+ assert(nfa->streamStateSize == 1);
+ *(u8 *)dest = *(const u8 *)src;
+ return 0;
+}
+
+char nfaExecMcClellan16_queueCompressState(UNUSED const struct NFA *nfa,
+ const struct mq *q,
+ UNUSED s64a loc) {
const struct mcclellan *m = getImplNfa(nfa);
- void *dest = q->streamState;
- const void *src = q->state;
+ void *dest = q->streamState;
+ const void *src = q->state;
assert(m->has_wide == 1 ? nfa->scratchStateSize == 4
: nfa->scratchStateSize == 2);
assert(m->has_wide == 1 ? nfa->streamStateSize == 4
: nfa->streamStateSize == 2);
- assert(ISALIGNED_N(src, 2));
- unaligned_store_u16(dest, *(const u16 *)(src));
+ assert(ISALIGNED_N(src, 2));
+ unaligned_store_u16(dest, *(const u16 *)(src));
// new byte
if (m->has_wide) {
unaligned_store_u16((u16 *)dest + 1, *((const u16 *)src + 1));
}
- return 0;
-}
-
-char nfaExecMcClellan16_expandState(UNUSED const struct NFA *nfa, void *dest,
- const void *src, UNUSED u64a offset,
- UNUSED u8 key) {
+ return 0;
+}
+
+char nfaExecMcClellan16_expandState(UNUSED const struct NFA *nfa, void *dest,
+ const void *src, UNUSED u64a offset,
+ UNUSED u8 key) {
const struct mcclellan *m = getImplNfa(nfa);
assert(m->has_wide == 1 ? nfa->scratchStateSize == 4
: nfa->scratchStateSize == 2);
assert(m->has_wide == 1 ? nfa->streamStateSize == 4
: nfa->streamStateSize == 2);
- assert(ISALIGNED_N(dest, 2));
- *(u16 *)dest = unaligned_load_u16(src);
+ assert(ISALIGNED_N(dest, 2));
+ *(u16 *)dest = unaligned_load_u16(src);
// new byte
if (m->has_wide) {
*((u16 *)dest + 1) = unaligned_load_u16((const u16 *)src + 1);
}
- return 0;
-}
+ return 0;
+}
diff --git a/contrib/libs/hyperscan/src/nfa/mcclellan.h b/contrib/libs/hyperscan/src/nfa/mcclellan.h
index 92a6dd8f2c..9c6b3eecb1 100644
--- a/contrib/libs/hyperscan/src/nfa/mcclellan.h
+++ b/contrib/libs/hyperscan/src/nfa/mcclellan.h
@@ -1,109 +1,109 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MCCLELLAN_H
-#define MCCLELLAN_H
-
-#include "callback.h"
-#include "ue2common.h"
-
-struct mq;
-struct NFA;
-
-// 8-bit McClellan
-
-char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state,
- const char *streamState, u64a offset,
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MCCLELLAN_H
+#define MCCLELLAN_H
+
+#include "callback.h"
+#include "ue2common.h"
+
+struct mq;
+struct NFA;
+
+// 8-bit McClellan
+
+char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state,
+ const char *streamState, u64a offset,
NfaCallback callback, void *context);
-char nfaExecMcClellan8_Q(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report);
-char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q);
-char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
- struct mq *q);
+char nfaExecMcClellan8_Q(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report);
+char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q);
+char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q);
char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q);
-char nfaExecMcClellan8_queueInitState(const struct NFA *n, struct mq *q);
-char nfaExecMcClellan8_initCompressedState(const struct NFA *n, u64a offset,
- void *state, u8 key);
-char nfaExecMcClellan8_queueCompressState(const struct NFA *nfa,
- const struct mq *q, s64a loc);
-char nfaExecMcClellan8_expandState(const struct NFA *nfa, void *dest,
- const void *src, u64a offset, u8 key);
-
-#define nfaExecMcClellan8_B_Reverse NFA_API_NO_IMPL
-#define nfaExecMcClellan8_zombie_status NFA_API_ZOMBIE_NO_IMPL
-
-// 16-bit McClellan
-
-char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state,
- const char *streamState, u64a offset,
+char nfaExecMcClellan8_queueInitState(const struct NFA *n, struct mq *q);
+char nfaExecMcClellan8_initCompressedState(const struct NFA *n, u64a offset,
+ void *state, u8 key);
+char nfaExecMcClellan8_queueCompressState(const struct NFA *nfa,
+ const struct mq *q, s64a loc);
+char nfaExecMcClellan8_expandState(const struct NFA *nfa, void *dest,
+ const void *src, u64a offset, u8 key);
+
+#define nfaExecMcClellan8_B_Reverse NFA_API_NO_IMPL
+#define nfaExecMcClellan8_zombie_status NFA_API_ZOMBIE_NO_IMPL
+
+// 16-bit McClellan
+
+char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state,
+ const char *streamState, u64a offset,
NfaCallback callback, void *context);
-char nfaExecMcClellan16_Q(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end);
-char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report);
-char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q);
-char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
- struct mq *q);
+char nfaExecMcClellan16_Q(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end);
+char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report);
+char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q);
+char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
+ struct mq *q);
char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q);
-char nfaExecMcClellan16_queueInitState(const struct NFA *n, struct mq *q);
-char nfaExecMcClellan16_initCompressedState(const struct NFA *n, u64a offset,
- void *state, u8 key);
-char nfaExecMcClellan16_queueCompressState(const struct NFA *nfa,
- const struct mq *q, s64a loc);
-char nfaExecMcClellan16_expandState(const struct NFA *nfa, void *dest,
- const void *src, u64a offset, u8 key);
-
-#define nfaExecMcClellan16_B_Reverse NFA_API_NO_IMPL
-#define nfaExecMcClellan16_zombie_status NFA_API_ZOMBIE_NO_IMPL
-
-/**
- * Simple streaming mode calls:
- * - always uses the anchored start state regardless if top is set regardless of
- * start_off
- * - never checks eod
- */
-void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
- const u8 *buf, char top, size_t start_off,
- size_t len, NfaCallback cb, void *ctxt);
-
-void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
- const u8 *buf, char top, size_t start_off,
- size_t len, NfaCallback cb, void *ctxt);
-
-/**
- * Simple block mode calls:
- * - always uses the anchored start state regardless of initial start
- */
-
-char nfaExecMcClellan8_B(const struct NFA *n, u64a offset, const u8 *buffer,
- size_t length, NfaCallback cb, void *context);
-
-char nfaExecMcClellan16_B(const struct NFA *n, u64a offset, const u8 *buffer,
- size_t length, NfaCallback cb, void *context);
-
-#endif
+char nfaExecMcClellan16_queueInitState(const struct NFA *n, struct mq *q);
+char nfaExecMcClellan16_initCompressedState(const struct NFA *n, u64a offset,
+ void *state, u8 key);
+char nfaExecMcClellan16_queueCompressState(const struct NFA *nfa,
+ const struct mq *q, s64a loc);
+char nfaExecMcClellan16_expandState(const struct NFA *nfa, void *dest,
+ const void *src, u64a offset, u8 key);
+
+#define nfaExecMcClellan16_B_Reverse NFA_API_NO_IMPL
+#define nfaExecMcClellan16_zombie_status NFA_API_ZOMBIE_NO_IMPL
+
+/**
+ * Simple streaming mode calls:
+ * - always uses the anchored start state regardless if top is set regardless of
+ * start_off
+ * - never checks eod
+ */
+void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
+ const u8 *buf, char top, size_t start_off,
+ size_t len, NfaCallback cb, void *ctxt);
+
+void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
+ const u8 *buf, char top, size_t start_off,
+ size_t len, NfaCallback cb, void *ctxt);
+
+/**
+ * Simple block mode calls:
+ * - always uses the anchored start state regardless of initial start
+ */
+
+char nfaExecMcClellan8_B(const struct NFA *n, u64a offset, const u8 *buffer,
+ size_t length, NfaCallback cb, void *context);
+
+char nfaExecMcClellan16_B(const struct NFA *n, u64a offset, const u8 *buffer,
+ size_t length, NfaCallback cb, void *context);
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h b/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h
index 74b27e7534..7b0e7f48cd 100644
--- a/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h
+++ b/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h
@@ -1,87 +1,87 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-enum MatchMode {
- CALLBACK_OUTPUT,
- STOP_AT_MATCH,
- NO_MATCHES
-};
-
-static really_inline
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+enum MatchMode {
+ CALLBACK_OUTPUT,
+ STOP_AT_MATCH,
+ NO_MATCHES
+};
+
+static really_inline
const struct mstate_aux *get_aux(const struct mcclellan *m, u32 s) {
- const char *nfa = (const char *)m - sizeof(struct NFA);
- const struct mstate_aux *aux
- = s + (const struct mstate_aux *)(nfa + m->aux_offset);
-
- assert(ISALIGNED(aux));
- return aux;
-}
-
-static really_inline
+ const char *nfa = (const char *)m - sizeof(struct NFA);
+ const struct mstate_aux *aux
+ = s + (const struct mstate_aux *)(nfa + m->aux_offset);
+
+ assert(ISALIGNED(aux));
+ return aux;
+}
+
+static really_inline
u32 mcclellanEnableStarts(const struct mcclellan *m, u32 s) {
- const struct mstate_aux *aux = get_aux(m, s);
-
+ const struct mstate_aux *aux = get_aux(m, s);
+
DEBUG_PRINTF("enabling starts %u->%hu\n", s, aux->top);
- return aux->top;
-}
-
-static really_inline
+ return aux->top;
+}
+
+static really_inline
u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table,
- u32 as) {
- assert(ISALIGNED_N(sherman_state, 16));
-
- u8 len = *(const u8 *)(sherman_state + SHERMAN_LEN_OFFSET);
-
- if (len) {
- m128 ss_char = load128(sherman_state);
- m128 cur_char = set16x8(cprime);
-
- u32 z = movemask128(eq128(ss_char, cur_char));
-
- /* remove header cruft: type 1, len 1, daddy 2*/
- z &= ~0xf;
- z &= (1U << (len + 4)) - 1;
-
- if (z) {
- u32 i = ctz32(z & ~0xf) - 4;
-
+ u32 as) {
+ assert(ISALIGNED_N(sherman_state, 16));
+
+ u8 len = *(const u8 *)(sherman_state + SHERMAN_LEN_OFFSET);
+
+ if (len) {
+ m128 ss_char = load128(sherman_state);
+ m128 cur_char = set16x8(cprime);
+
+ u32 z = movemask128(eq128(ss_char, cur_char));
+
+ /* remove header cruft: type 1, len 1, daddy 2*/
+ z &= ~0xf;
+ z &= (1U << (len + 4)) - 1;
+
+ if (z) {
+ u32 i = ctz32(z & ~0xf) - 4;
+
u32 s_out = unaligned_load_u16((const u8 *)sherman_state
- + SHERMAN_STATES_OFFSET(len)
- + sizeof(u16) * i);
+ + SHERMAN_STATES_OFFSET(len)
+ + sizeof(u16) * i);
DEBUG_PRINTF("found sherman match at %u/%u for c'=%hhu s=%u\n", i,
len, cprime, s_out);
- return s_out;
- }
- }
-
+ return s_out;
+ }
+ }
+
u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET);
return succ_table[(daddy << as) + cprime];
-}
+}
static really_inline
u16 doWide16(const char *wide_entry, const u8 **c_inout, const u8 *end,
diff --git a/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h b/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h
index 3c237f5153..482fdb1bc9 100644
--- a/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h
+++ b/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h
@@ -1,55 +1,55 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MCCLELLAN_INTERNAL_H
-#define MCCLELLAN_INTERNAL_H
-
-#include "nfa_internal.h"
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-#define ACCEPT_FLAG 0x8000
-#define ACCEL_FLAG 0x4000
-#define STATE_MASK 0x3fff
-
-#define SHERMAN_STATE 1
-
-#define SHERMAN_TYPE_OFFSET 0
-#define SHERMAN_FIXED_SIZE 32
-
-#define SHERMAN_LEN_OFFSET 1
-#define SHERMAN_DADDY_OFFSET 2
-#define SHERMAN_CHARS_OFFSET 4
-#define SHERMAN_STATES_OFFSET(sso_len) (4 + (sso_len))
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MCCLELLAN_INTERNAL_H
+#define MCCLELLAN_INTERNAL_H
+
+#include "nfa_internal.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#define ACCEPT_FLAG 0x8000
+#define ACCEL_FLAG 0x4000
+#define STATE_MASK 0x3fff
+
+#define SHERMAN_STATE 1
+
+#define SHERMAN_TYPE_OFFSET 0
+#define SHERMAN_FIXED_SIZE 32
+
+#define SHERMAN_LEN_OFFSET 1
+#define SHERMAN_DADDY_OFFSET 2
+#define SHERMAN_CHARS_OFFSET 4
+#define SHERMAN_STATES_OFFSET(sso_len) (4 + (sso_len))
+
#define WIDE_STATE 2
#define WIDE_ENTRY_OFFSET8(weo_pos) (2 + (weo_pos))
#define WIDE_ENTRY_OFFSET16(weo_pos) (4 + (weo_pos))
@@ -60,66 +60,66 @@ extern "C"
#define WIDE_SYMBOL_OFFSET16 2
#define WIDE_TRANSITION_OFFSET16(wto_width) (2 + ROUNDUP_N(wto_width, 2))
-struct report_list {
- u32 count;
- ReportID report[];
-};
-
-struct mstate_aux {
- u32 accept;
- u32 accept_eod;
- u16 top;
- u32 accel_offset; /* relative to start of struct mcclellan; 0 if no accel */
-};
-
-#define MCCLELLAN_FLAG_SINGLE 1 /**< we raise only single accept id */
-
-struct mcclellan {
- u16 state_count; /**< total number of states */
- u32 length; /**< length of dfa in bytes */
- u16 start_anchored; /**< anchored start state */
- u16 start_floating; /**< floating start state */
- u32 aux_offset; /**< offset of the aux structures relative to the start of
- * the nfa structure */
+struct report_list {
+ u32 count;
+ ReportID report[];
+};
+
+struct mstate_aux {
+ u32 accept;
+ u32 accept_eod;
+ u16 top;
+ u32 accel_offset; /* relative to start of struct mcclellan; 0 if no accel */
+};
+
+#define MCCLELLAN_FLAG_SINGLE 1 /**< we raise only single accept id */
+
+struct mcclellan {
+ u16 state_count; /**< total number of states */
+ u32 length; /**< length of dfa in bytes */
+ u16 start_anchored; /**< anchored start state */
+ u16 start_floating; /**< floating start state */
+ u32 aux_offset; /**< offset of the aux structures relative to the start of
+ * the nfa structure */
u32 sherman_offset; /**< offset of array of sherman state offsets the
* state_info structures relative to the start of the
* nfa structure */
u32 sherman_end; /**< offset of the end of the state_info structures
* relative to the start of the nfa structure */
- u16 accel_limit_8; /**< 8 bit, lowest accelerable state */
- u16 accept_limit_8; /**< 8 bit, lowest accept state */
- u16 sherman_limit; /**< lowest sherman state */
+ u16 accel_limit_8; /**< 8 bit, lowest accelerable state */
+ u16 accept_limit_8; /**< 8 bit, lowest accept state */
+ u16 sherman_limit; /**< lowest sherman state */
u16 wide_limit; /**< 8/16 bit, lowest wide head state */
- u8 alphaShift;
- u8 flags;
+ u8 alphaShift;
+ u8 flags;
u8 has_accel; /**< 1 iff there are any accel plans */
u8 has_wide; /**< 1 iff there exists any wide state */
- u8 remap[256]; /**< remaps characters to a smaller alphabet */
- ReportID arb_report; /**< one of the accepts that this dfa may raise */
+ u8 remap[256]; /**< remaps characters to a smaller alphabet */
+ ReportID arb_report; /**< one of the accepts that this dfa may raise */
u32 accel_offset; /**< offset of accel structures from start of McClellan */
- u32 haig_offset; /**< reserved for use by Haig, relative to start of NFA */
+ u32 haig_offset; /**< reserved for use by Haig, relative to start of NFA */
u32 wide_offset; /**< offset of the wide state entries to the start of the
* nfa structure */
-};
-
-static really_inline
-const char *findShermanState(UNUSED const struct mcclellan *m,
+};
+
+static really_inline
+const char *findShermanState(UNUSED const struct mcclellan *m,
const char *sherman_base_offset, u32 sherman_base,
u32 s) {
- const char *rv
- = sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base);
- assert(rv < (const char *)m + m->length - sizeof(struct NFA));
- UNUSED u8 type = *(const u8 *)(rv + SHERMAN_TYPE_OFFSET);
- assert(type == SHERMAN_STATE);
- return rv;
-}
-
-static really_inline
-char *findMutableShermanState(char *sherman_base_offset, u16 sherman_base,
+ const char *rv
+ = sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base);
+ assert(rv < (const char *)m + m->length - sizeof(struct NFA));
+ UNUSED u8 type = *(const u8 *)(rv + SHERMAN_TYPE_OFFSET);
+ assert(type == SHERMAN_STATE);
+ return rv;
+}
+
+static really_inline
+char *findMutableShermanState(char *sherman_base_offset, u16 sherman_base,
u32 s) {
- return sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base);
-}
-
+ return sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base);
+}
+
static really_inline
const char *findWideEntry8(UNUSED const struct mcclellan *m,
const char *wide_base, u32 wide_limit, u32 s) {
@@ -157,8 +157,8 @@ char *findMutableWideEntry16(char *wide_base, u32 wide_limit, u32 s) {
return wide_base + entry_offset;
}
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp b/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp
index 4612e53d3f..27ec1716e9 100644
--- a/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp
+++ b/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp
@@ -1,127 +1,127 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "mcclellancompile.h"
-
-#include "accel.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "mcclellancompile.h"
+
+#include "accel.h"
#include "accelcompile.h"
-#include "grey.h"
-#include "mcclellan_internal.h"
+#include "grey.h"
+#include "mcclellan_internal.h"
#include "mcclellancompile_util.h"
-#include "nfa_internal.h"
-#include "shufticompile.h"
-#include "trufflecompile.h"
-#include "ue2common.h"
-#include "util/alloc.h"
-#include "util/bitutils.h"
-#include "util/charreach.h"
-#include "util/compare.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/make_unique.h"
-#include "util/order_check.h"
+#include "nfa_internal.h"
+#include "shufticompile.h"
+#include "trufflecompile.h"
+#include "ue2common.h"
+#include "util/alloc.h"
+#include "util/bitutils.h"
+#include "util/charreach.h"
+#include "util/compare.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/make_unique.h"
+#include "util/order_check.h"
#include "util/report_manager.h"
#include "util/flat_containers.h"
-#include "util/unaligned.h"
-#include "util/verify_types.h"
-
-#include <algorithm>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <map>
-#include <memory>
+#include "util/unaligned.h"
+#include "util/verify_types.h"
+
+#include <algorithm>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <map>
+#include <memory>
#include <queue>
-#include <set>
-#include <vector>
-
+#include <set>
+#include <vector>
+
#include <boost/range/adaptor/map.hpp>
#include "mcclellandump.h"
#include "util/dump_util.h"
#include "util/dump_charclass.h"
-using namespace std;
+using namespace std;
using boost::adaptors::map_keys;
using boost::dynamic_bitset;
-
+
#define ACCEL_DFA_MAX_OFFSET_DEPTH 4
-
+
/** Maximum tolerated number of escape character from an accel state.
* This is larger than nfa, as we don't have a budget and the nfa cheats on stop
* characters for sets of states */
#define ACCEL_DFA_MAX_STOP_CHAR 160
-
+
/** Maximum tolerated number of escape character from a sds accel state. Larger
* than normal states as accelerating sds is important. Matches NFA value */
#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192
-
+
namespace ue2 {
-namespace /* anon */ {
-
-struct dstate_extra {
+namespace /* anon */ {
+
+struct dstate_extra {
u16 daddytaken = 0;
bool shermanState = false;
bool wideState = false;
bool wideHead = false;
-};
-
-struct dfa_info {
+};
+
+struct dfa_info {
accel_dfa_build_strat &strat;
- raw_dfa &raw;
- vector<dstate> &states;
- vector<dstate_extra> extra;
+ raw_dfa &raw;
+ vector<dstate> &states;
+ vector<dstate_extra> extra;
vector<vector<dstate_id_t>> wide_state_chain;
vector<vector<symbol_t>> wide_symbol_chain;
- const u16 alpha_size; /* including special symbols */
- const array<u16, ALPHABET_SIZE> &alpha_remap;
- const u16 impl_alpha_size;
-
- u8 getAlphaShift() const;
-
+ const u16 alpha_size; /* including special symbols */
+ const array<u16, ALPHABET_SIZE> &alpha_remap;
+ const u16 impl_alpha_size;
+
+ u8 getAlphaShift() const;
+
explicit dfa_info(accel_dfa_build_strat &s)
- : strat(s),
- raw(s.get_raw()),
- states(raw.states),
- extra(raw.states.size()),
- alpha_size(raw.alpha_size),
- alpha_remap(raw.alpha_remap),
- impl_alpha_size(raw.getImplAlphaSize()) {}
-
- dstate_id_t implId(dstate_id_t raw_id) const {
- return states[raw_id].impl_id;
- }
-
- bool is_sherman(dstate_id_t raw_id) const {
- return extra[raw_id].shermanState;
- }
-
+ : strat(s),
+ raw(s.get_raw()),
+ states(raw.states),
+ extra(raw.states.size()),
+ alpha_size(raw.alpha_size),
+ alpha_remap(raw.alpha_remap),
+ impl_alpha_size(raw.getImplAlphaSize()) {}
+
+ dstate_id_t implId(dstate_id_t raw_id) const {
+ return states[raw_id].impl_id;
+ }
+
+ bool is_sherman(dstate_id_t raw_id) const {
+ return extra[raw_id].shermanState;
+ }
+
bool is_widestate(dstate_id_t raw_id) const {
return extra[raw_id].wideState;
}
@@ -130,18 +130,18 @@ struct dfa_info {
return extra[raw_id].wideHead;
}
- size_t size(void) const { return states.size(); }
-};
-
-u8 dfa_info::getAlphaShift() const {
- if (impl_alpha_size < 2) {
- return 1;
- } else {
- /* log2 round up */
- return 32 - clz32(impl_alpha_size - 1);
- }
-}
-
+ size_t size(void) const { return states.size(); }
+};
+
+u8 dfa_info::getAlphaShift() const {
+ if (impl_alpha_size < 2) {
+ return 1;
+ } else {
+ /* log2 round up */
+ return 32 - clz32(impl_alpha_size - 1);
+ }
+}
+
struct state_prev_info {
vector<vector<dstate_id_t>> prev_vec;
explicit state_prev_info(size_t alpha_size) : prev_vec(alpha_size) {}
@@ -171,80 +171,80 @@ DfaPrevInfo::DfaPrevInfo(raw_dfa &rdfa)
}
}
}
-} // namespace
-
-static
-mstate_aux *getAux(NFA *n, dstate_id_t i) {
- assert(isMcClellanType(n->type));
-
- mcclellan *m = (mcclellan *)getMutableImplNfa(n);
- mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
-
- mstate_aux *aux = aux_base + i;
- assert((const char *)aux < (const char *)n + m->length);
- return aux;
-}
-
-static
-void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
- assert((size_t)succ_table % 2 == 0);
- assert(n->type == MCCLELLAN_NFA_16);
- u8 alphaShift = info.getAlphaShift();
- u16 alphaSize = info.impl_alpha_size;
- mcclellan *m = (mcclellan *)getMutableImplNfa(n);
-
- /* handle the normal states */
- for (u32 i = 0; i < m->sherman_limit; i++) {
- for (size_t j = 0; j < alphaSize; j++) {
- size_t c_prime = (i << alphaShift) + j;
-
+} // namespace
+
+static
+mstate_aux *getAux(NFA *n, dstate_id_t i) {
+ assert(isMcClellanType(n->type));
+
+ mcclellan *m = (mcclellan *)getMutableImplNfa(n);
+ mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
+
+ mstate_aux *aux = aux_base + i;
+ assert((const char *)aux < (const char *)n + m->length);
+ return aux;
+}
+
+static
+void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
+ assert((size_t)succ_table % 2 == 0);
+ assert(n->type == MCCLELLAN_NFA_16);
+ u8 alphaShift = info.getAlphaShift();
+ u16 alphaSize = info.impl_alpha_size;
+ mcclellan *m = (mcclellan *)getMutableImplNfa(n);
+
+ /* handle the normal states */
+ for (u32 i = 0; i < m->sherman_limit; i++) {
+ for (size_t j = 0; j < alphaSize; j++) {
+ size_t c_prime = (i << alphaShift) + j;
+
// wide state has no aux structure.
if (m->has_wide && succ_table[c_prime] >= m->wide_limit) {
continue;
}
- mstate_aux *aux = getAux(n, succ_table[c_prime]);
-
- if (aux->accept) {
- succ_table[c_prime] |= ACCEPT_FLAG;
- }
-
- if (aux->accel_offset) {
- succ_table[c_prime] |= ACCEL_FLAG;
- }
- }
- }
-
- /* handle the sherman states */
- char *sherman_base_offset = (char *)n + m->sherman_offset;
+ mstate_aux *aux = getAux(n, succ_table[c_prime]);
+
+ if (aux->accept) {
+ succ_table[c_prime] |= ACCEPT_FLAG;
+ }
+
+ if (aux->accel_offset) {
+ succ_table[c_prime] |= ACCEL_FLAG;
+ }
+ }
+ }
+
+ /* handle the sherman states */
+ char *sherman_base_offset = (char *)n + m->sherman_offset;
u16 sherman_ceil = m->has_wide == 1 ? m->wide_limit : m->state_count;
for (u16 j = m->sherman_limit; j < sherman_ceil; j++) {
- char *sherman_cur
- = findMutableShermanState(sherman_base_offset, m->sherman_limit, j);
- assert(*(sherman_cur + SHERMAN_TYPE_OFFSET) == SHERMAN_STATE);
- u8 len = *(u8 *)(sherman_cur + SHERMAN_LEN_OFFSET);
- u16 *succs = (u16 *)(sherman_cur + SHERMAN_STATES_OFFSET(len));
-
- for (u8 i = 0; i < len; i++) {
- u16 succ_i = unaligned_load_u16((u8 *)&succs[i]);
+ char *sherman_cur
+ = findMutableShermanState(sherman_base_offset, m->sherman_limit, j);
+ assert(*(sherman_cur + SHERMAN_TYPE_OFFSET) == SHERMAN_STATE);
+ u8 len = *(u8 *)(sherman_cur + SHERMAN_LEN_OFFSET);
+ u16 *succs = (u16 *)(sherman_cur + SHERMAN_STATES_OFFSET(len));
+
+ for (u8 i = 0; i < len; i++) {
+ u16 succ_i = unaligned_load_u16((u8 *)&succs[i]);
// wide state has no aux structure.
if (m->has_wide && succ_i >= m->wide_limit) {
continue;
}
- mstate_aux *aux = getAux(n, succ_i);
-
- if (aux->accept) {
- succ_i |= ACCEPT_FLAG;
- }
-
- if (aux->accel_offset) {
- succ_i |= ACCEL_FLAG;
- }
-
- unaligned_store_u16((u8 *)&succs[i], succ_i);
- }
- }
+ mstate_aux *aux = getAux(n, succ_i);
+
+ if (aux->accept) {
+ succ_i |= ACCEPT_FLAG;
+ }
+
+ if (aux->accel_offset) {
+ succ_i |= ACCEL_FLAG;
+ }
+
+ unaligned_store_u16((u8 *)&succs[i], succ_i);
+ }
+ }
/* handle the wide states */
if (m->has_wide) {
@@ -290,53 +290,53 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
}
}
}
-}
-
+}
+
u32 mcclellan_build_strat::max_allowed_offset_accel() const {
return ACCEL_DFA_MAX_OFFSET_DEPTH;
-}
-
+}
+
u32 mcclellan_build_strat::max_stop_char() const {
return ACCEL_DFA_MAX_STOP_CHAR;
-}
-
+}
+
u32 mcclellan_build_strat::max_floating_stop_char() const {
return ACCEL_DFA_MAX_FLOATING_STOP_CHAR;
-}
-
-static
-void populateBasicInfo(size_t state_size, const dfa_info &info,
- u32 total_size, u32 aux_offset, u32 accel_offset,
- u32 accel_count, ReportID arb, bool single, NFA *nfa) {
- assert(state_size == sizeof(u16) || state_size == sizeof(u8));
-
- nfa->length = total_size;
- nfa->nPositions = info.states.size();
-
- nfa->scratchStateSize = verify_u32(state_size);
- nfa->streamStateSize = verify_u32(state_size);
-
- if (state_size == sizeof(u8)) {
- nfa->type = MCCLELLAN_NFA_8;
- } else {
- nfa->type = MCCLELLAN_NFA_16;
- }
-
- mcclellan *m = (mcclellan *)getMutableImplNfa(nfa);
- for (u32 i = 0; i < 256; i++) {
- m->remap[i] = verify_u8(info.alpha_remap[i]);
- }
- m->alphaShift = info.getAlphaShift();
- m->length = total_size;
- m->aux_offset = aux_offset;
- m->accel_offset = accel_offset;
- m->arb_report = arb;
- m->state_count = verify_u16(info.size());
- m->start_anchored = info.implId(info.raw.start_anchored);
- m->start_floating = info.implId(info.raw.start_floating);
- m->has_accel = accel_count ? 1 : 0;
+}
+
+static
+void populateBasicInfo(size_t state_size, const dfa_info &info,
+ u32 total_size, u32 aux_offset, u32 accel_offset,
+ u32 accel_count, ReportID arb, bool single, NFA *nfa) {
+ assert(state_size == sizeof(u16) || state_size == sizeof(u8));
+
+ nfa->length = total_size;
+ nfa->nPositions = info.states.size();
+
+ nfa->scratchStateSize = verify_u32(state_size);
+ nfa->streamStateSize = verify_u32(state_size);
+
+ if (state_size == sizeof(u8)) {
+ nfa->type = MCCLELLAN_NFA_8;
+ } else {
+ nfa->type = MCCLELLAN_NFA_16;
+ }
+
+ mcclellan *m = (mcclellan *)getMutableImplNfa(nfa);
+ for (u32 i = 0; i < 256; i++) {
+ m->remap[i] = verify_u8(info.alpha_remap[i]);
+ }
+ m->alphaShift = info.getAlphaShift();
+ m->length = total_size;
+ m->aux_offset = aux_offset;
+ m->accel_offset = accel_offset;
+ m->arb_report = arb;
+ m->state_count = verify_u16(info.size());
+ m->start_anchored = info.implId(info.raw.start_anchored);
+ m->start_floating = info.implId(info.raw.start_floating);
+ m->has_accel = accel_count ? 1 : 0;
m->has_wide = info.wide_state_chain.size() > 0 ? 1 : 0;
-
+
if (state_size == sizeof(u8) && m->has_wide == 1) {
// allocate 1 more byte for wide state use.
nfa->scratchStateSize += sizeof(u8);
@@ -349,16 +349,16 @@ void populateBasicInfo(size_t state_size, const dfa_info &info,
nfa->streamStateSize += sizeof(u16);
}
- if (single) {
- m->flags |= MCCLELLAN_FLAG_SINGLE;
- }
-}
-
-namespace {
-
-struct raw_report_list {
- flat_set<ReportID> reports;
-
+ if (single) {
+ m->flags |= MCCLELLAN_FLAG_SINGLE;
+ }
+}
+
+namespace {
+
+struct raw_report_list {
+ flat_set<ReportID> reports;
+
raw_report_list(const flat_set<ReportID> &reports_in,
const ReportManager &rm, bool do_remap) {
if (do_remap) {
@@ -369,137 +369,137 @@ struct raw_report_list {
reports = reports_in;
}
}
-
- bool operator<(const raw_report_list &b) const {
- return reports < b.reports;
- }
-};
-
-struct raw_report_info_impl : public raw_report_info {
- vector<raw_report_list> rl;
- u32 getReportListSize() const override;
- size_t size() const override;
- void fillReportLists(NFA *n, size_t base_offset,
- std::vector<u32> &ro /* out */) const override;
-};
-}
-
-unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports(
- vector<u32> &reports,
- vector<u32> &reports_eod,
- u8 *isSingleReport,
- ReportID *arbReport) const {
- DEBUG_PRINTF("gathering reports\n");
-
+
+ bool operator<(const raw_report_list &b) const {
+ return reports < b.reports;
+ }
+};
+
+struct raw_report_info_impl : public raw_report_info {
+ vector<raw_report_list> rl;
+ u32 getReportListSize() const override;
+ size_t size() const override;
+ void fillReportLists(NFA *n, size_t base_offset,
+ std::vector<u32> &ro /* out */) const override;
+};
+}
+
+unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports(
+ vector<u32> &reports,
+ vector<u32> &reports_eod,
+ u8 *isSingleReport,
+ ReportID *arbReport) const {
+ DEBUG_PRINTF("gathering reports\n");
+
const bool remap_reports = has_managed_reports(rdfa.kind);
- auto ri = ue2::make_unique<raw_report_info_impl>();
- map<raw_report_list, u32> rev;
-
- for (const dstate &s : rdfa.states) {
- if (s.reports.empty()) {
- reports.push_back(MO_INVALID_IDX);
- continue;
- }
-
+ auto ri = ue2::make_unique<raw_report_info_impl>();
+ map<raw_report_list, u32> rev;
+
+ for (const dstate &s : rdfa.states) {
+ if (s.reports.empty()) {
+ reports.push_back(MO_INVALID_IDX);
+ continue;
+ }
+
raw_report_list rrl(s.reports, rm, remap_reports);
- DEBUG_PRINTF("non empty r\n");
+ DEBUG_PRINTF("non empty r\n");
auto it = rev.find(rrl);
if (it != rev.end()) {
reports.push_back(it->second);
- } else {
- DEBUG_PRINTF("adding to rl %zu\n", ri->size());
+ } else {
+ DEBUG_PRINTF("adding to rl %zu\n", ri->size());
rev.emplace(rrl, ri->size());
- reports.push_back(ri->size());
- ri->rl.push_back(rrl);
- }
- }
-
- for (const dstate &s : rdfa.states) {
- if (s.reports_eod.empty()) {
- reports_eod.push_back(MO_INVALID_IDX);
- continue;
- }
-
- DEBUG_PRINTF("non empty r eod\n");
+ reports.push_back(ri->size());
+ ri->rl.push_back(rrl);
+ }
+ }
+
+ for (const dstate &s : rdfa.states) {
+ if (s.reports_eod.empty()) {
+ reports_eod.push_back(MO_INVALID_IDX);
+ continue;
+ }
+
+ DEBUG_PRINTF("non empty r eod\n");
raw_report_list rrl(s.reports_eod, rm, remap_reports);
auto it = rev.find(rrl);
if (it != rev.end()) {
reports_eod.push_back(it->second);
- continue;
- }
-
- DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size());
+ continue;
+ }
+
+ DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size());
rev.emplace(rrl, ri->size());
- reports_eod.push_back(ri->size());
- ri->rl.push_back(rrl);
- }
-
- assert(!ri->rl.empty()); /* all components should be able to generate
- reports */
- if (!ri->rl.empty()) {
- *arbReport = *ri->rl.begin()->reports.begin();
- } else {
- *arbReport = 0;
- }
-
- /* if we have only a single report id generated from all accepts (not eod)
- * we can take some short cuts */
+ reports_eod.push_back(ri->size());
+ ri->rl.push_back(rrl);
+ }
+
+ assert(!ri->rl.empty()); /* all components should be able to generate
+ reports */
+ if (!ri->rl.empty()) {
+ *arbReport = *ri->rl.begin()->reports.begin();
+ } else {
+ *arbReport = 0;
+ }
+
+ /* if we have only a single report id generated from all accepts (not eod)
+ * we can take some short cuts */
flat_set<ReportID> reps;
-
- for (u32 rl_index : reports) {
- if (rl_index == MO_INVALID_IDX) {
- continue;
- }
- assert(rl_index < ri->size());
- insert(&reps, ri->rl[rl_index].reports);
- }
-
- if (reps.size() == 1) {
- *isSingleReport = 1;
- *arbReport = *reps.begin();
- DEBUG_PRINTF("single -- %u\n", *arbReport);
- } else {
- *isSingleReport = 0;
- }
-
- return move(ri);
-}
-
-u32 raw_report_info_impl::getReportListSize() const {
- u32 rv = 0;
-
- for (const auto &reps : rl) {
- rv += sizeof(report_list);
- rv += sizeof(ReportID) * reps.reports.size();
- }
-
- return rv;
-}
-
-size_t raw_report_info_impl::size() const {
- return rl.size();
-}
-
-void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
- vector<u32> &ro) const {
- for (const auto &reps : rl) {
- ro.push_back(base_offset);
-
- report_list *p = (report_list *)((char *)n + base_offset);
-
- u32 i = 0;
- for (const ReportID report : reps.reports) {
- p->report[i++] = report;
- }
- p->count = verify_u32(reps.reports.size());
-
- base_offset += sizeof(report_list);
- base_offset += sizeof(ReportID) * reps.reports.size();
- }
-}
-
-static
+
+ for (u32 rl_index : reports) {
+ if (rl_index == MO_INVALID_IDX) {
+ continue;
+ }
+ assert(rl_index < ri->size());
+ insert(&reps, ri->rl[rl_index].reports);
+ }
+
+ if (reps.size() == 1) {
+ *isSingleReport = 1;
+ *arbReport = *reps.begin();
+ DEBUG_PRINTF("single -- %u\n", *arbReport);
+ } else {
+ *isSingleReport = 0;
+ }
+
+ return move(ri);
+}
+
+u32 raw_report_info_impl::getReportListSize() const {
+ u32 rv = 0;
+
+ for (const auto &reps : rl) {
+ rv += sizeof(report_list);
+ rv += sizeof(ReportID) * reps.reports.size();
+ }
+
+ return rv;
+}
+
+size_t raw_report_info_impl::size() const {
+ return rl.size();
+}
+
+void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
+ vector<u32> &ro) const {
+ for (const auto &reps : rl) {
+ ro.push_back(base_offset);
+
+ report_list *p = (report_list *)((char *)n + base_offset);
+
+ u32 i = 0;
+ for (const ReportID report : reps.reports) {
+ p->report[i++] = report;
+ }
+ p->count = verify_u32(reps.reports.size());
+
+ base_offset += sizeof(report_list);
+ base_offset += sizeof(ReportID) * reps.reports.size();
+ }
+}
+
+static
void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info,
set<dstate_id_t> *accel_states) {
for (dstate_id_t i : accel_escape_info | map_keys) {
@@ -508,19 +508,19 @@ void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info,
}
static
-size_t calcShermanRegionSize(const dfa_info &info) {
- size_t rv = 0;
-
- for (size_t i = 0; i < info.size(); i++) {
- if (info.is_sherman(i)) {
- rv += SHERMAN_FIXED_SIZE;
- }
- }
-
- return ROUNDUP_16(rv);
-}
-
-static
+size_t calcShermanRegionSize(const dfa_info &info) {
+ size_t rv = 0;
+
+ for (size_t i = 0; i < info.size(); i++) {
+ if (info.is_sherman(i)) {
+ rv += SHERMAN_FIXED_SIZE;
+ }
+ }
+
+ return ROUNDUP_16(rv);
+}
+
+static
size_t calcWideRegionSize(const dfa_info &info) {
if (info.wide_state_chain.empty()) {
return 0;
@@ -539,58 +539,58 @@ size_t calcWideRegionSize(const dfa_info &info) {
}
static
-void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
- const vector<u32> &reports, const vector<u32> &reports_eod,
- vector<u32> &reportOffsets) {
- const dstate &raw_state = info.states[i];
- aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]];
- aux->accept_eod = raw_state.reports_eod.empty() ? 0
- : reportOffsets[reports_eod[i]];
- aux->top = info.implId(i ? raw_state.next[info.alpha_remap[TOP]]
- : info.raw.start_floating);
-}
-
+void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
+ const vector<u32> &reports, const vector<u32> &reports_eod,
+ vector<u32> &reportOffsets) {
+ const dstate &raw_state = info.states[i];
+ aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]];
+ aux->accept_eod = raw_state.reports_eod.empty() ? 0
+ : reportOffsets[reports_eod[i]];
+ aux->top = info.implId(i ? raw_state.next[info.alpha_remap[TOP]]
+ : info.raw.start_floating);
+}
+
/* returns false on error */
-static
+static
bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base,
dstate_id_t *wide_limit) {
- info.states[0].impl_id = 0; /* dead is always 0 */
-
- vector<dstate_id_t> norm;
- vector<dstate_id_t> sherm;
+ info.states[0].impl_id = 0; /* dead is always 0 */
+
+ vector<dstate_id_t> norm;
+ vector<dstate_id_t> sherm;
vector<dstate_id_t> wideHead;
vector<dstate_id_t> wideState;
-
- if (info.size() > (1 << 16)) {
- DEBUG_PRINTF("too many states\n");
+
+ if (info.size() > (1 << 16)) {
+ DEBUG_PRINTF("too many states\n");
*wide_limit = 0;
return false;
- }
-
- for (u32 i = 1; i < info.size(); i++) {
+ }
+
+ for (u32 i = 1; i < info.size(); i++) {
if (info.is_widehead(i)) {
wideHead.push_back(i);
} else if (info.is_widestate(i)) {
wideState.push_back(i);
} else if (info.is_sherman(i)) {
- sherm.push_back(i);
- } else {
- norm.push_back(i);
- }
- }
-
+ sherm.push_back(i);
+ } else {
+ norm.push_back(i);
+ }
+ }
+
dstate_id_t next = 1;
- for (const dstate_id_t &s : norm) {
+ for (const dstate_id_t &s : norm) {
DEBUG_PRINTF("[norm] mapping state %u to %u\n", s, next);
info.states[s].impl_id = next++;
- }
-
+ }
+
*sherman_base = next;
- for (const dstate_id_t &s : sherm) {
+ for (const dstate_id_t &s : sherm) {
DEBUG_PRINTF("[sherm] mapping state %u to %u\n", s, next);
info.states[s].impl_id = next++;
- }
-
+ }
+
*wide_limit = next;
for (const dstate_id_t &s : wideHead) {
DEBUG_PRINTF("[widehead] mapping state %u to %u\n", s, next);
@@ -602,58 +602,58 @@ bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base,
info.states[s].impl_id = next++;
}
- /* Check to see if we haven't over allocated our states */
+ /* Check to see if we haven't over allocated our states */
DEBUG_PRINTF("next sherman %u masked %u\n", next,
(dstate_id_t)(next & STATE_MASK));
return (next - 1) == ((next - 1) & STATE_MASK);
-}
-
-static
+}
+
+static
bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
set<dstate_id_t> *accel_states) {
- DEBUG_PRINTF("building mcclellan 16\n");
-
- vector<u32> reports; /* index in ri for the appropriate report list */
- vector<u32> reports_eod; /* as above */
- ReportID arb;
- u8 single;
-
- u8 alphaShift = info.getAlphaShift();
- assert(alphaShift <= 8);
-
- u16 count_real_states;
+ DEBUG_PRINTF("building mcclellan 16\n");
+
+ vector<u32> reports; /* index in ri for the appropriate report list */
+ vector<u32> reports_eod; /* as above */
+ ReportID arb;
+ u8 single;
+
+ u8 alphaShift = info.getAlphaShift();
+ assert(alphaShift <= 8);
+
+ u16 count_real_states;
u16 wide_limit;
if (!allocateFSN16(info, &count_real_states, &wide_limit)) {
- DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
- info.size());
- return nullptr;
- }
-
+ DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
+ info.size());
+ return nullptr;
+ }
+
DEBUG_PRINTF("count_real_states: %d\n", count_real_states);
DEBUG_PRINTF("non_wide_states: %d\n", wide_limit);
auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
map<dstate_id_t, AccelScheme> accel_escape_info
= info.strat.getAccelInfo(cc.grey);
-
- size_t tran_size = (1 << info.getAlphaShift())
- * sizeof(u16) * count_real_states;
-
+
+ size_t tran_size = (1 << info.getAlphaShift())
+ * sizeof(u16) * count_real_states;
+
size_t aux_size = sizeof(mstate_aux) * wide_limit;
-
- size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size);
+
+ size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size);
size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
- size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
- + ri->getReportListSize(), 32);
- size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size);
- size_t sherman_size = calcShermanRegionSize(info);
+ size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
+ + ri->getReportListSize(), 32);
+ size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size);
+ size_t sherman_size = calcShermanRegionSize(info);
size_t wide_offset = ROUNDUP_16(sherman_offset + sherman_size);
size_t wide_size = calcWideRegionSize(info);
size_t total_size = wide_offset + wide_size;
-
- accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
- assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
-
+
+ accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
+ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
+
DEBUG_PRINTF("aux_offset %zu\n", aux_offset);
DEBUG_PRINTF("aux_size %zu\n", aux_size);
DEBUG_PRINTF("rl size %u\n", ri->getReportListSize());
@@ -666,111 +666,111 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
DEBUG_PRINTF("total_size %zu\n", total_size);
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
- char *nfa_base = (char *)nfa.get();
-
- populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
+ char *nfa_base = (char *)nfa.get();
+
+ populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
accel_escape_info.size(), arb, single, nfa.get());
-
- vector<u32> reportOffsets;
-
- ri->fillReportLists(nfa.get(), aux_offset + aux_size, reportOffsets);
-
- u16 *succ_table = (u16 *)(nfa_base + sizeof(NFA) + sizeof(mcclellan));
- mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset);
- mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get());
-
+
+ vector<u32> reportOffsets;
+
+ ri->fillReportLists(nfa.get(), aux_offset + aux_size, reportOffsets);
+
+ u16 *succ_table = (u16 *)(nfa_base + sizeof(NFA) + sizeof(mcclellan));
+ mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset);
+ mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get());
+
m->wide_limit = wide_limit;
m->wide_offset = wide_offset;
- /* copy in the mc header information */
- m->sherman_offset = sherman_offset;
- m->sherman_end = total_size;
- m->sherman_limit = count_real_states;
-
- /* do normal states */
- for (size_t i = 0; i < info.size(); i++) {
+ /* copy in the mc header information */
+ m->sherman_offset = sherman_offset;
+ m->sherman_end = total_size;
+ m->sherman_limit = count_real_states;
+
+ /* do normal states */
+ for (size_t i = 0; i < info.size(); i++) {
if (info.is_sherman(i) || info.is_widestate(i)) {
- continue;
- }
-
- u16 fs = info.implId(i);
- mstate_aux *this_aux = getAux(nfa.get(), fs);
-
- assert(fs < count_real_states);
-
- for (size_t j = 0; j < info.impl_alpha_size; j++) {
- succ_table[(fs << alphaShift) + j] =
- info.implId(info.states[i].next[j]);
- }
-
- fillInAux(&aux[fs], i, info, reports, reports_eod, reportOffsets);
-
+ continue;
+ }
+
+ u16 fs = info.implId(i);
+ mstate_aux *this_aux = getAux(nfa.get(), fs);
+
+ assert(fs < count_real_states);
+
+ for (size_t j = 0; j < info.impl_alpha_size; j++) {
+ succ_table[(fs << alphaShift) + j] =
+ info.implId(info.states[i].next[j]);
+ }
+
+ fillInAux(&aux[fs], i, info, reports, reports_eod, reportOffsets);
+
if (contains(accel_escape_info, i)) {
- this_aux->accel_offset = accel_offset;
- accel_offset += info.strat.accelSize();
- assert(accel_offset + sizeof(NFA) <= sherman_offset);
- assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
+ this_aux->accel_offset = accel_offset;
+ accel_offset += info.strat.accelSize();
+ assert(accel_offset + sizeof(NFA) <= sherman_offset);
+ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
info.strat.buildAccel(i, accel_escape_info.at(i),
- (void *)((char *)m + this_aux->accel_offset));
- }
- }
-
- /* do sherman states */
- char *sherman_table = nfa_base + m->sherman_offset;
- assert(ISALIGNED_16(sherman_table));
- for (size_t i = 0; i < info.size(); i++) {
- if (!info.is_sherman(i)) {
- continue;
- }
-
- u16 fs = verify_u16(info.implId(i));
- mstate_aux *this_aux = getAux(nfa.get(), fs);
-
- assert(fs >= count_real_states);
+ (void *)((char *)m + this_aux->accel_offset));
+ }
+ }
+
+ /* do sherman states */
+ char *sherman_table = nfa_base + m->sherman_offset;
+ assert(ISALIGNED_16(sherman_table));
+ for (size_t i = 0; i < info.size(); i++) {
+ if (!info.is_sherman(i)) {
+ continue;
+ }
+
+ u16 fs = verify_u16(info.implId(i));
+ mstate_aux *this_aux = getAux(nfa.get(), fs);
+
+ assert(fs >= count_real_states);
assert(fs < wide_limit);
-
- char *curr_sherman_entry
- = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE;
- assert(curr_sherman_entry <= nfa_base + m->length);
-
- fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets);
-
+
+ char *curr_sherman_entry
+ = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE;
+ assert(curr_sherman_entry <= nfa_base + m->length);
+
+ fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets);
+
if (contains(accel_escape_info, i)) {
- this_aux->accel_offset = accel_offset;
- accel_offset += info.strat.accelSize();
- assert(accel_offset + sizeof(NFA) <= sherman_offset);
- assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
+ this_aux->accel_offset = accel_offset;
+ accel_offset += info.strat.accelSize();
+ assert(accel_offset + sizeof(NFA) <= sherman_offset);
+ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
info.strat.buildAccel(i, accel_escape_info.at(i),
- (void *)((char *)m + this_aux->accel_offset));
- }
-
- u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken);
- assert(len <= 9);
- dstate_id_t d = info.states[i].daddy;
-
- *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
- *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
- *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
- u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);
-
- for (u16 s = 0; s < info.impl_alpha_size; s++) {
- if (info.states[i].next[s] != info.states[d].next[s]) {
- *(chars++) = (u8)s;
- }
- }
-
- u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
- for (u16 s = 0; s < info.impl_alpha_size; s++) {
- if (info.states[i].next[s] != info.states[d].next[s]) {
- DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n",
- fs, info.implId(d),
- info.implId(info.states[i].next[s]));
- unaligned_store_u16((u8 *)states++,
- info.implId(info.states[i].next[s]));
- }
- }
- }
-
+ (void *)((char *)m + this_aux->accel_offset));
+ }
+
+ u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken);
+ assert(len <= 9);
+ dstate_id_t d = info.states[i].daddy;
+
+ *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
+ *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
+ *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
+ u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);
+
+ for (u16 s = 0; s < info.impl_alpha_size; s++) {
+ if (info.states[i].next[s] != info.states[d].next[s]) {
+ *(chars++) = (u8)s;
+ }
+ }
+
+ u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
+ for (u16 s = 0; s < info.impl_alpha_size; s++) {
+ if (info.states[i].next[s] != info.states[d].next[s]) {
+ DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n",
+ fs, info.implId(d),
+ info.implId(info.states[i].next[s]));
+ unaligned_store_u16((u8 *)states++,
+ info.implId(info.states[i].next[s]));
+ }
+ }
+ }
+
if (!info.wide_state_chain.empty()) {
/* do wide states using info */
u16 wide_number = verify_u16(info.wide_symbol_chain.size());
@@ -836,185 +836,185 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
}
}
- markEdges(nfa.get(), succ_table, info);
-
+ markEdges(nfa.get(), succ_table, info);
+
if (accel_states && nfa) {
fillAccelOut(accel_escape_info, accel_states);
}
- return nfa;
-}
-
-static
-void fillInBasicState8(const dfa_info &info, mstate_aux *aux, u8 *succ_table,
- const vector<u32> &reportOffsets,
- const vector<u32> &reports,
- const vector<u32> &reports_eod, u32 i) {
- dstate_id_t j = info.implId(i);
- u8 alphaShift = info.getAlphaShift();
- assert(alphaShift <= 8);
-
- for (size_t s = 0; s < info.impl_alpha_size; s++) {
- dstate_id_t raw_succ = info.states[i].next[s];
- succ_table[(j << alphaShift) + s] = info.implId(raw_succ);
- }
-
- aux[j].accept = 0;
- aux[j].accept_eod = 0;
-
- if (!info.states[i].reports.empty()) {
- DEBUG_PRINTF("i=%u r[i]=%u\n", i, reports[i]);
- assert(reports[i] != MO_INVALID_IDX);
- aux[j].accept = reportOffsets[reports[i]];
- }
-
- if (!info.states[i].reports_eod.empty()) {
- DEBUG_PRINTF("i=%u re[i]=%u\n", i, reports_eod[i]);
- aux[j].accept_eod = reportOffsets[reports_eod[i]];
- }
-
- dstate_id_t raw_top = i ? info.states[i].next[info.alpha_remap[TOP]]
- : info.raw.start_floating;
-
- aux[j].top = info.implId(raw_top);
-}
-
-static
+ return nfa;
+}
+
+static
+void fillInBasicState8(const dfa_info &info, mstate_aux *aux, u8 *succ_table,
+ const vector<u32> &reportOffsets,
+ const vector<u32> &reports,
+ const vector<u32> &reports_eod, u32 i) {
+ dstate_id_t j = info.implId(i);
+ u8 alphaShift = info.getAlphaShift();
+ assert(alphaShift <= 8);
+
+ for (size_t s = 0; s < info.impl_alpha_size; s++) {
+ dstate_id_t raw_succ = info.states[i].next[s];
+ succ_table[(j << alphaShift) + s] = info.implId(raw_succ);
+ }
+
+ aux[j].accept = 0;
+ aux[j].accept_eod = 0;
+
+ if (!info.states[i].reports.empty()) {
+ DEBUG_PRINTF("i=%u r[i]=%u\n", i, reports[i]);
+ assert(reports[i] != MO_INVALID_IDX);
+ aux[j].accept = reportOffsets[reports[i]];
+ }
+
+ if (!info.states[i].reports_eod.empty()) {
+ DEBUG_PRINTF("i=%u re[i]=%u\n", i, reports_eod[i]);
+ aux[j].accept_eod = reportOffsets[reports_eod[i]];
+ }
+
+ dstate_id_t raw_top = i ? info.states[i].next[info.alpha_remap[TOP]]
+ : info.raw.start_floating;
+
+ aux[j].top = info.implId(raw_top);
+}
+
+static
void allocateFSN8(dfa_info &info,
const map<dstate_id_t, AccelScheme> &accel_escape_info,
u16 *accel_limit, u16 *accept_limit) {
- info.states[0].impl_id = 0; /* dead is always 0 */
-
- vector<dstate_id_t> norm;
- vector<dstate_id_t> accel;
- vector<dstate_id_t> accept;
-
- assert(info.size() <= (1 << 8));
-
- for (u32 i = 1; i < info.size(); i++) {
- if (!info.states[i].reports.empty()) {
- accept.push_back(i);
+ info.states[0].impl_id = 0; /* dead is always 0 */
+
+ vector<dstate_id_t> norm;
+ vector<dstate_id_t> accel;
+ vector<dstate_id_t> accept;
+
+ assert(info.size() <= (1 << 8));
+
+ for (u32 i = 1; i < info.size(); i++) {
+ if (!info.states[i].reports.empty()) {
+ accept.push_back(i);
} else if (contains(accel_escape_info, i)) {
- accel.push_back(i);
- } else {
- norm.push_back(i);
- }
- }
-
- u32 j = 1; /* dead is already at 0 */
- for (const dstate_id_t &s : norm) {
- assert(j <= 256);
- DEBUG_PRINTF("mapping state %u to %u\n", s, j);
- info.states[s].impl_id = j++;
- }
- *accel_limit = j;
- for (const dstate_id_t &s : accel) {
- assert(j <= 256);
- DEBUG_PRINTF("mapping state %u to %u\n", s, j);
- info.states[s].impl_id = j++;
- }
- *accept_limit = j;
- for (const dstate_id_t &s : accept) {
- assert(j <= 256);
- DEBUG_PRINTF("mapping state %u to %u\n", s, j);
- info.states[s].impl_id = j++;
- }
-}
-
-static
+ accel.push_back(i);
+ } else {
+ norm.push_back(i);
+ }
+ }
+
+ u32 j = 1; /* dead is already at 0 */
+ for (const dstate_id_t &s : norm) {
+ assert(j <= 256);
+ DEBUG_PRINTF("mapping state %u to %u\n", s, j);
+ info.states[s].impl_id = j++;
+ }
+ *accel_limit = j;
+ for (const dstate_id_t &s : accel) {
+ assert(j <= 256);
+ DEBUG_PRINTF("mapping state %u to %u\n", s, j);
+ info.states[s].impl_id = j++;
+ }
+ *accept_limit = j;
+ for (const dstate_id_t &s : accept) {
+ assert(j <= 256);
+ DEBUG_PRINTF("mapping state %u to %u\n", s, j);
+ info.states[s].impl_id = j++;
+ }
+}
+
+static
bytecode_ptr<NFA> mcclellanCompile8(dfa_info &info, const CompileContext &cc,
set<dstate_id_t> *accel_states) {
- DEBUG_PRINTF("building mcclellan 8\n");
-
- vector<u32> reports;
- vector<u32> reports_eod;
- ReportID arb;
- u8 single;
-
+ DEBUG_PRINTF("building mcclellan 8\n");
+
+ vector<u32> reports;
+ vector<u32> reports_eod;
+ ReportID arb;
+ u8 single;
+
auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
map<dstate_id_t, AccelScheme> accel_escape_info
= info.strat.getAccelInfo(cc.grey);
-
- size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size();
- size_t aux_size = sizeof(mstate_aux) * info.size();
- size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size);
+
+ size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size();
+ size_t aux_size = sizeof(mstate_aux) * info.size();
+ size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size);
size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
- size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
- + ri->getReportListSize(), 32);
- size_t total_size = accel_offset + accel_size;
-
- DEBUG_PRINTF("aux_size %zu\n", aux_size);
- DEBUG_PRINTF("aux_offset %zu\n", aux_offset);
- DEBUG_PRINTF("rl size %u\n", ri->getReportListSize());
- DEBUG_PRINTF("accel_size %zu\n", accel_size);
- DEBUG_PRINTF("accel_offset %zu\n", accel_offset);
- DEBUG_PRINTF("total_size %zu\n", total_size);
-
- accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
- assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
-
+ size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
+ + ri->getReportListSize(), 32);
+ size_t total_size = accel_offset + accel_size;
+
+ DEBUG_PRINTF("aux_size %zu\n", aux_size);
+ DEBUG_PRINTF("aux_offset %zu\n", aux_offset);
+ DEBUG_PRINTF("rl size %u\n", ri->getReportListSize());
+ DEBUG_PRINTF("accel_size %zu\n", accel_size);
+ DEBUG_PRINTF("accel_offset %zu\n", accel_offset);
+ DEBUG_PRINTF("total_size %zu\n", total_size);
+
+ accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
+ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
+
auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
- char *nfa_base = (char *)nfa.get();
-
- mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get());
-
+ char *nfa_base = (char *)nfa.get();
+
+ mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get());
+
allocateFSN8(info, accel_escape_info, &m->accel_limit_8,
&m->accept_limit_8);
- populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset,
+ populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset,
accel_escape_info.size(), arb, single, nfa.get());
-
- vector<u32> reportOffsets;
-
- ri->fillReportLists(nfa.get(), aux_offset + aux_size, reportOffsets);
-
- /* copy in the state information */
- u8 *succ_table = (u8 *)(nfa_base + sizeof(NFA) + sizeof(mcclellan));
- mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset);
-
- for (size_t i = 0; i < info.size(); i++) {
+
+ vector<u32> reportOffsets;
+
+ ri->fillReportLists(nfa.get(), aux_offset + aux_size, reportOffsets);
+
+ /* copy in the state information */
+ u8 *succ_table = (u8 *)(nfa_base + sizeof(NFA) + sizeof(mcclellan));
+ mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset);
+
+ for (size_t i = 0; i < info.size(); i++) {
if (contains(accel_escape_info, i)) {
- u32 j = info.implId(i);
-
- aux[j].accel_offset = accel_offset;
- accel_offset += info.strat.accelSize();
-
+ u32 j = info.implId(i);
+
+ aux[j].accel_offset = accel_offset;
+ accel_offset += info.strat.accelSize();
+
info.strat.buildAccel(i, accel_escape_info.at(i),
(void *)((char *)m + aux[j].accel_offset));
- }
-
- fillInBasicState8(info, aux, succ_table, reportOffsets, reports,
- reports_eod, i);
- }
-
- assert(accel_offset + sizeof(NFA) <= total_size);
-
- DEBUG_PRINTF("rl size %zu\n", ri->size());
-
+ }
+
+ fillInBasicState8(info, aux, succ_table, reportOffsets, reports,
+ reports_eod, i);
+ }
+
+ assert(accel_offset + sizeof(NFA) <= total_size);
+
+ DEBUG_PRINTF("rl size %zu\n", ri->size());
+
if (accel_states && nfa) {
fillAccelOut(accel_escape_info, accel_states);
}
- return nfa;
-}
-
+ return nfa;
+}
+
#define MAX_SHERMAN_LIST_LEN 9
-
-static
+
+static
void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate,
- dstate_id_t max) {
- if (candidate < max) {
- dest.insert(candidate);
- }
-}
-
-static
+ dstate_id_t max) {
+ if (candidate < max) {
+ dest.insert(candidate);
+ }
+}
+
+static
void addSuccessors(flat_set<dstate_id_t> &dest, const dstate &source,
- u16 alphasize, dstate_id_t curr_id) {
- for (symbol_t s = 0; s < alphasize; s++) {
- addIfEarlier(dest, source.next[s], curr_id);
- }
-}
-
+ u16 alphasize, dstate_id_t curr_id) {
+ for (symbol_t s = 0; s < alphasize; s++) {
+ addIfEarlier(dest, source.next[s], curr_id);
+ }
+}
+
/* \brief Returns a set of states to search for a better daddy. */
static
flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info,
@@ -1037,46 +1037,46 @@ flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info,
return hinted;
}
-#define MAX_SHERMAN_SELF_LOOP 20
-
-static
+#define MAX_SHERMAN_SELF_LOOP 20
+
+static
void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
bool any_cyclic_near_anchored_state,
bool trust_daddy_states, const Grey &grey) {
- if (!grey.allowShermanStates) {
- return;
- }
-
- const u16 width = using8bit ? sizeof(u8) : sizeof(u16);
- const u16 alphasize = info.impl_alpha_size;
-
- if (info.raw.start_anchored != DEAD_STATE
- && any_cyclic_near_anchored_state
- && curr_id < alphasize * 3) {
- /* crude attempt to prevent frequent states from being sherman'ed
- * depends on the fact that states are numbers are currently in bfs
- * order */
- DEBUG_PRINTF("%hu is banned\n", curr_id);
- return;
- }
-
- if (info.raw.start_floating != DEAD_STATE
- && curr_id >= info.raw.start_floating
- && curr_id < info.raw.start_floating + alphasize * 3) {
- /* crude attempt to prevent frequent states from being sherman'ed
- * depends on the fact that states are numbers are currently in bfs
- * order */
- DEBUG_PRINTF("%hu is banned (%hu)\n", curr_id, info.raw.start_floating);
- return;
- }
-
- const u16 full_state_size = width * alphasize;
- const u16 max_list_len = MIN(MAX_SHERMAN_LIST_LEN,
- (full_state_size - 2)/(width + 1));
- u16 best_score = 0;
- dstate_id_t best_daddy = 0;
- dstate &currState = info.states[curr_id];
-
+ if (!grey.allowShermanStates) {
+ return;
+ }
+
+ const u16 width = using8bit ? sizeof(u8) : sizeof(u16);
+ const u16 alphasize = info.impl_alpha_size;
+
+ if (info.raw.start_anchored != DEAD_STATE
+ && any_cyclic_near_anchored_state
+ && curr_id < alphasize * 3) {
+ /* crude attempt to prevent frequent states from being sherman'ed
+ * depends on the fact that states are numbers are currently in bfs
+ * order */
+ DEBUG_PRINTF("%hu is banned\n", curr_id);
+ return;
+ }
+
+ if (info.raw.start_floating != DEAD_STATE
+ && curr_id >= info.raw.start_floating
+ && curr_id < info.raw.start_floating + alphasize * 3) {
+ /* crude attempt to prevent frequent states from being sherman'ed
+ * depends on the fact that states are numbers are currently in bfs
+ * order */
+ DEBUG_PRINTF("%hu is banned (%hu)\n", curr_id, info.raw.start_floating);
+ return;
+ }
+
+ const u16 full_state_size = width * alphasize;
+ const u16 max_list_len = MIN(MAX_SHERMAN_LIST_LEN,
+ (full_state_size - 2)/(width + 1));
+ u16 best_score = 0;
+ dstate_id_t best_daddy = 0;
+ dstate &currState = info.states[curr_id];
+
flat_set<dstate_id_t> hinted;
if (trust_daddy_states) {
// Use the daddy already set for this state so long as it isn't already
@@ -1093,88 +1093,88 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
}
assert(!info.is_sherman(granddaddy));
hinted.insert(granddaddy);
- }
+ }
} else {
hinted = find_daddy_candidates(info, curr_id);
- }
-
- for (const dstate_id_t &donor : hinted) {
- assert(donor < curr_id);
- u32 score = 0;
-
+ }
+
+ for (const dstate_id_t &donor : hinted) {
+ assert(donor < curr_id);
+ u32 score = 0;
+
if (info.is_sherman(donor) || info.is_widestate(donor)) {
- continue;
- }
-
- const dstate &donorState = info.states[donor];
- for (symbol_t s = 0; s < alphasize; s++) {
- if (currState.next[s] == donorState.next[s]) {
- score++;
- }
- }
-
- /* prefer lower ids to provide some stability amongst potential
- * siblings */
- if (score > best_score || (score == best_score && donor < best_daddy)) {
- best_daddy = donor;
- best_score = score;
-
- if (score == alphasize) {
- break;
- }
- }
- }
-
- currState.daddy = best_daddy;
- info.extra[curr_id].daddytaken = best_score;
- DEBUG_PRINTF("%hu -> daddy %hu: %u/%u BF\n", curr_id, best_daddy,
- best_score, alphasize);
-
- if (best_score + max_list_len < alphasize) {
- return; /* ??? */
- }
-
- if (info.is_sherman(currState.daddy)) {
- return;
- }
-
- u32 self_loop_width = 0;
+ continue;
+ }
+
+ const dstate &donorState = info.states[donor];
+ for (symbol_t s = 0; s < alphasize; s++) {
+ if (currState.next[s] == donorState.next[s]) {
+ score++;
+ }
+ }
+
+ /* prefer lower ids to provide some stability amongst potential
+ * siblings */
+ if (score > best_score || (score == best_score && donor < best_daddy)) {
+ best_daddy = donor;
+ best_score = score;
+
+ if (score == alphasize) {
+ break;
+ }
+ }
+ }
+
+ currState.daddy = best_daddy;
+ info.extra[curr_id].daddytaken = best_score;
+ DEBUG_PRINTF("%hu -> daddy %hu: %u/%u BF\n", curr_id, best_daddy,
+ best_score, alphasize);
+
+ if (best_score + max_list_len < alphasize) {
+ return; /* ??? */
+ }
+
+ if (info.is_sherman(currState.daddy)) {
+ return;
+ }
+
+ u32 self_loop_width = 0;
const dstate &curr_raw = info.states[curr_id];
- for (unsigned i = 0; i < N_CHARS; i++) {
- if (curr_raw.next[info.alpha_remap[i]] == curr_id) {
- self_loop_width++;
- }
- }
-
- if (self_loop_width > MAX_SHERMAN_SELF_LOOP) {
- DEBUG_PRINTF("%hu is banned wide self loop (%u)\n", curr_id,
+ for (unsigned i = 0; i < N_CHARS; i++) {
+ if (curr_raw.next[info.alpha_remap[i]] == curr_id) {
+ self_loop_width++;
+ }
+ }
+
+ if (self_loop_width > MAX_SHERMAN_SELF_LOOP) {
+ DEBUG_PRINTF("%hu is banned wide self loop (%u)\n", curr_id,
self_loop_width);
- return;
- }
-
- DEBUG_PRINTF("%hu is sherman\n", curr_id);
- info.extra[curr_id].shermanState = true;
-}
-
-static
-bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
- symbol_t alphasize = raw.getImplAlphaSize();
- for (symbol_t s = 0; s < alphasize; s++) {
- dstate_id_t succ_id = raw.states[root].next[s];
- if (succ_id == DEAD_STATE) {
- continue;
- }
-
- const dstate &succ = raw.states[succ_id];
- for (symbol_t t = 0; t < alphasize; t++) {
- if (succ.next[t] == root || succ.next[t] == succ_id) {
- return true;
- }
- }
- }
- return false;
-}
-
+ return;
+ }
+
+ DEBUG_PRINTF("%hu is sherman\n", curr_id);
+ info.extra[curr_id].shermanState = true;
+}
+
+static
+bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
+ symbol_t alphasize = raw.getImplAlphaSize();
+ for (symbol_t s = 0; s < alphasize; s++) {
+ dstate_id_t succ_id = raw.states[root].next[s];
+ if (succ_id == DEAD_STATE) {
+ continue;
+ }
+
+ const dstate &succ = raw.states[succ_id];
+ for (symbol_t t = 0; t < alphasize; t++) {
+ if (succ.next[t] == root || succ.next[t] == succ_id) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
/* \brief Test for only-one-predecessor property. */
static
bool check_property1(const DfaPrevInfo &info, const u16 impl_alpha_size,
@@ -1464,17 +1464,17 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
bool trust_daddy_states,
set<dstate_id_t> *accel_states) {
assert(!is_dead(raw));
-
- dfa_info info(strat);
- bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256;
-
- if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
- * mode with our semantics */
- raw.stripExtraEodReports();
- }
-
- bool has_eod_reports = raw.hasEodReports();
-
+
+ dfa_info info(strat);
+ bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256;
+
+ if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
+ * mode with our semantics */
+ raw.stripExtraEodReports();
+ }
+
+ bool has_eod_reports = raw.hasEodReports();
+
bytecode_ptr<NFA> nfa;
if (!using8bit) {
// Wide state optimization
@@ -1486,7 +1486,7 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
u16 total_daddy = 0;
bool any_cyclic_near_anchored_state
= is_cyclic_near(raw, raw.start_anchored);
-
+
// Sherman optimization
if (info.impl_alpha_size > 16) {
for (u32 i = 0; i < info.size(); i++) {
@@ -1503,20 +1503,20 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
info.size() * info.impl_alpha_size, info.size(),
info.impl_alpha_size);
}
-
+
nfa = mcclellanCompile16(info, cc, accel_states);
- } else {
+ } else {
nfa = mcclellanCompile8(info, cc, accel_states);
- }
-
- if (has_eod_reports) {
- nfa->flags |= NFA_ACCEPTS_EOD;
- }
-
- DEBUG_PRINTF("compile done\n");
- return nfa;
-}
-
+ }
+
+ if (has_eod_reports) {
+ nfa->flags |= NFA_ACCEPTS_EOD;
+ }
+
+ DEBUG_PRINTF("compile done\n");
+ return nfa;
+}
+
bytecode_ptr<NFA> mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
const ReportManager &rm,
bool only_accel_init,
@@ -1524,33 +1524,33 @@ bytecode_ptr<NFA> mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
set<dstate_id_t> *accel_states) {
mcclellan_build_strat mbs(raw, rm, only_accel_init);
return mcclellanCompile_i(raw, mbs, cc, trust_daddy_states, accel_states);
-}
-
-size_t mcclellan_build_strat::accelSize(void) const {
- return sizeof(AccelAux); /* McClellan accel structures are just bare
- * accelaux */
-}
-
-u32 mcclellanStartReachSize(const raw_dfa *raw) {
- if (raw->states.size() < 2) {
- return 0;
- }
-
- const dstate &ds = raw->states[raw->start_anchored];
-
- CharReach out;
- for (unsigned i = 0; i < N_CHARS; i++) {
- if (ds.next[raw->alpha_remap[i]] != DEAD_STATE) {
- out.set(i);
- }
- }
-
- return out.count();
-}
-
+}
+
+size_t mcclellan_build_strat::accelSize(void) const {
+ return sizeof(AccelAux); /* McClellan accel structures are just bare
+ * accelaux */
+}
+
+u32 mcclellanStartReachSize(const raw_dfa *raw) {
+ if (raw->states.size() < 2) {
+ return 0;
+ }
+
+ const dstate &ds = raw->states[raw->start_anchored];
+
+ CharReach out;
+ for (unsigned i = 0; i < N_CHARS; i++) {
+ if (ds.next[raw->alpha_remap[i]] != DEAD_STATE) {
+ out.set(i);
+ }
+ }
+
+ return out.count();
+}
+
bool has_accel_mcclellan(const NFA *nfa) {
- const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
- return m->has_accel;
-}
-
-} // namespace ue2
+ const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
+ return m->has_accel;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfa/mcclellancompile.h b/contrib/libs/hyperscan/src/nfa/mcclellancompile.h
index d819a86f2d..73cb9fd775 100644
--- a/contrib/libs/hyperscan/src/nfa/mcclellancompile.h
+++ b/contrib/libs/hyperscan/src/nfa/mcclellancompile.h
@@ -1,71 +1,71 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MCCLELLANCOMPILE_H
-#define MCCLELLANCOMPILE_H
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MCCLELLANCOMPILE_H
+#define MCCLELLANCOMPILE_H
+
#include "accel_dfa_build_strat.h"
-#include "rdfa.h"
-#include "ue2common.h"
+#include "rdfa.h"
+#include "ue2common.h"
#include "util/bytecode_ptr.h"
-
-#include <memory>
-#include <vector>
-#include <set>
-
-struct NFA;
-
-namespace ue2 {
-
+
+#include <memory>
+#include <vector>
+#include <set>
+
+struct NFA;
+
+namespace ue2 {
+
class ReportManager;
-struct CompileContext;
-
+struct CompileContext;
+
class mcclellan_build_strat : public accel_dfa_build_strat {
-public:
+public:
mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in,
bool only_accel_init_in)
: accel_dfa_build_strat(rm_in, only_accel_init_in), rdfa(rdfa_in) {}
- raw_dfa &get_raw() const override { return rdfa; }
- std::unique_ptr<raw_report_info> gatherReports(
+ raw_dfa &get_raw() const override { return rdfa; }
+ std::unique_ptr<raw_report_info> gatherReports(
std::vector<u32> &reports /* out */,
std::vector<u32> &reports_eod /* out */,
u8 *isSingleReport /* out */,
ReportID *arbReport /* out */) const override;
- size_t accelSize(void) const override;
+ size_t accelSize(void) const override;
u32 max_allowed_offset_accel() const override;
u32 max_stop_char() const override;
u32 max_floating_stop_char() const override;
DfaType getType() const override { return McClellan; }
-
-private:
- raw_dfa &rdfa;
-};
-
+
+private:
+ raw_dfa &rdfa;
+};
+
/**
* \brief Construct an implementation DFA.
*
@@ -81,26 +81,26 @@ private:
* accelerable states
*/
bytecode_ptr<NFA>
-mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
+mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
const ReportManager &rm, bool only_accel_init,
bool trust_daddy_states = false,
- std::set<dstate_id_t> *accel_states = nullptr);
-
-/* used internally by mcclellan/haig/gough compile process */
+ std::set<dstate_id_t> *accel_states = nullptr);
+
+/* used internally by mcclellan/haig/gough compile process */
bytecode_ptr<NFA>
mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
const CompileContext &cc, bool trust_daddy_states = false,
- std::set<dstate_id_t> *accel_states = nullptr);
-
-/**
- * \brief Returns the width of the character reach at start.
- */
-u32 mcclellanStartReachSize(const raw_dfa *raw);
-
-std::set<ReportID> all_reports(const raw_dfa &rdfa);
-
+ std::set<dstate_id_t> *accel_states = nullptr);
+
+/**
+ * \brief Returns the width of the character reach at start.
+ */
+u32 mcclellanStartReachSize(const raw_dfa *raw);
+
+std::set<ReportID> all_reports(const raw_dfa &rdfa);
+
bool has_accel_mcclellan(const NFA *nfa);
-
-} // namespace ue2
-
+
+} // namespace ue2
+
#endif // MCCLELLANCOMPILE_H
diff --git a/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.cpp b/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.cpp
index 064f0c86e0..3e299b81e2 100644
--- a/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.cpp
+++ b/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.cpp
@@ -1,192 +1,192 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "mcclellancompile_util.h"
-
-#include "rdfa.h"
-#include "util/container.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "mcclellancompile_util.h"
+
+#include "rdfa.h"
+#include "util/container.h"
#include "util/hash.h"
-#include "ue2common.h"
-
-#include <deque>
+#include "ue2common.h"
+
+#include <deque>
#include <map>
-
-using namespace std;
-
-namespace ue2 {
-
-#define INIT_STATE 1
-
-static
+
+using namespace std;
+
+namespace ue2 {
+
+#define INIT_STATE 1
+
+static
bool state_has_reports(const raw_dfa &raw, dstate_id_t s) {
const auto &ds = raw.states[s];
return !ds.reports.empty() || !ds.reports_eod.empty();
}
static
-u32 count_dots(const raw_dfa &raw) {
- assert(raw.start_anchored == INIT_STATE);
-
- u32 i = INIT_STATE;
- for (; i < raw.states.size() && i != raw.start_floating; i++) {
- DEBUG_PRINTF("checking %u\n", i);
- assert(raw.states[i].reports.empty());
- assert(raw.states[i].reports_eod.empty());
-
- for (symbol_t s = 0; s < raw.getImplAlphaSize(); s++) {
- DEBUG_PRINTF("%hu -> %hu\n", s, raw.states[i].next[s]);
- if (raw.states[i].next[s] != i + 1) {
- goto validate;
- }
- }
-
+u32 count_dots(const raw_dfa &raw) {
+ assert(raw.start_anchored == INIT_STATE);
+
+ u32 i = INIT_STATE;
+ for (; i < raw.states.size() && i != raw.start_floating; i++) {
+ DEBUG_PRINTF("checking %u\n", i);
+ assert(raw.states[i].reports.empty());
+ assert(raw.states[i].reports_eod.empty());
+
+ for (symbol_t s = 0; s < raw.getImplAlphaSize(); s++) {
+ DEBUG_PRINTF("%hu -> %hu\n", s, raw.states[i].next[s]);
+ if (raw.states[i].next[s] != i + 1) {
+ goto validate;
+ }
+ }
+
if (state_has_reports(raw, raw.states[i].next[0])) {
- goto validate;
- }
-
- DEBUG_PRINTF("got dot\n");
- }
-
- validate:
- u32 dot_count = i - INIT_STATE;
-
- /* we need to check that no later state has a transition into these leading
- * dots */
- for (; i < raw.states.size(); i++) {
- for (symbol_t s = 0; s < raw.getImplAlphaSize(); s++) {
- DEBUG_PRINTF("%hu -> %hu\n", s, raw.states[i].next[s]);
- dstate_id_t n = raw.states[i].next[s];
- if (n != DEAD_STATE && n <= dot_count) {
- return 0;
- }
- }
- }
-
- return dot_count;
-}
-
-static
-void prune_leading_states(raw_dfa &raw, u32 count) {
- if (!count) {
- return;
- }
-
- for (u32 i = INIT_STATE + count; i < raw.states.size(); i++) {
- dstate &curr = raw.states[i - count];
- curr = raw.states[i];
- if (curr.daddy > count) {
- curr.daddy -= count;
- } else {
- curr.daddy = DEAD_STATE;
- }
-
- for (u32 j = 0; j < raw.alpha_size; j++) {
- assert(curr.next[j] == DEAD_STATE || curr.next[j] > count);
- if (curr.next[j]) {
- curr.next[j] -= count;
- }
- }
- }
-
- raw.states.erase(raw.states.end() - count, raw.states.end());
-}
-
-u32 remove_leading_dots(raw_dfa &raw) {
- u32 count = count_dots(raw);
- prune_leading_states(raw, count);
- DEBUG_PRINTF("removed %u leading dots\n", count);
- return count;
-}
-
-static never_inline
-u32 calc_min_dist_from_bob(raw_dfa &raw, vector<u32> *dist_in) {
- vector<u32> &dist = *dist_in;
+ goto validate;
+ }
+
+ DEBUG_PRINTF("got dot\n");
+ }
+
+ validate:
+ u32 dot_count = i - INIT_STATE;
+
+ /* we need to check that no later state has a transition into these leading
+ * dots */
+ for (; i < raw.states.size(); i++) {
+ for (symbol_t s = 0; s < raw.getImplAlphaSize(); s++) {
+ DEBUG_PRINTF("%hu -> %hu\n", s, raw.states[i].next[s]);
+ dstate_id_t n = raw.states[i].next[s];
+ if (n != DEAD_STATE && n <= dot_count) {
+ return 0;
+ }
+ }
+ }
+
+ return dot_count;
+}
+
+static
+void prune_leading_states(raw_dfa &raw, u32 count) {
+ if (!count) {
+ return;
+ }
+
+ for (u32 i = INIT_STATE + count; i < raw.states.size(); i++) {
+ dstate &curr = raw.states[i - count];
+ curr = raw.states[i];
+ if (curr.daddy > count) {
+ curr.daddy -= count;
+ } else {
+ curr.daddy = DEAD_STATE;
+ }
+
+ for (u32 j = 0; j < raw.alpha_size; j++) {
+ assert(curr.next[j] == DEAD_STATE || curr.next[j] > count);
+ if (curr.next[j]) {
+ curr.next[j] -= count;
+ }
+ }
+ }
+
+ raw.states.erase(raw.states.end() - count, raw.states.end());
+}
+
+u32 remove_leading_dots(raw_dfa &raw) {
+ u32 count = count_dots(raw);
+ prune_leading_states(raw, count);
+ DEBUG_PRINTF("removed %u leading dots\n", count);
+ return count;
+}
+
+static never_inline
+u32 calc_min_dist_from_bob(raw_dfa &raw, vector<u32> *dist_in) {
+ vector<u32> &dist = *dist_in;
dist.assign(raw.states.size(), ~0U);
-
- assert(raw.start_anchored != DEAD_STATE);
-
+
+ assert(raw.start_anchored != DEAD_STATE);
+
deque<dstate_id_t> to_visit = { raw.start_anchored };
- dist[raw.start_anchored] = 0;
-
- u32 last_d = 0;
-
- while (!to_visit.empty()) {
- dstate_id_t s = to_visit.front();
- DEBUG_PRINTF("inspecting %u\n", s);
- to_visit.pop_front();
- assert(s != DEAD_STATE);
-
- u32 d = dist[s];
- assert(d >= last_d);
- assert(d != ~0U);
-
+ dist[raw.start_anchored] = 0;
+
+ u32 last_d = 0;
+
+ while (!to_visit.empty()) {
+ dstate_id_t s = to_visit.front();
+ DEBUG_PRINTF("inspecting %u\n", s);
+ to_visit.pop_front();
+ assert(s != DEAD_STATE);
+
+ u32 d = dist[s];
+ assert(d >= last_d);
+ assert(d != ~0U);
+
for (dstate_id_t t : raw.states[s].next) {
- if (t == DEAD_STATE) {
- continue;
- }
- if (dist[t] == ~0U) {
- to_visit.push_back(t);
- dist[t] = d + 1;
- } else {
- assert(dist[t] <= d + 1);
- }
- }
-
- last_d = d;
- }
-
- return last_d;
-}
-
+ if (t == DEAD_STATE) {
+ continue;
+ }
+ if (dist[t] == ~0U) {
+ to_visit.push_back(t);
+ dist[t] = d + 1;
+ } else {
+ assert(dist[t] <= d + 1);
+ }
+ }
+
+ last_d = d;
+ }
+
+ return last_d;
+}
+
bool clear_deeper_reports(raw_dfa &raw, u32 max_offset) {
DEBUG_PRINTF("clearing reports on states deeper than %u\n", max_offset);
- vector<u32> bob_dist;
- u32 max_min_dist_bob = calc_min_dist_from_bob(raw, &bob_dist);
-
- if (max_min_dist_bob <= max_offset) {
+ vector<u32> bob_dist;
+ u32 max_min_dist_bob = calc_min_dist_from_bob(raw, &bob_dist);
+
+ if (max_min_dist_bob <= max_offset) {
return false;
- }
-
+ }
+
bool changed = false;
- for (u32 s = DEAD_STATE + 1; s < raw.states.size(); s++) {
+ for (u32 s = DEAD_STATE + 1; s < raw.states.size(); s++) {
if (bob_dist[s] > max_offset && state_has_reports(raw, s)) {
DEBUG_PRINTF("clearing reports on %u (depth %u)\n", s, bob_dist[s]);
auto &ds = raw.states[s];
ds.reports.clear();
ds.reports_eod.clear();
changed = true;
- }
- }
-
+ }
+ }
+
if (!changed) {
return false;
}
-
+
// We may have cleared all reports from the DFA, in which case it should
// become empty.
if (all_of_in(raw.states, [](const dstate &ds) {
@@ -195,57 +195,57 @@ bool clear_deeper_reports(raw_dfa &raw, u32 max_offset) {
DEBUG_PRINTF("no reports left at all, dfa is dead\n");
raw.start_anchored = DEAD_STATE;
raw.start_floating = DEAD_STATE;
- }
-
+ }
+
return true;
-}
-
-set<ReportID> all_reports(const raw_dfa &rdfa) {
- set<ReportID> all;
- for (const auto &ds : rdfa.states) {
- insert(&all, ds.reports);
- insert(&all, ds.reports_eod);
- }
- return all;
-}
-
-bool has_eod_accepts(const raw_dfa &rdfa) {
- for (const auto &ds : rdfa.states) {
- if (!ds.reports_eod.empty()) {
- return true;
- }
- }
- return false;
-}
-
-bool has_non_eod_accepts(const raw_dfa &rdfa) {
- for (const auto &ds : rdfa.states) {
- if (!ds.reports.empty()) {
- return true;
- }
- }
- return false;
-}
-
-size_t hash_dfa_no_reports(const raw_dfa &rdfa) {
- size_t v = 0;
- hash_combine(v, rdfa.alpha_size);
+}
+
+set<ReportID> all_reports(const raw_dfa &rdfa) {
+ set<ReportID> all;
+ for (const auto &ds : rdfa.states) {
+ insert(&all, ds.reports);
+ insert(&all, ds.reports_eod);
+ }
+ return all;
+}
+
+bool has_eod_accepts(const raw_dfa &rdfa) {
+ for (const auto &ds : rdfa.states) {
+ if (!ds.reports_eod.empty()) {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool has_non_eod_accepts(const raw_dfa &rdfa) {
+ for (const auto &ds : rdfa.states) {
+ if (!ds.reports.empty()) {
+ return true;
+ }
+ }
+ return false;
+}
+
+size_t hash_dfa_no_reports(const raw_dfa &rdfa) {
+ size_t v = 0;
+ hash_combine(v, rdfa.alpha_size);
hash_combine(v, rdfa.alpha_remap);
-
- for (const auto &ds : rdfa.states) {
+
+ for (const auto &ds : rdfa.states) {
hash_combine(v, ds.next);
- }
-
- return v;
-}
-
-size_t hash_dfa(const raw_dfa &rdfa) {
- size_t v = 0;
- hash_combine(v, hash_dfa_no_reports(rdfa));
- hash_combine(v, all_reports(rdfa));
- return v;
-}
-
+ }
+
+ return v;
+}
+
+size_t hash_dfa(const raw_dfa &rdfa) {
+ size_t v = 0;
+ hash_combine(v, hash_dfa_no_reports(rdfa));
+ hash_combine(v, all_reports(rdfa));
+ return v;
+}
+
static
bool can_die_early(const raw_dfa &raw, dstate_id_t s,
map<dstate_id_t, u32> &visited, u32 age_limit) {
@@ -283,4 +283,4 @@ bool is_dead(const raw_dfa &rdfa) {
rdfa.start_floating == DEAD_STATE;
}
-} // namespace ue2
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.h b/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.h
index 0dc58533a1..bc730cddea 100644
--- a/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.h
+++ b/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.h
@@ -1,43 +1,43 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MCCLELLAN_COMPILE_UTIL_H
-#define MCCLELLAN_COMPILE_UTIL_H
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MCCLELLAN_COMPILE_UTIL_H
+#define MCCLELLAN_COMPILE_UTIL_H
+
#include "rdfa.h"
-#include "ue2common.h"
-
-#include <set>
-
-namespace ue2 {
-
+#include "ue2common.h"
+
+#include <set>
+
+namespace ue2 {
+
u32 remove_leading_dots(raw_dfa &raw);
-
+
/**
* \brief Clear reports on any states that are deeper than \a max_offset from
* start of stream.
@@ -46,17 +46,17 @@ u32 remove_leading_dots(raw_dfa &raw);
*/
bool clear_deeper_reports(raw_dfa &raw, u32 max_offset);
-std::set<ReportID> all_reports(const raw_dfa &rdfa);
-bool has_eod_accepts(const raw_dfa &rdfa);
-bool has_non_eod_accepts(const raw_dfa &rdfa);
-
-/** \brief Compute a simple hash of this raw_dfa. Does not include report
- * information. */
-size_t hash_dfa_no_reports(const raw_dfa &rdfa);
-
-/** \brief Compute a simple hash of this raw_dfa, including its reports. */
-size_t hash_dfa(const raw_dfa &rdfa);
-
+std::set<ReportID> all_reports(const raw_dfa &rdfa);
+bool has_eod_accepts(const raw_dfa &rdfa);
+bool has_non_eod_accepts(const raw_dfa &rdfa);
+
+/** \brief Compute a simple hash of this raw_dfa. Does not include report
+ * information. */
+size_t hash_dfa_no_reports(const raw_dfa &rdfa);
+
+/** \brief Compute a simple hash of this raw_dfa, including its reports. */
+size_t hash_dfa(const raw_dfa &rdfa);
+
bool can_die_early(const raw_dfa &raw, u32 age_limit);
/**
@@ -66,6 +66,6 @@ bool can_die_early(const raw_dfa &raw, u32 age_limit);
bool is_dead(const raw_dfa &rdfa);
-} // namespace ue2
-
-#endif
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/mpv.c b/contrib/libs/hyperscan/src/nfa/mpv.c
index a3245267ca..552754d608 100644
--- a/contrib/libs/hyperscan/src/nfa/mpv.c
+++ b/contrib/libs/hyperscan/src/nfa/mpv.c
@@ -1,1096 +1,1096 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "mpv.h"
-
-#include "mpv_internal.h"
-#include "nfa_api.h"
-#include "nfa_api_queue.h"
-#include "nfa_internal.h"
-#include "shufti.h"
-#include "truffle.h"
-#include "ue2common.h"
-#include "vermicelli.h"
-#include "vermicelli_run.h"
-#include "util/multibit.h"
-#include "util/partial_store.h"
-#include "util/simd_utils.h"
-#include "util/unaligned.h"
-
-#include <string.h>
-
-#define MIN_SKIP_REPEAT 32
-
-typedef struct mpv_pq_item PQ_T;
-#define PQ_COMP(pqc_items, a, b) \
- ((pqc_items)[a].trigger_loc < (pqc_items)[b].trigger_loc)
-#define PQ_COMP_B(pqc_items, a, b_fixed) \
- ((pqc_items)[a].trigger_loc < (b_fixed).trigger_loc)
-
-#include "util/pqueue.h"
-
-static really_inline
-u64a *get_counter_n(struct mpv_decomp_state *s,
- const struct mpv *m, u32 n) {
- return (u64a *)((char *)s + get_counter_info(m)[n].counter_offset);
-}
-
-static really_inline
-u64a *get_counter_for_kilo(struct mpv_decomp_state *s,
- const struct mpv_kilopuff *kp) {
- return (u64a *)((char *)s + kp->counter_offset);
-}
-
-static really_inline
-u64a get_counter_value_for_kilo(struct mpv_decomp_state *s,
- const struct mpv_kilopuff *kp) {
- return *get_counter_for_kilo(s, kp) + s->counter_adj;
-}
-
-static really_inline
-const u64a *get_counter_for_kilo_c(const struct mpv_decomp_state *s,
- const struct mpv_kilopuff *kp) {
- return (const u64a *)((const char *)s + kp->counter_offset);
-}
-
-
-static never_inline
-void normalize_counters(struct mpv_decomp_state *dstate, const struct mpv *m) {
- u64a adj = dstate->counter_adj;
- u64a *counters = get_counter_n(dstate, m, 0);
-
- if (!adj) {
- return;
- }
-
- for (u32 i = 0; i < m->counter_count; i++) {
- /* update all counters - alive or dead */
- counters[i] += adj;
- DEBUG_PRINTF("counter %u: %llu\n", i, counters[i]);
- }
-
- dstate->counter_adj = 0;
-}
-
-static really_inline
-char processReports(const struct mpv *m, u8 *reporters,
- const struct mpv_decomp_state *dstate, u64a counter_adj,
- u64a report_offset, NfaCallback cb, void *ctxt,
- ReportID *rl, u32 *rl_count_out) {
- DEBUG_PRINTF("reporting at offset %llu\n", report_offset);
- const struct mpv_kilopuff *kp = (const void *)(m + 1);
- u32 rl_count = 0;
-
- for (u32 i = mmbit_iterate(reporters, m->kilo_count, MMB_INVALID);
- i != MMB_INVALID; i = mmbit_iterate(reporters, m->kilo_count, i)) {
- const struct mpv_puffette *curr = dstate->active[i].curr;
- u64a curr_counter_val = *get_counter_for_kilo_c(dstate, &kp[i])
- + counter_adj;
- DEBUG_PRINTF("kilo %u, underlying counter: %llu current: %llu\n", i,
- *get_counter_for_kilo_c(dstate, &kp[i]), curr_counter_val);
- assert(curr_counter_val != MPV_DEAD_VALUE); /* counter_adj should take
- * care if underlying value
- * is -1 */
- char did_stuff = 0;
-
- while (curr->report != INVALID_REPORT) {
- assert(curr_counter_val >= curr->repeats);
- if (curr->unbounded || curr_counter_val == curr->repeats) {
- DEBUG_PRINTF("report %u at %llu\n", curr->report,
- report_offset);
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "mpv.h"
+
+#include "mpv_internal.h"
+#include "nfa_api.h"
+#include "nfa_api_queue.h"
+#include "nfa_internal.h"
+#include "shufti.h"
+#include "truffle.h"
+#include "ue2common.h"
+#include "vermicelli.h"
+#include "vermicelli_run.h"
+#include "util/multibit.h"
+#include "util/partial_store.h"
+#include "util/simd_utils.h"
+#include "util/unaligned.h"
+
+#include <string.h>
+
+#define MIN_SKIP_REPEAT 32
+
+typedef struct mpv_pq_item PQ_T;
+#define PQ_COMP(pqc_items, a, b) \
+ ((pqc_items)[a].trigger_loc < (pqc_items)[b].trigger_loc)
+#define PQ_COMP_B(pqc_items, a, b_fixed) \
+ ((pqc_items)[a].trigger_loc < (b_fixed).trigger_loc)
+
+#include "util/pqueue.h"
+
+static really_inline
+u64a *get_counter_n(struct mpv_decomp_state *s,
+ const struct mpv *m, u32 n) {
+ return (u64a *)((char *)s + get_counter_info(m)[n].counter_offset);
+}
+
+static really_inline
+u64a *get_counter_for_kilo(struct mpv_decomp_state *s,
+ const struct mpv_kilopuff *kp) {
+ return (u64a *)((char *)s + kp->counter_offset);
+}
+
+static really_inline
+u64a get_counter_value_for_kilo(struct mpv_decomp_state *s,
+ const struct mpv_kilopuff *kp) {
+ return *get_counter_for_kilo(s, kp) + s->counter_adj;
+}
+
+static really_inline
+const u64a *get_counter_for_kilo_c(const struct mpv_decomp_state *s,
+ const struct mpv_kilopuff *kp) {
+ return (const u64a *)((const char *)s + kp->counter_offset);
+}
+
+
+static never_inline
+void normalize_counters(struct mpv_decomp_state *dstate, const struct mpv *m) {
+ u64a adj = dstate->counter_adj;
+ u64a *counters = get_counter_n(dstate, m, 0);
+
+ if (!adj) {
+ return;
+ }
+
+ for (u32 i = 0; i < m->counter_count; i++) {
+ /* update all counters - alive or dead */
+ counters[i] += adj;
+ DEBUG_PRINTF("counter %u: %llu\n", i, counters[i]);
+ }
+
+ dstate->counter_adj = 0;
+}
+
+static really_inline
+char processReports(const struct mpv *m, u8 *reporters,
+ const struct mpv_decomp_state *dstate, u64a counter_adj,
+ u64a report_offset, NfaCallback cb, void *ctxt,
+ ReportID *rl, u32 *rl_count_out) {
+ DEBUG_PRINTF("reporting at offset %llu\n", report_offset);
+ const struct mpv_kilopuff *kp = (const void *)(m + 1);
+ u32 rl_count = 0;
+
+ for (u32 i = mmbit_iterate(reporters, m->kilo_count, MMB_INVALID);
+ i != MMB_INVALID; i = mmbit_iterate(reporters, m->kilo_count, i)) {
+ const struct mpv_puffette *curr = dstate->active[i].curr;
+ u64a curr_counter_val = *get_counter_for_kilo_c(dstate, &kp[i])
+ + counter_adj;
+ DEBUG_PRINTF("kilo %u, underlying counter: %llu current: %llu\n", i,
+ *get_counter_for_kilo_c(dstate, &kp[i]), curr_counter_val);
+ assert(curr_counter_val != MPV_DEAD_VALUE); /* counter_adj should take
+ * care if underlying value
+ * is -1 */
+ char did_stuff = 0;
+
+ while (curr->report != INVALID_REPORT) {
+ assert(curr_counter_val >= curr->repeats);
+ if (curr->unbounded || curr_counter_val == curr->repeats) {
+ DEBUG_PRINTF("report %u at %llu\n", curr->report,
+ report_offset);
+
if (curr->unbounded && !curr->simple_exhaust) {
- assert(rl_count < m->puffette_count);
- *rl = curr->report;
- ++rl;
- rl_count++;
- }
-
+ assert(rl_count < m->puffette_count);
+ *rl = curr->report;
+ ++rl;
+ rl_count++;
+ }
+
if (cb(0, report_offset, curr->report, ctxt) ==
MO_HALT_MATCHING) {
- DEBUG_PRINTF("bailing\n");
- return MO_HALT_MATCHING;
- }
- did_stuff = 1;
- }
-
- curr--;
- }
-
- if (!did_stuff) {
- mmbit_unset(reporters, m->kilo_count, i);
- }
- }
-
- *rl_count_out = rl_count;
- return MO_CONTINUE_MATCHING;
-}
-
-static
-ReportID *get_report_list(const struct mpv *m, struct mpv_decomp_state *s) {
- return (ReportID *)((char *)s + m->report_list_offset);
-}
-
-static really_inline
-char processReportsForRange(const struct mpv *m, u8 *reporters,
- struct mpv_decomp_state *dstate, u64a first_offset,
- size_t length, NfaCallback cb, void *ctxt) {
- if (!length) {
- return MO_CONTINUE_MATCHING;
- }
-
- u64a counter_adj = dstate->counter_adj;
- u32 rl_count = 0;
- ReportID *rl = get_report_list(m, dstate);
- char rv = processReports(m, reporters, dstate, 1 + counter_adj,
- first_offset + 1, cb, ctxt, rl, &rl_count);
- if (rv != MO_CONTINUE_MATCHING) {
- DEBUG_PRINTF("bailing\n");
- return rv;
- }
- if (!rl_count) {
- return MO_CONTINUE_MATCHING;
- }
-
+ DEBUG_PRINTF("bailing\n");
+ return MO_HALT_MATCHING;
+ }
+ did_stuff = 1;
+ }
+
+ curr--;
+ }
+
+ if (!did_stuff) {
+ mmbit_unset(reporters, m->kilo_count, i);
+ }
+ }
+
+ *rl_count_out = rl_count;
+ return MO_CONTINUE_MATCHING;
+}
+
+static
+ReportID *get_report_list(const struct mpv *m, struct mpv_decomp_state *s) {
+ return (ReportID *)((char *)s + m->report_list_offset);
+}
+
+static really_inline
+char processReportsForRange(const struct mpv *m, u8 *reporters,
+ struct mpv_decomp_state *dstate, u64a first_offset,
+ size_t length, NfaCallback cb, void *ctxt) {
+ if (!length) {
+ return MO_CONTINUE_MATCHING;
+ }
+
+ u64a counter_adj = dstate->counter_adj;
+ u32 rl_count = 0;
+ ReportID *rl = get_report_list(m, dstate);
+ char rv = processReports(m, reporters, dstate, 1 + counter_adj,
+ first_offset + 1, cb, ctxt, rl, &rl_count);
+ if (rv != MO_CONTINUE_MATCHING) {
+ DEBUG_PRINTF("bailing\n");
+ return rv;
+ }
+ if (!rl_count) {
+ return MO_CONTINUE_MATCHING;
+ }
+
DEBUG_PRINTF("length=%zu, rl_count=%u\n", length, rl_count);
for (size_t i = 2; i <= length; i++) {
- for (u32 j = 0; j < rl_count; j++) {
+ for (u32 j = 0; j < rl_count; j++) {
if (cb(0, first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) {
- DEBUG_PRINTF("bailing\n");
- return MO_HALT_MATCHING;
- }
- }
- }
-
- return MO_CONTINUE_MATCHING;
-}
-
-/* returns last puffette that we have satisfied */
-static
-const struct mpv_puffette *get_curr_puff(const struct mpv *m,
- const struct mpv_kilopuff *kp,
- struct mpv_decomp_state *dstate) {
- u64a counter = *get_counter_for_kilo(dstate, kp);
- assert(counter != MPV_DEAD_VALUE);
-
- const struct mpv_puffette *p = get_puff_array(m, kp);
- DEBUG_PRINTF("looking for current puffette (counter = %llu)\n", counter);
- DEBUG_PRINTF("next: (%u, %u)\n", p->repeats, p->report);
- while (counter + 1 >= p->repeats && p->report != INVALID_REPORT) {
- DEBUG_PRINTF("advancing\n");
- ++p;
- DEBUG_PRINTF("next: (%u, %u)\n", p->repeats, p->report);
- }
-
- return p - 1;
-}
-
-static
-const struct mpv_puffette *get_init_puff(const struct mpv *m,
- const struct mpv_kilopuff *kp) {
- const struct mpv_puffette *p = get_puff_array(m, kp);
- while (p->repeats == 1) {
- ++p;
- }
- return p - 1;
-}
-
-
-/* returns the last puffette whose repeats have been satisfied */
-static really_inline
-const struct mpv_puffette *update_curr_puff(const struct mpv *m, u8 *reporters,
- u64a counter,
- const struct mpv_puffette *in,
- u32 kilo_index) {
- assert(counter != MPV_DEAD_VALUE);
-
- const struct mpv_puffette *p = in;
- DEBUG_PRINTF("looking for current puffette (counter = %llu)\n", counter);
- DEBUG_PRINTF("curr: (%u, %u)\n", p->repeats, p->report);
- while (counter + 1 >= p[1].repeats && p[1].report != INVALID_REPORT) {
- DEBUG_PRINTF("advancing\n");
- ++p;
- DEBUG_PRINTF("curr: (%u, %u)\n", p->repeats, p->report);
- }
-
- if (p != in) {
- mmbit_set(reporters, m->kilo_count, kilo_index);
- }
-
- return p;
-}
-
-static really_inline
-size_t limitByReach(const struct mpv_kilopuff *kp, const u8 *buf,
- size_t length) {
- if (kp->type == MPV_VERM) {
- return vermicelliExec(kp->u.verm.c, 0, buf, buf + length) - buf;
- } else if (kp->type == MPV_SHUFTI) {
- m128 mask_lo = kp->u.shuf.mask_lo;
- m128 mask_hi = kp->u.shuf.mask_hi;
- return shuftiExec(mask_lo, mask_hi, buf, buf + length) - buf;
- } else if (kp->type == MPV_TRUFFLE) {
- return truffleExec(kp->u.truffle.mask1, kp->u.truffle.mask2, buf, buf + length) - buf;
- } else if (kp->type == MPV_NVERM) {
- return nvermicelliExec(kp->u.verm.c, 0, buf, buf + length) - buf;
- }
-
- assert(kp->type == MPV_DOT);
- return length;
-}
-
-static never_inline
-void fillLimits(const struct mpv *m, u8 *active, u8 *reporters,
- struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
- const u8 *buf, size_t length) {
- DEBUG_PRINTF("filling limits %zu\n", length);
- assert(!dstate->pq_size);
-
- if (!length) {
- DEBUG_PRINTF("0 length\n");
- return;
- }
-
- const struct mpv_kilopuff *kp = (const void *)(m + 1);
-
- for (u32 i = mmbit_iterate(active, m->kilo_count, MMB_INVALID);
- i != MMB_INVALID; i = mmbit_iterate(active, m->kilo_count, i)) {
- dstate->active[i].curr = get_curr_puff(m, &kp[i], dstate);
- if (dstate->active[i].curr->report != INVALID_REPORT) {
- /* this kilo puff may fire reports */
- mmbit_set(reporters, m->kilo_count, i);
- }
-
- u64a lim = limitByReach(&kp[i], buf, length);
- DEBUG_PRINTF("lim %llu/%zu\n", lim, length);
-
- if (kp[i].dead_point != MPV_DEAD_VALUE) {
- assert(!kp[i].auto_restart);
- u64a counter = get_counter_value_for_kilo(dstate, &kp[i]);
- u64a dp_trigger = kp[i].dead_point - counter;
- if (dp_trigger < lim) {
- DEBUG_PRINTF("dead point trigger %llu\n", dp_trigger);
- lim = dp_trigger;
- }
- }
-
- if (kp[i].auto_restart && !lim) {
- *get_counter_for_kilo(dstate, &kp[i]) = MPV_DEAD_VALUE;
- mmbit_unset(reporters, m->kilo_count, i);
- /* the counter value will cause the nex_trigger calculation below to
- * adjust correctly */
- if (length == 1) {
- dstate->active[i].limit = 0;
- continue;
- }
-
- lim = limitByReach(&kp[i], buf + 1, length - 1) + 1;
-
-
- /* restart active counters */
- dstate->active[i].curr = get_init_puff(m, &kp[i]);
- assert(dstate->active[i].curr[0].report == INVALID_REPORT);
-
- DEBUG_PRINTF("lim now %llu/%zu\n", lim, length);
- }
-
- dstate->active[i].limit = lim;
- if (!lim) {
- mmbit_unset(active, m->kilo_count, i);
- mmbit_unset(reporters, m->kilo_count, i);
- continue;
- }
- if (dstate->active[i].curr[1].report != INVALID_REPORT) {
- u32 next_trigger = dstate->active[i].curr[1].repeats - 1ULL
- - *get_counter_for_kilo(dstate, &kp[i]);
- DEBUG_PRINTF("next trigger %u\n", next_trigger);
- lim = MIN(lim, next_trigger);
- }
-
- if (lim != length) {
- struct mpv_pq_item temp = {
- .trigger_loc = lim,
- .kilo = i
- };
-
- DEBUG_PRINTF("push for %u at %llu\n", i, lim);
- pq_insert(pq, dstate->pq_size, temp);
- ++dstate->pq_size;
- }
-
- assert(lim || kp[i].auto_restart);
- }
-
- DEBUG_PRINTF("filled\n");
- dstate->filled = 1;
-}
-
-static never_inline
-void handleTopN(const struct mpv *m, s64a loc, u8 *active, u8 *reporters,
- struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
- const u8 *buf, size_t length, u32 i) {
- assert(i < m->kilo_count);
- DEBUG_PRINTF("MQE_TOP + %u @%lld\n", i, loc);
- if (mmbit_set(active, m->kilo_count, i)) {
- DEBUG_PRINTF("kilo is already alive and kicking\n");
- return;
- }
-
- const struct mpv_kilopuff *kp = (const struct mpv_kilopuff *)(m + 1);
-
- assert(!kp[i].auto_restart); /* handle later/never */
-
- /* we need to ensure that the counters are upto date */
- normalize_counters(dstate, m);
-
- /* reset counter */
- *get_counter_for_kilo(dstate, &kp[i]) = 0;
-
- if ((size_t)loc == length) {
- /* end of buffer, just make sure it is active */
- dstate->active[i].limit = loc;
- dstate->active[i].curr = get_init_puff(m, &kp[i]);
- return;
- }
-
- /* find the limit */
- u64a lim = limitByReach(&kp[i], buf + loc, length - loc) + loc;
-
- /* no need to worry about dead_point triggers here as kilopuff must first
- * update chain (to fire a report) before it goes dead. */
-
- if (lim == (u64a)loc) {
- DEBUG_PRINTF("dead on arrival\n");
- mmbit_unset(active, m->kilo_count, i);
- return;
- }
- dstate->active[i].limit = lim;
-
- /* setup puffette, find next trigger */
- dstate->active[i].curr = get_init_puff(m, &kp[i]);
- if (dstate->active[i].curr[1].report != INVALID_REPORT) {
- u32 next_trigger = dstate->active[i].curr[1].repeats - 1ULL + loc;
- lim = MIN(lim, next_trigger);
- }
-
- assert(dstate->active[i].curr[0].repeats == 1
- || dstate->active[i].curr[0].report == INVALID_REPORT);
- if (dstate->active[i].curr[0].repeats == 1) {
- DEBUG_PRINTF("yippee\n");
- mmbit_set(reporters, m->kilo_count, i);
- }
-
- assert(lim > (u64a)loc);
-
- /* add to pq */
- if (lim != length) {
- struct mpv_pq_item temp = {
- .trigger_loc = lim,
- .kilo = i
- };
-
- DEBUG_PRINTF("push for %u at %llu\n", i, lim);
- pq_insert(pq, dstate->pq_size, temp);
- ++dstate->pq_size;
- }
-}
-
-static really_inline
-void killKilo(const struct mpv *m, u8 *active, u8 *reporters,
- struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, u32 i) {
- DEBUG_PRINTF("squashing kilo %u (progress %llu, limit %llu)\n",
- i, pq_top(pq)->trigger_loc, dstate->active[i].limit);
- mmbit_unset(active, m->kilo_count, i);
- mmbit_unset(reporters, m->kilo_count, i);
-
- pq_pop(pq, dstate->pq_size);
- dstate->pq_size--;
-}
-
-static really_inline
-void updateKiloChains(const struct mpv *m, u8 *reporters,
- struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
- u64a curr_loc, size_t buf_length, u32 i) {
- const struct mpv_kilopuff *kp = (const void *)(m + 1);
- u64a counter = get_counter_value_for_kilo(dstate, &kp[i]);
-
- DEBUG_PRINTF("updating active puff for kilo %u\n", i);
- dstate->active[i].curr = update_curr_puff(m, reporters, counter,
- dstate->active[i].curr, i);
-
- u64a next_trigger = dstate->active[i].limit;
-
- if (dstate->active[i].curr[1].report != INVALID_REPORT) {
- u64a next_rep_trigger = dstate->active[i].curr[1].repeats - 1 - counter
- + curr_loc;
-
- next_trigger = MIN(next_trigger, next_rep_trigger);
- } else if (kp[i].dead_point != MPV_DEAD_VALUE) {
- u64a dp_trigger = kp[i].dead_point - counter + curr_loc;
- DEBUG_PRINTF("dead point trigger %llu\n", dp_trigger);
- if (dp_trigger < dstate->active[i].limit) {
- dstate->active[i].limit = dp_trigger;
- next_trigger = dp_trigger;
- }
- }
-
- DEBUG_PRINTF("next trigger location is %llu\n", next_trigger);
-
- if (next_trigger < buf_length) {
- assert(dstate->pq_size <= m->kilo_count);
- assert(next_trigger > pq_top(pq)->trigger_loc);
- struct mpv_pq_item temp = {
- .trigger_loc = next_trigger,
- .kilo = i
- };
-
- DEBUG_PRINTF("(replace) push for %u at %llu\n", i, next_trigger);
- pq_replace_top(pq, dstate->pq_size, temp);
- } else {
- pq_pop(pq, dstate->pq_size);
- dstate->pq_size--;
- DEBUG_PRINTF("PQ_POP\n");
- }
- DEBUG_PRINTF("pq size now %u next top %llu\n", dstate->pq_size,
- pq_top(pq)->trigger_loc);
-}
-
-static really_inline
-u8 do_single_shufti(const m128 l, const m128 h, u8 c) {
- const u8 *lo = (const u8 *)&l;
- const u8 *hi = (const u8 *)&h;
- return lo[c & 0xf] & hi[c >> 4];
-}
-
-static really_inline
-size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
- size_t length, size_t curr, u32 min_rep) {
- assert(kp->type != MPV_DOT);
-
- DEBUG_PRINTF("repeats = %u\n", min_rep);
- /* TODO: this should be replace by some sort of simd stuff */
-
- if (kp->type == MPV_VERM) {
- if (min_rep < MIN_SKIP_REPEAT) {
- return find_nverm_run(kp->u.verm.c, 0, min_rep, buf, buf + curr,
- buf + length) - buf - 1;
- }
-
- verm_restart:;
- assert(buf[curr] == kp->u.verm.c);
- size_t test = curr;
- if (curr + min_rep < length) {
- test = curr + min_rep;
- } else {
- test = length - 1;
- }
-
- while (test > curr) {
- if (buf[test] == kp->u.verm.c) {
- curr = test;
- if (curr == length - 1) {
- return curr;
- }
- goto verm_restart;
- }
- --test;
- }
- } else if (kp->type == MPV_SHUFTI) {
- m128 lo = kp->u.shuf.mask_lo;
- m128 hi = kp->u.shuf.mask_hi;
- shuf_restart:
- assert(do_single_shufti(lo, hi, buf[curr]));
- size_t test = curr;
- if (curr + min_rep < length) {
- test = curr + min_rep;
- } else {
- test = length - 1;
- }
-
- while (test > curr) {
- if (do_single_shufti(lo, hi, buf[test])) {
- DEBUG_PRINTF("updating curr from %zu to %zu\n", curr, test);
- curr = test;
- if (curr == length - 1) {
- return curr;
- }
- goto shuf_restart;
- }
- --test;
- }
- } else if (kp->type == MPV_TRUFFLE) {
- const m128 mask1 = kp->u.truffle.mask1;
- const m128 mask2 = kp->u.truffle.mask2;
- truffle_restart:;
- size_t test = curr;
- if (curr + min_rep < length) {
- test = curr + min_rep;
- } else {
- test = length - 1;
- }
-
- while (test > curr) {
- const u8 *rv = truffleExec(mask1, mask2, buf + test, buf + test + 1);
- if (rv == buf + test) {
- curr = test;
- if (curr == length - 1) {
- return curr;
- }
- goto truffle_restart;
- }
- --test;
- }
- } else if (kp->type == MPV_NVERM) {
- if (min_rep < MIN_SKIP_REPEAT) {
- return find_verm_run(kp->u.verm.c, 0, min_rep, buf, buf + curr,
- buf + length) - buf - 1;
- }
-
- nverm_restart:;
- assert(buf[curr] != kp->u.verm.c);
- size_t test = curr;
- if (curr + min_rep < length) {
- test = curr + min_rep;
- } else {
- test = length - 1;
- }
-
- while (test > curr) {
- if (buf[test] != kp->u.verm.c) {
- curr = test;
- if (curr == length - 1) {
- return curr;
- }
- goto nverm_restart;
- }
- --test;
- }
- } else {
- assert(0);
- }
-
- return curr;
-}
-
-static really_inline
-void restartKilo(const struct mpv *m, UNUSED u8 *active, u8 *reporters,
- struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
- const u8 *buf, u64a prev_limit, size_t buf_length, u32 i) {
- const struct mpv_kilopuff *kp = (const void *)(m + 1);
- assert(kp[i].auto_restart);
- assert(mmbit_isset(active, m->kilo_count, i));
-
- DEBUG_PRINTF("we got to %llu,%llu\n", prev_limit, dstate->active[i].limit);
- assert(prev_limit == dstate->active[i].limit);
-
- DEBUG_PRINTF("resetting counter\n");
-
- /* we need to ensure that the counters are upto date */
- normalize_counters(dstate, m);
-
- /* current byte is dead, will wrap to 0 after processing this byte */
- assert(MPV_DEAD_VALUE + 1 == 0);
- *get_counter_for_kilo(dstate, &kp[i]) = MPV_DEAD_VALUE;
-
- DEBUG_PRINTF("resetting puffettes\n");
- dstate->active[i].curr = get_init_puff(m, &kp[i]);
-
- assert(dstate->active[i].curr[0].report == INVALID_REPORT);
- /* TODO: handle restart .{1,}s */
-
- mmbit_unset(reporters, m->kilo_count, i);
-
- if (prev_limit != buf_length - 1) {
- size_t last_bad = find_last_bad(&kp[i], buf, buf_length, prev_limit,
- dstate->active[i].curr[1].repeats);
- assert(last_bad >= prev_limit && last_bad < buf_length);
- if (last_bad != prev_limit) {
- /* there is no point in getting restarted at this location */
- dstate->active[i].limit = last_bad;
- assert(dstate->pq_size <= m->kilo_count);
- struct mpv_pq_item temp = {
- .trigger_loc = last_bad,
- .kilo = i
- };
-
- pq_replace_top(pq, dstate->pq_size, temp);
- return;
- }
- }
-
- /* TODO: skipping would really come in handy about now */
- u64a lim;
- if (buf_length > prev_limit + 1) {
- lim = limitByReach(&kp[i], buf + prev_limit + 1,
- buf_length - (prev_limit + 1)) +
- prev_limit + 1;
- } else {
- assert(buf_length == prev_limit + 1);
- lim = buf_length;
- }
- DEBUG_PRINTF("next limit is %llu\n", lim);
-
- assert(lim > prev_limit);
-
- dstate->active[i].limit = lim;
-
- if (dstate->active[i].curr[1].report != INVALID_REPORT) {
- u32 next_trigger = dstate->active[i].curr[1].repeats + prev_limit;
- lim = MIN(lim, next_trigger);
- }
-
- DEBUG_PRINTF("next trigger for kilo at %llu\n", lim);
-
- if (lim < buf_length) {
- assert(dstate->pq_size <= m->kilo_count);
- assert(lim >= prev_limit);
- struct mpv_pq_item temp = {
- .trigger_loc = lim,
- .kilo = i
- };
-
- pq_replace_top(pq, dstate->pq_size, temp);
- } else {
- pq_pop(pq, dstate->pq_size);
- dstate->pq_size--;
- }
-}
-
-static really_inline
-void handle_events(const struct mpv *m, u8 *active, u8 *reporters,
- struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
- u64a loc, const u8 *buf, size_t buf_length) {
- const struct mpv_kilopuff *kp = (const void *)(m + 1);
-
- while (dstate->pq_size && pq_top(pq)->trigger_loc <= loc) {
- assert(pq_top(pq)->trigger_loc == loc);
-
- u32 kilo = pq_top(pq)->kilo;
-
- DEBUG_PRINTF("pop for kilo %u at %llu\n", kilo,
- pq_top(pq)->trigger_loc);
-
- if (dstate->active[kilo].limit <= loc) {
- if (!kp[kilo].auto_restart) {
- killKilo(m, active, reporters, dstate, pq, kilo);
- } else {
- restartKilo(m, active, reporters, dstate, pq, buf, loc,
- buf_length, kilo);
- }
- } else {
- updateKiloChains(m, reporters, dstate, pq, loc, buf_length, kilo);
- }
- }
-}
-
-static really_inline
-u64a find_next_limit(const struct mpv *m, u8 *active, u8 *reporters,
- struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
- const u8 *buf, u64a prev_limit, u64a ep,
- size_t buf_length) {
- u64a limit = ep;
-
- DEBUG_PRINTF("length %llu (prev %llu), pq %u\n", limit, prev_limit,
- dstate->pq_size);
-
- handle_events(m, active, reporters, dstate, pq, prev_limit, buf,
- buf_length);
-
- if (dstate->pq_size) {
- limit = MIN(pq_top(pq)->trigger_loc, limit);
- assert(limit > prev_limit);
- }
-
- DEBUG_PRINTF("limit now %llu\n", limit);
- return limit;
-}
-
-static really_inline
-char mpvExec(const struct mpv *m, u8 *active, u8 *reporters,
- struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
- const u8 *buf, s64a start, size_t length, size_t buf_length,
- u64a offsetAdj, NfaCallback cb, void *ctxt) {
- DEBUG_PRINTF("running mpv (s %lliu, l %zu, o %llu)\n",
- *get_counter_n(dstate, m, 0) + dstate->counter_adj, length,
- offsetAdj);
-
- u64a progress = start; /* progress is relative to buffer offsets */
-
- while (progress < length) {
- DEBUG_PRINTF("progress %llu\n", progress);
-
- /* find next limit and update chains */
- u64a limit = find_next_limit(m, active, reporters, dstate, pq, buf,
- progress, length, buf_length);
- assert(limit != progress);
- u64a incr = limit - progress;
- DEBUG_PRINTF("incr = %llu\n", incr);
-
- /* report matches upto next limit */
- char rv = processReportsForRange(m, reporters, dstate,
- offsetAdj + progress, limit - progress,
- cb, ctxt);
-
- if (rv != MO_CONTINUE_MATCHING) {
- DEBUG_PRINTF("mpvExec done %llu/%zu\n", progress, length);
- return rv;
- }
-
- dstate->counter_adj += incr;
- progress = limit;
- }
-
- assert(progress == length);
-
- DEBUG_PRINTF("mpvExec done\n");
- return MO_CONTINUE_MATCHING;
-}
-
-static really_inline
-void mpvLoadState(struct mpv_decomp_state *out, const struct NFA *n,
- const char *state) {
- assert(16 >= sizeof(struct mpv_decomp_kilo));
- assert(sizeof(*out) <= n->scratchStateSize);
- assert(ISALIGNED(out));
-
- const struct mpv *m = getImplNfa(n);
- const struct mpv_counter_info *counter_info = get_counter_info(m);
- u64a *counters = get_counter_n(out, m, 0);
- const char *comp_counter = state;
- for (u32 i = 0; i < m->counter_count; i++) {
- u32 counter_size = counter_info[i].counter_size;
- counters[i] = partial_load_u64a(comp_counter, counter_size);
- DEBUG_PRINTF("loaded %llu counter %u\n", counters[i], i);
- comp_counter += counter_size;
- }
-
- out->filled = 0; /* _Q_i will fill limits, curr puffetes, and populate pq
- * on first call */
- out->counter_adj = 0;
- out->pq_size = 0;
-
- u8 *reporters = (u8 *)out + m->reporter_offset;
-
- mmbit_clear(reporters, m->kilo_count);
-}
-
-static really_inline
-void mpvStoreState(const struct NFA *n, char *state,
- const struct mpv_decomp_state *in) {
- assert(ISALIGNED(in));
- const struct mpv *m = getImplNfa(n);
- const struct mpv_counter_info *counter_info = get_counter_info(m);
-
- const u64a *counters = (const u64a *)((const char *)in
- + get_counter_info(m)[0].counter_offset);
- u64a adj = in->counter_adj;
- char *comp_counter = state;
- for (u32 i = 0; i < m->counter_count; i++) {
- /* clamp counter to allow storage in smaller ints */
- u64a curr_counter = MIN(counters[i] + adj, counter_info[i].max_counter);
-
- u32 counter_size = counter_info[i].counter_size;
- partial_store_u64a(comp_counter, curr_counter, counter_size);
- DEBUG_PRINTF("stored %llu counter %u (orig %llu)\n", curr_counter, i,
- counters[i]);
- /* assert(counters[i] != MPV_DEAD_VALUE); /\* should have process 1 byte */
- /* * since a clear *\/ */
- comp_counter += counter_size;
- }
-}
-
+ DEBUG_PRINTF("bailing\n");
+ return MO_HALT_MATCHING;
+ }
+ }
+ }
+
+ return MO_CONTINUE_MATCHING;
+}
+
+/* returns last puffette that we have satisfied */
+static
+const struct mpv_puffette *get_curr_puff(const struct mpv *m,
+ const struct mpv_kilopuff *kp,
+ struct mpv_decomp_state *dstate) {
+ u64a counter = *get_counter_for_kilo(dstate, kp);
+ assert(counter != MPV_DEAD_VALUE);
+
+ const struct mpv_puffette *p = get_puff_array(m, kp);
+ DEBUG_PRINTF("looking for current puffette (counter = %llu)\n", counter);
+ DEBUG_PRINTF("next: (%u, %u)\n", p->repeats, p->report);
+ while (counter + 1 >= p->repeats && p->report != INVALID_REPORT) {
+ DEBUG_PRINTF("advancing\n");
+ ++p;
+ DEBUG_PRINTF("next: (%u, %u)\n", p->repeats, p->report);
+ }
+
+ return p - 1;
+}
+
+static
+const struct mpv_puffette *get_init_puff(const struct mpv *m,
+ const struct mpv_kilopuff *kp) {
+ const struct mpv_puffette *p = get_puff_array(m, kp);
+ while (p->repeats == 1) {
+ ++p;
+ }
+ return p - 1;
+}
+
+
+/* returns the last puffette whose repeats have been satisfied */
+static really_inline
+const struct mpv_puffette *update_curr_puff(const struct mpv *m, u8 *reporters,
+ u64a counter,
+ const struct mpv_puffette *in,
+ u32 kilo_index) {
+ assert(counter != MPV_DEAD_VALUE);
+
+ const struct mpv_puffette *p = in;
+ DEBUG_PRINTF("looking for current puffette (counter = %llu)\n", counter);
+ DEBUG_PRINTF("curr: (%u, %u)\n", p->repeats, p->report);
+ while (counter + 1 >= p[1].repeats && p[1].report != INVALID_REPORT) {
+ DEBUG_PRINTF("advancing\n");
+ ++p;
+ DEBUG_PRINTF("curr: (%u, %u)\n", p->repeats, p->report);
+ }
+
+ if (p != in) {
+ mmbit_set(reporters, m->kilo_count, kilo_index);
+ }
+
+ return p;
+}
+
+static really_inline
+size_t limitByReach(const struct mpv_kilopuff *kp, const u8 *buf,
+ size_t length) {
+ if (kp->type == MPV_VERM) {
+ return vermicelliExec(kp->u.verm.c, 0, buf, buf + length) - buf;
+ } else if (kp->type == MPV_SHUFTI) {
+ m128 mask_lo = kp->u.shuf.mask_lo;
+ m128 mask_hi = kp->u.shuf.mask_hi;
+ return shuftiExec(mask_lo, mask_hi, buf, buf + length) - buf;
+ } else if (kp->type == MPV_TRUFFLE) {
+ return truffleExec(kp->u.truffle.mask1, kp->u.truffle.mask2, buf, buf + length) - buf;
+ } else if (kp->type == MPV_NVERM) {
+ return nvermicelliExec(kp->u.verm.c, 0, buf, buf + length) - buf;
+ }
+
+ assert(kp->type == MPV_DOT);
+ return length;
+}
+
+static never_inline
+void fillLimits(const struct mpv *m, u8 *active, u8 *reporters,
+ struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
+ const u8 *buf, size_t length) {
+ DEBUG_PRINTF("filling limits %zu\n", length);
+ assert(!dstate->pq_size);
+
+ if (!length) {
+ DEBUG_PRINTF("0 length\n");
+ return;
+ }
+
+ const struct mpv_kilopuff *kp = (const void *)(m + 1);
+
+ for (u32 i = mmbit_iterate(active, m->kilo_count, MMB_INVALID);
+ i != MMB_INVALID; i = mmbit_iterate(active, m->kilo_count, i)) {
+ dstate->active[i].curr = get_curr_puff(m, &kp[i], dstate);
+ if (dstate->active[i].curr->report != INVALID_REPORT) {
+ /* this kilo puff may fire reports */
+ mmbit_set(reporters, m->kilo_count, i);
+ }
+
+ u64a lim = limitByReach(&kp[i], buf, length);
+ DEBUG_PRINTF("lim %llu/%zu\n", lim, length);
+
+ if (kp[i].dead_point != MPV_DEAD_VALUE) {
+ assert(!kp[i].auto_restart);
+ u64a counter = get_counter_value_for_kilo(dstate, &kp[i]);
+ u64a dp_trigger = kp[i].dead_point - counter;
+ if (dp_trigger < lim) {
+ DEBUG_PRINTF("dead point trigger %llu\n", dp_trigger);
+ lim = dp_trigger;
+ }
+ }
+
+ if (kp[i].auto_restart && !lim) {
+ *get_counter_for_kilo(dstate, &kp[i]) = MPV_DEAD_VALUE;
+ mmbit_unset(reporters, m->kilo_count, i);
+ /* the counter value will cause the nex_trigger calculation below to
+ * adjust correctly */
+ if (length == 1) {
+ dstate->active[i].limit = 0;
+ continue;
+ }
+
+ lim = limitByReach(&kp[i], buf + 1, length - 1) + 1;
+
+
+ /* restart active counters */
+ dstate->active[i].curr = get_init_puff(m, &kp[i]);
+ assert(dstate->active[i].curr[0].report == INVALID_REPORT);
+
+ DEBUG_PRINTF("lim now %llu/%zu\n", lim, length);
+ }
+
+ dstate->active[i].limit = lim;
+ if (!lim) {
+ mmbit_unset(active, m->kilo_count, i);
+ mmbit_unset(reporters, m->kilo_count, i);
+ continue;
+ }
+ if (dstate->active[i].curr[1].report != INVALID_REPORT) {
+ u32 next_trigger = dstate->active[i].curr[1].repeats - 1ULL
+ - *get_counter_for_kilo(dstate, &kp[i]);
+ DEBUG_PRINTF("next trigger %u\n", next_trigger);
+ lim = MIN(lim, next_trigger);
+ }
+
+ if (lim != length) {
+ struct mpv_pq_item temp = {
+ .trigger_loc = lim,
+ .kilo = i
+ };
+
+ DEBUG_PRINTF("push for %u at %llu\n", i, lim);
+ pq_insert(pq, dstate->pq_size, temp);
+ ++dstate->pq_size;
+ }
+
+ assert(lim || kp[i].auto_restart);
+ }
+
+ DEBUG_PRINTF("filled\n");
+ dstate->filled = 1;
+}
+
+static never_inline
+void handleTopN(const struct mpv *m, s64a loc, u8 *active, u8 *reporters,
+ struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
+ const u8 *buf, size_t length, u32 i) {
+ assert(i < m->kilo_count);
+ DEBUG_PRINTF("MQE_TOP + %u @%lld\n", i, loc);
+ if (mmbit_set(active, m->kilo_count, i)) {
+ DEBUG_PRINTF("kilo is already alive and kicking\n");
+ return;
+ }
+
+ const struct mpv_kilopuff *kp = (const struct mpv_kilopuff *)(m + 1);
+
+ assert(!kp[i].auto_restart); /* handle later/never */
+
+ /* we need to ensure that the counters are upto date */
+ normalize_counters(dstate, m);
+
+ /* reset counter */
+ *get_counter_for_kilo(dstate, &kp[i]) = 0;
+
+ if ((size_t)loc == length) {
+ /* end of buffer, just make sure it is active */
+ dstate->active[i].limit = loc;
+ dstate->active[i].curr = get_init_puff(m, &kp[i]);
+ return;
+ }
+
+ /* find the limit */
+ u64a lim = limitByReach(&kp[i], buf + loc, length - loc) + loc;
+
+ /* no need to worry about dead_point triggers here as kilopuff must first
+ * update chain (to fire a report) before it goes dead. */
+
+ if (lim == (u64a)loc) {
+ DEBUG_PRINTF("dead on arrival\n");
+ mmbit_unset(active, m->kilo_count, i);
+ return;
+ }
+ dstate->active[i].limit = lim;
+
+ /* setup puffette, find next trigger */
+ dstate->active[i].curr = get_init_puff(m, &kp[i]);
+ if (dstate->active[i].curr[1].report != INVALID_REPORT) {
+ u32 next_trigger = dstate->active[i].curr[1].repeats - 1ULL + loc;
+ lim = MIN(lim, next_trigger);
+ }
+
+ assert(dstate->active[i].curr[0].repeats == 1
+ || dstate->active[i].curr[0].report == INVALID_REPORT);
+ if (dstate->active[i].curr[0].repeats == 1) {
+ DEBUG_PRINTF("yippee\n");
+ mmbit_set(reporters, m->kilo_count, i);
+ }
+
+ assert(lim > (u64a)loc);
+
+ /* add to pq */
+ if (lim != length) {
+ struct mpv_pq_item temp = {
+ .trigger_loc = lim,
+ .kilo = i
+ };
+
+ DEBUG_PRINTF("push for %u at %llu\n", i, lim);
+ pq_insert(pq, dstate->pq_size, temp);
+ ++dstate->pq_size;
+ }
+}
+
+static really_inline
+void killKilo(const struct mpv *m, u8 *active, u8 *reporters,
+ struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, u32 i) {
+ DEBUG_PRINTF("squashing kilo %u (progress %llu, limit %llu)\n",
+ i, pq_top(pq)->trigger_loc, dstate->active[i].limit);
+ mmbit_unset(active, m->kilo_count, i);
+ mmbit_unset(reporters, m->kilo_count, i);
+
+ pq_pop(pq, dstate->pq_size);
+ dstate->pq_size--;
+}
+
+static really_inline
+void updateKiloChains(const struct mpv *m, u8 *reporters,
+ struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
+ u64a curr_loc, size_t buf_length, u32 i) {
+ const struct mpv_kilopuff *kp = (const void *)(m + 1);
+ u64a counter = get_counter_value_for_kilo(dstate, &kp[i]);
+
+ DEBUG_PRINTF("updating active puff for kilo %u\n", i);
+ dstate->active[i].curr = update_curr_puff(m, reporters, counter,
+ dstate->active[i].curr, i);
+
+ u64a next_trigger = dstate->active[i].limit;
+
+ if (dstate->active[i].curr[1].report != INVALID_REPORT) {
+ u64a next_rep_trigger = dstate->active[i].curr[1].repeats - 1 - counter
+ + curr_loc;
+
+ next_trigger = MIN(next_trigger, next_rep_trigger);
+ } else if (kp[i].dead_point != MPV_DEAD_VALUE) {
+ u64a dp_trigger = kp[i].dead_point - counter + curr_loc;
+ DEBUG_PRINTF("dead point trigger %llu\n", dp_trigger);
+ if (dp_trigger < dstate->active[i].limit) {
+ dstate->active[i].limit = dp_trigger;
+ next_trigger = dp_trigger;
+ }
+ }
+
+ DEBUG_PRINTF("next trigger location is %llu\n", next_trigger);
+
+ if (next_trigger < buf_length) {
+ assert(dstate->pq_size <= m->kilo_count);
+ assert(next_trigger > pq_top(pq)->trigger_loc);
+ struct mpv_pq_item temp = {
+ .trigger_loc = next_trigger,
+ .kilo = i
+ };
+
+ DEBUG_PRINTF("(replace) push for %u at %llu\n", i, next_trigger);
+ pq_replace_top(pq, dstate->pq_size, temp);
+ } else {
+ pq_pop(pq, dstate->pq_size);
+ dstate->pq_size--;
+ DEBUG_PRINTF("PQ_POP\n");
+ }
+ DEBUG_PRINTF("pq size now %u next top %llu\n", dstate->pq_size,
+ pq_top(pq)->trigger_loc);
+}
+
+static really_inline
+u8 do_single_shufti(const m128 l, const m128 h, u8 c) {
+ const u8 *lo = (const u8 *)&l;
+ const u8 *hi = (const u8 *)&h;
+ return lo[c & 0xf] & hi[c >> 4];
+}
+
+static really_inline
+size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
+ size_t length, size_t curr, u32 min_rep) {
+ assert(kp->type != MPV_DOT);
+
+ DEBUG_PRINTF("repeats = %u\n", min_rep);
+ /* TODO: this should be replace by some sort of simd stuff */
+
+ if (kp->type == MPV_VERM) {
+ if (min_rep < MIN_SKIP_REPEAT) {
+ return find_nverm_run(kp->u.verm.c, 0, min_rep, buf, buf + curr,
+ buf + length) - buf - 1;
+ }
+
+ verm_restart:;
+ assert(buf[curr] == kp->u.verm.c);
+ size_t test = curr;
+ if (curr + min_rep < length) {
+ test = curr + min_rep;
+ } else {
+ test = length - 1;
+ }
+
+ while (test > curr) {
+ if (buf[test] == kp->u.verm.c) {
+ curr = test;
+ if (curr == length - 1) {
+ return curr;
+ }
+ goto verm_restart;
+ }
+ --test;
+ }
+ } else if (kp->type == MPV_SHUFTI) {
+ m128 lo = kp->u.shuf.mask_lo;
+ m128 hi = kp->u.shuf.mask_hi;
+ shuf_restart:
+ assert(do_single_shufti(lo, hi, buf[curr]));
+ size_t test = curr;
+ if (curr + min_rep < length) {
+ test = curr + min_rep;
+ } else {
+ test = length - 1;
+ }
+
+ while (test > curr) {
+ if (do_single_shufti(lo, hi, buf[test])) {
+ DEBUG_PRINTF("updating curr from %zu to %zu\n", curr, test);
+ curr = test;
+ if (curr == length - 1) {
+ return curr;
+ }
+ goto shuf_restart;
+ }
+ --test;
+ }
+ } else if (kp->type == MPV_TRUFFLE) {
+ const m128 mask1 = kp->u.truffle.mask1;
+ const m128 mask2 = kp->u.truffle.mask2;
+ truffle_restart:;
+ size_t test = curr;
+ if (curr + min_rep < length) {
+ test = curr + min_rep;
+ } else {
+ test = length - 1;
+ }
+
+ while (test > curr) {
+ const u8 *rv = truffleExec(mask1, mask2, buf + test, buf + test + 1);
+ if (rv == buf + test) {
+ curr = test;
+ if (curr == length - 1) {
+ return curr;
+ }
+ goto truffle_restart;
+ }
+ --test;
+ }
+ } else if (kp->type == MPV_NVERM) {
+ if (min_rep < MIN_SKIP_REPEAT) {
+ return find_verm_run(kp->u.verm.c, 0, min_rep, buf, buf + curr,
+ buf + length) - buf - 1;
+ }
+
+ nverm_restart:;
+ assert(buf[curr] != kp->u.verm.c);
+ size_t test = curr;
+ if (curr + min_rep < length) {
+ test = curr + min_rep;
+ } else {
+ test = length - 1;
+ }
+
+ while (test > curr) {
+ if (buf[test] != kp->u.verm.c) {
+ curr = test;
+ if (curr == length - 1) {
+ return curr;
+ }
+ goto nverm_restart;
+ }
+ --test;
+ }
+ } else {
+ assert(0);
+ }
+
+ return curr;
+}
+
+static really_inline
+void restartKilo(const struct mpv *m, UNUSED u8 *active, u8 *reporters,
+ struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
+ const u8 *buf, u64a prev_limit, size_t buf_length, u32 i) {
+ const struct mpv_kilopuff *kp = (const void *)(m + 1);
+ assert(kp[i].auto_restart);
+ assert(mmbit_isset(active, m->kilo_count, i));
+
+ DEBUG_PRINTF("we got to %llu,%llu\n", prev_limit, dstate->active[i].limit);
+ assert(prev_limit == dstate->active[i].limit);
+
+ DEBUG_PRINTF("resetting counter\n");
+
+ /* we need to ensure that the counters are upto date */
+ normalize_counters(dstate, m);
+
+ /* current byte is dead, will wrap to 0 after processing this byte */
+ assert(MPV_DEAD_VALUE + 1 == 0);
+ *get_counter_for_kilo(dstate, &kp[i]) = MPV_DEAD_VALUE;
+
+ DEBUG_PRINTF("resetting puffettes\n");
+ dstate->active[i].curr = get_init_puff(m, &kp[i]);
+
+ assert(dstate->active[i].curr[0].report == INVALID_REPORT);
+ /* TODO: handle restart .{1,}s */
+
+ mmbit_unset(reporters, m->kilo_count, i);
+
+ if (prev_limit != buf_length - 1) {
+ size_t last_bad = find_last_bad(&kp[i], buf, buf_length, prev_limit,
+ dstate->active[i].curr[1].repeats);
+ assert(last_bad >= prev_limit && last_bad < buf_length);
+ if (last_bad != prev_limit) {
+ /* there is no point in getting restarted at this location */
+ dstate->active[i].limit = last_bad;
+ assert(dstate->pq_size <= m->kilo_count);
+ struct mpv_pq_item temp = {
+ .trigger_loc = last_bad,
+ .kilo = i
+ };
+
+ pq_replace_top(pq, dstate->pq_size, temp);
+ return;
+ }
+ }
+
+ /* TODO: skipping would really come in handy about now */
+ u64a lim;
+ if (buf_length > prev_limit + 1) {
+ lim = limitByReach(&kp[i], buf + prev_limit + 1,
+ buf_length - (prev_limit + 1)) +
+ prev_limit + 1;
+ } else {
+ assert(buf_length == prev_limit + 1);
+ lim = buf_length;
+ }
+ DEBUG_PRINTF("next limit is %llu\n", lim);
+
+ assert(lim > prev_limit);
+
+ dstate->active[i].limit = lim;
+
+ if (dstate->active[i].curr[1].report != INVALID_REPORT) {
+ u32 next_trigger = dstate->active[i].curr[1].repeats + prev_limit;
+ lim = MIN(lim, next_trigger);
+ }
+
+ DEBUG_PRINTF("next trigger for kilo at %llu\n", lim);
+
+ if (lim < buf_length) {
+ assert(dstate->pq_size <= m->kilo_count);
+ assert(lim >= prev_limit);
+ struct mpv_pq_item temp = {
+ .trigger_loc = lim,
+ .kilo = i
+ };
+
+ pq_replace_top(pq, dstate->pq_size, temp);
+ } else {
+ pq_pop(pq, dstate->pq_size);
+ dstate->pq_size--;
+ }
+}
+
+static really_inline
+void handle_events(const struct mpv *m, u8 *active, u8 *reporters,
+ struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
+ u64a loc, const u8 *buf, size_t buf_length) {
+ const struct mpv_kilopuff *kp = (const void *)(m + 1);
+
+ while (dstate->pq_size && pq_top(pq)->trigger_loc <= loc) {
+ assert(pq_top(pq)->trigger_loc == loc);
+
+ u32 kilo = pq_top(pq)->kilo;
+
+ DEBUG_PRINTF("pop for kilo %u at %llu\n", kilo,
+ pq_top(pq)->trigger_loc);
+
+ if (dstate->active[kilo].limit <= loc) {
+ if (!kp[kilo].auto_restart) {
+ killKilo(m, active, reporters, dstate, pq, kilo);
+ } else {
+ restartKilo(m, active, reporters, dstate, pq, buf, loc,
+ buf_length, kilo);
+ }
+ } else {
+ updateKiloChains(m, reporters, dstate, pq, loc, buf_length, kilo);
+ }
+ }
+}
+
+static really_inline
+u64a find_next_limit(const struct mpv *m, u8 *active, u8 *reporters,
+ struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
+ const u8 *buf, u64a prev_limit, u64a ep,
+ size_t buf_length) {
+ u64a limit = ep;
+
+ DEBUG_PRINTF("length %llu (prev %llu), pq %u\n", limit, prev_limit,
+ dstate->pq_size);
+
+ handle_events(m, active, reporters, dstate, pq, prev_limit, buf,
+ buf_length);
+
+ if (dstate->pq_size) {
+ limit = MIN(pq_top(pq)->trigger_loc, limit);
+ assert(limit > prev_limit);
+ }
+
+ DEBUG_PRINTF("limit now %llu\n", limit);
+ return limit;
+}
+
+static really_inline
+char mpvExec(const struct mpv *m, u8 *active, u8 *reporters,
+ struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
+ const u8 *buf, s64a start, size_t length, size_t buf_length,
+ u64a offsetAdj, NfaCallback cb, void *ctxt) {
+ DEBUG_PRINTF("running mpv (s %lliu, l %zu, o %llu)\n",
+ *get_counter_n(dstate, m, 0) + dstate->counter_adj, length,
+ offsetAdj);
+
+ u64a progress = start; /* progress is relative to buffer offsets */
+
+ while (progress < length) {
+ DEBUG_PRINTF("progress %llu\n", progress);
+
+ /* find next limit and update chains */
+ u64a limit = find_next_limit(m, active, reporters, dstate, pq, buf,
+ progress, length, buf_length);
+ assert(limit != progress);
+ u64a incr = limit - progress;
+ DEBUG_PRINTF("incr = %llu\n", incr);
+
+ /* report matches upto next limit */
+ char rv = processReportsForRange(m, reporters, dstate,
+ offsetAdj + progress, limit - progress,
+ cb, ctxt);
+
+ if (rv != MO_CONTINUE_MATCHING) {
+ DEBUG_PRINTF("mpvExec done %llu/%zu\n", progress, length);
+ return rv;
+ }
+
+ dstate->counter_adj += incr;
+ progress = limit;
+ }
+
+ assert(progress == length);
+
+ DEBUG_PRINTF("mpvExec done\n");
+ return MO_CONTINUE_MATCHING;
+}
+
+static really_inline
+void mpvLoadState(struct mpv_decomp_state *out, const struct NFA *n,
+ const char *state) {
+ assert(16 >= sizeof(struct mpv_decomp_kilo));
+ assert(sizeof(*out) <= n->scratchStateSize);
+ assert(ISALIGNED(out));
+
+ const struct mpv *m = getImplNfa(n);
+ const struct mpv_counter_info *counter_info = get_counter_info(m);
+ u64a *counters = get_counter_n(out, m, 0);
+ const char *comp_counter = state;
+ for (u32 i = 0; i < m->counter_count; i++) {
+ u32 counter_size = counter_info[i].counter_size;
+ counters[i] = partial_load_u64a(comp_counter, counter_size);
+ DEBUG_PRINTF("loaded %llu counter %u\n", counters[i], i);
+ comp_counter += counter_size;
+ }
+
+ out->filled = 0; /* _Q_i will fill limits, curr puffetes, and populate pq
+ * on first call */
+ out->counter_adj = 0;
+ out->pq_size = 0;
+
+ u8 *reporters = (u8 *)out + m->reporter_offset;
+
+ mmbit_clear(reporters, m->kilo_count);
+}
+
+static really_inline
+void mpvStoreState(const struct NFA *n, char *state,
+ const struct mpv_decomp_state *in) {
+ assert(ISALIGNED(in));
+ const struct mpv *m = getImplNfa(n);
+ const struct mpv_counter_info *counter_info = get_counter_info(m);
+
+ const u64a *counters = (const u64a *)((const char *)in
+ + get_counter_info(m)[0].counter_offset);
+ u64a adj = in->counter_adj;
+ char *comp_counter = state;
+ for (u32 i = 0; i < m->counter_count; i++) {
+ /* clamp counter to allow storage in smaller ints */
+ u64a curr_counter = MIN(counters[i] + adj, counter_info[i].max_counter);
+
+ u32 counter_size = counter_info[i].counter_size;
+ partial_store_u64a(comp_counter, curr_counter, counter_size);
+ DEBUG_PRINTF("stored %llu counter %u (orig %llu)\n", curr_counter, i,
+ counters[i]);
+ /* assert(counters[i] != MPV_DEAD_VALUE); /\* should have process 1 byte */
+ /* * since a clear *\/ */
+ comp_counter += counter_size;
+ }
+}
+
char nfaExecMpv_queueCompressState(const struct NFA *nfa, const struct mq *q,
UNUSED s64a loc) {
- void *dest = q->streamState;
- const void *src = q->state;
- mpvStoreState(nfa, dest, src);
- return 0;
-}
-
+ void *dest = q->streamState;
+ const void *src = q->state;
+ mpvStoreState(nfa, dest, src);
+ return 0;
+}
+
char nfaExecMpv_expandState(const struct NFA *nfa, void *dest, const void *src,
UNUSED u64a offset, UNUSED u8 key) {
- mpvLoadState(dest, nfa, src);
- return 0;
-}
-
+ mpvLoadState(dest, nfa, src);
+ return 0;
+}
+
char nfaExecMpv_reportCurrent(const struct NFA *n, struct mq *q) {
- const struct mpv *m = getImplNfa(n);
- u64a offset = q_cur_offset(q);
- struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state;
-
- DEBUG_PRINTF("report current: offset %llu\n", offset);
-
- u8 *active = (u8 *)q->streamState + m->active_offset;
- u32 rl_count = 0;
- ReportID *rl = get_report_list(m, s);
-
- processReports(m, active, s, s->counter_adj, offset, q->cb, q->context, rl,
- &rl_count);
- return 0;
-}
-
+ const struct mpv *m = getImplNfa(n);
+ u64a offset = q_cur_offset(q);
+ struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state;
+
+ DEBUG_PRINTF("report current: offset %llu\n", offset);
+
+ u8 *active = (u8 *)q->streamState + m->active_offset;
+ u32 rl_count = 0;
+ ReportID *rl = get_report_list(m, s);
+
+ processReports(m, active, s, s->counter_adj, offset, q->cb, q->context, rl,
+ &rl_count);
+ return 0;
+}
+
char nfaExecMpv_queueInitState(const struct NFA *n, struct mq *q) {
- struct mpv_decomp_state *out = (void *)q->state;
- const struct mpv *m = getImplNfa(n);
- assert(sizeof(*out) <= n->scratchStateSize);
-
- DEBUG_PRINTF("queue init state\n");
-
- u64a *counters = get_counter_n(out, m, 0);
- for (u32 i = 0; i < m->counter_count; i++) {
- counters[i] = MPV_DEAD_VALUE;
- }
-
- out->filled = 0;
- out->counter_adj = 0;
- out->pq_size = 0;
- out->active[0].curr = NULL;
-
- assert(q->streamState);
- u8 *active_kpuff = (u8 *)q->streamState + m->active_offset;
- u8 *reporters = (u8 *)q->state + m->reporter_offset;
- mmbit_clear(active_kpuff, m->kilo_count);
- mmbit_clear(reporters, m->kilo_count);
- return 0;
-}
-
+ struct mpv_decomp_state *out = (void *)q->state;
+ const struct mpv *m = getImplNfa(n);
+ assert(sizeof(*out) <= n->scratchStateSize);
+
+ DEBUG_PRINTF("queue init state\n");
+
+ u64a *counters = get_counter_n(out, m, 0);
+ for (u32 i = 0; i < m->counter_count; i++) {
+ counters[i] = MPV_DEAD_VALUE;
+ }
+
+ out->filled = 0;
+ out->counter_adj = 0;
+ out->pq_size = 0;
+ out->active[0].curr = NULL;
+
+ assert(q->streamState);
+ u8 *active_kpuff = (u8 *)q->streamState + m->active_offset;
+ u8 *reporters = (u8 *)q->state + m->reporter_offset;
+ mmbit_clear(active_kpuff, m->kilo_count);
+ mmbit_clear(reporters, m->kilo_count);
+ return 0;
+}
+
char nfaExecMpv_initCompressedState(const struct NFA *n, u64a offset,
void *state, UNUSED u8 key) {
- const struct mpv *m = getImplNfa(n);
- memset(state, 0, m->active_offset); /* active_offset marks end of comp
- * counters */
- u8 *active_kpuff = (u8 *)state + m->active_offset;
- if (!offset) {
- mmbit_init_range(active_kpuff, m->kilo_count, m->top_kilo_begin,
- m->top_kilo_end);
- return 1;
- } else {
- return 0;
- }
-}
-
-static really_inline
+ const struct mpv *m = getImplNfa(n);
+ memset(state, 0, m->active_offset); /* active_offset marks end of comp
+ * counters */
+ u8 *active_kpuff = (u8 *)state + m->active_offset;
+ if (!offset) {
+ mmbit_init_range(active_kpuff, m->kilo_count, m->top_kilo_begin,
+ m->top_kilo_end);
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+static really_inline
char nfaExecMpv_Q_i(const struct NFA *n, struct mq *q, s64a end) {
- u64a offset = q->offset;
- const u8 *buffer = q->buffer;
- size_t length = q->length;
- NfaCallback cb = q->cb;
- void *context = q->context;
- s64a sp;
- const struct mpv *m = getImplNfa(n);
- struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state;
- u8 *active = (u8 *)q->streamState + m->active_offset;
- u8 *reporters = (u8 *)q->state + m->reporter_offset;
- struct mpv_pq_item *pq = (struct mpv_pq_item *)(q->state + m->pq_offset);
-
- if (!s->filled) {
- fillLimits(m, active, reporters, s, pq, q->buffer, q->length);
- }
-
- assert(!q->report_current);
-
- if (q->cur == q->end) {
- return 1;
- }
-
- assert(q->cur + 1 < q->end); /* require at least two items */
-
- assert(q_cur_type(q) == MQE_START);
- assert(q_cur_loc(q) >= 0);
- sp = q->items[q->cur].location;
- q->cur++;
-
- if (q->items[q->cur - 1].location > end) {
- /* this is as far as we go */
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = end;
- return MO_ALIVE;
- }
-
- while (q->cur < q->end) {
- s64a ep = q->items[q->cur].location;
-
- ep = MIN(ep, end);
-
- assert(ep >= sp);
-
- assert(sp >= 0); /* mpv should be an outfix; outfixes are not lazy */
-
- if (sp >= ep) {
- goto scan_done;
- }
-
- /* do main buffer region */
- assert((u64a)ep <= length);
- char rv = mpvExec(m, active, reporters, s, pq, buffer, sp, ep, length,
- offset, cb, context);
- if (rv == MO_HALT_MATCHING) {
- q->cur = q->end;
- return 0;
- }
-
- scan_done:
- if (q->items[q->cur].location > end) {
- /* this is as far as we go */
- q->cur--;
- q->items[q->cur].type = MQE_START;
- q->items[q->cur].location = end;
- return MO_ALIVE;
- }
-
- sp = ep;
-
- switch (q->items[q->cur].type) {
- case MQE_TOP:
- DEBUG_PRINTF("top %u %u\n", m->top_kilo_begin, m->top_kilo_end);
- /* MQE_TOP initialise all counters to 0; activates all kilos */
- {
- u64a *counters = get_counter_n(s, m, 0);
- assert(counters[0] == MPV_DEAD_VALUE);
- assert(!s->counter_adj);
- for (u32 i = 0; i < m->counter_count; i++) {
- counters[i] = 0;
- }
- mmbit_init_range(active, m->kilo_count, m->top_kilo_begin,
- m->top_kilo_end);
- fillLimits(m, active, reporters, s, pq, buffer, length);
- }
- break;
- case MQE_START:
- case MQE_END:
- break;
- default:
- /* MQE_TOP_N --> switch on kilo puff N */
- assert(q->items[q->cur].type >= MQE_TOP_FIRST);
- assert(q->items[q->cur].type < MQE_INVALID);
- u32 i = q->items[q->cur].type - MQE_TOP_FIRST;
- handleTopN(m, sp, active, reporters, s, pq, buffer, length, i);
- break;
- }
-
- q->cur++;
- }
-
- char alive = 0;
- assert(q->items[q->cur - 1].type == MQE_END);
- if (q->items[q->cur - 1].location == (s64a)q->length) {
- normalize_counters(s, m);
-
- const struct mpv_kilopuff *kp = (const struct mpv_kilopuff *)(m + 1);
- for (u32 i = mmbit_iterate(active, m->kilo_count, MMB_INVALID);
- i != MMB_INVALID; i = mmbit_iterate(active, m->kilo_count, i)) {
- if (*get_counter_for_kilo(s, &kp[i]) >= kp[i].dead_point) {
- mmbit_unset(active, m->kilo_count, i);
- } else {
- alive = 1;
- }
- }
- } else {
- alive
- = mmbit_iterate(active, m->kilo_count, MMB_INVALID) != MMB_INVALID;
- }
-
- DEBUG_PRINTF("finished %d\n", (int)alive);
- return alive;
-}
-
+ u64a offset = q->offset;
+ const u8 *buffer = q->buffer;
+ size_t length = q->length;
+ NfaCallback cb = q->cb;
+ void *context = q->context;
+ s64a sp;
+ const struct mpv *m = getImplNfa(n);
+ struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state;
+ u8 *active = (u8 *)q->streamState + m->active_offset;
+ u8 *reporters = (u8 *)q->state + m->reporter_offset;
+ struct mpv_pq_item *pq = (struct mpv_pq_item *)(q->state + m->pq_offset);
+
+ if (!s->filled) {
+ fillLimits(m, active, reporters, s, pq, q->buffer, q->length);
+ }
+
+ assert(!q->report_current);
+
+ if (q->cur == q->end) {
+ return 1;
+ }
+
+ assert(q->cur + 1 < q->end); /* require at least two items */
+
+ assert(q_cur_type(q) == MQE_START);
+ assert(q_cur_loc(q) >= 0);
+ sp = q->items[q->cur].location;
+ q->cur++;
+
+ if (q->items[q->cur - 1].location > end) {
+ /* this is as far as we go */
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ return MO_ALIVE;
+ }
+
+ while (q->cur < q->end) {
+ s64a ep = q->items[q->cur].location;
+
+ ep = MIN(ep, end);
+
+ assert(ep >= sp);
+
+ assert(sp >= 0); /* mpv should be an outfix; outfixes are not lazy */
+
+ if (sp >= ep) {
+ goto scan_done;
+ }
+
+ /* do main buffer region */
+ assert((u64a)ep <= length);
+ char rv = mpvExec(m, active, reporters, s, pq, buffer, sp, ep, length,
+ offset, cb, context);
+ if (rv == MO_HALT_MATCHING) {
+ q->cur = q->end;
+ return 0;
+ }
+
+ scan_done:
+ if (q->items[q->cur].location > end) {
+ /* this is as far as we go */
+ q->cur--;
+ q->items[q->cur].type = MQE_START;
+ q->items[q->cur].location = end;
+ return MO_ALIVE;
+ }
+
+ sp = ep;
+
+ switch (q->items[q->cur].type) {
+ case MQE_TOP:
+ DEBUG_PRINTF("top %u %u\n", m->top_kilo_begin, m->top_kilo_end);
+ /* MQE_TOP initialise all counters to 0; activates all kilos */
+ {
+ u64a *counters = get_counter_n(s, m, 0);
+ assert(counters[0] == MPV_DEAD_VALUE);
+ assert(!s->counter_adj);
+ for (u32 i = 0; i < m->counter_count; i++) {
+ counters[i] = 0;
+ }
+ mmbit_init_range(active, m->kilo_count, m->top_kilo_begin,
+ m->top_kilo_end);
+ fillLimits(m, active, reporters, s, pq, buffer, length);
+ }
+ break;
+ case MQE_START:
+ case MQE_END:
+ break;
+ default:
+ /* MQE_TOP_N --> switch on kilo puff N */
+ assert(q->items[q->cur].type >= MQE_TOP_FIRST);
+ assert(q->items[q->cur].type < MQE_INVALID);
+ u32 i = q->items[q->cur].type - MQE_TOP_FIRST;
+ handleTopN(m, sp, active, reporters, s, pq, buffer, length, i);
+ break;
+ }
+
+ q->cur++;
+ }
+
+ char alive = 0;
+ assert(q->items[q->cur - 1].type == MQE_END);
+ if (q->items[q->cur - 1].location == (s64a)q->length) {
+ normalize_counters(s, m);
+
+ const struct mpv_kilopuff *kp = (const struct mpv_kilopuff *)(m + 1);
+ for (u32 i = mmbit_iterate(active, m->kilo_count, MMB_INVALID);
+ i != MMB_INVALID; i = mmbit_iterate(active, m->kilo_count, i)) {
+ if (*get_counter_for_kilo(s, &kp[i]) >= kp[i].dead_point) {
+ mmbit_unset(active, m->kilo_count, i);
+ } else {
+ alive = 1;
+ }
+ }
+ } else {
+ alive
+ = mmbit_iterate(active, m->kilo_count, MMB_INVALID) != MMB_INVALID;
+ }
+
+ DEBUG_PRINTF("finished %d\n", (int)alive);
+ return alive;
+}
+
char nfaExecMpv_Q(const struct NFA *n, struct mq *q, s64a end) {
- DEBUG_PRINTF("_Q %lld\n", end);
+ DEBUG_PRINTF("_Q %lld\n", end);
return nfaExecMpv_Q_i(n, q, end);
-}
-
+}
+
s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) {
- DEBUG_PRINTF("nfa=%p end=%lld\n", nfa, end);
-#ifdef DEBUG
- debugQueue(q);
-#endif
-
+ DEBUG_PRINTF("nfa=%p end=%lld\n", nfa, end);
+#ifdef DEBUG
+ debugQueue(q);
+#endif
+
assert(nfa->type == MPV_NFA);
- assert(q && q->context && q->state);
- assert(end >= 0);
- assert(q->cur < q->end);
- assert(q->end <= MAX_MQE_LEN);
- assert(ISALIGNED_16(nfa) && ISALIGNED_16(getImplNfa(nfa)));
- assert(end < q->items[q->end - 1].location
- || q->items[q->end - 1].type == MQE_END);
-
- if (q->items[q->cur].location > end) {
- return 1;
- }
-
- char q_trimmed = 0;
-
- assert(end <= (s64a)q->length || !q->hlength);
- /* due to reverse accel in block mode some queues may work on a truncated
- * buffer */
- if (end > (s64a)q->length) {
- end = q->length;
- q_trimmed = 1;
- }
-
- /* TODO: restore max offset stuff, if/when _interesting_ max offset stuff
- * is filled in */
-
+ assert(q && q->context && q->state);
+ assert(end >= 0);
+ assert(q->cur < q->end);
+ assert(q->end <= MAX_MQE_LEN);
+ assert(ISALIGNED_16(nfa) && ISALIGNED_16(getImplNfa(nfa)));
+ assert(end < q->items[q->end - 1].location
+ || q->items[q->end - 1].type == MQE_END);
+
+ if (q->items[q->cur].location > end) {
+ return 1;
+ }
+
+ char q_trimmed = 0;
+
+ assert(end <= (s64a)q->length || !q->hlength);
+ /* due to reverse accel in block mode some queues may work on a truncated
+ * buffer */
+ if (end > (s64a)q->length) {
+ end = q->length;
+ q_trimmed = 1;
+ }
+
+ /* TODO: restore max offset stuff, if/when _interesting_ max offset stuff
+ * is filled in */
+
char rv = nfaExecMpv_Q_i(nfa, q, end);
-
- assert(!q->report_current);
- DEBUG_PRINTF("returned rv=%d, q_trimmed=%d\n", rv, q_trimmed);
- if (q_trimmed || !rv) {
- return 0;
- } else {
- const struct mpv *m = getImplNfa(nfa);
- u8 *reporters = (u8 *)q->state + m->reporter_offset;
-
- if (mmbit_any_precise(reporters, m->kilo_count)) {
- DEBUG_PRINTF("next byte\n");
- return 1; /* need to match at next byte */
- } else {
- s64a next_event = q->length;
- s64a next_pq = q->length;
-
- if (q->cur < q->end) {
- next_event = q->items[q->cur].location;
- }
-
- struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state;
- struct mpv_pq_item *pq
- = (struct mpv_pq_item *)(q->state + m->pq_offset);
- if (s->pq_size) {
- next_pq = pq_top(pq)->trigger_loc;
- }
-
- assert(next_event);
- assert(next_pq);
-
- DEBUG_PRINTF("next pq %lld event %lld\n", next_pq, next_event);
- return MIN(next_pq, next_event);
- }
- }
-}
+
+ assert(!q->report_current);
+ DEBUG_PRINTF("returned rv=%d, q_trimmed=%d\n", rv, q_trimmed);
+ if (q_trimmed || !rv) {
+ return 0;
+ } else {
+ const struct mpv *m = getImplNfa(nfa);
+ u8 *reporters = (u8 *)q->state + m->reporter_offset;
+
+ if (mmbit_any_precise(reporters, m->kilo_count)) {
+ DEBUG_PRINTF("next byte\n");
+ return 1; /* need to match at next byte */
+ } else {
+ s64a next_event = q->length;
+ s64a next_pq = q->length;
+
+ if (q->cur < q->end) {
+ next_event = q->items[q->cur].location;
+ }
+
+ struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state;
+ struct mpv_pq_item *pq
+ = (struct mpv_pq_item *)(q->state + m->pq_offset);
+ if (s->pq_size) {
+ next_pq = pq_top(pq)->trigger_loc;
+ }
+
+ assert(next_event);
+ assert(next_pq);
+
+ DEBUG_PRINTF("next pq %lld event %lld\n", next_pq, next_event);
+ return MIN(next_pq, next_event);
+ }
+ }
+}
diff --git a/contrib/libs/hyperscan/src/nfa/mpv.h b/contrib/libs/hyperscan/src/nfa/mpv.h
index 244dfe800d..3780728d7f 100644
--- a/contrib/libs/hyperscan/src/nfa/mpv.h
+++ b/contrib/libs/hyperscan/src/nfa/mpv.h
@@ -1,39 +1,39 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MPV_H
-#define MPV_H
-
-#include "ue2common.h"
-
-struct mq;
-struct NFA;
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MPV_H
+#define MPV_H
+
+#include "ue2common.h"
+
+struct mq;
+struct NFA;
+
char nfaExecMpv_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecMpv_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecMpv_queueInitState(const struct NFA *n, struct mq *q);
@@ -43,7 +43,7 @@ char nfaExecMpv_queueCompressState(const struct NFA *nfa, const struct mq *q,
s64a loc);
char nfaExecMpv_expandState(const struct NFA *nfa, void *dest, const void *src,
u64a offset, u8 key);
-
+
#define nfaExecMpv_testEOD NFA_API_NO_IMPL
#define nfaExecMpv_inAccept NFA_API_NO_IMPL
#define nfaExecMpv_inAnyAccept NFA_API_NO_IMPL
@@ -51,10 +51,10 @@ char nfaExecMpv_expandState(const struct NFA *nfa, void *dest, const void *src,
#define nfaExecMpv_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */
#define nfaExecMpv_B_Reverse NFA_API_NO_IMPL
#define nfaExecMpv_zombie_status NFA_API_ZOMBIE_NO_IMPL
-
-/**
- * return 0 if the mpv dies, otherwise returns the location of the next possible
- * match (given the currently known events). */
+
+/**
+ * return 0 if the mpv dies, otherwise returns the location of the next possible
+ * match (given the currently known events). */
s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end);
-
-#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/mpv_internal.h b/contrib/libs/hyperscan/src/nfa/mpv_internal.h
index 527a691cf8..a52853dce2 100644
--- a/contrib/libs/hyperscan/src/nfa/mpv_internal.h
+++ b/contrib/libs/hyperscan/src/nfa/mpv_internal.h
@@ -1,45 +1,45 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MPV_INTERNAL_H
-#define MPV_INTERNAL_H
-
-#include "ue2common.h"
-
-#define MPV_DOT 0
-#define MPV_VERM 1
-#define MPV_SHUFTI 2
-#define MPV_TRUFFLE 3
-#define MPV_NVERM 4
-
-struct mpv_puffette {
- u32 repeats;
- char unbounded;
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MPV_INTERNAL_H
+#define MPV_INTERNAL_H
+
+#include "ue2common.h"
+
+#define MPV_DOT 0
+#define MPV_VERM 1
+#define MPV_SHUFTI 2
+#define MPV_TRUFFLE 3
+#define MPV_NVERM 4
+
+struct mpv_puffette {
+ u32 repeats;
+ char unbounded;
/**
* \brief Report is simple-exhaustible.
@@ -49,149 +49,149 @@ struct mpv_puffette {
*/
char simple_exhaust;
- ReportID report;
-};
-
-struct mpv_kilopuff {
- u32 counter_offset; /**< offset (in full stream state) to the counter that
- * this kilopuff refers to */
- u32 count; /**< number of real (non sentinel mpv puffettes) */
- u32 puffette_offset; /**< relative to base of mpv, points past the 1st
- * sent */
- u64a dead_point;
- u8 auto_restart;
- u8 type; /* MPV_DOT, MPV_VERM, etc */
- union {
- struct {
- char c;
- } verm;
- struct {
- m128 mask_lo;
- m128 mask_hi;
- } shuf;
- struct {
- m128 mask1;
- m128 mask2;
- } truffle;
- } u;
-};
-
-struct mpv_counter_info {
- u64a max_counter; /**< maximum value this counter needs to track */
- u32 counter_size; /**< number of bytes to represent the counter in stream
- * state */
- u32 counter_offset; /**< offset that this counter is stored at in the
- * full stream state */
- u32 kilo_begin; /**< first kilo to turn on when the counter is started */
- u32 kilo_end; /**< 1 + last kilo to turn on when the counter is started */
-};
-
-struct ALIGN_AVX_DIRECTIVE mpv {
- u32 kilo_count; /**< number of kilopuffs following */
- u32 counter_count; /**< number of counters managed by the mpv */
- u32 puffette_count; /**< total number of puffettes under all the kilos */
- u32 pq_offset; /**< offset to the priority queue in the decompressed
- * state */
- u32 reporter_offset; /**< offset to the reporter mmbit in the decompressed
- * state */
- u32 report_list_offset; /**< offset to the report list scratch space in the
- * decompressed state */
- u32 active_offset; /**< offset to the active kp mmbit in the compressed
- * state */
- u32 top_kilo_begin; /**< first kilo to switch on when top arrives */
- u32 top_kilo_end; /**< one past the last kilo to switch on when top
- * arrives */
-};
-
-struct mpv_decomp_kilo {
- u64a limit;
- const struct mpv_puffette *curr;
-};
-
-/* note: size varies on different platforms */
-struct mpv_decomp_state {
- u32 pq_size;
- char filled;
- u64a counter_adj; /**< progress not yet written to the real counters */
- struct mpv_decomp_kilo active[];
-};
-
-/* ---
- * | | mpv
- * ---
- * | |
- * | | kilo_count * mpv_kilopuffs
- * | |
- * ...
- * | |
- * ---
- * | |
- * | | counter_count * mpv_counter_infos
- * | |
- * ...
- * | |
- * ---
- * | | sentinel mpv_puffette
- * ---
- * | | mpv_puffettes for 1st kilopuff
- * | | (mpv_puffettes are ordered by minimum number of repeats)
- * | |
- * ---
- * | | sentinel mpv_puffette
- * ---
- * | | mpv_puffettes for 2nd kilopuff
- * ...
- * | |
- * ---
- * | | sentinel mpv_puffette
- * ---
- */
-
-/*
- * Stream State
- * [Compressed Counter 0]
- * [Compressed Counter 1]
- * ...
- * [Compressed Counter N]
- * [mmbit of active kilopuffs]
- *
- * Decompressed State
- * [header (limit pq_size)]
- * [
- * [kilo 1 current reports]
- * ...
- * [kilo N current reports]
- * ]
- * [
- * [Full Counter 0]
- * [Full Counter 1]
- * ...
- * [Full Counter N]
- * ]
- * [pq of kilo changes]
- * [scratch space for current report lists (total number of puffettes)]
- * [mmbit of kilopuffs with active reports]
- */
-
-struct mpv_pq_item {
- u64a trigger_loc;
- u32 kilo;
-};
-
-/* returns pointer to first non sentinel mpv_puff */
-static really_inline
-const struct mpv_puffette *get_puff_array(const struct mpv *m,
- const struct mpv_kilopuff *kp) {
- return (const struct mpv_puffette *)((const char *)m + kp->puffette_offset);
-}
-
-static really_inline
-const struct mpv_counter_info *get_counter_info(const struct mpv *m) {
- return (const struct mpv_counter_info *)((const char *)(m + 1)
- + m->kilo_count * sizeof(struct mpv_kilopuff));
-}
-
-#define MPV_DEAD_VALUE (~0ULL)
-#define INVALID_REPORT (~0U)
-
-#endif
+ ReportID report;
+};
+
+struct mpv_kilopuff {
+ u32 counter_offset; /**< offset (in full stream state) to the counter that
+ * this kilopuff refers to */
+ u32 count; /**< number of real (non sentinel mpv puffettes) */
+ u32 puffette_offset; /**< relative to base of mpv, points past the 1st
+ * sent */
+ u64a dead_point;
+ u8 auto_restart;
+ u8 type; /* MPV_DOT, MPV_VERM, etc */
+ union {
+ struct {
+ char c;
+ } verm;
+ struct {
+ m128 mask_lo;
+ m128 mask_hi;
+ } shuf;
+ struct {
+ m128 mask1;
+ m128 mask2;
+ } truffle;
+ } u;
+};
+
+struct mpv_counter_info {
+ u64a max_counter; /**< maximum value this counter needs to track */
+ u32 counter_size; /**< number of bytes to represent the counter in stream
+ * state */
+ u32 counter_offset; /**< offset that this counter is stored at in the
+ * full stream state */
+ u32 kilo_begin; /**< first kilo to turn on when the counter is started */
+ u32 kilo_end; /**< 1 + last kilo to turn on when the counter is started */
+};
+
+struct ALIGN_AVX_DIRECTIVE mpv {
+ u32 kilo_count; /**< number of kilopuffs following */
+ u32 counter_count; /**< number of counters managed by the mpv */
+ u32 puffette_count; /**< total number of puffettes under all the kilos */
+ u32 pq_offset; /**< offset to the priority queue in the decompressed
+ * state */
+ u32 reporter_offset; /**< offset to the reporter mmbit in the decompressed
+ * state */
+ u32 report_list_offset; /**< offset to the report list scratch space in the
+ * decompressed state */
+ u32 active_offset; /**< offset to the active kp mmbit in the compressed
+ * state */
+ u32 top_kilo_begin; /**< first kilo to switch on when top arrives */
+ u32 top_kilo_end; /**< one past the last kilo to switch on when top
+ * arrives */
+};
+
+struct mpv_decomp_kilo {
+ u64a limit;
+ const struct mpv_puffette *curr;
+};
+
+/* note: size varies on different platforms */
+struct mpv_decomp_state {
+ u32 pq_size;
+ char filled;
+ u64a counter_adj; /**< progress not yet written to the real counters */
+ struct mpv_decomp_kilo active[];
+};
+
+/* ---
+ * | | mpv
+ * ---
+ * | |
+ * | | kilo_count * mpv_kilopuffs
+ * | |
+ * ...
+ * | |
+ * ---
+ * | |
+ * | | counter_count * mpv_counter_infos
+ * | |
+ * ...
+ * | |
+ * ---
+ * | | sentinel mpv_puffette
+ * ---
+ * | | mpv_puffettes for 1st kilopuff
+ * | | (mpv_puffettes are ordered by minimum number of repeats)
+ * | |
+ * ---
+ * | | sentinel mpv_puffette
+ * ---
+ * | | mpv_puffettes for 2nd kilopuff
+ * ...
+ * | |
+ * ---
+ * | | sentinel mpv_puffette
+ * ---
+ */
+
+/*
+ * Stream State
+ * [Compressed Counter 0]
+ * [Compressed Counter 1]
+ * ...
+ * [Compressed Counter N]
+ * [mmbit of active kilopuffs]
+ *
+ * Decompressed State
+ * [header (limit pq_size)]
+ * [
+ * [kilo 1 current reports]
+ * ...
+ * [kilo N current reports]
+ * ]
+ * [
+ * [Full Counter 0]
+ * [Full Counter 1]
+ * ...
+ * [Full Counter N]
+ * ]
+ * [pq of kilo changes]
+ * [scratch space for current report lists (total number of puffettes)]
+ * [mmbit of kilopuffs with active reports]
+ */
+
+struct mpv_pq_item {
+ u64a trigger_loc;
+ u32 kilo;
+};
+
+/* returns pointer to first non sentinel mpv_puff */
+static really_inline
+const struct mpv_puffette *get_puff_array(const struct mpv *m,
+ const struct mpv_kilopuff *kp) {
+ return (const struct mpv_puffette *)((const char *)m + kp->puffette_offset);
+}
+
+static really_inline
+const struct mpv_counter_info *get_counter_info(const struct mpv *m) {
+ return (const struct mpv_counter_info *)((const char *)(m + 1)
+ + m->kilo_count * sizeof(struct mpv_kilopuff));
+}
+
+#define MPV_DEAD_VALUE (~0ULL)
+#define INVALID_REPORT (~0U)
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/mpvcompile.cpp b/contrib/libs/hyperscan/src/nfa/mpvcompile.cpp
index 653c7ac78a..8497c64870 100644
--- a/contrib/libs/hyperscan/src/nfa/mpvcompile.cpp
+++ b/contrib/libs/hyperscan/src/nfa/mpvcompile.cpp
@@ -1,99 +1,99 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "mpvcompile.h"
-
-#include "mpv_internal.h"
-#include "nfa_api_queue.h"
-#include "nfa_internal.h"
-#include "shufticompile.h"
-#include "trufflecompile.h"
-#include "util/alloc.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "mpvcompile.h"
+
+#include "mpv_internal.h"
+#include "nfa_api_queue.h"
+#include "nfa_internal.h"
+#include "shufticompile.h"
+#include "trufflecompile.h"
+#include "util/alloc.h"
#include "util/multibit_build.h"
-#include "util/order_check.h"
+#include "util/order_check.h"
#include "util/report_manager.h"
-#include "util/verify_types.h"
-
-#include <algorithm>
-#include <iterator>
-#include <map>
-
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_values;
-using boost::adaptors::map_keys;
-
-namespace ue2 {
-
-namespace {
-struct pcomp {
- bool operator()(const raw_puff &a, const raw_puff &b) const {
+#include "util/verify_types.h"
+
+#include <algorithm>
+#include <iterator>
+#include <map>
+
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_values;
+using boost::adaptors::map_keys;
+
+namespace ue2 {
+
+namespace {
+struct pcomp {
+ bool operator()(const raw_puff &a, const raw_puff &b) const {
return tie(a.repeats, a.unbounded, a.simple_exhaust, a.report) <
tie(b.repeats, b.unbounded, b.simple_exhaust, b.report);
- }
-};
-
-struct ClusterKey {
- explicit ClusterKey(const raw_puff &src)
- : trigger_event(MQE_INVALID), reach(src.reach),
- auto_restart(src.auto_restart) {}
- ClusterKey(u32 event, const raw_puff &src)
- : trigger_event(event), reach(src.reach),
- auto_restart(src.auto_restart) {}
-
- u32 trigger_event;
- CharReach reach;
- bool auto_restart;
-
- bool operator<(const ClusterKey &b) const {
- const ClusterKey &a = *this;
- ORDER_CHECK(trigger_event); /* want triggered puffs first */
- ORDER_CHECK(auto_restart);
- ORDER_CHECK(reach);
- return false;
- }
-};
-
-} // namespace
-
-static
+ }
+};
+
+struct ClusterKey {
+ explicit ClusterKey(const raw_puff &src)
+ : trigger_event(MQE_INVALID), reach(src.reach),
+ auto_restart(src.auto_restart) {}
+ ClusterKey(u32 event, const raw_puff &src)
+ : trigger_event(event), reach(src.reach),
+ auto_restart(src.auto_restart) {}
+
+ u32 trigger_event;
+ CharReach reach;
+ bool auto_restart;
+
+ bool operator<(const ClusterKey &b) const {
+ const ClusterKey &a = *this;
+ ORDER_CHECK(trigger_event); /* want triggered puffs first */
+ ORDER_CHECK(auto_restart);
+ ORDER_CHECK(reach);
+ return false;
+ }
+};
+
+} // namespace
+
+static
void writePuffette(mpv_puffette *out, const raw_puff &rp,
const ReportManager &rm) {
- DEBUG_PRINTF("outputting %u %d %u to %p\n", rp.repeats, (int)rp.unbounded,
- rp.report, out);
- out->repeats = rp.repeats;
- out->unbounded = rp.unbounded;
+ DEBUG_PRINTF("outputting %u %d %u to %p\n", rp.repeats, (int)rp.unbounded,
+ rp.report, out);
+ out->repeats = rp.repeats;
+ out->unbounded = rp.unbounded;
out->simple_exhaust = rp.simple_exhaust;
out->report = rm.getProgramOffset(rp.report);
-}
-
-static
+}
+
+static
void writeSentinel(mpv_puffette *out) {
DEBUG_PRINTF("outputting sentinel to %p\n", out);
memset(out, 0, sizeof(*out));
@@ -101,300 +101,300 @@ void writeSentinel(mpv_puffette *out) {
}
static
-void writeDeadPoint(mpv_kilopuff *out, const vector<raw_puff> &puffs) {
- for (const auto &puff : puffs) {
- if (puff.unbounded) { /* mpv can never die */
- out->dead_point = MPV_DEAD_VALUE;
- return;
- }
- }
-
- out->dead_point = puffs.back().repeats + 1;
-}
-
-static
-size_t calcSize(const map<ClusterKey, vector<raw_puff>> &raw,
- const vector<mpv_counter_info> &counters) {
- size_t len = sizeof(NFA) + sizeof(mpv);
-
- len += sizeof(mpv_kilopuff) * raw.size(); /* need a kilopuff for each
- distinct reach */
-
- len += sizeof(mpv_counter_info) * counters.size();
-
- len += sizeof(mpv_puffette); /* initial sent */
-
- for (const vector<raw_puff> &puffs : raw | map_values) {
- len += sizeof(mpv_puffette) * puffs.size();
- len += sizeof(mpv_puffette); /* terminal sent */
- }
-
- return len;
-}
-
-static
-void populateClusters(const vector<raw_puff> &puffs_in,
- const vector<raw_puff> &triggered_puffs,
- map<ClusterKey, vector<raw_puff>> *raw) {
- map<ClusterKey, vector<raw_puff>> &puff_clusters = *raw;
-
- u32 e = MQE_TOP_FIRST;
- for (const auto &puff : triggered_puffs) {
- puff_clusters[ClusterKey(e, puff)].push_back(puff);
- e++;
- }
-
- for (const auto &puff : puffs_in) {
- puff_clusters[ClusterKey(puff)].push_back(puff);
- }
-
-
- for (vector<raw_puff> &puffs : puff_clusters | map_values) {
- sort(puffs.begin(), puffs.end(), pcomp());
- }
-}
-
-static
-void writeKiloPuff(const map<ClusterKey, vector<raw_puff>>::const_iterator &it,
+void writeDeadPoint(mpv_kilopuff *out, const vector<raw_puff> &puffs) {
+ for (const auto &puff : puffs) {
+ if (puff.unbounded) { /* mpv can never die */
+ out->dead_point = MPV_DEAD_VALUE;
+ return;
+ }
+ }
+
+ out->dead_point = puffs.back().repeats + 1;
+}
+
+static
+size_t calcSize(const map<ClusterKey, vector<raw_puff>> &raw,
+ const vector<mpv_counter_info> &counters) {
+ size_t len = sizeof(NFA) + sizeof(mpv);
+
+ len += sizeof(mpv_kilopuff) * raw.size(); /* need a kilopuff for each
+ distinct reach */
+
+ len += sizeof(mpv_counter_info) * counters.size();
+
+ len += sizeof(mpv_puffette); /* initial sent */
+
+ for (const vector<raw_puff> &puffs : raw | map_values) {
+ len += sizeof(mpv_puffette) * puffs.size();
+ len += sizeof(mpv_puffette); /* terminal sent */
+ }
+
+ return len;
+}
+
+static
+void populateClusters(const vector<raw_puff> &puffs_in,
+ const vector<raw_puff> &triggered_puffs,
+ map<ClusterKey, vector<raw_puff>> *raw) {
+ map<ClusterKey, vector<raw_puff>> &puff_clusters = *raw;
+
+ u32 e = MQE_TOP_FIRST;
+ for (const auto &puff : triggered_puffs) {
+ puff_clusters[ClusterKey(e, puff)].push_back(puff);
+ e++;
+ }
+
+ for (const auto &puff : puffs_in) {
+ puff_clusters[ClusterKey(puff)].push_back(puff);
+ }
+
+
+ for (vector<raw_puff> &puffs : puff_clusters | map_values) {
+ sort(puffs.begin(), puffs.end(), pcomp());
+ }
+}
+
+static
+void writeKiloPuff(const map<ClusterKey, vector<raw_puff>>::const_iterator &it,
const ReportManager &rm, u32 counter_offset, mpv *m,
mpv_kilopuff *kp, mpv_puffette **pa) {
- const CharReach &reach = it->first.reach;
- const vector<raw_puff> &puffs = it->second;
-
- kp->auto_restart = it->first.auto_restart;
-
- if (reach.all()) {
- kp->type = MPV_DOT;
- } else if (reach.count() == 255) {
- kp->type = MPV_VERM;
- size_t unset = (~reach).find_first();
- assert(unset != CharReach::npos);
- kp->u.verm.c = (char)unset;
- } else if (reach.count() == 1) {
- kp->type = MPV_NVERM;
- size_t set = reach.find_first();
- assert(set != CharReach::npos);
- kp->u.verm.c = (char)set;
+ const CharReach &reach = it->first.reach;
+ const vector<raw_puff> &puffs = it->second;
+
+ kp->auto_restart = it->first.auto_restart;
+
+ if (reach.all()) {
+ kp->type = MPV_DOT;
+ } else if (reach.count() == 255) {
+ kp->type = MPV_VERM;
+ size_t unset = (~reach).find_first();
+ assert(unset != CharReach::npos);
+ kp->u.verm.c = (char)unset;
+ } else if (reach.count() == 1) {
+ kp->type = MPV_NVERM;
+ size_t set = reach.find_first();
+ assert(set != CharReach::npos);
+ kp->u.verm.c = (char)set;
} else if (shuftiBuildMasks(~reach, (u8 *)&kp->u.shuf.mask_lo,
(u8 *)&kp->u.shuf.mask_hi) != -1) {
- kp->type = MPV_SHUFTI;
- } else {
- kp->type = MPV_TRUFFLE;
+ kp->type = MPV_SHUFTI;
+ } else {
+ kp->type = MPV_TRUFFLE;
truffleBuildMasks(~reach, (u8 *)&kp->u.truffle.mask1,
(u8 *)&kp->u.truffle.mask2);
- }
-
- kp->count = verify_u32(puffs.size());
- kp->counter_offset = counter_offset;
-
- /* start of real puffette array */
- kp->puffette_offset = verify_u32((char *)*pa - (char *)m);
- for (size_t i = 0; i < puffs.size(); i++) {
- assert(!it->first.auto_restart || puffs[i].unbounded);
+ }
+
+ kp->count = verify_u32(puffs.size());
+ kp->counter_offset = counter_offset;
+
+ /* start of real puffette array */
+ kp->puffette_offset = verify_u32((char *)*pa - (char *)m);
+ for (size_t i = 0; i < puffs.size(); i++) {
+ assert(!it->first.auto_restart || puffs[i].unbounded);
writePuffette(*pa + i, puffs[i], rm);
- }
-
- *pa += puffs.size();
+ }
+
+ *pa += puffs.size();
writeSentinel(*pa);
- ++*pa;
-
- writeDeadPoint(kp, puffs);
-}
-
-static
-void writeCoreNfa(NFA *nfa, u32 len, u32 min_width, u32 max_counter,
- u32 streamStateSize, u32 scratchStateSize) {
- assert(nfa);
-
- nfa->length = len;
- nfa->nPositions = max_counter - 1;
+ ++*pa;
+
+ writeDeadPoint(kp, puffs);
+}
+
+static
+void writeCoreNfa(NFA *nfa, u32 len, u32 min_width, u32 max_counter,
+ u32 streamStateSize, u32 scratchStateSize) {
+ assert(nfa);
+
+ nfa->length = len;
+ nfa->nPositions = max_counter - 1;
nfa->type = MPV_NFA;
- nfa->streamStateSize = streamStateSize;
- assert(16 >= sizeof(mpv_decomp_kilo));
- nfa->scratchStateSize = scratchStateSize;
- nfa->minWidth = min_width;
-}
-
-static
-void findCounterSize(map<ClusterKey, vector<raw_puff>>::const_iterator kp_it,
- map<ClusterKey, vector<raw_puff>>::const_iterator kp_ite,
- u64a *max_counter_out, u32 *counter_size) {
- u32 max_counter = 0; /* max counter that we may need to know about is one
- more than largest repeat */
- for (; kp_it != kp_ite; ++kp_it) {
- max_counter = MAX(max_counter, kp_it->second.back().repeats + 1);
- }
-
- if (max_counter < (1U << 8)) {
- *counter_size = 1;
- } else if (max_counter < (1U << 16)) {
- *counter_size = 2;
- } else if (max_counter < (1U << 24)) {
- *counter_size = 3;
- } else {
- *counter_size = 4;
- }
-
- *max_counter_out = max_counter;
-}
-
-static
-void fillCounterInfo(mpv_counter_info *out, u32 *curr_decomp_offset,
- u32 *curr_comp_offset,
- const map<ClusterKey, vector<raw_puff>> &kilopuffs,
- map<ClusterKey, vector<raw_puff>>::const_iterator kp_it,
- map<ClusterKey, vector<raw_puff>>::const_iterator kp_ite) {
-
- out->kilo_begin = distance(kilopuffs.begin(), kp_it);
- out->kilo_end = distance(kilopuffs.begin(), kp_ite);
- findCounterSize(kp_it, kp_ite, &out->max_counter, &out->counter_size);
- out->counter_offset = *curr_decomp_offset;
- *curr_decomp_offset += sizeof(u64a);
- *curr_comp_offset += out->counter_size;
-}
-
-static
-void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset,
- u32 *curr_comp_offset,
- const map<ClusterKey, vector<raw_puff>> &kilopuffs) {
- /* first the triggered puffs */
- map<ClusterKey, vector<raw_puff>>::const_iterator it = kilopuffs.begin();
- while (it != kilopuffs.end() && it->first.trigger_event != MQE_INVALID) {
- assert(!it->first.auto_restart);
- assert(it->first.trigger_event
- == MQE_TOP_FIRST + distance(kilopuffs.begin(), it));
-
- out->push_back(mpv_counter_info());
- map<ClusterKey, vector<raw_puff>>::const_iterator it_o = it;
- ++it;
- fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset,
- kilopuffs, it_o, it);
- }
-
- /* we may have 2 sets of non triggered puffs:
- * 1) always started with no auto_restart
- * 2) always started with auto_restart
- */
- map<ClusterKey, vector<raw_puff>>::const_iterator trig_ite = it;
- while (it != kilopuffs.end() && !it->first.auto_restart) {
- assert(it->first.trigger_event == MQE_INVALID);
-
- ++it;
- }
- if (it != trig_ite) {
- out->push_back(mpv_counter_info());
- fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset,
- kilopuffs, kilopuffs.begin(), it);
- }
- while (it != kilopuffs.end() && it->first.auto_restart) {
- assert(it->first.trigger_event == MQE_INVALID);
-
- out->push_back(mpv_counter_info());
- map<ClusterKey, vector<raw_puff>>::const_iterator it_o = it;
- ++it;
- fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset,
- kilopuffs, it_o, it);
- }
-}
-
-static
-const mpv_counter_info &findCounter(const vector<mpv_counter_info> &counters,
- u32 i) {
- for (const auto &counter : counters) {
- if (i >= counter.kilo_begin && i < counter.kilo_end) {
- return counter;
- }
- }
- assert(0);
- return counters.front();
-}
-
+ nfa->streamStateSize = streamStateSize;
+ assert(16 >= sizeof(mpv_decomp_kilo));
+ nfa->scratchStateSize = scratchStateSize;
+ nfa->minWidth = min_width;
+}
+
+static
+void findCounterSize(map<ClusterKey, vector<raw_puff>>::const_iterator kp_it,
+ map<ClusterKey, vector<raw_puff>>::const_iterator kp_ite,
+ u64a *max_counter_out, u32 *counter_size) {
+ u32 max_counter = 0; /* max counter that we may need to know about is one
+ more than largest repeat */
+ for (; kp_it != kp_ite; ++kp_it) {
+ max_counter = MAX(max_counter, kp_it->second.back().repeats + 1);
+ }
+
+ if (max_counter < (1U << 8)) {
+ *counter_size = 1;
+ } else if (max_counter < (1U << 16)) {
+ *counter_size = 2;
+ } else if (max_counter < (1U << 24)) {
+ *counter_size = 3;
+ } else {
+ *counter_size = 4;
+ }
+
+ *max_counter_out = max_counter;
+}
+
+static
+void fillCounterInfo(mpv_counter_info *out, u32 *curr_decomp_offset,
+ u32 *curr_comp_offset,
+ const map<ClusterKey, vector<raw_puff>> &kilopuffs,
+ map<ClusterKey, vector<raw_puff>>::const_iterator kp_it,
+ map<ClusterKey, vector<raw_puff>>::const_iterator kp_ite) {
+
+ out->kilo_begin = distance(kilopuffs.begin(), kp_it);
+ out->kilo_end = distance(kilopuffs.begin(), kp_ite);
+ findCounterSize(kp_it, kp_ite, &out->max_counter, &out->counter_size);
+ out->counter_offset = *curr_decomp_offset;
+ *curr_decomp_offset += sizeof(u64a);
+ *curr_comp_offset += out->counter_size;
+}
+
+static
+void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset,
+ u32 *curr_comp_offset,
+ const map<ClusterKey, vector<raw_puff>> &kilopuffs) {
+ /* first the triggered puffs */
+ map<ClusterKey, vector<raw_puff>>::const_iterator it = kilopuffs.begin();
+ while (it != kilopuffs.end() && it->first.trigger_event != MQE_INVALID) {
+ assert(!it->first.auto_restart);
+ assert(it->first.trigger_event
+ == MQE_TOP_FIRST + distance(kilopuffs.begin(), it));
+
+ out->push_back(mpv_counter_info());
+ map<ClusterKey, vector<raw_puff>>::const_iterator it_o = it;
+ ++it;
+ fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset,
+ kilopuffs, it_o, it);
+ }
+
+ /* we may have 2 sets of non triggered puffs:
+ * 1) always started with no auto_restart
+ * 2) always started with auto_restart
+ */
+ map<ClusterKey, vector<raw_puff>>::const_iterator trig_ite = it;
+ while (it != kilopuffs.end() && !it->first.auto_restart) {
+ assert(it->first.trigger_event == MQE_INVALID);
+
+ ++it;
+ }
+ if (it != trig_ite) {
+ out->push_back(mpv_counter_info());
+ fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset,
+ kilopuffs, kilopuffs.begin(), it);
+ }
+ while (it != kilopuffs.end() && it->first.auto_restart) {
+ assert(it->first.trigger_event == MQE_INVALID);
+
+ out->push_back(mpv_counter_info());
+ map<ClusterKey, vector<raw_puff>>::const_iterator it_o = it;
+ ++it;
+ fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset,
+ kilopuffs, it_o, it);
+ }
+}
+
+static
+const mpv_counter_info &findCounter(const vector<mpv_counter_info> &counters,
+ u32 i) {
+ for (const auto &counter : counters) {
+ if (i >= counter.kilo_begin && i < counter.kilo_end) {
+ return counter;
+ }
+ }
+ assert(0);
+ return counters.front();
+}
+
bytecode_ptr<NFA> mpvCompile(const vector<raw_puff> &puffs_in,
const vector<raw_puff> &triggered_puffs,
const ReportManager &rm) {
- assert(!puffs_in.empty() || !triggered_puffs.empty());
- u32 puffette_count = puffs_in.size() + triggered_puffs.size();
-
- map<ClusterKey, vector<raw_puff>> puff_clusters;
- populateClusters(puffs_in, triggered_puffs, &puff_clusters);
-
- u32 curr_comp_offset = 0;
-
- u32 curr_decomp_offset = sizeof(mpv_decomp_state);
- curr_decomp_offset += 16 * puff_clusters.size();
-
- vector<mpv_counter_info> counters;
- fillCounterInfos(&counters, &curr_decomp_offset, &curr_comp_offset,
- puff_clusters);
-
- u32 pq_offset = curr_decomp_offset;
- curr_decomp_offset += sizeof(mpv_pq_item) * puff_clusters.size();
-
- u32 rl_offset = curr_decomp_offset;
- curr_decomp_offset += sizeof(ReportID) * puffette_count;
-
- u32 reporter_offset = curr_decomp_offset;
- curr_decomp_offset += mmbit_size(puff_clusters.size());
-
- u32 active_offset = curr_comp_offset;
- curr_comp_offset += mmbit_size(puff_clusters.size());
-
- u32 len = calcSize(puff_clusters, counters);
-
- DEBUG_PRINTF("%u puffs, len = %u\n", puffette_count, len);
-
+ assert(!puffs_in.empty() || !triggered_puffs.empty());
+ u32 puffette_count = puffs_in.size() + triggered_puffs.size();
+
+ map<ClusterKey, vector<raw_puff>> puff_clusters;
+ populateClusters(puffs_in, triggered_puffs, &puff_clusters);
+
+ u32 curr_comp_offset = 0;
+
+ u32 curr_decomp_offset = sizeof(mpv_decomp_state);
+ curr_decomp_offset += 16 * puff_clusters.size();
+
+ vector<mpv_counter_info> counters;
+ fillCounterInfos(&counters, &curr_decomp_offset, &curr_comp_offset,
+ puff_clusters);
+
+ u32 pq_offset = curr_decomp_offset;
+ curr_decomp_offset += sizeof(mpv_pq_item) * puff_clusters.size();
+
+ u32 rl_offset = curr_decomp_offset;
+ curr_decomp_offset += sizeof(ReportID) * puffette_count;
+
+ u32 reporter_offset = curr_decomp_offset;
+ curr_decomp_offset += mmbit_size(puff_clusters.size());
+
+ u32 active_offset = curr_comp_offset;
+ curr_comp_offset += mmbit_size(puff_clusters.size());
+
+ u32 len = calcSize(puff_clusters, counters);
+
+ DEBUG_PRINTF("%u puffs, len = %u\n", puffette_count, len);
+
auto nfa = make_zeroed_bytecode_ptr<NFA>(len);
-
- mpv_puffette *pa_base = (mpv_puffette *)
- ((char *)nfa.get() + sizeof(NFA) + sizeof(mpv)
- + sizeof(mpv_kilopuff) * puff_clusters.size()
- + sizeof(mpv_counter_info) * counters.size());
- mpv_puffette *pa = pa_base;
-
+
+ mpv_puffette *pa_base = (mpv_puffette *)
+ ((char *)nfa.get() + sizeof(NFA) + sizeof(mpv)
+ + sizeof(mpv_kilopuff) * puff_clusters.size()
+ + sizeof(mpv_counter_info) * counters.size());
+ mpv_puffette *pa = pa_base;
+
writeSentinel(pa);
-
- ++pa; /* skip init sentinel */
-
- u32 min_repeat = ~0U;
- u32 max_counter = 0; /* max counter that we may need to know about is one
- more than largest repeat */
- for (const vector<raw_puff> &puffs : puff_clusters | map_values) {
- max_counter = max(max_counter, puffs.back().repeats + 1);
- min_repeat = min(min_repeat, puffs.front().repeats);
- }
-
- mpv *m = (mpv *)getMutableImplNfa(nfa.get());
- m->kilo_count = verify_u32(puff_clusters.size());
- m->counter_count = verify_u32(counters.size());
- m->puffette_count = puffette_count;
- m->pq_offset = pq_offset;
- m->reporter_offset = reporter_offset;
- m->report_list_offset = rl_offset;
- m->active_offset = active_offset;
- m->top_kilo_begin = verify_u32(triggered_puffs.size());
- m->top_kilo_end = verify_u32(puff_clusters.size());
-
- mpv_kilopuff *kp_begin = (mpv_kilopuff *)(m + 1);
- mpv_kilopuff *kp = kp_begin;
- for (auto it = puff_clusters.begin(); it != puff_clusters.end(); ++it) {
+
+ ++pa; /* skip init sentinel */
+
+ u32 min_repeat = ~0U;
+ u32 max_counter = 0; /* max counter that we may need to know about is one
+ more than largest repeat */
+ for (const vector<raw_puff> &puffs : puff_clusters | map_values) {
+ max_counter = max(max_counter, puffs.back().repeats + 1);
+ min_repeat = min(min_repeat, puffs.front().repeats);
+ }
+
+ mpv *m = (mpv *)getMutableImplNfa(nfa.get());
+ m->kilo_count = verify_u32(puff_clusters.size());
+ m->counter_count = verify_u32(counters.size());
+ m->puffette_count = puffette_count;
+ m->pq_offset = pq_offset;
+ m->reporter_offset = reporter_offset;
+ m->report_list_offset = rl_offset;
+ m->active_offset = active_offset;
+ m->top_kilo_begin = verify_u32(triggered_puffs.size());
+ m->top_kilo_end = verify_u32(puff_clusters.size());
+
+ mpv_kilopuff *kp_begin = (mpv_kilopuff *)(m + 1);
+ mpv_kilopuff *kp = kp_begin;
+ for (auto it = puff_clusters.begin(); it != puff_clusters.end(); ++it) {
writeKiloPuff(it, rm,
findCounter(counters, kp - kp_begin).counter_offset, m,
kp, &pa);
- ++kp;
- }
- assert((char *)pa == (char *)nfa.get() + len);
-
- mpv_counter_info *out_ci = (mpv_counter_info *)kp;
- for (const auto &counter : counters) {
- *out_ci = counter;
- ++out_ci;
- }
- assert((char *)out_ci == (char *)pa_base);
-
- writeCoreNfa(nfa.get(), len, min_repeat, max_counter, curr_comp_offset,
- curr_decomp_offset);
-
- return nfa;
-}
-
-} // namespace ue2
+ ++kp;
+ }
+ assert((char *)pa == (char *)nfa.get() + len);
+
+ mpv_counter_info *out_ci = (mpv_counter_info *)kp;
+ for (const auto &counter : counters) {
+ *out_ci = counter;
+ ++out_ci;
+ }
+ assert((char *)out_ci == (char *)pa_base);
+
+ writeCoreNfa(nfa.get(), len, min_repeat, max_counter, curr_comp_offset,
+ curr_decomp_offset);
+
+ return nfa;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfa/mpvcompile.h b/contrib/libs/hyperscan/src/nfa/mpvcompile.h
index 497b52358e..4f820e4365 100644
--- a/contrib/libs/hyperscan/src/nfa/mpvcompile.h
+++ b/contrib/libs/hyperscan/src/nfa/mpvcompile.h
@@ -1,70 +1,70 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MPV_COMPILE_H
-#define MPV_COMPILE_H
-
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MPV_COMPILE_H
+#define MPV_COMPILE_H
+
+#include "ue2common.h"
#include "util/bytecode_ptr.h"
-#include "util/charreach.h"
-
-#include <memory>
-#include <vector>
-
-struct NFA;
-
-namespace ue2 {
-
+#include "util/charreach.h"
+
+#include <memory>
+#include <vector>
+
+struct NFA;
+
+namespace ue2 {
+
class ReportManager;
-struct raw_puff {
- raw_puff(u32 repeats_in, bool unbounded_in, ReportID report_in,
+struct raw_puff {
+ raw_puff(u32 repeats_in, bool unbounded_in, ReportID report_in,
const CharReach &reach_in, bool auto_restart_in = false,
bool simple_exhaust_in = false)
- : repeats(repeats_in), unbounded(unbounded_in),
+ : repeats(repeats_in), unbounded(unbounded_in),
auto_restart(auto_restart_in), simple_exhaust(simple_exhaust_in),
report(report_in), reach(reach_in) {}
- u32 repeats; /**< report match after this many matching bytes */
- bool unbounded; /**< keep producing matches after repeats are reached */
- bool auto_restart; /**< for /[^X]{n}/ type patterns */
+ u32 repeats; /**< report match after this many matching bytes */
+ bool unbounded; /**< keep producing matches after repeats are reached */
+ bool auto_restart; /**< for /[^X]{n}/ type patterns */
bool simple_exhaust; /* first report will exhaust us */
- ReportID report;
- CharReach reach; /**< = ~escapes */
-};
-
-/*
- * puffs in the triggered_puffs vector are enabled when an TOP_N event is
- * delivered corresponding to their index in the vector
- */
+ ReportID report;
+ CharReach reach; /**< = ~escapes */
+};
+
+/*
+ * puffs in the triggered_puffs vector are enabled when an TOP_N event is
+ * delivered corresponding to their index in the vector
+ */
bytecode_ptr<NFA> mpvCompile(const std::vector<raw_puff> &puffs,
const std::vector<raw_puff> &triggered_puffs,
const ReportManager &rm);
-
-} // namespace ue2
-
-#endif
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/nfa_api.h b/contrib/libs/hyperscan/src/nfa/nfa_api.h
index 020f44682a..e3f7f74311 100644
--- a/contrib/libs/hyperscan/src/nfa/nfa_api.h
+++ b/contrib/libs/hyperscan/src/nfa/nfa_api.h
@@ -1,125 +1,125 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Declarations for the main NFA Engine API.
- *
- * This file provides the internal API for all runtime engines ("NFAs", even if
- * they're not strictly NFA implementations).
- */
-
-#ifndef NFA_API_H
-#define NFA_API_H
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-#include "callback.h"
-#include "ue2common.h"
-
-struct mq;
-struct NFA;
-
-/**
- * Indicates if an nfa is a zombie. Note: that there were plans for a more
- * nuanced view of zombiehood but this never eventuated.
- */
-enum nfa_zombie_status {
- NFA_ZOMBIE_NO, /**< nfa is not a zombie and will respond to top events */
- NFA_ZOMBIE_ALWAYS_YES /**< nfa is a zombie and will always be a zombie */
-};
-
-/**
- * Compresses an engine's state.
- * The expanded state (@ref mq::state, @ref mq::streamState) is reduced purely
- * to a corresponding compressed stream state (@ref mq::streamState).
- *
- * @param nfa engine the state belongs to
- * @param q queue for the engine. The final compressed stream stream is placed
- * in the location indicated by @ref mq::streamState
- * @param loc the location corresponding to the engine's current state
- */
-char nfaQueueCompressState(const struct NFA *nfa, const struct mq *q, s64a loc);
-
-/**
- * Expands an engine's compressed stream state, into its scratch space
- * representation. This is required before an engine starts operating over its
- * queue.
- *
- * @param nfa engine the state belongs to
- * @param dest location in scratch for decompressed state
- * @param src compressed stream state
- * @param offset the current stream offset.
- * @param key byte corresponding to the location where the compressed state was
- * created.
- */
-char nfaExpandState(const struct NFA *nfa, void *dest, const void *src,
- u64a offset, u8 key);
-
-/**
- * Gives us a properly initialised dead state suitable for later @ref
- * nfaQueueExec calls.
- */
-char nfaQueueInitState(const struct NFA *nfa, struct mq *q);
-
-/**
- * Initialise the state, applying a TOP appropriate for the offset. If the
- * NFA becomes inactive, return zero. Otherwise, write out its compressed
- * representation to `state' and return non-zero.
- *
- * @param nfa engine the state belongs to
- * @param offset offset in the stream (relative to start of stream)
- * @param state pointer indicating where the state is to be written
- * @param key byte corresponding to the location where the compressed state is
- * to be created.
- */
-char nfaInitCompressedState(const struct NFA *nfa, u64a offset, void *state,
- u8 key);
-
-/**
- * Process the queued commands on the given NFA.
- *
- * @param nfa the NFA to execute
- * @param q the queued commands. It must start with some variant of start and
- * end with some variant of end. The location field of the events must
- * be monotonically increasing.
- * @param end stop processing command queue when we reach this point
- *
- * @return non-zero if the nfa is still active, if the nfa is not active the
- * state data is undefined
- *
- * Note: this function can not process events from the past: the location field
- * of each event must be >= current offset.
- */
-char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end);
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Declarations for the main NFA Engine API.
+ *
+ * This file provides the internal API for all runtime engines ("NFAs", even if
+ * they're not strictly NFA implementations).
+ */
+
+#ifndef NFA_API_H
+#define NFA_API_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "callback.h"
+#include "ue2common.h"
+
+struct mq;
+struct NFA;
+
+/**
+ * Indicates if an nfa is a zombie. Note: that there were plans for a more
+ * nuanced view of zombiehood but this never eventuated.
+ */
+enum nfa_zombie_status {
+ NFA_ZOMBIE_NO, /**< nfa is not a zombie and will respond to top events */
+ NFA_ZOMBIE_ALWAYS_YES /**< nfa is a zombie and will always be a zombie */
+};
+
+/**
+ * Compresses an engine's state.
+ * The expanded state (@ref mq::state, @ref mq::streamState) is reduced purely
+ * to a corresponding compressed stream state (@ref mq::streamState).
+ *
+ * @param nfa engine the state belongs to
+ * @param q queue for the engine. The final compressed stream stream is placed
+ * in the location indicated by @ref mq::streamState
+ * @param loc the location corresponding to the engine's current state
+ */
+char nfaQueueCompressState(const struct NFA *nfa, const struct mq *q, s64a loc);
+
+/**
+ * Expands an engine's compressed stream state, into its scratch space
+ * representation. This is required before an engine starts operating over its
+ * queue.
+ *
+ * @param nfa engine the state belongs to
+ * @param dest location in scratch for decompressed state
+ * @param src compressed stream state
+ * @param offset the current stream offset.
+ * @param key byte corresponding to the location where the compressed state was
+ * created.
+ */
+char nfaExpandState(const struct NFA *nfa, void *dest, const void *src,
+ u64a offset, u8 key);
+
+/**
+ * Gives us a properly initialised dead state suitable for later @ref
+ * nfaQueueExec calls.
+ */
+char nfaQueueInitState(const struct NFA *nfa, struct mq *q);
+
+/**
+ * Initialise the state, applying a TOP appropriate for the offset. If the
+ * NFA becomes inactive, return zero. Otherwise, write out its compressed
+ * representation to `state' and return non-zero.
+ *
+ * @param nfa engine the state belongs to
+ * @param offset offset in the stream (relative to start of stream)
+ * @param state pointer indicating where the state is to be written
+ * @param key byte corresponding to the location where the compressed state is
+ * to be created.
+ */
+char nfaInitCompressedState(const struct NFA *nfa, u64a offset, void *state,
+ u8 key);
+
+/**
+ * Process the queued commands on the given NFA.
+ *
+ * @param nfa the NFA to execute
+ * @param q the queued commands. It must start with some variant of start and
+ * end with some variant of end. The location field of the events must
+ * be monotonically increasing.
+ * @param end stop processing command queue when we reach this point
+ *
+ * @return non-zero if the nfa is still active, if the nfa is not active the
+ * state data is undefined
+ *
+ * Note: this function can not process events from the past: the location field
+ * of each event must be >= current offset.
+ */
+char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end);
+
/**
* Main execution function that doesn't perform the checks and optimisations of
* nfaQueueExec() and just dispatches directly to the nfa implementations. It is
@@ -130,42 +130,42 @@ char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end);
/** Return value indicating that the engine is dead. */
#define MO_DEAD 0
-/** Return value indicating that the engine is alive. */
-#define MO_ALIVE 1
-
-/** Return value from @ref nfaQueueExecToMatch indicating that engine progress
- * stopped as a match state was reached. */
-#define MO_MATCHES_PENDING 2
-
-/**
- * Process the queued commands on the given nfa up to end or the first match.
- * This function will only fire the callback in response to an report_current
- * being set and accepts at the starting offset, in all other situations accepts
- * will result in the queue pausing with a return value of
- * @ref MO_MATCHES_PENDING.
- *
- * @param nfa the NFA to execute
- * @param q the queued commands. It must start with some variant of start and
- * end with some variant of end. The location field of the events must
- * be monotonically increasing. If not all the data was processed during
- * the call, the queue is updated to reflect the remaining work.
- * @param end stop processing command queue when we reach this point
- *
- * @return @ref MO_ALIVE if the nfa is still active with no matches pending,
- * and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not
- * alive
- *
- * Note: if it can be determined that the stream can never match, the nfa
- * may be reported as dead even if not all the data was scanned
- *
- * Note: if the nfa is not alive the state data is undefined
- *
- * Note: this function can not process events from the past: the location field
- * of each event must be >= current offset.
- */
-char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end);
-
-/**
+/** Return value indicating that the engine is alive. */
+#define MO_ALIVE 1
+
+/** Return value from @ref nfaQueueExecToMatch indicating that engine progress
+ * stopped as a match state was reached. */
+#define MO_MATCHES_PENDING 2
+
+/**
+ * Process the queued commands on the given nfa up to end or the first match.
+ * This function will only fire the callback in response to an report_current
+ * being set and accepts at the starting offset, in all other situations accepts
+ * will result in the queue pausing with a return value of
+ * @ref MO_MATCHES_PENDING.
+ *
+ * @param nfa the NFA to execute
+ * @param q the queued commands. It must start with some variant of start and
+ * end with some variant of end. The location field of the events must
+ * be monotonically increasing. If not all the data was processed during
+ * the call, the queue is updated to reflect the remaining work.
+ * @param end stop processing command queue when we reach this point
+ *
+ * @return @ref MO_ALIVE if the nfa is still active with no matches pending,
+ * and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not
+ * alive
+ *
+ * Note: if it can be determined that the stream can never match, the nfa
+ * may be reported as dead even if not all the data was scanned
+ *
+ * Note: if the nfa is not alive the state data is undefined
+ *
+ * Note: this function can not process events from the past: the location field
+ * of each event must be >= current offset.
+ */
+char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end);
+
+/**
* Main execution function that doesn't perform the checks and optimisations of
* nfaQueueExecToMatch() and just dispatches directly to the nfa
* implementations. It is intended to be used by the Tamarama engine.
@@ -173,108 +173,108 @@ char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end);
char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end);
/**
- * Report matches at the current queue location.
- *
- * @param nfa the NFA to execute
- * @param q the queued commands. It must start with some variant of start and
- * end with some variant of end. The location field of the events must
- * be monotonically increasing.
- *
- * Note: the queue MUST be located at position where @ref nfaQueueExecToMatch
- * returned @ref MO_MATCHES_PENDING.
- *
- * Note: the return value of this call is undefined, and should be ignored.
- */
-char nfaReportCurrentMatches(const struct NFA *nfa, struct mq *q);
-
-/**
- * Returns non-zero if the NFA is in an accept state with the given report ID.
- */
-char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);
-
-/**
+ * Report matches at the current queue location.
+ *
+ * @param nfa the NFA to execute
+ * @param q the queued commands. It must start with some variant of start and
+ * end with some variant of end. The location field of the events must
+ * be monotonically increasing.
+ *
+ * Note: the queue MUST be located at position where @ref nfaQueueExecToMatch
+ * returned @ref MO_MATCHES_PENDING.
+ *
+ * Note: the return value of this call is undefined, and should be ignored.
+ */
+char nfaReportCurrentMatches(const struct NFA *nfa, struct mq *q);
+
+/**
+ * Returns non-zero if the NFA is in an accept state with the given report ID.
+ */
+char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);
+
+/**
* Returns non-zero if the NFA is in any accept state regardless of report
* ID.
*/
char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q);
/**
- * Process the queued commands on the given NFA up to end or the first match.
- *
+ * Process the queued commands on the given NFA up to end or the first match.
+ *
* Note: This version is meant for rose prefix/infix NFAs:
- * - never uses a callback
- * - loading of state at a point in history is not special cased
- *
- * @param nfa the NFA to execute
- * @param q the queued commands. It must start with some variant of start and
- * end with some variant of end. The location field of the events must
- * be monotonically increasing. If not all the data was processed during
- * the call, the queue is updated to reflect the remaining work.
+ * - never uses a callback
+ * - loading of state at a point in history is not special cased
+ *
+ * @param nfa the NFA to execute
+ * @param q the queued commands. It must start with some variant of start and
+ * end with some variant of end. The location field of the events must
+ * be monotonically increasing. If not all the data was processed during
+ * the call, the queue is updated to reflect the remaining work.
* @param report we are interested in. If the given report will be raised at
* the end location, the function returns @ref MO_MATCHES_PENDING. If no
* match information is desired, MO_INVALID_IDX should be passed in.
- * @return @ref MO_ALIVE if the nfa is still active with no matches pending,
- * and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not
- * alive
- *
- * Note: if it can be determined that the stream can never match, the nfa
- * may be reported as dead even if not all the data was scanned
- *
- * Note: if the NFA is not active the state data is undefined.
- */
-char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID report);
-
-/**
- * Runs an NFA in reverse from (buf + buflen) to buf and then from (hbuf + hlen)
- * to hbuf (main buffer and history buffer).
- *
+ * @return @ref MO_ALIVE if the nfa is still active with no matches pending,
+ * and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not
+ * alive
+ *
+ * Note: if it can be determined that the stream can never match, the nfa
+ * may be reported as dead even if not all the data was scanned
+ *
+ * Note: if the NFA is not active the state data is undefined.
+ */
+char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID report);
+
+/**
+ * Runs an NFA in reverse from (buf + buflen) to buf and then from (hbuf + hlen)
+ * to hbuf (main buffer and history buffer).
+ *
* Note: provides the match location as the "end" offset when the callback is
* called.
*
- * @param nfa engine to run
- * @param offset base offset of buf
- * @param buf main buffer
- * @param buflen length of buf
- * @param hbuf history buf
- * @param hlen length of hbuf
- * @param callback the callback to call for each match raised
- * @param context context pointer passed to each callback
- */
-char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf,
- size_t buflen, const u8 *hbuf, size_t hlen,
+ * @param nfa engine to run
+ * @param offset base offset of buf
+ * @param buf main buffer
+ * @param buflen length of buf
+ * @param hbuf history buf
+ * @param hlen length of hbuf
+ * @param callback the callback to call for each match raised
+ * @param context context pointer passed to each callback
+ */
+char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf,
+ size_t buflen, const u8 *hbuf, size_t hlen,
NfaCallback callback, void *context);
-
-/**
- * Check whether the given NFA's state indicates that it is in one or more
- * final (accept at end of data) state. If so, call the callback for each
- * match.
- *
- * @param nfa the NFA to execute
- * @param state current state associated with this NFA
- * @param streamState stream version of the state associated with this NFA
- * (including br region)
- * @param offset the offset to return (via the callback) with each match
- * @param callback the callback to call for each match raised
- * @param context context pointer passed to each callback
+
+/**
+ * Check whether the given NFA's state indicates that it is in one or more
+ * final (accept at end of data) state. If so, call the callback for each
+ * match.
+ *
+ * @param nfa the NFA to execute
+ * @param state current state associated with this NFA
+ * @param streamState stream version of the state associated with this NFA
+ * (including br region)
+ * @param offset the offset to return (via the callback) with each match
+ * @param callback the callback to call for each match raised
+ * @param context context pointer passed to each callback
*
* @return @ref MO_HALT_MATCHING if the user instructed us to halt, otherwise
* @ref MO_CONTINUE_MATCHING.
- */
-char nfaCheckFinalState(const struct NFA *nfa, const char *state,
- const char *streamState, u64a offset,
+ */
+char nfaCheckFinalState(const struct NFA *nfa, const char *state,
+ const char *streamState, u64a offset,
NfaCallback callback, void *context);
-
-/**
- * Indicates if an engine is a zombie.
- *
- * @param nfa engine to consider
- * @param q queue corresponding to the engine
- * @param loc current location in the buffer for an engine
- */
-enum nfa_zombie_status nfaGetZombieStatus(const struct NFA *nfa, struct mq *q,
- s64a loc);
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif
+
+/**
+ * Indicates if an engine is a zombie.
+ *
+ * @param nfa engine to consider
+ * @param q queue corresponding to the engine
+ * @param loc current location in the buffer for an engine
+ */
+enum nfa_zombie_status nfaGetZombieStatus(const struct NFA *nfa, struct mq *q,
+ s64a loc);
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c b/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c
index 04b9f7144d..75cac4b481 100644
--- a/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c
+++ b/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c
@@ -1,58 +1,58 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- \brief Dispatches NFA engine API calls to the appropriate engines
-*/
-#include "nfa_api.h"
-
-#include "nfa_api_queue.h"
-#include "nfa_internal.h"
-#include "ue2common.h"
-
-// Engine implementations.
-#include "castle.h"
-#include "gough.h"
-#include "lbr.h"
-#include "limex.h"
-#include "mcclellan.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ \brief Dispatches NFA engine API calls to the appropriate engines
+*/
+#include "nfa_api.h"
+
+#include "nfa_api_queue.h"
+#include "nfa_internal.h"
+#include "ue2common.h"
+
+// Engine implementations.
+#include "castle.h"
+#include "gough.h"
+#include "lbr.h"
+#include "limex.h"
+#include "mcclellan.h"
#include "mcsheng.h"
-#include "mpv.h"
+#include "mpv.h"
#include "sheng.h"
#include "tamarama.h"
-
+
#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_func_call) \
case dc_ltype: \
return nfaExec##dc_ftype##dc_func_call; \
- break
-
-// general framework calls
-
+ break
+
+// general framework calls
+
#define DISPATCH_BY_NFA_TYPE(dbnt_func) \
switch (nfa->type) { \
DISPATCH_CASE(LIMEX_NFA_32, LimEx32, dbnt_func); \
@@ -82,40 +82,40 @@
DISPATCH_CASE(MCSHENG_64_NFA_16, McSheng64_16, dbnt_func); \
default: \
assert(0); \
- }
-
-char nfaCheckFinalState(const struct NFA *nfa, const char *state,
- const char *streamState, u64a offset,
+ }
+
+char nfaCheckFinalState(const struct NFA *nfa, const char *state,
+ const char *streamState, u64a offset,
NfaCallback callback, void *context) {
- assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
-
- // Caller should avoid calling us if we can never produce matches.
- assert(nfaAcceptsEod(nfa));
-
- DISPATCH_BY_NFA_TYPE(_testEOD(nfa, state, streamState, offset, callback,
+ assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
+
+ // Caller should avoid calling us if we can never produce matches.
+ assert(nfaAcceptsEod(nfa));
+
+ DISPATCH_BY_NFA_TYPE(_testEOD(nfa, state, streamState, offset, callback,
context));
- return 0;
-}
-
-char nfaQueueInitState(const struct NFA *nfa, struct mq *q) {
- assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
-
- DISPATCH_BY_NFA_TYPE(_queueInitState(nfa, q));
- return 0;
-}
-
-static really_inline
-char nfaQueueExec_i(const struct NFA *nfa, struct mq *q, s64a end) {
- DISPATCH_BY_NFA_TYPE(_Q(nfa, q, end));
- return 0;
-}
-
-static really_inline
-char nfaQueueExec2_i(const struct NFA *nfa, struct mq *q, s64a end) {
- DISPATCH_BY_NFA_TYPE(_Q2(nfa, q, end));
- return 0;
-}
-
+ return 0;
+}
+
+char nfaQueueInitState(const struct NFA *nfa, struct mq *q) {
+ assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
+
+ DISPATCH_BY_NFA_TYPE(_queueInitState(nfa, q));
+ return 0;
+}
+
+static really_inline
+char nfaQueueExec_i(const struct NFA *nfa, struct mq *q, s64a end) {
+ DISPATCH_BY_NFA_TYPE(_Q(nfa, q, end));
+ return 0;
+}
+
+static really_inline
+char nfaQueueExec2_i(const struct NFA *nfa, struct mq *q, s64a end) {
+ DISPATCH_BY_NFA_TYPE(_Q2(nfa, q, end));
+ return 0;
+}
+
char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end) {
return nfaQueueExec_i(nfa, q, end);
}
@@ -124,245 +124,245 @@ char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end) {
return nfaQueueExec2_i(nfa, q, end);
}
-static really_inline
-char nfaQueueExecRose_i(const struct NFA *nfa, struct mq *q, ReportID report) {
- DISPATCH_BY_NFA_TYPE(_QR(nfa, q, report));
- return 0;
-}
-
-/** Returns 0 if this NFA cannot possibly match (due to width constraints etc)
- * and the caller should return 0. May also edit the queue. */
-static really_inline
-char nfaQueueCanMatch(const struct NFA *nfa, struct mq *q, s64a end,
- char *q_trimmed) {
- assert(q_trimmed);
- assert(q->end - q->cur >= 2);
- assert(end >= 0);
-
- DEBUG_PRINTF("q->offset=%llu, end=%lld\n", q->offset, end);
- DEBUG_PRINTF("maxBiAnchoredWidth=%u, maxOffset=%u\n",
- nfa->maxBiAnchoredWidth, nfa->maxOffset);
-
- if (nfa->maxBiAnchoredWidth &&
- (end + q->offset > nfa->maxBiAnchoredWidth)) {
- DEBUG_PRINTF("stream too long: o %llu l %zu max: %hhu\n", q->offset,
- q->length, nfa->maxBiAnchoredWidth);
- return 0;
- }
-
- if (nfa->maxOffset) {
- if (q->offset >= nfa->maxOffset) {
- DEBUG_PRINTF("stream is past maxOffset\n");
- return 0;
- }
-
- if (q->offset + end > nfa->maxOffset) {
- s64a maxEnd = nfa->maxOffset - q->offset;
- DEBUG_PRINTF("me %lld off %llu len = %lld\n", maxEnd,
- q->offset, end);
- while (q->end > q->cur
- && q->items[q->end - 1].location > maxEnd) {
- *q_trimmed = 1;
- DEBUG_PRINTF("killing item %u %lld %u\n", q->end,
- q->items[q->end - 1].location,
- q->items[q->end - 1].type);
- q->items[q->end - 1].location = maxEnd;
- q->items[q->end - 1].type = MQE_END;
- if (q->end - q->cur < 2
- ||q->items[q->end - 2].location <= maxEnd) {
- break;
- }
- q->end--;
- }
-
- if (q->end - q->cur < 2) { /* nothing left on q */
- DEBUG_PRINTF("queue empty\n");
- return 0;
- }
- }
-
-#ifdef DEBUG
- if (*q_trimmed) {
- debugQueue(q);
- }
-#endif
- }
-
- return 1;
-}
-
-char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end) {
- DEBUG_PRINTF("nfa=%p end=%lld\n", nfa, end);
-#ifdef DEBUG
- debugQueue(q);
-#endif
-
- assert(q && q->context && q->state);
- assert(end >= 0);
- assert(q->cur < q->end);
- assert(q->end <= MAX_MQE_LEN);
- assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
- assert(end < q->items[q->end - 1].location
- || q->items[q->end - 1].type == MQE_END);
-
- if (q->items[q->cur].location > end) {
- return 1;
- }
-
- char q_trimmed = 0;
-
- assert(end <= (s64a)q->length || !q->hlength);
- /* due to reverse accel in block mode some queues may work on a truncated
- * buffer */
- if (end > (s64a)q->length) {
- end = q->length;
- q_trimmed = 1;
- }
-
- if (!nfaQueueCanMatch(nfa, q, end, &q_trimmed)) {
- if (q->report_current) {
- nfaReportCurrentMatches(nfa, q);
- q->report_current = 0;
- }
-
- return 0;
- }
-
- char rv = nfaQueueExec_i(nfa, q, end);
-
-#ifdef DEBUG
- debugQueue(q);
-#endif
-
- assert(!q->report_current);
- DEBUG_PRINTF("returned rv=%d, q_trimmed=%d\n", rv, q_trimmed);
- return rv && !q_trimmed;
-}
-
-char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end) {
- DEBUG_PRINTF("nfa=%p end=%lld\n", nfa, end);
-#ifdef DEBUG
- debugQueue(q);
-#endif
-
- assert(q);
- assert(end >= 0);
- assert(q->state);
- assert(q->cur < q->end);
- assert(q->end <= MAX_MQE_LEN);
- assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
- assert(end < q->items[q->end - 1].location
- || q->items[q->end - 1].type == MQE_END);
-
- char q_trimmed_ra = 0;
- assert(end <= (s64a)q->length || !q->hlength);
- /* due to reverse accel in block mode some queues may work on a truncated
- * buffer */
- if (q->items[q->cur].location > end) {
- return 1;
- }
-
- if (end > (s64a)q->length) {
- end = q->length;
- q_trimmed_ra = 1;
- }
-
- char q_trimmed = 0;
- if (!nfaQueueCanMatch(nfa, q, end, &q_trimmed)) {
- if (q->report_current) {
- nfaReportCurrentMatches(nfa, q);
- q->report_current = 0;
- }
-
- return 0;
- }
-
- char rv = nfaQueueExec2_i(nfa, q, end);
- assert(!q->report_current);
- DEBUG_PRINTF("returned rv=%d, q_trimmed=%d\n", rv, q_trimmed);
- if (rv == MO_MATCHES_PENDING) {
- if (q_trimmed) {
- // We need to "fix" the queue so that subsequent operations must
- // trim it as well.
- assert(q->end > 0);
- assert(nfa->maxOffset);
- q->items[q->end - 1].location = nfa->maxOffset + 1;
- }
- return rv;
- }
- return rv && !q_trimmed && !q_trimmed_ra;
-}
-
-char nfaReportCurrentMatches(const struct NFA *nfa, struct mq *q) {
- DISPATCH_BY_NFA_TYPE(_reportCurrent(nfa, q));
- return 0;
-}
-
-char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q) {
- DISPATCH_BY_NFA_TYPE(_inAccept(nfa, report, q));
- return 0;
-}
-
+static really_inline
+char nfaQueueExecRose_i(const struct NFA *nfa, struct mq *q, ReportID report) {
+ DISPATCH_BY_NFA_TYPE(_QR(nfa, q, report));
+ return 0;
+}
+
+/** Returns 0 if this NFA cannot possibly match (due to width constraints etc)
+ * and the caller should return 0. May also edit the queue. */
+static really_inline
+char nfaQueueCanMatch(const struct NFA *nfa, struct mq *q, s64a end,
+ char *q_trimmed) {
+ assert(q_trimmed);
+ assert(q->end - q->cur >= 2);
+ assert(end >= 0);
+
+ DEBUG_PRINTF("q->offset=%llu, end=%lld\n", q->offset, end);
+ DEBUG_PRINTF("maxBiAnchoredWidth=%u, maxOffset=%u\n",
+ nfa->maxBiAnchoredWidth, nfa->maxOffset);
+
+ if (nfa->maxBiAnchoredWidth &&
+ (end + q->offset > nfa->maxBiAnchoredWidth)) {
+ DEBUG_PRINTF("stream too long: o %llu l %zu max: %hhu\n", q->offset,
+ q->length, nfa->maxBiAnchoredWidth);
+ return 0;
+ }
+
+ if (nfa->maxOffset) {
+ if (q->offset >= nfa->maxOffset) {
+ DEBUG_PRINTF("stream is past maxOffset\n");
+ return 0;
+ }
+
+ if (q->offset + end > nfa->maxOffset) {
+ s64a maxEnd = nfa->maxOffset - q->offset;
+ DEBUG_PRINTF("me %lld off %llu len = %lld\n", maxEnd,
+ q->offset, end);
+ while (q->end > q->cur
+ && q->items[q->end - 1].location > maxEnd) {
+ *q_trimmed = 1;
+ DEBUG_PRINTF("killing item %u %lld %u\n", q->end,
+ q->items[q->end - 1].location,
+ q->items[q->end - 1].type);
+ q->items[q->end - 1].location = maxEnd;
+ q->items[q->end - 1].type = MQE_END;
+ if (q->end - q->cur < 2
+ ||q->items[q->end - 2].location <= maxEnd) {
+ break;
+ }
+ q->end--;
+ }
+
+ if (q->end - q->cur < 2) { /* nothing left on q */
+ DEBUG_PRINTF("queue empty\n");
+ return 0;
+ }
+ }
+
+#ifdef DEBUG
+ if (*q_trimmed) {
+ debugQueue(q);
+ }
+#endif
+ }
+
+ return 1;
+}
+
+char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end) {
+ DEBUG_PRINTF("nfa=%p end=%lld\n", nfa, end);
+#ifdef DEBUG
+ debugQueue(q);
+#endif
+
+ assert(q && q->context && q->state);
+ assert(end >= 0);
+ assert(q->cur < q->end);
+ assert(q->end <= MAX_MQE_LEN);
+ assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
+ assert(end < q->items[q->end - 1].location
+ || q->items[q->end - 1].type == MQE_END);
+
+ if (q->items[q->cur].location > end) {
+ return 1;
+ }
+
+ char q_trimmed = 0;
+
+ assert(end <= (s64a)q->length || !q->hlength);
+ /* due to reverse accel in block mode some queues may work on a truncated
+ * buffer */
+ if (end > (s64a)q->length) {
+ end = q->length;
+ q_trimmed = 1;
+ }
+
+ if (!nfaQueueCanMatch(nfa, q, end, &q_trimmed)) {
+ if (q->report_current) {
+ nfaReportCurrentMatches(nfa, q);
+ q->report_current = 0;
+ }
+
+ return 0;
+ }
+
+ char rv = nfaQueueExec_i(nfa, q, end);
+
+#ifdef DEBUG
+ debugQueue(q);
+#endif
+
+ assert(!q->report_current);
+ DEBUG_PRINTF("returned rv=%d, q_trimmed=%d\n", rv, q_trimmed);
+ return rv && !q_trimmed;
+}
+
+char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end) {
+ DEBUG_PRINTF("nfa=%p end=%lld\n", nfa, end);
+#ifdef DEBUG
+ debugQueue(q);
+#endif
+
+ assert(q);
+ assert(end >= 0);
+ assert(q->state);
+ assert(q->cur < q->end);
+ assert(q->end <= MAX_MQE_LEN);
+ assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
+ assert(end < q->items[q->end - 1].location
+ || q->items[q->end - 1].type == MQE_END);
+
+ char q_trimmed_ra = 0;
+ assert(end <= (s64a)q->length || !q->hlength);
+ /* due to reverse accel in block mode some queues may work on a truncated
+ * buffer */
+ if (q->items[q->cur].location > end) {
+ return 1;
+ }
+
+ if (end > (s64a)q->length) {
+ end = q->length;
+ q_trimmed_ra = 1;
+ }
+
+ char q_trimmed = 0;
+ if (!nfaQueueCanMatch(nfa, q, end, &q_trimmed)) {
+ if (q->report_current) {
+ nfaReportCurrentMatches(nfa, q);
+ q->report_current = 0;
+ }
+
+ return 0;
+ }
+
+ char rv = nfaQueueExec2_i(nfa, q, end);
+ assert(!q->report_current);
+ DEBUG_PRINTF("returned rv=%d, q_trimmed=%d\n", rv, q_trimmed);
+ if (rv == MO_MATCHES_PENDING) {
+ if (q_trimmed) {
+ // We need to "fix" the queue so that subsequent operations must
+ // trim it as well.
+ assert(q->end > 0);
+ assert(nfa->maxOffset);
+ q->items[q->end - 1].location = nfa->maxOffset + 1;
+ }
+ return rv;
+ }
+ return rv && !q_trimmed && !q_trimmed_ra;
+}
+
+char nfaReportCurrentMatches(const struct NFA *nfa, struct mq *q) {
+ DISPATCH_BY_NFA_TYPE(_reportCurrent(nfa, q));
+ return 0;
+}
+
+char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q) {
+ DISPATCH_BY_NFA_TYPE(_inAccept(nfa, report, q));
+ return 0;
+}
+
char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q) {
DISPATCH_BY_NFA_TYPE(_inAnyAccept(nfa, q));
return 0;
}
-char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID r) {
- DEBUG_PRINTF("nfa=%p\n", nfa);
-#ifdef DEBUG
- debugQueue(q);
-#endif
-
- assert(q && !q->context && q->state);
- assert(q->cur <= q->end);
- assert(q->end <= MAX_MQE_LEN);
- assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
- assert(!q->report_current);
-
- return nfaQueueExecRose_i(nfa, q, r);
-}
-
-char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf,
- size_t buflen, const u8 *hbuf, size_t hlen,
+char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID r) {
+ DEBUG_PRINTF("nfa=%p\n", nfa);
+#ifdef DEBUG
+ debugQueue(q);
+#endif
+
+ assert(q && !q->context && q->state);
+ assert(q->cur <= q->end);
+ assert(q->end <= MAX_MQE_LEN);
+ assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
+ assert(!q->report_current);
+
+ return nfaQueueExecRose_i(nfa, q, r);
+}
+
+char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf,
+ size_t buflen, const u8 *hbuf, size_t hlen,
NfaCallback callback, void *context) {
- assert(nfa);
- assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
-
- DISPATCH_BY_NFA_TYPE(_B_Reverse(nfa, offset, buf, buflen, hbuf, hlen,
+ assert(nfa);
+ assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
+
+ DISPATCH_BY_NFA_TYPE(_B_Reverse(nfa, offset, buf, buflen, hbuf, hlen,
callback, context));
- return 0;
-}
-
-char nfaQueueCompressState(const struct NFA *nfa, const struct mq *q,
- s64a loc) {
- assert(nfa && q);
- assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
-
- DISPATCH_BY_NFA_TYPE(_queueCompressState(nfa, q, loc));
- return 0;
-}
-
-char nfaExpandState(const struct NFA *nfa, void *dest, const void *src,
- u64a offset, u8 key) {
- assert(nfa && dest && src);
- assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
-
- DISPATCH_BY_NFA_TYPE(_expandState(nfa, dest, src, offset, key));
- return 0;
-}
-
-char nfaInitCompressedState(const struct NFA *nfa, u64a offset, void *state,
- u8 key) {
- assert(nfa && state);
- assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
-
- DISPATCH_BY_NFA_TYPE(_initCompressedState(nfa, offset, state, key));
- return 0;
-}
-
-enum nfa_zombie_status nfaGetZombieStatus(const struct NFA *nfa, struct mq *q,
- s64a loc) {
- DISPATCH_BY_NFA_TYPE(_zombie_status(nfa, q, loc));
- return NFA_ZOMBIE_NO;
-}
+ return 0;
+}
+
+char nfaQueueCompressState(const struct NFA *nfa, const struct mq *q,
+ s64a loc) {
+ assert(nfa && q);
+ assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
+
+ DISPATCH_BY_NFA_TYPE(_queueCompressState(nfa, q, loc));
+ return 0;
+}
+
+char nfaExpandState(const struct NFA *nfa, void *dest, const void *src,
+ u64a offset, u8 key) {
+ assert(nfa && dest && src);
+ assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
+
+ DISPATCH_BY_NFA_TYPE(_expandState(nfa, dest, src, offset, key));
+ return 0;
+}
+
+char nfaInitCompressedState(const struct NFA *nfa, u64a offset, void *state,
+ u8 key) {
+ assert(nfa && state);
+ assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa)));
+
+ DISPATCH_BY_NFA_TYPE(_initCompressedState(nfa, offset, state, key));
+ return 0;
+}
+
+enum nfa_zombie_status nfaGetZombieStatus(const struct NFA *nfa, struct mq *q,
+ s64a loc) {
+ DISPATCH_BY_NFA_TYPE(_zombie_status(nfa, q, loc));
+ return NFA_ZOMBIE_NO;
+}
diff --git a/contrib/libs/hyperscan/src/nfa/nfa_api_queue.h b/contrib/libs/hyperscan/src/nfa/nfa_api_queue.h
index 511941f30b..e3579a7ee2 100644
--- a/contrib/libs/hyperscan/src/nfa/nfa_api_queue.h
+++ b/contrib/libs/hyperscan/src/nfa/nfa_api_queue.h
@@ -1,289 +1,289 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef NFA_API_QUEUE_H
-#define NFA_API_QUEUE_H
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-#include "ue2common.h"
-#include "callback.h"
-
-/** Size of mq::items, max elements on a queue. */
-#define MAX_MQE_LEN 10
-
-/** Queue events */
-
-/** Queue event: begin scanning. Note: stateless engines will start from this
- * location. */
-#define MQE_START 0U
-
-/** Queue event: stop scanning. */
-#define MQE_END 1U
-
-/** Queue event: enable start and start-dot-star. */
-#define MQE_TOP 2U
-
-/** Queue event: first event corresponding to a numbered TOP. Additional tops
- * (in multi-top engines) use the event values from MQE_TOP_FIRST to
- * MQE_INVALID - 1. */
-#define MQE_TOP_FIRST 4U
-
-/** Invalid queue event */
-#define MQE_INVALID (~0U)
-
-/** Queue item */
-struct mq_item {
- u32 type; /**< event type, from MQE_* */
- s64a location; /**< relative to the start of the current buffer */
- u64a som; /**< pattern start-of-match corresponding to a top, only used
- * by som engines. */
-};
-
-// Forward decl.
-struct NFA;
-
-/**
- * Queue of events to control engine execution. mq::cur is index of first
- * valid event, mq::end is one past the index of last valid event.
- */
-struct mq {
- const struct NFA *nfa; /**< nfa corresponding to the queue */
- u32 cur; /**< index of the first valid item in the queue */
- u32 end; /**< index one past the last valid item in the queue */
- char *state; /**< uncompressed stream state; lives in scratch */
- char *streamState; /**<
- * real stream state; used to access structures which
- * not duplicated the scratch state (bounded repeats,
- * etc) */
- u64a offset; /**< base offset of the buffer */
- const u8 *buffer; /**< buffer to scan */
- size_t length; /**< length of buffer */
- const u8 *history; /**<
- * history buffer; (logically) immediately before the
- * main buffer */
- size_t hlength; /**< length of the history buffer */
- struct hs_scratch *scratch; /**< global scratch space */
- char report_current; /**<
- * report_current matches at starting offset through
- * callback. If true, the queue must be located at a
- * point where MO_MATCHES_PENDING was returned */
- NfaCallback cb; /**< callback to trigger on matches */
- void *context; /**< context to pass along with a callback */
- struct mq_item items[MAX_MQE_LEN]; /**< queue items */
-};
-
-
-/**
- * Pushes an (event, location, som) item onto a queue. If it is identical to the
- * previous item on the queue, it is not added to the queue.
- * @param q queue
- * @param e event
- * @param som som marker
- * @param loc event location
- */
-static really_inline
-void pushQueueSom(struct mq * restrict q, u32 e, s64a loc, u64a som) {
- DEBUG_PRINTF("pushing %u@%lld -> %u [som = %llu]\n", e, loc, q->end, som);
- assert(q->end < MAX_MQE_LEN);
- assert(e < MQE_INVALID);
-/* stop gcc getting too smart for its own good */
-/* assert(!q->end || q->items[q->end - 1].location <= loc); */
- assert(q->end || e == MQE_START);
-
- // Avoid duplicate items on the queue.
- if (q->end) {
- struct mq_item *item = &q->items[q->end - 1];
- if (item->type == e && item->location == loc) {
- DEBUG_PRINTF("dropping duplicate item\n");
- LIMIT_TO_AT_MOST(&item->som, som); /* take lower som */
- return;
- }
- }
-
- u32 end = q->end;
- struct mq_item *item = &q->items[end];
- item->type = e;
- item->location = loc;
- item->som = som;
- q->end = end + 1;
-}
-
-/**
- * Pushes an (event, location) item onto a queue. If it is identical to the
- * previous item on the queue, it is not added to the queue.
- * @param q queue
- * @param e event
- * @param loc event location
- */
-static really_inline
-void pushQueue(struct mq * restrict q, u32 e, s64a loc) {
- pushQueueSom(q, e, loc, 0);
-}
-
-/**
- * Pushes an (event, location) item onto a queue.
- * This version of @ref pushQueue does not check to ensure that the item being
- * added is not already on the queue. Used for events other than tops.
- */
-static really_inline
-void pushQueueNoMerge(struct mq * restrict q, u32 e, s64a loc) {
- DEBUG_PRINTF("pushing %u@%lld -> %u\n", e, loc, q->end);
- assert(q->end < MAX_MQE_LEN);
- assert(e < MQE_INVALID);
-/* stop gcc getting too smart for its own good */
-/* assert(!q->end || q->items[q->end - 1].location <= loc); */
- assert(q->end || e == MQE_START);
-
-#ifndef NDEBUG
- // We assert that the event is different from its predecessor. If it's a
- // dupe, you should have used the ordinary pushQueue call.
- if (q->end) {
- UNUSED struct mq_item *prev = &q->items[q->end - 1];
- assert(prev->type != e || prev->location != loc);
- }
-#endif
-
- u32 end = q->end;
- struct mq_item *item = &q->items[end];
- item->type = e;
- item->location = loc;
- item->som = 0;
- q->end = end + 1;
-}
-
-/** \brief Returns the type of the current queue event. */
-static really_inline u32 q_cur_type(const struct mq *q) {
- assert(q->cur < q->end);
- assert(q->cur < MAX_MQE_LEN);
- return q->items[q->cur].type;
-}
-
-/** \brief Returns the location (relative to the beginning of the current data
- * buffer) of the current queue event. */
-static really_inline s64a q_cur_loc(const struct mq *q) {
- assert(q->cur < q->end);
- assert(q->cur < MAX_MQE_LEN);
- return q->items[q->cur].location;
-}
-
-/** \brief Returns the type of the last event in the queue. */
-static really_inline u32 q_last_type(const struct mq *q) {
- assert(q->cur < q->end);
- assert(q->end > 0);
- assert(q->end <= MAX_MQE_LEN);
- return q->items[q->end - 1].type;
-}
-
-/** \brief Returns the location (relative to the beginning of the current data
- * buffer) of the last event in the queue. */
-static really_inline s64a q_last_loc(const struct mq *q) {
- assert(q->cur < q->end);
- assert(q->end > 0);
- assert(q->end <= MAX_MQE_LEN);
- return q->items[q->end - 1].location;
-}
-
-/** \brief Returns the absolute stream offset of the current queue event. */
-static really_inline u64a q_cur_offset(const struct mq *q) {
- assert(q->cur < q->end);
- assert(q->cur < MAX_MQE_LEN);
- return q->offset + (u64a)q->items[q->cur].location;
-}
-
-/**
- * \brief Removes all events in the queue before the given location.
- */
-static really_inline
-void q_skip_forward_to(struct mq *q, s64a min_loc) {
- assert(q->cur < q->end);
- assert(q->cur < MAX_MQE_LEN);
- assert(q->items[q->cur].type == MQE_START);
-
- if (q_cur_loc(q) >= min_loc) {
- DEBUG_PRINTF("all events >= loc %lld\n", min_loc);
- return;
- }
-
- const u32 start_loc = q->cur;
-
- do {
- DEBUG_PRINTF("remove item with loc=%lld\n", q_cur_loc(q));
- q->cur++;
- } while (q->cur < q->end && q_cur_loc(q) < min_loc);
-
- if (q->cur > start_loc) {
- // Move original MQE_START item forward.
- q->cur--;
- q->items[q->cur] = q->items[start_loc];
- }
-}
-
-#ifdef DEBUG
-// Dump the contents of the given queue.
-static never_inline UNUSED
-void debugQueue(const struct mq *q) {
- DEBUG_PRINTF("q=%p, nfa=%p\n", q, q->nfa);
- DEBUG_PRINTF("q offset=%llu, buf={%p, len=%zu}, history={%p, len=%zu}\n",
- q->offset, q->buffer, q->length, q->history, q->hlength);
- DEBUG_PRINTF("q cur=%u, end=%u\n", q->cur, q->end);
- for (u32 cur = q->cur; cur < q->end; cur++) {
- const char *type = "UNKNOWN";
- u32 e = q->items[cur].type;
- switch (e) {
- case MQE_START:
- type = "MQE_START";
- break;
- case MQE_END:
- type = "MQE_END";
- break;
- case MQE_TOP:
- type = "MQE_TOP";
- break;
- case MQE_INVALID:
- type = "MQE_INVALID";
- break;
- default:
- assert(e >= MQE_TOP_FIRST && e < MQE_INVALID);
- type = "MQE_TOP_N";
- break;
- }
- DEBUG_PRINTF("\tq[%u] %lld %u:%s\n", cur, q->items[cur].location,
- q->items[cur].type, type);
- }
-}
-#endif // DEBUG
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef NFA_API_QUEUE_H
+#define NFA_API_QUEUE_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "ue2common.h"
+#include "callback.h"
+
+/** Size of mq::items, max elements on a queue. */
+#define MAX_MQE_LEN 10
+
+/** Queue events */
+
+/** Queue event: begin scanning. Note: stateless engines will start from this
+ * location. */
+#define MQE_START 0U
+
+/** Queue event: stop scanning. */
+#define MQE_END 1U
+
+/** Queue event: enable start and start-dot-star. */
+#define MQE_TOP 2U
+
+/** Queue event: first event corresponding to a numbered TOP. Additional tops
+ * (in multi-top engines) use the event values from MQE_TOP_FIRST to
+ * MQE_INVALID - 1. */
+#define MQE_TOP_FIRST 4U
+
+/** Invalid queue event */
+#define MQE_INVALID (~0U)
+
+/** Queue item */
+struct mq_item {
+ u32 type; /**< event type, from MQE_* */
+ s64a location; /**< relative to the start of the current buffer */
+ u64a som; /**< pattern start-of-match corresponding to a top, only used
+ * by som engines. */
+};
+
+// Forward decl.
+struct NFA;
+
+/**
+ * Queue of events to control engine execution. mq::cur is index of first
+ * valid event, mq::end is one past the index of last valid event. The items
+ * array has a fixed capacity of MAX_MQE_LEN entries; the push helpers only
+ * check for overflow by assertion.
+ */
+struct mq {
+ const struct NFA *nfa; /**< nfa corresponding to the queue */
+ u32 cur; /**< index of the first valid item in the queue */
+ u32 end; /**< index one past the last valid item in the queue */
+ char *state; /**< uncompressed stream state; lives in scratch */
+ char *streamState; /**<
+ * real stream state; used to access structures which
+ * are not duplicated in the scratch state (bounded
+ * repeats, etc) */
+ u64a offset; /**< base offset of the buffer */
+ const u8 *buffer; /**< buffer to scan */
+ size_t length; /**< length of buffer */
+ const u8 *history; /**<
+ * history buffer; (logically) immediately before the
+ * main buffer */
+ size_t hlength; /**< length of the history buffer */
+ struct hs_scratch *scratch; /**< global scratch space */
+ char report_current; /**<
+ * report_current matches at starting offset through
+ * callback. If true, the queue must be located at a
+ * point where MO_MATCHES_PENDING was returned */
+ NfaCallback cb; /**< callback to trigger on matches */
+ void *context; /**< context to pass along with a callback */
+ struct mq_item items[MAX_MQE_LEN]; /**< queue items */
+};
+
+
+/**
+ * Pushes an (event, location, som) item onto a queue. If its event type and
+ * location are identical to those of the previous item on the queue, it is not
+ * added to the queue; instead the previous item's som is limited to at most
+ * the new som, so the lower of the two is kept.
+ *
+ * Preconditions, checked by assertion only: q->end is below MAX_MQE_LEN, the
+ * event is less than MQE_INVALID, and the first item pushed onto an empty
+ * queue is MQE_START.
+ * @param q queue
+ * @param e event
+ * @param loc event location
+ * @param som som marker
+ */
+static really_inline
+void pushQueueSom(struct mq * restrict q, u32 e, s64a loc, u64a som) {
+ DEBUG_PRINTF("pushing %u@%lld -> %u [som = %llu]\n", e, loc, q->end, som);
+ assert(q->end < MAX_MQE_LEN);
+ assert(e < MQE_INVALID);
+/* stop gcc getting too smart for its own good */
+/* assert(!q->end || q->items[q->end - 1].location <= loc); */
+ assert(q->end || e == MQE_START);
+
+ // Avoid duplicate items on the queue.
+ if (q->end) {
+ struct mq_item *item = &q->items[q->end - 1]; /* most recently pushed item */
+ if (item->type == e && item->location == loc) {
+ DEBUG_PRINTF("dropping duplicate item\n");
+ LIMIT_TO_AT_MOST(&item->som, som); /* take lower som */
+ return;
+ }
+ }
+
+ u32 end = q->end;
+ struct mq_item *item = &q->items[end]; /* first free slot */
+ item->type = e;
+ item->location = loc;
+ item->som = som;
+ q->end = end + 1;
+}
+
+/**
+ * Pushes an (event, location) item onto a queue. If it is identical to the
+ * previous item on the queue, it is not added to the queue.
+ *
+ * This simply forwards to pushQueueSom() with a som of 0.
+ * @param q queue
+ * @param e event
+ * @param loc event location
+ */
+static really_inline
+void pushQueue(struct mq * restrict q, u32 e, s64a loc) {
+ pushQueueSom(q, e, loc, 0);
+}
+
+/**
+ * Pushes an (event, location) item onto a queue.
+ * This version of @ref pushQueue does not check to ensure that the item being
+ * added is not already on the queue. Used for events other than tops.
+ *
+ * The new item's som is set to 0. When assertions are enabled (NDEBUG not
+ * defined), a push that duplicates the type and location of the previous item
+ * trips an assertion; with assertions disabled the duplicate is simply
+ * appended.
+ * @param q queue
+ * @param e event
+ * @param loc event location
+ */
+static really_inline
+void pushQueueNoMerge(struct mq * restrict q, u32 e, s64a loc) {
+ DEBUG_PRINTF("pushing %u@%lld -> %u\n", e, loc, q->end);
+ assert(q->end < MAX_MQE_LEN);
+ assert(e < MQE_INVALID);
+/* stop gcc getting too smart for its own good */
+/* assert(!q->end || q->items[q->end - 1].location <= loc); */
+ assert(q->end || e == MQE_START);
+
+#ifndef NDEBUG
+ // We assert that the event is different from its predecessor. If it's a
+ // dupe, you should have used the ordinary pushQueue call.
+ if (q->end) {
+ UNUSED struct mq_item *prev = &q->items[q->end - 1];
+ assert(prev->type != e || prev->location != loc);
+ }
+#endif
+
+ u32 end = q->end;
+ struct mq_item *item = &q->items[end]; /* first free slot */
+ item->type = e;
+ item->location = loc;
+ item->som = 0;
+ q->end = end + 1;
+}
+
+/** \brief Returns the type of the current queue event.
+ *
+ * The queue must be non-empty (cur < end); this is checked by assertion
+ * only. */
+static really_inline u32 q_cur_type(const struct mq *q) {
+ assert(q->cur < q->end);
+ assert(q->cur < MAX_MQE_LEN);
+ return q->items[q->cur].type;
+}
+
+/** \brief Returns the location (relative to the beginning of the current data
+ * buffer) of the current queue event.
+ *
+ * The queue must be non-empty (cur < end); this is checked by assertion
+ * only. */
+static really_inline s64a q_cur_loc(const struct mq *q) {
+ assert(q->cur < q->end);
+ assert(q->cur < MAX_MQE_LEN);
+ return q->items[q->cur].location;
+}
+
+/** \brief Returns the type of the last event in the queue.
+ *
+ * The last event is the one at index end - 1. The queue must be non-empty
+ * (cur < end); this is checked by assertion only. */
+static really_inline u32 q_last_type(const struct mq *q) {
+ assert(q->cur < q->end);
+ assert(q->end > 0);
+ assert(q->end <= MAX_MQE_LEN);
+ return q->items[q->end - 1].type;
+}
+
+/** \brief Returns the location (relative to the beginning of the current data
+ * buffer) of the last event in the queue.
+ *
+ * The last event is the one at index end - 1. The queue must be non-empty
+ * (cur < end); this is checked by assertion only. */
+static really_inline s64a q_last_loc(const struct mq *q) {
+ assert(q->cur < q->end);
+ assert(q->end > 0);
+ assert(q->end <= MAX_MQE_LEN);
+ return q->items[q->end - 1].location;
+}
+
+/** \brief Returns the absolute stream offset of the current queue event.
+ *
+ * Computed as the queue's base offset plus the event's buffer-relative
+ * location; the signed location is converted to u64a before the addition.
+ * The queue must be non-empty (cur < end); this is checked by assertion
+ * only. */
+static really_inline u64a q_cur_offset(const struct mq *q) {
+ assert(q->cur < q->end);
+ assert(q->cur < MAX_MQE_LEN);
+ return q->offset + (u64a)q->items[q->cur].location;
+}
+
+/**
+ * \brief Removes all events in the queue before the given location.
+ *
+ * The current event must be MQE_START (asserted). If its location is already
+ * at or beyond min_loc the queue is left unchanged. Otherwise events are
+ * dropped from the front while their location is below min_loc, and the
+ * original MQE_START item is then copied into the slot just before the first
+ * surviving event (or just before end if none survive), which becomes the new
+ * current event. The copied item keeps its original location; it is not
+ * rewritten to min_loc.
+ */
+static really_inline
+void q_skip_forward_to(struct mq *q, s64a min_loc) {
+ assert(q->cur < q->end);
+ assert(q->cur < MAX_MQE_LEN);
+ assert(q->items[q->cur].type == MQE_START);
+
+ if (q_cur_loc(q) >= min_loc) {
+ DEBUG_PRINTF("all events >= loc %lld\n", min_loc);
+ return;
+ }
+
+ const u32 start_loc = q->cur; /* queue index (not a location) of the MQE_START item */
+
+ do {
+ DEBUG_PRINTF("remove item with loc=%lld\n", q_cur_loc(q));
+ q->cur++;
+ } while (q->cur < q->end && q_cur_loc(q) < min_loc);
+
+ if (q->cur > start_loc) { /* the loop above advanced cur at least once */
+ // Move original MQE_START item forward.
+ q->cur--;
+ q->items[q->cur] = q->items[start_loc];
+ }
+}
+
+#ifdef DEBUG
+// Dump the contents of the given queue via DEBUG_PRINTF: the queue header fields, then one line per item in [cur, end).
+static never_inline UNUSED
+void debugQueue(const struct mq *q) {
+ DEBUG_PRINTF("q=%p, nfa=%p\n", q, q->nfa);
+ DEBUG_PRINTF("q offset=%llu, buf={%p, len=%zu}, history={%p, len=%zu}\n",
+ q->offset, q->buffer, q->length, q->history, q->hlength);
+ DEBUG_PRINTF("q cur=%u, end=%u\n", q->cur, q->end);
+ for (u32 cur = q->cur; cur < q->end; cur++) {
+ const char *type = "UNKNOWN"; /* overwritten by every switch arm below */
+ u32 e = q->items[cur].type;
+ switch (e) {
+ case MQE_START:
+ type = "MQE_START";
+ break;
+ case MQE_END:
+ type = "MQE_END";
+ break;
+ case MQE_TOP:
+ type = "MQE_TOP";
+ break;
+ case MQE_INVALID:
+ type = "MQE_INVALID";
+ break;
+ default: /* anything else is expected to be a numbered top */
+ assert(e >= MQE_TOP_FIRST && e < MQE_INVALID);
+ type = "MQE_TOP_N";
+ break;
+ }
+ DEBUG_PRINTF("\tq[%u] %lld %u:%s\n", cur, q->items[cur].location,
+ q->items[cur].type, type);
+ }
+}
+#endif // DEBUG
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/nfa_api_util.h b/contrib/libs/hyperscan/src/nfa/nfa_api_util.h
index 7e797e74b1..affc5f38f3 100644
--- a/contrib/libs/hyperscan/src/nfa/nfa_api_util.h
+++ b/contrib/libs/hyperscan/src/nfa/nfa_api_util.h
@@ -1,82 +1,82 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef NFA_API_UTIL_H
-#define NFA_API_UTIL_H
-
-#include "nfa_api_queue.h"
-#include "ue2common.h"
-
-/* returns the byte prior to the given location, NUL if not available */
-static really_inline
-u8 queue_prev_byte(const struct mq *q, s64a loc) {
- if (loc <= 0) {
- if (1LL - loc > (s64a)q->hlength) {
- return 0; /* assume NUL for start of stream write */
- }
- // In the history buffer.
- assert(q->history);
- assert(q->hlength >= (u64a)(loc * -1));
- return q->history[q->hlength - 1 + loc];
- } else {
- // In the stream write buffer.
- assert(q->buffer);
- assert(q->length >= (u64a)loc);
- return q->buffer[loc - 1];
- }
-}
-
-/* this is a modified version of pushQueue where we statically know the state of
- * the queue. Does not attempt to merge and inserts at the given queue
- * position. */
-static really_inline
-void pushQueueAt(struct mq * restrict q, u32 pos, u32 e, s64a loc) {
- assert(pos == q->end);
- DEBUG_PRINTF("pushing %u@%lld -> %u\n", e, loc, q->end);
- assert(q->end < MAX_MQE_LEN);
- assert(e < MQE_INVALID);
-/* stop gcc getting too smart for its own good */
-/* assert(!q->end || q->items[q->end - 1].location <= loc); */
- assert(q->end || e == MQE_START);
-
-#ifndef NDEBUG
- // We assert that the event is different from its predecessor. If it's a
- // dupe, you should have used the ordinary pushQueue call.
- if (q->end) {
- UNUSED struct mq_item *prev = &q->items[q->end - 1];
- assert(prev->type != e || prev->location != loc);
- }
-#endif
-
- struct mq_item *item = &q->items[pos];
- item->type = e;
- item->location = loc;
- item->som = 0;
- q->end = pos + 1;
-}
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef NFA_API_UTIL_H
+#define NFA_API_UTIL_H
+
+#include "nfa_api_queue.h"
+#include "ue2common.h"
+
+/* Returns the byte prior to the given location, NUL if not available.
+ *
+ * For loc > 0 the byte is read from the scan buffer at index loc - 1. For
+ * loc <= 0 it is read from the history buffer at index hlength - 1 + loc
+ * (so loc == 0 yields the last history byte), unless the history holds fewer
+ * than 1 - loc bytes, in which case 0 is returned. The buffer pointers and
+ * bounds are checked by assertion only. */
+static really_inline
+u8 queue_prev_byte(const struct mq *q, s64a loc) {
+ if (loc <= 0) {
+ if (1LL - loc > (s64a)q->hlength) {
+ return 0; /* assume NUL for start of stream write */
+ }
+ // In the history buffer.
+ assert(q->history);
+ assert(q->hlength >= (u64a)(loc * -1));
+ return q->history[q->hlength - 1 + loc];
+ } else {
+ // In the stream write buffer.
+ assert(q->buffer);
+ assert(q->length >= (u64a)loc);
+ return q->buffer[loc - 1];
+ }
+}
+
+/* this is a modified version of pushQueue where we statically know the state of
+ * the queue. Does not attempt to merge and inserts at the given queue
+ * position.
+ *
+ * pos must equal the queue's current end (asserted); the item is written to
+ * items[pos] with a som of 0 and end becomes pos + 1. The same assert-only
+ * preconditions as the other push helpers apply: room in the queue, an event
+ * below MQE_INVALID, MQE_START as the first item, and (when assertions are
+ * enabled) no duplicate of the previous item. */
+static really_inline
+void pushQueueAt(struct mq * restrict q, u32 pos, u32 e, s64a loc) {
+ assert(pos == q->end);
+ DEBUG_PRINTF("pushing %u@%lld -> %u\n", e, loc, q->end);
+ assert(q->end < MAX_MQE_LEN);
+ assert(e < MQE_INVALID);
+/* stop gcc getting too smart for its own good */
+/* assert(!q->end || q->items[q->end - 1].location <= loc); */
+ assert(q->end || e == MQE_START);
+
+#ifndef NDEBUG
+ // We assert that the event is different from its predecessor. If it's a
+ // dupe, you should have used the ordinary pushQueue call.
+ if (q->end) {
+ UNUSED struct mq_item *prev = &q->items[q->end - 1];
+ assert(prev->type != e || prev->location != loc);
+ }
+#endif
+
+ struct mq_item *item = &q->items[pos];
+ item->type = e;
+ item->location = loc;
+ item->som = 0;
+ q->end = pos + 1;
+}
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp b/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp
index bcf7ae1708..47153163e9 100644
--- a/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp
+++ b/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp
@@ -1,96 +1,96 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "nfa_build_util.h"
-
-#include "limex_internal.h"
-#include "mcclellancompile.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "nfa_build_util.h"
+
+#include "limex_internal.h"
+#include "mcclellancompile.h"
#include "mcsheng_compile.h"
#include "shengcompile.h"
-#include "nfa_internal.h"
-#include "repeat_internal.h"
-#include "ue2common.h"
-
-#include <algorithm>
-#include <cassert>
-#include <cstddef>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <sstream>
-
-using namespace std;
-
-namespace ue2 {
-
-namespace {
-
-template<NFAEngineType t> struct NFATraits { };
-
-template<template<NFAEngineType t> class sfunc, typename rv_t, typename arg_t,
- NFAEngineType lb>
-struct DISPATCH_BY_NFA_TYPE_INT {
- static rv_t doOp(NFAEngineType i, const arg_t &arg) {
- if (i == lb) {
- return sfunc<lb>::call(arg);
- } else {
- return DISPATCH_BY_NFA_TYPE_INT<sfunc, rv_t, arg_t,
- (NFAEngineType)(lb + 1)>
- ::doOp(i, arg);
- }
- }
-};
-
-template<template<NFAEngineType t> class sfunc, typename rv_t, typename arg_t>
-struct DISPATCH_BY_NFA_TYPE_INT<sfunc, rv_t, arg_t, INVALID_NFA> {
- // dummy
- static rv_t doOp(NFAEngineType, const arg_t &) {
- assert(0);
- throw std::logic_error("Unreachable");
- }
-};
-
-#define DISPATCH_BY_NFA_TYPE(i, op, arg) \
- DISPATCH_BY_NFA_TYPE_INT<op, decltype(op<(NFAEngineType)0>::call(arg)), \
- decltype(arg), (NFAEngineType)0>::doOp(i, arg)
-}
-
+#include "nfa_internal.h"
+#include "repeat_internal.h"
+#include "ue2common.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <sstream>
+
+using namespace std;
+
+namespace ue2 {
+
+namespace {
+
+template<NFAEngineType t> struct NFATraits { };
+
+template<template<NFAEngineType t> class sfunc, typename rv_t, typename arg_t,
+ NFAEngineType lb>
+struct DISPATCH_BY_NFA_TYPE_INT {
+ static rv_t doOp(NFAEngineType i, const arg_t &arg) {
+ if (i == lb) {
+ return sfunc<lb>::call(arg);
+ } else {
+ return DISPATCH_BY_NFA_TYPE_INT<sfunc, rv_t, arg_t,
+ (NFAEngineType)(lb + 1)>
+ ::doOp(i, arg);
+ }
+ }
+};
+
+template<template<NFAEngineType t> class sfunc, typename rv_t, typename arg_t>
+struct DISPATCH_BY_NFA_TYPE_INT<sfunc, rv_t, arg_t, INVALID_NFA> {
+ // dummy
+ static rv_t doOp(NFAEngineType, const arg_t &) {
+ assert(0);
+ throw std::logic_error("Unreachable");
+ }
+};
+
+#define DISPATCH_BY_NFA_TYPE(i, op, arg) \
+ DISPATCH_BY_NFA_TYPE_INT<op, decltype(op<(NFAEngineType)0>::call(arg)), \
+ decltype(arg), (NFAEngineType)0>::doOp(i, arg)
+}
+
typedef bool (*nfa_dispatch_fn)(const NFA *nfa);
-
-template<typename T>
-static
-bool has_accel_limex(const NFA *nfa) {
- const T *limex = (const T *)getImplNfa(nfa);
- return limex->accelCount;
-}
-
+
template<typename T>
-static
+static
+bool has_accel_limex(const NFA *nfa) {
+ const T *limex = (const T *)getImplNfa(nfa);
+ return limex->accelCount;
+}
+
+template<typename T>
+static
bool has_repeats_limex(const NFA *nfa) {
const T *limex = (const T *)getImplNfa(nfa);
return limex->repeatCount;
@@ -115,261 +115,261 @@ bool has_repeats_other_than_firsts_limex(const NFA *nfa) {
}
}
- return false;
-}
-
+ return false;
+}
+
static
bool dispatch_false(const NFA *) {
return false;
}
-#ifdef DUMP_SUPPORT
-namespace {
-template<NFAEngineType t>
-struct getName {
- static const char *call(void *) {
- return NFATraits<t>::name;
- }
-};
-
-// descr helper for LimEx NFAs
-template<NFAEngineType t>
-static
-string getDescriptionLimEx(const NFA *nfa) {
- const typename NFATraits<t>::implNFA_t *limex =
- (const typename NFATraits<t>::implNFA_t *)getImplNfa(nfa);
- ostringstream oss;
- oss << NFATraits<t>::name << "/" << limex->exceptionCount;
- if (limex->repeatCount) {
- oss << " +" << limex->repeatCount << "r";
- }
- return oss.str();
-}
-}
-
-// generic description: just return the name
-namespace {
-template<NFAEngineType t>
-struct getDescription {
- static string call(const void *) {
- return string(NFATraits<t>::name);
- }
-};
-}
-#endif
-
-
-/* build-utility Traits */
-
-namespace {
-enum NFACategory {NFA_LIMEX, NFA_OTHER};
-
-// Some of our traits we want around in DUMP_SUPPORT mode only.
-#if defined(DUMP_SUPPORT)
-#define DO_IF_DUMP_SUPPORT(a) a
-#else
-#define DO_IF_DUMP_SUPPORT(a)
-#endif
-
+#ifdef DUMP_SUPPORT
+namespace {
+template<NFAEngineType t>
+struct getName {
+ static const char *call(void *) {
+ return NFATraits<t>::name;
+ }
+};
+
+// descr helper for LimEx NFAs
+template<NFAEngineType t>
+static
+string getDescriptionLimEx(const NFA *nfa) {
+ const typename NFATraits<t>::implNFA_t *limex =
+ (const typename NFATraits<t>::implNFA_t *)getImplNfa(nfa);
+ ostringstream oss;
+ oss << NFATraits<t>::name << "/" << limex->exceptionCount;
+ if (limex->repeatCount) {
+ oss << " +" << limex->repeatCount << "r";
+ }
+ return oss.str();
+}
+}
+
+// generic description: just return the name
+namespace {
+template<NFAEngineType t>
+struct getDescription {
+ static string call(const void *) {
+ return string(NFATraits<t>::name);
+ }
+};
+}
+#endif
+
+
+/* build-utility Traits */
+
+namespace {
+enum NFACategory {NFA_LIMEX, NFA_OTHER};
+
+// Some of our traits we want around in DUMP_SUPPORT mode only.
+#if defined(DUMP_SUPPORT)
+#define DO_IF_DUMP_SUPPORT(a) a
+#else
+#define DO_IF_DUMP_SUPPORT(a)
+#endif
+
#define MAKE_LIMEX_TRAITS(mlt_size, mlt_align) \
template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \
- static UNUSED const char *name; \
- static const NFACategory category = NFA_LIMEX; \
- typedef LimExNFA##mlt_size implNFA_t; \
+ static UNUSED const char *name; \
+ static const NFACategory category = NFA_LIMEX; \
+ typedef LimExNFA##mlt_size implNFA_t; \
static const nfa_dispatch_fn has_accel; \
static const nfa_dispatch_fn has_repeats; \
static const nfa_dispatch_fn has_repeats_other_than_firsts; \
- static const u32 stateAlign = \
+ static const u32 stateAlign = \
MAX(mlt_align, alignof(RepeatControl)); \
- }; \
+ }; \
const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_accel \
- = has_accel_limex<LimExNFA##mlt_size>; \
+ = has_accel_limex<LimExNFA##mlt_size>; \
const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_repeats \
= has_repeats_limex<LimExNFA##mlt_size>; \
const nfa_dispatch_fn \
NFATraits<LIMEX_NFA_##mlt_size>::has_repeats_other_than_firsts \
= has_repeats_other_than_firsts_limex<LimExNFA##mlt_size>; \
- DO_IF_DUMP_SUPPORT( \
+ DO_IF_DUMP_SUPPORT( \
const char *NFATraits<LIMEX_NFA_##mlt_size>::name \
= "LimEx "#mlt_size; \
template<> struct getDescription<LIMEX_NFA_##mlt_size> { \
static string call(const void *p) { \
return getDescriptionLimEx<LIMEX_NFA_##mlt_size>((const NFA *)p); \
} \
- };)
-
+ };)
+
MAKE_LIMEX_TRAITS(32, alignof(u32))
MAKE_LIMEX_TRAITS(64, alignof(m128)) /* special, 32bit arch uses m128 */
MAKE_LIMEX_TRAITS(128, alignof(m128))
MAKE_LIMEX_TRAITS(256, alignof(m256))
MAKE_LIMEX_TRAITS(384, alignof(m384))
MAKE_LIMEX_TRAITS(512, alignof(m512))
-
-template<> struct NFATraits<MCCLELLAN_NFA_8> {
- UNUSED static const char *name;
- static const NFACategory category = NFA_OTHER;
- static const u32 stateAlign = 1;
+
+template<> struct NFATraits<MCCLELLAN_NFA_8> {
+ UNUSED static const char *name;
+ static const NFACategory category = NFA_OTHER;
+ static const u32 stateAlign = 1;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
-};
+};
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_accel = has_accel_mcclellan;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
-#if defined(DUMP_SUPPORT)
-const char *NFATraits<MCCLELLAN_NFA_8>::name = "McClellan 8";
-#endif
-
-template<> struct NFATraits<MCCLELLAN_NFA_16> {
- UNUSED static const char *name;
- static const NFACategory category = NFA_OTHER;
- static const u32 stateAlign = 2;
+#if defined(DUMP_SUPPORT)
+const char *NFATraits<MCCLELLAN_NFA_8>::name = "McClellan 8";
+#endif
+
+template<> struct NFATraits<MCCLELLAN_NFA_16> {
+ UNUSED static const char *name;
+ static const NFACategory category = NFA_OTHER;
+ static const u32 stateAlign = 2;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
-};
+};
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_accel = has_accel_mcclellan;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
-#if defined(DUMP_SUPPORT)
-const char *NFATraits<MCCLELLAN_NFA_16>::name = "McClellan 16";
-#endif
-
-template<> struct NFATraits<GOUGH_NFA_8> {
- UNUSED static const char *name;
- static const NFACategory category = NFA_OTHER;
- static const u32 stateAlign = 8;
+#if defined(DUMP_SUPPORT)
+const char *NFATraits<MCCLELLAN_NFA_16>::name = "McClellan 16";
+#endif
+
+template<> struct NFATraits<GOUGH_NFA_8> {
+ UNUSED static const char *name;
+ static const NFACategory category = NFA_OTHER;
+ static const u32 stateAlign = 8;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
-};
+};
const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_accel = has_accel_mcclellan;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_repeats_other_than_firsts = dispatch_false;
-#if defined(DUMP_SUPPORT)
-const char *NFATraits<GOUGH_NFA_8>::name = "Goughfish 8";
-#endif
-
-template<> struct NFATraits<GOUGH_NFA_16> {
- UNUSED static const char *name;
- static const NFACategory category = NFA_OTHER;
- static const u32 stateAlign = 8;
+#if defined(DUMP_SUPPORT)
+const char *NFATraits<GOUGH_NFA_8>::name = "Goughfish 8";
+#endif
+
+template<> struct NFATraits<GOUGH_NFA_16> {
+ UNUSED static const char *name;
+ static const NFACategory category = NFA_OTHER;
+ static const u32 stateAlign = 8;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
-};
+};
const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_accel = has_accel_mcclellan;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats_other_than_firsts = dispatch_false;
-#if defined(DUMP_SUPPORT)
-const char *NFATraits<GOUGH_NFA_16>::name = "Goughfish 16";
-#endif
-
+#if defined(DUMP_SUPPORT)
+const char *NFATraits<GOUGH_NFA_16>::name = "Goughfish 16";
+#endif
+
template<> struct NFATraits<MPV_NFA> {
- UNUSED static const char *name;
- static const NFACategory category = NFA_OTHER;
- static const u32 stateAlign = 8;
+ UNUSED static const char *name;
+ static const NFACategory category = NFA_OTHER;
+ static const u32 stateAlign = 8;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
-};
+};
const nfa_dispatch_fn NFATraits<MPV_NFA>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<MPV_NFA>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<MPV_NFA>::has_repeats_other_than_firsts = dispatch_false;
-#if defined(DUMP_SUPPORT)
+#if defined(DUMP_SUPPORT)
const char *NFATraits<MPV_NFA>::name = "Mega-Puff-Vac";
-#endif
-
+#endif
+
template<> struct NFATraits<CASTLE_NFA> {
- UNUSED static const char *name;
- static const NFACategory category = NFA_OTHER;
- static const u32 stateAlign = 8;
+ UNUSED static const char *name;
+ static const NFACategory category = NFA_OTHER;
+ static const u32 stateAlign = 8;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
-};
+};
const nfa_dispatch_fn NFATraits<CASTLE_NFA>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<CASTLE_NFA>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<CASTLE_NFA>::has_repeats_other_than_firsts = dispatch_false;
-#if defined(DUMP_SUPPORT)
+#if defined(DUMP_SUPPORT)
const char *NFATraits<CASTLE_NFA>::name = "Castle";
-#endif
-
+#endif
+
template<> struct NFATraits<LBR_NFA_DOT> {
- UNUSED static const char *name;
- static const NFACategory category = NFA_OTHER;
- static const u32 stateAlign = 8;
+ UNUSED static const char *name;
+ static const NFACategory category = NFA_OTHER;
+ static const u32 stateAlign = 8;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
-};
+};
const nfa_dispatch_fn NFATraits<LBR_NFA_DOT>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_DOT>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_DOT>::has_repeats_other_than_firsts = dispatch_false;
-#if defined(DUMP_SUPPORT)
+#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_DOT>::name = "Lim Bounded Repeat (D)";
-#endif
-
+#endif
+
template<> struct NFATraits<LBR_NFA_VERM> {
- UNUSED static const char *name;
- static const NFACategory category = NFA_OTHER;
- static const u32 stateAlign = 8;
+ UNUSED static const char *name;
+ static const NFACategory category = NFA_OTHER;
+ static const u32 stateAlign = 8;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
-};
+};
const nfa_dispatch_fn NFATraits<LBR_NFA_VERM>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_VERM>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_VERM>::has_repeats_other_than_firsts = dispatch_false;
-#if defined(DUMP_SUPPORT)
+#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_VERM>::name = "Lim Bounded Repeat (V)";
-#endif
-
+#endif
+
template<> struct NFATraits<LBR_NFA_NVERM> {
- UNUSED static const char *name;
- static const NFACategory category = NFA_OTHER;
- static const u32 stateAlign = 8;
+ UNUSED static const char *name;
+ static const NFACategory category = NFA_OTHER;
+ static const u32 stateAlign = 8;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
-};
+};
const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_repeats_other_than_firsts = dispatch_false;
-#if defined(DUMP_SUPPORT)
+#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_NVERM>::name = "Lim Bounded Repeat (NV)";
-#endif
-
+#endif
+
template<> struct NFATraits<LBR_NFA_SHUF> {
- UNUSED static const char *name;
- static const NFACategory category = NFA_OTHER;
- static const u32 stateAlign = 8;
+ UNUSED static const char *name;
+ static const NFACategory category = NFA_OTHER;
+ static const u32 stateAlign = 8;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
-};
+};
const nfa_dispatch_fn NFATraits<LBR_NFA_SHUF>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_SHUF>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_SHUF>::has_repeats_other_than_firsts = dispatch_false;
-#if defined(DUMP_SUPPORT)
+#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_SHUF>::name = "Lim Bounded Repeat (S)";
-#endif
-
+#endif
+
template<> struct NFATraits<LBR_NFA_TRUF> {
- UNUSED static const char *name;
- static const NFACategory category = NFA_OTHER;
- static const u32 stateAlign = 8;
+ UNUSED static const char *name;
+ static const NFACategory category = NFA_OTHER;
+ static const u32 stateAlign = 8;
static const nfa_dispatch_fn has_accel;
static const nfa_dispatch_fn has_repeats;
static const nfa_dispatch_fn has_repeats_other_than_firsts;
-};
+};
const nfa_dispatch_fn NFATraits<LBR_NFA_TRUF>::has_accel = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_TRUF>::has_repeats = dispatch_false;
const nfa_dispatch_fn NFATraits<LBR_NFA_TRUF>::has_repeats_other_than_firsts = dispatch_false;
-#if defined(DUMP_SUPPORT)
+#if defined(DUMP_SUPPORT)
const char *NFATraits<LBR_NFA_TRUF>::name = "Lim Bounded Repeat (M)";
-#endif
-
+#endif
+
template<> struct NFATraits<SHENG_NFA> {
UNUSED static const char *name;
static const NFACategory category = NFA_OTHER;
@@ -489,87 +489,87 @@ const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_repeats_other_than_first
#if defined(DUMP_SUPPORT)
const char *NFATraits<MCSHENG_64_NFA_16>::name = "Shengy64 McShengFace 16";
#endif
-} // namespace
-
-#if defined(DUMP_SUPPORT)
-
-const char *nfa_type_name(NFAEngineType type) {
- return DISPATCH_BY_NFA_TYPE(type, getName, nullptr);
-}
-
-string describe(const NFA &nfa) {
- return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, getDescription, &nfa);
-}
-
-#endif /* DUMP_SUPPORT */
-
-namespace {
-template<NFAEngineType t>
-struct getStateAlign {
- static u32 call(void *) {
- return NFATraits<t>::stateAlign;
- }
-};
-}
-
-u32 state_alignment(const NFA &nfa) {
- return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, getStateAlign, nullptr);
-}
-
-namespace {
-template<NFAEngineType t>
-struct is_limex {
- static bool call(const void *) {
- return NFATraits<t>::category == NFA_LIMEX;
- }
-};
-}
-
+} // namespace
+
+#if defined(DUMP_SUPPORT)
+
+const char *nfa_type_name(NFAEngineType type) {
+ return DISPATCH_BY_NFA_TYPE(type, getName, nullptr);
+}
+
+string describe(const NFA &nfa) {
+ return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, getDescription, &nfa);
+}
+
+#endif /* DUMP_SUPPORT */
+
+namespace {
+template<NFAEngineType t>
+struct getStateAlign {
+ static u32 call(void *) {
+ return NFATraits<t>::stateAlign;
+ }
+};
+}
+
+u32 state_alignment(const NFA &nfa) {
+ return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, getStateAlign, nullptr);
+}
+
+namespace {
+template<NFAEngineType t>
+struct is_limex {
+ static bool call(const void *) {
+ return NFATraits<t>::category == NFA_LIMEX;
+ }
+};
+}
+
namespace {
template<NFAEngineType t>
struct has_repeats_other_than_firsts_dispatch {
static nfa_dispatch_fn call(const void *) {
return NFATraits<t>::has_repeats_other_than_firsts;
- }
+ }
};
}
-
+
bool has_bounded_repeats_other_than_firsts(const NFA &nfa) {
return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type,
has_repeats_other_than_firsts_dispatch,
&nfa)(&nfa);
}
-
+
namespace {
template<NFAEngineType t>
struct has_repeats_dispatch {
static nfa_dispatch_fn call(const void *) {
return NFATraits<t>::has_repeats;
- }
+ }
};
-}
-
-bool has_bounded_repeats(const NFA &nfa) {
+}
+
+bool has_bounded_repeats(const NFA &nfa) {
return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_repeats_dispatch,
&nfa)(&nfa);
-}
-
-namespace {
-template<NFAEngineType t>
-struct has_accel_dispatch {
+}
+
+namespace {
+template<NFAEngineType t>
+struct has_accel_dispatch {
static nfa_dispatch_fn call(const void *) {
- return NFATraits<t>::has_accel;
- }
-};
-}
-
-bool has_accel(const NFA &nfa) {
- return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_accel_dispatch,
+ return NFATraits<t>::has_accel;
+ }
+};
+}
+
+bool has_accel(const NFA &nfa) {
+ return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_accel_dispatch,
&nfa)(&nfa);
-}
-
-bool requires_decompress_key(const NFA &nfa) {
- return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, is_limex, &nfa);
-}
-
-} // namespace ue2
+}
+
+bool requires_decompress_key(const NFA &nfa) {
+ return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, is_limex, &nfa);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfa/nfa_build_util.h b/contrib/libs/hyperscan/src/nfa/nfa_build_util.h
index 9c6ec83ca8..ee7a309494 100644
--- a/contrib/libs/hyperscan/src/nfa/nfa_build_util.h
+++ b/contrib/libs/hyperscan/src/nfa/nfa_build_util.h
@@ -1,60 +1,60 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef NFA_BUILD_UTIL_H
-#define NFA_BUILD_UTIL_H
-
-#include "ue2common.h"
-#include "nfa_internal.h"
-
-#include <string>
-
-struct NFA;
-
-namespace ue2 {
-
-#ifdef DUMP_SUPPORT
-/* provided for debugging functions */
-const char *nfa_type_name(NFAEngineType type);
-std::string describe(const NFA &nfa);
-#endif
-
-// For a given NFA, retrieve the alignment required by its uncompressed state.
-u32 state_alignment(const NFA &nfa);
-
-bool has_bounded_repeats_other_than_firsts(const NFA &n);
-
-bool has_bounded_repeats(const NFA &n);
-
-bool has_accel(const NFA &n);
-
-bool requires_decompress_key(const NFA &n);
-
-} // namespace ue2
-
-#endif
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef NFA_BUILD_UTIL_H
+#define NFA_BUILD_UTIL_H
+
+#include "ue2common.h"
+#include "nfa_internal.h"
+
+#include <string>
+
+struct NFA;
+
+namespace ue2 {
+
+#ifdef DUMP_SUPPORT
+/* provided for debugging functions */
+const char *nfa_type_name(NFAEngineType type);
+std::string describe(const NFA &nfa);
+#endif
+
+// For a given NFA, retrieve the alignment required by its uncompressed state.
+u32 state_alignment(const NFA &nfa);
+
+bool has_bounded_repeats_other_than_firsts(const NFA &n);
+
+bool has_bounded_repeats(const NFA &n);
+
+bool has_accel(const NFA &n);
+
+bool requires_decompress_key(const NFA &n);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/nfa_internal.h b/contrib/libs/hyperscan/src/nfa/nfa_internal.h
index 8a61c04807..ad27e28b14 100644
--- a/contrib/libs/hyperscan/src/nfa/nfa_internal.h
+++ b/contrib/libs/hyperscan/src/nfa/nfa_internal.h
@@ -1,66 +1,66 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- \brief Declarations for the main NFA engine types and structures.
-*/
-#ifndef NFA_INTERNAL_H
-#define NFA_INTERNAL_H
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-#include "ue2common.h"
-
-// Constants
-
-#define MO_INVALID_IDX 0xffffffff /**< index meaning value is invalid */
-
-// Flags (used in NFA::flags)
-
-#define NFA_ACCEPTS_EOD 1U /**< can produce matches on EOD. */
-#define NFA_ZOMBIE 2U /**< supports zombies */
-
-// Common data structures for NFAs
-
-enum NFAEngineType {
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ \brief Declarations for the main NFA engine types and structures.
+*/
+#ifndef NFA_INTERNAL_H
+#define NFA_INTERNAL_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "ue2common.h"
+
+// Constants
+
+#define MO_INVALID_IDX 0xffffffff /**< index meaning value is invalid */
+
+// Flags (used in NFA::flags)
+
+#define NFA_ACCEPTS_EOD 1U /**< can produce matches on EOD. */
+#define NFA_ZOMBIE 2U /**< supports zombies */
+
+// Common data structures for NFAs
+
+enum NFAEngineType {
LIMEX_NFA_32,
LIMEX_NFA_64,
LIMEX_NFA_128,
LIMEX_NFA_256,
LIMEX_NFA_384,
LIMEX_NFA_512,
- MCCLELLAN_NFA_8, /**< magic pseudo nfa */
- MCCLELLAN_NFA_16, /**< magic pseudo nfa */
- GOUGH_NFA_8, /**< magic pseudo nfa */
- GOUGH_NFA_16, /**< magic pseudo nfa */
+ MCCLELLAN_NFA_8, /**< magic pseudo nfa */
+ MCCLELLAN_NFA_16, /**< magic pseudo nfa */
+ GOUGH_NFA_8, /**< magic pseudo nfa */
+ GOUGH_NFA_16, /**< magic pseudo nfa */
MPV_NFA, /**< magic pseudo nfa */
LBR_NFA_DOT, /**< magic pseudo nfa */
LBR_NFA_VERM, /**< magic pseudo nfa */
@@ -76,79 +76,79 @@ enum NFAEngineType {
SHENG_NFA_64, /**< magic pseudo nfa */
MCSHENG_64_NFA_8, /**< magic pseudo nfa */
MCSHENG_64_NFA_16, /**< magic pseudo nfa */
- /** \brief bogus NFA - not used */
- INVALID_NFA
-};
-
-/** \brief header for the NFA implementation. */
-struct ALIGN_CL_DIRECTIVE NFA {
- u32 flags;
-
- /** \brief The size in bytes of the NFA engine. The engine is
- * serialized to the extent that copying length bytes back into a
- * 16-byte aligned memory location yields a structure that has the same
- * behaviour as the original engine. */
- u32 length;
-
- /** \brief Active implementation used by this NFAEngineType */
- u8 type;
-
- u8 rAccelType;
- u8 rAccelOffset;
- u8 maxBiAnchoredWidth; /**< if non zero, max width of the block */
-
- union {
- u8 c;
- u16 dc;
- u8 array[2];
- } rAccelData;
-
- u32 queueIndex; /**< index of the associated queue in scratch */
-
- /** \brief The number of valid positions/states for this NFA. Debug only */
- u32 nPositions;
-
- /** \brief Size of the state required in scratch space.
- *
- * This state has less strict size requirements (as it doesn't go in stream
- * state) and does not persist between stream writes.
- */
- u32 scratchStateSize;
-
- /** \brief Size of the state required in stream state.
- *
- * This encompasses all state stored by the engine that must persist between
- * stream writes. */
- u32 streamStateSize;
-
- u32 maxWidth; /**< longest possible match in this NFA, 0 if unbounded */
- u32 minWidth; /**< minimum bytes required to match this NFA */
- u32 maxOffset; /**< non zero: maximum offset this pattern can match at */
-
- /* Note: implementation (e.g. a LimEx) directly follows struct in memory */
-} ;
-
-// Accessor macro for the implementation NFA: we do things this way to avoid
-// type-punning warnings.
-#define getImplNfa(nfa) \
- ((const void *)((const char *)(nfa) + sizeof(struct NFA)))
-
-// Non-const version of the above, used at compile time.
-#define getMutableImplNfa(nfa) ((char *)(nfa) + sizeof(struct NFA))
-
-static really_inline u32 nfaAcceptsEod(const struct NFA *nfa) {
- return nfa->flags & NFA_ACCEPTS_EOD;
-}
-
-static really_inline u32 nfaSupportsZombie(const struct NFA *nfa) {
- return nfa->flags & NFA_ZOMBIE;
-}
-
-/** \brief True if the given type (from NFA::type) is a McClellan DFA. */
-static really_inline int isMcClellanType(u8 t) {
- return t == MCCLELLAN_NFA_8 || t == MCCLELLAN_NFA_16;
-}
-
+ /** \brief bogus NFA - not used */
+ INVALID_NFA
+};
+
+/** \brief header for the NFA implementation. */
+struct ALIGN_CL_DIRECTIVE NFA {
+ u32 flags;
+
+ /** \brief The size in bytes of the NFA engine. The engine is
+ * serialized to the extent that copying length bytes back into a
+ * 16-byte aligned memory location yields a structure that has the same
+ * behaviour as the original engine. */
+ u32 length;
+
+ /** \brief Active implementation used by this NFAEngineType */
+ u8 type;
+
+ u8 rAccelType;
+ u8 rAccelOffset;
+ u8 maxBiAnchoredWidth; /**< if non zero, max width of the block */
+
+ union {
+ u8 c;
+ u16 dc;
+ u8 array[2];
+ } rAccelData;
+
+ u32 queueIndex; /**< index of the associated queue in scratch */
+
+ /** \brief The number of valid positions/states for this NFA. Debug only */
+ u32 nPositions;
+
+ /** \brief Size of the state required in scratch space.
+ *
+ * This state has less strict size requirements (as it doesn't go in stream
+ * state) and does not persist between stream writes.
+ */
+ u32 scratchStateSize;
+
+ /** \brief Size of the state required in stream state.
+ *
+ * This encompasses all state stored by the engine that must persist between
+ * stream writes. */
+ u32 streamStateSize;
+
+ u32 maxWidth; /**< longest possible match in this NFA, 0 if unbounded */
+ u32 minWidth; /**< minimum bytes required to match this NFA */
+ u32 maxOffset; /**< non zero: maximum offset this pattern can match at */
+
+ /* Note: implementation (e.g. a LimEx) directly follows struct in memory */
+} ;
+
+// Accessor macro for the implementation NFA: we do things this way to avoid
+// type-punning warnings.
+#define getImplNfa(nfa) \
+ ((const void *)((const char *)(nfa) + sizeof(struct NFA)))
+
+// Non-const version of the above, used at compile time.
+#define getMutableImplNfa(nfa) ((char *)(nfa) + sizeof(struct NFA))
+
+static really_inline u32 nfaAcceptsEod(const struct NFA *nfa) {
+ return nfa->flags & NFA_ACCEPTS_EOD;
+}
+
+static really_inline u32 nfaSupportsZombie(const struct NFA *nfa) {
+ return nfa->flags & NFA_ZOMBIE;
+}
+
+/** \brief True if the given type (from NFA::type) is a McClellan DFA. */
+static really_inline int isMcClellanType(u8 t) {
+ return t == MCCLELLAN_NFA_8 || t == MCCLELLAN_NFA_16;
+}
+
/** \brief True if the given type (from NFA::type) is a Sheng-McClellan hybrid
* DFA. */
static really_inline int isShengMcClellanType(u8 t) {
@@ -156,11 +156,11 @@ static really_inline int isShengMcClellanType(u8 t) {
t == MCSHENG_64_NFA_8 || t == MCSHENG_64_NFA_16;
}
-/** \brief True if the given type (from NFA::type) is a Gough DFA. */
-static really_inline int isGoughType(u8 t) {
- return t == GOUGH_NFA_8 || t == GOUGH_NFA_16;
-}
-
+/** \brief True if the given type (from NFA::type) is a Gough DFA. */
+static really_inline int isGoughType(u8 t) {
+ return t == GOUGH_NFA_8 || t == GOUGH_NFA_16;
+}
+
/** \brief True if the given type (from NFA::type) is a Sheng DFA. */
static really_inline int isSheng16Type(u8 t) {
return t == SHENG_NFA;
@@ -185,11 +185,11 @@ static really_inline int isShengType(u8 t) {
* \brief True if the given type (from NFA::type) is a McClellan, Gough or
* Sheng DFA.
*/
-static really_inline int isDfaType(u8 t) {
+static really_inline int isDfaType(u8 t) {
return isMcClellanType(t) || isGoughType(t) || isShengType(t)
|| isShengMcClellanType(t);
-}
-
+}
+
static really_inline int isBigDfaType(u8 t) {
return t == MCCLELLAN_NFA_16 || t == MCSHENG_NFA_16 || t == GOUGH_NFA_16;
}
@@ -198,69 +198,69 @@ static really_inline int isSmallDfaType(u8 t) {
return isDfaType(t) && !isBigDfaType(t);
}
-/** \brief True if the given type (from NFA::type) is an NFA. */
-static really_inline int isNfaType(u8 t) {
- switch (t) {
+/** \brief True if the given type (from NFA::type) is an NFA. */
+static really_inline int isNfaType(u8 t) {
+ switch (t) {
case LIMEX_NFA_32:
case LIMEX_NFA_64:
case LIMEX_NFA_128:
case LIMEX_NFA_256:
case LIMEX_NFA_384:
case LIMEX_NFA_512:
- return 1;
- default:
- break;
- }
- return 0;
-}
-
-/** \brief True if the given type (from NFA::type) is an LBR. */
-static really_inline
-int isLbrType(u8 t) {
+ return 1;
+ default:
+ break;
+ }
+ return 0;
+}
+
+/** \brief True if the given type (from NFA::type) is an LBR. */
+static really_inline
+int isLbrType(u8 t) {
return t == LBR_NFA_DOT || t == LBR_NFA_VERM || t == LBR_NFA_NVERM ||
t == LBR_NFA_SHUF || t == LBR_NFA_TRUF;
-}
-
+}
+
/** \brief True if the given type (from NFA::type) is a container engine. */
-static really_inline
+static really_inline
int isContainerType(u8 t) {
return t == TAMARAMA_NFA;
}
static really_inline
-int isMultiTopType(u8 t) {
- return !isDfaType(t) && !isLbrType(t);
-}
-
-/** Macros used in place of unimplemented NFA API functions for a given
- * engine. */
-#if !defined(_WIN32)
-
-/* Use for functions that return an integer. */
-#define NFA_API_NO_IMPL(...) \
- ({ \
+int isMultiTopType(u8 t) {
+ return !isDfaType(t) && !isLbrType(t);
+}
+
+/** Macros used in place of unimplemented NFA API functions for a given
+ * engine. */
+#if !defined(_WIN32)
+
+/* Use for functions that return an integer. */
+#define NFA_API_NO_IMPL(...) \
+ ({ \
assert(!"not implemented for this engine!"); \
- 0; /* return value, for places that need it */ \
- })
-
-/* Use for _zombie_status functions. */
-#define NFA_API_ZOMBIE_NO_IMPL(...) \
- ({ \
+ 0; /* return value, for places that need it */ \
+ })
+
+/* Use for _zombie_status functions. */
+#define NFA_API_ZOMBIE_NO_IMPL(...) \
+ ({ \
assert(!"not implemented for this engine!"); \
- NFA_ZOMBIE_NO; \
- })
-
-#else
-
-/* Simpler implementation for compilers that don't like the GCC extension used
- * above. */
-#define NFA_API_NO_IMPL(...) 0
-#define NFA_API_ZOMBIE_NO_IMPL(...) NFA_ZOMBIE_NO
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+ NFA_ZOMBIE_NO; \
+ })
+
+#else
+
+/* Simpler implementation for compilers that don't like the GCC extension used
+ * above. */
+#define NFA_API_NO_IMPL(...) 0
+#define NFA_API_ZOMBIE_NO_IMPL(...) NFA_ZOMBIE_NO
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/nfa_kind.h b/contrib/libs/hyperscan/src/nfa/nfa_kind.h
index 2dbc2406a5..f2ac6189b1 100644
--- a/contrib/libs/hyperscan/src/nfa/nfa_kind.h
+++ b/contrib/libs/hyperscan/src/nfa/nfa_kind.h
@@ -1,60 +1,60 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
* \brief Data structures and helper functions used to describe the purpose of
* a particular NFA engine at build time.
*/
-#ifndef NFA_KIND_H
-#define NFA_KIND_H
-
-#include "ue2common.h"
-
+#ifndef NFA_KIND_H
+#define NFA_KIND_H
+
+#include "ue2common.h"
+
#include <string>
-namespace ue2 {
-
-/** \brief Specify the use-case for an nfa engine. */
-enum nfa_kind {
- NFA_PREFIX, //!< rose prefix
- NFA_INFIX, //!< rose infix
- NFA_SUFFIX, //!< rose suffix
- NFA_OUTFIX, //!< "outfix" nfa not triggered by external events
+namespace ue2 {
+
+/** \brief Specify the use-case for an nfa engine. */
+enum nfa_kind {
+ NFA_PREFIX, //!< rose prefix
+ NFA_INFIX, //!< rose infix
+ NFA_SUFFIX, //!< rose suffix
+ NFA_OUTFIX, //!< "outfix" nfa not triggered by external events
NFA_OUTFIX_RAW, //!< "outfix", but with unmanaged reports
- NFA_REV_PREFIX, //! reverse running prefixes (for som)
+ NFA_REV_PREFIX, //! reverse running prefixes (for som)
NFA_EAGER_PREFIX, //!< rose prefix that is also run up to matches
-};
-
+};
+
/** \brief True if this kind of engine is triggered by a top event. */
inline
-bool is_triggered(enum nfa_kind k) {
+bool is_triggered(enum nfa_kind k) {
switch (k) {
case NFA_INFIX:
case NFA_SUFFIX:
@@ -63,8 +63,8 @@ bool is_triggered(enum nfa_kind k) {
default:
return false;
}
-}
-
+}
+
/**
* \brief True if this kind of engine generates actively checks for accept
* states either to halt matching or to raise a callback. Only these engines
@@ -72,7 +72,7 @@ bool is_triggered(enum nfa_kind k) {
* nfaQueueExecToMatch().
*/
inline
-bool generates_callbacks(enum nfa_kind k) {
+bool generates_callbacks(enum nfa_kind k) {
switch (k) {
case NFA_SUFFIX:
case NFA_OUTFIX:
@@ -83,8 +83,8 @@ bool generates_callbacks(enum nfa_kind k) {
default:
return false;
}
-}
-
+}
+
/**
* \brief True if this kind of engine has its state inspected to see if it is in
* an accept state. Engines generated with this property will commonly call
@@ -143,6 +143,6 @@ std::string to_string(nfa_kind k) {
#endif
-} // namespace ue2
-
-#endif
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/nfa_rev_api.h b/contrib/libs/hyperscan/src/nfa/nfa_rev_api.h
index 335a5440f3..370f96ef62 100644
--- a/contrib/libs/hyperscan/src/nfa/nfa_rev_api.h
+++ b/contrib/libs/hyperscan/src/nfa/nfa_rev_api.h
@@ -1,157 +1,157 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Reverse-acceleration optimizations for the NFA API block mode scans.
- */
-
-#ifndef NFA_REV_API_H
-#define NFA_REV_API_H
-
-#include "accel.h"
-#include "nfa_internal.h"
-#include "vermicelli.h"
-#include "util/unaligned.h"
-
-static really_inline
-size_t nfaRevAccel_i(const struct NFA *nfa, const u8 *buffer, size_t length) {
- DEBUG_PRINTF("checking rev accel mw %u\n", nfa->minWidth);
- assert(nfa->rAccelOffset >= 1);
- assert(nfa->rAccelOffset <= nfa->minWidth);
-
- const u8 *rv; // result for accel engine
-
- switch (nfa->rAccelType) {
- case ACCEL_RVERM:
- DEBUG_PRINTF("ACCEL_RVERM\n");
- if (length + 1 - nfa->rAccelOffset < 16) {
- break;
- }
-
- rv = rvermicelliExec(nfa->rAccelData.c, 0, buffer,
- buffer + length + 1 - nfa->rAccelOffset);
- length = (size_t)(rv - buffer + nfa->rAccelOffset);
- break;
- case ACCEL_RVERM_NOCASE:
- DEBUG_PRINTF("ACCEL_RVERM_NOCASE\n");
- if (length + 1 - nfa->rAccelOffset < 16) {
- break;
- }
-
- rv = rvermicelliExec(nfa->rAccelData.c, 1, buffer,
- buffer + length + 1 - nfa->rAccelOffset);
- length = (size_t)(rv - buffer + nfa->rAccelOffset);
- break;
- case ACCEL_RDVERM:
- DEBUG_PRINTF("ACCEL_RDVERM\n");
- if (length + 1 - nfa->rAccelOffset < 17) {
- break;
- }
-
- rv = rvermicelliDoubleExec(nfa->rAccelData.array[0],
- nfa->rAccelData.array[1], 0, buffer,
- buffer + length + 1 - nfa->rAccelOffset);
- length = (size_t)(rv - buffer + nfa->rAccelOffset);
- break;
- case ACCEL_RDVERM_NOCASE:
- DEBUG_PRINTF("ACCEL_RVERM_NOCASE\n");
- if (length + 1 - nfa->rAccelOffset < 17) {
- break;
- }
-
- rv = rvermicelliDoubleExec(nfa->rAccelData.array[0],
- nfa->rAccelData.array[1], 1, buffer,
- buffer + length + 1 - nfa->rAccelOffset);
- length = (size_t)(rv - buffer + nfa->rAccelOffset);
- break;
- case ACCEL_REOD:
- DEBUG_PRINTF("ACCEL_REOD\n");
- if (buffer[length - nfa->rAccelOffset] != nfa->rAccelData.c) {
- return 0;
- }
- break;
- case ACCEL_REOD_NOCASE:
- DEBUG_PRINTF("ACCEL_REOD_NOCASE\n");
- if ((buffer[length - nfa->rAccelOffset] & CASE_CLEAR) !=
- nfa->rAccelData.c) {
- return 0;
- }
- break;
- case ACCEL_RDEOD:
- DEBUG_PRINTF("ACCEL_RDEOD\n");
- if (unaligned_load_u16(buffer + length - nfa->rAccelOffset) !=
- nfa->rAccelData.dc) {
- return 0;
- }
- break;
- case ACCEL_RDEOD_NOCASE:
- DEBUG_PRINTF("ACCEL_RDEOD_NOCASE\n");
- if ((unaligned_load_u16(buffer + length - nfa->rAccelOffset) &
- DOUBLE_CASE_CLEAR) != nfa->rAccelData.dc) {
- return 0;
- }
- break;
- default:
- assert(!"not here");
- }
-
- if (nfa->minWidth > length) {
- DEBUG_PRINTF("post-accel, scan skipped: %zu < min %u bytes\n", length,
- nfa->minWidth);
- return 0;
- }
-
- return length;
-}
-
-/** \brief Reverse acceleration check. Returns a new length for the block,
- * guaranteeing that a match cannot occur beyond that point. */
-static really_inline
-size_t nfaRevAccelCheck(const struct NFA *nfa, const u8 *buffer,
- size_t length) {
- assert(nfa);
-
- // If this block is not long enough to satisfy the minimum width
- // constraint on this NFA, we can avoid the scan altogether.
- if (nfa->minWidth > length) {
- DEBUG_PRINTF("scan skipped: %zu < min %u bytes\n", length,
- nfa->minWidth);
- return 0;
- }
-
- if (nfa->rAccelType == ACCEL_NONE) {
- DEBUG_PRINTF("no rev accel available\n");
- return length;
- }
-
- size_t rv_length = nfaRevAccel_i(nfa, buffer, length);
- assert(rv_length <= length);
- return rv_length;
-}
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Reverse-acceleration optimizations for the NFA API block mode scans.
+ */
+
+#ifndef NFA_REV_API_H
+#define NFA_REV_API_H
+
+#include "accel.h"
+#include "nfa_internal.h"
+#include "vermicelli.h"
+#include "util/unaligned.h"
+
+static really_inline // worker for nfaRevAccelCheck (which filters out ACCEL_NONE before calling)
+size_t nfaRevAccel_i(const struct NFA *nfa, const u8 *buffer, size_t length) { // returns the length still worth scanning, or 0 to skip the scan
+ DEBUG_PRINTF("checking rev accel mw %u\n", nfa->minWidth);
+ assert(nfa->rAccelOffset >= 1); // offset is measured back from the end of the block (see buffer[length - rAccelOffset] below)
+ assert(nfa->rAccelOffset <= nfa->minWidth);
+
+ const u8 *rv; // result for accel engine
+
+ switch (nfa->rAccelType) {
+ case ACCEL_RVERM:
+ DEBUG_PRINTF("ACCEL_RVERM\n");
+ if (length + 1 - nfa->rAccelOffset < 16) { // search span under 16 bytes: leave length unchanged
+ break;
+ }
+
+ rv = rvermicelliExec(nfa->rAccelData.c, 0, buffer,
+ buffer + length + 1 - nfa->rAccelOffset);
+ length = (size_t)(rv - buffer + nfa->rAccelOffset); // new length = position reported by the engine + accel offset
+ break;
+ case ACCEL_RVERM_NOCASE:
+ DEBUG_PRINTF("ACCEL_RVERM_NOCASE\n");
+ if (length + 1 - nfa->rAccelOffset < 16) {
+ break;
+ }
+
+ rv = rvermicelliExec(nfa->rAccelData.c, 1, buffer,
+ buffer + length + 1 - nfa->rAccelOffset);
+ length = (size_t)(rv - buffer + nfa->rAccelOffset);
+ break;
+ case ACCEL_RDVERM:
+ DEBUG_PRINTF("ACCEL_RDVERM\n");
+ if (length + 1 - nfa->rAccelOffset < 17) { // double-byte variant uses a 17-byte threshold
+ break;
+ }
+
+ rv = rvermicelliDoubleExec(nfa->rAccelData.array[0],
+ nfa->rAccelData.array[1], 0, buffer,
+ buffer + length + 1 - nfa->rAccelOffset);
+ length = (size_t)(rv - buffer + nfa->rAccelOffset);
+ break;
+ case ACCEL_RDVERM_NOCASE:
+ DEBUG_PRINTF("ACCEL_RDVERM_NOCASE\n");
+ if (length + 1 - nfa->rAccelOffset < 17) {
+ break;
+ }
+
+ rv = rvermicelliDoubleExec(nfa->rAccelData.array[0],
+ nfa->rAccelData.array[1], 1, buffer,
+ buffer + length + 1 - nfa->rAccelOffset);
+ length = (size_t)(rv - buffer + nfa->rAccelOffset);
+ break;
+ case ACCEL_REOD:
+ DEBUG_PRINTF("ACCEL_REOD\n");
+ if (buffer[length - nfa->rAccelOffset] != nfa->rAccelData.c) { // byte at a fixed distance from the block end must match
+ return 0;
+ }
+ break;
+ case ACCEL_REOD_NOCASE:
+ DEBUG_PRINTF("ACCEL_REOD_NOCASE\n");
+ if ((buffer[length - nfa->rAccelOffset] & CASE_CLEAR) != // same check with CASE_CLEAR masked onto the input byte
+ nfa->rAccelData.c) {
+ return 0;
+ }
+ break;
+ case ACCEL_RDEOD:
+ DEBUG_PRINTF("ACCEL_RDEOD\n");
+ if (unaligned_load_u16(buffer + length - nfa->rAccelOffset) != // two-byte version of the end-of-block check
+ nfa->rAccelData.dc) {
+ return 0;
+ }
+ break;
+ case ACCEL_RDEOD_NOCASE:
+ DEBUG_PRINTF("ACCEL_RDEOD_NOCASE\n");
+ if ((unaligned_load_u16(buffer + length - nfa->rAccelOffset) &
+ DOUBLE_CASE_CLEAR) != nfa->rAccelData.dc) {
+ return 0;
+ }
+ break;
+ default:
+ assert(!"not here"); // no other rAccelType value is expected to reach this function
+ }
+
+ if (nfa->minWidth > length) { // a shortened block may no longer satisfy the minimum width
+ DEBUG_PRINTF("post-accel, scan skipped: %zu < min %u bytes\n", length,
+ nfa->minWidth);
+ return 0;
+ }
+
+ return length;
+}
+
+/** \brief Reverse acceleration check. Returns a new length for the block,
+ * guaranteeing that a match cannot occur beyond that point. Returns 0 when the block is shorter than nfa->minWidth, and \p length unchanged when rAccelType is ACCEL_NONE. */
+static really_inline
+size_t nfaRevAccelCheck(const struct NFA *nfa, const u8 *buffer,
+ size_t length) {
+ assert(nfa);
+
+ // If this block is not long enough to satisfy the minimum width
+ // constraint on this NFA, we can avoid the scan altogether.
+ if (nfa->minWidth > length) {
+ DEBUG_PRINTF("scan skipped: %zu < min %u bytes\n", length,
+ nfa->minWidth);
+ return 0;
+ }
+
+ if (nfa->rAccelType == ACCEL_NONE) {
+ DEBUG_PRINTF("no rev accel available\n");
+ return length;
+ }
+
+ size_t rv_length = nfaRevAccel_i(nfa, buffer, length); // per-type check; may shorten the block or return 0
+ assert(rv_length <= length); // acceleration must never extend the block
+ return rv_length;
+}
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/rdfa.h b/contrib/libs/hyperscan/src/nfa/rdfa.h
index d1f1c3614d..6b994e4f2f 100644
--- a/contrib/libs/hyperscan/src/nfa/rdfa.h
+++ b/contrib/libs/hyperscan/src/nfa/rdfa.h
@@ -1,91 +1,91 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef RDFA_H
-#define RDFA_H
-
-#include "nfa_kind.h"
-#include "ue2common.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RDFA_H
+#define RDFA_H
+
+#include "nfa_kind.h"
+#include "ue2common.h"
+
#include "util/flat_containers.h"
-
-#include <array>
-#include <vector>
-
-namespace ue2 {
-
-typedef u16 dstate_id_t;
-typedef u16 symbol_t;
-
-static constexpr symbol_t TOP = 256;
-static constexpr symbol_t ALPHABET_SIZE = 257;
-static constexpr symbol_t N_SPECIAL_SYMBOL = 1;
-static constexpr dstate_id_t DEAD_STATE = 0;
-
-/** Structure representing a dfa state during construction. */
-struct dstate {
- /** Next state; indexed by remapped sym */
- std::vector<dstate_id_t> next;
-
- /** Set by ng_mcclellan, refined by mcclellancompile */
- dstate_id_t daddy = 0;
-
- /** Set by mcclellancompile, implementation state id, excludes edge
- * decorations */
- dstate_id_t impl_id = 0;
-
- /** Reports to fire (at any location). */
- flat_set<ReportID> reports;
-
- /** Reports to fire (at EOD). */
- flat_set<ReportID> reports_eod;
-
- explicit dstate(size_t alphabet_size) : next(alphabet_size, 0) {}
-};
-
-struct raw_dfa {
- nfa_kind kind;
- std::vector<dstate> states;
- dstate_id_t start_anchored = DEAD_STATE;
- dstate_id_t start_floating = DEAD_STATE;
- u16 alpha_size = 0; /* including special symbols */
-
- /* mapping from input symbol --> equiv class id */
- std::array<u16, ALPHABET_SIZE> alpha_remap;
-
- explicit raw_dfa(nfa_kind k) : kind(k) {}
- virtual ~raw_dfa();
-
+
+#include <array>
+#include <vector>
+
+namespace ue2 {
+
+typedef u16 dstate_id_t;
+typedef u16 symbol_t;
+
+static constexpr symbol_t TOP = 256;
+static constexpr symbol_t ALPHABET_SIZE = 257;
+static constexpr symbol_t N_SPECIAL_SYMBOL = 1;
+static constexpr dstate_id_t DEAD_STATE = 0;
+
+/** Structure representing a dfa state during construction. */
+struct dstate {
+ /** Next state; indexed by remapped sym */
+ std::vector<dstate_id_t> next; // sized to alphabet_size by the constructor
+
+ /** Set by ng_mcclellan, refined by mcclellancompile */
+ dstate_id_t daddy = 0;
+
+ /** Set by mcclellancompile, implementation state id, excludes edge
+ * decorations */
+ dstate_id_t impl_id = 0;
+
+ /** Reports to fire (at any location). */
+ flat_set<ReportID> reports;
+
+ /** Reports to fire (at EOD). */
+ flat_set<ReportID> reports_eod;
+
+ explicit dstate(size_t alphabet_size) : next(alphabet_size, 0) {} // every transition starts at 0, the value of DEAD_STATE
+};
+
+struct raw_dfa {
+ nfa_kind kind;
+ std::vector<dstate> states;
+ dstate_id_t start_anchored = DEAD_STATE;
+ dstate_id_t start_floating = DEAD_STATE;
+ u16 alpha_size = 0; /* including special symbols */
+
+ /* mapping from input symbol --> equiv class id */
+ std::array<u16, ALPHABET_SIZE> alpha_remap;
+
+ explicit raw_dfa(nfa_kind k) : kind(k) {}
+ virtual ~raw_dfa();
+
u16 getImplAlphaSize() const { return alpha_size - N_SPECIAL_SYMBOL; }
- virtual void stripExtraEodReports(void);
- bool hasEodReports(void) const;
-};
-
-}
-
-#endif
+ virtual void stripExtraEodReports(void);
+ bool hasEodReports(void) const;
+};
+
+}
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/rdfa_merge.cpp b/contrib/libs/hyperscan/src/nfa/rdfa_merge.cpp
index 33f70a6bb2..2ad871234f 100644
--- a/contrib/libs/hyperscan/src/nfa/rdfa_merge.cpp
+++ b/contrib/libs/hyperscan/src/nfa/rdfa_merge.cpp
@@ -1,399 +1,399 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rdfa_merge.h"
-
-#include "grey.h"
-#include "dfa_min.h"
-#include "mcclellancompile_util.h"
-#include "rdfa.h"
-#include "ue2common.h"
-#include "nfagraph/ng_mcclellan_internal.h"
-#include "util/container.h"
-#include "util/determinise.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rdfa_merge.h"
+
+#include "grey.h"
+#include "dfa_min.h"
+#include "mcclellancompile_util.h"
+#include "rdfa.h"
+#include "ue2common.h"
+#include "nfagraph/ng_mcclellan_internal.h"
+#include "util/container.h"
+#include "util/determinise.h"
#include "util/flat_containers.h"
-#include "util/make_unique.h"
-#include "util/report_manager.h"
+#include "util/make_unique.h"
+#include "util/report_manager.h"
#include "util/unordered.h"
-
+
#include <algorithm>
-#include <queue>
-
-using namespace std;
-
-namespace ue2 {
-
-#define MAX_DFA_STATES 16383
-
-namespace {
-
-class Automaton_Merge {
-public:
+#include <queue>
+
+using namespace std;
+
+namespace ue2 {
+
+#define MAX_DFA_STATES 16383
+
+namespace {
+
+class Automaton_Merge {
+public:
using StateSet = vector<u16>;
using StateMap = ue2_unordered_map<StateSet, dstate_id_t>;
-
- Automaton_Merge(const raw_dfa *rdfa1, const raw_dfa *rdfa2,
- const ReportManager *rm_in, const Grey &grey_in)
- : rm(rm_in), grey(grey_in), nfas{rdfa1, rdfa2}, dead(2) {
- calculateAlphabet();
- populateAsFs();
- prunable = isPrunable();
- }
-
- Automaton_Merge(const vector<const raw_dfa *> &dfas,
- const ReportManager *rm_in, const Grey &grey_in)
- : rm(rm_in), grey(grey_in), nfas(dfas), dead(nfas.size()) {
- calculateAlphabet();
- populateAsFs();
- prunable = isPrunable();
- }
-
- void populateAsFs(void) {
- bool fs_same = true;
- bool fs_dead = true;
-
- as.resize(nfas.size());
- fs.resize(nfas.size());
- for (size_t i = 0, end = nfas.size(); i < end; i++) {
- as[i] = nfas[i]->start_anchored;
- fs[i] = nfas[i]->start_floating;
-
- if (fs[i]) {
- fs_dead = false;
- }
-
- if (as[i] != fs[i]) {
- fs_same = false;
- }
- }
-
- start_anchored = DEAD_STATE + 1;
- if (fs_same) {
- start_floating = start_anchored;
- } else if (fs_dead) {
- start_floating = DEAD_STATE;
- } else {
- start_floating = start_anchored + 1;
- }
- }
-
- void calculateAlphabet(void) {
- DEBUG_PRINTF("calculating alphabet\n");
- vector<CharReach> esets = {CharReach::dot()};
-
- for (const auto &rdfa : nfas) {
- DEBUG_PRINTF("...next dfa alphabet\n");
- assert(rdfa);
- const auto &alpha_remap = rdfa->alpha_remap;
-
- for (size_t i = 0; i < esets.size(); i++) {
- assert(esets[i].count());
- if (esets[i].count() == 1) {
- DEBUG_PRINTF("skipping singleton eq set\n");
- continue;
- }
-
- CharReach t;
- u8 leader_s = alpha_remap[esets[i].find_first()];
-
- DEBUG_PRINTF("checking eq set, leader %02hhx \n", leader_s);
-
- for (size_t s = esets[i].find_first(); s != CharReach::npos;
- s = esets[i].find_next(s)) {
- if (alpha_remap[s] != leader_s) {
- t.set(s);
- }
- }
-
- if (t.any() && t != esets[i]) {
- esets[i] &= ~t;
- esets.push_back(t);
- }
- }
- }
-
+
+ Automaton_Merge(const raw_dfa *rdfa1, const raw_dfa *rdfa2, // two-DFA form: nfas holds exactly {rdfa1, rdfa2}
+ const ReportManager *rm_in, const Grey &grey_in)
+ : rm(rm_in), grey(grey_in), nfas{rdfa1, rdfa2}, dead(2) { // dead: one value-initialised (0) entry per input DFA
+ calculateAlphabet(); // fills alpha, unalpha and alphasize
+ populateAsFs(); // fills as/fs and the merged start state ids
+ prunable = isPrunable(); // reads grey, rm and nfas, all set in the initializer list
+ }
+
+ Automaton_Merge(const vector<const raw_dfa *> &dfas, // N-DFA form: copies the pointer list into nfas
+ const ReportManager *rm_in, const Grey &grey_in)
+ : rm(rm_in), grey(grey_in), nfas(dfas), dead(nfas.size()) { // dead: one value-initialised (0) entry per input DFA
+ calculateAlphabet(); // fills alpha, unalpha and alphasize
+ populateAsFs(); // fills as/fs and the merged start state ids
+ prunable = isPrunable(); // asserts that nfas is non-empty
+ }
+
+ void populateAsFs(void) { // gathers per-DFA start states into as/fs and picks the merged start state ids
+ bool fs_same = true; // stays true while every input has start_anchored == start_floating
+ bool fs_dead = true; // stays true while every input has start_floating == 0
+
+ as.resize(nfas.size());
+ fs.resize(nfas.size());
+ for (size_t i = 0, end = nfas.size(); i < end; i++) {
+ as[i] = nfas[i]->start_anchored;
+ fs[i] = nfas[i]->start_floating;
+
+ if (fs[i]) {
+ fs_dead = false;
+ }
+
+ if (as[i] != fs[i]) {
+ fs_same = false;
+ }
+ }
+
+ start_anchored = DEAD_STATE + 1; // merged anchored start always gets the id right after DEAD_STATE
+ if (fs_same) { // checked first, so it wins even when all floating starts are also dead
+ start_floating = start_anchored;
+ } else if (fs_dead) {
+ start_floating = DEAD_STATE;
+ } else {
+ start_floating = start_anchored + 1; // distinct floating start gets the next id
+ }
+ }
+
+ void calculateAlphabet(void) {
+ DEBUG_PRINTF("calculating alphabet\n");
+ vector<CharReach> esets = {CharReach::dot()};
+
+ for (const auto &rdfa : nfas) {
+ DEBUG_PRINTF("...next dfa alphabet\n");
+ assert(rdfa);
+ const auto &alpha_remap = rdfa->alpha_remap;
+
+ for (size_t i = 0; i < esets.size(); i++) {
+ assert(esets[i].count());
+ if (esets[i].count() == 1) {
+ DEBUG_PRINTF("skipping singleton eq set\n");
+ continue;
+ }
+
+ CharReach t;
+ u8 leader_s = alpha_remap[esets[i].find_first()];
+
+ DEBUG_PRINTF("checking eq set, leader %02hhx \n", leader_s);
+
+ for (size_t s = esets[i].find_first(); s != CharReach::npos;
+ s = esets[i].find_next(s)) {
+ if (alpha_remap[s] != leader_s) {
+ t.set(s);
+ }
+ }
+
+ if (t.any() && t != esets[i]) {
+ esets[i] &= ~t;
+ esets.push_back(t);
+ }
+ }
+ }
+
// Sort so that our alphabet mapping isn't dependent on the order of
// rdfas passed in.
sort(esets.begin(), esets.end());
- alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
- }
-
- bool isPrunable() const {
- if (!grey.highlanderPruneDFA || !rm) {
- DEBUG_PRINTF("disabled, or not managed reports\n");
- return false;
- }
-
- assert(!nfas.empty());
- if (!generates_callbacks(nfas.front()->kind)) {
- DEBUG_PRINTF("doesn't generate callbacks\n");
- return false;
- }
-
- // Collect all reports from all merge candidates.
- flat_set<ReportID> merge_reports;
- for (const auto &rdfa : nfas) {
- insert(&merge_reports, all_reports(*rdfa));
- }
-
- DEBUG_PRINTF("all reports: %s\n", as_string_list(merge_reports).c_str());
-
- // Return true if they're all exhaustible with the same exhaustion key.
- u32 ekey = INVALID_EKEY;
- for (const auto &report_id : merge_reports) {
- const Report &r = rm->getReport(report_id);
- if (!isSimpleExhaustible(r)) {
- DEBUG_PRINTF("report %u not simple exhaustible\n", report_id);
- return false;
- }
- assert(r.ekey != INVALID_EKEY);
- if (ekey == INVALID_EKEY) {
- ekey = r.ekey;
- } else if (ekey != r.ekey) {
- DEBUG_PRINTF("two different ekeys, %u and %u\n", ekey, r.ekey);
- return false;
- }
- }
-
- DEBUG_PRINTF("is prunable\n");
- return true;
- }
-
-
- void transition(const StateSet &in, StateSet *next) {
- u16 t[ALPHABET_SIZE];
-
- for (u32 i = 0; i < alphasize; i++) {
- next[i].resize(nfas.size());
- }
-
- for (size_t j = 0, j_end = nfas.size(); j < j_end; j++) {
- getFullTransitionFromState(*nfas[j], in[j], t);
- for (u32 i = 0; i < alphasize; i++) {
- next[i][j] = t[unalpha[i]];
- }
- }
- }
-
- const vector<StateSet> initial() {
- vector<StateSet> rv = {as};
- if (start_floating != DEAD_STATE && start_floating != start_anchored) {
- rv.push_back(fs);
- }
- return rv;
- }
-
-private:
- void reports_i(const StateSet &in, flat_set<ReportID> dstate::*r_set,
- flat_set<ReportID> &r) const {
- for (size_t i = 0, end = nfas.size(); i < end; i++) {
- const auto &rs = nfas[i]->states[in[i]].*r_set;
- insert(&r, rs);
- }
- }
-
-public:
- void reports(const StateSet &in, flat_set<ReportID> &rv) const {
- reports_i(in, &dstate::reports, rv);
- }
- void reportsEod(const StateSet &in, flat_set<ReportID> &rv) const {
- reports_i(in, &dstate::reports_eod, rv);
- }
-
- bool canPrune(const flat_set<ReportID> &test_reports) const {
- if (!grey.highlanderPruneDFA || !prunable) {
- return false;
- }
-
- // Must all be external reports.
- assert(rm);
- for (const auto &report_id : test_reports) {
- if (!isExternalReport(rm->getReport(report_id))) {
- return false;
- }
- }
-
- return true;
- }
-
- /** True if the minimization algorithm should be run after merging. */
- bool shouldMinimize() const {
- // We only need to run minimization if our merged DFAs shared a report.
- flat_set<ReportID> seen_reports;
- for (const auto &rdfa : nfas) {
- for (const auto &report_id : all_reports(*rdfa)) {
- if (!seen_reports.insert(report_id).second) {
- DEBUG_PRINTF("report %u in several dfas\n", report_id);
- return true;
- }
- }
- }
-
- return false;
- }
-
-private:
- const ReportManager *rm;
- const Grey &grey;
-
- vector<const raw_dfa *> nfas;
- vector<dstate_id_t> as;
- vector<dstate_id_t> fs;
-
- bool prunable = false;
-
-public:
- std::array<u16, ALPHABET_SIZE> alpha;
- std::array<u16, ALPHABET_SIZE> unalpha;
- u16 alphasize;
- StateSet dead;
-
- u16 start_anchored;
- u16 start_floating;
-};
-
-} // namespace
-
-unique_ptr<raw_dfa> mergeTwoDfas(const raw_dfa *d1, const raw_dfa *d2,
- size_t max_states, const ReportManager *rm,
- const Grey &grey) {
- assert(d1 && d2);
- assert(d1->kind == d2->kind);
- assert(max_states <= MAX_DFA_STATES);
-
- auto rdfa = ue2::make_unique<raw_dfa>(d1->kind);
-
- Automaton_Merge autom(d1, d2, rm, grey);
+ alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
+ }
+
+ bool isPrunable() const { // true only if pruning is enabled and all reports are simple-exhaustible under one ekey
+ if (!grey.highlanderPruneDFA || !rm) {
+ DEBUG_PRINTF("disabled, or not managed reports\n");
+ return false;
+ }
+
+ assert(!nfas.empty());
+ if (!generates_callbacks(nfas.front()->kind)) { // only the first DFA's kind is inspected here
+ DEBUG_PRINTF("doesn't generate callbacks\n");
+ return false;
+ }
+
+ // Collect all reports from all merge candidates.
+ flat_set<ReportID> merge_reports;
+ for (const auto &rdfa : nfas) {
+ insert(&merge_reports, all_reports(*rdfa));
+ }
+
+ DEBUG_PRINTF("all reports: %s\n", as_string_list(merge_reports).c_str());
+
+ // Return true if they're all exhaustible with the same exhaustion key.
+ u32 ekey = INVALID_EKEY; // INVALID_EKEY doubles as "no key seen yet"
+ for (const auto &report_id : merge_reports) {
+ const Report &r = rm->getReport(report_id);
+ if (!isSimpleExhaustible(r)) {
+ DEBUG_PRINTF("report %u not simple exhaustible\n", report_id);
+ return false;
+ }
+ assert(r.ekey != INVALID_EKEY);
+ if (ekey == INVALID_EKEY) {
+ ekey = r.ekey; // first report fixes the key that all others must share
+ } else if (ekey != r.ekey) {
+ DEBUG_PRINTF("two different ekeys, %u and %u\n", ekey, r.ekey);
+ return false;
+ }
+ }
+
+ DEBUG_PRINTF("is prunable\n"); // also reached when merge_reports is empty
+ return true;
+ }
+
+
+ void transition(const StateSet &in, StateSet *next) { // fills next[0..alphasize) with the successor tuple of `in` for each merged symbol class
+ u16 t[ALPHABET_SIZE]; // scratch row for one input DFA; presumably indexed by raw symbol, since it is read via unalpha[] - confirm
+
+ for (u32 i = 0; i < alphasize; i++) { // caller must supply at least alphasize StateSets
+ next[i].resize(nfas.size());
+ }
+
+ for (size_t j = 0, j_end = nfas.size(); j < j_end; j++) {
+ getFullTransitionFromState(*nfas[j], in[j], t); // in[j] is the component state belonging to input DFA j
+ for (u32 i = 0; i < alphasize; i++) {
+ next[i][j] = t[unalpha[i]];
+ }
+ }
+ }
+
+ const vector<StateSet> initial() { // returns the start tuples: anchored always, floating only when distinct and live
+ vector<StateSet> rv = {as};
+ if (start_floating != DEAD_STATE && start_floating != start_anchored) {
+ rv.push_back(fs);
+ }
+ return rv;
+ }
+
+private:
+ void reports_i(const StateSet &in, flat_set<ReportID> dstate::*r_set, // r_set selects which report member of dstate to read
+ flat_set<ReportID> &r) const {
+ for (size_t i = 0, end = nfas.size(); i < end; i++) {
+ const auto &rs = nfas[i]->states[in[i]].*r_set; // chosen report set of DFA i's component state
+ insert(&r, rs); // presumably adds all of rs to r (project helper) - confirm
+ }
+ }
+
+public:
+ void reports(const StateSet &in, flat_set<ReportID> &rv) const { // adds the dstate::reports of every component state of `in` to rv
+ reports_i(in, &dstate::reports, rv);
+ }
+ void reportsEod(const StateSet &in, flat_set<ReportID> &rv) const { // same as reports(), but reads dstate::reports_eod
+ reports_i(in, &dstate::reports_eod, rv);
+ }
+
+ bool canPrune(const flat_set<ReportID> &test_reports) const { // true iff pruning is enabled, the merge is prunable and every given report is external
+ if (!grey.highlanderPruneDFA || !prunable) {
+ return false;
+ }
+
+ // Must all be external reports.
+ assert(rm); // prunable can only be true when rm was non-null (see isPrunable)
+ for (const auto &report_id : test_reports) {
+ if (!isExternalReport(rm->getReport(report_id))) {
+ return false;
+ }
+ }
+
+ return true; // also the result for an empty test_reports
+ }
+
+ /** True if the minimization algorithm should be run after merging. */
+ bool shouldMinimize() const {
+ // We only need to run minimization if our merged DFAs shared a report.
+ flat_set<ReportID> seen_reports;
+ for (const auto &rdfa : nfas) {
+ for (const auto &report_id : all_reports(*rdfa)) { // assumes all_reports yields each id at most once per DFA - confirm
+ if (!seen_reports.insert(report_id).second) { // .second is false when the id was already present
+ DEBUG_PRINTF("report %u in several dfas\n", report_id);
+ return true;
+ }
+ }
+ }
+
+ return false;
+ }
+
+private:
+ const ReportManager *rm;
+ const Grey &grey;
+
+ vector<const raw_dfa *> nfas;
+ vector<dstate_id_t> as;
+ vector<dstate_id_t> fs;
+
+ bool prunable = false;
+
+public:
+ std::array<u16, ALPHABET_SIZE> alpha;
+ std::array<u16, ALPHABET_SIZE> unalpha;
+ u16 alphasize;
+ StateSet dead;
+
+ u16 start_anchored;
+ u16 start_floating;
+};
+
+} // namespace
+
+unique_ptr<raw_dfa> mergeTwoDfas(const raw_dfa *d1, const raw_dfa *d2,
+ size_t max_states, const ReportManager *rm,
+ const Grey &grey) {
+ assert(d1 && d2);
+ assert(d1->kind == d2->kind);
+ assert(max_states <= MAX_DFA_STATES);
+
+ auto rdfa = ue2::make_unique<raw_dfa>(d1->kind);
+
+ Automaton_Merge autom(d1, d2, rm, grey);
if (determinise(autom, rdfa->states, max_states)) {
- rdfa->start_anchored = autom.start_anchored;
- rdfa->start_floating = autom.start_floating;
- rdfa->alpha_size = autom.alphasize;
- rdfa->alpha_remap = autom.alpha;
- DEBUG_PRINTF("merge succeeded, %zu states\n", rdfa->states.size());
-
- if (autom.shouldMinimize()) {
- minimize_hopcroft(*rdfa, grey);
- DEBUG_PRINTF("minimized, %zu states\n", rdfa->states.size());
- }
-
- return rdfa;
- }
-
- return nullptr;
-}
-
-void mergeDfas(vector<unique_ptr<raw_dfa>> &dfas, size_t max_states,
- const ReportManager *rm, const Grey &grey) {
- assert(max_states <= MAX_DFA_STATES);
-
- if (dfas.size() <= 1) {
- return;
- }
-
- DEBUG_PRINTF("before merging, we have %zu dfas\n", dfas.size());
-
- queue<unique_ptr<raw_dfa>> q;
- for (auto &dfa : dfas) {
- q.push(move(dfa));
- }
-
- // All DFAs are now on the queue, so we'll clear the vector and use it for
- // output from here.
- dfas.clear();
-
- while (q.size() > 1) {
- // Attempt to merge the two front elements of the queue.
- unique_ptr<raw_dfa> d1 = move(q.front());
- q.pop();
- unique_ptr<raw_dfa> d2 = move(q.front());
- q.pop();
-
- auto rdfa = mergeTwoDfas(d1.get(), d2.get(), max_states, rm, grey);
- if (rdfa) {
- q.push(move(rdfa));
- } else {
- DEBUG_PRINTF("failed to merge\n");
- // Put the larger of the two DFAs on the output list, retain the
- // smaller one on the queue for further merge attempts.
- if (d2->states.size() > d1->states.size()) {
- dfas.push_back(move(d2));
- q.push(move(d1));
- } else {
- dfas.push_back(move(d1));
- q.push(move(d2));
- }
- }
- }
-
- while (!q.empty()) {
- dfas.push_back(move(q.front()));
- q.pop();
- }
-
- DEBUG_PRINTF("after merging, we have %zu dfas\n", dfas.size());
-}
-
-unique_ptr<raw_dfa> mergeAllDfas(const vector<const raw_dfa *> &dfas,
- size_t max_states, const ReportManager *rm,
- const Grey &grey) {
- assert(max_states <= MAX_DFA_STATES);
- assert(!dfas.empty());
-
- // All the DFAs should be of the same kind.
- const auto kind = dfas.front()->kind;
- assert(all_of(begin(dfas), end(dfas),
- [&kind](const raw_dfa *rdfa) { return rdfa->kind == kind; }));
-
- auto rdfa = ue2::make_unique<raw_dfa>(kind);
- Automaton_Merge n(dfas, rm, grey);
-
- DEBUG_PRINTF("merging dfa\n");
-
+ rdfa->start_anchored = autom.start_anchored;
+ rdfa->start_floating = autom.start_floating;
+ rdfa->alpha_size = autom.alphasize;
+ rdfa->alpha_remap = autom.alpha;
+ DEBUG_PRINTF("merge succeeded, %zu states\n", rdfa->states.size());
+
+ if (autom.shouldMinimize()) {
+ minimize_hopcroft(*rdfa, grey);
+ DEBUG_PRINTF("minimized, %zu states\n", rdfa->states.size());
+ }
+
+ return rdfa;
+ }
+
+ return nullptr;
+}
+
+void mergeDfas(vector<unique_ptr<raw_dfa>> &dfas, size_t max_states, // merges `dfas` in place, pairwise via mergeTwoDfas, until no queued pair remains
+ const ReportManager *rm, const Grey &grey) {
+ assert(max_states <= MAX_DFA_STATES);
+
+ if (dfas.size() <= 1) { // nothing to merge
+ return;
+ }
+
+ DEBUG_PRINTF("before merging, we have %zu dfas\n", dfas.size());
+
+ queue<unique_ptr<raw_dfa>> q;
+ for (auto &dfa : dfas) {
+ q.push(move(dfa)); // ownership moves to the queue; the vector slots become null
+ }
+
+ // All DFAs are now on the queue, so we'll clear the vector and use it for
+ // output from here.
+ dfas.clear();
+
+ while (q.size() > 1) {
+ // Attempt to merge the two front elements of the queue.
+ unique_ptr<raw_dfa> d1 = move(q.front());
+ q.pop();
+ unique_ptr<raw_dfa> d2 = move(q.front());
+ q.pop();
+
+ auto rdfa = mergeTwoDfas(d1.get(), d2.get(), max_states, rm, grey);
+ if (rdfa) {
+ q.push(move(rdfa)); // merged result goes to the back; d1 and d2 are destroyed at the end of this iteration
+ } else {
+ DEBUG_PRINTF("failed to merge\n");
+ // Put the larger of the two DFAs on the output list, retain the
+ // smaller one on the queue for further merge attempts.
+ if (d2->states.size() > d1->states.size()) {
+ dfas.push_back(move(d2));
+ q.push(move(d1));
+ } else { // on equal sizes d1 is emitted and d2 is retried
+ dfas.push_back(move(d1));
+ q.push(move(d2));
+ }
+ }
+ }
+
+ while (!q.empty()) { // at most one DFA can be left here, since the loop above runs until q.size() <= 1
+ dfas.push_back(move(q.front()));
+ q.pop();
+ }
+
+ DEBUG_PRINTF("after merging, we have %zu dfas\n", dfas.size());
+}
+
+unique_ptr<raw_dfa> mergeAllDfas(const vector<const raw_dfa *> &dfas,
+ size_t max_states, const ReportManager *rm,
+ const Grey &grey) {
+ assert(max_states <= MAX_DFA_STATES);
+ assert(!dfas.empty());
+
+ // All the DFAs should be of the same kind.
+ const auto kind = dfas.front()->kind;
+ assert(all_of(begin(dfas), end(dfas),
+ [&kind](const raw_dfa *rdfa) { return rdfa->kind == kind; }));
+
+ auto rdfa = ue2::make_unique<raw_dfa>(kind);
+ Automaton_Merge n(dfas, rm, grey);
+
+ DEBUG_PRINTF("merging dfa\n");
+
if (!determinise(n, rdfa->states, max_states)) {
- DEBUG_PRINTF("state limit (%zu) exceeded\n", max_states);
- return nullptr; /* over state limit */
- }
-
- rdfa->start_anchored = n.start_anchored;
- rdfa->start_floating = n.start_floating;
- rdfa->alpha_size = n.alphasize;
- rdfa->alpha_remap = n.alpha;
-
- DEBUG_PRINTF("merged, building impl dfa (a,f) = (%hu,%hu)\n",
- rdfa->start_anchored, rdfa->start_floating);
-
- if (n.shouldMinimize()) {
- minimize_hopcroft(*rdfa, grey);
- DEBUG_PRINTF("minimized, %zu states\n", rdfa->states.size());
- }
-
- return rdfa;
-}
-
-} // namespace ue2
+ DEBUG_PRINTF("state limit (%zu) exceeded\n", max_states);
+ return nullptr; /* over state limit */
+ }
+
+ rdfa->start_anchored = n.start_anchored;
+ rdfa->start_floating = n.start_floating;
+ rdfa->alpha_size = n.alphasize;
+ rdfa->alpha_remap = n.alpha;
+
+ DEBUG_PRINTF("merged, building impl dfa (a,f) = (%hu,%hu)\n",
+ rdfa->start_anchored, rdfa->start_floating);
+
+ if (n.shouldMinimize()) {
+ minimize_hopcroft(*rdfa, grey);
+ DEBUG_PRINTF("minimized, %zu states\n", rdfa->states.size());
+ }
+
+ return rdfa;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfa/rdfa_merge.h b/contrib/libs/hyperscan/src/nfa/rdfa_merge.h
index 857a32224c..9cfb3843a6 100644
--- a/contrib/libs/hyperscan/src/nfa/rdfa_merge.h
+++ b/contrib/libs/hyperscan/src/nfa/rdfa_merge.h
@@ -1,62 +1,62 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Merge code for McClellan DFA.
- */
-
-#ifndef RDFA_MERGE_H
-#define RDFA_MERGE_H
-
-#include <memory>
-#include <vector>
-
-namespace ue2 {
-
-class ReportManager;
-struct raw_dfa;
-struct Grey;
-
-/** \brief Attempts to merge two raw_dfas into one. */
-std::unique_ptr<raw_dfa> mergeTwoDfas(const raw_dfa *d1, const raw_dfa *d2,
- size_t max_states, const ReportManager *rm,
- const Grey &grey);
-
-/** \brief Attempts to merge all the given raw_dfas into one. */
-std::unique_ptr<raw_dfa> mergeAllDfas(const std::vector<const raw_dfa *> &dfas,
- size_t max_states,
- const ReportManager *rm,
- const Grey &grey);
-
-/** \brief Merges the given list of raw_dfas as much as possible in-place. */
-void mergeDfas(std::vector<std::unique_ptr<raw_dfa>> &dfas, size_t max_states,
- const ReportManager *rm, const Grey &grey);
-
-} // namespace ue2
-
-#endif // RDFA_MERGE_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Merge code for McClellan DFA.
+ */
+
+#ifndef RDFA_MERGE_H
+#define RDFA_MERGE_H
+
+#include <memory>
+#include <vector>
+
+namespace ue2 {
+
+class ReportManager;
+struct raw_dfa;
+struct Grey;
+
+/** \brief Attempts to merge two raw_dfas into one. */
+std::unique_ptr<raw_dfa> mergeTwoDfas(const raw_dfa *d1, const raw_dfa *d2,
+ size_t max_states, const ReportManager *rm,
+ const Grey &grey);
+
+/** \brief Attempts to merge all the given raw_dfas into one. */
+std::unique_ptr<raw_dfa> mergeAllDfas(const std::vector<const raw_dfa *> &dfas,
+ size_t max_states,
+ const ReportManager *rm,
+ const Grey &grey);
+
+/** \brief Merges the given list of raw_dfas as much as possible in-place. */
+void mergeDfas(std::vector<std::unique_ptr<raw_dfa>> &dfas, size_t max_states,
+ const ReportManager *rm, const Grey &grey);
+
+} // namespace ue2
+
+#endif // RDFA_MERGE_H
diff --git a/contrib/libs/hyperscan/src/nfa/repeat.c b/contrib/libs/hyperscan/src/nfa/repeat.c
index 5ef76ac696..5b2e4df4ed 100644
--- a/contrib/libs/hyperscan/src/nfa/repeat.c
+++ b/contrib/libs/hyperscan/src/nfa/repeat.c
@@ -1,893 +1,893 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief API for handling bounded repeats.
- *
- * This file provides an internal API for handling bounded repeats of character
- * classes. It is used by the Large Bounded Repeat (LBR) engine and by the
- * bounded repeat handling in the LimEx NFA engine as well.
- */
-#include "repeat.h"
-#include "util/bitutils.h"
-#include "util/multibit.h"
-#include "util/pack_bits.h"
-#include "util/partial_store.h"
-#include "util/unaligned.h"
-
-#include <stdint.h>
-#include <string.h>
-
-/** \brief Returns the total capacity of the ring.
- * Note that it's currently one greater than repeatMax so that we can handle
- * cases where the tug and pos triggers overlap. */
-static
-u32 ringCapacity(const struct RepeatInfo *info) {
- return info->repeatMax + 1;
-}
-
-/** \brief Returns the number of elements currently in the ring. Note that if
- * the first and last indices are equal, the ring is full. */
-static
-u32 ringOccupancy(const struct RepeatRingControl *xs, const u32 ringSize) {
- if (xs->last > xs->first) {
- return xs->last - xs->first;
- } else { // wrapped
- return ringSize - (xs->first - xs->last);
- }
-}
-
-/** \brief Returns the offset of the _last_ top stored in the ring. */
-static
-u64a ringLastTop(const struct RepeatRingControl *xs, const u32 ringSize) {
- return xs->offset + ringOccupancy(xs, ringSize) - 1;
-}
-
-#if !defined(NDEBUG) || defined(DUMP_SUPPORT)
-/** \brief For debugging: returns the total capacity of the range list. */
-static UNUSED
-u32 rangeListCapacity(const struct RepeatInfo *info) {
- u32 d = info->repeatMax - info->repeatMin;
- assert(d > 0); // should be in a RING model!
- return 2 * ((info->repeatMax / d) + 1);
-}
-#endif
-
-#ifdef DEBUG
-static
-void dumpRing(const struct RepeatInfo *info, const struct RepeatRingControl *xs,
- const u8 *ring) {
- const u32 ringSize = ringCapacity(info);
- DEBUG_PRINTF("ring (occ %u/%u, %u->%u): ", ringOccupancy(xs, ringSize),
- ringSize, xs->first, xs->last);
-
- u16 i = xs->first, n = 0;
- do {
- if (mmbit_isset(ring, ringSize, i)) {
- u64a ringOffset = xs->offset + n;
- printf("%llu ", ringOffset);
- }
- ++i, ++n;
- if (i == ringSize) {
- i = 0;
- }
- } while (i != xs->last);
- printf("\n");
-}
-
-static
-void dumpRange(const struct RepeatInfo *info,
- const struct RepeatRangeControl *xs, const u16 *ring) {
- const u32 ringSize = rangeListCapacity(info);
- DEBUG_PRINTF("ring (occ %u/%u): ", xs->num, ringSize);
-
- if (xs->num) {
- for (u32 i = 0; i < xs->num; i++) {
- printf("%llu ", xs->offset + unaligned_load_u16(ring + i));
- }
- } else {
- printf("empty");
- }
- printf("\n");
-}
-
-static
-void dumpBitmap(const struct RepeatBitmapControl *xs) {
- DEBUG_PRINTF("bitmap (base=%llu): ", xs->offset);
- u64a bitmap = xs->bitmap;
- while (bitmap) {
- printf("%llu ", xs->offset + findAndClearLSB_64(&bitmap));
- }
- printf("\n");
-}
-
-static
-void dumpTrailer(const struct RepeatInfo *info,
- const struct RepeatTrailerControl *xs) {
- const u64a m_width = info->repeatMax - info->repeatMin;
- DEBUG_PRINTF("trailer: current extent is [%llu,%llu]", xs->offset,
- xs->offset + m_width);
- u64a bitmap = xs->bitmap;
- if (bitmap) {
- printf(", also matches at: ");
- while (bitmap) {
- u32 idx = findAndClearMSB_64(&bitmap);
- printf("%llu ", xs->offset - idx - 1);
- }
- } else {
- printf(", no earlier matches");
- }
- printf("\n");
-}
-
-#endif // DEBUG
-
-#ifndef NDEBUG
-/** \brief For debugging: returns true if the range is ordered with no dupes. */
-static UNUSED
-int rangeListIsOrdered(const struct RepeatRangeControl *xs, const u16 *ring) {
- for (u32 i = 1; i < xs->num; i++) {
- u16 a = unaligned_load_u16(ring + i - 1);
- u16 b = unaligned_load_u16(ring + i);
- if (a >= b) {
- return 0;
- }
- }
- return 1;
-}
-#endif
-
-u64a repeatLastTopRing(const struct RepeatInfo *info,
- const union RepeatControl *ctrl) {
- const u32 ringSize = ringCapacity(info);
- return ringLastTop(&ctrl->ring, ringSize);
-}
-
-u64a repeatLastTopRange(const union RepeatControl *ctrl, const void *state) {
- const u16 *ring = (const u16 *)state;
- const struct RepeatRangeControl *xs = &ctrl->range;
- assert(xs->num);
- return xs->offset + unaligned_load_u16(ring + xs->num - 1);
-}
-
-u64a repeatLastTopBitmap(const union RepeatControl *ctrl) {
- const struct RepeatBitmapControl *xs = &ctrl->bitmap;
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief API for handling bounded repeats.
+ *
+ * This file provides an internal API for handling bounded repeats of character
+ * classes. It is used by the Large Bounded Repeat (LBR) engine and by the
+ * bounded repeat handling in the LimEx NFA engine as well.
+ */
+#include "repeat.h"
+#include "util/bitutils.h"
+#include "util/multibit.h"
+#include "util/pack_bits.h"
+#include "util/partial_store.h"
+#include "util/unaligned.h"
+
+#include <stdint.h>
+#include <string.h>
+
+/** \brief Returns the total capacity of the ring.
+ * Note that it's currently one greater than repeatMax so that we can handle
+ * cases where the tug and pos triggers overlap. */
+static
+u32 ringCapacity(const struct RepeatInfo *info) {
+ return info->repeatMax + 1;
+}
+
+/** \brief Returns the number of elements currently in the ring. Note that if
+ * the first and last indices are equal, the ring is full. */
+static
+u32 ringOccupancy(const struct RepeatRingControl *xs, const u32 ringSize) {
+ if (xs->last > xs->first) {
+ return xs->last - xs->first;
+ } else { // wrapped
+ return ringSize - (xs->first - xs->last);
+ }
+}
+
+/** \brief Returns the offset of the _last_ top stored in the ring. */
+static
+u64a ringLastTop(const struct RepeatRingControl *xs, const u32 ringSize) {
+ return xs->offset + ringOccupancy(xs, ringSize) - 1;
+}
+
+#if !defined(NDEBUG) || defined(DUMP_SUPPORT)
+/** \brief For debugging: returns the total capacity of the range list. */
+static UNUSED
+u32 rangeListCapacity(const struct RepeatInfo *info) {
+ u32 d = info->repeatMax - info->repeatMin;
+ assert(d > 0); // should be in a RING model!
+ return 2 * ((info->repeatMax / d) + 1);
+}
+#endif
+
+#ifdef DEBUG
+static
+void dumpRing(const struct RepeatInfo *info, const struct RepeatRingControl *xs,
+ const u8 *ring) {
+ const u32 ringSize = ringCapacity(info);
+ DEBUG_PRINTF("ring (occ %u/%u, %u->%u): ", ringOccupancy(xs, ringSize),
+ ringSize, xs->first, xs->last);
+
+ u16 i = xs->first, n = 0;
+ do {
+ if (mmbit_isset(ring, ringSize, i)) {
+ u64a ringOffset = xs->offset + n;
+ printf("%llu ", ringOffset);
+ }
+ ++i, ++n;
+ if (i == ringSize) {
+ i = 0;
+ }
+ } while (i != xs->last);
+ printf("\n");
+}
+
+static
+void dumpRange(const struct RepeatInfo *info,
+ const struct RepeatRangeControl *xs, const u16 *ring) {
+ const u32 ringSize = rangeListCapacity(info);
+ DEBUG_PRINTF("ring (occ %u/%u): ", xs->num, ringSize);
+
+ if (xs->num) {
+ for (u32 i = 0; i < xs->num; i++) {
+ printf("%llu ", xs->offset + unaligned_load_u16(ring + i));
+ }
+ } else {
+ printf("empty");
+ }
+ printf("\n");
+}
+
+static
+void dumpBitmap(const struct RepeatBitmapControl *xs) {
+ DEBUG_PRINTF("bitmap (base=%llu): ", xs->offset);
+ u64a bitmap = xs->bitmap;
+ while (bitmap) {
+ printf("%llu ", xs->offset + findAndClearLSB_64(&bitmap));
+ }
+ printf("\n");
+}
+
+static
+void dumpTrailer(const struct RepeatInfo *info,
+ const struct RepeatTrailerControl *xs) {
+ const u64a m_width = info->repeatMax - info->repeatMin;
+ DEBUG_PRINTF("trailer: current extent is [%llu,%llu]", xs->offset,
+ xs->offset + m_width);
+ u64a bitmap = xs->bitmap;
+ if (bitmap) {
+ printf(", also matches at: ");
+ while (bitmap) {
+ u32 idx = findAndClearMSB_64(&bitmap);
+ printf("%llu ", xs->offset - idx - 1);
+ }
+ } else {
+ printf(", no earlier matches");
+ }
+ printf("\n");
+}
+
+#endif // DEBUG
+
+#ifndef NDEBUG
+/** \brief For debugging: returns true if the range is ordered with no dupes. */
+static UNUSED
+int rangeListIsOrdered(const struct RepeatRangeControl *xs, const u16 *ring) {
+ for (u32 i = 1; i < xs->num; i++) {
+ u16 a = unaligned_load_u16(ring + i - 1);
+ u16 b = unaligned_load_u16(ring + i);
+ if (a >= b) {
+ return 0;
+ }
+ }
+ return 1;
+}
+#endif
+
+u64a repeatLastTopRing(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl) {
+ const u32 ringSize = ringCapacity(info);
+ return ringLastTop(&ctrl->ring, ringSize);
+}
+
+u64a repeatLastTopRange(const union RepeatControl *ctrl, const void *state) {
+ const u16 *ring = (const u16 *)state;
+ const struct RepeatRangeControl *xs = &ctrl->range;
+ assert(xs->num);
+ return xs->offset + unaligned_load_u16(ring + xs->num - 1);
+}
+
+u64a repeatLastTopBitmap(const union RepeatControl *ctrl) {
+ const struct RepeatBitmapControl *xs = &ctrl->bitmap;
if (!xs->bitmap) {
/* last top was too long ago */
return 0;
}
- return xs->offset + 63 - clz64(xs->bitmap);
-}
-
-u64a repeatLastTopTrailer(const struct RepeatInfo *info,
- const union RepeatControl *ctrl) {
- const struct RepeatTrailerControl *xs = &ctrl->trailer;
- assert(xs->offset >= info->repeatMin);
- return xs->offset - info->repeatMin;
-}
-
-u64a repeatNextMatchRing(const struct RepeatInfo *info,
- const union RepeatControl *ctrl, const void *state,
- u64a offset) {
- const struct RepeatRingControl *xs = &ctrl->ring;
- const u8 *ring = (const u8 *)state;
- const u32 ringSize = ringCapacity(info);
-
- // We should have at least one top stored.
- assert(mmbit_any(ring, ringSize));
- assert(info->repeatMax < REPEAT_INF);
-
- // Increment offset, as we want the NEXT match.
- offset++;
-
- const u64a base_offset = xs->offset;
- DEBUG_PRINTF("offset=%llu, base_offset=%llu\n", offset, base_offset);
-
- u64a delta = offset - base_offset;
- if (offset < base_offset || delta < info->repeatMin) {
- DEBUG_PRINTF("before min repeat\n");
- return base_offset + info->repeatMin;
- }
- if (offset > ringLastTop(xs, ringSize) + info->repeatMax) {
- DEBUG_PRINTF("ring is stale\n");
- return 0; // no more matches
- }
-
- DEBUG_PRINTF("delta=%llu\n", delta);
- u64a lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
- DEBUG_PRINTF("lower=%llu\n", lower);
-
- assert(lower < ringSize);
-
- // First scan, either to xs->last if there's no wrap-around or ringSize
- // (end of the underlying multibit) if we are wrapping.
-
- u32 begin = xs->first + lower;
- if (begin >= ringSize) {
- // This branch and sub tested a lot faster than using % (integer div).
- begin -= ringSize;
- }
- const u32 end = begin >= xs->last ? ringSize : xs->last;
- u32 i = mmbit_iterate_bounded(ring, ringSize, begin, end);
- if (i != MMB_INVALID) {
- u32 j = i - begin + lower;
- return MAX(offset, base_offset + j + info->repeatMin);
- }
-
- // A second scan is necessary if we need to cope with wrap-around in the
- // ring buffer.
-
- if (begin >= xs->last) {
- i = mmbit_iterate_bounded(ring, ringSize, 0, xs->last);
- if (i != MMB_INVALID) {
- u32 j = i + (ringSize - begin) + lower;
- return MAX(offset, base_offset + j + info->repeatMin);
- }
- }
-
- return 0;
-}
-
-u64a repeatNextMatchRange(const struct RepeatInfo *info,
- const union RepeatControl *ctrl, const void *state,
- u64a offset) {
- const struct RepeatRangeControl *xs = &ctrl->range;
- const u16 *ring = (const u16 *)state;
-
- assert(xs->num > 0);
- assert(xs->num <= rangeListCapacity(info));
- assert(rangeListIsOrdered(xs, ring));
- assert(info->repeatMax < REPEAT_INF);
-
- for (u32 i = 0; i < xs->num; i++) {
- u64a base = xs->offset + unaligned_load_u16(ring + i);
- u64a first = base + info->repeatMin;
- if (offset < first) {
- return first;
- }
- if (offset < base + info->repeatMax) {
- return offset + 1;
- }
- }
-
- return 0;
-}
-
-u64a repeatNextMatchBitmap(const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- const struct RepeatBitmapControl *xs = &ctrl->bitmap;
- const u64a base = xs->offset;
- u64a bitmap = xs->bitmap;
-
- // FIXME: quick exit if there is no match, based on last top in bitmap?
-
- while (bitmap) {
- u64a top = base + findAndClearLSB_64(&bitmap);
- if (offset < top + info->repeatMin) {
- return top + info->repeatMin;
- }
- if (offset < top + info->repeatMax) {
- return offset + 1;
- }
- }
-
- return 0; // No more matches.
-}
-
-u64a repeatNextMatchTrailer(const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- const struct RepeatTrailerControl *xs = &ctrl->trailer;
- const u32 m_width = info->repeatMax - info->repeatMin;
-
- DEBUG_PRINTF("offset=%llu, xs->offset=%llu\n", offset, xs->offset);
- DEBUG_PRINTF("{%u,%u} repeat, m_width=%u\n", info->repeatMin,
- info->repeatMax, m_width);
-
- assert(xs->offset >= info->repeatMin);
-
- if (offset >= xs->offset + m_width) {
- DEBUG_PRINTF("no more matches\n");
- return 0;
- }
-
- if (offset >= xs->offset) {
- DEBUG_PRINTF("inside most recent match window, next match %llu\n",
- offset + 1);
- return offset + 1;
- }
-
- // Offset is before the match window, we need to consult the bitmap of
- // earlier match offsets.
- u64a bitmap = xs->bitmap;
-
- u64a diff = xs->offset - offset;
- DEBUG_PRINTF("diff=%llu\n", diff);
- if (diff <= 64) {
- assert(diff);
- bitmap &= (1ULL << (diff - 1)) - 1;
- }
- DEBUG_PRINTF("bitmap = 0x%llx\n", bitmap);
- if (bitmap) {
- u32 idx = 63 - clz64(bitmap);
- DEBUG_PRINTF("clz=%u, idx = %u -> offset %llu\n", clz64(bitmap), idx,
- xs->offset - idx);
- DEBUG_PRINTF("next match at %llu\n", xs->offset - idx - 1);
- u64a next_match = xs->offset - idx - 1;
- assert(next_match > offset);
- return next_match;
- }
-
- DEBUG_PRINTF("next match is start of match window, %llu\n", xs->offset);
- return xs->offset;
-}
-
-/** \brief Store the first top in the ring buffer. */
-static
-void storeInitialRingTop(struct RepeatRingControl *xs, u8 *ring,
- u64a offset, const u32 ringSize) {
- DEBUG_PRINTF("ring=%p, ringSize=%u\n", ring, ringSize);
- xs->offset = offset;
- mmbit_clear(ring, ringSize);
- mmbit_set(ring, ringSize, 0);
- xs->first = 0;
- xs->last = 1;
-}
-
-static really_inline
-char ringIsStale(const struct RepeatRingControl *xs, const u32 ringSize,
- const u64a offset) {
- u64a finalMatch = ringLastTop(xs, ringSize);
- if (offset - finalMatch >= ringSize) {
- DEBUG_PRINTF("all matches in ring are stale\n");
- return 1;
- }
-
- return 0;
-}
-
-void repeatStoreRing(const struct RepeatInfo *info, union RepeatControl *ctrl,
- void *state, u64a offset, char is_alive) {
- struct RepeatRingControl *xs = &ctrl->ring;
- u8 *ring = (u8 *)state;
- const u32 ringSize = ringCapacity(info);
- assert(ringSize > 0);
-
- DEBUG_PRINTF("storing top for offset %llu in ring\n", offset);
-
- if (!is_alive || ringIsStale(xs, ringSize, offset)) {
- storeInitialRingTop(xs, ring, offset, ringSize);
- } else {
- assert(offset > ringLastTop(xs, ringSize)); // Dupe or out of order.
- u32 occ = ringOccupancy(xs, ringSize);
- u64a diff = offset - xs->offset;
- DEBUG_PRINTF("diff=%llu, occ=%u\n", diff, occ);
- if (diff >= ringSize) {
- u32 push = diff - ringSize + 1;
- DEBUG_PRINTF("push ring %u\n", push);
- xs->first += push;
- if (xs->first >= ringSize) {
- xs->first -= ringSize;
- }
- xs->offset += push;
- diff -= push;
- occ -= push;
- }
-
- // There's now room in the ring for this top, so we write a run of
- // zeroes, then a one.
- DEBUG_PRINTF("diff=%llu, occ=%u\n", diff, occ);
- assert(diff < ringSize);
- assert(diff >= occ);
- u32 n = diff - occ;
-
- u32 i = xs->last + n;
-
- mmbit_unset_range(ring, ringSize, xs->last, MIN(i, ringSize));
- if (i >= ringSize) {
- i -= ringSize;
- mmbit_unset_range(ring, ringSize, 0, i);
- }
-
- assert(i != xs->first);
- DEBUG_PRINTF("set bit %u\n", i);
- mmbit_set(ring, ringSize, i);
- xs->last = i + 1;
- if (xs->last == ringSize) {
- xs->last = 0;
- }
- }
-
- // Our ring indices shouldn't have spiraled off into uncharted space.
- assert(xs->first < ringSize);
- assert(xs->last < ringSize);
-
-#ifdef DEBUG
- DEBUG_PRINTF("post-store ring state\n");
- dumpRing(info, xs, ring);
-#endif
-
- // The final top stored in our ring should be the one we just wrote in.
- assert(ringLastTop(xs, ringSize) == offset);
-}
-
-static really_inline
-void storeInitialRangeTop(struct RepeatRangeControl *xs, u16 *ring,
- u64a offset) {
- xs->offset = offset;
- xs->num = 1;
- unaligned_store_u16(ring, 0);
-}
-
-void repeatStoreRange(const struct RepeatInfo *info, union RepeatControl *ctrl,
- void *state, u64a offset, char is_alive) {
- struct RepeatRangeControl *xs = &ctrl->range;
- u16 *ring = (u16 *)state;
-
- if (!is_alive) {
- DEBUG_PRINTF("storing initial top at %llu\n", offset);
- storeInitialRangeTop(xs, ring, offset);
- return;
- }
-
- DEBUG_PRINTF("storing top at %llu, list currently has %u/%u elements\n",
- offset, xs->num, rangeListCapacity(info));
-
-#ifdef DEBUG
- dumpRange(info, xs, ring);
-#endif
-
- // Walk ring from front. Identify the number of stale elements, and shift
- // the whole ring to delete them.
- u32 i = 0;
- for (; i < xs->num; i++) {
- u64a this_offset = xs->offset + unaligned_load_u16(ring + i);
- DEBUG_PRINTF("this_offset=%llu, diff=%llu\n", this_offset,
- offset - this_offset);
- if (offset - this_offset <= info->repeatMax) {
- break;
- }
- }
-
- if (i == xs->num) {
- DEBUG_PRINTF("whole ring is stale\n");
- storeInitialRangeTop(xs, ring, offset);
- return;
- } else if (i > 0) {
- DEBUG_PRINTF("expiring %u stale tops\n", i);
- u16 first_offset = unaligned_load_u16(ring + i); // first live top
- for (u32 j = 0; j < xs->num - i; j++) {
- u16 val = unaligned_load_u16(ring + i + j);
- assert(val >= first_offset);
- unaligned_store_u16(ring + j, val - first_offset);
- }
- xs->offset += first_offset;
- xs->num -= i;
- }
-
-#ifdef DEBUG
- DEBUG_PRINTF("post-expire:\n");
- dumpRange(info, xs, ring);
-#endif
-
- if (xs->num == 1) {
- goto append;
- }
-
- // Let d = repeatMax - repeatMin
- // Examine penultimate entry x[-2].
- // If (offset - x[-2] <= d), then last entry x[-1] can be replaced with
- // entry for offset.
- assert(xs->num >= 2);
- u32 d = info->repeatMax - info->repeatMin;
- u64a penultimate_offset =
- xs->offset + unaligned_load_u16(ring + xs->num - 2);
- if (offset - penultimate_offset <= d) {
- assert(offset - xs->offset <= (u16)-1);
- unaligned_store_u16(ring + xs->num - 1, offset - xs->offset);
- goto done;
- }
-
- // Otherwise, write a new entry for offset and return.
-
-append:
- assert(offset - xs->offset <= (u16)-1);
- assert(xs->num < rangeListCapacity(info));
- unaligned_store_u16(ring + xs->num, offset - xs->offset);
- xs->num++;
-
-done:
- assert(rangeListIsOrdered(xs, ring));
-}
-
-void repeatStoreBitmap(const struct RepeatInfo *info, union RepeatControl *ctrl,
- u64a offset, char is_alive) {
- DEBUG_PRINTF("{%u,%u} repeat, storing top at %llu\n", info->repeatMin,
- info->repeatMax, offset);
-
- struct RepeatBitmapControl *xs = &ctrl->bitmap;
- if (!is_alive || !xs->bitmap) {
- DEBUG_PRINTF("storing initial top at %llu\n", offset);
- xs->offset = offset;
- xs->bitmap = 1U;
- return;
- }
-
-#ifdef DEBUG
- DEBUG_PRINTF("pre-store:\n");
- dumpBitmap(xs);
-#endif
-
- assert(offset >= xs->offset);
-
- u64a last_top = xs->offset + 63 - clz64(xs->bitmap);
- if (offset > last_top + info->repeatMax) {
- DEBUG_PRINTF("bitmap stale, storing initial top\n");
- xs->offset = offset;
- xs->bitmap = 1U;
- return;
- }
-
- u64a diff = offset - xs->offset;
- if (diff >= info->repeatMax + 1) {
- DEBUG_PRINTF("need expire, diff=%llu\n", diff);
- u64a push = diff - info->repeatMax;
- xs->offset += push;
- xs->bitmap = push >= 64 ? 0 : xs->bitmap >> push;
- DEBUG_PRINTF("pushed xs->offset to %llu\n", xs->offset);
- }
-
- // Write a new entry.
- diff = offset - xs->offset;
- assert(diff < 64);
- xs->bitmap |= (1ULL << diff);
-
-#ifdef DEBUG
- DEBUG_PRINTF("post-store:\n");
- dumpBitmap(xs);
-#endif
-}
-
-/** \brief Returns 1 if the ring has a match between (logical) index \a lower
- * and \a upper, excluding \a upper. */
-static
-int ringHasMatch(const struct RepeatRingControl *xs, const u8 *ring,
- const u32 ringSize, u32 lower, u32 upper) {
- assert(lower < upper);
- assert(lower < ringSize);
- assert(upper <= ringSize);
-
- u32 i = xs->first + lower;
- if (i >= ringSize) {
- i -= ringSize;
- }
-
- // Performance tweak: if we're looking at a fixed repeat, we can just use
- // mmbit_isset.
- if (lower + 1 == upper) {
- return mmbit_isset(ring, ringSize, i);
- }
-
- u32 end = xs->first + upper;
- if (end >= ringSize) {
- end -= ringSize;
- }
-
- // First scan, either to end if there's no wrap-around or ringSize (end of
- // the underlying multibit) if we are wrapping.
-
- u32 scan_end = i < end ? end : ringSize;
- u32 m = mmbit_iterate_bounded(ring, ringSize, i, scan_end);
- if (m != MMB_INVALID) {
- return 1;
- }
-
- // A second scan is necessary if we need to cope with wrap-around in the
- // ring buffer.
-
- if (i >= end) {
- m = mmbit_iterate_bounded(ring, ringSize, 0, end);
- return m != MMB_INVALID;
- }
-
- return 0;
-}
-
-/** Return a mask of ones in bit positions [0..v]. */
-static really_inline
-u64a mask_ones_to(u32 v) {
- if (v < 63) {
- return (1ULL << (v + 1)) - 1;
- } else {
- return ~(0ULL);
- }
-}
-
-void repeatStoreTrailer(const struct RepeatInfo *info,
- union RepeatControl *ctrl, u64a offset, char is_alive) {
- DEBUG_PRINTF("{%u,%u} repeat, top at %llu\n", info->repeatMin,
- info->repeatMax, offset);
-
- struct RepeatTrailerControl *xs = &ctrl->trailer;
-
- /* The TRAILER repeat model stores the following data in its control block:
- *
- * 1. offset, which is the min extent of the most recent match window
- * (i.e. corresponding to the most recent top)
- * 2. bitmap, which is a bitmap of up to repeatMin matches before
- * the min extent offset.
- */
-
- const u64a next_extent = offset + info->repeatMin;
-
- if (!is_alive) {
- xs->offset = next_extent;
- xs->bitmap = 0;
- DEBUG_PRINTF("initial top, set extent to %llu\n", next_extent);
- return;
- }
-
-#ifdef DEBUG
- DEBUG_PRINTF("pre-store:\n");
- dumpTrailer(info, xs);
-#endif
-
- const u32 m_width = info->repeatMax - info->repeatMin;
- DEBUG_PRINTF("most recent match window is [%llu,%llu]\n", xs->offset,
- xs->offset + m_width);
-
- assert(next_extent > xs->offset);
- u64a diff = next_extent - xs->offset;
- DEBUG_PRINTF("diff=%llu, m_width=%u\n", diff, m_width);
-
- assert(diff);
- xs->bitmap = diff < 64 ? xs->bitmap << diff : 0;
-
- // Switch on bits in the bitmask corresponding to matches in the previous
- // match window.
- if (diff <= m_width) {
- u64a m = mask_ones_to(diff - 1);
- xs->bitmap |= m;
- } else {
- u64a shift = diff - m_width - 1;
- if (shift < 64) {
- u64a m = mask_ones_to(m_width);
- m <<= shift;
- xs->bitmap |= m;
- }
- }
-
- DEBUG_PRINTF("bitmap=0x%llx\n", xs->bitmap);
-
- // Update max extent.
- xs->offset = next_extent;
-
- // Trim stale history: we only need repeatMin bytes of history.
- if (info->repeatMin < 63) {
- u64a mask = (1ULL << (info->repeatMin + 1)) - 1;
- xs->bitmap &= mask;
- }
-
-#ifdef DEBUG
- DEBUG_PRINTF("post-store:\n");
- dumpTrailer(info, xs);
-#endif
-}
-
-enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state, u64a offset) {
- const struct RepeatRingControl *xs = &ctrl->ring;
- const u8 *ring = (const u8 *)state;
- const u32 ringSize = ringCapacity(info);
-
- assert(mmbit_any(ring, ringSize));
- assert(offset >= xs->offset);
-
- DEBUG_PRINTF("check: offset=%llu, repeat=[%u,%u]\n", offset,
- info->repeatMin, info->repeatMax);
-#ifdef DEBUG
- DEBUG_PRINTF("ring state\n");
- dumpRing(info, xs, ring);
-#endif
-
- if (offset - xs->offset < info->repeatMin) {
- DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
- return REPEAT_NOMATCH;
- }
-
- if (offset - ringLastTop(xs, ringSize) >= ringSize) {
- DEBUG_PRINTF("ring is stale\n");
- return REPEAT_STALE;
- }
-
- // If we're not stale, delta fits in the range [repeatMin, lastTop +
- // repeatMax], which fits in a u32.
- assert(offset - xs->offset < UINT32_MAX);
- u32 delta = (u32)(offset - xs->offset);
- DEBUG_PRINTF("delta=%u\n", delta);
-
- // Find the bounds on possible matches in the ring buffer.
- u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
- u32 upper = MIN(delta - info->repeatMin + 1, ringOccupancy(xs, ringSize));
-
- if (lower >= upper) {
- DEBUG_PRINTF("no matches to check\n");
- return REPEAT_NOMATCH;
- }
-
- DEBUG_PRINTF("possible match indices=[%u,%u]\n", lower, upper);
- if (ringHasMatch(xs, ring, ringSize, lower, upper)) {
- return REPEAT_MATCH;
- }
-
- return REPEAT_NOMATCH;
-}
-
-enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state, u64a offset) {
- const struct RepeatRangeControl *xs = &ctrl->range;
- const u16 *ring = (const u16 *)state;
-
- assert(xs->num > 0);
- assert(xs->num <= rangeListCapacity(info));
- assert(rangeListIsOrdered(xs, ring));
-
- // Walk the ring. For each entry x:
- // if (offset - x) falls inside repeat bounds, return success.
-
- // It may be worth doing tests on first and last elements first to bail
- // early if the whole ring is too young or stale.
-
- DEBUG_PRINTF("check %u (of %u) elements, offset %llu, bounds={%u,%u}\n",
- xs->num, rangeListCapacity(info), offset,
- info->repeatMin, info->repeatMax);
-#ifdef DEBUG
- dumpRange(info, xs, ring);
-#endif
-
- // Quick pre-check for minimum.
- assert(offset >= xs->offset);
- if (offset - xs->offset < info->repeatMin) {
- DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
- return REPEAT_NOMATCH;
- }
-
- // We check the most recent offset first, as we can establish staleness.
- u64a match = xs->offset + unaligned_load_u16(ring + xs->num - 1);
- assert(offset >= match);
- u64a diff = offset - match;
- if (diff > info->repeatMax) {
- DEBUG_PRINTF("range list is stale\n");
- return REPEAT_STALE;
- } else if (diff >= info->repeatMin && diff <= info->repeatMax) {
- return REPEAT_MATCH;
- }
-
- // Check the other offsets in the list.
- u32 count = xs->num - 1;
- for (u32 i = 0; i < count; i++) {
- match = xs->offset + unaligned_load_u16(ring + i);
- assert(offset >= match);
- diff = offset - match;
- if (diff >= info->repeatMin && diff <= info->repeatMax) {
- return REPEAT_MATCH;
- }
- }
-
- return REPEAT_NOMATCH;
-}
-
-enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- u64a offset) {
- const struct RepeatBitmapControl *xs = &ctrl->bitmap;
-
- DEBUG_PRINTF("checking if offset=%llu is a match\n", offset);
-
-#ifdef DEBUG
- dumpBitmap(xs);
-#endif
-
- u64a bitmap = xs->bitmap;
- if (!bitmap) {
- DEBUG_PRINTF("no tops; stale\n");
- return REPEAT_STALE;
- }
-
- // Quick pre-check for minimum.
- const u64a base = xs->offset;
- assert(offset >= base);
- if (offset - base < info->repeatMin) {
- DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
- return REPEAT_NOMATCH;
- }
-
- // We check the most recent offset first, as we can establish staleness.
- u64a match = base + findAndClearMSB_64(&bitmap);
- DEBUG_PRINTF("offset=%llu, last_match %llu\n", offset, match);
- assert(offset >= match);
- u64a diff = offset - match;
- if (diff > info->repeatMax) {
- DEBUG_PRINTF("stale\n");
- return REPEAT_STALE;
- } else if (diff >= info->repeatMin && diff <= info->repeatMax) {
- return REPEAT_MATCH;
- }
-
- while (bitmap) {
- match = base + findAndClearLSB_64(&bitmap);
- DEBUG_PRINTF("offset=%llu, last_match %llu\n", offset, match);
- assert(offset >= match);
- diff = offset - match;
- if (diff >= info->repeatMin && diff <= info->repeatMax) {
- return REPEAT_MATCH;
- }
- }
-
- return REPEAT_NOMATCH;
-}
-
-enum RepeatMatch repeatHasMatchTrailer(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- u64a offset) {
- const struct RepeatTrailerControl *xs = &ctrl->trailer;
- const u32 m_width = info->repeatMax - info->repeatMin;
-
- DEBUG_PRINTF("offset=%llu, xs->offset=%llu, xs->bitmap=0x%llx\n", offset,
- xs->offset, xs->bitmap);
-
- if (offset > xs->offset + m_width) {
- DEBUG_PRINTF("stale\n");
- return REPEAT_STALE;
- }
-
- if (offset >= xs->offset) {
- DEBUG_PRINTF("in match window\n");
- return REPEAT_MATCH;
- }
-
- if (offset >= xs->offset - info->repeatMin) {
- u32 idx = xs->offset - offset - 1;
- DEBUG_PRINTF("check bitmap idx %u\n", idx);
- assert(idx < 64);
- if (xs->bitmap & (1ULL << idx)) {
- DEBUG_PRINTF("match in bitmap\n");
- return REPEAT_MATCH;
- }
- }
-
- DEBUG_PRINTF("no match\n");
- return REPEAT_NOMATCH;
-}
-
+ return xs->offset + 63 - clz64(xs->bitmap);
+}
+
+u64a repeatLastTopTrailer(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl) {
+ const struct RepeatTrailerControl *xs = &ctrl->trailer;
+ assert(xs->offset >= info->repeatMin);
+ return xs->offset - info->repeatMin;
+}
+
+u64a repeatNextMatchRing(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, const void *state,
+ u64a offset) {
+ const struct RepeatRingControl *xs = &ctrl->ring;
+ const u8 *ring = (const u8 *)state;
+ const u32 ringSize = ringCapacity(info);
+
+ // We should have at least one top stored.
+ assert(mmbit_any(ring, ringSize));
+ assert(info->repeatMax < REPEAT_INF);
+
+ // Increment offset, as we want the NEXT match.
+ offset++;
+
+ const u64a base_offset = xs->offset;
+ DEBUG_PRINTF("offset=%llu, base_offset=%llu\n", offset, base_offset);
+
+ u64a delta = offset - base_offset;
+ if (offset < base_offset || delta < info->repeatMin) {
+ DEBUG_PRINTF("before min repeat\n");
+ return base_offset + info->repeatMin;
+ }
+ if (offset > ringLastTop(xs, ringSize) + info->repeatMax) {
+ DEBUG_PRINTF("ring is stale\n");
+ return 0; // no more matches
+ }
+
+ DEBUG_PRINTF("delta=%llu\n", delta);
+ u64a lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
+ DEBUG_PRINTF("lower=%llu\n", lower);
+
+ assert(lower < ringSize);
+
+ // First scan, either to xs->last if there's no wrap-around or ringSize
+ // (end of the underlying multibit) if we are wrapping.
+
+ u32 begin = xs->first + lower;
+ if (begin >= ringSize) {
+ // This branch and sub tested a lot faster than using % (integer div).
+ begin -= ringSize;
+ }
+ const u32 end = begin >= xs->last ? ringSize : xs->last;
+ u32 i = mmbit_iterate_bounded(ring, ringSize, begin, end);
+ if (i != MMB_INVALID) {
+ u32 j = i - begin + lower;
+ return MAX(offset, base_offset + j + info->repeatMin);
+ }
+
+ // A second scan is necessary if we need to cope with wrap-around in the
+ // ring buffer.
+
+ if (begin >= xs->last) {
+ i = mmbit_iterate_bounded(ring, ringSize, 0, xs->last);
+ if (i != MMB_INVALID) {
+ u32 j = i + (ringSize - begin) + lower;
+ return MAX(offset, base_offset + j + info->repeatMin);
+ }
+ }
+
+ return 0;
+}
+
+u64a repeatNextMatchRange(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, const void *state,
+ u64a offset) {
+ const struct RepeatRangeControl *xs = &ctrl->range;
+ const u16 *ring = (const u16 *)state;
+
+ assert(xs->num > 0);
+ assert(xs->num <= rangeListCapacity(info));
+ assert(rangeListIsOrdered(xs, ring));
+ assert(info->repeatMax < REPEAT_INF);
+
+ for (u32 i = 0; i < xs->num; i++) {
+ u64a base = xs->offset + unaligned_load_u16(ring + i);
+ u64a first = base + info->repeatMin;
+ if (offset < first) {
+ return first;
+ }
+ if (offset < base + info->repeatMax) {
+ return offset + 1;
+ }
+ }
+
+ return 0;
+}
+
+u64a repeatNextMatchBitmap(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ const struct RepeatBitmapControl *xs = &ctrl->bitmap;
+ const u64a base = xs->offset;
+ u64a bitmap = xs->bitmap;
+
+ // FIXME: quick exit if there is no match, based on last top in bitmap?
+
+ while (bitmap) {
+ u64a top = base + findAndClearLSB_64(&bitmap);
+ if (offset < top + info->repeatMin) {
+ return top + info->repeatMin;
+ }
+ if (offset < top + info->repeatMax) {
+ return offset + 1;
+ }
+ }
+
+ return 0; // No more matches.
+}
+
+u64a repeatNextMatchTrailer(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ const struct RepeatTrailerControl *xs = &ctrl->trailer;
+ const u32 m_width = info->repeatMax - info->repeatMin;
+
+ DEBUG_PRINTF("offset=%llu, xs->offset=%llu\n", offset, xs->offset);
+ DEBUG_PRINTF("{%u,%u} repeat, m_width=%u\n", info->repeatMin,
+ info->repeatMax, m_width);
+
+ assert(xs->offset >= info->repeatMin);
+
+ if (offset >= xs->offset + m_width) {
+ DEBUG_PRINTF("no more matches\n");
+ return 0;
+ }
+
+ if (offset >= xs->offset) {
+ DEBUG_PRINTF("inside most recent match window, next match %llu\n",
+ offset + 1);
+ return offset + 1;
+ }
+
+ // Offset is before the match window, we need to consult the bitmap of
+ // earlier match offsets.
+ u64a bitmap = xs->bitmap;
+
+ u64a diff = xs->offset - offset;
+ DEBUG_PRINTF("diff=%llu\n", diff);
+ if (diff <= 64) {
+ assert(diff);
+ bitmap &= (1ULL << (diff - 1)) - 1;
+ }
+ DEBUG_PRINTF("bitmap = 0x%llx\n", bitmap);
+ if (bitmap) {
+ u32 idx = 63 - clz64(bitmap);
+ DEBUG_PRINTF("clz=%u, idx = %u -> offset %llu\n", clz64(bitmap), idx,
+ xs->offset - idx);
+ DEBUG_PRINTF("next match at %llu\n", xs->offset - idx - 1);
+ u64a next_match = xs->offset - idx - 1;
+ assert(next_match > offset);
+ return next_match;
+ }
+
+ DEBUG_PRINTF("next match is start of match window, %llu\n", xs->offset);
+ return xs->offset;
+}
+
+/** \brief Store the first top in the ring buffer. */
+static
+void storeInitialRingTop(struct RepeatRingControl *xs, u8 *ring,
+ u64a offset, const u32 ringSize) {
+ DEBUG_PRINTF("ring=%p, ringSize=%u\n", ring, ringSize);
+ xs->offset = offset;
+ mmbit_clear(ring, ringSize);
+ mmbit_set(ring, ringSize, 0);
+ xs->first = 0;
+ xs->last = 1;
+}
+
+static really_inline
+char ringIsStale(const struct RepeatRingControl *xs, const u32 ringSize,
+ const u64a offset) {
+ u64a finalMatch = ringLastTop(xs, ringSize);
+ if (offset - finalMatch >= ringSize) {
+ DEBUG_PRINTF("all matches in ring are stale\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+void repeatStoreRing(const struct RepeatInfo *info, union RepeatControl *ctrl,
+ void *state, u64a offset, char is_alive) {
+ struct RepeatRingControl *xs = &ctrl->ring;
+ u8 *ring = (u8 *)state;
+ const u32 ringSize = ringCapacity(info);
+ assert(ringSize > 0);
+
+ DEBUG_PRINTF("storing top for offset %llu in ring\n", offset);
+
+ if (!is_alive || ringIsStale(xs, ringSize, offset)) {
+ storeInitialRingTop(xs, ring, offset, ringSize);
+ } else {
+ assert(offset > ringLastTop(xs, ringSize)); // Dupe or out of order.
+ u32 occ = ringOccupancy(xs, ringSize);
+ u64a diff = offset - xs->offset;
+ DEBUG_PRINTF("diff=%llu, occ=%u\n", diff, occ);
+ if (diff >= ringSize) {
+ u32 push = diff - ringSize + 1;
+ DEBUG_PRINTF("push ring %u\n", push);
+ xs->first += push;
+ if (xs->first >= ringSize) {
+ xs->first -= ringSize;
+ }
+ xs->offset += push;
+ diff -= push;
+ occ -= push;
+ }
+
+ // There's now room in the ring for this top, so we write a run of
+ // zeroes, then a one.
+ DEBUG_PRINTF("diff=%llu, occ=%u\n", diff, occ);
+ assert(diff < ringSize);
+ assert(diff >= occ);
+ u32 n = diff - occ;
+
+ u32 i = xs->last + n;
+
+ mmbit_unset_range(ring, ringSize, xs->last, MIN(i, ringSize));
+ if (i >= ringSize) {
+ i -= ringSize;
+ mmbit_unset_range(ring, ringSize, 0, i);
+ }
+
+ assert(i != xs->first);
+ DEBUG_PRINTF("set bit %u\n", i);
+ mmbit_set(ring, ringSize, i);
+ xs->last = i + 1;
+ if (xs->last == ringSize) {
+ xs->last = 0;
+ }
+ }
+
+ // Our ring indices shouldn't have spiraled off into uncharted space.
+ assert(xs->first < ringSize);
+ assert(xs->last < ringSize);
+
+#ifdef DEBUG
+ DEBUG_PRINTF("post-store ring state\n");
+ dumpRing(info, xs, ring);
+#endif
+
+ // The final top stored in our ring should be the one we just wrote in.
+ assert(ringLastTop(xs, ringSize) == offset);
+}
+
+static really_inline
+void storeInitialRangeTop(struct RepeatRangeControl *xs, u16 *ring,
+ u64a offset) {
+ xs->offset = offset;
+ xs->num = 1;
+ unaligned_store_u16(ring, 0);
+}
+
+void repeatStoreRange(const struct RepeatInfo *info, union RepeatControl *ctrl,
+ void *state, u64a offset, char is_alive) {
+ struct RepeatRangeControl *xs = &ctrl->range;
+ u16 *ring = (u16 *)state;
+
+ if (!is_alive) {
+ DEBUG_PRINTF("storing initial top at %llu\n", offset);
+ storeInitialRangeTop(xs, ring, offset);
+ return;
+ }
+
+ DEBUG_PRINTF("storing top at %llu, list currently has %u/%u elements\n",
+ offset, xs->num, rangeListCapacity(info));
+
+#ifdef DEBUG
+ dumpRange(info, xs, ring);
+#endif
+
+ // Walk ring from front. Identify the number of stale elements, and shift
+ // the whole ring to delete them.
+ u32 i = 0;
+ for (; i < xs->num; i++) {
+ u64a this_offset = xs->offset + unaligned_load_u16(ring + i);
+ DEBUG_PRINTF("this_offset=%llu, diff=%llu\n", this_offset,
+ offset - this_offset);
+ if (offset - this_offset <= info->repeatMax) {
+ break;
+ }
+ }
+
+ if (i == xs->num) {
+ DEBUG_PRINTF("whole ring is stale\n");
+ storeInitialRangeTop(xs, ring, offset);
+ return;
+ } else if (i > 0) {
+ DEBUG_PRINTF("expiring %u stale tops\n", i);
+ u16 first_offset = unaligned_load_u16(ring + i); // first live top
+ for (u32 j = 0; j < xs->num - i; j++) {
+ u16 val = unaligned_load_u16(ring + i + j);
+ assert(val >= first_offset);
+ unaligned_store_u16(ring + j, val - first_offset);
+ }
+ xs->offset += first_offset;
+ xs->num -= i;
+ }
+
+#ifdef DEBUG
+ DEBUG_PRINTF("post-expire:\n");
+ dumpRange(info, xs, ring);
+#endif
+
+ if (xs->num == 1) {
+ goto append;
+ }
+
+ // Let d = repeatMax - repeatMin
+ // Examine penultimate entry x[-2].
+ // If (offset - x[-2] <= d), then last entry x[-1] can be replaced with
+ // entry for offset.
+ assert(xs->num >= 2);
+ u32 d = info->repeatMax - info->repeatMin;
+ u64a penultimate_offset =
+ xs->offset + unaligned_load_u16(ring + xs->num - 2);
+ if (offset - penultimate_offset <= d) {
+ assert(offset - xs->offset <= (u16)-1);
+ unaligned_store_u16(ring + xs->num - 1, offset - xs->offset);
+ goto done;
+ }
+
+ // Otherwise, write a new entry for offset and return.
+
+append:
+ assert(offset - xs->offset <= (u16)-1);
+ assert(xs->num < rangeListCapacity(info));
+ unaligned_store_u16(ring + xs->num, offset - xs->offset);
+ xs->num++;
+
+done:
+ assert(rangeListIsOrdered(xs, ring));
+}
+
+void repeatStoreBitmap(const struct RepeatInfo *info, union RepeatControl *ctrl,
+ u64a offset, char is_alive) {
+ DEBUG_PRINTF("{%u,%u} repeat, storing top at %llu\n", info->repeatMin,
+ info->repeatMax, offset);
+
+ struct RepeatBitmapControl *xs = &ctrl->bitmap;
+ if (!is_alive || !xs->bitmap) {
+ DEBUG_PRINTF("storing initial top at %llu\n", offset);
+ xs->offset = offset;
+ xs->bitmap = 1U;
+ return;
+ }
+
+#ifdef DEBUG
+ DEBUG_PRINTF("pre-store:\n");
+ dumpBitmap(xs);
+#endif
+
+ assert(offset >= xs->offset);
+
+ u64a last_top = xs->offset + 63 - clz64(xs->bitmap);
+ if (offset > last_top + info->repeatMax) {
+ DEBUG_PRINTF("bitmap stale, storing initial top\n");
+ xs->offset = offset;
+ xs->bitmap = 1U;
+ return;
+ }
+
+ u64a diff = offset - xs->offset;
+ if (diff >= info->repeatMax + 1) {
+ DEBUG_PRINTF("need expire, diff=%llu\n", diff);
+ u64a push = diff - info->repeatMax;
+ xs->offset += push;
+ xs->bitmap = push >= 64 ? 0 : xs->bitmap >> push;
+ DEBUG_PRINTF("pushed xs->offset to %llu\n", xs->offset);
+ }
+
+ // Write a new entry.
+ diff = offset - xs->offset;
+ assert(diff < 64);
+ xs->bitmap |= (1ULL << diff);
+
+#ifdef DEBUG
+ DEBUG_PRINTF("post-store:\n");
+ dumpBitmap(xs);
+#endif
+}
+
+/** \brief Returns 1 if the ring has a match between (logical) index \a lower
+ * and \a upper, excluding \a upper. */
+static
+int ringHasMatch(const struct RepeatRingControl *xs, const u8 *ring,
+ const u32 ringSize, u32 lower, u32 upper) {
+ assert(lower < upper);
+ assert(lower < ringSize);
+ assert(upper <= ringSize);
+
+ u32 i = xs->first + lower;
+ if (i >= ringSize) {
+ i -= ringSize;
+ }
+
+ // Performance tweak: if we're looking at a fixed repeat, we can just use
+ // mmbit_isset.
+ if (lower + 1 == upper) {
+ return mmbit_isset(ring, ringSize, i);
+ }
+
+ u32 end = xs->first + upper;
+ if (end >= ringSize) {
+ end -= ringSize;
+ }
+
+ // First scan, either to end if there's no wrap-around or ringSize (end of
+ // the underlying multibit) if we are wrapping.
+
+ u32 scan_end = i < end ? end : ringSize;
+ u32 m = mmbit_iterate_bounded(ring, ringSize, i, scan_end);
+ if (m != MMB_INVALID) {
+ return 1;
+ }
+
+ // A second scan is necessary if we need to cope with wrap-around in the
+ // ring buffer.
+
+ if (i >= end) {
+ m = mmbit_iterate_bounded(ring, ringSize, 0, end);
+ return m != MMB_INVALID;
+ }
+
+ return 0;
+}
+
+/** Return a mask of ones in bit positions [0..v]. */
+static really_inline
+u64a mask_ones_to(u32 v) {
+ if (v < 63) {
+ return (1ULL << (v + 1)) - 1;
+ } else {
+ return ~(0ULL);
+ }
+}
+
+void repeatStoreTrailer(const struct RepeatInfo *info,
+ union RepeatControl *ctrl, u64a offset, char is_alive) {
+ DEBUG_PRINTF("{%u,%u} repeat, top at %llu\n", info->repeatMin,
+ info->repeatMax, offset);
+
+ struct RepeatTrailerControl *xs = &ctrl->trailer;
+
+ /* The TRAILER repeat model stores the following data in its control block:
+ *
+ * 1. offset, which is the min extent of the most recent match window
+ * (i.e. corresponding to the most recent top)
+ * 2. bitmap, which is a bitmap of up to repeatMin matches before
+ * the min extent offset.
+ */
+
+ const u64a next_extent = offset + info->repeatMin;
+
+ if (!is_alive) {
+ xs->offset = next_extent;
+ xs->bitmap = 0;
+ DEBUG_PRINTF("initial top, set extent to %llu\n", next_extent);
+ return;
+ }
+
+#ifdef DEBUG
+ DEBUG_PRINTF("pre-store:\n");
+ dumpTrailer(info, xs);
+#endif
+
+ const u32 m_width = info->repeatMax - info->repeatMin;
+ DEBUG_PRINTF("most recent match window is [%llu,%llu]\n", xs->offset,
+ xs->offset + m_width);
+
+ assert(next_extent > xs->offset);
+ u64a diff = next_extent - xs->offset;
+ DEBUG_PRINTF("diff=%llu, m_width=%u\n", diff, m_width);
+
+ assert(diff);
+ xs->bitmap = diff < 64 ? xs->bitmap << diff : 0;
+
+ // Switch on bits in the bitmask corresponding to matches in the previous
+ // match window.
+ if (diff <= m_width) {
+ u64a m = mask_ones_to(diff - 1);
+ xs->bitmap |= m;
+ } else {
+ u64a shift = diff - m_width - 1;
+ if (shift < 64) {
+ u64a m = mask_ones_to(m_width);
+ m <<= shift;
+ xs->bitmap |= m;
+ }
+ }
+
+ DEBUG_PRINTF("bitmap=0x%llx\n", xs->bitmap);
+
+ // Update max extent.
+ xs->offset = next_extent;
+
+ // Trim stale history: we only need repeatMin bytes of history.
+ if (info->repeatMin < 63) {
+ u64a mask = (1ULL << (info->repeatMin + 1)) - 1;
+ xs->bitmap &= mask;
+ }
+
+#ifdef DEBUG
+ DEBUG_PRINTF("post-store:\n");
+ dumpTrailer(info, xs);
+#endif
+}
+
+enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state, u64a offset) {
+ const struct RepeatRingControl *xs = &ctrl->ring;
+ const u8 *ring = (const u8 *)state;
+ const u32 ringSize = ringCapacity(info);
+
+ assert(mmbit_any(ring, ringSize));
+ assert(offset >= xs->offset);
+
+ DEBUG_PRINTF("check: offset=%llu, repeat=[%u,%u]\n", offset,
+ info->repeatMin, info->repeatMax);
+#ifdef DEBUG
+ DEBUG_PRINTF("ring state\n");
+ dumpRing(info, xs, ring);
+#endif
+
+ if (offset - xs->offset < info->repeatMin) {
+ DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
+ return REPEAT_NOMATCH;
+ }
+
+ if (offset - ringLastTop(xs, ringSize) >= ringSize) {
+ DEBUG_PRINTF("ring is stale\n");
+ return REPEAT_STALE;
+ }
+
+ // If we're not stale, delta fits in the range [repeatMin, lastTop +
+ // repeatMax], which fits in a u32.
+ assert(offset - xs->offset < UINT32_MAX);
+ u32 delta = (u32)(offset - xs->offset);
+ DEBUG_PRINTF("delta=%u\n", delta);
+
+ // Find the bounds on possible matches in the ring buffer.
+ u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
+ u32 upper = MIN(delta - info->repeatMin + 1, ringOccupancy(xs, ringSize));
+
+ if (lower >= upper) {
+ DEBUG_PRINTF("no matches to check\n");
+ return REPEAT_NOMATCH;
+ }
+
+ DEBUG_PRINTF("possible match indices=[%u,%u]\n", lower, upper);
+ if (ringHasMatch(xs, ring, ringSize, lower, upper)) {
+ return REPEAT_MATCH;
+ }
+
+ return REPEAT_NOMATCH;
+}
+
+enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state, u64a offset) {
+ const struct RepeatRangeControl *xs = &ctrl->range;
+ const u16 *ring = (const u16 *)state;
+
+ assert(xs->num > 0);
+ assert(xs->num <= rangeListCapacity(info));
+ assert(rangeListIsOrdered(xs, ring));
+
+ // Walk the ring. For each entry x:
+ // if (offset - x) falls inside repeat bounds, return success.
+
+ // It may be worth doing tests on first and last elements first to bail
+ // early if the whole ring is too young or stale.
+
+ DEBUG_PRINTF("check %u (of %u) elements, offset %llu, bounds={%u,%u}\n",
+ xs->num, rangeListCapacity(info), offset,
+ info->repeatMin, info->repeatMax);
+#ifdef DEBUG
+ dumpRange(info, xs, ring);
+#endif
+
+ // Quick pre-check for minimum.
+ assert(offset >= xs->offset);
+ if (offset - xs->offset < info->repeatMin) {
+ DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
+ return REPEAT_NOMATCH;
+ }
+
+ // We check the most recent offset first, as we can establish staleness.
+ u64a match = xs->offset + unaligned_load_u16(ring + xs->num - 1);
+ assert(offset >= match);
+ u64a diff = offset - match;
+ if (diff > info->repeatMax) {
+ DEBUG_PRINTF("range list is stale\n");
+ return REPEAT_STALE;
+ } else if (diff >= info->repeatMin && diff <= info->repeatMax) {
+ return REPEAT_MATCH;
+ }
+
+ // Check the other offsets in the list.
+ u32 count = xs->num - 1;
+ for (u32 i = 0; i < count; i++) {
+ match = xs->offset + unaligned_load_u16(ring + i);
+ assert(offset >= match);
+ diff = offset - match;
+ if (diff >= info->repeatMin && diff <= info->repeatMax) {
+ return REPEAT_MATCH;
+ }
+ }
+
+ return REPEAT_NOMATCH;
+}
+
+enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ u64a offset) {
+ const struct RepeatBitmapControl *xs = &ctrl->bitmap;
+
+ DEBUG_PRINTF("checking if offset=%llu is a match\n", offset);
+
+#ifdef DEBUG
+ dumpBitmap(xs);
+#endif
+
+ u64a bitmap = xs->bitmap;
+ if (!bitmap) {
+ DEBUG_PRINTF("no tops; stale\n");
+ return REPEAT_STALE;
+ }
+
+ // Quick pre-check for minimum.
+ const u64a base = xs->offset;
+ assert(offset >= base);
+ if (offset - base < info->repeatMin) {
+ DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
+ return REPEAT_NOMATCH;
+ }
+
+ // We check the most recent offset first, as we can establish staleness.
+ u64a match = base + findAndClearMSB_64(&bitmap);
+ DEBUG_PRINTF("offset=%llu, last_match %llu\n", offset, match);
+ assert(offset >= match);
+ u64a diff = offset - match;
+ if (diff > info->repeatMax) {
+ DEBUG_PRINTF("stale\n");
+ return REPEAT_STALE;
+ } else if (diff >= info->repeatMin && diff <= info->repeatMax) {
+ return REPEAT_MATCH;
+ }
+
+ while (bitmap) {
+ match = base + findAndClearLSB_64(&bitmap);
+ DEBUG_PRINTF("offset=%llu, last_match %llu\n", offset, match);
+ assert(offset >= match);
+ diff = offset - match;
+ if (diff >= info->repeatMin && diff <= info->repeatMax) {
+ return REPEAT_MATCH;
+ }
+ }
+
+ return REPEAT_NOMATCH;
+}
+
+enum RepeatMatch repeatHasMatchTrailer(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ u64a offset) {
+ const struct RepeatTrailerControl *xs = &ctrl->trailer;
+ const u32 m_width = info->repeatMax - info->repeatMin;
+
+ DEBUG_PRINTF("offset=%llu, xs->offset=%llu, xs->bitmap=0x%llx\n", offset,
+ xs->offset, xs->bitmap);
+
+ if (offset > xs->offset + m_width) {
+ DEBUG_PRINTF("stale\n");
+ return REPEAT_STALE;
+ }
+
+ if (offset >= xs->offset) {
+ DEBUG_PRINTF("in match window\n");
+ return REPEAT_MATCH;
+ }
+
+ if (offset >= xs->offset - info->repeatMin) {
+ u32 idx = xs->offset - offset - 1;
+ DEBUG_PRINTF("check bitmap idx %u\n", idx);
+ assert(idx < 64);
+ if (xs->bitmap & (1ULL << idx)) {
+ DEBUG_PRINTF("match in bitmap\n");
+ return REPEAT_MATCH;
+ }
+ }
+
+ DEBUG_PRINTF("no match\n");
+ return REPEAT_NOMATCH;
+}
+
/** \brief True if the given value can be packed into len bytes. */
-static really_inline
+static really_inline
int fits_in_len_bytes(u64a val, u32 len) {
if (len >= 8) {
return 1;
@@ -896,205 +896,205 @@ int fits_in_len_bytes(u64a val, u32 len) {
}
static really_inline
-void storePackedRelative(char *dest, u64a val, u64a offset, u64a max, u32 len) {
- assert(val <= offset);
+void storePackedRelative(char *dest, u64a val, u64a offset, u64a max, u32 len) {
+ assert(val <= offset);
assert(fits_in_len_bytes(max, len));
- u64a delta = offset - val;
- if (delta >= max) {
- delta = max;
- }
- DEBUG_PRINTF("delta %llu\n", delta);
+ u64a delta = offset - val;
+ if (delta >= max) {
+ delta = max;
+ }
+ DEBUG_PRINTF("delta %llu\n", delta);
assert(fits_in_len_bytes(delta, len));
- partial_store_u64a(dest, delta, len);
-}
-
-static
-void repeatPackRing(char *dest, const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- const struct RepeatRingControl *xs = &ctrl->ring;
- const u32 ring_indices_len = info->repeatMax < 254 ? 2 : 4;
- const u32 offset_len = info->packedCtrlSize - ring_indices_len;
-
- // Write out packed relative base offset.
- assert(info->packedCtrlSize > ring_indices_len);
- storePackedRelative(dest, xs->offset, offset, info->horizon, offset_len);
-
- // Write out ring indices.
- if (ring_indices_len == 4) {
- unaligned_store_u16(dest + offset_len, xs->first);
- unaligned_store_u16(dest + offset_len + 2, xs->last);
- } else {
- assert(xs->first < 256 && xs->last < 256);
- u8 *indices = (u8 *)dest + offset_len;
- indices[0] = xs->first;
- indices[1] = xs->last;
- }
-}
-
-static
-void repeatPackOffset(char *dest, const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- const struct RepeatOffsetControl *xs = &ctrl->offset;
- DEBUG_PRINTF("packing offset %llu [h %u]\n", xs->offset, info->horizon);
+ partial_store_u64a(dest, delta, len);
+}
+
+static
+void repeatPackRing(char *dest, const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ const struct RepeatRingControl *xs = &ctrl->ring;
+ const u32 ring_indices_len = info->repeatMax < 254 ? 2 : 4;
+ const u32 offset_len = info->packedCtrlSize - ring_indices_len;
+
+ // Write out packed relative base offset.
+ assert(info->packedCtrlSize > ring_indices_len);
+ storePackedRelative(dest, xs->offset, offset, info->horizon, offset_len);
+
+ // Write out ring indices.
+ if (ring_indices_len == 4) {
+ unaligned_store_u16(dest + offset_len, xs->first);
+ unaligned_store_u16(dest + offset_len + 2, xs->last);
+ } else {
+ assert(xs->first < 256 && xs->last < 256);
+ u8 *indices = (u8 *)dest + offset_len;
+ indices[0] = xs->first;
+ indices[1] = xs->last;
+ }
+}
+
+static
+void repeatPackOffset(char *dest, const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ const struct RepeatOffsetControl *xs = &ctrl->offset;
+ DEBUG_PRINTF("packing offset %llu [h %u]\n", xs->offset, info->horizon);
if (!info->packedCtrlSize) {
assert(info->type == REPEAT_ALWAYS);
DEBUG_PRINTF("externally guarded .*\n");
return;
}
- storePackedRelative(dest, xs->offset, offset, info->horizon,
- info->packedCtrlSize);
-}
-
-static
-void repeatPackRange(char *dest, const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- const struct RepeatRangeControl *xs = &ctrl->range;
-
- // Write out packed relative base offset.
- assert(info->packedCtrlSize > 1);
- storePackedRelative(dest, xs->offset, offset, info->horizon,
- info->packedCtrlSize - 1);
-
- // Write out range number of elements.
- dest[info->packedCtrlSize - 1] = xs->num;
-}
-
-static
-void repeatPackBitmap(char *dest, const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- const struct RepeatBitmapControl *xs = &ctrl->bitmap;
- const u32 bound = info->repeatMax;
-
- assert(offset >= xs->offset);
- u64a new_base = offset > bound ? offset - bound : 0;
-
- // Shift bitmap to begin at new_base rather than xs->offset.
- u64a bitmap = xs->bitmap;
- if (new_base >= xs->offset) {
- u64a shift = new_base - xs->offset;
- bitmap = shift < 64 ? bitmap >> shift : 0;
- } else {
- u64a shift = xs->offset - new_base;
- bitmap = shift < 64 ? bitmap << shift : 0;
- }
-
- DEBUG_PRINTF("packing %llu into %u bytes\n", bitmap, info->packedCtrlSize);
-
- // Write out packed bitmap.
+ storePackedRelative(dest, xs->offset, offset, info->horizon,
+ info->packedCtrlSize);
+}
+
+static
+void repeatPackRange(char *dest, const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ const struct RepeatRangeControl *xs = &ctrl->range;
+
+ // Write out packed relative base offset.
+ assert(info->packedCtrlSize > 1);
+ storePackedRelative(dest, xs->offset, offset, info->horizon,
+ info->packedCtrlSize - 1);
+
+ // Write out range number of elements.
+ dest[info->packedCtrlSize - 1] = xs->num;
+}
+
+static
+void repeatPackBitmap(char *dest, const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ const struct RepeatBitmapControl *xs = &ctrl->bitmap;
+ const u32 bound = info->repeatMax;
+
+ assert(offset >= xs->offset);
+ u64a new_base = offset > bound ? offset - bound : 0;
+
+ // Shift bitmap to begin at new_base rather than xs->offset.
+ u64a bitmap = xs->bitmap;
+ if (new_base >= xs->offset) {
+ u64a shift = new_base - xs->offset;
+ bitmap = shift < 64 ? bitmap >> shift : 0;
+ } else {
+ u64a shift = xs->offset - new_base;
+ bitmap = shift < 64 ? bitmap << shift : 0;
+ }
+
+ DEBUG_PRINTF("packing %llu into %u bytes\n", bitmap, info->packedCtrlSize);
+
+ // Write out packed bitmap.
assert(fits_in_len_bytes(bitmap, info->packedCtrlSize));
- partial_store_u64a(dest, bitmap, info->packedCtrlSize);
-}
-
-static
-void repeatPackSparseOptimalP(char *dest, const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- const struct RepeatRingControl *xs = &ctrl->ring;
- // set ring index pointer according to patch count
- const u32 ring_indices_len = info->patchCount < 254 ? 2 : 4;
- const u32 offset_len = info->packedCtrlSize - ring_indices_len;
-
- // Write out packed relative base offset.
- assert(info->packedCtrlSize > ring_indices_len);
- storePackedRelative(dest, xs->offset, offset, info->horizon, offset_len);
-
- // Write out ring indices.
- if (ring_indices_len == 4) {
- unaligned_store_u16(dest + offset_len, xs->first);
- unaligned_store_u16(dest + offset_len + 2, xs->last);
- } else {
- assert(xs->first < 256 && xs->last < 256);
- u8 *indices = (u8 *)dest + offset_len;
- indices[0] = xs->first;
- indices[1] = xs->last;
- }
-
-}
-
-static
-void repeatPackTrailer(char *dest, const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- const struct RepeatTrailerControl *xs = &ctrl->trailer;
-
- DEBUG_PRINTF("saving: offset=%llu, xs->offset=%llu, xs->bitmap=0x%llx\n",
- offset, xs->offset, xs->bitmap);
-
- // XXX: xs->offset may be zero in the NFA path (effectively uninitialized).
- u64a top;
- if (xs->offset) {
- assert(xs->offset >= info->repeatMin);
- top = xs->offset - info->repeatMin;
- } else {
- top = 0;
- }
-
- top = offset - top; // Pack top relative to offset.
-
- u64a v[2];
- v[0] = MIN(top, info->horizon);
- v[1] = xs->bitmap;
-
- pack_bits_64(dest, v, info->packedFieldSizes, 2);
-}
-
-void repeatPack(char *dest, const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- assert(dest && info && ctrl);
-
- switch ((enum RepeatType)info->type) {
- case REPEAT_RING:
- repeatPackRing(dest, info, ctrl, offset);
- break;
- case REPEAT_FIRST:
- case REPEAT_LAST:
- repeatPackOffset(dest, info, ctrl, offset);
- break;
- case REPEAT_RANGE:
- repeatPackRange(dest, info, ctrl, offset);
- break;
- case REPEAT_BITMAP:
- repeatPackBitmap(dest, info, ctrl, offset);
- break;
- case REPEAT_SPARSE_OPTIMAL_P:
- repeatPackSparseOptimalP(dest, info, ctrl, offset);
- break;
- case REPEAT_TRAILER:
- repeatPackTrailer(dest, info, ctrl, offset);
- break;
+ partial_store_u64a(dest, bitmap, info->packedCtrlSize);
+}
+
+static
+void repeatPackSparseOptimalP(char *dest, const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ const struct RepeatRingControl *xs = &ctrl->ring;
+ // set ring index pointer according to patch count
+ const u32 ring_indices_len = info->patchCount < 254 ? 2 : 4;
+ const u32 offset_len = info->packedCtrlSize - ring_indices_len;
+
+ // Write out packed relative base offset.
+ assert(info->packedCtrlSize > ring_indices_len);
+ storePackedRelative(dest, xs->offset, offset, info->horizon, offset_len);
+
+ // Write out ring indices.
+ if (ring_indices_len == 4) {
+ unaligned_store_u16(dest + offset_len, xs->first);
+ unaligned_store_u16(dest + offset_len + 2, xs->last);
+ } else {
+ assert(xs->first < 256 && xs->last < 256);
+ u8 *indices = (u8 *)dest + offset_len;
+ indices[0] = xs->first;
+ indices[1] = xs->last;
+ }
+
+}
+
+static
+void repeatPackTrailer(char *dest, const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ const struct RepeatTrailerControl *xs = &ctrl->trailer;
+
+ DEBUG_PRINTF("saving: offset=%llu, xs->offset=%llu, xs->bitmap=0x%llx\n",
+ offset, xs->offset, xs->bitmap);
+
+ // XXX: xs->offset may be zero in the NFA path (effectively uninitialized).
+ u64a top;
+ if (xs->offset) {
+ assert(xs->offset >= info->repeatMin);
+ top = xs->offset - info->repeatMin;
+ } else {
+ top = 0;
+ }
+
+ top = offset - top; // Pack top relative to offset.
+
+ u64a v[2];
+ v[0] = MIN(top, info->horizon);
+ v[1] = xs->bitmap;
+
+ pack_bits_64(dest, v, info->packedFieldSizes, 2);
+}
+
+void repeatPack(char *dest, const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ assert(dest && info && ctrl);
+
+ switch ((enum RepeatType)info->type) {
+ case REPEAT_RING:
+ repeatPackRing(dest, info, ctrl, offset);
+ break;
+ case REPEAT_FIRST:
+ case REPEAT_LAST:
+ repeatPackOffset(dest, info, ctrl, offset);
+ break;
+ case REPEAT_RANGE:
+ repeatPackRange(dest, info, ctrl, offset);
+ break;
+ case REPEAT_BITMAP:
+ repeatPackBitmap(dest, info, ctrl, offset);
+ break;
+ case REPEAT_SPARSE_OPTIMAL_P:
+ repeatPackSparseOptimalP(dest, info, ctrl, offset);
+ break;
+ case REPEAT_TRAILER:
+ repeatPackTrailer(dest, info, ctrl, offset);
+ break;
case REPEAT_ALWAYS:
/* nothing to do - no state */
break;
- }
-}
-
-static really_inline
-u64a loadPackedRelative(const char *src, u64a offset, u32 len) {
- u64a delta = partial_load_u64a(src, len);
- DEBUG_PRINTF("delta %llu\n", delta);
- assert(offset >= delta);
- return offset - delta;
-}
-
-static
-void repeatUnpackRing(const char *src, const struct RepeatInfo *info,
- u64a offset, union RepeatControl *ctrl) {
- struct RepeatRingControl *xs = &ctrl->ring;
- const u32 ring_indices_len = info->repeatMax < 254 ? 2 : 4;
- const u32 offset_len = info->packedCtrlSize - ring_indices_len;
- xs->offset = loadPackedRelative(src, offset, offset_len);
- if (ring_indices_len == 4) {
- xs->first = unaligned_load_u16(src + offset_len);
- xs->last = unaligned_load_u16(src + offset_len + 2);
- } else {
- const u8 *indices = (const u8 *)src + offset_len;
- xs->first = indices[0];
- xs->last = indices[1];
- }
-}
-
-static
-void repeatUnpackOffset(const char *src, const struct RepeatInfo *info,
- u64a offset, union RepeatControl *ctrl) {
- struct RepeatOffsetControl *xs = &ctrl->offset;
+ }
+}
+
+static really_inline
+u64a loadPackedRelative(const char *src, u64a offset, u32 len) {
+ u64a delta = partial_load_u64a(src, len);
+ DEBUG_PRINTF("delta %llu\n", delta);
+ assert(offset >= delta);
+ return offset - delta;
+}
+
+static
+void repeatUnpackRing(const char *src, const struct RepeatInfo *info,
+ u64a offset, union RepeatControl *ctrl) {
+ struct RepeatRingControl *xs = &ctrl->ring;
+ const u32 ring_indices_len = info->repeatMax < 254 ? 2 : 4;
+ const u32 offset_len = info->packedCtrlSize - ring_indices_len;
+ xs->offset = loadPackedRelative(src, offset, offset_len);
+ if (ring_indices_len == 4) {
+ xs->first = unaligned_load_u16(src + offset_len);
+ xs->last = unaligned_load_u16(src + offset_len + 2);
+ } else {
+ const u8 *indices = (const u8 *)src + offset_len;
+ xs->first = indices[0];
+ xs->last = indices[1];
+ }
+}
+
+static
+void repeatUnpackOffset(const char *src, const struct RepeatInfo *info,
+ u64a offset, union RepeatControl *ctrl) {
+ struct RepeatOffsetControl *xs = &ctrl->offset;
if (!info->packedCtrlSize) {
assert(info->type == REPEAT_ALWAYS);
DEBUG_PRINTF("externally guarded .*\n");
@@ -1102,503 +1102,503 @@ void repeatUnpackOffset(const char *src, const struct RepeatInfo *info,
} else {
xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize);
}
- DEBUG_PRINTF("unpacking offset %llu [h%u]\n", xs->offset,
- info->horizon);
-}
-
-static
-void repeatUnpackRange(const char *src, const struct RepeatInfo *info,
- u64a offset, union RepeatControl *ctrl) {
- struct RepeatRangeControl *xs = &ctrl->range;
- xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize - 1);
- xs->num = src[info->packedCtrlSize - 1];
-}
-
-static
-void repeatUnpackBitmap(const char *src, const struct RepeatInfo *info,
- u64a offset, union RepeatControl *ctrl) {
- struct RepeatBitmapControl *xs = &ctrl->bitmap;
- xs->offset = offset > info->repeatMax ? offset - info->repeatMax : 0;
- xs->bitmap = partial_load_u64a(src, info->packedCtrlSize);
-}
-
-static
-void repeatUnpackSparseOptimalP(const char *src, const struct RepeatInfo *info,
- u64a offset, union RepeatControl *ctrl) {
- struct RepeatRingControl *xs = &ctrl->ring;
- const u32 ring_indices_len = info->patchCount < 254 ? 2 : 4;
- const u32 offset_len = info->packedCtrlSize - ring_indices_len;
- xs->offset = loadPackedRelative(src, offset, offset_len);
- if (ring_indices_len == 4) {
- xs->first = unaligned_load_u16(src + offset_len);
- xs->last = unaligned_load_u16(src + offset_len + 2);
- } else {
- const u8 *indices = (const u8 *)src + offset_len;
- xs->first = indices[0];
- xs->last = indices[1];
- }
-}
-
-static
-void repeatUnpackTrailer(const char *src, const struct RepeatInfo *info,
- u64a offset, union RepeatControl *ctrl) {
- struct RepeatTrailerControl *xs = &ctrl->trailer;
-
- u64a v[2];
- unpack_bits_64(v, (const u8 *)src, info->packedFieldSizes, 2);
-
- xs->offset = offset - v[0] + info->repeatMin;
- xs->bitmap = v[1];
-
- DEBUG_PRINTF("loaded: xs->offset=%llu, xs->bitmap=0x%llx\n", xs->offset,
- xs->bitmap);
-}
-
-void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset,
- union RepeatControl *ctrl) {
- assert(src && info && ctrl);
-
- switch ((enum RepeatType)info->type) {
- case REPEAT_RING:
- repeatUnpackRing(src, info, offset, ctrl);
- break;
- case REPEAT_FIRST:
- case REPEAT_LAST:
- repeatUnpackOffset(src, info, offset, ctrl);
- break;
- case REPEAT_RANGE:
- repeatUnpackRange(src, info, offset, ctrl);
- break;
- case REPEAT_BITMAP:
- repeatUnpackBitmap(src, info, offset, ctrl);
- break;
- case REPEAT_SPARSE_OPTIMAL_P:
- repeatUnpackSparseOptimalP(src, info, offset, ctrl);
- break;
- case REPEAT_TRAILER:
- repeatUnpackTrailer(src, info, offset, ctrl);
- break;
+ DEBUG_PRINTF("unpacking offset %llu [h%u]\n", xs->offset,
+ info->horizon);
+}
+
+static
+void repeatUnpackRange(const char *src, const struct RepeatInfo *info,
+ u64a offset, union RepeatControl *ctrl) {
+ struct RepeatRangeControl *xs = &ctrl->range;
+ xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize - 1);
+ xs->num = src[info->packedCtrlSize - 1];
+}
+
+static
+void repeatUnpackBitmap(const char *src, const struct RepeatInfo *info,
+ u64a offset, union RepeatControl *ctrl) {
+ struct RepeatBitmapControl *xs = &ctrl->bitmap;
+ xs->offset = offset > info->repeatMax ? offset - info->repeatMax : 0;
+ xs->bitmap = partial_load_u64a(src, info->packedCtrlSize);
+}
+
+static
+void repeatUnpackSparseOptimalP(const char *src, const struct RepeatInfo *info,
+ u64a offset, union RepeatControl *ctrl) {
+ struct RepeatRingControl *xs = &ctrl->ring;
+ const u32 ring_indices_len = info->patchCount < 254 ? 2 : 4;
+ const u32 offset_len = info->packedCtrlSize - ring_indices_len;
+ xs->offset = loadPackedRelative(src, offset, offset_len);
+ if (ring_indices_len == 4) {
+ xs->first = unaligned_load_u16(src + offset_len);
+ xs->last = unaligned_load_u16(src + offset_len + 2);
+ } else {
+ const u8 *indices = (const u8 *)src + offset_len;
+ xs->first = indices[0];
+ xs->last = indices[1];
+ }
+}
+
+static
+void repeatUnpackTrailer(const char *src, const struct RepeatInfo *info,
+ u64a offset, union RepeatControl *ctrl) {
+ struct RepeatTrailerControl *xs = &ctrl->trailer;
+
+ u64a v[2];
+ unpack_bits_64(v, (const u8 *)src, info->packedFieldSizes, 2);
+
+ xs->offset = offset - v[0] + info->repeatMin;
+ xs->bitmap = v[1];
+
+ DEBUG_PRINTF("loaded: xs->offset=%llu, xs->bitmap=0x%llx\n", xs->offset,
+ xs->bitmap);
+}
+
+void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset,
+ union RepeatControl *ctrl) {
+ assert(src && info && ctrl);
+
+ switch ((enum RepeatType)info->type) {
+ case REPEAT_RING:
+ repeatUnpackRing(src, info, offset, ctrl);
+ break;
+ case REPEAT_FIRST:
+ case REPEAT_LAST:
+ repeatUnpackOffset(src, info, offset, ctrl);
+ break;
+ case REPEAT_RANGE:
+ repeatUnpackRange(src, info, offset, ctrl);
+ break;
+ case REPEAT_BITMAP:
+ repeatUnpackBitmap(src, info, offset, ctrl);
+ break;
+ case REPEAT_SPARSE_OPTIMAL_P:
+ repeatUnpackSparseOptimalP(src, info, offset, ctrl);
+ break;
+ case REPEAT_TRAILER:
+ repeatUnpackTrailer(src, info, offset, ctrl);
+ break;
case REPEAT_ALWAYS:
/* nothing to do - no state */
break;
- }
-}
-
-static really_inline
-const u64a *getImplTable(const struct RepeatInfo *info) {
- const u64a *table = ((const u64a *)(ROUNDUP_PTR(
- ((const char *)(info) +
- sizeof(*info)),
- alignof(u64a))));
- return table;
-}
-
-static
-void storeInitialRingTopPatch(const struct RepeatInfo *info,
- struct RepeatRingControl *xs,
- u8 *state, u64a offset) {
- DEBUG_PRINTF("set the first patch, offset=%llu\n", offset);
- xs->offset = offset;
-
- u8 *active = state;
- u32 patch_count = info->patchCount;
- mmbit_clear(active, patch_count);
- mmbit_set(active, patch_count, 0);
-
- u8 *ring = active + info->patchesOffset;
- u32 encoding_size = info->encodingSize;
- partial_store_u64a(ring, 1ull, encoding_size);
- xs->first = 0;
- xs->last = 1;
-}
-
-static
-u32 getSparseOptimalTargetValue(const struct RepeatInfo *info,
- const u32 tval, u64a *val) {
- u32 patch_size = info->patchSize;
- const u64a *repeatTable = getImplTable(info);
- u32 loc = 0;
- DEBUG_PRINTF("val:%llu \n", *val);
- for (u32 i = 1; i <= patch_size - tval; i++) {
- u64a tmp = repeatTable[patch_size - i];
- if (*val >= tmp) {
- *val -= tmp;
- loc = i;
- i += (info->minPeriod - 1);
- }
- }
-
- return loc;
-}
-
-static
-u64a sparseLastTop(const struct RepeatInfo *info,
- const struct RepeatRingControl *xs, const u8 *state) {
- DEBUG_PRINTF("looking for last top\n");
- u32 patch_size = info->patchSize;
- u32 patch_count = info->patchCount;
- u32 encoding_size = info->encodingSize;
-
- u32 occ = ringOccupancy(xs, patch_count);
- u32 patch = xs->first + occ - 1;
- if (patch >= patch_count) {
- patch -= patch_count;
- }
-
- DEBUG_PRINTF("patch%u encoding_size%u occ%u\n", patch, encoding_size, occ);
- const u8 *ring = state + info->patchesOffset;
- u64a val = partial_load_u64a(ring + encoding_size * patch, encoding_size);
-
- DEBUG_PRINTF("val:%llu\n", val);
- const u64a *repeatTable = getImplTable(info);
- for (s32 i = patch_size - 1; i >= 0; i--) {
- if (val >= repeatTable[i]) {
- DEBUG_PRINTF("xs->offset%llu v%u p%llu\n",
- xs->offset, i, repeatTable[i]);
- return xs->offset + i + (occ - 1) * patch_size;
- }
- }
-
- assert(0);
- return 0;
-}
-
-u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state) {
- return sparseLastTop(info, &ctrl->ring, state);
-}
-
-u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state, u64a offset) {
- const struct RepeatRingControl *xs = &ctrl->ring;
-
- DEBUG_PRINTF("repeat [%u, %u] looking for match after %llu\n",
- info->repeatMin, info->repeatMax, offset);
-
- assert(offset >= xs->offset);
-
- u64a nextOffset = offset + 1;
-
- u32 patch_size = info->patchSize;
- u32 patch;
- u32 tval;
- if (nextOffset <= xs->offset + info->repeatMin) {
- patch = xs->first;
- tval = 0;
- } else if (nextOffset > sparseLastTop(info, xs, state) + info->repeatMax) {
- DEBUG_PRINTF("ring is stale\n");
- return 0;
- } else {
- assert(nextOffset - xs->offset < UINT32_MAX); // ring is not stale
- u32 delta = (u32)(nextOffset - xs->offset);
- u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
- patch = lower / patch_size;
- tval = lower - patch * patch_size;
- }
-
- DEBUG_PRINTF("patch %u\n", patch);
- u32 patch_count = info->patchCount;
- if (patch >= patch_count) {
- return 0;
- }
-
- DEBUG_PRINTF("initial test for %u\n", tval);
-
- u32 begin = xs->first + patch;
- if (begin >= patch_count) {
- begin -= patch_count;
- }
-
- const u8 *active = (const u8 *)state;
- const u8 *ring = active + info->patchesOffset;
- u32 encoding_size = info->encodingSize;
- const u32 end = begin >= xs->last ? patch_count : xs->last;
- u32 low = tval;
- u64a diff = 0, loc = 0;
- DEBUG_PRINTF("begin %u end %u\n", begin, end);
- for (u32 p = mmbit_iterate_bounded(active, patch_count, begin, end);
- p != MMB_INVALID; p = mmbit_iterate_bounded(active, patch_count,
- p + 1, end)) {
- if (p != begin) {
- low = 0;
- }
-
- u64a val = partial_load_u64a(ring + encoding_size * p, encoding_size);
- u32 p1 = 0;
- if (p >= xs->first) {
- p1 = p - xs->first;
- } else {
- p1 = p + patch_count - xs->first;
- }
-
- if (val) {
- loc = getSparseOptimalTargetValue(info, low, &val);
- diff = (p1 + 1) * patch_size - loc;
- }
- if (loc) {
- u64a rv = MAX(nextOffset, xs->offset + info->repeatMin + diff);
- DEBUG_PRINTF("offset%llu next match at %llu\n", xs->offset, rv);
- return rv;
- }
- low = 0;
- }
-
- low = 0;
- if (begin >= xs->last) {
- for (u32 p = mmbit_iterate_bounded(active, patch_count, 0, xs->last);
- p != MMB_INVALID; p = mmbit_iterate_bounded(active, patch_count,
- p + 1, xs->last)) {
-
- u64a val = partial_load_u64a(ring + encoding_size * p,
- encoding_size);
- if (val) {
- loc = getSparseOptimalTargetValue(info, low, &val);
- diff = (p + 1) * patch_size - loc;
- }
- if (loc) {
- u64a rv = MAX(nextOffset, xs->offset + info->repeatMin +
- diff + (end - xs->first) * patch_size);
- DEBUG_PRINTF("next match at %llu\n", rv);
- return rv;
- }
- }
- }
-
- DEBUG_PRINTF("next match\n");
- return 0;
-}
-
-void repeatStoreSparseOptimalP(const struct RepeatInfo *info,
- union RepeatControl *ctrl, void *state,
- u64a offset, char is_alive) {
- struct RepeatRingControl *xs = &ctrl->ring;
- u8 *active = (u8 *)state;
-
- DEBUG_PRINTF("offset: %llu encoding_size: %u\n", offset,
- info->encodingSize);
-
- // If (a) this is the first top, or (b) the ring is stale, initialize the
- // ring and write this offset in as the first top.
- if (!is_alive ||
- offset > sparseLastTop(info, xs, state) + info->repeatMax) {
- storeInitialRingTopPatch(info, xs, active, offset);
- return;
- }
-
- // Tops should arrive in order, with no duplicates.
- assert(offset > sparseLastTop(info, xs, state));
-
- // As the ring is not stale, our delta should fit within a u32.
- assert(offset - xs->offset <= UINT32_MAX);
- u32 delta = (u32)(offset - xs->offset);
- u32 patch_size = info->patchSize;
- u32 patch_count = info->patchCount;
- u32 encoding_size = info->encodingSize;
- u32 patch = delta / patch_size;
-
- DEBUG_PRINTF("delta=%u, patch_size=%u, patch=%u\n", delta, patch_size,
- patch);
-
- u8 *ring = active + info->patchesOffset;
- u32 occ = ringOccupancy(xs, patch_count);
- u64a val = 0;
- u32 idx;
-
- DEBUG_PRINTF("patch: %u patch_count: %u occ: %u\n",
- patch, patch_count, occ);
- if (patch >= patch_count) {
- u32 patch_shift_count = patch - patch_count + 1;
- assert(patch >= patch_shift_count);
- DEBUG_PRINTF("shifting by %u\n", patch_shift_count);
- xs->offset += patch_size * patch_shift_count;
- xs->first += patch_shift_count;
- if (xs->first >= patch_count) {
- xs->first -= patch_count;
- }
- idx = xs->last + patch - occ;
- mmbit_unset_range(active, patch_count, xs->last,
- MIN(idx, patch_count));
- if (idx >= patch_count) {
- idx -= patch_count;
- mmbit_unset_range(active, patch_count, 0, idx + 1);
- }
- xs->last = idx + 1;
- if (xs->last == patch_count) {
- xs->last = 0;
- }
- } else if (patch < occ) {
- assert(patch == occ - 1);
- idx = xs->last == 0 ? patch_count - 1 : (u32)xs->last - 1;
- val = partial_load_u64a(ring + encoding_size * idx, encoding_size);
- } else {
- idx = xs->last + patch - occ;
- mmbit_unset_range(active, patch_count, xs->last,
- MIN(idx, patch_count));
- if (idx >= patch_count) {
- idx -= patch_count;
- mmbit_unset_range(active, patch_count, 0, idx + 1);
- }
- xs->last = idx + 1;
- if (xs->last == patch_count) {
- xs->last = 0;
- }
- }
-
- assert((u64a)patch * patch_size <= delta);
- u32 diff = delta - patch * patch_size;
- const u64a *repeatTable = getImplTable(info);
- val += repeatTable[diff];
-
- DEBUG_PRINTF("patch=%u, occ=%u\n", patch, occ);
- DEBUG_PRINTF("xs->first:%u xs->last:%u patch:%u\n",
- xs->first, xs->last, patch);
- DEBUG_PRINTF("value:%llu\n", val);
+ }
+}
+
+static really_inline
+const u64a *getImplTable(const struct RepeatInfo *info) {
+ const u64a *table = ((const u64a *)(ROUNDUP_PTR(
+ ((const char *)(info) +
+ sizeof(*info)),
+ alignof(u64a))));
+ return table;
+}
+
+static
+void storeInitialRingTopPatch(const struct RepeatInfo *info,
+ struct RepeatRingControl *xs,
+ u8 *state, u64a offset) {
+ DEBUG_PRINTF("set the first patch, offset=%llu\n", offset);
+ xs->offset = offset;
+
+ u8 *active = state;
+ u32 patch_count = info->patchCount;
+ mmbit_clear(active, patch_count);
+ mmbit_set(active, patch_count, 0);
+
+ u8 *ring = active + info->patchesOffset;
+ u32 encoding_size = info->encodingSize;
+ partial_store_u64a(ring, 1ull, encoding_size);
+ xs->first = 0;
+ xs->last = 1;
+}
+
+static
+u32 getSparseOptimalTargetValue(const struct RepeatInfo *info,
+ const u32 tval, u64a *val) {
+ u32 patch_size = info->patchSize;
+ const u64a *repeatTable = getImplTable(info);
+ u32 loc = 0;
+ DEBUG_PRINTF("val:%llu \n", *val);
+ for (u32 i = 1; i <= patch_size - tval; i++) {
+ u64a tmp = repeatTable[patch_size - i];
+ if (*val >= tmp) {
+ *val -= tmp;
+ loc = i;
+ i += (info->minPeriod - 1);
+ }
+ }
+
+ return loc;
+}
+
+static
+u64a sparseLastTop(const struct RepeatInfo *info,
+ const struct RepeatRingControl *xs, const u8 *state) {
+ DEBUG_PRINTF("looking for last top\n");
+ u32 patch_size = info->patchSize;
+ u32 patch_count = info->patchCount;
+ u32 encoding_size = info->encodingSize;
+
+ u32 occ = ringOccupancy(xs, patch_count);
+ u32 patch = xs->first + occ - 1;
+ if (patch >= patch_count) {
+ patch -= patch_count;
+ }
+
+ DEBUG_PRINTF("patch%u encoding_size%u occ%u\n", patch, encoding_size, occ);
+ const u8 *ring = state + info->patchesOffset;
+ u64a val = partial_load_u64a(ring + encoding_size * patch, encoding_size);
+
+ DEBUG_PRINTF("val:%llu\n", val);
+ const u64a *repeatTable = getImplTable(info);
+ for (s32 i = patch_size - 1; i >= 0; i--) {
+ if (val >= repeatTable[i]) {
+ DEBUG_PRINTF("xs->offset%llu v%u p%llu\n",
+ xs->offset, i, repeatTable[i]);
+ return xs->offset + i + (occ - 1) * patch_size;
+ }
+ }
+
+ assert(0);
+ return 0;
+}
+
+u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state) {
+ return sparseLastTop(info, &ctrl->ring, state);
+}
+
+u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state, u64a offset) {
+ const struct RepeatRingControl *xs = &ctrl->ring;
+
+ DEBUG_PRINTF("repeat [%u, %u] looking for match after %llu\n",
+ info->repeatMin, info->repeatMax, offset);
+
+ assert(offset >= xs->offset);
+
+ u64a nextOffset = offset + 1;
+
+ u32 patch_size = info->patchSize;
+ u32 patch;
+ u32 tval;
+ if (nextOffset <= xs->offset + info->repeatMin) {
+ patch = xs->first;
+ tval = 0;
+ } else if (nextOffset > sparseLastTop(info, xs, state) + info->repeatMax) {
+ DEBUG_PRINTF("ring is stale\n");
+ return 0;
+ } else {
+ assert(nextOffset - xs->offset < UINT32_MAX); // ring is not stale
+ u32 delta = (u32)(nextOffset - xs->offset);
+ u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
+ patch = lower / patch_size;
+ tval = lower - patch * patch_size;
+ }
+
+ DEBUG_PRINTF("patch %u\n", patch);
+ u32 patch_count = info->patchCount;
+ if (patch >= patch_count) {
+ return 0;
+ }
+
+ DEBUG_PRINTF("initial test for %u\n", tval);
+
+ u32 begin = xs->first + patch;
+ if (begin >= patch_count) {
+ begin -= patch_count;
+ }
+
+ const u8 *active = (const u8 *)state;
+ const u8 *ring = active + info->patchesOffset;
+ u32 encoding_size = info->encodingSize;
+ const u32 end = begin >= xs->last ? patch_count : xs->last;
+ u32 low = tval;
+ u64a diff = 0, loc = 0;
+ DEBUG_PRINTF("begin %u end %u\n", begin, end);
+ for (u32 p = mmbit_iterate_bounded(active, patch_count, begin, end);
+ p != MMB_INVALID; p = mmbit_iterate_bounded(active, patch_count,
+ p + 1, end)) {
+ if (p != begin) {
+ low = 0;
+ }
+
+ u64a val = partial_load_u64a(ring + encoding_size * p, encoding_size);
+ u32 p1 = 0;
+ if (p >= xs->first) {
+ p1 = p - xs->first;
+ } else {
+ p1 = p + patch_count - xs->first;
+ }
+
+ if (val) {
+ loc = getSparseOptimalTargetValue(info, low, &val);
+ diff = (p1 + 1) * patch_size - loc;
+ }
+ if (loc) {
+ u64a rv = MAX(nextOffset, xs->offset + info->repeatMin + diff);
+ DEBUG_PRINTF("offset%llu next match at %llu\n", xs->offset, rv);
+ return rv;
+ }
+ low = 0;
+ }
+
+ low = 0;
+ if (begin >= xs->last) {
+ for (u32 p = mmbit_iterate_bounded(active, patch_count, 0, xs->last);
+ p != MMB_INVALID; p = mmbit_iterate_bounded(active, patch_count,
+ p + 1, xs->last)) {
+
+ u64a val = partial_load_u64a(ring + encoding_size * p,
+ encoding_size);
+ if (val) {
+ loc = getSparseOptimalTargetValue(info, low, &val);
+ diff = (p + 1) * patch_size - loc;
+ }
+ if (loc) {
+ u64a rv = MAX(nextOffset, xs->offset + info->repeatMin +
+ diff + (end - xs->first) * patch_size);
+ DEBUG_PRINTF("next match at %llu\n", rv);
+ return rv;
+ }
+ }
+ }
+
+ DEBUG_PRINTF("next match\n");
+ return 0;
+}
+
+void repeatStoreSparseOptimalP(const struct RepeatInfo *info,
+ union RepeatControl *ctrl, void *state,
+ u64a offset, char is_alive) {
+ struct RepeatRingControl *xs = &ctrl->ring;
+ u8 *active = (u8 *)state;
+
+ DEBUG_PRINTF("offset: %llu encoding_size: %u\n", offset,
+ info->encodingSize);
+
+ // If (a) this is the first top, or (b) the ring is stale, initialize the
+ // ring and write this offset in as the first top.
+ if (!is_alive ||
+ offset > sparseLastTop(info, xs, state) + info->repeatMax) {
+ storeInitialRingTopPatch(info, xs, active, offset);
+ return;
+ }
+
+ // Tops should arrive in order, with no duplicates.
+ assert(offset > sparseLastTop(info, xs, state));
+
+ // As the ring is not stale, our delta should fit within a u32.
+ assert(offset - xs->offset <= UINT32_MAX);
+ u32 delta = (u32)(offset - xs->offset);
+ u32 patch_size = info->patchSize;
+ u32 patch_count = info->patchCount;
+ u32 encoding_size = info->encodingSize;
+ u32 patch = delta / patch_size;
+
+ DEBUG_PRINTF("delta=%u, patch_size=%u, patch=%u\n", delta, patch_size,
+ patch);
+
+ u8 *ring = active + info->patchesOffset;
+ u32 occ = ringOccupancy(xs, patch_count);
+ u64a val = 0;
+ u32 idx;
+
+ DEBUG_PRINTF("patch: %u patch_count: %u occ: %u\n",
+ patch, patch_count, occ);
+ if (patch >= patch_count) {
+ u32 patch_shift_count = patch - patch_count + 1;
+ assert(patch >= patch_shift_count);
+ DEBUG_PRINTF("shifting by %u\n", patch_shift_count);
+ xs->offset += patch_size * patch_shift_count;
+ xs->first += patch_shift_count;
+ if (xs->first >= patch_count) {
+ xs->first -= patch_count;
+ }
+ idx = xs->last + patch - occ;
+ mmbit_unset_range(active, patch_count, xs->last,
+ MIN(idx, patch_count));
+ if (idx >= patch_count) {
+ idx -= patch_count;
+ mmbit_unset_range(active, patch_count, 0, idx + 1);
+ }
+ xs->last = idx + 1;
+ if (xs->last == patch_count) {
+ xs->last = 0;
+ }
+ } else if (patch < occ) {
+ assert(patch == occ - 1);
+ idx = xs->last == 0 ? patch_count - 1 : (u32)xs->last - 1;
+ val = partial_load_u64a(ring + encoding_size * idx, encoding_size);
+ } else {
+ idx = xs->last + patch - occ;
+ mmbit_unset_range(active, patch_count, xs->last,
+ MIN(idx, patch_count));
+ if (idx >= patch_count) {
+ idx -= patch_count;
+ mmbit_unset_range(active, patch_count, 0, idx + 1);
+ }
+ xs->last = idx + 1;
+ if (xs->last == patch_count) {
+ xs->last = 0;
+ }
+ }
+
+ assert((u64a)patch * patch_size <= delta);
+ u32 diff = delta - patch * patch_size;
+ const u64a *repeatTable = getImplTable(info);
+ val += repeatTable[diff];
+
+ DEBUG_PRINTF("patch=%u, occ=%u\n", patch, occ);
+ DEBUG_PRINTF("xs->first:%u xs->last:%u patch:%u\n",
+ xs->first, xs->last, patch);
+ DEBUG_PRINTF("value:%llu\n", val);
assert(fits_in_len_bytes(val, encoding_size));
- partial_store_u64a(ring + encoding_size * idx, val, encoding_size);
- mmbit_set(active, patch_count, idx);
-}
-
-static
-char sparseHasMatch(const struct RepeatInfo *info, const u8 *state,
- u32 lower, u32 upper) {
- u32 patch_size = info->patchSize;
- u32 patch_count = info->patchCount;
- u32 encoding_size = info->encodingSize;
- u32 patch_lower = lower / patch_size;
- u32 patch_upper = upper / patch_size;
- u32 diff = lower - patch_lower * patch_size;
-
- DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper);
- const u64a *repeatTable = getImplTable(info);
-
- const u8 *ring = state + info->patchesOffset;
- const u8 *active = state;
- u64a val;
- // test the first patch
- if (mmbit_isset(active, patch_count, patch_lower)) {
- val = partial_load_u64a(ring + encoding_size * patch_lower,
- encoding_size);
- DEBUG_PRINTF("patch_size=%u, diff=%u, table=%llu\n",
- patch_size, diff, repeatTable[diff]);
- DEBUG_PRINTF("patch_lower=%u, patch_upper=%u\n",
- patch_lower, patch_upper);
- if (patch_upper == patch_lower) {
- u32 limit = upper - patch_lower * patch_size;
- getSparseOptimalTargetValue(info, limit + 1, &val);
- }
- if (val >= repeatTable[diff]) {
- return 1;
- }
- }
-
- if (patch_lower == patch_upper) {
- return 0;
- }
-
- // test the patches between first and last
- u32 m = mmbit_iterate_bounded(active, patch_count,
- patch_lower + 1, patch_upper);
- if (m != MMB_INVALID) {
- return 1;
- }
-
- if (patch_upper == patch_count) {
- return 0;
- }
-
- // test the last patch
- if (!mmbit_isset(active, patch_count, patch_upper)) {
- return 0;
- }
- diff = (patch_upper + 1) * patch_size - upper;
- DEBUG_PRINTF("diff=%u\n", diff);
- val = partial_load_u64a(ring + encoding_size * patch_upper, encoding_size);
- getSparseOptimalTargetValue(info, patch_size - diff + 1, &val);
- if (val) {
- DEBUG_PRINTF("last patch: val=%llu\n", val);
- return 1;
- }
-
- return 0;
-}
-
-enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state, u64a offset) {
- DEBUG_PRINTF("check for match at %llu corresponding to trigger "
- "at [%llu, %llu]\n", offset, offset - info->repeatMax,
- offset - info->repeatMin);
-
- const struct RepeatRingControl *xs = &ctrl->ring;
- const u8 *ring = (const u8 *)state;
-
- assert(offset >= xs->offset);
-
- if (offset < xs->offset + info->repeatMin) {
- DEBUG_PRINTF("too soon\n");
- return REPEAT_NOMATCH;
- } else if (offset > sparseLastTop(info, xs, state) + info->repeatMax) {
- DEBUG_PRINTF("stale\n");
- return REPEAT_STALE;
- }
-
- // Our delta between the base offset of the ring and the current offset
- // must fit within the range [repeatMin, lastPossibleTop + repeatMax]. This
- // range fits comfortably within a u32.
- assert(offset - xs->offset <= UINT32_MAX);
-
- u32 delta = (u32)(offset - xs->offset);
- u32 patch_size = info->patchSize;
- u32 patch_count = info->patchCount;
- u32 occ = ringOccupancy(xs, patch_count);
-
- u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
- u32 upper = MIN(delta - info->repeatMin, occ * patch_size - 1);
-
- DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper);
- u32 patch_lower = lower / patch_size;
- u32 patch_upper = upper / patch_size;
-
- if (patch_lower >= occ) {
- DEBUG_PRINTF("too late\n");
- return REPEAT_NOMATCH;
- }
-
- u32 remaining_lower = lower - patch_lower * patch_size;
- u32 remaining_upper = upper - patch_upper * patch_size;
- patch_lower += xs->first;
- patch_upper += xs->first;
- if (patch_lower >= patch_count) {
- patch_lower -= patch_count;
- patch_upper -= patch_count;
- } else if (patch_upper >= patch_count) {
- patch_upper -= patch_count;
- }
-
- DEBUG_PRINTF("xs->first:%u xs->last:%u patch_lower:%u, patch_upper:%u\n",
- xs->first, xs->last, patch_lower, patch_upper);
-
- u32 scan_end;
- const char is_not_wrapped = (patch_lower <= patch_upper);
- if (is_not_wrapped) {
- scan_end = patch_upper * patch_size + remaining_upper;
- } else {
- scan_end = patch_count * patch_size;
- }
-
- lower = patch_lower * patch_size + remaining_lower;
- if (sparseHasMatch(info, ring, lower, scan_end)) {
- return REPEAT_MATCH;
- }
-
- if (!is_not_wrapped) {
- upper -= (patch_count - xs->first) * patch_size;
- if (sparseHasMatch(info, ring, 0, upper)) {
- return REPEAT_MATCH;
- }
- }
-
- return REPEAT_NOMATCH;
-}
+ partial_store_u64a(ring + encoding_size * idx, val, encoding_size);
+ mmbit_set(active, patch_count, idx);
+}
+
+static
+char sparseHasMatch(const struct RepeatInfo *info, const u8 *state,
+ u32 lower, u32 upper) {
+ u32 patch_size = info->patchSize;
+ u32 patch_count = info->patchCount;
+ u32 encoding_size = info->encodingSize;
+ u32 patch_lower = lower / patch_size;
+ u32 patch_upper = upper / patch_size;
+ u32 diff = lower - patch_lower * patch_size;
+
+ DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper);
+ const u64a *repeatTable = getImplTable(info);
+
+ const u8 *ring = state + info->patchesOffset;
+ const u8 *active = state;
+ u64a val;
+ // test the first patch
+ if (mmbit_isset(active, patch_count, patch_lower)) {
+ val = partial_load_u64a(ring + encoding_size * patch_lower,
+ encoding_size);
+ DEBUG_PRINTF("patch_size=%u, diff=%u, table=%llu\n",
+ patch_size, diff, repeatTable[diff]);
+ DEBUG_PRINTF("patch_lower=%u, patch_upper=%u\n",
+ patch_lower, patch_upper);
+ if (patch_upper == patch_lower) {
+ u32 limit = upper - patch_lower * patch_size;
+ getSparseOptimalTargetValue(info, limit + 1, &val);
+ }
+ if (val >= repeatTable[diff]) {
+ return 1;
+ }
+ }
+
+ if (patch_lower == patch_upper) {
+ return 0;
+ }
+
+ // test the patches between first and last
+ u32 m = mmbit_iterate_bounded(active, patch_count,
+ patch_lower + 1, patch_upper);
+ if (m != MMB_INVALID) {
+ return 1;
+ }
+
+ if (patch_upper == patch_count) {
+ return 0;
+ }
+
+ // test the last patch
+ if (!mmbit_isset(active, patch_count, patch_upper)) {
+ return 0;
+ }
+ diff = (patch_upper + 1) * patch_size - upper;
+ DEBUG_PRINTF("diff=%u\n", diff);
+ val = partial_load_u64a(ring + encoding_size * patch_upper, encoding_size);
+ getSparseOptimalTargetValue(info, patch_size - diff + 1, &val);
+ if (val) {
+ DEBUG_PRINTF("last patch: val=%llu\n", val);
+ return 1;
+ }
+
+ return 0;
+}
+
+enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state, u64a offset) {
+ DEBUG_PRINTF("check for match at %llu corresponding to trigger "
+ "at [%llu, %llu]\n", offset, offset - info->repeatMax,
+ offset - info->repeatMin);
+
+ const struct RepeatRingControl *xs = &ctrl->ring;
+ const u8 *ring = (const u8 *)state;
+
+ assert(offset >= xs->offset);
+
+ if (offset < xs->offset + info->repeatMin) {
+ DEBUG_PRINTF("too soon\n");
+ return REPEAT_NOMATCH;
+ } else if (offset > sparseLastTop(info, xs, state) + info->repeatMax) {
+ DEBUG_PRINTF("stale\n");
+ return REPEAT_STALE;
+ }
+
+ // Our delta between the base offset of the ring and the current offset
+ // must fit within the range [repeatMin, lastPossibleTop + repeatMax]. This
+ // range fits comfortably within a u32.
+ assert(offset - xs->offset <= UINT32_MAX);
+
+ u32 delta = (u32)(offset - xs->offset);
+ u32 patch_size = info->patchSize;
+ u32 patch_count = info->patchCount;
+ u32 occ = ringOccupancy(xs, patch_count);
+
+ u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
+ u32 upper = MIN(delta - info->repeatMin, occ * patch_size - 1);
+
+ DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper);
+ u32 patch_lower = lower / patch_size;
+ u32 patch_upper = upper / patch_size;
+
+ if (patch_lower >= occ) {
+ DEBUG_PRINTF("too late\n");
+ return REPEAT_NOMATCH;
+ }
+
+ u32 remaining_lower = lower - patch_lower * patch_size;
+ u32 remaining_upper = upper - patch_upper * patch_size;
+ patch_lower += xs->first;
+ patch_upper += xs->first;
+ if (patch_lower >= patch_count) {
+ patch_lower -= patch_count;
+ patch_upper -= patch_count;
+ } else if (patch_upper >= patch_count) {
+ patch_upper -= patch_count;
+ }
+
+ DEBUG_PRINTF("xs->first:%u xs->last:%u patch_lower:%u, patch_upper:%u\n",
+ xs->first, xs->last, patch_lower, patch_upper);
+
+ u32 scan_end;
+ const char is_not_wrapped = (patch_lower <= patch_upper);
+ if (is_not_wrapped) {
+ scan_end = patch_upper * patch_size + remaining_upper;
+ } else {
+ scan_end = patch_count * patch_size;
+ }
+
+ lower = patch_lower * patch_size + remaining_lower;
+ if (sparseHasMatch(info, ring, lower, scan_end)) {
+ return REPEAT_MATCH;
+ }
+
+ if (!is_not_wrapped) {
+ upper -= (patch_count - xs->first) * patch_size;
+ if (sparseHasMatch(info, ring, 0, upper)) {
+ return REPEAT_MATCH;
+ }
+ }
+
+ return REPEAT_NOMATCH;
+}
diff --git a/contrib/libs/hyperscan/src/nfa/repeat.h b/contrib/libs/hyperscan/src/nfa/repeat.h
index eeb8448ade..d4f84ea0a9 100644
--- a/contrib/libs/hyperscan/src/nfa/repeat.h
+++ b/contrib/libs/hyperscan/src/nfa/repeat.h
@@ -1,370 +1,370 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief API for handling bounded repeats.
- *
- * This file provides an internal API for handling bounded repeats of character
- * classes. It is used by the Large Bounded Repeat (LBR) engine and by the
- * bounded repeat handling in the LimEx NFA engine as well.
- *
- * The state required by these functions is split into two regions:
- *
- * 1. Control block. This is a small structure (size varies with repeat mode)
- * that may be copied around or compressed into stream state.
- * 2. Repeat state. This is a larger structure that can be quite big for large
- * repeats, often containing a multibit ring or large vector of indices.
- * This generally lives in stream state and is not copied.
- */
-
-#ifndef REPEAT_H
-#define REPEAT_H
-
-#include "ue2common.h"
-#include "repeat_internal.h"
-#include "util/bitutils.h"
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-/** Returns the offset of the most recent 'top' offset set in the repeat. */
-static really_inline
-u64a repeatLastTop(const struct RepeatInfo *info,
- const union RepeatControl *ctrl, const void *state);
-
-/** Returns the offset of the next match after 'offset', or zero if no further
- * matches are possible. */
-static really_inline
-u64a repeatNextMatch(const struct RepeatInfo *info,
- const union RepeatControl *ctrl, const void *state,
- u64a offset);
-
-/** Stores a new top in the repeat. If is_alive is false, the repeat will be
- * initialised first and this top will become the first (and only) one. */
-static really_inline
-void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl,
- void *state, u64a offset, char is_alive);
-
-/** Return type for repeatHasMatch. */
-enum RepeatMatch {
- REPEAT_NOMATCH, /**< This offset is not a valid match. */
- REPEAT_MATCH, /**< This offset is a valid match. */
- REPEAT_STALE /**< This offset is not a valid match and no greater
- offset will be (unless another top is stored). */
-};
-
-/** Query whether the repeat has a match at the given offset. Returns
- * ::REPEAT_STALE if it does not have a match at that offset _and_
- * no further matches are possible. */
-static really_inline
-enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state, u64a offset);
-
-/** \brief Serialize a packed version of the repeat control block into stream
- * state. */
-void repeatPack(char *dest, const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset);
-
-/** \brief Deserialize a packed version of the repeat control block. */
-void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset,
- union RepeatControl *ctrl);
-
-////
-//// IMPLEMENTATION.
-////
-
-u64a repeatLastTopRing(const struct RepeatInfo *info,
- const union RepeatControl *ctrl);
-
-u64a repeatLastTopRange(const union RepeatControl *ctrl,
- const void *state);
-
-u64a repeatLastTopBitmap(const union RepeatControl *ctrl);
-
-u64a repeatLastTopTrailer(const struct RepeatInfo *info,
- const union RepeatControl *ctrl);
-
-u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state);
-
-static really_inline
-u64a repeatLastTop(const struct RepeatInfo *info,
- const union RepeatControl *ctrl, const void *state) {
- assert(info && ctrl && state);
-
- switch ((enum RepeatType)info->type) {
- case REPEAT_RING:
- return repeatLastTopRing(info, ctrl);
- case REPEAT_FIRST:
- case REPEAT_LAST:
- return ctrl->offset.offset;
- case REPEAT_RANGE:
- return repeatLastTopRange(ctrl, state);
- case REPEAT_BITMAP:
- return repeatLastTopBitmap(ctrl);
- case REPEAT_SPARSE_OPTIMAL_P:
- return repeatLastTopSparseOptimalP(info, ctrl, state);
- case REPEAT_TRAILER:
- return repeatLastTopTrailer(info, ctrl);
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief API for handling bounded repeats.
+ *
+ * This file provides an internal API for handling bounded repeats of character
+ * classes. It is used by the Large Bounded Repeat (LBR) engine and by the
+ * bounded repeat handling in the LimEx NFA engine as well.
+ *
+ * The state required by these functions is split into two regions:
+ *
+ * 1. Control block. This is a small structure (size varies with repeat mode)
+ * that may be copied around or compressed into stream state.
+ * 2. Repeat state. This is a larger structure that can be quite big for large
+ * repeats, often containing a multibit ring or large vector of indices.
+ * This generally lives in stream state and is not copied.
+ */
+
+#ifndef REPEAT_H
+#define REPEAT_H
+
+#include "ue2common.h"
+#include "repeat_internal.h"
+#include "util/bitutils.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/** Returns the offset of the most recent 'top' offset set in the repeat. */
+static really_inline
+u64a repeatLastTop(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, const void *state);
+
+/** Returns the offset of the next match after 'offset', or zero if no further
+ * matches are possible. */
+static really_inline
+u64a repeatNextMatch(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, const void *state,
+ u64a offset);
+
+/** Stores a new top in the repeat. If is_alive is false, the repeat will be
+ * initialised first and this top will become the first (and only) one. */
+static really_inline
+void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl,
+ void *state, u64a offset, char is_alive);
+
+/** Return type for repeatHasMatch. */
+enum RepeatMatch {
+ REPEAT_NOMATCH, /**< This offset is not a valid match. */
+ REPEAT_MATCH, /**< This offset is a valid match. */
+ REPEAT_STALE /**< This offset is not a valid match and no greater
+ offset will be (unless another top is stored). */
+};
+
+/** Query whether the repeat has a match at the given offset. Returns
+ * ::REPEAT_STALE if it does not have a match at that offset _and_
+ * no further matches are possible. */
+static really_inline
+enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state, u64a offset);
+
+/** \brief Serialize a packed version of the repeat control block into stream
+ * state. */
+void repeatPack(char *dest, const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset);
+
+/** \brief Deserialize a packed version of the repeat control block. */
+void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset,
+ union RepeatControl *ctrl);
+
+////
+//// IMPLEMENTATION.
+////
+
+u64a repeatLastTopRing(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl);
+
+u64a repeatLastTopRange(const union RepeatControl *ctrl,
+ const void *state);
+
+u64a repeatLastTopBitmap(const union RepeatControl *ctrl);
+
+u64a repeatLastTopTrailer(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl);
+
+u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state);
+
+static really_inline
+u64a repeatLastTop(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, const void *state) {
+ assert(info && ctrl && state);
+
+ switch ((enum RepeatType)info->type) {
+ case REPEAT_RING:
+ return repeatLastTopRing(info, ctrl);
+ case REPEAT_FIRST:
+ case REPEAT_LAST:
+ return ctrl->offset.offset;
+ case REPEAT_RANGE:
+ return repeatLastTopRange(ctrl, state);
+ case REPEAT_BITMAP:
+ return repeatLastTopBitmap(ctrl);
+ case REPEAT_SPARSE_OPTIMAL_P:
+ return repeatLastTopSparseOptimalP(info, ctrl, state);
+ case REPEAT_TRAILER:
+ return repeatLastTopTrailer(info, ctrl);
case REPEAT_ALWAYS:
return 0;
- }
-
- DEBUG_PRINTF("bad repeat type %u\n", info->type);
- assert(0);
- return 0;
-}
-
-// Used for both FIRST and LAST models.
-static really_inline
-u64a repeatNextMatchOffset(const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- u64a first = ctrl->offset.offset + info->repeatMin;
- if (offset < first) {
- return first;
- }
-
- if (info->repeatMax == REPEAT_INF ||
- offset < ctrl->offset.offset + info->repeatMax) {
- return offset + 1;
- }
-
- return 0; // No more matches.
-}
-
-u64a repeatNextMatchRing(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state, u64a offset);
-
-u64a repeatNextMatchRange(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state, u64a offset);
-
-u64a repeatNextMatchBitmap(const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset);
-
-u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state, u64a offset);
-
-u64a repeatNextMatchTrailer(const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset);
-
-static really_inline
-u64a repeatNextMatch(const struct RepeatInfo *info,
- const union RepeatControl *ctrl, const void *state,
- u64a offset) {
- assert(info && ctrl && state);
- assert(ISALIGNED(info));
- assert(ISALIGNED(ctrl));
-
- switch ((enum RepeatType)info->type) {
- case REPEAT_RING:
- return repeatNextMatchRing(info, ctrl, state, offset);
- case REPEAT_FIRST:
- // fall through
- case REPEAT_LAST:
- return repeatNextMatchOffset(info, ctrl, offset);
- case REPEAT_RANGE:
- return repeatNextMatchRange(info, ctrl, state, offset);
- case REPEAT_BITMAP:
- return repeatNextMatchBitmap(info, ctrl, offset);
- case REPEAT_SPARSE_OPTIMAL_P:
- return repeatNextMatchSparseOptimalP(info, ctrl, state, offset);
- case REPEAT_TRAILER:
- return repeatNextMatchTrailer(info, ctrl, offset);
+ }
+
+ DEBUG_PRINTF("bad repeat type %u\n", info->type);
+ assert(0);
+ return 0;
+}
+
+// Used for both FIRST and LAST models.
+static really_inline
+u64a repeatNextMatchOffset(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ u64a first = ctrl->offset.offset + info->repeatMin;
+ if (offset < first) {
+ return first;
+ }
+
+ if (info->repeatMax == REPEAT_INF ||
+ offset < ctrl->offset.offset + info->repeatMax) {
+ return offset + 1;
+ }
+
+ return 0; // No more matches.
+}
+
+u64a repeatNextMatchRing(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state, u64a offset);
+
+u64a repeatNextMatchRange(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state, u64a offset);
+
+u64a repeatNextMatchBitmap(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset);
+
+u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state, u64a offset);
+
+u64a repeatNextMatchTrailer(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset);
+
+static really_inline
+u64a repeatNextMatch(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, const void *state,
+ u64a offset) {
+ assert(info && ctrl && state);
+ assert(ISALIGNED(info));
+ assert(ISALIGNED(ctrl));
+
+ switch ((enum RepeatType)info->type) {
+ case REPEAT_RING:
+ return repeatNextMatchRing(info, ctrl, state, offset);
+ case REPEAT_FIRST:
+ // fall through
+ case REPEAT_LAST:
+ return repeatNextMatchOffset(info, ctrl, offset);
+ case REPEAT_RANGE:
+ return repeatNextMatchRange(info, ctrl, state, offset);
+ case REPEAT_BITMAP:
+ return repeatNextMatchBitmap(info, ctrl, offset);
+ case REPEAT_SPARSE_OPTIMAL_P:
+ return repeatNextMatchSparseOptimalP(info, ctrl, state, offset);
+ case REPEAT_TRAILER:
+ return repeatNextMatchTrailer(info, ctrl, offset);
case REPEAT_ALWAYS:
return offset + 1;
- }
-
- DEBUG_PRINTF("bad repeat type %u\n", info->type);
- assert(0);
- return 0;
-}
-
-static really_inline
-void repeatStoreFirst(union RepeatControl *ctrl, u64a offset,
- char is_alive) {
- if (is_alive) {
- return;
- }
- ctrl->offset.offset = offset;
-}
-
-static really_inline
-void repeatStoreLast(union RepeatControl *ctrl, u64a offset,
- UNUSED char is_alive) {
- assert(!is_alive || offset >= ctrl->offset.offset);
- ctrl->offset.offset = offset;
-}
-
-void repeatStoreRing(const struct RepeatInfo *info,
- union RepeatControl *ctrl, void *state, u64a offset,
- char is_alive);
-
-void repeatStoreRange(const struct RepeatInfo *info,
- union RepeatControl *ctrl, void *state, u64a offset,
- char is_alive);
-
-void repeatStoreBitmap(const struct RepeatInfo *info,
- union RepeatControl *ctrl, u64a offset,
- char is_alive);
-
-void repeatStoreSparseOptimalP(const struct RepeatInfo *info,
- union RepeatControl *ctrl, void *state,
- u64a offset, char is_alive);
-
-void repeatStoreTrailer(const struct RepeatInfo *info,
- union RepeatControl *ctrl, u64a offset,
- char is_alive);
-
-static really_inline
-void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl,
- void *state, u64a offset, char is_alive) {
- assert(info && ctrl && state);
- assert(ISALIGNED(info));
- assert(ISALIGNED(ctrl));
-
- assert(info->repeatMin <= info->repeatMax);
- assert(info->repeatMax <= REPEAT_INF);
-
- switch ((enum RepeatType)info->type) {
- case REPEAT_RING:
- repeatStoreRing(info, ctrl, state, offset, is_alive);
- break;
- case REPEAT_FIRST:
- repeatStoreFirst(ctrl, offset, is_alive);
- break;
- case REPEAT_LAST:
- repeatStoreLast(ctrl, offset, is_alive);
- break;
- case REPEAT_RANGE:
- repeatStoreRange(info, ctrl, state, offset, is_alive);
- break;
- case REPEAT_BITMAP:
- repeatStoreBitmap(info, ctrl, offset, is_alive);
- break;
- case REPEAT_SPARSE_OPTIMAL_P:
- repeatStoreSparseOptimalP(info, ctrl, state, offset, is_alive);
- break;
- case REPEAT_TRAILER:
- repeatStoreTrailer(info, ctrl, offset, is_alive);
- break;
+ }
+
+ DEBUG_PRINTF("bad repeat type %u\n", info->type);
+ assert(0);
+ return 0;
+}
+
+static really_inline
+void repeatStoreFirst(union RepeatControl *ctrl, u64a offset,
+ char is_alive) {
+ if (is_alive) {
+ return;
+ }
+ ctrl->offset.offset = offset;
+}
+
+static really_inline
+void repeatStoreLast(union RepeatControl *ctrl, u64a offset,
+ UNUSED char is_alive) {
+ assert(!is_alive || offset >= ctrl->offset.offset);
+ ctrl->offset.offset = offset;
+}
+
+void repeatStoreRing(const struct RepeatInfo *info,
+ union RepeatControl *ctrl, void *state, u64a offset,
+ char is_alive);
+
+void repeatStoreRange(const struct RepeatInfo *info,
+ union RepeatControl *ctrl, void *state, u64a offset,
+ char is_alive);
+
+void repeatStoreBitmap(const struct RepeatInfo *info,
+ union RepeatControl *ctrl, u64a offset,
+ char is_alive);
+
+void repeatStoreSparseOptimalP(const struct RepeatInfo *info,
+ union RepeatControl *ctrl, void *state,
+ u64a offset, char is_alive);
+
+void repeatStoreTrailer(const struct RepeatInfo *info,
+ union RepeatControl *ctrl, u64a offset,
+ char is_alive);
+
+static really_inline
+void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl,
+ void *state, u64a offset, char is_alive) {
+ assert(info && ctrl && state);
+ assert(ISALIGNED(info));
+ assert(ISALIGNED(ctrl));
+
+ assert(info->repeatMin <= info->repeatMax);
+ assert(info->repeatMax <= REPEAT_INF);
+
+ switch ((enum RepeatType)info->type) {
+ case REPEAT_RING:
+ repeatStoreRing(info, ctrl, state, offset, is_alive);
+ break;
+ case REPEAT_FIRST:
+ repeatStoreFirst(ctrl, offset, is_alive);
+ break;
+ case REPEAT_LAST:
+ repeatStoreLast(ctrl, offset, is_alive);
+ break;
+ case REPEAT_RANGE:
+ repeatStoreRange(info, ctrl, state, offset, is_alive);
+ break;
+ case REPEAT_BITMAP:
+ repeatStoreBitmap(info, ctrl, offset, is_alive);
+ break;
+ case REPEAT_SPARSE_OPTIMAL_P:
+ repeatStoreSparseOptimalP(info, ctrl, state, offset, is_alive);
+ break;
+ case REPEAT_TRAILER:
+ repeatStoreTrailer(info, ctrl, offset, is_alive);
+ break;
case REPEAT_ALWAYS:
/* nothing to do - no state */
break;
- }
-}
-
-static really_inline
-enum RepeatMatch repeatHasMatchFirst(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- u64a offset) {
- if (offset < ctrl->offset.offset + info->repeatMin) {
- return REPEAT_NOMATCH;
- }
-
- // FIRST models are {N,} repeats, i.e. they always have inf max depth.
- assert(info->repeatMax == REPEAT_INF);
- return REPEAT_MATCH;
-}
-
-static really_inline
-enum RepeatMatch repeatHasMatchLast(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- u64a offset) {
- if (offset < ctrl->offset.offset + info->repeatMin) {
- return REPEAT_NOMATCH;
- }
- assert(info->repeatMax < REPEAT_INF);
- if (offset <= ctrl->offset.offset + info->repeatMax) {
- return REPEAT_MATCH;
- }
- return REPEAT_STALE;
-}
-
-enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state, u64a offset);
-
-enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state, u64a offset);
-
-enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state, u64a offset);
-
-enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- u64a offset);
-
-enum RepeatMatch repeatHasMatchTrailer(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- u64a offset);
-
-static really_inline
-enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state, u64a offset) {
- assert(info && ctrl && state);
- assert(ISALIGNED(info));
- assert(ISALIGNED(ctrl));
-
- switch ((enum RepeatType)info->type) {
- case REPEAT_RING:
- return repeatHasMatchRing(info, ctrl, state, offset);
- case REPEAT_FIRST:
- return repeatHasMatchFirst(info, ctrl, offset);
- case REPEAT_LAST:
- return repeatHasMatchLast(info, ctrl, offset);
- case REPEAT_RANGE:
- return repeatHasMatchRange(info, ctrl, state, offset);
- case REPEAT_BITMAP:
- return repeatHasMatchBitmap(info, ctrl, offset);
- case REPEAT_SPARSE_OPTIMAL_P:
- return repeatHasMatchSparseOptimalP(info, ctrl, state, offset);
- case REPEAT_TRAILER:
- return repeatHasMatchTrailer(info, ctrl, offset);
+ }
+}
+
+static really_inline
+enum RepeatMatch repeatHasMatchFirst(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ u64a offset) {
+ if (offset < ctrl->offset.offset + info->repeatMin) {
+ return REPEAT_NOMATCH;
+ }
+
+ // FIRST models are {N,} repeats, i.e. they always have inf max depth.
+ assert(info->repeatMax == REPEAT_INF);
+ return REPEAT_MATCH;
+}
+
+static really_inline
+enum RepeatMatch repeatHasMatchLast(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ u64a offset) {
+ if (offset < ctrl->offset.offset + info->repeatMin) {
+ return REPEAT_NOMATCH;
+ }
+ assert(info->repeatMax < REPEAT_INF);
+ if (offset <= ctrl->offset.offset + info->repeatMax) {
+ return REPEAT_MATCH;
+ }
+ return REPEAT_STALE;
+}
+
+enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state, u64a offset);
+
+enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state, u64a offset);
+
+enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state, u64a offset);
+
+enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ u64a offset);
+
+enum RepeatMatch repeatHasMatchTrailer(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ u64a offset);
+
+static really_inline
+enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state, u64a offset) {
+ assert(info && ctrl && state);
+ assert(ISALIGNED(info));
+ assert(ISALIGNED(ctrl));
+
+ switch ((enum RepeatType)info->type) {
+ case REPEAT_RING:
+ return repeatHasMatchRing(info, ctrl, state, offset);
+ case REPEAT_FIRST:
+ return repeatHasMatchFirst(info, ctrl, offset);
+ case REPEAT_LAST:
+ return repeatHasMatchLast(info, ctrl, offset);
+ case REPEAT_RANGE:
+ return repeatHasMatchRange(info, ctrl, state, offset);
+ case REPEAT_BITMAP:
+ return repeatHasMatchBitmap(info, ctrl, offset);
+ case REPEAT_SPARSE_OPTIMAL_P:
+ return repeatHasMatchSparseOptimalP(info, ctrl, state, offset);
+ case REPEAT_TRAILER:
+ return repeatHasMatchTrailer(info, ctrl, offset);
case REPEAT_ALWAYS:
return REPEAT_MATCH;
- }
-
- assert(0);
- return REPEAT_NOMATCH;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // REPEAT_H
+ }
+
+ assert(0);
+ return REPEAT_NOMATCH;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // REPEAT_H
diff --git a/contrib/libs/hyperscan/src/nfa/repeat_internal.h b/contrib/libs/hyperscan/src/nfa/repeat_internal.h
index 399e53c93f..9e3f455c80 100644
--- a/contrib/libs/hyperscan/src/nfa/repeat_internal.h
+++ b/contrib/libs/hyperscan/src/nfa/repeat_internal.h
@@ -1,87 +1,87 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef REPEAT_INTERNAL_H
-#define REPEAT_INTERNAL_H
-
-#include "ue2common.h"
-
-/** \file
- * \brief Bounded Repeat models.
- *
- * Used by the NFA, to represent bounded repeats managed via special POS and
- * TUG exceptions, and by the LBR (limited bounded repeat) and Castle
- * specialist engines.
- *
- * We currently have a number of different kinds of bounded repeat model, for
- * different kinds of {N,M} repeats, described by ::RepeatType.
- */
-
-/** Different types of bounded repeats. */
-enum RepeatType {
- /** General mechanism for tracking {N,M} repeats. Stores the first top as
- * an absolute offset, then subsequent tops in the {N,M} range as a ring of
- * relative top indices stored in a multibit. */
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef REPEAT_INTERNAL_H
+#define REPEAT_INTERNAL_H
+
+#include "ue2common.h"
+
+/** \file
+ * \brief Bounded Repeat models.
+ *
+ * Used by the NFA, to represent bounded repeats managed via special POS and
+ * TUG exceptions, and by the LBR (limited bounded repeat) and Castle
+ * specialist engines.
+ *
+ * We currently have a number of different kinds of bounded repeat model, for
+ * different kinds of {N,M} repeats, described by ::RepeatType.
+ */
+
+/** Different types of bounded repeats. */
+enum RepeatType {
+ /** General mechanism for tracking {N,M} repeats. Stores the first top as
+ * an absolute offset, then subsequent tops in the {N,M} range as a ring of
+ * relative top indices stored in a multibit. */
REPEAT_RING,
-
- /** Used to track {N,} repeats. Uses the \ref RepeatOffsetControl structure,
- * since only the first top encountered needs to be stored. */
+
+ /** Used to track {N,} repeats. Uses the \ref RepeatOffsetControl structure,
+ * since only the first top encountered needs to be stored. */
REPEAT_FIRST,
-
- /** Used to track {0,N} repeats. Much like ::REPEAT_FIRST, except that we
- * store the most recent top encountered. */
+
+ /** Used to track {0,N} repeats. Much like ::REPEAT_FIRST, except that we
+ * store the most recent top encountered. */
REPEAT_LAST,
-
- /** Like ::REPEAT_RING, this is also used for {N,M} repeats, but for cases
- * where there is a large difference between N and M, and developed to
- * reduce the state requirements of this case (relative to the RING model).
- * Uses a small ordered array of top indices relative to \ref
- * RepeatRangeControl::offset. */
+
+ /** Like ::REPEAT_RING, this is also used for {N,M} repeats, but for cases
+ * where there is a large difference between N and M, and developed to
+ * reduce the state requirements of this case (relative to the RING model).
+ * Uses a small ordered array of top indices relative to \ref
+ * RepeatRangeControl::offset. */
REPEAT_RANGE,
-
- /** Used for {N,M} repeats where 0 < M <= 64. Uses the \ref
- * RepeatBitmapControl structure at runtime. */
+
+ /** Used for {N,M} repeats where 0 < M <= 64. Uses the \ref
+ * RepeatBitmapControl structure at runtime. */
REPEAT_BITMAP,
-
- /** Optimal mechanism for tracking {N,M} repeats when there is a bound on
- * how frequently they can be retriggered.
- * Assume f(repeat, min) representing the number of possible bit patterns
- * we can have for repeat size = repeat, minimum period = min
- * We will have the following recurrence relation:
- * f(repeat, min) = f(repeat - 1, min) + f(repeat - min, min);
- * We use this recurrence to encode bit patterns with 64-bit values by
- * referencing a table that stores values from f(0, min) to f(repeat, min)
- * eg: repeat = 5, min = 2. 10001 => f(4,2) + f(0,2) = 9.
- * We search the optimal patch size between min and repeat in advance and
+
+ /** Optimal mechanism for tracking {N,M} repeats when there is a bound on
+ * how frequently they can be retriggered.
+ * Assume f(repeat, min) representing the number of possible bit patterns
+ * we can have for repeat size = repeat, minimum period = min
+ * We will have the following recurrence relation:
+ * f(repeat, min) = f(repeat - 1, min) + f(repeat - min, min);
+ * We use this recurrence to encode bit patterns with 64-bit values by
+ * referencing a table that stores values from f(0, min) to f(repeat, min)
+ * eg: repeat = 5, min = 2. 10001 => f(4,2) + f(0,2) = 9.
+ * We search the optimal patch size between min and repeat in advance and
* use the scheme above to do encoding and decoding to reduce stream state
* size. */
REPEAT_SPARSE_OPTIMAL_P,
-
+
/** Used for {N,M} repeats where 0 < N < 64. Uses the
* \ref RepeatTrailerControl structure at runtime. */
REPEAT_TRAILER,
@@ -89,130 +89,130 @@ enum RepeatType {
/** Degenerate repeat that always returns true. Used by castle for pseudo
* [^X]* repeats. */
REPEAT_ALWAYS,
-};
-
-/**
- * \brief Value used to represent an unbounded max repeat.
- *
- * Note that we do not support \ref RepeatInfo::repeatMax values larger than
- * this.
- */
-#define REPEAT_INF 65535
-
-/** Max slots used by ::REPEAT_RANGE repeat model. */
-#define REPEAT_RANGE_MAX_SLOTS 16
-
-/** Structure describing a bounded repeat in the bytecode */
-struct RepeatInfo {
- u8 type; //!< from enum RepeatType.
- u32 repeatMin; //!< minimum number of repeats.
- u32 repeatMax; //!< maximum number of repeats, or REPEAT_INF if unbounded.
-
- /** Maximum value that is required to be stored in the control block
- * counters. Any value greater than this will be capped at the horizon.
- */
- u32 horizon;
-
- /** Size of the compressed control block in bytes. This is what is written
- * out to stream state at stream boundaries. */
- u32 packedCtrlSize;
-
- /** Size of the repeat state block in bytes. This is where the REPEAT_RANGE
- * vector and REPEAT_RING multibit are stored, in stream state, and they
- * are manipulated directly (i.e. not copied at stream boundaries). */
- u32 stateSize;
-
- /** How soon after one trigger we can see the next trigger.
- * Used by REPEAT_SPARSE_OPTIMAL_P. */
- u32 minPeriod;
-
- /** Packed control block field sizes (in bits), used by REPEAT_TRAILER. */
- u32 packedFieldSizes[2];
-
- /* Number of patches, used by REPEAT_SPARSE_OPTIMAL_P. */
- u32 patchCount;
-
- /* Optimal patch length, used by REPEAT_SPARSE_OPTIMAL_P. */
- u32 patchSize;
-
- /* Encoding patch length in bytes, used by REPEAT_SPARSE_OPTIMAL_P. */
- u32 encodingSize;
-
- /* RepeatInfo struct length including table size. */
- u32 length;
-
- /** Offset of patches relative to the start of repeat stream state,
- * used by REPEAT_SPARSE_OPTIMAL_P. */
- u32 patchesOffset;
-};
-
-/** Runtime control block structure for ::REPEAT_RING and
- * ::REPEAT_SPARSE_OPTIMAL_P bounded repeats. Note that this struct is packed
- * (may not be aligned). */
-struct RepeatRingControl {
- u64a offset; //!< index of first top.
- u16 first; //!< start index in ring.
- u16 last; //!< end index in ring.
-};
-
-/** Runtime control block structure for ::REPEAT_RANGE bounded repeats. Note
- * that this struct is packed (may not be aligned). */
-struct RepeatRangeControl {
- u64a offset; //!< index of first top.
- u8 num; //!< number of elements in array.
-};
-
-/** Runtime control block structure for cases where only a single offset is
- * needed to track the repeat, both ::REPEAT_FIRST and ::REPEAT_LAST. Note that
- * this struct is packed (may not be aligned). */
-struct RepeatOffsetControl {
- u64a offset; //!< index of a top.
-};
-
-/** Runtime control block structure for ::REPEAT_BITMAP bounded repeats. */
-struct RepeatBitmapControl {
- u64a offset; //!< index of first top.
- u64a bitmap; //!< forward bitmap of tops relative to base offset.
-};
-
-/** Runtime control block structure for ::REPEAT_TRAILER bounded repeats. */
-struct RepeatTrailerControl {
- u64a offset; //!< min extent of most recent match window.
- u64a bitmap; //!< trailing bitmap of earlier matches, relative to offset.
-};
-
-/** \brief Union of control block types, used at runtime. */
-union RepeatControl {
- struct RepeatRingControl ring;
- struct RepeatRangeControl range;
- struct RepeatOffsetControl offset;
- struct RepeatBitmapControl bitmap;
- struct RepeatTrailerControl trailer;
-};
-
-/** For debugging, returns the name of a repeat model. */
-static really_inline UNUSED
-const char *repeatTypeName(u8 type) {
- switch ((enum RepeatType)type) {
- case REPEAT_RING:
- return "RING";
- case REPEAT_FIRST:
- return "FIRST";
- case REPEAT_LAST:
- return "LAST";
- case REPEAT_RANGE:
- return "RANGE";
- case REPEAT_BITMAP:
- return "BITMAP";
- case REPEAT_SPARSE_OPTIMAL_P:
- return "SPARSE_OPTIMAL_P";
- case REPEAT_TRAILER:
- return "TRAILER";
+};
+
+/**
+ * \brief Value used to represent an unbounded max repeat.
+ *
+ * Note that we do not support \ref RepeatInfo::repeatMax values larger than
+ * this.
+ */
+#define REPEAT_INF 65535
+
+/** Max slots used by ::REPEAT_RANGE repeat model. */
+#define REPEAT_RANGE_MAX_SLOTS 16
+
+/** Structure describing a bounded repeat in the bytecode */
+struct RepeatInfo {
+ u8 type; //!< from enum RepeatType.
+ u32 repeatMin; //!< minimum number of repeats.
+ u32 repeatMax; //!< maximum number of repeats, or REPEAT_INF if unbounded.
+
+ /** Maximum value that is required to be stored in the control block
+ * counters. Any value greater than this will be capped at the horizon.
+ */
+ u32 horizon;
+
+ /** Size of the compressed control block in bytes. This is what is written
+ * out to stream state at stream boundaries. */
+ u32 packedCtrlSize;
+
+ /** Size of the repeat state block in bytes. This is where the REPEAT_RANGE
+ * vector and REPEAT_RING multibit are stored, in stream state, and they
+ * are manipulated directly (i.e. not copied at stream boundaries). */
+ u32 stateSize;
+
+ /** How soon after one trigger we can see the next trigger.
+ * Used by REPEAT_SPARSE_OPTIMAL_P. */
+ u32 minPeriod;
+
+ /** Packed control block field sizes (in bits), used by REPEAT_TRAILER. */
+ u32 packedFieldSizes[2];
+
+ /* Number of patches, used by REPEAT_SPARSE_OPTIMAL_P. */
+ u32 patchCount;
+
+ /* Optimal patch length, used by REPEAT_SPARSE_OPTIMAL_P. */
+ u32 patchSize;
+
+ /* Encoding patch length in bytes, used by REPEAT_SPARSE_OPTIMAL_P. */
+ u32 encodingSize;
+
+ /* RepeatInfo struct length including table size. */
+ u32 length;
+
+ /** Offset of patches relative to the start of repeat stream state,
+ * used by REPEAT_SPARSE_OPTIMAL_P. */
+ u32 patchesOffset;
+};
+
+/** Runtime control block structure for ::REPEAT_RING and
+ * ::REPEAT_SPARSE_OPTIMAL_P bounded repeats. Note that this struct is packed
+ * (may not be aligned). */
+struct RepeatRingControl {
+ u64a offset; //!< index of first top.
+ u16 first; //!< start index in ring.
+ u16 last; //!< end index in ring.
+};
+
+/** Runtime control block structure for ::REPEAT_RANGE bounded repeats. Note
+ * that this struct is packed (may not be aligned). */
+struct RepeatRangeControl {
+ u64a offset; //!< index of first top.
+ u8 num; //!< number of elements in array.
+};
+
+/** Runtime control block structure for cases where only a single offset is
+ * needed to track the repeat, both ::REPEAT_FIRST and ::REPEAT_LAST. Note that
+ * this struct is packed (may not be aligned). */
+struct RepeatOffsetControl {
+ u64a offset; //!< index of a top.
+};
+
+/** Runtime control block structure for ::REPEAT_BITMAP bounded repeats. */
+struct RepeatBitmapControl {
+ u64a offset; //!< index of first top.
+ u64a bitmap; //!< forward bitmap of tops relative to base offset.
+};
+
+/** Runtime control block structure for ::REPEAT_TRAILER bounded repeats. */
+struct RepeatTrailerControl {
+ u64a offset; //!< min extent of most recent match window.
+ u64a bitmap; //!< trailing bitmap of earlier matches, relative to offset.
+};
+
+/** \brief Union of control block types, used at runtime. */
+union RepeatControl {
+ struct RepeatRingControl ring;
+ struct RepeatRangeControl range;
+ struct RepeatOffsetControl offset;
+ struct RepeatBitmapControl bitmap;
+ struct RepeatTrailerControl trailer;
+};
+
+/** For debugging, returns the name of a repeat model. */
+static really_inline UNUSED
+const char *repeatTypeName(u8 type) {
+ switch ((enum RepeatType)type) {
+ case REPEAT_RING:
+ return "RING";
+ case REPEAT_FIRST:
+ return "FIRST";
+ case REPEAT_LAST:
+ return "LAST";
+ case REPEAT_RANGE:
+ return "RANGE";
+ case REPEAT_BITMAP:
+ return "BITMAP";
+ case REPEAT_SPARSE_OPTIMAL_P:
+ return "SPARSE_OPTIMAL_P";
+ case REPEAT_TRAILER:
+ return "TRAILER";
case REPEAT_ALWAYS:
return "ALWAYS";
- }
- assert(0);
- return "UNKNOWN";
-}
-
-#endif // REPEAT_INTERNAL_H
+ }
+ assert(0);
+ return "UNKNOWN";
+}
+
+#endif // REPEAT_INTERNAL_H
diff --git a/contrib/libs/hyperscan/src/nfa/repeatcompile.cpp b/contrib/libs/hyperscan/src/nfa/repeatcompile.cpp
index 48d96bfa06..934dd29e6b 100644
--- a/contrib/libs/hyperscan/src/nfa/repeatcompile.cpp
+++ b/contrib/libs/hyperscan/src/nfa/repeatcompile.cpp
@@ -1,211 +1,211 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Bounded repeat compile-time code.
- */
-#include "repeatcompile.h"
-#include "util/bitutils.h"
-#include "util/charreach.h"
-#include "util/depth.h"
-#include "util/dump_charclass.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Bounded repeat compile-time code.
+ */
+#include "repeatcompile.h"
+#include "util/bitutils.h"
+#include "util/charreach.h"
+#include "util/depth.h"
+#include "util/dump_charclass.h"
#include "util/multibit_build.h"
-#include "util/verify_types.h"
-
-#include <algorithm>
-#include <cstring> // memset
-#include <utility>
-
-using namespace std;
-
-namespace ue2 {
-
-/** \brief Calculate the number of slots required to store the given repeat in
- * a RANGE model. */
-static
-u32 numRangeSlots(u32 repeatMin, u32 repeatMax) {
- assert(repeatMax > repeatMin);
-
- u32 d = repeatMax - repeatMin;
- u32 slots = 2 * ((repeatMax / d) + 1);
- return slots;
-}
-
-static
-u32 calcPackedBits(u64a val) {
- assert(val);
- if (val <= 1) {
- return 1;
- }
- u32 bits = lg2_64(val - 1) + 1U; /* lg2 rounds down */
- DEBUG_PRINTF("packing %llu into %u bits\n", val, bits);
- return bits;
-}
-
-/* returns the min number of bytes required to represent val options */
-u32 calcPackedBytes(u64a val) {
- u32 rv = (calcPackedBits(val) + 7U) / 8U;
- DEBUG_PRINTF("packing %llu into %u bytes\n", val, rv);
- return rv;
-}
-
-static
-u32 repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax,
- const u32 minPeriod) {
- u32 repeatTmp = info->patchCount > 2 ? 64 : (u32)repeatMax;
- u32 repeat_index = repeatTmp < minPeriod ? repeatTmp : minPeriod;
- for (u32 i = 0; i <= repeat_index; i++) {
- info->table.push_back(i + 1);
- }
- for (u32 i = minPeriod + 1; i <= repeatTmp; i++) {
- info->table.push_back(info->table[i - 1] + info->table[i - minPeriod]);
- if (info->table[i] < info->table[i - 1]) {
- return i - 1;
- }
- }
- return 0;
-}
-
-static
-u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax,
- const u32 minPeriod, u32 rv) {
- u32 cnt = 0;
- u32 patch_bits = 0;
- u32 total_size = 0;
- u32 min = ~0U;
- u32 patch_len = 0;
-
- if (!rv) {
- rv = repeatMax;
- }
-
- for (u32 i = minPeriod; i <= rv; i++) {
- cnt = ((u32)repeatMax + (i - 1)) / i + 1;
-
- // no bit packing version
- patch_bits = calcPackedBits(info->table[i]);
- total_size = (patch_bits + 7U) / 8U * cnt;
-
- if (total_size < min) {
- patch_len = i;
- min = total_size;
- info->patchCount = cnt;
- }
- }
- return patch_len;
-}
-
-RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin,
- const depth &repeatMax, u32 minPeriod)
- : stateSize(0), packedCtrlSize(0), horizon(0), patchCount(0),
- patchSize(0), encodingSize(0), patchesOffset(0) {
- assert(repeatMin <= repeatMax);
- assert(repeatMax.is_reachable());
- assert(minPeriod || type != REPEAT_SPARSE_OPTIMAL_P);
-
- switch (type) {
- case REPEAT_FIRST:
- assert(repeatMin.is_finite());
- stateSize = 0; // everything is in the control block.
- horizon = repeatMin;
- packedCtrlSize = calcPackedBytes(horizon + 1);
- break;
- case REPEAT_LAST:
- assert(repeatMax.is_finite());
- stateSize = 0; // everything is in the control block.
- horizon = repeatMax + 1;
- packedCtrlSize = calcPackedBytes(horizon + 1);
- break;
- case REPEAT_RING:
- assert(repeatMax.is_finite());
- stateSize = mmbit_size(repeatMax + 1);
- horizon = repeatMax * 2 + 1; /* TODO: investigate tightening */
- // Packed offset member, plus two bytes for each ring index, reduced to
- // one byte each if they'll fit in eight bits.
- {
- u32 offset_len = calcPackedBytes(horizon + 1);
- u32 ring_indices_len = repeatMax < depth(254) ? 2 : 4;
- packedCtrlSize = offset_len + ring_indices_len;
- }
- break;
- case REPEAT_RANGE:
- assert(repeatMax.is_finite());
- assert(repeatMin < repeatMax);
- stateSize = numRangeSlots(repeatMin, repeatMax) * sizeof(u16);
- horizon = repeatMax * 2 + 1;
- // Packed offset member, plus one byte for the number of range
- // elements.
- packedCtrlSize = calcPackedBytes(horizon + 1) + 1;
- break;
- case REPEAT_BITMAP:
- stateSize = 0; // everything is in the control block.
- horizon = 0; // unused
- packedCtrlSize = ROUNDUP_N(repeatMax + 1, 8) / 8;
- break;
- case REPEAT_SPARSE_OPTIMAL_P:
- assert(minPeriod);
- assert(repeatMax.is_finite());
- {
- u32 rv = repeatRecurTable(this, repeatMax, minPeriod);
- u32 repeatTmp = 0;
- if ((u32)repeatMax < minPeriod) {
- repeatTmp = repeatMax;
- patchCount = 1;
- } else {
- // find optimal patch size
- repeatTmp =
- findOptimalPatchSize(this, repeatMax, minPeriod, rv);
- assert(patchCount < 65536);
- }
- DEBUG_PRINTF("repeat[%u %u], period=%u\n", (u32)repeatMin,
- (u32)repeatMax, minPeriod);
- u64a maxVal = table[repeatTmp];
- encodingSize = calcPackedBytes(maxVal);
- patchSize = repeatTmp;
- assert(encodingSize <= 64);
-
- patchesOffset = mmbit_size(patchCount);
- stateSize = patchesOffset + encodingSize * patchCount;
- horizon = (repeatTmp * patchCount) * 2 + 1;
- u32 ring_indices_len = patchCount < depth(254) ? 2 : 4;
- packedCtrlSize = calcPackedBytes(horizon + 1) + ring_indices_len;
- }
- break;
- case REPEAT_TRAILER:
- assert(repeatMax.is_finite());
- assert(repeatMin <= depth(64));
- stateSize = 0; // everything is in the control block.
- horizon = repeatMax + 1;
- packedFieldSizes.resize(2);
- packedFieldSizes[0] = calcPackedBits(horizon + 1);
- packedFieldSizes[1] = repeatMin;
- packedCtrlSize = (packedFieldSizes[0] + packedFieldSizes[1] + 7U) / 8U;
- break;
+#include "util/verify_types.h"
+
+#include <algorithm>
+#include <cstring> // memset
+#include <utility>
+
+using namespace std;
+
+namespace ue2 {
+
+/** \brief Calculate the number of slots required to store the given repeat in
+ * a RANGE model. */
+static
+u32 numRangeSlots(u32 repeatMin, u32 repeatMax) {
+ assert(repeatMax > repeatMin);
+
+ u32 d = repeatMax - repeatMin;
+ u32 slots = 2 * ((repeatMax / d) + 1);
+ return slots;
+}
+
+static
+u32 calcPackedBits(u64a val) {
+ assert(val);
+ if (val <= 1) {
+ return 1;
+ }
+ u32 bits = lg2_64(val - 1) + 1U; /* lg2 rounds down */
+ DEBUG_PRINTF("packing %llu into %u bits\n", val, bits);
+ return bits;
+}
+
+/* returns the min number of bytes required to represent val options */
+u32 calcPackedBytes(u64a val) {
+ u32 rv = (calcPackedBits(val) + 7U) / 8U;
+ DEBUG_PRINTF("packing %llu into %u bytes\n", val, rv);
+ return rv;
+}
+
+static
+u32 repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax,
+ const u32 minPeriod) {
+ u32 repeatTmp = info->patchCount > 2 ? 64 : (u32)repeatMax;
+ u32 repeat_index = repeatTmp < minPeriod ? repeatTmp : minPeriod;
+ for (u32 i = 0; i <= repeat_index; i++) {
+ info->table.push_back(i + 1);
+ }
+ for (u32 i = minPeriod + 1; i <= repeatTmp; i++) {
+ info->table.push_back(info->table[i - 1] + info->table[i - minPeriod]);
+ if (info->table[i] < info->table[i - 1]) {
+ return i - 1;
+ }
+ }
+ return 0;
+}
+
+static
+u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax,
+ const u32 minPeriod, u32 rv) {
+ u32 cnt = 0;
+ u32 patch_bits = 0;
+ u32 total_size = 0;
+ u32 min = ~0U;
+ u32 patch_len = 0;
+
+ if (!rv) {
+ rv = repeatMax;
+ }
+
+ for (u32 i = minPeriod; i <= rv; i++) {
+ cnt = ((u32)repeatMax + (i - 1)) / i + 1;
+
+ // no bit packing version
+ patch_bits = calcPackedBits(info->table[i]);
+ total_size = (patch_bits + 7U) / 8U * cnt;
+
+ if (total_size < min) {
+ patch_len = i;
+ min = total_size;
+ info->patchCount = cnt;
+ }
+ }
+ return patch_len;
+}
+
+RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin,
+ const depth &repeatMax, u32 minPeriod)
+ : stateSize(0), packedCtrlSize(0), horizon(0), patchCount(0),
+ patchSize(0), encodingSize(0), patchesOffset(0) {
+ assert(repeatMin <= repeatMax);
+ assert(repeatMax.is_reachable());
+ assert(minPeriod || type != REPEAT_SPARSE_OPTIMAL_P);
+
+ switch (type) {
+ case REPEAT_FIRST:
+ assert(repeatMin.is_finite());
+ stateSize = 0; // everything is in the control block.
+ horizon = repeatMin;
+ packedCtrlSize = calcPackedBytes(horizon + 1);
+ break;
+ case REPEAT_LAST:
+ assert(repeatMax.is_finite());
+ stateSize = 0; // everything is in the control block.
+ horizon = repeatMax + 1;
+ packedCtrlSize = calcPackedBytes(horizon + 1);
+ break;
+ case REPEAT_RING:
+ assert(repeatMax.is_finite());
+ stateSize = mmbit_size(repeatMax + 1);
+ horizon = repeatMax * 2 + 1; /* TODO: investigate tightening */
+ // Packed offset member, plus two bytes for each ring index, reduced to
+ // one byte each if they'll fit in eight bits.
+ {
+ u32 offset_len = calcPackedBytes(horizon + 1);
+ u32 ring_indices_len = repeatMax < depth(254) ? 2 : 4;
+ packedCtrlSize = offset_len + ring_indices_len;
+ }
+ break;
+ case REPEAT_RANGE:
+ assert(repeatMax.is_finite());
+ assert(repeatMin < repeatMax);
+ stateSize = numRangeSlots(repeatMin, repeatMax) * sizeof(u16);
+ horizon = repeatMax * 2 + 1;
+ // Packed offset member, plus one byte for the number of range
+ // elements.
+ packedCtrlSize = calcPackedBytes(horizon + 1) + 1;
+ break;
+ case REPEAT_BITMAP:
+ stateSize = 0; // everything is in the control block.
+ horizon = 0; // unused
+ packedCtrlSize = ROUNDUP_N(repeatMax + 1, 8) / 8;
+ break;
+ case REPEAT_SPARSE_OPTIMAL_P:
+ assert(minPeriod);
+ assert(repeatMax.is_finite());
+ {
+ u32 rv = repeatRecurTable(this, repeatMax, minPeriod);
+ u32 repeatTmp = 0;
+ if ((u32)repeatMax < minPeriod) {
+ repeatTmp = repeatMax;
+ patchCount = 1;
+ } else {
+ // find optimal patch size
+ repeatTmp =
+ findOptimalPatchSize(this, repeatMax, minPeriod, rv);
+ assert(patchCount < 65536);
+ }
+ DEBUG_PRINTF("repeat[%u %u], period=%u\n", (u32)repeatMin,
+ (u32)repeatMax, minPeriod);
+ u64a maxVal = table[repeatTmp];
+ encodingSize = calcPackedBytes(maxVal);
+ patchSize = repeatTmp;
+ assert(encodingSize <= 64);
+
+ patchesOffset = mmbit_size(patchCount);
+ stateSize = patchesOffset + encodingSize * patchCount;
+ horizon = (repeatTmp * patchCount) * 2 + 1;
+ u32 ring_indices_len = patchCount < depth(254) ? 2 : 4;
+ packedCtrlSize = calcPackedBytes(horizon + 1) + ring_indices_len;
+ }
+ break;
+ case REPEAT_TRAILER:
+ assert(repeatMax.is_finite());
+ assert(repeatMin <= depth(64));
+ stateSize = 0; // everything is in the control block.
+ horizon = repeatMax + 1;
+ packedFieldSizes.resize(2);
+ packedFieldSizes[0] = calcPackedBits(horizon + 1);
+ packedFieldSizes[1] = repeatMin;
+ packedCtrlSize = (packedFieldSizes[0] + packedFieldSizes[1] + 7U) / 8U;
+ break;
case REPEAT_ALWAYS:
assert(repeatMin == 0ULL);
assert(repeatMax.is_infinite());
@@ -213,174 +213,174 @@ RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin,
horizon = 0;
packedCtrlSize = 0;
break;
- }
- DEBUG_PRINTF("stateSize=%u, packedCtrlSize=%u, horizon=%u\n", stateSize,
- packedCtrlSize, horizon);
-
- assert(packedCtrlSize <= sizeof(RepeatControl));
-}
-
-/** \brief Returns the packed control block size in bytes for a given bounded
- * repeat. */
-static
-u32 packedSize(enum RepeatType type, const depth &repeatMin,
- const depth &repeatMax, u32 minPeriod) {
- RepeatStateInfo rsi(type, repeatMin, repeatMax, minPeriod);
- return rsi.packedCtrlSize;
-}
-
-/** \brief Returns the stream state size in bytes for a given bounded
- * repeat. */
-static
-u32 streamStateSize(enum RepeatType type, const depth &repeatMin,
- const depth &repeatMax, u32 minPeriod) {
- RepeatStateInfo rsi(type, repeatMin, repeatMax, minPeriod);
- return rsi.stateSize;
-}
-
-enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax,
+ }
+ DEBUG_PRINTF("stateSize=%u, packedCtrlSize=%u, horizon=%u\n", stateSize,
+ packedCtrlSize, horizon);
+
+ assert(packedCtrlSize <= sizeof(RepeatControl));
+}
+
+/** \brief Returns the packed control block size in bytes for a given bounded
+ * repeat. */
+static
+u32 packedSize(enum RepeatType type, const depth &repeatMin,
+ const depth &repeatMax, u32 minPeriod) {
+ RepeatStateInfo rsi(type, repeatMin, repeatMax, minPeriod);
+ return rsi.packedCtrlSize;
+}
+
+/** \brief Returns the stream state size in bytes for a given bounded
+ * repeat. */
+static
+u32 streamStateSize(enum RepeatType type, const depth &repeatMin,
+ const depth &repeatMax, u32 minPeriod) {
+ RepeatStateInfo rsi(type, repeatMin, repeatMax, minPeriod);
+ return rsi.stateSize;
+}
+
+enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax,
u32 minPeriod, bool is_reset,
bool has_external_guard) {
- if (repeatMax.is_infinite()) {
+ if (repeatMax.is_infinite()) {
if (has_external_guard && !repeatMin) {
return REPEAT_ALWAYS;
} else {
return REPEAT_FIRST;
}
- }
-
- if (repeatMin == depth(0) || is_reset) {
- return REPEAT_LAST;
- }
-
- // Cases with max < 64 can be handled with either bitmap or trailer. We use
- // whichever has smaller packed state.
-
- if (repeatMax < depth(64)) {
- u32 bitmap_len =
- packedSize(REPEAT_BITMAP, repeatMin, repeatMax, minPeriod);
- u32 trailer_len =
- packedSize(REPEAT_TRAILER, repeatMin, repeatMax, minPeriod);
- return bitmap_len <= trailer_len ? REPEAT_BITMAP : REPEAT_TRAILER;
- }
-
- if (repeatMin <= depth(64)) {
- return REPEAT_TRAILER;
- }
-
- u32 range_len = ~0U;
- if (repeatMax > repeatMin &&
- numRangeSlots(repeatMin, repeatMax) <= REPEAT_RANGE_MAX_SLOTS) {
- assert(numRangeSlots(repeatMin, repeatMax) < 256); // stored in u8
- range_len =
- streamStateSize(REPEAT_RANGE, repeatMin, repeatMax, minPeriod);
- }
-
- assert(repeatMax.is_finite());
-
- u32 sparse_len = ~0U;
- if (minPeriod > 6) {
- sparse_len =
- streamStateSize(REPEAT_SPARSE_OPTIMAL_P, repeatMin, repeatMax, minPeriod);
- }
-
- if (range_len != ~0U || sparse_len != ~0U) {
- return range_len < sparse_len ? REPEAT_RANGE : REPEAT_SPARSE_OPTIMAL_P;
- }
-
- return REPEAT_RING;
-}
-
-bool matches(vector<CharReach>::const_iterator a_it,
- vector<CharReach>::const_iterator a_ite,
- vector<CharReach>::const_iterator b_it,
- UNUSED vector<CharReach>::const_iterator b_ite) {
- for (; a_it != a_ite; ++a_it, ++b_it) {
- assert(b_it != b_ite);
- if ((*a_it & *b_it).none()) {
- return false;
- }
- }
- assert(b_it == b_ite);
- return true;
-}
-
-static
-u32 minDistAfterA(const vector<CharReach> &a, const vector<CharReach> &b) {
- /* we do not count the case where b can end at the same position as a */
-
- for (u32 i = 1; i < b.size(); i++) {
- u32 overlap_len = b.size() - i;
- if (overlap_len <= a.size()) {
- if (matches(a.end() - overlap_len, a.end(),
- b.begin(), b.end() - i)) {
- return i;
- }
- } else {
- assert(overlap_len > a.size());
- if (matches(a.begin(), a.end(),
- b.end() - i - a.size(), b.end() - i)) {
- return i;
- }
- }
- }
-
- return b.size();
-}
-
-vector<size_t> minResetDistToEnd(const vector<vector<CharReach>> &triggers,
- const CharReach &cr) {
- /* if a trigger does not reset the repeat, it gets a distance of trigger
- length */
- vector<size_t> out;
- for (const auto &trig : triggers) {
- size_t size = trig.size();
- size_t i = 0;
- for (; i < size; i++) {
- if ((trig[size - i - 1] & cr).none()) {
- break;
- }
- }
- out.push_back(i);
- }
-
- return out;
-}
-
-#if defined(DEBUG) || defined(DUMP_SUPPORT)
-
-static UNUSED
-string dumpTrigger(const vector<CharReach> &trigger) {
- string s;
- for (const auto &cr : trigger) {
- s += describeClass(cr);
- }
- return s;
-}
-
-#endif
-
-u32 minPeriod(const vector<vector<CharReach>> &triggers, const CharReach &cr,
- bool *can_reset) {
- assert(!triggers.empty());
-
- u32 rv = ~0U;
- *can_reset = true;
- vector<size_t> min_reset_dist = minResetDistToEnd(triggers, cr);
-
- for (const auto &trigger : triggers) {
- DEBUG_PRINTF("trigger: %s\n", dumpTrigger(trigger).c_str());
- for (size_t j = 0; j < triggers.size(); j++) {
- u32 min_ext = minDistAfterA(trigger, triggers[j]);
- rv = min(rv, min_ext);
- if (min_ext <= min_reset_dist[j]) {
- *can_reset = false;
- }
- }
- }
-
- DEBUG_PRINTF("min period %u\n", rv);
- return rv;
-}
-
-} // namespace ue2
+ }
+
+ if (repeatMin == depth(0) || is_reset) {
+ return REPEAT_LAST;
+ }
+
+ // Cases with max < 64 can be handled with either bitmap or trailer. We use
+ // whichever has smaller packed state.
+
+ if (repeatMax < depth(64)) {
+ u32 bitmap_len =
+ packedSize(REPEAT_BITMAP, repeatMin, repeatMax, minPeriod);
+ u32 trailer_len =
+ packedSize(REPEAT_TRAILER, repeatMin, repeatMax, minPeriod);
+ return bitmap_len <= trailer_len ? REPEAT_BITMAP : REPEAT_TRAILER;
+ }
+
+ if (repeatMin <= depth(64)) {
+ return REPEAT_TRAILER;
+ }
+
+ u32 range_len = ~0U;
+ if (repeatMax > repeatMin &&
+ numRangeSlots(repeatMin, repeatMax) <= REPEAT_RANGE_MAX_SLOTS) {
+ assert(numRangeSlots(repeatMin, repeatMax) < 256); // stored in u8
+ range_len =
+ streamStateSize(REPEAT_RANGE, repeatMin, repeatMax, minPeriod);
+ }
+
+ assert(repeatMax.is_finite());
+
+ u32 sparse_len = ~0U;
+ if (minPeriod > 6) {
+ sparse_len =
+ streamStateSize(REPEAT_SPARSE_OPTIMAL_P, repeatMin, repeatMax, minPeriod);
+ }
+
+ if (range_len != ~0U || sparse_len != ~0U) {
+ return range_len < sparse_len ? REPEAT_RANGE : REPEAT_SPARSE_OPTIMAL_P;
+ }
+
+ return REPEAT_RING;
+}
+
+bool matches(vector<CharReach>::const_iterator a_it,
+ vector<CharReach>::const_iterator a_ite,
+ vector<CharReach>::const_iterator b_it,
+ UNUSED vector<CharReach>::const_iterator b_ite) {
+ for (; a_it != a_ite; ++a_it, ++b_it) {
+ assert(b_it != b_ite);
+ if ((*a_it & *b_it).none()) {
+ return false;
+ }
+ }
+ assert(b_it == b_ite);
+ return true;
+}
+
+static
+u32 minDistAfterA(const vector<CharReach> &a, const vector<CharReach> &b) {
+ /* we do not count the case where b can end at the same position as a */
+
+ for (u32 i = 1; i < b.size(); i++) {
+ u32 overlap_len = b.size() - i;
+ if (overlap_len <= a.size()) {
+ if (matches(a.end() - overlap_len, a.end(),
+ b.begin(), b.end() - i)) {
+ return i;
+ }
+ } else {
+ assert(overlap_len > a.size());
+ if (matches(a.begin(), a.end(),
+ b.end() - i - a.size(), b.end() - i)) {
+ return i;
+ }
+ }
+ }
+
+ return b.size();
+}
+
+vector<size_t> minResetDistToEnd(const vector<vector<CharReach>> &triggers,
+ const CharReach &cr) {
+ /* if a trigger does not reset the repeat, it gets a distance of trigger
+ length */
+ vector<size_t> out;
+ for (const auto &trig : triggers) {
+ size_t size = trig.size();
+ size_t i = 0;
+ for (; i < size; i++) {
+ if ((trig[size - i - 1] & cr).none()) {
+ break;
+ }
+ }
+ out.push_back(i);
+ }
+
+ return out;
+}
+
+#if defined(DEBUG) || defined(DUMP_SUPPORT)
+
+static UNUSED
+string dumpTrigger(const vector<CharReach> &trigger) {
+ string s;
+ for (const auto &cr : trigger) {
+ s += describeClass(cr);
+ }
+ return s;
+}
+
+#endif
+
+u32 minPeriod(const vector<vector<CharReach>> &triggers, const CharReach &cr,
+ bool *can_reset) {
+ assert(!triggers.empty());
+
+ u32 rv = ~0U;
+ *can_reset = true;
+ vector<size_t> min_reset_dist = minResetDistToEnd(triggers, cr);
+
+ for (const auto &trigger : triggers) {
+ DEBUG_PRINTF("trigger: %s\n", dumpTrigger(trigger).c_str());
+ for (size_t j = 0; j < triggers.size(); j++) {
+ u32 min_ext = minDistAfterA(trigger, triggers[j]);
+ rv = min(rv, min_ext);
+ if (min_ext <= min_reset_dist[j]) {
+ *can_reset = false;
+ }
+ }
+ }
+
+ DEBUG_PRINTF("min period %u\n", rv);
+ return rv;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfa/repeatcompile.h b/contrib/libs/hyperscan/src/nfa/repeatcompile.h
index f3d2df92a0..fe9a710623 100644
--- a/contrib/libs/hyperscan/src/nfa/repeatcompile.h
+++ b/contrib/libs/hyperscan/src/nfa/repeatcompile.h
@@ -1,90 +1,90 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Bounded repeat compile-time code.
- */
-
-#ifndef REPEATCOMPILE_H
-#define REPEATCOMPILE_H
-
-#include "repeat_internal.h"
-
-#include <cstdint>
-#include <utility>
-#include <vector>
-
-namespace ue2 {
-
-class CharReach;
-class depth;
-
-/**
- * \brief Structure representing the various state requirements for a given
- * bounded repeat.
- */
-struct RepeatStateInfo {
- RepeatStateInfo(enum RepeatType type, const depth &repeatMin,
- const depth &repeatMax, u32 minPeriod);
-
- u32 stateSize;
- u32 packedCtrlSize;
- u32 horizon;
- u32 patchCount;
- u32 patchSize;
- u32 encodingSize;
- u32 patchesOffset;
- std::vector<u32> packedFieldSizes;
- std::vector<uint64_t> table; // not u64a, for boost/gcc-4.9
-};
-
-/**
- * \brief Given the parameters of a repeat, choose a repeat implementation
- * type.
- */
-enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax,
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Bounded repeat compile-time code.
+ */
+
+#ifndef REPEATCOMPILE_H
+#define REPEATCOMPILE_H
+
+#include "repeat_internal.h"
+
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+namespace ue2 {
+
+class CharReach;
+class depth;
+
+/**
+ * \brief Structure representing the various state requirements for a given
+ * bounded repeat.
+ */
+struct RepeatStateInfo {
+ RepeatStateInfo(enum RepeatType type, const depth &repeatMin, // declared only; the definition is not part of this header
+ const depth &repeatMax, u32 minPeriod); // note: this parameter shares its name with ue2::minPeriod() declared later in this header
+
+ u32 stateSize; // NOTE(review): units and meaning of these fields are not stated here -- presumably filled in by the constructor, confirm
+ u32 packedCtrlSize;
+ u32 horizon;
+ u32 patchCount;
+ u32 patchSize;
+ u32 encodingSize;
+ u32 patchesOffset;
+ std::vector<u32> packedFieldSizes;
+ std::vector<uint64_t> table; // not u64a, for boost/gcc-4.9
+};
+
+/**
+ * \brief Given the parameters of a repeat, choose a repeat implementation
+ * type.
+ */
+enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax,
u32 minPeriod, bool is_reset,
bool has_external_guard = false);
-
-u32 calcPackedBytes(u64a val);
-
-bool matches(std::vector<CharReach>::const_iterator a_it,
- std::vector<CharReach>::const_iterator a_ite,
- std::vector<CharReach>::const_iterator b_it,
- std::vector<CharReach>::const_iterator b_ite);
-
-std::vector<size_t>
-minResetDistToEnd(const std::vector<std::vector<CharReach>> &triggers,
- const CharReach &cr);
-
-u32 minPeriod(const std::vector<std::vector<CharReach>> &triggers,
- const CharReach &cr, bool *can_reset);
-
-} // namespace ue2
-
-#endif // REPEATCOMPILE_H
+
+u32 calcPackedBytes(u64a val);
+
+bool matches(std::vector<CharReach>::const_iterator a_it,
+ std::vector<CharReach>::const_iterator a_ite,
+ std::vector<CharReach>::const_iterator b_it,
+ std::vector<CharReach>::const_iterator b_ite);
+
+std::vector<size_t>
+minResetDistToEnd(const std::vector<std::vector<CharReach>> &triggers,
+ const CharReach &cr);
+
+u32 minPeriod(const std::vector<std::vector<CharReach>> &triggers,
+ const CharReach &cr, bool *can_reset);
+
+} // namespace ue2
+
+#endif // REPEATCOMPILE_H
diff --git a/contrib/libs/hyperscan/src/nfa/shufti.c b/contrib/libs/hyperscan/src/nfa/shufti.c
index f2b3915c2a..09ffc0cf9a 100644
--- a/contrib/libs/hyperscan/src/nfa/shufti.c
+++ b/contrib/libs/hyperscan/src/nfa/shufti.c
@@ -1,82 +1,82 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Shufti: character class acceleration.
- *
- * Utilises the SSSE3 pshufb shuffle instruction
- */
-
-#include "shufti.h"
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Shufti: character class acceleration.
+ *
+ * Utilises the SSSE3 pshufb shuffle instruction
+ */
+
+#include "shufti.h"
+#include "ue2common.h"
#include "util/arch.h"
-#include "util/bitutils.h"
-#include "util/simd_utils.h"
-#include "util/unaligned.h"
-
-#ifdef DEBUG
-#include <ctype.h>
-
-#define DUMP_MSK(_t) \
-static UNUSED \
-void dumpMsk##_t(m##_t msk) { \
- u8 * mskAsU8 = (u8 *)&msk; \
- for (unsigned i = 0; i < sizeof(msk); i++) { \
- u8 c = mskAsU8[i]; \
- for (int j = 0; j < 8; j++) { \
- if ((c >> (7-j)) & 0x1) \
- printf("1"); \
- else \
- printf("0"); \
- } \
- printf(" "); \
- } \
-} \
-static UNUSED \
-void dumpMsk##_t##AsChars(m##_t msk) { \
- u8 * mskAsU8 = (u8 *)&msk; \
- for (unsigned i = 0; i < sizeof(msk); i++) { \
- u8 c = mskAsU8[i]; \
- if (isprint(c)) \
- printf("%c",c); \
- else \
- printf("."); \
- } \
-}
-
-#endif
-
+#include "util/bitutils.h"
+#include "util/simd_utils.h"
+#include "util/unaligned.h"
+
+#ifdef DEBUG
+#include <ctype.h>
+
+#define DUMP_MSK(_t) \
+static UNUSED \
+void dumpMsk##_t(m##_t msk) { \
+ u8 * mskAsU8 = (u8 *)&msk; \
+ for (unsigned i = 0; i < sizeof(msk); i++) { \
+ u8 c = mskAsU8[i]; \
+ for (int j = 0; j < 8; j++) { \
+ if ((c >> (7-j)) & 0x1) \
+ printf("1"); \
+ else \
+ printf("0"); \
+ } \
+ printf(" "); \
+ } \
+} \
+static UNUSED \
+void dumpMsk##_t##AsChars(m##_t msk) { \
+ u8 * mskAsU8 = (u8 *)&msk; \
+ for (unsigned i = 0; i < sizeof(msk); i++) { \
+ u8 c = mskAsU8[i]; \
+ if (isprint(c)) \
+ printf("%c",c); \
+ else \
+ printf("."); \
+ } \
+}
+
+#endif
+
/** \brief Naive byte-by-byte implementation. */
static really_inline
const u8 *shuftiFwdSlow(const u8 *lo, const u8 *hi, const u8 *buf,
const u8 *buf_end) {
assert(buf < buf_end);
-
+
for (; buf < buf_end; ++buf) {
u8 c = *buf;
if (lo[c & 0xf] & hi[c >> 4]) {
@@ -102,306 +102,306 @@ const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf,
}
#if !defined(HAVE_AVX2)
-/* Normal SSSE3 shufti */
-
+/* Normal SSSE3 shufti */
+
#ifdef DEBUG
DUMP_MSK(128)
#endif
-#define GET_LO_4(chars) and128(chars, low4bits)
+#define GET_LO_4(chars) and128(chars, low4bits)
#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4)
-
-static really_inline
+
+static really_inline
u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits,
const m128 compare) {
m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(chars));
m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(chars));
m128 t = and128(c_lo, c_hi);
-#ifdef DEBUG
+#ifdef DEBUG
DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n");
DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n");
DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n");
DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n");
DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n");
-#endif
+#endif
return movemask128(eq128(t, compare));
}
-
+
static really_inline
const u8 *firstMatch(const u8 *buf, u32 z) {
- if (unlikely(z != 0xffff)) {
- u32 pos = ctz32(~z & 0xffff);
- assert(pos < 16);
- return buf + pos;
- } else {
- return NULL; // no match
- }
-}
-
-static really_inline
-const u8 *fwdBlock(m128 mask_lo, m128 mask_hi, m128 chars, const u8 *buf,
- const m128 low4bits, const m128 zeroes) {
+ if (unlikely(z != 0xffff)) {
+ u32 pos = ctz32(~z & 0xffff);
+ assert(pos < 16);
+ return buf + pos;
+ } else {
+ return NULL; // no match
+ }
+}
+
+static really_inline
+const u8 *fwdBlock(m128 mask_lo, m128 mask_hi, m128 chars, const u8 *buf,
+ const m128 low4bits, const m128 zeroes) {
u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes);
-
+
return firstMatch(buf, z);
-}
-
-const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
- const u8 *buf_end) {
- assert(buf && buf_end);
- assert(buf < buf_end);
-
- // Slow path for small cases.
- if (buf_end - buf < 16) {
- return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi,
- buf, buf_end);
- }
-
- const m128 zeroes = zeroes128();
- const m128 low4bits = _mm_set1_epi8(0xf);
- const u8 *rv;
-
- size_t min = (size_t)buf % 16;
- assert(buf_end - buf >= 16);
-
- // Preconditioning: most of the time our buffer won't be aligned.
- m128 chars = loadu128(buf);
- rv = fwdBlock(mask_lo, mask_hi, chars, buf, low4bits, zeroes);
- if (rv) {
- return rv;
- }
- buf += (16 - min);
-
- // Unrolling was here, but it wasn't doing anything but taking up space.
- // Reroll FTW.
-
- const u8 *last_block = buf_end - 16;
- while (buf < last_block) {
- m128 lchars = load128(buf);
- rv = fwdBlock(mask_lo, mask_hi, lchars, buf, low4bits, zeroes);
- if (rv) {
- return rv;
- }
- buf += 16;
- }
-
- // Use an unaligned load to mop up the last 16 bytes and get an accurate
- // picture to buf_end.
- assert(buf <= buf_end && buf >= buf_end - 16);
- chars = loadu128(buf_end - 16);
- rv = fwdBlock(mask_lo, mask_hi, chars, buf_end - 16, low4bits, zeroes);
- if (rv) {
- return rv;
- }
-
- return buf_end;
-}
-
-static really_inline
-const u8 *lastMatch(const u8 *buf, m128 t, m128 compare) {
-#ifdef DEBUG
- DEBUG_PRINTF("confirming match in:"); dumpMsk128(t); printf("\n");
-#endif
-
- u32 z = movemask128(eq128(t, compare));
- if (unlikely(z != 0xffff)) {
- u32 pos = clz32(~z & 0xffff);
- DEBUG_PRINTF("buf=%p, pos=%u\n", buf, pos);
- assert(pos >= 16 && pos < 32);
- return buf + (31 - pos);
- } else {
- return NULL; // no match
- }
-}
-
-
-static really_inline
-const u8 *revBlock(m128 mask_lo, m128 mask_hi, m128 chars, const u8 *buf,
- const m128 low4bits, const m128 zeroes) {
+}
+
+const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
+ const u8 *buf_end) { // forward scan of [buf, buf_end); the vector path returns the first match, or buf_end when there is none
+ assert(buf && buf_end);
+ assert(buf < buf_end);
+
+ // Slow path for small cases.
+ if (buf_end - buf < 16) {
+ return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, // the two masks are handed over as byte tables
+ buf, buf_end);
+ }
+
+ const m128 zeroes = zeroes128();
+ const m128 low4bits = _mm_set1_epi8(0xf);
+ const u8 *rv;
+
+ size_t min = (size_t)buf % 16; // offset of buf from the previous 16-byte boundary
+ assert(buf_end - buf >= 16);
+
+ // Preconditioning: most of the time our buffer won't be aligned.
+ m128 chars = loadu128(buf);
+ rv = fwdBlock(mask_lo, mask_hi, chars, buf, low4bits, zeroes);
+ if (rv) {
+ return rv;
+ }
+ buf += (16 - min); // step to the next 16-byte boundary (a full 16 when buf was already aligned)
+
+ // Unrolling was here, but it wasn't doing anything but taking up space.
+ // Reroll FTW.
+
+ const u8 *last_block = buf_end - 16;
+ while (buf < last_block) {
+ m128 lchars = load128(buf); // buf sits on a 16-byte boundary here; load128 is presumably the aligned load -- confirm
+ rv = fwdBlock(mask_lo, mask_hi, lchars, buf, low4bits, zeroes);
+ if (rv) {
+ return rv;
+ }
+ buf += 16;
+ }
+
+ // Use an unaligned load to mop up the last 16 bytes and get an accurate
+ // picture to buf_end.
+ assert(buf <= buf_end && buf >= buf_end - 16);
+ chars = loadu128(buf_end - 16); // may re-read bytes that the loop above already rejected
+ rv = fwdBlock(mask_lo, mask_hi, chars, buf_end - 16, low4bits, zeroes);
+ if (rv) {
+ return rv;
+ }
+
+ return buf_end; // nothing matched in the vector path
+}
+
+static really_inline
+const u8 *lastMatch(const u8 *buf, m128 t, m128 compare) { // pointer into buf for the highest-indexed byte of t that differs from compare (assuming movemask128 maps byte i to bit i -- confirm); NULL if none differs
+#ifdef DEBUG
+ DEBUG_PRINTF("confirming match in:"); dumpMsk128(t); printf("\n");
+#endif
+
+ u32 z = movemask128(eq128(t, compare)); // presumably one bit per byte, set where t equals compare -- helpers are defined elsewhere
+ if (unlikely(z != 0xffff)) {
+ u32 pos = clz32(~z & 0xffff); // only the low 16 bits can be set, hence the 16..31 range asserted below
+ DEBUG_PRINTF("buf=%p, pos=%u\n", buf, pos);
+ assert(pos >= 16 && pos < 32);
+ return buf + (31 - pos); // 31 - pos is the index of the highest set bit, i.e. 0..15
+ } else {
+ return NULL; // no match
+ }
+}
+
+
+static really_inline
+const u8 *revBlock(m128 mask_lo, m128 mask_hi, m128 chars, const u8 *buf,
+ const m128 low4bits, const m128 zeroes) {
m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(chars));
m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(chars));
- m128 t = and128(c_lo, c_hi);
-
-#ifdef DEBUG
- DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n");
- DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n");
- DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n");
- DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n");
- DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n");
-#endif
-
- return lastMatch(buf, t, zeroes);
-}
-
-const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
- const u8 *buf_end) {
- assert(buf && buf_end);
- assert(buf < buf_end);
-
- // Slow path for small cases.
- if (buf_end - buf < 16) {
- return shuftiRevSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi,
- buf, buf_end);
- }
-
- const m128 zeroes = zeroes128();
- const m128 low4bits = _mm_set1_epi8(0xf);
- const u8 *rv;
-
- assert(buf_end - buf >= 16);
-
- // Preconditioning: most of the time our buffer won't be aligned.
- m128 chars = loadu128(buf_end - 16);
- rv = revBlock(mask_lo, mask_hi, chars, buf_end - 16, low4bits, zeroes);
- if (rv) {
- return rv;
- }
- buf_end = (const u8 *)((size_t)buf_end & ~((size_t)0xf));
-
- // Unrolling was here, but it wasn't doing anything but taking up space.
- // Reroll FTW.
-
- const u8 *last_block = buf + 16;
- while (buf_end > last_block) {
- buf_end -= 16;
- m128 lchars = load128(buf_end);
- rv = revBlock(mask_lo, mask_hi, lchars, buf_end, low4bits, zeroes);
- if (rv) {
- return rv;
- }
- }
-
- // Use an unaligned load to mop up the last 16 bytes and get an accurate
- // picture to buf.
- chars = loadu128(buf);
- rv = revBlock(mask_lo, mask_hi, chars, buf, low4bits, zeroes);
- if (rv) {
- return rv;
- }
-
- return buf - 1;
-}
-
-static really_inline
-const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi,
- m128 chars, const u8 *buf, const m128 low4bits,
- const m128 ones) {
- m128 chars_lo = GET_LO_4(chars);
- m128 chars_hi = GET_HI_4(chars);
+ m128 t = and128(c_lo, c_hi);
+
+#ifdef DEBUG
+ DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n");
+ DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n");
+ DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n");
+ DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n");
+ DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n");
+#endif
+
+ return lastMatch(buf, t, zeroes);
+}
+
+const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
+ const u8 *buf_end) { // backward scan of [buf, buf_end); the vector path returns whatever revBlock() reports first, or buf - 1 when nothing is found
+ assert(buf && buf_end);
+ assert(buf < buf_end);
+
+ // Slow path for small cases.
+ if (buf_end - buf < 16) {
+ return shuftiRevSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, // the two masks are handed over as byte tables
+ buf, buf_end);
+ }
+
+ const m128 zeroes = zeroes128();
+ const m128 low4bits = _mm_set1_epi8(0xf);
+ const u8 *rv;
+
+ assert(buf_end - buf >= 16);
+
+ // Preconditioning: most of the time our buffer won't be aligned.
+ m128 chars = loadu128(buf_end - 16);
+ rv = revBlock(mask_lo, mask_hi, chars, buf_end - 16, low4bits, zeroes);
+ if (rv) {
+ return rv;
+ }
+ buf_end = (const u8 *)((size_t)buf_end & ~((size_t)0xf)); // round buf_end down to a 16-byte boundary
+
+ // Unrolling was here, but it wasn't doing anything but taking up space.
+ // Reroll FTW.
+
+ const u8 *last_block = buf + 16;
+ while (buf_end > last_block) {
+ buf_end -= 16; // step back first, then scan the block that now starts at buf_end
+ m128 lchars = load128(buf_end);
+ rv = revBlock(mask_lo, mask_hi, lchars, buf_end, low4bits, zeroes);
+ if (rv) {
+ return rv;
+ }
+ }
+
+ // Use an unaligned load to mop up the last 16 bytes and get an accurate
+ // picture to buf.
+ chars = loadu128(buf); // may re-read bytes that the loop above already rejected
+ rv = revBlock(mask_lo, mask_hi, chars, buf, low4bits, zeroes);
+ if (rv) {
+ return rv;
+ }
+
+ return buf - 1; // one before the start of the buffer signals that nothing was found
+}
+
+static really_inline
+const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi,
+ m128 chars, const u8 *buf, const m128 low4bits,
+ const m128 ones) {
+ m128 chars_lo = GET_LO_4(chars);
+ m128 chars_hi = GET_HI_4(chars);
m128 c_lo = pshufb_m128(mask1_lo, chars_lo);
m128 c_hi = pshufb_m128(mask1_hi, chars_hi);
- m128 t = or128(c_lo, c_hi);
-
-#ifdef DEBUG
- DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n");
- DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n");
- DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n");
- DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n");
- DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n");
-#endif
-
+ m128 t = or128(c_lo, c_hi);
+
+#ifdef DEBUG
+ DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n");
+ DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n");
+ DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n");
+ DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n");
+ DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n");
+#endif
+
m128 c2_lo = pshufb_m128(mask2_lo, chars_lo);
m128 c2_hi = pshufb_m128(mask2_hi, chars_hi);
m128 t2 = or128(t, rshiftbyte_m128(or128(c2_lo, c2_hi), 1));
-
-#ifdef DEBUG
- DEBUG_PRINTF(" c2_lo: "); dumpMsk128(c2_lo); printf("\n");
- DEBUG_PRINTF(" c2_hi: "); dumpMsk128(c2_hi); printf("\n");
- DEBUG_PRINTF(" t2: "); dumpMsk128(t2); printf("\n");
-#endif
-
+
+#ifdef DEBUG
+ DEBUG_PRINTF(" c2_lo: "); dumpMsk128(c2_lo); printf("\n");
+ DEBUG_PRINTF(" c2_hi: "); dumpMsk128(c2_hi); printf("\n");
+ DEBUG_PRINTF(" t2: "); dumpMsk128(t2); printf("\n");
+#endif
+
u32 z = movemask128(eq128(t2, ones));
DEBUG_PRINTF(" z: 0x%08x\n", z);
return firstMatch(buf, z);
-}
-
-const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi,
- m128 mask2_lo, m128 mask2_hi,
- const u8 *buf, const u8 *buf_end) {
- const m128 ones = ones128();
- const m128 low4bits = _mm_set1_epi8(0xf);
- const u8 *rv;
-
- size_t min = (size_t)buf % 16;
-
- // Preconditioning: most of the time our buffer won't be aligned.
- m128 chars = loadu128(buf);
- rv = fwdBlock2(mask1_lo, mask1_hi, mask2_lo, mask2_hi,
- chars, buf, low4bits, ones);
- if (rv) {
- return rv;
- }
- buf += (16 - min);
-
- // Unrolling was here, but it wasn't doing anything but taking up space.
- // Reroll FTW.
-
- const u8 *last_block = buf_end - 16;
- while (buf < last_block) {
- m128 lchars = load128(buf);
- rv = fwdBlock2(mask1_lo, mask1_hi, mask2_lo, mask2_hi,
- lchars, buf, low4bits, ones);
- if (rv) {
- return rv;
- }
- buf += 16;
- }
-
- // Use an unaligned load to mop up the last 16 bytes and get an accurate
- // picture to buf_end.
- chars = loadu128(buf_end - 16);
- rv = fwdBlock2(mask1_lo, mask1_hi, mask2_lo, mask2_hi,
- chars, buf_end - 16, low4bits, ones);
- if (rv) {
- return rv;
- }
-
- return buf_end;
-}
-
+}
+
+const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi,
+ m128 mask2_lo, m128 mask2_hi,
+ const u8 *buf, const u8 *buf_end) { // forward scan using two mask pairs; returns the first position fwdBlock2() reports, or buf_end
+ const m128 ones = ones128();
+ const m128 low4bits = _mm_set1_epi8(0xf);
+ const u8 *rv;
+
+ size_t min = (size_t)buf % 16; // offset of buf from the previous 16-byte boundary
+
+ // Preconditioning: most of the time our buffer won't be aligned.
+ m128 chars = loadu128(buf); // NOTE(review): no short-buffer path or asserts here, unlike shuftiExec -- the caller presumably guarantees at least 16 bytes, confirm
+ rv = fwdBlock2(mask1_lo, mask1_hi, mask2_lo, mask2_hi,
+ chars, buf, low4bits, ones);
+ if (rv) {
+ return rv;
+ }
+ buf += (16 - min); // step to the next 16-byte boundary (a full 16 when buf was already aligned)
+
+ // Unrolling was here, but it wasn't doing anything but taking up space.
+ // Reroll FTW.
+
+ const u8 *last_block = buf_end - 16;
+ while (buf < last_block) {
+ m128 lchars = load128(buf); // buf sits on a 16-byte boundary here
+ rv = fwdBlock2(mask1_lo, mask1_hi, mask2_lo, mask2_hi,
+ lchars, buf, low4bits, ones);
+ if (rv) {
+ return rv;
+ }
+ buf += 16;
+ }
+
+ // Use an unaligned load to mop up the last 16 bytes and get an accurate
+ // picture to buf_end.
+ chars = loadu128(buf_end - 16); // may re-read bytes that the loop above already rejected
+ rv = fwdBlock2(mask1_lo, mask1_hi, mask2_lo, mask2_hi,
+ chars, buf_end - 16, low4bits, ones);
+ if (rv) {
+ return rv;
+ }
+
+ return buf_end; // nothing reported by any block
+}
+
#elif !defined(HAVE_AVX512)
// AVX2 - 256 wide shuftis
-
-#ifdef DEBUG
-DUMP_MSK(256)
-#endif
-
-#define GET_LO_4(chars) and256(chars, low4bits)
+
+#ifdef DEBUG
+DUMP_MSK(256)
+#endif
+
+#define GET_LO_4(chars) and256(chars, low4bits)
#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4)
-
-static really_inline
+
+static really_inline
u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits,
const m256 compare) {
m256 c_lo = pshufb_m256(mask_lo, GET_LO_4(chars));
m256 c_hi = pshufb_m256(mask_hi, GET_HI_4(chars));
m256 t = and256(c_lo, c_hi);
-#ifdef DEBUG
+#ifdef DEBUG
DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n");
DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n");
DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n");
DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n");
DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n");
-#endif
-
+#endif
+
return movemask256(eq256(t, compare));
}
static really_inline
const u8 *firstMatch(const u8 *buf, u32 z) {
DEBUG_PRINTF("z 0x%08x\n", z);
- if (unlikely(z != 0xffffffff)) {
- u32 pos = ctz32(~z);
- assert(pos < 32);
+ if (unlikely(z != 0xffffffff)) {
+ u32 pos = ctz32(~z);
+ assert(pos < 32);
DEBUG_PRINTF("match @ pos %u\n", pos);
- return buf + pos;
- } else {
- return NULL; // no match
- }
-}
-
-static really_inline
+ return buf + pos;
+ } else {
+ return NULL; // no match
+ }
+}
+
+static really_inline
const u8 *fwdBlockShort(m256 mask, m128 chars, const u8 *buf,
const m256 low4bits) {
// do the hi and lo shuffles in the one avx register
@@ -435,103 +435,103 @@ const u8 *shuftiFwdShort(m128 mask_lo, m128 mask_hi, const u8 *buf,
}
static really_inline
-const u8 *fwdBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf,
- const m256 low4bits, const m256 zeroes) {
+const u8 *fwdBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf,
+ const m256 low4bits, const m256 zeroes) {
u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes);
-
+
return firstMatch(buf, z);
-}
-
-/* takes 128 bit masks, but operates on 256 bits of data */
-const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
- const u8 *buf_end) {
- assert(buf && buf_end);
- assert(buf < buf_end);
+}
+
+/* takes 128 bit masks, but operates on 256 bits of data */
+const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
+ const u8 *buf_end) {
+ assert(buf && buf_end);
+ assert(buf < buf_end);
DEBUG_PRINTF("shufti %p len %zu\n", buf, buf_end - buf);
-
- // Slow path for small cases.
+
+ // Slow path for small cases.
if (buf_end - buf < 16) {
- return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi,
- buf, buf_end);
- }
-
+ return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi,
+ buf, buf_end);
+ }
+
const m256 low4bits = set32x8(0xf);
if (buf_end - buf <= 32) {
return shuftiFwdShort(mask_lo, mask_hi, buf, buf_end, low4bits);
}
- const m256 zeroes = zeroes256();
- const m256 wide_mask_lo = set2x128(mask_lo);
- const m256 wide_mask_hi = set2x128(mask_hi);
- const u8 *rv;
-
- size_t min = (size_t)buf % 32;
- assert(buf_end - buf >= 32);
-
- // Preconditioning: most of the time our buffer won't be aligned.
- m256 chars = loadu256(buf);
- rv = fwdBlock(wide_mask_lo, wide_mask_hi, chars, buf, low4bits, zeroes);
- if (rv) {
- return rv;
- }
- buf += (32 - min);
-
- // Unrolling was here, but it wasn't doing anything but taking up space.
- // Reroll FTW.
-
- const u8 *last_block = buf_end - 32;
- while (buf < last_block) {
- m256 lchars = load256(buf);
- rv = fwdBlock(wide_mask_lo, wide_mask_hi, lchars, buf, low4bits, zeroes);
- if (rv) {
- return rv;
- }
- buf += 32;
- }
-
- // Use an unaligned load to mop up the last 32 bytes and get an accurate
- // picture to buf_end.
- assert(buf <= buf_end && buf >= buf_end - 32);
- chars = loadu256(buf_end - 32);
- rv = fwdBlock(wide_mask_lo, wide_mask_hi, chars, buf_end - 32, low4bits, zeroes);
- if (rv) {
- return rv;
- }
-
- return buf_end;
-}
-
-static really_inline
+ const m256 zeroes = zeroes256();
+ const m256 wide_mask_lo = set2x128(mask_lo);
+ const m256 wide_mask_hi = set2x128(mask_hi);
+ const u8 *rv;
+
+ size_t min = (size_t)buf % 32;
+ assert(buf_end - buf >= 32);
+
+ // Preconditioning: most of the time our buffer won't be aligned.
+ m256 chars = loadu256(buf);
+ rv = fwdBlock(wide_mask_lo, wide_mask_hi, chars, buf, low4bits, zeroes);
+ if (rv) {
+ return rv;
+ }
+ buf += (32 - min);
+
+ // Unrolling was here, but it wasn't doing anything but taking up space.
+ // Reroll FTW.
+
+ const u8 *last_block = buf_end - 32;
+ while (buf < last_block) {
+ m256 lchars = load256(buf);
+ rv = fwdBlock(wide_mask_lo, wide_mask_hi, lchars, buf, low4bits, zeroes);
+ if (rv) {
+ return rv;
+ }
+ buf += 32;
+ }
+
+ // Use an unaligned load to mop up the last 32 bytes and get an accurate
+ // picture to buf_end.
+ assert(buf <= buf_end && buf >= buf_end - 32);
+ chars = loadu256(buf_end - 32);
+ rv = fwdBlock(wide_mask_lo, wide_mask_hi, chars, buf_end - 32, low4bits, zeroes);
+ if (rv) {
+ return rv;
+ }
+
+ return buf_end;
+}
+
+static really_inline
const u8 *lastMatch(const u8 *buf, u32 z) {
- if (unlikely(z != 0xffffffff)) {
- u32 pos = clz32(~z);
- DEBUG_PRINTF("buf=%p, pos=%u\n", buf, pos);
- return buf + (31 - pos);
- } else {
- return NULL; // no match
- }
-}
-
-static really_inline
-const u8 *revBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf,
- const m256 low4bits, const m256 zeroes) {
+ if (unlikely(z != 0xffffffff)) {
+ u32 pos = clz32(~z);
+ DEBUG_PRINTF("buf=%p, pos=%u\n", buf, pos);
+ return buf + (31 - pos);
+ } else {
+ return NULL; // no match
+ }
+}
+
+static really_inline
+const u8 *revBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf,
+ const m256 low4bits, const m256 zeroes) {
m256 c_lo = pshufb_m256(mask_lo, GET_LO_4(chars));
m256 c_hi = pshufb_m256(mask_hi, GET_HI_4(chars));
- m256 t = and256(c_lo, c_hi);
-
-#ifdef DEBUG
- DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n");
- DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n");
- DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n");
- DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n");
- DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n");
-#endif
-
+ m256 t = and256(c_lo, c_hi);
+
+#ifdef DEBUG
+ DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n");
+ DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n");
+ DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n");
+ DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n");
+ DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n");
+#endif
+
u32 z = movemask256(eq256(t, zeroes));
return lastMatch(buf, z);
-}
-
+}
+
static really_inline
const u8 *revBlockShort(m256 mask, m128 chars, const u8 *buf,
const m256 low4bits) {
@@ -567,95 +567,95 @@ const u8 *shuftiRevShort(m128 mask_lo, m128 mask_hi, const u8 *buf,
}
-/* takes 128 bit masks, but operates on 256 bits of data */
-const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
- const u8 *buf_end) {
- assert(buf && buf_end);
- assert(buf < buf_end);
-
- // Slow path for small cases.
+/* takes 128 bit masks, but operates on 256 bits of data */
+const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
+ const u8 *buf_end) {
+ assert(buf && buf_end);
+ assert(buf < buf_end);
+
+ // Slow path for small cases.
if (buf_end - buf < 16) {
- return shuftiRevSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi,
- buf, buf_end);
- }
-
+ return shuftiRevSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi,
+ buf, buf_end);
+ }
+
const m256 low4bits = set32x8(0xf);
if (buf_end - buf <= 32) {
return shuftiRevShort(mask_lo, mask_hi, buf, buf_end, low4bits);
}
- const m256 zeroes = zeroes256();
- const m256 wide_mask_lo = set2x128(mask_lo);
- const m256 wide_mask_hi = set2x128(mask_hi);
- const u8 *rv;
-
- assert(buf_end - buf >= 32);
-
- // Preconditioning: most of the time our buffer won't be aligned.
- m256 chars = loadu256(buf_end - 32);
- rv = revBlock(wide_mask_lo, wide_mask_hi, chars, buf_end - 32, low4bits, zeroes);
- if (rv) {
- return rv;
- }
- buf_end = (const u8 *)((size_t)buf_end & ~((size_t)0x1f));
-
- // Unrolling was here, but it wasn't doing anything but taking up space.
- // Reroll FTW.
- const u8 *last_block = buf + 32;
- while (buf_end > last_block) {
- buf_end -= 32;
- m256 lchars = load256(buf_end);
- rv = revBlock(wide_mask_lo, wide_mask_hi, lchars, buf_end, low4bits, zeroes);
- if (rv) {
- return rv;
- }
- }
-
- // Use an unaligned load to mop up the last 32 bytes and get an accurate
- // picture to buf.
- chars = loadu256(buf);
- rv = revBlock(wide_mask_lo, wide_mask_hi, chars, buf, low4bits, zeroes);
- if (rv) {
- return rv;
- }
-
- return buf - 1;
-}
-
-static really_inline
-const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi,
- m256 chars, const u8 *buf, const m256 low4bits,
- const m256 ones) {
- DEBUG_PRINTF("buf %p\n", buf);
- m256 chars_lo = GET_LO_4(chars);
- m256 chars_hi = GET_HI_4(chars);
+ const m256 zeroes = zeroes256();
+ const m256 wide_mask_lo = set2x128(mask_lo);
+ const m256 wide_mask_hi = set2x128(mask_hi);
+ const u8 *rv;
+
+ assert(buf_end - buf >= 32);
+
+ // Preconditioning: most of the time our buffer won't be aligned.
+ m256 chars = loadu256(buf_end - 32);
+ rv = revBlock(wide_mask_lo, wide_mask_hi, chars, buf_end - 32, low4bits, zeroes);
+ if (rv) {
+ return rv;
+ }
+ buf_end = (const u8 *)((size_t)buf_end & ~((size_t)0x1f));
+
+ // Unrolling was here, but it wasn't doing anything but taking up space.
+ // Reroll FTW.
+ const u8 *last_block = buf + 32;
+ while (buf_end > last_block) {
+ buf_end -= 32;
+ m256 lchars = load256(buf_end);
+ rv = revBlock(wide_mask_lo, wide_mask_hi, lchars, buf_end, low4bits, zeroes);
+ if (rv) {
+ return rv;
+ }
+ }
+
+ // Use an unaligned load to mop up the last 32 bytes and get an accurate
+ // picture to buf.
+ chars = loadu256(buf);
+ rv = revBlock(wide_mask_lo, wide_mask_hi, chars, buf, low4bits, zeroes);
+ if (rv) {
+ return rv;
+ }
+
+ return buf - 1;
+}
+
+static really_inline
+const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi,
+ m256 chars, const u8 *buf, const m256 low4bits,
+ const m256 ones) {
+ DEBUG_PRINTF("buf %p\n", buf);
+ m256 chars_lo = GET_LO_4(chars);
+ m256 chars_hi = GET_HI_4(chars);
m256 c_lo = pshufb_m256(mask1_lo, chars_lo);
m256 c_hi = pshufb_m256(mask1_hi, chars_hi);
- m256 t = or256(c_lo, c_hi);
-
-#ifdef DEBUG
- DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n");
- DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n");
- DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n");
- DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n");
- DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n");
-#endif
-
+ m256 t = or256(c_lo, c_hi);
+
+#ifdef DEBUG
+ DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n");
+ DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n");
+ DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n");
+ DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n");
+ DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n");
+#endif
+
m256 c2_lo = pshufb_m256(mask2_lo, chars_lo);
m256 c2_hi = pshufb_m256(mask2_hi, chars_hi);
m256 t2 = or256(t, rshift128_m256(or256(c2_lo, c2_hi), 1));
-
-#ifdef DEBUG
- DEBUG_PRINTF(" c2_lo: "); dumpMsk256(c2_lo); printf("\n");
- DEBUG_PRINTF(" c2_hi: "); dumpMsk256(c2_hi); printf("\n");
- DEBUG_PRINTF(" t2: "); dumpMsk256(t2); printf("\n");
-#endif
+
+#ifdef DEBUG
+ DEBUG_PRINTF(" c2_lo: "); dumpMsk256(c2_lo); printf("\n");
+ DEBUG_PRINTF(" c2_hi: "); dumpMsk256(c2_hi); printf("\n");
+ DEBUG_PRINTF(" t2: "); dumpMsk256(t2); printf("\n");
+#endif
u32 z = movemask256(eq256(t2, ones));
-
+
return firstMatch(buf, z);
-}
-
+}
+
static really_inline
const u8 *fwdBlockShort2(m256 mask1, m256 mask2, m128 chars, const u8 *buf,
const m256 low4bits) {
@@ -694,63 +694,63 @@ const u8 *shuftiDoubleShort(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo,
return buf_end;
}
-/* takes 128 bit masks, but operates on 256 bits of data */
-const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi,
- m128 mask2_lo, m128 mask2_hi,
- const u8 *buf, const u8 *buf_end) {
+/* takes 128 bit masks, but operates on 256 bits of data */
+const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi,
+ m128 mask2_lo, m128 mask2_hi,
+ const u8 *buf, const u8 *buf_end) {
/* we should always have at least 16 bytes */
assert(buf_end - buf >= 16);
DEBUG_PRINTF("buf %p len %zu\n", buf, buf_end - buf);
- if (buf_end - buf < 32) {
+ if (buf_end - buf < 32) {
return shuftiDoubleShort(mask1_lo, mask1_hi, mask2_lo, mask2_hi, buf,
buf_end);
- }
-
- const m256 ones = ones256();
- const m256 low4bits = set32x8(0xf);
- const m256 wide_mask1_lo = set2x128(mask1_lo);
- const m256 wide_mask1_hi = set2x128(mask1_hi);
- const m256 wide_mask2_lo = set2x128(mask2_lo);
- const m256 wide_mask2_hi = set2x128(mask2_hi);
- const u8 *rv;
-
- size_t min = (size_t)buf % 32;
-
- // Preconditioning: most of the time our buffer won't be aligned.
- m256 chars = loadu256(buf);
- rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi,
- chars, buf, low4bits, ones);
- if (rv) {
- return rv;
- }
- buf += (32 - min);
-
- // Unrolling was here, but it wasn't doing anything but taking up space.
- // Reroll FTW.
- const u8 *last_block = buf_end - 32;
- while (buf < last_block) {
- m256 lchars = load256(buf);
- rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi,
- lchars, buf, low4bits, ones);
- if (rv) {
- return rv;
- }
- buf += 32;
- }
-
- // Use an unaligned load to mop up the last 32 bytes and get an accurate
- // picture to buf_end.
- chars = loadu256(buf_end - 32);
- rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi,
- chars, buf_end - 32, low4bits, ones);
- if (rv) {
- return rv;
- }
-
- return buf_end;
-}
-
+ }
+
+ const m256 ones = ones256();
+ const m256 low4bits = set32x8(0xf);
+ const m256 wide_mask1_lo = set2x128(mask1_lo);
+ const m256 wide_mask1_hi = set2x128(mask1_hi);
+ const m256 wide_mask2_lo = set2x128(mask2_lo);
+ const m256 wide_mask2_hi = set2x128(mask2_hi);
+ const u8 *rv;
+
+ size_t min = (size_t)buf % 32;
+
+ // Preconditioning: most of the time our buffer won't be aligned.
+ m256 chars = loadu256(buf);
+ rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi,
+ chars, buf, low4bits, ones);
+ if (rv) {
+ return rv;
+ }
+ buf += (32 - min);
+
+ // Unrolling was here, but it wasn't doing anything but taking up space.
+ // Reroll FTW.
+ const u8 *last_block = buf_end - 32;
+ while (buf < last_block) {
+ m256 lchars = load256(buf);
+ rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi,
+ lchars, buf, low4bits, ones);
+ if (rv) {
+ return rv;
+ }
+ buf += 32;
+ }
+
+ // Use an unaligned load to mop up the last 32 bytes and get an accurate
+ // picture to buf_end.
+ chars = loadu256(buf_end - 32);
+ rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi,
+ chars, buf_end - 32, low4bits, ones);
+ if (rv) {
+ return rv;
+ }
+
+ return buf_end;
+}
+
#else // defined(HAVE_AVX512)
#ifdef DEBUG
diff --git a/contrib/libs/hyperscan/src/nfa/shufti.h b/contrib/libs/hyperscan/src/nfa/shufti.h
index 2663301488..1ebf776cc7 100644
--- a/contrib/libs/hyperscan/src/nfa/shufti.h
+++ b/contrib/libs/hyperscan/src/nfa/shufti.h
@@ -1,61 +1,61 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Shufti: character class acceleration.
- *
- * Utilises the SSSE3 pshufb shuffle instruction
- */
-
-#ifndef SHUFTI_H
-#define SHUFTI_H
-
-#include "ue2common.h"
-#include "util/simd_utils.h"
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
- const u8 *buf_end);
-
-// Returns (buf - 1) if not found.
-const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
- const u8 *buf_end);
-
-const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi,
- m128 mask2_lo, m128 mask2_hi,
- const u8 *buf, const u8 *buf_end);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Shufti: character class acceleration.
+ *
+ * Utilises the SSSE3 pshufb shuffle instruction
+ */
+
+#ifndef SHUFTI_H
+#define SHUFTI_H
+
+#include "ue2common.h"
+#include "util/simd_utils.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
+ const u8 *buf_end);
+
+// Returns (buf - 1) if not found.
+const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
+ const u8 *buf_end);
+
+const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi,
+ m128 mask2_lo, m128 mask2_hi,
+ const u8 *buf, const u8 *buf_end);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfa/shufticompile.cpp b/contrib/libs/hyperscan/src/nfa/shufticompile.cpp
index 577a8063f5..f712ef94a4 100644
--- a/contrib/libs/hyperscan/src/nfa/shufticompile.cpp
+++ b/contrib/libs/hyperscan/src/nfa/shufticompile.cpp
@@ -1,113 +1,113 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Shufti acceleration: compile code.
- */
-#include "shufticompile.h"
-#include "ue2common.h"
-#include "util/charreach.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Shufti acceleration: compile code.
+ */
+#include "shufticompile.h"
+#include "ue2common.h"
+#include "util/charreach.h"
#include "util/container.h"
#include "util/flat_containers.h"
-
-#include <array>
-#include <cassert>
-#include <cstring>
-#include <map>
-
-using namespace std;
-
-namespace ue2 {
-
-/** \brief Single-byte variant.
- *
- * Returns -1 if unable to construct masks, otherwise returns number of bits
- * used in the mask.
- *
- * Note: always able to construct masks for 8 or fewer characters.
- */
+
+#include <array>
+#include <cassert>
+#include <cstring>
+#include <map>
+
+using namespace std;
+
+namespace ue2 {
+
+/** \brief Single-byte variant.
+ *
+ * Returns -1 if unable to construct masks, otherwise returns number of bits
+ * used in the mask.
+ *
+ * Note: always able to construct masks for 8 or fewer characters.
+ */
int shuftiBuildMasks(const CharReach &c, u8 *lo, u8 *hi) {
- /* Things could be packed much more optimally, but this should be able to
- * handle any set of characters entirely in the lower half. */
-
- assert(c.count() < 256);
- assert(!c.none());
-
- map<u8, CharReach> by_hi; /* hi nibble -> set of matching lo nibbles */
- /* group matching characters by high nibble */
- for (size_t i = c.find_first(); i != CharReach::npos; i = c.find_next(i)) {
- u8 it_hi = i >> 4;
- u8 it_lo = i & 0xf;
- by_hi[it_hi].set(it_lo);
- }
-
- map<CharReach, CharReach> by_lo_set;
- /* group all hi nibbles with a common set of lo nibbles together */
- for (map<u8, CharReach>::const_iterator it = by_hi.begin();
- it != by_hi.end(); ++it) {
- by_lo_set[it->second].set(it->first);
- }
-
- if (by_lo_set.size() > 8) {
- /* too many char classes on the dance floor */
- assert(c.size() > 8);
- return -1;
- }
-
- u8 bit_index = 0;
- array<u8, 16> lo_a; lo_a.fill(0);
- array<u8, 16> hi_a; hi_a.fill(0);
- for (map<CharReach, CharReach>::const_iterator it = by_lo_set.begin();
- it != by_lo_set.end(); ++it) {
- const CharReach &lo_nibbles = it->first;
- const CharReach &hi_nibbles = it->second;
-
- /* set bits in low mask */
- for (size_t j = lo_nibbles.find_first(); j != CharReach::npos;
- j = lo_nibbles.find_next(j)) {
- lo_a[j] |= (1 << bit_index);
- }
-
- /* set bits in high mask */
- for (size_t j = hi_nibbles.find_first(); j != CharReach::npos;
- j = hi_nibbles.find_next(j)) {
- hi_a[j] |= (1 << bit_index);
- }
-
- bit_index++;
- }
-
- memcpy(lo, lo_a.data(), sizeof(m128));
- memcpy(hi, hi_a.data(), sizeof(m128));
-
- return bit_index;
-}
-
+ /* Things could be packed much more optimally, but this should be able to
+ * handle any set of characters entirely in the lower half. */
+
+ assert(c.count() < 256);
+ assert(!c.none());
+
+ map<u8, CharReach> by_hi; /* hi nibble -> set of matching lo nibbles */
+ /* group matching characters by high nibble */
+ for (size_t i = c.find_first(); i != CharReach::npos; i = c.find_next(i)) {
+ u8 it_hi = i >> 4;
+ u8 it_lo = i & 0xf;
+ by_hi[it_hi].set(it_lo);
+ }
+
+ map<CharReach, CharReach> by_lo_set;
+ /* group all hi nibbles with a common set of lo nibbles together */
+ for (map<u8, CharReach>::const_iterator it = by_hi.begin();
+ it != by_hi.end(); ++it) {
+ by_lo_set[it->second].set(it->first);
+ }
+
+ if (by_lo_set.size() > 8) {
+ /* too many char classes on the dance floor */
+ assert(c.size() > 8);
+ return -1;
+ }
+
+ u8 bit_index = 0;
+ array<u8, 16> lo_a; lo_a.fill(0);
+ array<u8, 16> hi_a; hi_a.fill(0);
+ for (map<CharReach, CharReach>::const_iterator it = by_lo_set.begin();
+ it != by_lo_set.end(); ++it) {
+ const CharReach &lo_nibbles = it->first;
+ const CharReach &hi_nibbles = it->second;
+
+ /* set bits in low mask */
+ for (size_t j = lo_nibbles.find_first(); j != CharReach::npos;
+ j = lo_nibbles.find_next(j)) {
+ lo_a[j] |= (1 << bit_index);
+ }
+
+ /* set bits in high mask */
+ for (size_t j = hi_nibbles.find_first(); j != CharReach::npos;
+ j = hi_nibbles.find_next(j)) {
+ hi_a[j] |= (1 << bit_index);
+ }
+
+ bit_index++;
+ }
+
+ memcpy(lo, lo_a.data(), sizeof(m128));
+ memcpy(hi, hi_a.data(), sizeof(m128));
+
+ return bit_index;
+}
+
static
array<u16, 4> or_array(array<u16, 4> a, const array<u16, 4> &b) {
a[0] |= b[0];
@@ -133,21 +133,21 @@ void set_buckets_from_mask(u16 nibble_mask, u32 bucket,
}
bool shuftiBuildDoubleMasks(const CharReach &onechar,
- const flat_set<pair<u8, u8>> &twochar,
+ const flat_set<pair<u8, u8>> &twochar,
u8 *lo1, u8 *hi1, u8 *lo2, u8 *hi2) {
- DEBUG_PRINTF("unibytes %zu dibytes %zu\n", onechar.size(),
- twochar.size());
- array<u8, 16> lo1_a;
- array<u8, 16> lo2_a;
- array<u8, 16> hi1_a;
- array<u8, 16> hi2_a;
-
- lo1_a.fill(0xff);
- lo2_a.fill(0xff);
- hi1_a.fill(0xff);
- hi2_a.fill(0xff);
-
- // two-byte literals
+ DEBUG_PRINTF("unibytes %zu dibytes %zu\n", onechar.size(),
+ twochar.size());
+ array<u8, 16> lo1_a;
+ array<u8, 16> lo2_a;
+ array<u8, 16> hi1_a;
+ array<u8, 16> hi2_a;
+
+ lo1_a.fill(0xff);
+ lo2_a.fill(0xff);
+ hi1_a.fill(0xff);
+ hi2_a.fill(0xff);
+
+ // two-byte literals
vector<array<u16, 4>> nibble_masks;
for (const auto &p : twochar) {
DEBUG_PRINTF("%02hhx %02hhx\n", p.first, p.second);
@@ -156,10 +156,10 @@ bool shuftiBuildDoubleMasks(const CharReach &onechar,
u16 b_lo = 1U << (p.second & 0xf);
u16 b_hi = 1U << (p.second >> 4);
nibble_masks.push_back({{a_lo, a_hi, b_lo, b_hi}});
- }
-
- // one-byte literals (second byte is a wildcard)
- for (size_t it = onechar.find_first(); it != CharReach::npos;
+ }
+
+ // one-byte literals (second byte is a wildcard)
+ for (size_t it = onechar.find_first(); it != CharReach::npos;
it = onechar.find_next(it)) {
DEBUG_PRINTF("%02hhx\n", (u8)it);
u16 a_lo = 1U << (it & 0xf);
@@ -167,7 +167,7 @@ bool shuftiBuildDoubleMasks(const CharReach &onechar,
u16 wildcard = 0xffff;
nibble_masks.push_back({{a_lo, a_hi, wildcard, wildcard}});
}
-
+
// try to merge strings into shared buckets
for (u32 i = 0; i < 4; i++) {
map<array<u16, 4>, array<u16, 4>> new_masks;
@@ -179,13 +179,13 @@ bool shuftiBuildDoubleMasks(const CharReach &onechar,
} else {
new_masks[key] = or_array(new_masks[key], a);
}
- }
+ }
nibble_masks.clear();
for (const auto &e : new_masks) {
nibble_masks.push_back(e.second);
}
- }
-
+ }
+
if (nibble_masks.size() > MAX_BUCKETS) {
DEBUG_PRINTF("too many buckets needed (%zu)\n", nibble_masks.size());
return false;
@@ -200,26 +200,26 @@ bool shuftiBuildDoubleMasks(const CharReach &onechar,
i++;
}
- memcpy(lo1, lo1_a.data(), sizeof(m128));
- memcpy(lo2, lo2_a.data(), sizeof(m128));
- memcpy(hi1, hi1_a.data(), sizeof(m128));
- memcpy(hi2, hi2_a.data(), sizeof(m128));
-
+ memcpy(lo1, lo1_a.data(), sizeof(m128));
+ memcpy(lo2, lo2_a.data(), sizeof(m128));
+ memcpy(hi1, hi1_a.data(), sizeof(m128));
+ memcpy(hi2, hi2_a.data(), sizeof(m128));
+
return true;
-}
-
-#ifdef DUMP_SUPPORT
-
+}
+
+#ifdef DUMP_SUPPORT
+
CharReach shufti2cr(const u8 *lo, const u8 *hi) {
- CharReach cr;
- for (u32 i = 0; i < 256; i++) {
- if (lo[(u8)i & 0xf] & hi[(u8)i >> 4]) {
- cr.set(i);
- }
- }
- return cr;
-}
-
-#endif // DUMP_SUPPORT
-
-} // namespace ue2
+ CharReach cr;
+ for (u32 i = 0; i < 256; i++) {
+ if (lo[(u8)i & 0xf] & hi[(u8)i >> 4]) {
+ cr.set(i);
+ }
+ }
+ return cr;
+}
+
+#endif // DUMP_SUPPORT
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfa/shufticompile.h b/contrib/libs/hyperscan/src/nfa/shufticompile.h
index fefd59f9b4..59b9c38dff 100644
--- a/contrib/libs/hyperscan/src/nfa/shufticompile.h
+++ b/contrib/libs/hyperscan/src/nfa/shufticompile.h
@@ -1,73 +1,73 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Shufti acceleration: compile code.
- */
-
-#ifndef SHUFTI_COMPILE_H
-#define SHUFTI_COMPILE_H
-
-#include "ue2common.h"
-#include "util/charreach.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Shufti acceleration: compile code.
+ */
+
+#ifndef SHUFTI_COMPILE_H
+#define SHUFTI_COMPILE_H
+
+#include "ue2common.h"
+#include "util/charreach.h"
#include "util/flat_containers.h"
-
-#include <utility>
-
-namespace ue2 {
-
-/** \brief Single-byte variant.
- *
- * Returns -1 if unable to construct masks, otherwise returns number of bits
- * used in the mask.
- *
- * Note: always able to construct masks for 8 or fewer characters.
- */
+
+#include <utility>
+
+namespace ue2 {
+
+/** \brief Single-byte variant.
+ *
+ * Returns -1 if unable to construct masks, otherwise returns number of bits
+ * used in the mask.
+ *
+ * Note: always able to construct masks for 8 or fewer characters.
+ */
int shuftiBuildMasks(const CharReach &chars, u8 *lo, u8 *hi);
-
+
/** \brief Double-byte variant
*
* Returns false if we are unable to build the masks (too many buckets required)
*/
bool shuftiBuildDoubleMasks(const CharReach &onechar,
- const flat_set<std::pair<u8, u8>> &twochar,
+ const flat_set<std::pair<u8, u8>> &twochar,
u8 *lo1, u8 *hi1, u8 *lo2, u8 *hi2);
-
-#ifdef DUMP_SUPPORT
-
-/**
- * \brief Dump code: returns a CharReach with the reach that would match this
- * shufti.
- */
+
+#ifdef DUMP_SUPPORT
+
+/**
+ * \brief Dump code: returns a CharReach with the reach that would match this
+ * shufti.
+ */
CharReach shufti2cr(const u8 *lo, const u8 *hi);
-
-#endif // DUMP_SUPPORT
-
-} // namespace ue2
-
-#endif // SHUFTI_COMPILE_H
+
+#endif // DUMP_SUPPORT
+
+} // namespace ue2
+
+#endif // SHUFTI_COMPILE_H
diff --git a/contrib/libs/hyperscan/src/nfa/truffle.c b/contrib/libs/hyperscan/src/nfa/truffle.c
index 667d8ea13d..be6b312cf2 100644
--- a/contrib/libs/hyperscan/src/nfa/truffle.c
+++ b/contrib/libs/hyperscan/src/nfa/truffle.c
@@ -1,106 +1,106 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Matches a byte in a charclass using three shuffles
- */
-
-
-#include "ue2common.h"
-#include "truffle.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Matches a byte in a charclass using three shuffles
+ */
+
+
+#include "ue2common.h"
+#include "truffle.h"
#include "util/arch.h"
-#include "util/bitutils.h"
-#include "util/simd_utils.h"
-
+#include "util/bitutils.h"
+#include "util/simd_utils.h"
+
#if !defined(HAVE_AVX2)
-
-static really_inline
+
+static really_inline
const u8 *lastMatch(const u8 *buf, u32 z) {
- if (unlikely(z != 0xffff)) {
+ if (unlikely(z != 0xffff)) {
u32 pos = clz32(~z & 0xffff);
assert(pos >= 16 && pos < 32);
return buf + (31 - pos);
- }
-
- return NULL; // no match
-}
-
-static really_inline
+ }
+
+ return NULL; // no match
+}
+
+static really_inline
const u8 *firstMatch(const u8 *buf, u32 z) {
- if (unlikely(z != 0xffff)) {
+ if (unlikely(z != 0xffff)) {
u32 pos = ctz32(~z & 0xffff);
assert(pos < 16);
return buf + pos;
- }
-
- return NULL; // no match
-}
-
-static really_inline
-u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) {
-
- m128 highconst = _mm_set1_epi8(0x80);
- m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201);
-
- // and now do the real work
+ }
+
+ return NULL; // no match
+}
+
+static really_inline
+u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) {
+
+ m128 highconst = _mm_set1_epi8(0x80);
+ m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201);
+
+ // and now do the real work
m128 shuf1 = pshufb_m128(shuf_mask_lo_highclear, v);
- m128 t1 = xor128(v, highconst);
+ m128 t1 = xor128(v, highconst);
m128 shuf2 = pshufb_m128(shuf_mask_lo_highset, t1);
m128 t2 = andnot128(highconst, rshift64_m128(v, 4));
m128 shuf3 = pshufb_m128(shuf_mask_hi, t2);
- m128 tmp = and128(or128(shuf1, shuf2), shuf3);
- m128 tmp2 = eq128(tmp, zeroes128());
- u32 z = movemask128(tmp2);
-
- return z;
-}
-
-static
-const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
- const u8 *buf, const u8 *buf_end) {
- uintptr_t len = buf_end - buf;
- assert(len < 16);
-
- m128 chars = zeroes128();
- memcpy(&chars, buf, len);
-
- u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
- // can't be these bytes in z
+ m128 tmp = and128(or128(shuf1, shuf2), shuf3);
+ m128 tmp2 = eq128(tmp, zeroes128());
+ u32 z = movemask128(tmp2);
+
+ return z;
+}
+
+static
+const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
+ const u8 *buf, const u8 *buf_end) {
+ uintptr_t len = buf_end - buf;
+ assert(len < 16);
+
+ m128 chars = zeroes128();
+ memcpy(&chars, buf, len);
+
+ u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
+ // can't be these bytes in z
u32 mask = (0xffff >> (16 - len)) ^ 0xffff;
const u8 *rv = firstMatch(buf, z | mask);
-
- if (rv) {
- return rv;
- } else {
- return buf_end;
- }
-}
-
+
+ if (rv) {
+ return rv;
+ } else {
+ return buf_end;
+ }
+}
+
static really_inline
const u8 *fwdBlock(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
m128 v, const u8 *buf) {
@@ -115,124 +115,124 @@ const u8 *revBlock(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
return lastMatch(buf, z);
}
-const u8 *truffleExec(m128 shuf_mask_lo_highclear,
+const u8 *truffleExec(m128 shuf_mask_lo_highclear,
m128 shuf_mask_lo_highset,
const u8 *buf, const u8 *buf_end) {
- DEBUG_PRINTF("len %zu\n", buf_end - buf);
-
- assert(buf && buf_end);
- assert(buf < buf_end);
- const u8 *rv;
-
- if (buf_end - buf < 16) {
- return truffleMini(shuf_mask_lo_highclear, shuf_mask_lo_highset, buf,
- buf_end);
- }
-
- size_t min = (size_t)buf % 16;
- assert(buf_end - buf >= 16);
-
- // Preconditioning: most of the time our buffer won't be aligned.
- m128 chars = loadu128(buf);
- rv = fwdBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, buf);
- if (rv) {
- return rv;
- }
- buf += (16 - min);
-
- const u8 *last_block = buf_end - 16;
- while (buf < last_block) {
- m128 lchars = load128(buf);
- rv = fwdBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, lchars,
- buf);
- if (rv) {
- return rv;
- }
- buf += 16;
- }
-
- // Use an unaligned load to mop up the last 16 bytes and get an accurate
- // picture to buf_end.
- assert(buf <= buf_end && buf >= buf_end - 16);
- chars = loadu128(buf_end - 16);
- rv = fwdBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars,
- buf_end - 16);
- if (rv) {
- return rv;
- }
-
- return buf_end;
-}
-
-static
-const u8 *truffleRevMini(m128 shuf_mask_lo_highclear,
+ DEBUG_PRINTF("len %zu\n", buf_end - buf);
+
+ assert(buf && buf_end);
+ assert(buf < buf_end);
+ const u8 *rv;
+
+ if (buf_end - buf < 16) {
+ return truffleMini(shuf_mask_lo_highclear, shuf_mask_lo_highset, buf,
+ buf_end);
+ }
+
+ size_t min = (size_t)buf % 16;
+ assert(buf_end - buf >= 16);
+
+ // Preconditioning: most of the time our buffer won't be aligned.
+ m128 chars = loadu128(buf);
+ rv = fwdBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, buf);
+ if (rv) {
+ return rv;
+ }
+ buf += (16 - min);
+
+ const u8 *last_block = buf_end - 16;
+ while (buf < last_block) {
+ m128 lchars = load128(buf);
+ rv = fwdBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, lchars,
+ buf);
+ if (rv) {
+ return rv;
+ }
+ buf += 16;
+ }
+
+ // Use an unaligned load to mop up the last 16 bytes and get an accurate
+ // picture to buf_end.
+ assert(buf <= buf_end && buf >= buf_end - 16);
+ chars = loadu128(buf_end - 16);
+ rv = fwdBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars,
+ buf_end - 16);
+ if (rv) {
+ return rv;
+ }
+
+ return buf_end;
+}
+
+static
+const u8 *truffleRevMini(m128 shuf_mask_lo_highclear,
m128 shuf_mask_lo_highset, const u8 *buf,
const u8 *buf_end) {
- uintptr_t len = buf_end - buf;
- assert(len < 16);
-
- m128 chars = zeroes128();
- memcpy(&chars, buf, len);
-
+ uintptr_t len = buf_end - buf;
+ assert(len < 16);
+
+ m128 chars = zeroes128();
+ memcpy(&chars, buf, len);
+
u32 mask = (0xffff >> (16 - len)) ^ 0xffff;
- u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
- const u8 *rv = lastMatch(buf, z | mask);
-
- if (rv) {
- return rv;
- }
- return buf - 1;
-}
-
-const u8 *rtruffleExec(m128 shuf_mask_lo_highclear,
- m128 shuf_mask_lo_highset,
- const u8 *buf, const u8 *buf_end) {
- assert(buf && buf_end);
- assert(buf < buf_end);
- const u8 *rv;
-
- DEBUG_PRINTF("len %zu\n", buf_end - buf);
-
- if (buf_end - buf < 16) {
- return truffleRevMini(shuf_mask_lo_highclear, shuf_mask_lo_highset, buf,
- buf_end);
- }
-
- assert(buf_end - buf >= 16);
-
- // Preconditioning: most of the time our buffer won't be aligned.
- m128 chars = loadu128(buf_end - 16);
- rv = revBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars,
- buf_end - 16);
- if (rv) {
- return rv;
- }
- buf_end = (const u8 *)((size_t)buf_end & ~((size_t)0xf));
-
- const u8 *last_block = buf + 16;
- while (buf_end > last_block) {
- buf_end -= 16;
- m128 lchars = load128(buf_end);
- rv = revBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, lchars,
- buf_end);
- if (rv) {
- return rv;
- }
- }
-
- // Use an unaligned load to mop up the last 16 bytes and get an accurate
- // picture to buf_end.
- chars = loadu128(buf);
- rv = revBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, buf);
- if (rv) {
- return rv;
- }
-
- return buf - 1;
-}
-
+ u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
+ const u8 *rv = lastMatch(buf, z | mask);
+
+ if (rv) {
+ return rv;
+ }
+ return buf - 1;
+}
+
+const u8 *rtruffleExec(m128 shuf_mask_lo_highclear,
+ m128 shuf_mask_lo_highset,
+ const u8 *buf, const u8 *buf_end) {
+ assert(buf && buf_end);
+ assert(buf < buf_end);
+ const u8 *rv;
+
+ DEBUG_PRINTF("len %zu\n", buf_end - buf);
+
+ if (buf_end - buf < 16) {
+ return truffleRevMini(shuf_mask_lo_highclear, shuf_mask_lo_highset, buf,
+ buf_end);
+ }
+
+ assert(buf_end - buf >= 16);
+
+ // Preconditioning: most of the time our buffer won't be aligned.
+ m128 chars = loadu128(buf_end - 16);
+ rv = revBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars,
+ buf_end - 16);
+ if (rv) {
+ return rv;
+ }
+ buf_end = (const u8 *)((size_t)buf_end & ~((size_t)0xf));
+
+ const u8 *last_block = buf + 16;
+ while (buf_end > last_block) {
+ buf_end -= 16;
+ m128 lchars = load128(buf_end);
+ rv = revBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, lchars,
+ buf_end);
+ if (rv) {
+ return rv;
+ }
+ }
+
+ // Use an unaligned load to mop up the last 16 bytes and get an accurate
+ // picture to buf_end.
+ chars = loadu128(buf);
+ rv = revBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, buf);
+ if (rv) {
+ return rv;
+ }
+
+ return buf - 1;
+}
+
#elif !defined(HAVE_AVX512)
-
+
// AVX2
static really_inline
diff --git a/contrib/libs/hyperscan/src/nfa/truffle.h b/contrib/libs/hyperscan/src/nfa/truffle.h
index 327061adcd..f67227ad1e 100644
--- a/contrib/libs/hyperscan/src/nfa/truffle.h
+++ b/contrib/libs/hyperscan/src/nfa/truffle.h
@@ -1,57 +1,57 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/** \file
* \brief Truffle: fully general character class acceleration.
*
* Utilises the SSSE3 pshufb or AVX2 vpshufb shuffle instructions
*/
-#ifndef TRUFFLE_H
-#define TRUFFLE_H
-
-#include "util/simd_types.h"
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-const u8 *truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
- const u8 *buf, const u8 *buf_end);
-
-const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
- const u8 *buf, const u8 *buf_end);
-
-#ifdef __cplusplus
-}
-#endif
-
-
-#endif /* TRUFFLE_H */
-
+#ifndef TRUFFLE_H
+#define TRUFFLE_H
+
+#include "util/simd_types.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+const u8 *truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
+ const u8 *buf, const u8 *buf_end);
+
+const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset,
+ const u8 *buf, const u8 *buf_end);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* TRUFFLE_H */
+
diff --git a/contrib/libs/hyperscan/src/nfa/trufflecompile.cpp b/contrib/libs/hyperscan/src/nfa/trufflecompile.cpp
index 6cca946c08..f19de0ee04 100644
--- a/contrib/libs/hyperscan/src/nfa/trufflecompile.cpp
+++ b/contrib/libs/hyperscan/src/nfa/trufflecompile.cpp
@@ -1,96 +1,96 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Truffle compiler
- *
- * truffle is always able to represent an entire character class, providing a
- * backstop to other acceleration engines.
- */
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
-#include "trufflecompile.h"
+/** \file
+ * \brief Truffle compiler
+ *
+ * truffle is always able to represent an entire character class, providing a
+ * backstop to other acceleration engines.
+ */
-#include "ue2common.h"
-#include "util/charreach.h"
+#include "trufflecompile.h"
+
+#include "ue2common.h"
+#include "util/charreach.h"
#include "util/dump_mask.h"
-#include "util/simd_types.h"
-
+#include "util/simd_types.h"
+
#include <cstring>
-using namespace std;
-
-namespace ue2 {
-
-/*
- * To represent an entire charclass (256 chars), truffle uses two 128 bit
- * masks - the first is for chars that do not have the high bit/bit 7 set,
- * i.e. chars {0..127}. The second mask is for chars with bit 7 set.
- *
- * Each char to be represented is split into the low nibble (bits {0..3}) and
- * bits {4,5,6} - the low nibble is the offset into the mask and the value of
- * bits 456 is the bit that is set at that offset.
- */
-
+using namespace std;
+
+namespace ue2 {
+
+/*
+ * To represent an entire charclass (256 chars), truffle uses two 128 bit
+ * masks - the first is for chars that do not have the high bit/bit 7 set,
+ * i.e. chars {0..127}. The second mask is for chars with bit 7 set.
+ *
+ * Each char to be represented is split into the low nibble (bits {0..3}) and
+ * bits {4,5,6} - the low nibble is the offset into the mask and the value of
+ * bits 456 is the bit that is set at that offset.
+ */
+
void truffleBuildMasks(const CharReach &cr, u8 *shuf_mask_lo_highclear,
u8 *shuf_mask_lo_highset) {
memset(shuf_mask_lo_highset, 0, sizeof(m128));
memset(shuf_mask_lo_highclear, 0, sizeof(m128));
-
- for (size_t v = cr.find_first(); v != CharReach::npos;
- v = cr.find_next(v)) {
- DEBUG_PRINTF("adding 0x%02x to %s\n", (u8)v, (v & 0x80) ? "highset" : "highclear");
+
+ for (size_t v = cr.find_first(); v != CharReach::npos;
+ v = cr.find_next(v)) {
+ DEBUG_PRINTF("adding 0x%02x to %s\n", (u8)v, (v & 0x80) ? "highset" : "highclear");
u8 *change_mask = (v & 0x80) ? shuf_mask_lo_highset : shuf_mask_lo_highclear;
- u8 low_nibble = v & 0xf;
- u8 bits_456 = (v & 0x70) >> 4;
- change_mask[low_nibble] |= 1 << bits_456;
- }
-}
-
-/*
- * Reconstruct the charclass that the truffle masks represent
- */
+ u8 low_nibble = v & 0xf;
+ u8 bits_456 = (v & 0x70) >> 4;
+ change_mask[low_nibble] |= 1 << bits_456;
+ }
+}
+
+/*
+ * Reconstruct the charclass that the truffle masks represent
+ */
CharReach truffle2cr(const u8 *highclear, const u8 *highset) {
- CharReach cr;
- for (u8 i = 0; i < 16; i++) {
+ CharReach cr;
+ for (u8 i = 0; i < 16; i++) {
u32 bits_456 = highclear[i];
- while (bits_456) {
- u32 pos = findAndClearLSB_32(&bits_456);
- assert(pos < 8);
- cr.set(pos << 4 | i);
- }
+ while (bits_456) {
+ u32 pos = findAndClearLSB_32(&bits_456);
+ assert(pos < 8);
+ cr.set(pos << 4 | i);
+ }
bits_456 = highset[i];
- while (bits_456) {
- u32 pos = findAndClearLSB_32(&bits_456);
- assert(pos < 8);
- cr.set(0x80 | pos << 4 | i);
- }
- }
- return cr;
-}
-
-} // namespc
+ while (bits_456) {
+ u32 pos = findAndClearLSB_32(&bits_456);
+ assert(pos < 8);
+ cr.set(0x80 | pos << 4 | i);
+ }
+ }
+ return cr;
+}
+
+} // namespc
diff --git a/contrib/libs/hyperscan/src/nfa/trufflecompile.h b/contrib/libs/hyperscan/src/nfa/trufflecompile.h
index c58da03f63..14b314f391 100644
--- a/contrib/libs/hyperscan/src/nfa/trufflecompile.h
+++ b/contrib/libs/hyperscan/src/nfa/trufflecompile.h
@@ -1,43 +1,43 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TRUFFLECOMPILE_H
-#define TRUFFLECOMPILE_H
-
-#include "ue2common.h"
-#include "util/charreach.h"
-
-namespace ue2 {
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TRUFFLECOMPILE_H
+#define TRUFFLECOMPILE_H
+
+#include "ue2common.h"
+#include "util/charreach.h"
+
+namespace ue2 {
+
void truffleBuildMasks(const CharReach &cr, u8 *mask1, u8 *mask2);
CharReach truffle2cr(const u8 *lo_in, const u8 *hi_in);
-
-}
-
-#endif /* TRUFFLECOMPILE_H */
-
+
+}
+
+#endif /* TRUFFLECOMPILE_H */
+
diff --git a/contrib/libs/hyperscan/src/nfa/vermicelli.h b/contrib/libs/hyperscan/src/nfa/vermicelli.h
index 358add7c26..ed797d83f9 100644
--- a/contrib/libs/hyperscan/src/nfa/vermicelli.h
+++ b/contrib/libs/hyperscan/src/nfa/vermicelli.h
@@ -1,54 +1,54 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Vermicelli: single-byte and double-byte acceleration.
- */
-
-#ifndef VERMICELLI_H
-#define VERMICELLI_H
-
-#include "util/bitutils.h"
-#include "util/simd_utils.h"
-#include "util/unaligned.h"
-
-#include "vermicelli_sse.h"
-
-static really_inline
-const u8 *vermicelliExec(char c, char nocase, const u8 *buf,
- const u8 *buf_end) {
- DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n",
- nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
- assert(buf < buf_end);
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Vermicelli: single-byte and double-byte acceleration.
+ */
+
+#ifndef VERMICELLI_H
+#define VERMICELLI_H
+
+#include "util/bitutils.h"
+#include "util/simd_utils.h"
+#include "util/unaligned.h"
+
+#include "vermicelli_sse.h"
+
+static really_inline
+const u8 *vermicelliExec(char c, char nocase, const u8 *buf,
+ const u8 *buf_end) {
+ DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n",
+ nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
+ assert(buf < buf_end);
+
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
- // Handle small scans.
+ // Handle small scans.
#ifdef HAVE_AVX512
if (buf_end - buf <= VERM_BOUNDARY) {
const u8 *ptr = nocase
@@ -60,61 +60,61 @@ const u8 *vermicelliExec(char c, char nocase, const u8 *buf,
return buf_end;
}
#else
- if (buf_end - buf < VERM_BOUNDARY) {
- for (; buf < buf_end; buf++) {
- char cur = (char)*buf;
- if (nocase) {
- cur &= CASE_CLEAR;
- }
- if (cur == c) {
- break;
- }
- }
- return buf;
- }
+ if (buf_end - buf < VERM_BOUNDARY) {
+ for (; buf < buf_end; buf++) {
+ char cur = (char)*buf;
+ if (nocase) {
+ cur &= CASE_CLEAR;
+ }
+ if (cur == c) {
+ break;
+ }
+ }
+ return buf;
+ }
#endif
-
- uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
- if (min) {
- // Input isn't aligned, so we need to run one iteration with an
- // unaligned load, then skip buf forward to the next aligned address.
- // There's some small overlap here, but we don't mind scanning it twice
- // if we can do it quickly, do we?
- const u8 *ptr = nocase ? vermUnalignNocase(chars, buf, 0)
- : vermUnalign(chars, buf, 0);
- if (ptr) {
- return ptr;
- }
-
- buf += VERM_BOUNDARY - min;
+
+ uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
+ if (min) {
+ // Input isn't aligned, so we need to run one iteration with an
+ // unaligned load, then skip buf forward to the next aligned address.
+ // There's some small overlap here, but we don't mind scanning it twice
+ // if we can do it quickly, do we?
+ const u8 *ptr = nocase ? vermUnalignNocase(chars, buf, 0)
+ : vermUnalign(chars, buf, 0);
+ if (ptr) {
+ return ptr;
+ }
+
+ buf += VERM_BOUNDARY - min;
assert(buf < buf_end);
- }
-
- // Aligned loops from here on in
- const u8 *ptr = nocase ? vermSearchAlignedNocase(chars, buf, buf_end - 1, 0)
- : vermSearchAligned(chars, buf, buf_end - 1, 0);
- if (ptr) {
- return ptr;
- }
-
- // Tidy up the mess at the end
- ptr = nocase ? vermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 0)
- : vermUnalign(chars, buf_end - VERM_BOUNDARY, 0);
- return ptr ? ptr : buf_end;
-}
-
-/* like vermicelliExec except returns the address of the first character which
- * is not c */
-static really_inline
-const u8 *nvermicelliExec(char c, char nocase, const u8 *buf,
- const u8 *buf_end) {
- DEBUG_PRINTF("nverm scan %s\\x%02hhx over %zu bytes\n",
- nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
- assert(buf < buf_end);
-
+ }
+
+ // Aligned loops from here on in
+ const u8 *ptr = nocase ? vermSearchAlignedNocase(chars, buf, buf_end - 1, 0)
+ : vermSearchAligned(chars, buf, buf_end - 1, 0);
+ if (ptr) {
+ return ptr;
+ }
+
+ // Tidy up the mess at the end
+ ptr = nocase ? vermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 0)
+ : vermUnalign(chars, buf_end - VERM_BOUNDARY, 0);
+ return ptr ? ptr : buf_end;
+}
+
+/* like vermicelliExec except returns the address of the first character which
+ * is not c */
+static really_inline
+const u8 *nvermicelliExec(char c, char nocase, const u8 *buf,
+ const u8 *buf_end) {
+ DEBUG_PRINTF("nverm scan %s\\x%02hhx over %zu bytes\n",
+ nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
+ assert(buf < buf_end);
+
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
- // Handle small scans.
+ // Handle small scans.
#ifdef HAVE_AVX512
if (buf_end - buf <= VERM_BOUNDARY) {
const u8 *ptr = nocase
@@ -126,59 +126,59 @@ const u8 *nvermicelliExec(char c, char nocase, const u8 *buf,
return buf_end;
}
#else
- if (buf_end - buf < VERM_BOUNDARY) {
- for (; buf < buf_end; buf++) {
- char cur = (char)*buf;
- if (nocase) {
- cur &= CASE_CLEAR;
- }
- if (cur != c) {
- break;
- }
- }
- return buf;
- }
+ if (buf_end - buf < VERM_BOUNDARY) {
+ for (; buf < buf_end; buf++) {
+ char cur = (char)*buf;
+ if (nocase) {
+ cur &= CASE_CLEAR;
+ }
+ if (cur != c) {
+ break;
+ }
+ }
+ return buf;
+ }
#endif
-
- size_t min = (size_t)buf % VERM_BOUNDARY;
- if (min) {
- // Input isn't aligned, so we need to run one iteration with an
- // unaligned load, then skip buf forward to the next aligned address.
- // There's some small overlap here, but we don't mind scanning it twice
- // if we can do it quickly, do we?
- const u8 *ptr = nocase ? vermUnalignNocase(chars, buf, 1)
- : vermUnalign(chars, buf, 1);
- if (ptr) {
- return ptr;
- }
-
- buf += VERM_BOUNDARY - min;
+
+ size_t min = (size_t)buf % VERM_BOUNDARY;
+ if (min) {
+ // Input isn't aligned, so we need to run one iteration with an
+ // unaligned load, then skip buf forward to the next aligned address.
+ // There's some small overlap here, but we don't mind scanning it twice
+ // if we can do it quickly, do we?
+ const u8 *ptr = nocase ? vermUnalignNocase(chars, buf, 1)
+ : vermUnalign(chars, buf, 1);
+ if (ptr) {
+ return ptr;
+ }
+
+ buf += VERM_BOUNDARY - min;
assert(buf < buf_end);
- }
-
- // Aligned loops from here on in
- const u8 *ptr = nocase ? vermSearchAlignedNocase(chars, buf, buf_end - 1, 1)
- : vermSearchAligned(chars, buf, buf_end - 1, 1);
- if (ptr) {
- return ptr;
- }
-
- // Tidy up the mess at the end
- ptr = nocase ? vermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 1)
- : vermUnalign(chars, buf_end - VERM_BOUNDARY, 1);
- return ptr ? ptr : buf_end;
-}
-
-static really_inline
-const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
- const u8 *buf_end) {
- DEBUG_PRINTF("double verm scan %s\\x%02hhx%02hhx over %zu bytes\n",
- nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf));
- assert(buf < buf_end);
-
- VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */
- VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */
-
+ }
+
+ // Aligned loops from here on in
+ const u8 *ptr = nocase ? vermSearchAlignedNocase(chars, buf, buf_end - 1, 1)
+ : vermSearchAligned(chars, buf, buf_end - 1, 1);
+ if (ptr) {
+ return ptr;
+ }
+
+ // Tidy up the mess at the end
+ ptr = nocase ? vermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 1)
+ : vermUnalign(chars, buf_end - VERM_BOUNDARY, 1);
+ return ptr ? ptr : buf_end;
+}
+
+static really_inline
+const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
+ const u8 *buf_end) {
+ DEBUG_PRINTF("double verm scan %s\\x%02hhx%02hhx over %zu bytes\n",
+ nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf));
+ assert(buf < buf_end);
+
+ VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */
+ VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */
+
#ifdef HAVE_AVX512
if (buf_end - buf <= VERM_BOUNDARY) {
const u8 *ptr = nocase
@@ -201,19 +201,19 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
assert((buf_end - buf) >= VERM_BOUNDARY);
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
- if (min) {
- // Input isn't aligned, so we need to run one iteration with an
- // unaligned load, then skip buf forward to the next aligned address.
- // There's some small overlap here, but we don't mind scanning it twice
- // if we can do it quickly, do we?
- const u8 *ptr = nocase
- ? dvermPreconditionNocase(chars1, chars2, buf)
- : dvermPrecondition(chars1, chars2, buf);
- if (ptr) {
- return ptr;
- }
-
- buf += VERM_BOUNDARY - min;
+ if (min) {
+ // Input isn't aligned, so we need to run one iteration with an
+ // unaligned load, then skip buf forward to the next aligned address.
+ // There's some small overlap here, but we don't mind scanning it twice
+ // if we can do it quickly, do we?
+ const u8 *ptr = nocase
+ ? dvermPreconditionNocase(chars1, chars2, buf)
+ : dvermPrecondition(chars1, chars2, buf);
+ if (ptr) {
+ return ptr;
+ }
+
+ buf += VERM_BOUNDARY - min;
assert(buf < buf_end);
}
@@ -285,18 +285,18 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2,
const u8 *p = dvermPreconditionMasked(chars1, chars2, mask1, mask2, buf);
if (p) {
return p;
- }
+ }
buf += VERM_BOUNDARY - min;
assert(buf < buf_end);
- }
-
- // Aligned loops from here on in
+ }
+
+ // Aligned loops from here on in
const u8 *ptr = dvermSearchAlignedMasked(chars1, chars2, mask1, mask2, c1,
c2, m1, m2, buf, buf_end);
if (ptr) {
return ptr;
- }
+ }
// Tidy up the mess at the end
ptr = dvermPreconditionMasked(chars1, chars2, mask1, mask2,
@@ -313,20 +313,20 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2,
}
return buf_end;
-}
-
-// Reverse vermicelli scan. Provides exact semantics and returns (buf - 1) if
-// character not found.
-static really_inline
-const u8 *rvermicelliExec(char c, char nocase, const u8 *buf,
- const u8 *buf_end) {
- DEBUG_PRINTF("rev verm scan %s\\x%02hhx over %zu bytes\n",
- nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
- assert(buf < buf_end);
-
+}
+
+// Reverse vermicelli scan. Provides exact semantics and returns (buf - 1) if
+// character not found.
+static really_inline
+const u8 *rvermicelliExec(char c, char nocase, const u8 *buf,
+ const u8 *buf_end) {
+ DEBUG_PRINTF("rev verm scan %s\\x%02hhx over %zu bytes\n",
+ nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
+ assert(buf < buf_end);
+
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
- // Handle small scans.
+ // Handle small scans.
#ifdef HAVE_AVX512
if (buf_end - buf <= VERM_BOUNDARY) {
const u8 *ptr = nocase
@@ -338,26 +338,26 @@ const u8 *rvermicelliExec(char c, char nocase, const u8 *buf,
return buf - 1;
}
#else
- if (buf_end - buf < VERM_BOUNDARY) {
- for (buf_end--; buf_end >= buf; buf_end--) {
- char cur = (char)*buf_end;
- if (nocase) {
- cur &= CASE_CLEAR;
- }
- if (cur == c) {
- break;
- }
- }
- return buf_end;
- }
+ if (buf_end - buf < VERM_BOUNDARY) {
+ for (buf_end--; buf_end >= buf; buf_end--) {
+ char cur = (char)*buf_end;
+ if (nocase) {
+ cur &= CASE_CLEAR;
+ }
+ if (cur == c) {
+ break;
+ }
+ }
+ return buf_end;
+ }
#endif
-
- size_t min = (size_t)buf_end % VERM_BOUNDARY;
- if (min) {
- // Input isn't aligned, so we need to run one iteration with an
- // unaligned load, then skip buf backward to the next aligned address.
- // There's some small overlap here, but we don't mind scanning it twice
- // if we can do it quickly, do we?
+
+ size_t min = (size_t)buf_end % VERM_BOUNDARY;
+ if (min) {
+ // Input isn't aligned, so we need to run one iteration with an
+ // unaligned load, then skip buf backward to the next aligned address.
+ // There's some small overlap here, but we don't mind scanning it twice
+ // if we can do it quickly, do we?
const u8 *ptr = nocase ? rvermUnalignNocase(chars,
buf_end - VERM_BOUNDARY,
0)
@@ -366,39 +366,39 @@ const u8 *rvermicelliExec(char c, char nocase, const u8 *buf,
if (ptr) {
return ptr;
- }
-
- buf_end -= min;
- if (buf >= buf_end) {
- return buf_end;
- }
- }
-
- // Aligned loops from here on in.
- const u8 *ptr = nocase ? rvermSearchAlignedNocase(chars, buf, buf_end, 0)
- : rvermSearchAligned(chars, buf, buf_end, 0);
- if (ptr) {
- return ptr;
- }
-
- // Tidy up the mess at the end, return buf - 1 if not found.
- ptr = nocase ? rvermUnalignNocase(chars, buf, 0)
- : rvermUnalign(chars, buf, 0);
- return ptr ? ptr : buf - 1;
-}
-
-/* like rvermicelliExec except returns the address of the last character which
- * is not c */
-static really_inline
-const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf,
- const u8 *buf_end) {
- DEBUG_PRINTF("rev verm scan %s\\x%02hhx over %zu bytes\n",
- nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
- assert(buf < buf_end);
-
+ }
+
+ buf_end -= min;
+ if (buf >= buf_end) {
+ return buf_end;
+ }
+ }
+
+ // Aligned loops from here on in.
+ const u8 *ptr = nocase ? rvermSearchAlignedNocase(chars, buf, buf_end, 0)
+ : rvermSearchAligned(chars, buf, buf_end, 0);
+ if (ptr) {
+ return ptr;
+ }
+
+ // Tidy up the mess at the end, return buf - 1 if not found.
+ ptr = nocase ? rvermUnalignNocase(chars, buf, 0)
+ : rvermUnalign(chars, buf, 0);
+ return ptr ? ptr : buf - 1;
+}
+
+/* like rvermicelliExec except returns the address of the last character which
+ * is not c */
+static really_inline
+const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf,
+ const u8 *buf_end) {
+ DEBUG_PRINTF("rev verm scan %s\\x%02hhx over %zu bytes\n",
+ nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
+ assert(buf < buf_end);
+
VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
- // Handle small scans.
+ // Handle small scans.
#ifdef HAVE_AVX512
if (buf_end - buf <= VERM_BOUNDARY) {
const u8 *ptr = nocase
@@ -410,26 +410,26 @@ const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf,
return buf - 1;
}
#else
- if (buf_end - buf < VERM_BOUNDARY) {
- for (buf_end--; buf_end >= buf; buf_end--) {
- char cur = (char)*buf_end;
- if (nocase) {
- cur &= CASE_CLEAR;
- }
- if (cur != c) {
- break;
- }
- }
- return buf_end;
- }
+ if (buf_end - buf < VERM_BOUNDARY) {
+ for (buf_end--; buf_end >= buf; buf_end--) {
+ char cur = (char)*buf_end;
+ if (nocase) {
+ cur &= CASE_CLEAR;
+ }
+ if (cur != c) {
+ break;
+ }
+ }
+ return buf_end;
+ }
#endif
-
- size_t min = (size_t)buf_end % VERM_BOUNDARY;
- if (min) {
- // Input isn't aligned, so we need to run one iteration with an
- // unaligned load, then skip buf backward to the next aligned address.
- // There's some small overlap here, but we don't mind scanning it twice
- // if we can do it quickly, do we?
+
+ size_t min = (size_t)buf_end % VERM_BOUNDARY;
+ if (min) {
+ // Input isn't aligned, so we need to run one iteration with an
+ // unaligned load, then skip buf backward to the next aligned address.
+ // There's some small overlap here, but we don't mind scanning it twice
+ // if we can do it quickly, do we?
const u8 *ptr = nocase ? rvermUnalignNocase(chars,
buf_end - VERM_BOUNDARY,
1)
@@ -438,38 +438,38 @@ const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf,
if (ptr) {
return ptr;
- }
-
- buf_end -= min;
- if (buf >= buf_end) {
- return buf_end;
- }
- }
-
- // Aligned loops from here on in.
- const u8 *ptr = nocase ? rvermSearchAlignedNocase(chars, buf, buf_end, 1)
- : rvermSearchAligned(chars, buf, buf_end, 1);
- if (ptr) {
- return ptr;
- }
-
- // Tidy up the mess at the end, return buf - 1 if not found.
- ptr = nocase ? rvermUnalignNocase(chars, buf, 1)
- : rvermUnalign(chars, buf, 1);
- return ptr ? ptr : buf - 1;
-}
-
-/* returns highest offset of c2 (NOTE: not c1) */
-static really_inline
-const u8 *rvermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
- const u8 *buf_end) {
- DEBUG_PRINTF("rev double verm scan %s\\x%02hhx%02hhx over %zu bytes\n",
- nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf));
- assert(buf < buf_end);
-
- VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */
- VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */
-
+ }
+
+ buf_end -= min;
+ if (buf >= buf_end) {
+ return buf_end;
+ }
+ }
+
+ // Aligned loops from here on in.
+ const u8 *ptr = nocase ? rvermSearchAlignedNocase(chars, buf, buf_end, 1)
+ : rvermSearchAligned(chars, buf, buf_end, 1);
+ if (ptr) {
+ return ptr;
+ }
+
+ // Tidy up the mess at the end, return buf - 1 if not found.
+ ptr = nocase ? rvermUnalignNocase(chars, buf, 1)
+ : rvermUnalign(chars, buf, 1);
+ return ptr ? ptr : buf - 1;
+}
+
+/* returns highest offset of c2 (NOTE: not c1) */
+static really_inline
+const u8 *rvermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
+ const u8 *buf_end) {
+ DEBUG_PRINTF("rev double verm scan %s\\x%02hhx%02hhx over %zu bytes\n",
+ nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf));
+ assert(buf < buf_end);
+
+ VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */
+ VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */
+
#ifdef HAVE_AVX512
if (buf_end - buf <= VERM_BOUNDARY) {
const u8 *ptr = nocase
@@ -487,32 +487,32 @@ const u8 *rvermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
assert((buf_end - buf) >= VERM_BOUNDARY);
size_t min = (size_t)buf_end % VERM_BOUNDARY;
- if (min) {
- // input not aligned, so we need to run one iteration with an unaligned
- // load, then skip buf forward to the next aligned address. There's
- // some small overlap here, but we don't mind scanning it twice if we
- // can do it quickly, do we?
+ if (min) {
+ // input not aligned, so we need to run one iteration with an unaligned
+ // load, then skip buf forward to the next aligned address. There's
+ // some small overlap here, but we don't mind scanning it twice if we
+ // can do it quickly, do we?
const u8 *ptr = nocase ? rdvermPreconditionNocase(chars1, chars2,
buf_end - VERM_BOUNDARY)
: rdvermPrecondition(chars1, chars2,
buf_end - VERM_BOUNDARY);
-
- if (ptr) {
- return ptr;
- }
-
- buf_end -= min;
- if (buf >= buf_end) {
- return buf_end;
- }
- }
-
- // Aligned loops from here on in
- if (nocase) {
- return rdvermSearchAlignedNocase(chars1, chars2, c1, c2, buf, buf_end);
- } else {
- return rdvermSearchAligned(chars1, chars2, c1, c2, buf, buf_end);
- }
-}
-
-#endif /* VERMICELLI_H */
+
+ if (ptr) {
+ return ptr;
+ }
+
+ buf_end -= min;
+ if (buf >= buf_end) {
+ return buf_end;
+ }
+ }
+
+ // Aligned loops from here on in
+ if (nocase) {
+ return rdvermSearchAlignedNocase(chars1, chars2, c1, c2, buf, buf_end);
+ } else {
+ return rdvermSearchAligned(chars1, chars2, c1, c2, buf, buf_end);
+ }
+}
+
+#endif /* VERMICELLI_H */
diff --git a/contrib/libs/hyperscan/src/nfa/vermicelli_run.h b/contrib/libs/hyperscan/src/nfa/vermicelli_run.h
index 4459461ebf..d6fe7ec78f 100644
--- a/contrib/libs/hyperscan/src/nfa/vermicelli_run.h
+++ b/contrib/libs/hyperscan/src/nfa/vermicelli_run.h
@@ -1,90 +1,90 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "vermicelli.h"
-
-static really_inline
-const u8 *find_xverm_run(char c, char nocase, u32 repeat, UNUSED const u8 *buf,
- const u8 *buf_start, const u8 *buf_end, char negate) {
- DEBUG_PRINTF("looking for 0x%hhx{%u} in %p [%zd, %zd)\n", c, repeat, buf,
- buf_start - buf, buf_end - buf);
-
- /* TODO optimise on where it is easy to get a dense bitfield of character
- * matches */
- if (repeat == 1) {
- return negate ? nvermicelliExec(c, nocase, buf_start, buf_end)
- : vermicelliExec(c, nocase, buf_start, buf_end);
- }
-
- while (1) {
- const u8 *s;
- if (negate) {
- s = nvermicelliExec(c, nocase, buf_start, buf_end);
- } else if (buf_end - buf_start >= VERM_BOUNDARY && !nocase) {
- s = vermicelliDoubleExec(c, c, nocase, buf_start, buf_end);
-
- if (s != buf_end && *s != c) { /* double verm is not certain to be
- * precise */
- s = vermicelliExec(c, nocase, s, buf_end);
- }
- } else {
- s = vermicelliExec(c, nocase, buf_start, buf_end);
- }
- if (s == buf_end) {
- return s;
- }
-
- DEBUG_PRINTF("cand %zd\n", s - buf);
-
- const u8 *test_e = MIN(s + repeat, buf_end);
-
- const u8 *rv = negate ? vermicelliExec(c, nocase, s, test_e)
- : nvermicelliExec(c, nocase, s, test_e);
-
- assert(rv > buf_start);
- assert(rv <= buf_end);
-
- if (rv == test_e) {
- return s;
- }
-
- buf_start = rv;
- }
-}
-
-static really_inline
-const u8 *find_verm_run(char c, char nocase, u32 repeat, const u8 *buf,
- const u8 *buf_start, const u8 *buf_end) {
- return find_xverm_run(c, nocase, repeat, buf, buf_start, buf_end, 0);
-}
-
-static really_inline
-const u8 *find_nverm_run(char c, char nocase, u32 repeat, const u8 *buf,
- const u8 *buf_start, const u8 *buf_end) {
- return find_xverm_run(c, nocase, repeat, buf, buf_start, buf_end, 1);
-}
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "vermicelli.h"
+
+static really_inline
+const u8 *find_xverm_run(char c, char nocase, u32 repeat, UNUSED const u8 *buf,
+ const u8 *buf_start, const u8 *buf_end, char negate) {
+ DEBUG_PRINTF("looking for 0x%hhx{%u} in %p [%zd, %zd)\n", c, repeat, buf,
+ buf_start - buf, buf_end - buf);
+
+ /* TODO optimise on where it is easy to get a dense bitfield of character
+ * matches */
+ if (repeat == 1) {
+ return negate ? nvermicelliExec(c, nocase, buf_start, buf_end)
+ : vermicelliExec(c, nocase, buf_start, buf_end);
+ }
+
+ while (1) {
+ const u8 *s;
+ if (negate) {
+ s = nvermicelliExec(c, nocase, buf_start, buf_end);
+ } else if (buf_end - buf_start >= VERM_BOUNDARY && !nocase) {
+ s = vermicelliDoubleExec(c, c, nocase, buf_start, buf_end);
+
+ if (s != buf_end && *s != c) { /* double verm is not certain to be
+ * precise */
+ s = vermicelliExec(c, nocase, s, buf_end);
+ }
+ } else {
+ s = vermicelliExec(c, nocase, buf_start, buf_end);
+ }
+ if (s == buf_end) {
+ return s;
+ }
+
+ DEBUG_PRINTF("cand %zd\n", s - buf);
+
+ const u8 *test_e = MIN(s + repeat, buf_end);
+
+ const u8 *rv = negate ? vermicelliExec(c, nocase, s, test_e)
+ : nvermicelliExec(c, nocase, s, test_e);
+
+ assert(rv > buf_start);
+ assert(rv <= buf_end);
+
+ if (rv == test_e) {
+ return s;
+ }
+
+ buf_start = rv;
+ }
+}
+
+static really_inline
+const u8 *find_verm_run(char c, char nocase, u32 repeat, const u8 *buf,
+ const u8 *buf_start, const u8 *buf_end) {
+ return find_xverm_run(c, nocase, repeat, buf, buf_start, buf_end, 0);
+}
+
+static really_inline
+const u8 *find_nverm_run(char c, char nocase, u32 repeat, const u8 *buf,
+ const u8 *buf_start, const u8 *buf_end) {
+ return find_xverm_run(c, nocase, repeat, buf, buf_start, buf_end, 1);
+}
diff --git a/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h b/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h
index e58023f586..3307486cff 100644
--- a/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h
+++ b/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h
@@ -1,181 +1,181 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Vermicelli: Intel SSE implementation.
- *
- * (users should include vermicelli.h)
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Vermicelli: Intel SSE implementation.
+ *
+ * (users should include vermicelli.h)
+ */
+
#if !defined(HAVE_AVX512)
-#define VERM_BOUNDARY 16
-#define VERM_TYPE m128
-#define VERM_SET_FN set16x8
-
-static really_inline
-const u8 *vermSearchAligned(m128 chars, const u8 *buf, const u8 *buf_end,
- char negate) {
- assert((size_t)buf % 16 == 0);
- for (; buf + 31 < buf_end; buf += 32) {
- m128 data = load128(buf);
- u32 z1 = movemask128(eq128(chars, data));
- m128 data2 = load128(buf + 16);
- u32 z2 = movemask128(eq128(chars, data2));
- u32 z = z1 | (z2 << 16);
- if (negate) {
- z = ~z;
- }
- if (unlikely(z)) {
- u32 pos = ctz32(z);
- return buf + pos;
- }
- }
- for (; buf + 15 < buf_end; buf += 16) {
- m128 data = load128(buf);
- u32 z = movemask128(eq128(chars, data));
- if (negate) {
- z = ~z & 0xffff;
- }
- if (unlikely(z)) {
- u32 pos = ctz32(z);
- return buf + pos;
- }
- }
- return NULL;
-}
-
-static really_inline
-const u8 *vermSearchAlignedNocase(m128 chars, const u8 *buf,
- const u8 *buf_end, char negate) {
- assert((size_t)buf % 16 == 0);
- m128 casemask = set16x8(CASE_CLEAR);
-
- for (; buf + 31 < buf_end; buf += 32) {
- m128 data = load128(buf);
- u32 z1 = movemask128(eq128(chars, and128(casemask, data)));
- m128 data2 = load128(buf + 16);
- u32 z2 = movemask128(eq128(chars, and128(casemask, data2)));
- u32 z = z1 | (z2 << 16);
- if (negate) {
- z = ~z;
- }
- if (unlikely(z)) {
- u32 pos = ctz32(z);
- return buf + pos;
- }
- }
-
- for (; buf + 15 < buf_end; buf += 16) {
- m128 data = load128(buf);
- u32 z = movemask128(eq128(chars, and128(casemask, data)));
- if (negate) {
- z = ~z & 0xffff;
- }
- if (unlikely(z)) {
- u32 pos = ctz32(z);
- return buf + pos;
- }
- }
- return NULL;
-}
-
-// returns NULL if not found
-static really_inline
-const u8 *vermUnalign(m128 chars, const u8 *buf, char negate) {
- m128 data = loadu128(buf); // unaligned
- u32 z = movemask128(eq128(chars, data));
- if (negate) {
- z = ~z & 0xffff;
- }
- if (unlikely(z)) {
- return buf + ctz32(z);
- }
- return NULL;
-}
-
-// returns NULL if not found
-static really_inline
-const u8 *vermUnalignNocase(m128 chars, const u8 *buf, char negate) {
- m128 casemask = set16x8(CASE_CLEAR);
- m128 data = loadu128(buf); // unaligned
- u32 z = movemask128(eq128(chars, and128(casemask, data)));
- if (negate) {
- z = ~z & 0xffff;
- }
- if (unlikely(z)) {
- return buf + ctz32(z);
- }
- return NULL;
-}
-
-static really_inline
-const u8 *dvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2,
- const u8 *buf, const u8 *buf_end) {
- for (; buf + 16 < buf_end; buf += 16) {
- m128 data = load128(buf);
- u32 z = movemask128(and128(eq128(chars1, data),
+#define VERM_BOUNDARY 16
+#define VERM_TYPE m128
+#define VERM_SET_FN set16x8
+
+static really_inline
+const u8 *vermSearchAligned(m128 chars, const u8 *buf, const u8 *buf_end,
+ char negate) {
+ assert((size_t)buf % 16 == 0);
+ for (; buf + 31 < buf_end; buf += 32) {
+ m128 data = load128(buf);
+ u32 z1 = movemask128(eq128(chars, data));
+ m128 data2 = load128(buf + 16);
+ u32 z2 = movemask128(eq128(chars, data2));
+ u32 z = z1 | (z2 << 16);
+ if (negate) {
+ z = ~z;
+ }
+ if (unlikely(z)) {
+ u32 pos = ctz32(z);
+ return buf + pos;
+ }
+ }
+ for (; buf + 15 < buf_end; buf += 16) {
+ m128 data = load128(buf);
+ u32 z = movemask128(eq128(chars, data));
+ if (negate) {
+ z = ~z & 0xffff;
+ }
+ if (unlikely(z)) {
+ u32 pos = ctz32(z);
+ return buf + pos;
+ }
+ }
+ return NULL;
+}
+
+static really_inline
+const u8 *vermSearchAlignedNocase(m128 chars, const u8 *buf,
+ const u8 *buf_end, char negate) {
+ assert((size_t)buf % 16 == 0);
+ m128 casemask = set16x8(CASE_CLEAR);
+
+ for (; buf + 31 < buf_end; buf += 32) {
+ m128 data = load128(buf);
+ u32 z1 = movemask128(eq128(chars, and128(casemask, data)));
+ m128 data2 = load128(buf + 16);
+ u32 z2 = movemask128(eq128(chars, and128(casemask, data2)));
+ u32 z = z1 | (z2 << 16);
+ if (negate) {
+ z = ~z;
+ }
+ if (unlikely(z)) {
+ u32 pos = ctz32(z);
+ return buf + pos;
+ }
+ }
+
+ for (; buf + 15 < buf_end; buf += 16) {
+ m128 data = load128(buf);
+ u32 z = movemask128(eq128(chars, and128(casemask, data)));
+ if (negate) {
+ z = ~z & 0xffff;
+ }
+ if (unlikely(z)) {
+ u32 pos = ctz32(z);
+ return buf + pos;
+ }
+ }
+ return NULL;
+}
+
+// returns NULL if not found
+static really_inline
+const u8 *vermUnalign(m128 chars, const u8 *buf, char negate) {
+ m128 data = loadu128(buf); // unaligned
+ u32 z = movemask128(eq128(chars, data));
+ if (negate) {
+ z = ~z & 0xffff;
+ }
+ if (unlikely(z)) {
+ return buf + ctz32(z);
+ }
+ return NULL;
+}
+
+// returns NULL if not found
+static really_inline
+const u8 *vermUnalignNocase(m128 chars, const u8 *buf, char negate) {
+ m128 casemask = set16x8(CASE_CLEAR);
+ m128 data = loadu128(buf); // unaligned
+ u32 z = movemask128(eq128(chars, and128(casemask, data)));
+ if (negate) {
+ z = ~z & 0xffff;
+ }
+ if (unlikely(z)) {
+ return buf + ctz32(z);
+ }
+ return NULL;
+}
+
+static really_inline
+const u8 *dvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2,
+ const u8 *buf, const u8 *buf_end) {
+ for (; buf + 16 < buf_end; buf += 16) {
+ m128 data = load128(buf);
+ u32 z = movemask128(and128(eq128(chars1, data),
rshiftbyte_m128(eq128(chars2, data), 1)));
- if (buf[15] == c1 && buf[16] == c2) {
- z |= (1 << 15);
- }
- if (unlikely(z)) {
- u32 pos = ctz32(z);
- return buf + pos;
- }
- }
-
- return NULL;
-}
-
-static really_inline
-const u8 *dvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2,
- const u8 *buf, const u8 *buf_end) {
- assert((size_t)buf % 16 == 0);
- m128 casemask = set16x8(CASE_CLEAR);
-
- for (; buf + 16 < buf_end; buf += 16) {
- m128 data = load128(buf);
- m128 v = and128(casemask, data);
- u32 z = movemask128(and128(eq128(chars1, v),
+ if (buf[15] == c1 && buf[16] == c2) {
+ z |= (1 << 15);
+ }
+ if (unlikely(z)) {
+ u32 pos = ctz32(z);
+ return buf + pos;
+ }
+ }
+
+ return NULL;
+}
+
+static really_inline
+const u8 *dvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2,
+ const u8 *buf, const u8 *buf_end) {
+ assert((size_t)buf % 16 == 0);
+ m128 casemask = set16x8(CASE_CLEAR);
+
+ for (; buf + 16 < buf_end; buf += 16) {
+ m128 data = load128(buf);
+ m128 v = and128(casemask, data);
+ u32 z = movemask128(and128(eq128(chars1, v),
rshiftbyte_m128(eq128(chars2, v), 1)));
- if ((buf[15] & CASE_CLEAR) == c1 && (buf[16] & CASE_CLEAR) == c2) {
- z |= (1 << 15);
- }
- if (unlikely(z)) {
- u32 pos = ctz32(z);
- return buf + pos;
- }
- }
+ if ((buf[15] & CASE_CLEAR) == c1 && (buf[16] & CASE_CLEAR) == c2) {
+ z |= (1 << 15);
+ }
+ if (unlikely(z)) {
+ u32 pos = ctz32(z);
+ return buf + pos;
+ }
+ }
return NULL;
-}
-
+}
+
static really_inline
const u8 *dvermSearchAlignedMasked(m128 chars1, m128 chars2,
m128 mask1, m128 mask2, u8 c1, u8 c2, u8 m1,
@@ -200,41 +200,41 @@ const u8 *dvermSearchAlignedMasked(m128 chars1, m128 chars2,
return NULL;
}
-// returns NULL if not found
-static really_inline
-const u8 *dvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) {
- m128 data = loadu128(buf); // unaligned
- u32 z = movemask128(and128(eq128(chars1, data),
+// returns NULL if not found
+static really_inline
+const u8 *dvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) {
+ m128 data = loadu128(buf); // unaligned
+ u32 z = movemask128(and128(eq128(chars1, data),
rshiftbyte_m128(eq128(chars2, data), 1)));
-
- /* no fixup of the boundary required - the aligned run will pick it up */
- if (unlikely(z)) {
- u32 pos = ctz32(z);
- return buf + pos;
- }
- return NULL;
-}
-
-// returns NULL if not found
-static really_inline
-const u8 *dvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) {
- /* due to laziness, nonalphas and nocase having interesting behaviour */
- m128 casemask = set16x8(CASE_CLEAR);
- m128 data = loadu128(buf); // unaligned
- m128 v = and128(casemask, data);
- u32 z = movemask128(and128(eq128(chars1, v),
+
+ /* no fixup of the boundary required - the aligned run will pick it up */
+ if (unlikely(z)) {
+ u32 pos = ctz32(z);
+ return buf + pos;
+ }
+ return NULL;
+}
+
+// returns NULL if not found
+static really_inline
+const u8 *dvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) {
+ /* due to laziness, nonalphas and nocase having interesting behaviour */
+ m128 casemask = set16x8(CASE_CLEAR);
+ m128 data = loadu128(buf); // unaligned
+ m128 v = and128(casemask, data);
+ u32 z = movemask128(and128(eq128(chars1, v),
rshiftbyte_m128(eq128(chars2, v), 1)));
-
- /* no fixup of the boundary required - the aligned run will pick it up */
- if (unlikely(z)) {
- u32 pos = ctz32(z);
- return buf + pos;
- }
- return NULL;
-}
-
+
+ /* no fixup of the boundary required - the aligned run will pick it up */
+ if (unlikely(z)) {
+ u32 pos = ctz32(z);
+ return buf + pos;
+ }
+ return NULL;
+}
+
// returns NULL if not found
-static really_inline
+static really_inline
const u8 *dvermPreconditionMasked(m128 chars1, m128 chars2,
m128 mask1, m128 mask2, const u8 *buf) {
m128 data = loadu128(buf); // unaligned
@@ -251,148 +251,148 @@ const u8 *dvermPreconditionMasked(m128 chars1, m128 chars2,
}
static really_inline
-const u8 *lastMatchOffset(const u8 *buf_end, u32 z) {
- assert(z);
- return buf_end - 16 + 31 - clz32(z);
-}
-
-static really_inline
-const u8 *rvermSearchAligned(m128 chars, const u8 *buf, const u8 *buf_end,
- char negate) {
- assert((size_t)buf_end % 16 == 0);
- for (; buf + 15 < buf_end; buf_end -= 16) {
- m128 data = load128(buf_end - 16);
- u32 z = movemask128(eq128(chars, data));
- if (negate) {
- z = ~z & 0xffff;
- }
- if (unlikely(z)) {
- return lastMatchOffset(buf_end, z);
- }
- }
- return NULL;
-}
-
-static really_inline
-const u8 *rvermSearchAlignedNocase(m128 chars, const u8 *buf,
- const u8 *buf_end, char negate) {
- assert((size_t)buf_end % 16 == 0);
- m128 casemask = set16x8(CASE_CLEAR);
-
- for (; buf + 15 < buf_end; buf_end -= 16) {
- m128 data = load128(buf_end - 16);
- u32 z = movemask128(eq128(chars, and128(casemask, data)));
- if (negate) {
- z = ~z & 0xffff;
- }
- if (unlikely(z)) {
- return lastMatchOffset(buf_end, z);
- }
- }
- return NULL;
-}
-
-// returns NULL if not found
-static really_inline
-const u8 *rvermUnalign(m128 chars, const u8 *buf, char negate) {
- m128 data = loadu128(buf); // unaligned
- u32 z = movemask128(eq128(chars, data));
- if (negate) {
- z = ~z & 0xffff;
- }
- if (unlikely(z)) {
- return lastMatchOffset(buf + 16, z);
- }
- return NULL;
-}
-
-// returns NULL if not found
-static really_inline
-const u8 *rvermUnalignNocase(m128 chars, const u8 *buf, char negate) {
- m128 casemask = set16x8(CASE_CLEAR);
- m128 data = loadu128(buf); // unaligned
- u32 z = movemask128(eq128(chars, and128(casemask, data)));
- if (negate) {
- z = ~z & 0xffff;
- }
- if (unlikely(z)) {
- return lastMatchOffset(buf + 16, z);
- }
- return NULL;
-}
-
-static really_inline
-const u8 *rdvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2,
- const u8 *buf, const u8 *buf_end) {
- assert((size_t)buf_end % 16 == 0);
-
- for (; buf + 16 < buf_end; buf_end -= 16) {
- m128 data = load128(buf_end - 16);
- u32 z = movemask128(and128(eq128(chars2, data),
+const u8 *lastMatchOffset(const u8 *buf_end, u32 z) {
+ assert(z);
+ return buf_end - 16 + 31 - clz32(z);
+}
+
+static really_inline
+const u8 *rvermSearchAligned(m128 chars, const u8 *buf, const u8 *buf_end,
+ char negate) {
+ assert((size_t)buf_end % 16 == 0);
+ for (; buf + 15 < buf_end; buf_end -= 16) {
+ m128 data = load128(buf_end - 16);
+ u32 z = movemask128(eq128(chars, data));
+ if (negate) {
+ z = ~z & 0xffff;
+ }
+ if (unlikely(z)) {
+ return lastMatchOffset(buf_end, z);
+ }
+ }
+ return NULL;
+}
+
+static really_inline
+const u8 *rvermSearchAlignedNocase(m128 chars, const u8 *buf,
+ const u8 *buf_end, char negate) {
+ assert((size_t)buf_end % 16 == 0);
+ m128 casemask = set16x8(CASE_CLEAR);
+
+ for (; buf + 15 < buf_end; buf_end -= 16) {
+ m128 data = load128(buf_end - 16);
+ u32 z = movemask128(eq128(chars, and128(casemask, data)));
+ if (negate) {
+ z = ~z & 0xffff;
+ }
+ if (unlikely(z)) {
+ return lastMatchOffset(buf_end, z);
+ }
+ }
+ return NULL;
+}
+
+// returns NULL if not found
+static really_inline
+const u8 *rvermUnalign(m128 chars, const u8 *buf, char negate) {
+ m128 data = loadu128(buf); // unaligned
+ u32 z = movemask128(eq128(chars, data));
+ if (negate) {
+ z = ~z & 0xffff;
+ }
+ if (unlikely(z)) {
+ return lastMatchOffset(buf + 16, z);
+ }
+ return NULL;
+}
+
+// returns NULL if not found
+static really_inline
+const u8 *rvermUnalignNocase(m128 chars, const u8 *buf, char negate) {
+ m128 casemask = set16x8(CASE_CLEAR);
+ m128 data = loadu128(buf); // unaligned
+ u32 z = movemask128(eq128(chars, and128(casemask, data)));
+ if (negate) {
+ z = ~z & 0xffff;
+ }
+ if (unlikely(z)) {
+ return lastMatchOffset(buf + 16, z);
+ }
+ return NULL;
+}
+
+static really_inline
+const u8 *rdvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2,
+ const u8 *buf, const u8 *buf_end) {
+ assert((size_t)buf_end % 16 == 0);
+
+ for (; buf + 16 < buf_end; buf_end -= 16) {
+ m128 data = load128(buf_end - 16);
+ u32 z = movemask128(and128(eq128(chars2, data),
lshiftbyte_m128(eq128(chars1, data), 1)));
- if (buf_end[-17] == c1 && buf_end[-16] == c2) {
- z |= 1;
- }
- if (unlikely(z)) {
- return lastMatchOffset(buf_end, z);
- }
- }
- return buf_end;
-}
-
-static really_inline
-const u8 *rdvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2,
- const u8 *buf, const u8 *buf_end) {
- assert((size_t)buf_end % 16 == 0);
- m128 casemask = set16x8(CASE_CLEAR);
-
- for (; buf + 16 < buf_end; buf_end -= 16) {
- m128 data = load128(buf_end - 16);
- m128 v = and128(casemask, data);
- u32 z = movemask128(and128(eq128(chars2, v),
+ if (buf_end[-17] == c1 && buf_end[-16] == c2) {
+ z |= 1;
+ }
+ if (unlikely(z)) {
+ return lastMatchOffset(buf_end, z);
+ }
+ }
+ return buf_end;
+}
+
+static really_inline
+const u8 *rdvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2,
+ const u8 *buf, const u8 *buf_end) {
+ assert((size_t)buf_end % 16 == 0);
+ m128 casemask = set16x8(CASE_CLEAR);
+
+ for (; buf + 16 < buf_end; buf_end -= 16) {
+ m128 data = load128(buf_end - 16);
+ m128 v = and128(casemask, data);
+ u32 z = movemask128(and128(eq128(chars2, v),
lshiftbyte_m128(eq128(chars1, v), 1)));
- if ((buf_end[-17] & CASE_CLEAR) == c1
- && (buf_end[-16] & CASE_CLEAR) == c2) {
- z |= 1;
- }
- if (unlikely(z)) {
- return lastMatchOffset(buf_end, z);
- }
- }
- return buf_end;
-}
-
-// returns NULL if not found
-static really_inline
-const u8 *rdvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) {
- m128 data = loadu128(buf);
- u32 z = movemask128(and128(eq128(chars2, data),
+ if ((buf_end[-17] & CASE_CLEAR) == c1
+ && (buf_end[-16] & CASE_CLEAR) == c2) {
+ z |= 1;
+ }
+ if (unlikely(z)) {
+ return lastMatchOffset(buf_end, z);
+ }
+ }
+ return buf_end;
+}
+
+// returns NULL if not found
+static really_inline
+const u8 *rdvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) {
+ m128 data = loadu128(buf);
+ u32 z = movemask128(and128(eq128(chars2, data),
lshiftbyte_m128(eq128(chars1, data), 1)));
-
- /* no fixup of the boundary required - the aligned run will pick it up */
- if (unlikely(z)) {
- return lastMatchOffset(buf + 16, z);
- }
-
- return NULL;
-}
-
-// returns NULL if not found
-static really_inline
-const u8 *rdvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) {
- /* due to laziness, nonalphas and nocase having interesting behaviour */
- m128 casemask = set16x8(CASE_CLEAR);
- m128 data = loadu128(buf);
- m128 v = and128(casemask, data);
- u32 z = movemask128(and128(eq128(chars2, v),
+
+ /* no fixup of the boundary required - the aligned run will pick it up */
+ if (unlikely(z)) {
+ return lastMatchOffset(buf + 16, z);
+ }
+
+ return NULL;
+}
+
+// returns NULL if not found
+static really_inline
+const u8 *rdvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) {
+ /* due to laziness, nonalphas and nocase having interesting behaviour */
+ m128 casemask = set16x8(CASE_CLEAR);
+ m128 data = loadu128(buf);
+ m128 v = and128(casemask, data);
+ u32 z = movemask128(and128(eq128(chars2, v),
lshiftbyte_m128(eq128(chars1, v), 1)));
- /* no fixup of the boundary required - the aligned run will pick it up */
- if (unlikely(z)) {
- return lastMatchOffset(buf + 16, z);
- }
-
- return NULL;
-}
+ /* no fixup of the boundary required - the aligned run will pick it up */
+ if (unlikely(z)) {
+ return lastMatchOffset(buf + 16, z);
+ }
+
+ return NULL;
+}
#else // HAVE_AVX512
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng.cpp b/contrib/libs/hyperscan/src/nfagraph/ng.cpp
index 6545d55ac0..8dccf9863d 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng.cpp
@@ -1,229 +1,229 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
* \brief NG and graph handling.
- */
+ */
#include "ng.h"
-#include "grey.h"
-#include "ng_anchored_acyclic.h"
-#include "ng_anchored_dots.h"
-#include "ng_asserts.h"
-#include "ng_calc_components.h"
-#include "ng_cyclic_redundancy.h"
-#include "ng_dump.h"
-#include "ng_edge_redundancy.h"
-#include "ng_equivalence.h"
-#include "ng_extparam.h"
-#include "ng_fixed_width.h"
+#include "grey.h"
+#include "ng_anchored_acyclic.h"
+#include "ng_anchored_dots.h"
+#include "ng_asserts.h"
+#include "ng_calc_components.h"
+#include "ng_cyclic_redundancy.h"
+#include "ng_dump.h"
+#include "ng_edge_redundancy.h"
+#include "ng_equivalence.h"
+#include "ng_extparam.h"
+#include "ng_fixed_width.h"
#include "ng_fuzzy.h"
-#include "ng_haig.h"
-#include "ng_literal_component.h"
-#include "ng_literal_decorated.h"
-#include "ng_misc_opt.h"
-#include "ng_puff.h"
-#include "ng_prefilter.h"
-#include "ng_prune.h"
-#include "ng_redundancy.h"
-#include "ng_region.h"
-#include "ng_region_redundancy.h"
-#include "ng_reports.h"
-#include "ng_sep.h"
-#include "ng_small_literal_set.h"
-#include "ng_som.h"
-#include "ng_vacuous.h"
+#include "ng_haig.h"
+#include "ng_literal_component.h"
+#include "ng_literal_decorated.h"
+#include "ng_misc_opt.h"
+#include "ng_puff.h"
+#include "ng_prefilter.h"
+#include "ng_prune.h"
+#include "ng_redundancy.h"
+#include "ng_region.h"
+#include "ng_region_redundancy.h"
+#include "ng_reports.h"
+#include "ng_sep.h"
+#include "ng_small_literal_set.h"
+#include "ng_som.h"
+#include "ng_vacuous.h"
#include "ng_violet.h"
-#include "ng_utf8.h"
-#include "ng_util.h"
-#include "ng_width.h"
-#include "ue2common.h"
+#include "ng_utf8.h"
+#include "ng_util.h"
+#include "ng_width.h"
+#include "ue2common.h"
#include "compiler/compiler.h"
-#include "nfa/goughcompile.h"
+#include "nfa/goughcompile.h"
#include "rose/rose_build.h"
-#include "smallwrite/smallwrite_build.h"
-#include "util/compile_error.h"
-#include "util/container.h"
-#include "util/depth.h"
-#include "util/graph_range.h"
-#include "util/make_unique.h"
-#include "util/ue2string.h"
-
-using namespace std;
-
-namespace ue2 {
-
+#include "smallwrite/smallwrite_build.h"
+#include "util/compile_error.h"
+#include "util/container.h"
+#include "util/depth.h"
+#include "util/graph_range.h"
+#include "util/make_unique.h"
+#include "util/ue2string.h"
+
+using namespace std;
+
+namespace ue2 {
+
NG::NG(const CompileContext &in_cc, size_t num_patterns,
unsigned in_somPrecision)
- : maxSomRevHistoryAvailable(in_cc.grey.somMaxRevNfaLength),
- minWidth(depth::infinity()),
- rm(in_cc.grey),
- ssm(in_somPrecision),
- cc(in_cc),
+ : maxSomRevHistoryAvailable(in_cc.grey.somMaxRevNfaLength),
+ minWidth(depth::infinity()),
+ rm(in_cc.grey),
+ ssm(in_somPrecision),
+ cc(in_cc),
smwr(makeSmallWriteBuilder(num_patterns, rm, cc)),
rose(makeRoseBuilder(rm, ssm, *smwr, cc, boundary)) {
-}
-
-NG::~NG() {
- // empty
-}
-
-/** \brief SOM handling code, called by \ref addComponent.
- *
- * \return true if the component was handled completely by something (e.g. a
- * Haig outfix), false if SOM could be established but implementation via an
- * engine will be required.
- *
- * \throw CompileError if SOM cannot be supported for the component.
- */
-static
+}
+
+NG::~NG() {
+ // empty
+}
+
+/** \brief SOM handling code, called by \ref addComponent.
+ *
+ * \return true if the component was handled completely by something (e.g. a
+ * Haig outfix), false if SOM could be established but implementation via an
+ * engine will be required.
+ *
+ * \throw CompileError if SOM cannot be supported for the component.
+ */
+static
bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
- const som_type som, const u32 comp_id) {
- DEBUG_PRINTF("doing som\n");
+ const som_type som, const u32 comp_id) {
+ DEBUG_PRINTF("doing som\n");
dumpComponent(g, "03_presom", expr.index, comp_id, ng.cc.grey);
- assert(hasCorrectlyNumberedVertices(g));
+ assert(hasCorrectlyNumberedVertices(g));
assert(allMatchStatesHaveReports(g));
-
- // First, we try the "SOM chain" support in ng_som.cpp.
-
+
+ // First, we try the "SOM chain" support in ng_som.cpp.
+
sombe_rv rv = doSom(ng, g, expr, comp_id, som);
- if (rv == SOMBE_HANDLED_INTERNAL) {
- return false;
- } else if (rv == SOMBE_HANDLED_ALL) {
- return true;
- }
- assert(rv == SOMBE_FAIL);
-
- /* Next, Sombe style approaches */
+ if (rv == SOMBE_HANDLED_INTERNAL) {
+ return false;
+ } else if (rv == SOMBE_HANDLED_ALL) {
+ return true;
+ }
+ assert(rv == SOMBE_FAIL);
+
+ /* Next, Sombe style approaches */
rv = doSomWithHaig(ng, g, expr, comp_id, som);
- if (rv == SOMBE_HANDLED_INTERNAL) {
- return false;
- } else if (rv == SOMBE_HANDLED_ALL) {
- return true;
- }
- assert(rv == SOMBE_FAIL);
-
- // If the previous approach could not support this pattern, we try treating
- // it monolithically, as a Haig outfix.
-
- vector<vector<CharReach> > triggers; /* empty for outfix */
-
- assert(g.kind == NFA_OUTFIX);
+ if (rv == SOMBE_HANDLED_INTERNAL) {
+ return false;
+ } else if (rv == SOMBE_HANDLED_ALL) {
+ return true;
+ }
+ assert(rv == SOMBE_FAIL);
+
+ // If the previous approach could not support this pattern, we try treating
+ // it monolithically, as a Haig outfix.
+
+ vector<vector<CharReach> > triggers; /* empty for outfix */
+
+ assert(g.kind == NFA_OUTFIX);
dumpComponent(g, "haig", expr.index, comp_id, ng.cc.grey);
makeReportsSomPass(ng.rm, g);
- auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers,
- ng.cc.grey);
- if (haig) {
- DEBUG_PRINTF("built haig outfix\n");
- ng.rose->addOutfix(g, *haig);
- return true;
- }
-
- /* Our various strategies for supporting SOM for this pattern have failed.
- * Provide a generic pattern not supported/too large return value as it is
- * unclear what the meaning of a specific SOM error would be */
+ auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers,
+ ng.cc.grey);
+ if (haig) {
+ DEBUG_PRINTF("built haig outfix\n");
+ ng.rose->addOutfix(g, *haig);
+ return true;
+ }
+
+ /* Our various strategies for supporting SOM for this pattern have failed.
+ * Provide a generic pattern not supported/too large return value as it is
+ * unclear what the meaning of a specific SOM error would be */
throw CompileError(expr.index, "Pattern is too large.");
-
- assert(0); // unreachable
- return false;
-}
-
-void reduceGraph(NGHolder &g, som_type som, bool utf8,
- const CompileContext &cc) {
- if (!cc.grey.performGraphSimplification) {
- return;
- }
-
- // We run reduction passes until either the graph stops changing or we hit
- // a (small) limit.
-
- if (!som) {
- mergeCyclicDotStars(g);
- }
-
- const unsigned MAX_PASSES = 3;
- for (unsigned pass = 1; pass <= MAX_PASSES; pass++) {
- bool changed = false;
- DEBUG_PRINTF("reduce pass %u/%u\n", pass, MAX_PASSES);
- changed |= removeEdgeRedundancy(g, som, cc);
- changed |= reduceGraphEquivalences(g, cc);
- changed |= removeRedundancy(g, som);
+
+ assert(0); // unreachable
+ return false;
+}
+
+void reduceGraph(NGHolder &g, som_type som, bool utf8,
+ const CompileContext &cc) {
+ if (!cc.grey.performGraphSimplification) {
+ return;
+ }
+
+ // We run reduction passes until either the graph stops changing or we hit
+ // a (small) limit.
+
+ if (!som) {
+ mergeCyclicDotStars(g);
+ }
+
+ const unsigned MAX_PASSES = 3;
+ for (unsigned pass = 1; pass <= MAX_PASSES; pass++) {
+ bool changed = false;
+ DEBUG_PRINTF("reduce pass %u/%u\n", pass, MAX_PASSES);
+ changed |= removeEdgeRedundancy(g, som, cc);
+ changed |= reduceGraphEquivalences(g, cc);
+ changed |= removeRedundancy(g, som);
changed |= removeCyclicPathRedundancy(g);
- if (!changed) {
- DEBUG_PRINTF("graph unchanged after pass %u, stopping\n", pass);
- break;
- }
- }
-
- if (utf8) {
- utf8DotRestoration(g, som);
- }
-
- /* Minor non-redundancy improvements */
- if (improveGraph(g, som)) {
- /* may be some more edges to remove */
- removeEdgeRedundancy(g, som, cc);
- }
-
- removeCyclicDominated(g, som);
-
- if (!som) {
- mergeCyclicDotStars(g);
- }
-
- if (!som) {
- removeSiblingsOfStartDotStar(g);
- }
-}
-
-static
+ if (!changed) {
+ DEBUG_PRINTF("graph unchanged after pass %u, stopping\n", pass);
+ break;
+ }
+ }
+
+ if (utf8) {
+ utf8DotRestoration(g, som);
+ }
+
+ /* Minor non-redundancy improvements */
+ if (improveGraph(g, som)) {
+ /* may be some more edges to remove */
+ removeEdgeRedundancy(g, som, cc);
+ }
+
+ removeCyclicDominated(g, som);
+
+ if (!som) {
+ mergeCyclicDotStars(g);
+ }
+
+ if (!som) {
+ removeSiblingsOfStartDotStar(g);
+ }
+}
+
+static
bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr,
const som_type som, const u32 comp_id) {
- const CompileContext &cc = ng.cc;
+ const CompileContext &cc = ng.cc;
assert(hasCorrectlyNumberedVertices(g));
-
- DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n",
+
+ DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n",
expr.index, comp_id, num_vertices(g), num_edges(g));
-
+
dumpComponent(g, "01_begin", expr.index, comp_id, ng.cc.grey);
-
+
assert(allMatchStatesHaveReports(g));
-
+
reduceExtendedParams(g, ng.rm, som);
reduceGraph(g, som, expr.utf8, cc);
-
+
dumpComponent(g, "02_reduced", expr.index, comp_id, ng.cc.grey);
- // There may be redundant regions that we can remove
- if (cc.grey.performGraphSimplification) {
- removeRegionRedundancy(g, som);
- }
-
+ // There may be redundant regions that we can remove
+ if (cc.grey.performGraphSimplification) {
+ removeRegionRedundancy(g, som);
+ }
+
// We might be done at this point: if we've run out of vertices, we can
// stop processing.
if (num_vertices(g) == N_SPECIALS) {
@@ -231,125 +231,125 @@ bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr,
return true;
}
- // "Short Exhaustible Passthrough" patterns always become outfixes.
- if (!som && isSEP(g, ng.rm, cc.grey)) {
- DEBUG_PRINTF("graph is SEP\n");
- if (ng.rose->addOutfix(g)) {
- return true;
- }
- }
-
- // Start Of Match handling.
- if (som) {
+ // "Short Exhaustible Passthrough" patterns always become outfixes.
+ if (!som && isSEP(g, ng.rm, cc.grey)) {
+ DEBUG_PRINTF("graph is SEP\n");
+ if (ng.rose->addOutfix(g)) {
+ return true;
+ }
+ }
+
+ // Start Of Match handling.
+ if (som) {
if (addComponentSom(ng, g, expr, som, comp_id)) {
- return true;
- }
- }
-
+ return true;
+ }
+ }
+
assert(allMatchStatesHaveReports(g));
- if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) {
- return true;
- }
-
- if (handleSmallLiteralSets(*ng.rose, g, cc)
- || handleFixedWidth(*ng.rose, g, cc.grey)) {
- return true;
- }
-
- if (handleDecoratedLiterals(*ng.rose, g, cc)) {
- return true;
- }
-
+ if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) {
+ return true;
+ }
+
+ if (handleSmallLiteralSets(*ng.rose, g, cc)
+ || handleFixedWidth(*ng.rose, g, cc.grey)) {
+ return true;
+ }
+
+ if (handleDecoratedLiterals(*ng.rose, g, cc)) {
+ return true;
+ }
+
if (doViolet(*ng.rose, g, expr.prefilter, false, ng.rm, cc)) {
- return true;
- }
-
+ return true;
+ }
+
if (splitOffPuffs(*ng.rose, ng.rm, g, expr.prefilter, cc)) {
- return true;
- }
-
- if (handleSmallLiteralSets(*ng.rose, g, cc)
- || handleFixedWidth(*ng.rose, g, cc.grey)) {
- return true;
- }
-
- if (handleDecoratedLiterals(*ng.rose, g, cc)) {
- return true;
- }
-
+ return true;
+ }
+
+ if (handleSmallLiteralSets(*ng.rose, g, cc)
+ || handleFixedWidth(*ng.rose, g, cc.grey)) {
+ return true;
+ }
+
+ if (handleDecoratedLiterals(*ng.rose, g, cc)) {
+ return true;
+ }
+
if (doViolet(*ng.rose, g, expr.prefilter, true, ng.rm, cc)) {
- return true;
- }
-
- DEBUG_PRINTF("testing for outfix\n");
- assert(allMatchStatesHaveReports(g));
- if (ng.rose->addOutfix(g)) {
- return true;
- }
-
- return false;
-}
-
-// Returns true if all components have been added.
-static
+ return true;
+ }
+
+ DEBUG_PRINTF("testing for outfix\n");
+ assert(allMatchStatesHaveReports(g));
+ if (ng.rose->addOutfix(g)) {
+ return true;
+ }
+
+ return false;
+}
+
+// Returns true if all components have been added.
+static
bool processComponents(NG &ng, ExpressionInfo &expr,
- deque<unique_ptr<NGHolder>> &g_comp,
- const som_type som) {
- const u32 num_components = g_comp.size();
-
- u32 failed = 0;
- for (u32 i = 0; i < num_components; i++) {
- if (!g_comp[i]) {
- continue;
- }
+ deque<unique_ptr<NGHolder>> &g_comp,
+ const som_type som) {
+ const u32 num_components = g_comp.size();
+
+ u32 failed = 0;
+ for (u32 i = 0; i < num_components; i++) {
+ if (!g_comp[i]) {
+ continue;
+ }
if (addComponent(ng, *g_comp[i], expr, som, i)) {
- g_comp[i].reset();
- continue;
- }
-
- if (som) { /* bail immediately */
- return false;
- }
- failed++;
- }
-
- if (!failed) {
- DEBUG_PRINTF("all components claimed\n");
- return true;
- }
-
- DEBUG_PRINTF("%u components still remain\n", failed);
- return false;
-}
-
+ g_comp[i].reset();
+ continue;
+ }
+
+ if (som) { /* bail immediately */
+ return false;
+ }
+ failed++;
+ }
+
+ if (!failed) {
+ DEBUG_PRINTF("all components claimed\n");
+ return true;
+ }
+
+ DEBUG_PRINTF("%u components still remain\n", failed);
+ return false;
+}
+
bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) {
assert(g_ptr);
NGHolder &g = *g_ptr;
- // remove reports that aren't on vertices connected to accept.
+ // remove reports that aren't on vertices connected to accept.
clearReports(g);
-
+
som_type som = expr.som;
if (som && isVacuous(g)) {
throw CompileError(expr.index, "Start of match is not "
- "currently supported for patterns which match an "
- "empty buffer.");
- }
-
+ "currently supported for patterns which match an "
+ "empty buffer.");
+ }
+
dumpDotWrapper(g, expr, "01_initial", cc.grey);
assert(allMatchStatesHaveReports(g));
-
- /* ensure utf8 starts at cp boundary */
+
+ /* ensure utf8 starts at cp boundary */
ensureCodePointStart(rm, g, expr);
-
+
if (can_never_match(g)) {
throw CompileError(expr.index, "Pattern can never match.");
}
-
+
bool hamming = expr.hamm_distance > 0;
u32 e_dist = hamming ? expr.hamm_distance : expr.edit_distance;
-
+
DEBUG_PRINTF("edit distance = %u hamming = %s\n", e_dist, hamming ? "true" : "false");
// validate graph's suitability for fuzzing before resolving asserts
@@ -367,10 +367,10 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) {
if (can_never_match(g)) {
throw CompileError(expr.index, "Pattern can never match.");
- }
-
+ }
+
optimiseVirtualStarts(g); /* good for som */
-
+
propagateExtendedParams(g, expr, rm);
reduceExtendedParams(g, rm, som);
@@ -387,61 +387,61 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) {
})) {
// We have at least one report with a minimum length constraint, which
// we currently use SOM to satisfy.
- som = SOM_LEFT;
- ssm.somPrecision(8);
- }
-
- if (som) {
- rose->setSom();
- }
-
- // first, we can perform graph work that can be done on an individual
- // expression basis.
-
+ som = SOM_LEFT;
+ ssm.somPrecision(8);
+ }
+
+ if (som) {
+ rose->setSom();
+ }
+
+ // first, we can perform graph work that can be done on an individual
+ // expression basis.
+
if (expr.utf8) {
relaxForbiddenUtf8(g, expr);
- }
-
+ }
+
if (all_of_in(all_reports(g), [&](ReportID id) {
const auto &report = rm.getReport(id);
return report.ekey != INVALID_EKEY && !report.minLength &&
!report.minOffset;
})) {
- // In highlander mode: if we don't have constraints on our reports that
- // may prevent us accepting our first match (i.e. extended params) we
- // can prune the other out-edges of all vertices connected to accept.
+ // In highlander mode: if we don't have constraints on our reports that
+ // may prevent us accepting our first match (i.e. extended params) we
+ // can prune the other out-edges of all vertices connected to accept.
// TODO: shift the report checking down into pruneHighlanderAccepts()
// to allow us to handle the parts we can in mixed cases.
pruneHighlanderAccepts(g, rm);
- }
-
+ }
+
dumpDotWrapper(g, expr, "02b_fairly_early", cc.grey);
-
- // If we're a vacuous pattern, we can handle this early.
+
+ // If we're a vacuous pattern, we can handle this early.
if (splitOffVacuous(boundary, rm, g, expr)) {
- DEBUG_PRINTF("split off vacuous\n");
- }
-
- // We might be done at this point: if we've run out of vertices, we can
- // stop processing.
+ DEBUG_PRINTF("split off vacuous\n");
+ }
+
+ // We might be done at this point: if we've run out of vertices, we can
+ // stop processing.
if (num_vertices(g) == N_SPECIALS) {
- DEBUG_PRINTF("all vertices claimed by vacuous handling\n");
- return true;
- }
-
- // Now that vacuous edges have been removed, update the min width exclusive
- // of boundary reports.
+ DEBUG_PRINTF("all vertices claimed by vacuous handling\n");
+ return true;
+ }
+
+ // Now that vacuous edges have been removed, update the min width exclusive
+ // of boundary reports.
minWidth = min(minWidth, findMinWidth(g));
-
- // Add the pattern to the small write builder.
+
+ // Add the pattern to the small write builder.
smwr->add(g, expr);
-
- if (!som) {
+
+ if (!som) {
removeSiblingsOfStartDotStar(g);
- }
-
+ }
+
dumpDotWrapper(g, expr, "03_early", cc.grey);
-
+
// Perform a reduction pass to merge sibling character classes together.
if (cc.grey.performGraphSimplification) {
removeRedundancy(g, som);
@@ -450,177 +450,177 @@ bool NG::addGraph(ExpressionInfo &expr, unique_ptr<NGHolder> g_ptr) {
dumpDotWrapper(g, expr, "04_reduced", cc.grey);
- // If we've got some literals that span the graph from start to accept, we
- // can split them off into Rose from here.
- if (!som) {
+ // If we've got some literals that span the graph from start to accept, we
+ // can split them off into Rose from here.
+ if (!som) {
if (splitOffLiterals(*this, g)) {
- DEBUG_PRINTF("some vertices claimed by literals\n");
- }
- }
-
- // We might be done at this point: if we've run out of vertices, we can
- // stop processing.
+ DEBUG_PRINTF("some vertices claimed by literals\n");
+ }
+ }
+
+ // We might be done at this point: if we've run out of vertices, we can
+ // stop processing.
if (num_vertices(g) == N_SPECIALS) {
- DEBUG_PRINTF("all vertices claimed before calc components\n");
- return true;
- }
-
+ DEBUG_PRINTF("all vertices claimed before calc components\n");
+ return true;
+ }
+
// Split the graph into a set of connected components and process those.
// Note: this invalidates g_ptr.
-
+
auto g_comp = calcComponents(std::move(g_ptr), cc.grey);
- assert(!g_comp.empty());
-
- if (!som) {
+ assert(!g_comp.empty());
+
+ if (!som) {
for (auto &gc : g_comp) {
assert(gc);
reformLeadingDots(*gc);
- }
-
+ }
+
recalcComponents(g_comp, cc.grey);
- }
-
+ }
+
if (processComponents(*this, expr, g_comp, som)) {
- return true;
- }
-
- // If we're in prefiltering mode, we can run the prefilter reductions and
- // have another shot at accepting the graph.
-
+ return true;
+ }
+
+ // If we're in prefiltering mode, we can run the prefilter reductions and
+ // have another shot at accepting the graph.
+
if (cc.grey.prefilterReductions && expr.prefilter) {
for (auto &gc : g_comp) {
if (!gc) {
- continue;
- }
+ continue;
+ }
prefilterReductions(*gc, cc);
- }
-
+ }
+
if (processComponents(*this, expr, g_comp, som)) {
- return true;
- }
- }
-
- // We must have components that could not be compiled.
- for (u32 i = 0; i < g_comp.size(); i++) {
- if (g_comp[i]) {
- DEBUG_PRINTF("could not compile component %u with %zu vertices\n",
- i, num_vertices(*g_comp[i]));
+ return true;
+ }
+ }
+
+ // We must have components that could not be compiled.
+ for (u32 i = 0; i < g_comp.size(); i++) {
+ if (g_comp[i]) {
+ DEBUG_PRINTF("could not compile component %u with %zu vertices\n",
+ i, num_vertices(*g_comp[i]));
throw CompileError(expr.index, "Pattern is too large.");
- }
- }
-
- assert(0); // should have thrown.
- return false;
-}
-
-/** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. */
+ }
+ }
+
+ assert(0); // should have thrown.
+ return false;
+}
+
+/** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. */
bool NG::addHolder(NGHolder &g) {
DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(g));
assert(allMatchStatesHaveReports(g));
assert(hasCorrectlyNumberedVertices(g));
-
- /* We don't update the global minWidth here as we care about the min width
- * of the whole pattern - not a just a prefix of it. */
-
- bool prefilter = false;
+
+ /* We don't update the global minWidth here as we care about the min width
+ * of the whole pattern - not a just a prefix of it. */
+
+ bool prefilter = false;
//dumpDotComp(comp, g, *this, 20, "prefix_init");
-
- som_type som = SOM_NONE; /* the prefixes created by the SOM code do not
- themselves track som */
- bool utf8 = false; // handling done earlier
+
+ som_type som = SOM_NONE; /* the prefixes created by the SOM code do not
+ themselves track som */
+ bool utf8 = false; // handling done earlier
reduceGraph(g, som, utf8, cc);
-
- // There may be redundant regions that we can remove
- if (cc.grey.performGraphSimplification) {
+
+ // There may be redundant regions that we can remove
+ if (cc.grey.performGraphSimplification) {
removeRegionRedundancy(g, som);
- }
-
- // "Short Exhaustible Passthrough" patterns always become outfixes.
+ }
+
+ // "Short Exhaustible Passthrough" patterns always become outfixes.
if (isSEP(g, rm, cc.grey)) {
- DEBUG_PRINTF("graph is SEP\n");
+ DEBUG_PRINTF("graph is SEP\n");
if (rose->addOutfix(g)) {
- return true;
- }
- }
-
+ return true;
+ }
+ }
+
if (splitOffAnchoredAcyclic(*rose, g, cc)) {
- return true;
- }
-
+ return true;
+ }
+
if (handleSmallLiteralSets(*rose, g, cc)
|| handleFixedWidth(*rose, g, cc.grey)) {
- return true;
- }
-
+ return true;
+ }
+
if (handleDecoratedLiterals(*rose, g, cc)) {
- return true;
- }
-
+ return true;
+ }
+
if (doViolet(*rose, g, prefilter, false, rm, cc)) {
- return true;
- }
+ return true;
+ }
if (splitOffPuffs(*rose, rm, g, prefilter, cc)) {
- return true;
- }
+ return true;
+ }
if (doViolet(*rose, g, prefilter, true, rm, cc)) {
- return true;
- }
-
- DEBUG_PRINTF("trying for outfix\n");
+ return true;
+ }
+
+ DEBUG_PRINTF("trying for outfix\n");
if (rose->addOutfix(g)) {
- DEBUG_PRINTF("ok\n");
- return true;
- }
- DEBUG_PRINTF("trying for outfix - failed\n");
- DEBUG_PRINTF("nobody would take us\n");
- return false;
-}
-
-bool NG::addLiteral(const ue2_literal &literal, u32 expr_index,
+ DEBUG_PRINTF("ok\n");
+ return true;
+ }
+ DEBUG_PRINTF("trying for outfix - failed\n");
+ DEBUG_PRINTF("nobody would take us\n");
+ return false;
+}
+
+bool NG::addLiteral(const ue2_literal &literal, u32 expr_index,
u32 external_report, bool highlander, som_type som,
bool quiet) {
- assert(!literal.empty());
-
- if (!cc.grey.shortcutLiterals) {
- return false;
- }
-
- // We can't natively handle arbitrary literals with mixed case sensitivity
- // in Rose -- they require mechanisms like benefits masks, which have
- // length limits etc. Better to let those go through full graph processing.
- if (mixed_sensitivity(literal)) {
- DEBUG_PRINTF("mixed sensitivity\n");
- return false;
- }
-
- // Register external report and validate highlander constraints.
- rm.registerExtReport(external_report,
- external_report_info(highlander, expr_index));
-
- ReportID id;
- if (som) {
- assert(!highlander); // not allowed, checked earlier.
- Report r = makeSomRelativeCallback(external_report, 0, literal.length());
- id = rm.getInternalId(r);
- rose->setSom();
- } else {
- u32 ekey = highlander ? rm.getExhaustibleKey(external_report)
- : INVALID_EKEY;
+ assert(!literal.empty());
+
+ if (!cc.grey.shortcutLiterals) {
+ return false;
+ }
+
+ // We can't natively handle arbitrary literals with mixed case sensitivity
+ // in Rose -- they require mechanisms like benefits masks, which have
+ // length limits etc. Better to let those go through full graph processing.
+ if (mixed_sensitivity(literal)) {
+ DEBUG_PRINTF("mixed sensitivity\n");
+ return false;
+ }
+
+ // Register external report and validate highlander constraints.
+ rm.registerExtReport(external_report,
+ external_report_info(highlander, expr_index));
+
+ ReportID id;
+ if (som) {
+ assert(!highlander); // not allowed, checked earlier.
+ Report r = makeSomRelativeCallback(external_report, 0, literal.length());
+ id = rm.getInternalId(r);
+ rose->setSom();
+ } else {
+ u32 ekey = highlander ? rm.getExhaustibleKey(external_report)
+ : INVALID_EKEY;
Report r = makeECallback(external_report, 0, ekey, quiet);
- id = rm.getInternalId(r);
- }
-
- DEBUG_PRINTF("success: graph is literal '%s', report ID %u\n",
- dumpString(literal).c_str(), id);
-
- rose->add(false, false, literal, {id});
-
- minWidth = min(minWidth, depth(literal.length()));
-
+ id = rm.getInternalId(r);
+ }
+
+ DEBUG_PRINTF("success: graph is literal '%s', report ID %u\n",
+ dumpString(literal).c_str(), id);
+
+ rose->add(false, false, literal, {id});
+
+ minWidth = min(minWidth, depth(literal.length()));
+
/* inform small write handler about this literal */
smwr->add(literal, id);
-
- return true;
-}
-
-} // namespace ue2
+
+ return true;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng.h b/contrib/libs/hyperscan/src/nfagraph/ng.h
index ed908e9a8d..a5a9077d4f 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng.h
@@ -1,110 +1,110 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
* \brief NG declaration.
- */
-
-#ifndef NG_H
-#define NG_H
-
-#include "ng_holder.h"
-#include "ue2common.h"
-#include "parser/position.h"
-#include "som/slot_manager.h"
-#include "som/som.h"
-#include "util/boundary_reports.h"
-#include "util/compile_context.h"
-#include "util/depth.h"
-#include "util/graph.h"
+ */
+
+#ifndef NG_H
+#define NG_H
+
+#include "ng_holder.h"
+#include "ue2common.h"
+#include "parser/position.h"
+#include "som/slot_manager.h"
+#include "som/som.h"
+#include "util/boundary_reports.h"
+#include "util/compile_context.h"
+#include "util/depth.h"
+#include "util/graph.h"
#include "util/noncopyable.h"
-#include "util/report_manager.h"
-
-#include <deque>
-#include <map>
-#include <memory>
-#include <utility>
-#include <vector>
-
-namespace ue2 {
-
-struct CompileContext;
-struct ue2_literal;
-
+#include "util/report_manager.h"
+
+#include <deque>
+#include <map>
+#include <memory>
+#include <utility>
+#include <vector>
+
+namespace ue2 {
+
+struct CompileContext;
+struct ue2_literal;
+
class ExpressionInfo;
-class RoseBuild;
-class SmallWriteBuild;
-
+class RoseBuild;
+class SmallWriteBuild;
+
class NG : noncopyable {
-public:
+public:
NG(const CompileContext &in_cc, size_t num_patterns,
unsigned in_somPrecision);
- ~NG();
-
- /** \brief Consumes a pattern, returns false or throws a CompileError
- * exception if the graph cannot be consumed. */
+ ~NG();
+
+ /** \brief Consumes a pattern, returns false or throws a CompileError
+ * exception if the graph cannot be consumed. */
bool addGraph(ExpressionInfo &expr, std::unique_ptr<NGHolder> g_ptr);
-
- /** \brief Consumes a graph, cut-down version of addGraph for use by SOM
- * processing. */
- bool addHolder(NGHolder &h);
-
+
+ /** \brief Consumes a graph, cut-down version of addGraph for use by SOM
+ * processing. */
+ bool addHolder(NGHolder &h);
+
/** \brief Adds a literal to Rose, used by literal shortcut passes (instead
* of using \ref addGraph) */
- bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report,
+ bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report,
bool highlander, som_type som, bool quiet);
-
- /** \brief Maximum history in bytes available for use by SOM reverse NFAs,
- * a hack for pattern support (see UE-1903). This is always set to the max
- * "lookbehind" length. */
- const u32 maxSomRevHistoryAvailable;
-
- /** \brief The length of the shortest corpus which can match a pattern
- * contained in the NG (excluding the boundary reports used by vacuous
- * patterns, which give an effective minWidth of zero). */
- depth minWidth;
-
- ReportManager rm;
- SomSlotManager ssm;
- BoundaryReports boundary;
- const CompileContext cc;
-
+
+ /** \brief Maximum history in bytes available for use by SOM reverse NFAs,
+ * a hack for pattern support (see UE-1903). This is always set to the max
+ * "lookbehind" length. */
+ const u32 maxSomRevHistoryAvailable;
+
+ /** \brief The length of the shortest corpus which can match a pattern
+ * contained in the NG (excluding the boundary reports used by vacuous
+ * patterns, which give an effective minWidth of zero). */
+ depth minWidth;
+
+ ReportManager rm;
+ SomSlotManager ssm;
+ BoundaryReports boundary;
+ const CompileContext cc;
+
const std::unique_ptr<SmallWriteBuild> smwr; //!< SmallWrite builder.
- const std::unique_ptr<RoseBuild> rose; //!< Rose builder.
-};
-
-/** \brief Run graph reduction passes.
- *
- * Shared with the small write compiler.
- */
+ const std::unique_ptr<RoseBuild> rose; //!< Rose builder.
+};
+
+/** \brief Run graph reduction passes.
+ *
+ * Shared with the small write compiler.
+ */
void reduceGraph(NGHolder &g, som_type som, bool utf8,
const CompileContext &cc);
-
-} // namespace ue2
-
-#endif
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.cpp
index 6547c7a8e1..22e3e49609 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.cpp
@@ -1,67 +1,67 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Anchored acyclic graph -> DFA analysis.
- */
-#include "ng_anchored_acyclic.h"
-
-#include "ng_holder.h"
-#include "ng_reports.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "rose/rose_build.h"
-#include "util/compile_context.h"
-
-namespace ue2 {
-
-bool splitOffAnchoredAcyclic(RoseBuild &rose, const NGHolder &h,
- const CompileContext &cc) {
- if (!cc.grey.allowAnchoredAcyclic) {
- return false;
- }
-
- if (!isAnchored(h)) {
- DEBUG_PRINTF("fail, not anchored\n");
- return false;
- }
-
- if (!isAcyclic(h)) {
- DEBUG_PRINTF("fail, not acyclic\n");
- return false;
- }
-
- if (rose.addAnchoredAcyclic(h)) {
- return true;
- } else {
- DEBUG_PRINTF("failed to add anchored nfa\n");
- return false;
- }
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Anchored acyclic graph -> DFA analysis.
+ */
+#include "ng_anchored_acyclic.h"
+
+#include "ng_holder.h"
+#include "ng_reports.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "rose/rose_build.h"
+#include "util/compile_context.h"
+
+namespace ue2 {
+
+bool splitOffAnchoredAcyclic(RoseBuild &rose, const NGHolder &h,
+ const CompileContext &cc) {
+ if (!cc.grey.allowAnchoredAcyclic) {
+ return false;
+ }
+
+ if (!isAnchored(h)) {
+ DEBUG_PRINTF("fail, not anchored\n");
+ return false;
+ }
+
+ if (!isAcyclic(h)) {
+ DEBUG_PRINTF("fail, not acyclic\n");
+ return false;
+ }
+
+ if (rose.addAnchoredAcyclic(h)) {
+ return true;
+ } else {
+ DEBUG_PRINTF("failed to add anchored nfa\n");
+ return false;
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.h b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.h
index fa4e6199b4..f9bc5d772e 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_acyclic.h
@@ -1,49 +1,49 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Anchored acyclic graph -> DFA analysis.
- */
-
-#ifndef NG_ANCHORED_ACYCLIC_H
-#define NG_ANCHORED_ACYCLIC_H
-
-namespace ue2 {
-
-class NGHolder;
-class RoseBuild;
-struct CompileContext;
-
-/** \brief Attempt to consume the entire pattern in graph \a h as an anchored
- * acyclic DFA. Returns true if successful. */
-bool splitOffAnchoredAcyclic(RoseBuild &rose, const NGHolder &h,
- const CompileContext &cc);
-
-} // namespace ue2
-
-#endif // NG_ANCHORED_ACYCLIC_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Anchored acyclic graph -> DFA analysis.
+ */
+
+#ifndef NG_ANCHORED_ACYCLIC_H
+#define NG_ANCHORED_ACYCLIC_H
+
+namespace ue2 {
+
+class NGHolder;
+class RoseBuild;
+struct CompileContext;
+
+/** \brief Attempt to consume the entire pattern in graph \a h as an anchored
+ * acyclic DFA. Returns true if successful. */
+bool splitOffAnchoredAcyclic(RoseBuild &rose, const NGHolder &h,
+ const CompileContext &cc);
+
+} // namespace ue2
+
+#endif // NG_ANCHORED_ACYCLIC_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.cpp
index 9a0abb124c..9a13376d19 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.cpp
@@ -1,651 +1,651 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Analysis pass to reform leading dots.
- *
- * We have found that many regexes found in the wild use an anchored dot-repeat
- * to represent an unanchored pattern, particularly if they have been used with
- * a regex engine that assumes that a pattern is anchored. This pass reforms
- * patterns that begin with sequences of dots into a more standard form.
- *
- * In addition, both anchored and unanchored patterns with dot repeats as
- * prefixes will have these prefixes reformed into a canonical form, which some
- * later analyses depend upon.
- */
-#include "ng_anchored_dots.h"
-
-#include "grey.h"
-#include "ng_holder.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/container.h"
-#include "util/depth.h"
-#include "util/graph_range.h"
-
-#include <algorithm>
-#include <queue>
-#include <set>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-static
-bool findStarts(const NGHolder &g, set<NFAVertex> &anchored,
- set<NFAVertex> &unanchored) {
- // Populate unanchored map
- for (auto v : adjacent_vertices_range(g.startDs, g)) {
- if (is_special(v, g)) {
- continue;
- }
- unanchored.insert(v);
- }
-
- // Populate anchored map
- for (auto v : adjacent_vertices_range(g.start, g)) {
- if (is_special(v, g)) {
- continue;
- }
- anchored.insert(v);
- }
-
- if (unanchored == anchored) {
- anchored.clear();
- } else if (!unanchored.empty() && !anchored.empty()) {
- return false;
- }
-
- return !anchored.empty() || !unanchored.empty();
-}
-
-namespace {
-class DotInfo {
-public:
- DotInfo(NFAVertex v, bool se, u32 idx)
- : vertex(v), hasSelfLoop(se), index(idx) {}
-
- bool operator<(const DotInfo &other) const {
- if (hasSelfLoop != other.hasSelfLoop)
- return hasSelfLoop < other.hasSelfLoop;
- // tie break with vertex id: lowest ID wins
- return index > other.index;
- }
-
- NFAVertex vertex;
- bool hasSelfLoop;
- u32 index;
-};
-}
-
-// Returns nullptr if all vertices in the given set are not dots.
-// We can only pick one dot vertex, so we go for a dot-star if it exists,
-// otherwise the dot without a self-edge with the lowest ID.
-static
-NFAVertex findReformable(const NGHolder &g, const set<NFAVertex> &starts,
- set<NFAVertex> &otherV) {
- priority_queue<DotInfo> dotq;
- for (auto v : starts) {
- if (is_dot(v, g)) {
- u32 idx = g[v].index;
- dotq.push(DotInfo(v, hasSelfLoop(v, g), idx));
- }
- }
-
- if (dotq.empty()) {
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Analysis pass to reform leading dots.
+ *
+ * We have found that many regexes found in the wild use an anchored dot-repeat
+ * to represent an unanchored pattern, particularly if they have been used with
+ * a regex engine that assumes that a pattern is anchored. This pass reforms
+ * patterns that begin with sequences of dots into a more standard form.
+ *
+ * In addition, both anchored and unanchored patterns with dot repeats as
+ * prefixes will have these prefixes reformed into a canonical form, which some
+ * later analyses depend upon.
+ */
+#include "ng_anchored_dots.h"
+
+#include "grey.h"
+#include "ng_holder.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/container.h"
+#include "util/depth.h"
+#include "util/graph_range.h"
+
+#include <algorithm>
+#include <queue>
+#include <set>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+static
+bool findStarts(const NGHolder &g, set<NFAVertex> &anchored,
+ set<NFAVertex> &unanchored) {
+ // Populate unanchored map
+ for (auto v : adjacent_vertices_range(g.startDs, g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ unanchored.insert(v);
+ }
+
+ // Populate anchored map
+ for (auto v : adjacent_vertices_range(g.start, g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ anchored.insert(v);
+ }
+
+ if (unanchored == anchored) {
+ anchored.clear();
+ } else if (!unanchored.empty() && !anchored.empty()) {
+ return false;
+ }
+
+ return !anchored.empty() || !unanchored.empty();
+}
+
+namespace {
+class DotInfo {
+public:
+ DotInfo(NFAVertex v, bool se, u32 idx)
+ : vertex(v), hasSelfLoop(se), index(idx) {}
+
+ bool operator<(const DotInfo &other) const {
+ if (hasSelfLoop != other.hasSelfLoop)
+ return hasSelfLoop < other.hasSelfLoop;
+ // tie break with vertex id: lowest ID wins
+ return index > other.index;
+ }
+
+ NFAVertex vertex;
+ bool hasSelfLoop;
+ u32 index;
+};
+}
+
+// Returns nullptr if all vertices in the given set are not dots.
+// We can only pick one dot vertex, so we go for a dot-star if it exists,
+// otherwise the dot without a self-edge with the lowest ID.
+static
+NFAVertex findReformable(const NGHolder &g, const set<NFAVertex> &starts,
+ set<NFAVertex> &otherV) {
+ priority_queue<DotInfo> dotq;
+ for (auto v : starts) {
+ if (is_dot(v, g)) {
+ u32 idx = g[v].index;
+ dotq.push(DotInfo(v, hasSelfLoop(v, g), idx));
+ }
+ }
+
+ if (dotq.empty()) {
return NGHolder::null_vertex();
- }
-
- const DotInfo &dot = dotq.top();
- otherV = starts;
- otherV.erase(dot.vertex);
- DEBUG_PRINTF("selected dot vertex %u (%s)\n", dot.index,
- dot.hasSelfLoop ? "has self-edge" : "no self-edge");
- DEBUG_PRINTF("%zu other vertices\n", otherV.size());
- return dot.vertex;
-}
-
-// Returns true if the given vertex is only preceded by start. If start is
-// graph.startDs (i.e. unanchored), the given vertex can also be connected to
-// graph.start. If selfLoopIsAcceptable is set, self-loops are ignored.
-static
-bool isStartNode(NFAVertex v, NFAVertex start, const NGHolder &g,
- bool selfLoopIsAcceptable) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (selfLoopIsAcceptable && u == v) {
- continue;
- } else if (u == start) {
- continue;
- } else if (start == g.startDs && u == g.start) {
- continue;
- } else {
- return false;
- }
- }
- return true;
-}
-
-// Note: this will only remove the anchored first dot in the chain -- any other
-// removable nodes will be handled by the unanchored case below.
-static
-void reformAnchoredRepeatsComponent(NGHolder &g,
- set<NFAVertex> &compAnchoredStarts,
- set<NFAVertex> &compUnanchoredStarts,
- set<NFAVertex> &dead, depth *startBegin,
- depth *startEnd) {
- // anchored cases can not have any unanchored starts
- if (!compUnanchoredStarts.empty()) {
- DEBUG_PRINTF("we have unanchored starts, skipping\n");
- return;
- }
-
+ }
+
+ const DotInfo &dot = dotq.top();
+ otherV = starts;
+ otherV.erase(dot.vertex);
+ DEBUG_PRINTF("selected dot vertex %u (%s)\n", dot.index,
+ dot.hasSelfLoop ? "has self-edge" : "no self-edge");
+ DEBUG_PRINTF("%zu other vertices\n", otherV.size());
+ return dot.vertex;
+}
+
+// Returns true if the given vertex is only preceded by start. If start is
+// graph.startDs (i.e. unanchored), the given vertex can also be connected to
+// graph.start. If selfLoopIsAcceptable is set, self-loops are ignored.
+static
+bool isStartNode(NFAVertex v, NFAVertex start, const NGHolder &g,
+ bool selfLoopIsAcceptable) {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (selfLoopIsAcceptable && u == v) {
+ continue;
+ } else if (u == start) {
+ continue;
+ } else if (start == g.startDs && u == g.start) {
+ continue;
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
+
+// Note: this will only remove the anchored first dot in the chain -- any other
+// removable nodes will be handled by the unanchored case below.
+static
+void reformAnchoredRepeatsComponent(NGHolder &g,
+ set<NFAVertex> &compAnchoredStarts,
+ set<NFAVertex> &compUnanchoredStarts,
+ set<NFAVertex> &dead, depth *startBegin,
+ depth *startEnd) {
+ // anchored cases can not have any unanchored starts
+ if (!compUnanchoredStarts.empty()) {
+ DEBUG_PRINTF("we have unanchored starts, skipping\n");
+ return;
+ }
+
NFAVertex dotV = NGHolder::null_vertex();
- set<NFAVertex> otherV;
- dotV = findReformable(g, compAnchoredStarts, otherV);
+ set<NFAVertex> otherV;
+ dotV = findReformable(g, compAnchoredStarts, otherV);
if (dotV == NGHolder::null_vertex()) {
- DEBUG_PRINTF("no candidate reformable dot found.\n");
- return;
- }
-
- NFAEdge loopEdge;
- bool selfLoop = false;
- bool bustOut = false;
-
- for (const auto &e : out_edges_range(dotV, g)) {
- NFAVertex t = target(e, g);
- if (t == dotV) {
- selfLoop = true;
- loopEdge = e;
- continue;
- }
-
- if (is_special(t, g)) {
- bustOut = true;
- break;
- }
-
- if (!otherV.empty() && otherV.find(t) == otherV.end()) {
- bustOut = true;
- break;
- }
- }
-
- if (bustOut) {
- DEBUG_PRINTF("busting out\n");
- return;
- }
-
- if (!isStartNode(dotV, g.start, g, true)) {
+ DEBUG_PRINTF("no candidate reformable dot found.\n");
+ return;
+ }
+
+ NFAEdge loopEdge;
+ bool selfLoop = false;
+ bool bustOut = false;
+
+ for (const auto &e : out_edges_range(dotV, g)) {
+ NFAVertex t = target(e, g);
+ if (t == dotV) {
+ selfLoop = true;
+ loopEdge = e;
+ continue;
+ }
+
+ if (is_special(t, g)) {
+ bustOut = true;
+ break;
+ }
+
+ if (!otherV.empty() && otherV.find(t) == otherV.end()) {
+ bustOut = true;
+ break;
+ }
+ }
+
+ if (bustOut) {
+ DEBUG_PRINTF("busting out\n");
+ return;
+ }
+
+ if (!isStartNode(dotV, g.start, g, true)) {
DEBUG_PRINTF("fleeing: vertex %zu has other preds\n", g[dotV].index);
- return;
- }
-
- /* get bounds */
- depth min;
+ return;
+ }
+
+ /* get bounds */
+ depth min;
depth max(1);
-
- if (selfLoop) {
- // A self-loop indicates that this is a '.+' or '.*'
- max = depth::infinity();
- }
-
- if (!otherV.empty()) {
- /* We require that the successors of the dot node are are the same
- * as the start vertex. TODO: remember why.
- */
- if (selfLoop) {
- if (otherV.size() != out_degree(dotV, g) - 1) {
- return;
- }
- } else {
- if (otherV.size() != out_degree(dotV, g)) {
- return;
- }
- }
-
+
+ if (selfLoop) {
+ // A self-loop indicates that this is a '.+' or '.*'
+ max = depth::infinity();
+ }
+
+ if (!otherV.empty()) {
+ /* We require that the successors of the dot node are are the same
+ * as the start vertex. TODO: remember why.
+ */
+ if (selfLoop) {
+ if (otherV.size() != out_degree(dotV, g) - 1) {
+ return;
+ }
+ } else {
+ if (otherV.size() != out_degree(dotV, g)) {
+ return;
+ }
+ }
+
min = depth(0);
- } else {
+ } else {
min = depth(1);
- }
-
- *startBegin = min;
- *startEnd = max;
-
- for (auto t : adjacent_vertices_range(dotV, g)) {
- if (t != dotV) {
- add_edge_if_not_present(g.startDs, t, g);
- add_edge_if_not_present(g.start, t, g);
- compUnanchoredStarts.insert(t);
- }
- }
-
- for (auto v : otherV) {
- remove_edge(g.start, v, g);
- }
-
+ }
+
+ *startBegin = min;
+ *startEnd = max;
+
+ for (auto t : adjacent_vertices_range(dotV, g)) {
+ if (t != dotV) {
+ add_edge_if_not_present(g.startDs, t, g);
+ add_edge_if_not_present(g.start, t, g);
+ compUnanchoredStarts.insert(t);
+ }
+ }
+
+ for (auto v : otherV) {
+ remove_edge(g.start, v, g);
+ }
+
DEBUG_PRINTF("removing vertex %zu\n", g[dotV].index);
- clear_vertex(dotV, g);
- dead.insert(dotV);
- compAnchoredStarts.erase(dotV);
-}
-
-static
-void reformUnanchoredRepeatsComponent(NGHolder &g,
- set<NFAVertex> &compAnchoredStarts,
- set<NFAVertex> &compUnanchoredStarts,
- set<NFAVertex> &dead,
- depth *startBegin, depth *startEnd) {
- // unanchored cases can not have any anchored starts
- if (!compAnchoredStarts.empty()) {
- DEBUG_PRINTF("we have anchored starts, skipping\n");
- return;
- }
-
- while (true) {
+ clear_vertex(dotV, g);
+ dead.insert(dotV);
+ compAnchoredStarts.erase(dotV);
+}
+
+static
+void reformUnanchoredRepeatsComponent(NGHolder &g,
+ set<NFAVertex> &compAnchoredStarts,
+ set<NFAVertex> &compUnanchoredStarts,
+ set<NFAVertex> &dead,
+ depth *startBegin, depth *startEnd) {
+ // unanchored cases can not have any anchored starts
+ if (!compAnchoredStarts.empty()) {
+ DEBUG_PRINTF("we have anchored starts, skipping\n");
+ return;
+ }
+
+ while (true) {
NFAVertex dotV = NGHolder::null_vertex();
- set<NFAVertex> otherV;
- dotV = findReformable(g, compUnanchoredStarts, otherV);
+ set<NFAVertex> otherV;
+ dotV = findReformable(g, compUnanchoredStarts, otherV);
if (dotV == NGHolder::null_vertex()) {
- DEBUG_PRINTF("no candidate reformable dot found.\n");
- return;
- }
-
- NFAEdge loopEdge;
- bool selfLoop = false;
- bool bustOut = false;
-
- for (const auto &e : out_edges_range(dotV, g)) {
- NFAVertex t = target(e, g);
-
- if (t == dotV) {
- selfLoop = true;
- loopEdge = e;
- continue;
- }
-
- if (is_special(t, g)) {
- bustOut = true;
- break;
- }
-
- if (!otherV.empty() && otherV.find(t) == otherV.end()) {
- bustOut = true;
- break;
- }
- }
-
- if (bustOut) {
- DEBUG_PRINTF("busting out\n");
- if (!selfLoop) {
- return;
- }
-
- for (auto v : otherV) {
- if (!edge(dotV, v, g).second) {
- return;
- }
- }
-
- // A self-loop indicates that this is a '.+' or '.*'
+ DEBUG_PRINTF("no candidate reformable dot found.\n");
+ return;
+ }
+
+ NFAEdge loopEdge;
+ bool selfLoop = false;
+ bool bustOut = false;
+
+ for (const auto &e : out_edges_range(dotV, g)) {
+ NFAVertex t = target(e, g);
+
+ if (t == dotV) {
+ selfLoop = true;
+ loopEdge = e;
+ continue;
+ }
+
+ if (is_special(t, g)) {
+ bustOut = true;
+ break;
+ }
+
+ if (!otherV.empty() && otherV.find(t) == otherV.end()) {
+ bustOut = true;
+ break;
+ }
+ }
+
+ if (bustOut) {
+ DEBUG_PRINTF("busting out\n");
+ if (!selfLoop) {
+ return;
+ }
+
+ for (auto v : otherV) {
+ if (!edge(dotV, v, g).second) {
+ return;
+ }
+ }
+
+ // A self-loop indicates that this is a '.+' or '.*'
DEBUG_PRINTF("self-loop detected on %zu\n", g[dotV].index);
- *startEnd = depth::infinity();
- remove_edge(dotV, dotV, g);
- return;
- }
-
- if (!isStartNode(dotV, g.startDs, g, true)) {
+ *startEnd = depth::infinity();
+ remove_edge(dotV, dotV, g);
+ return;
+ }
+
+ if (!isStartNode(dotV, g.startDs, g, true)) {
DEBUG_PRINTF("fleeing: vertex %zu has other preds\n",
g[dotV].index);
- return;
- }
-
- /* get bounds */
+ return;
+ }
+
+ /* get bounds */
depth min(1);
depth max(1);
-
- if (selfLoop) {
- // A self-loop indicates that this is a '.+' or '.*'
- DEBUG_PRINTF("self-loop detected\n");
- max = depth::infinity();
- }
-
- if (!otherV.empty()) {
- if (!selfLoop && otherV.size() != out_degree(dotV, g)) {
- return;
- }
-
- if (selfLoop && otherV.size() != out_degree(dotV, g) - 1) {
- return;
- }
-
- if (min > depth(1)) {
- /* this is not a case we can handle */
- DEBUG_PRINTF("min greater than one, skipping\n");
- return;
- }
+
+ if (selfLoop) {
+ // A self-loop indicates that this is a '.+' or '.*'
+ DEBUG_PRINTF("self-loop detected\n");
+ max = depth::infinity();
+ }
+
+ if (!otherV.empty()) {
+ if (!selfLoop && otherV.size() != out_degree(dotV, g)) {
+ return;
+ }
+
+ if (selfLoop && otherV.size() != out_degree(dotV, g) - 1) {
+ return;
+ }
+
+ if (min > depth(1)) {
+ /* this is not a case we can handle */
+ DEBUG_PRINTF("min greater than one, skipping\n");
+ return;
+ }
min = depth(0);
- }
-
- *startBegin += min;
- *startEnd += max;
-
- for (auto v : otherV) {
- remove_edge(g.start, v, g);
- remove_edge(g.startDs, v, g);
- }
-
- compUnanchoredStarts.clear();
- for (auto t : adjacent_vertices_range(dotV, g)) {
- if (t != dotV) {
+ }
+
+ *startBegin += min;
+ *startEnd += max;
+
+ for (auto v : otherV) {
+ remove_edge(g.start, v, g);
+ remove_edge(g.startDs, v, g);
+ }
+
+ compUnanchoredStarts.clear();
+ for (auto t : adjacent_vertices_range(dotV, g)) {
+ if (t != dotV) {
DEBUG_PRINTF("connecting sds -> %zu\n", g[t].index);
- add_edge(g.startDs, t, g);
- add_edge(g.start, t, g);
- compUnanchoredStarts.insert(t);
- }
- }
-
+ add_edge(g.startDs, t, g);
+ add_edge(g.start, t, g);
+ compUnanchoredStarts.insert(t);
+ }
+ }
+
DEBUG_PRINTF("removing vertex %zu\n", g[dotV].index);
- dead.insert(dotV);
- clear_vertex(dotV, g);
- compUnanchoredStarts.erase(dotV);
- }
-}
-
-// for t to be another optional dot, it must have only in-edges from v and from
-// starts
-static
-bool isOptionalDot(NFAVertex t, NFAVertex v, const NGHolder &g) {
- if (!is_dot(t, g)) {
- return false;
- }
-
- bool found_v = false, found_start = false;
-
- for (auto u : inv_adjacent_vertices_range(t, g)) {
- if (u == v) {
- found_v = true;
- } else if (u == g.start || u == g.startDs) {
- found_start = true;
- } else {
- return false;
- }
- }
-
- return found_v && found_start;
-}
-
-static
-bool gatherParticipants(const NGHolder &g,
- NFAVertex start, NFAVertex initialDot,
- set<NFAVertex> &dots, set<NFAVertex> &succ) {
- // Walk the graph downwards from the initial dot; each dot will have:
- // 1) a single optional dot successor, or
- // 2) N successors (our terminating case)
- dots.insert(initialDot);
- NFAVertex v = initialDot;
-
- while (out_degree(v, g) == 1) {
- NFAVertex t = *(adjacent_vertices(v, g).first);
- // for t to be another optional dot, it must have only in-edges from v
- // and from starts
- if (isOptionalDot(t, v, g)) {
- // another dot; bail if we've seen it once already
- if (dots.find(t) != dots.end()) {
+ dead.insert(dotV);
+ clear_vertex(dotV, g);
+ compUnanchoredStarts.erase(dotV);
+ }
+}
+
+// for t to be another optional dot, it must have only in-edges from v and from
+// starts
+static
+bool isOptionalDot(NFAVertex t, NFAVertex v, const NGHolder &g) {
+ if (!is_dot(t, g)) {
+ return false;
+ }
+
+ bool found_v = false, found_start = false;
+
+ for (auto u : inv_adjacent_vertices_range(t, g)) {
+ if (u == v) {
+ found_v = true;
+ } else if (u == g.start || u == g.startDs) {
+ found_start = true;
+ } else {
+ return false;
+ }
+ }
+
+ return found_v && found_start;
+}
+
+static
+bool gatherParticipants(const NGHolder &g,
+ NFAVertex start, NFAVertex initialDot,
+ set<NFAVertex> &dots, set<NFAVertex> &succ) {
+ // Walk the graph downwards from the initial dot; each dot will have:
+ // 1) a single optional dot successor, or
+ // 2) N successors (our terminating case)
+ dots.insert(initialDot);
+ NFAVertex v = initialDot;
+
+ while (out_degree(v, g) == 1) {
+ NFAVertex t = *(adjacent_vertices(v, g).first);
+ // for t to be another optional dot, it must have only in-edges from v
+ // and from starts
+ if (isOptionalDot(t, v, g)) {
+ // another dot; bail if we've seen it once already
+ if (dots.find(t) != dots.end()) {
DEBUG_PRINTF("cycle detected at vertex %zu\n", g[t].index);
- return false;
- }
- dots.insert(t);
- v = t;
- continue;
- }
- // otherwise, we found a terminating dot state
- break;
- }
-
- // Our terminating states are the successors of v.
- // All of these MUST have an edge from start as well.
- for (auto w : adjacent_vertices_range(v, g)) {
- succ.insert(w);
- if (!edge(start, w, g).second) {
+ return false;
+ }
+ dots.insert(t);
+ v = t;
+ continue;
+ }
+ // otherwise, we found a terminating dot state
+ break;
+ }
+
+ // Our terminating states are the successors of v.
+ // All of these MUST have an edge from start as well.
+ for (auto w : adjacent_vertices_range(v, g)) {
+ succ.insert(w);
+ if (!edge(start, w, g).second) {
DEBUG_PRINTF("failing, vertex %zu does not have edge from start\n",
- g[w].index);
- return false;
- }
- }
-
- /* All the non chained v connected to start must be in succ as well
- * TODO: remember why (and document). */
- for (auto u : adjacent_vertices_range(start, g)) {
- if (is_special(u, g)) {
- continue;
- }
- if (!contains(dots, u) && !contains(succ, u)) {
- return false;
- }
- }
-
- return !succ.empty();
-}
-
-static
-void collapseVariableDotRepeat(NGHolder &g, NFAVertex start,
- set<NFAVertex> &dead, UNUSED depth *startBegin,
- depth *startEnd) {
- // Handle optional dot repeat prefixes, e.g.
- // /^.{0,30}foo/s, /^.{0,5}foo/s, unanchored equivs
- // Note that this code assumes that fixed repeats ('^.{5,20}') have been
- // pruned already, down (in this case) to '^.{0,15}'.
-
- // The first of our optional dots must be connected to start. The jump edge
- // past it will be verified in gatherParticipants(). If start is
- // graph.start, it should not be connected to startDs.
+ g[w].index);
+ return false;
+ }
+ }
+
+ /* All the non chained v connected to start must be in succ as well
+ * TODO: remember why (and document). */
+ for (auto u : adjacent_vertices_range(start, g)) {
+ if (is_special(u, g)) {
+ continue;
+ }
+ if (!contains(dots, u) && !contains(succ, u)) {
+ return false;
+ }
+ }
+
+ return !succ.empty();
+}
+
+static
+void collapseVariableDotRepeat(NGHolder &g, NFAVertex start,
+ set<NFAVertex> &dead, UNUSED depth *startBegin,
+ depth *startEnd) {
+ // Handle optional dot repeat prefixes, e.g.
+ // /^.{0,30}foo/s, /^.{0,5}foo/s, unanchored equivs
+ // Note that this code assumes that fixed repeats ('^.{5,20}') have been
+ // pruned already, down (in this case) to '^.{0,15}'.
+
+ // The first of our optional dots must be connected to start. The jump edge
+ // past it will be verified in gatherParticipants(). If start is
+ // graph.start, it should not be connected to startDs.
NFAVertex initialDot = NGHolder::null_vertex();
- for (auto v : adjacent_vertices_range(start, g)) {
- if (is_special(v, g)) {
- continue;
- }
- if (is_dot(v, g) && isStartNode(v, start, g, false)) {
- if (initialDot) {
- return;
- }
- initialDot = v;
+ for (auto v : adjacent_vertices_range(start, g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ if (is_dot(v, g) && isStartNode(v, start, g, false)) {
+ if (initialDot) {
+ return;
+ }
+ initialDot = v;
DEBUG_PRINTF("initial dot vertex is %zu\n", g[v].index);
- }
- }
-
- if (!initialDot) {
- return;
- }
-
- // Collect all the other optional dot vertices and the successor vertices
- // by walking down the graph from initialDot
- set<NFAVertex> dots, succ;
- if (!gatherParticipants(g, start, initialDot, dots, succ)) {
- DEBUG_PRINTF("gatherParticipants failed\n");
- return;
- }
-
- DEBUG_PRINTF("optional dot repeat with %zu participants, "
- "terminating in %zu non-dot nodes\n",
- dots.size(), succ.size());
-
- // Remove all the participants and set the start offset
- dead.insert(dots.begin(), dots.end());
-
- DEBUG_PRINTF("current offsets: %s-%s\n", startBegin->str().c_str(),
- startEnd->str().c_str());
-
- if (start == g.start && startEnd->is_infinite()) {
+ }
+ }
+
+ if (!initialDot) {
+ return;
+ }
+
+ // Collect all the other optional dot vertices and the successor vertices
+ // by walking down the graph from initialDot
+ set<NFAVertex> dots, succ;
+ if (!gatherParticipants(g, start, initialDot, dots, succ)) {
+ DEBUG_PRINTF("gatherParticipants failed\n");
+ return;
+ }
+
+ DEBUG_PRINTF("optional dot repeat with %zu participants, "
+ "terminating in %zu non-dot nodes\n",
+ dots.size(), succ.size());
+
+ // Remove all the participants and set the start offset
+ dead.insert(dots.begin(), dots.end());
+
+ DEBUG_PRINTF("current offsets: %s-%s\n", startBegin->str().c_str(),
+ startEnd->str().c_str());
+
+ if (start == g.start && startEnd->is_infinite()) {
*startEnd = depth(dots.size());
- } else if (startEnd->is_finite()) {
- *startEnd += dots.size();
- }
- assert(startEnd->is_reachable());
-
- // Connect our successor vertices to both start and startDs.
+ } else if (startEnd->is_finite()) {
+ *startEnd += dots.size();
+ }
+ assert(startEnd->is_reachable());
+
+ // Connect our successor vertices to both start and startDs.
for (auto v : succ) {
- add_edge_if_not_present(g.start, v, g);
- add_edge_if_not_present(g.startDs, v, g);
- }
-}
-
-static
-void deleteVertices(set<NFAVertex> &dead, NGHolder &g) {
- if (!dead.empty()) {
- DEBUG_PRINTF("pruning %zu vertices\n", dead.size());
- remove_vertices(dead, g);
- }
- dead.clear();
-}
-
-static
-void reformAnchoredRepeats(NGHolder &g, depth *startBegin, depth *startEnd) {
- DEBUG_PRINTF("component\n");
- set<NFAVertex> anchored, unanchored, dead;
- if (!findStarts(g, anchored, unanchored)) {
- DEBUG_PRINTF("no starts\n");
- return;
- }
-
- reformAnchoredRepeatsComponent(g, anchored, unanchored, dead, startBegin,
- startEnd);
- deleteVertices(dead, g);
-
- reformUnanchoredRepeatsComponent(g, anchored, unanchored, dead, startBegin,
- startEnd);
- deleteVertices(dead, g);
-}
-
-static
-void collapseVariableRepeats(NGHolder &g, depth *startBegin, depth *startEnd) {
- DEBUG_PRINTF("collapseVariableRepeats\n");
- set<NFAVertex> dead;
-
- collapseVariableDotRepeat(g, g.start, dead, startBegin, startEnd);
- deleteVertices(dead, g);
-
- collapseVariableDotRepeat(g, g.startDs, dead, startBegin, startEnd);
- deleteVertices(dead, g);
-}
-
-static
-void addDotsBetween(NGHolder &g, NFAVertex lhs, vector<NFAVertex> &rhs,
- depth min_repeat, depth max_repeat) {
- const bool unbounded = max_repeat.is_infinite();
- if (unbounded) {
- max_repeat = min_repeat;
- }
-
- assert(max_repeat.is_finite());
-
- NFAVertex u = lhs;
-
- if (!min_repeat && unbounded) {
- NFAVertex v = add_vertex(g);
- add_edge(u, v, g);
- g[v].char_reach.setall();
-
- for (auto w : rhs) {
- add_edge(lhs, w, g);
- }
- }
-
- for (u32 i = 0; i < min_repeat; i++) {
- NFAVertex v = add_vertex(g);
- add_edge(u, v, g);
- g[v].char_reach.setall();
- u = v;
- }
-
- NFAVertex split = u;
- /* lhs now split point for optional */
- for (u32 i = min_repeat; i < max_repeat; i++) {
- NFAVertex v = add_vertex(g);
- add_edge(u, v, g);
- if (u != split) {
- add_edge(split, v, g);
- }
- g[v].char_reach.setall();
- u = v;
- }
-
- if (unbounded) {
- add_edge(u, u, g);
- }
-
- for (auto w : rhs) {
- add_edge(u, w, g);
- if (split != u) {
- add_edge(split, w, g);
- }
- }
-}
-
-static
-void restoreLeadingDots(NGHolder &g, const depth &startBegin,
- const depth &startEnd) {
- if (startBegin == depth(0) && startEnd.is_infinite()) {
- return;
- }
- DEBUG_PRINTF("ungobble (%s, %s)\n", startBegin.str().c_str(),
- startEnd.str().c_str());
-
- for (UNUSED auto v : adjacent_vertices_range(g.start, g)) {
- assert(edge(g.startDs, v, g).second);
- }
- clear_out_edges(g.start, g);
- add_edge(g.start, g.startDs, g);
-
- const bool unbounded = startEnd.is_infinite();
-
- NFAVertex root = unbounded ? g.startDs : g.start;
-
- vector<NFAVertex> rhs;
- insert(&rhs, rhs.end(), adjacent_vertices(g.startDs, g));
- rhs.erase(remove(rhs.begin(), rhs.end(), g.startDs), rhs.end());
- for (auto v : rhs) {
- remove_edge(g.startDs, v, g);
- }
-
- addDotsBetween(g, root, rhs, startBegin, startEnd);
+ add_edge_if_not_present(g.start, v, g);
+ add_edge_if_not_present(g.startDs, v, g);
+ }
+}
+
+static
+void deleteVertices(set<NFAVertex> &dead, NGHolder &g) {
+ if (!dead.empty()) {
+ DEBUG_PRINTF("pruning %zu vertices\n", dead.size());
+ remove_vertices(dead, g);
+ }
+ dead.clear();
+}
+
+static
+void reformAnchoredRepeats(NGHolder &g, depth *startBegin, depth *startEnd) {
+ DEBUG_PRINTF("component\n");
+ set<NFAVertex> anchored, unanchored, dead;
+ if (!findStarts(g, anchored, unanchored)) {
+ DEBUG_PRINTF("no starts\n");
+ return;
+ }
+
+ reformAnchoredRepeatsComponent(g, anchored, unanchored, dead, startBegin,
+ startEnd);
+ deleteVertices(dead, g);
+
+ reformUnanchoredRepeatsComponent(g, anchored, unanchored, dead, startBegin,
+ startEnd);
+ deleteVertices(dead, g);
+}
+
+static
+void collapseVariableRepeats(NGHolder &g, depth *startBegin, depth *startEnd) {
+ DEBUG_PRINTF("collapseVariableRepeats\n");
+ set<NFAVertex> dead;
+
+ collapseVariableDotRepeat(g, g.start, dead, startBegin, startEnd);
+ deleteVertices(dead, g);
+
+ collapseVariableDotRepeat(g, g.startDs, dead, startBegin, startEnd);
+ deleteVertices(dead, g);
+}
+
+static
+void addDotsBetween(NGHolder &g, NFAVertex lhs, vector<NFAVertex> &rhs,
+ depth min_repeat, depth max_repeat) {
+ const bool unbounded = max_repeat.is_infinite();
+ if (unbounded) {
+ max_repeat = min_repeat;
+ }
+
+ assert(max_repeat.is_finite());
+
+ NFAVertex u = lhs;
+
+ if (!min_repeat && unbounded) {
+ NFAVertex v = add_vertex(g);
+ add_edge(u, v, g);
+ g[v].char_reach.setall();
+
+ for (auto w : rhs) {
+ add_edge(lhs, w, g);
+ }
+ }
+
+ for (u32 i = 0; i < min_repeat; i++) {
+ NFAVertex v = add_vertex(g);
+ add_edge(u, v, g);
+ g[v].char_reach.setall();
+ u = v;
+ }
+
+ NFAVertex split = u;
+ /* lhs now split point for optional */
+ for (u32 i = min_repeat; i < max_repeat; i++) {
+ NFAVertex v = add_vertex(g);
+ add_edge(u, v, g);
+ if (u != split) {
+ add_edge(split, v, g);
+ }
+ g[v].char_reach.setall();
+ u = v;
+ }
+
+ if (unbounded) {
+ add_edge(u, u, g);
+ }
+
+ for (auto w : rhs) {
+ add_edge(u, w, g);
+ if (split != u) {
+ add_edge(split, w, g);
+ }
+ }
+}
+
+static
+void restoreLeadingDots(NGHolder &g, const depth &startBegin,
+ const depth &startEnd) {
+ if (startBegin == depth(0) && startEnd.is_infinite()) {
+ return;
+ }
+ DEBUG_PRINTF("ungobble (%s, %s)\n", startBegin.str().c_str(),
+ startEnd.str().c_str());
+
+ for (UNUSED auto v : adjacent_vertices_range(g.start, g)) {
+ assert(edge(g.startDs, v, g).second);
+ }
+ clear_out_edges(g.start, g);
+ add_edge(g.start, g.startDs, g);
+
+ const bool unbounded = startEnd.is_infinite();
+
+ NFAVertex root = unbounded ? g.startDs : g.start;
+
+ vector<NFAVertex> rhs;
+ insert(&rhs, rhs.end(), adjacent_vertices(g.startDs, g));
+ rhs.erase(remove(rhs.begin(), rhs.end(), g.startDs), rhs.end());
+ for (auto v : rhs) {
+ remove_edge(g.startDs, v, g);
+ }
+
+ addDotsBetween(g, root, rhs, startBegin, startEnd);
renumber_vertices(g);
renumber_edges(g);
-}
-
-// Entry point.
-void reformLeadingDots(NGHolder &g) {
- depth startBegin(0);
- depth startEnd = depth::infinity();
-
- reformAnchoredRepeats(g, &startBegin, &startEnd);
- collapseVariableRepeats(g, &startBegin, &startEnd);
- restoreLeadingDots(g, startBegin, startEnd);
-}
-
-} // namespace ue2
+}
+
+// Entry point.
+void reformLeadingDots(NGHolder &g) {
+ depth startBegin(0);
+ depth startEnd = depth::infinity();
+
+ reformAnchoredRepeats(g, &startBegin, &startEnd);
+ collapseVariableRepeats(g, &startBegin, &startEnd);
+ restoreLeadingDots(g, startBegin, startEnd);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.h b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.h
index d5ffee2d79..8454c31941 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_anchored_dots.h
@@ -1,45 +1,45 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Analysis pass to reform leading dots.
- */
-
-#ifndef NG_ANCHORED_BOUNDED_REPEATS_H
-#define NG_ANCHORED_BOUNDED_REPEATS_H
-
-namespace ue2 {
-
-class NGHolder;
-
-/* should not be used if SoM is required */
-void reformLeadingDots(NGHolder &g);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Analysis pass to reform leading dots.
+ */
+
+#ifndef NG_ANCHORED_BOUNDED_REPEATS_H
+#define NG_ANCHORED_BOUNDED_REPEATS_H
+
+namespace ue2 {
+
+class NGHolder;
+
+/* should not be used if SoM is required */
+void reformLeadingDots(NGHolder &g);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp
index 24d4ecace1..8812afadb7 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp
@@ -1,558 +1,558 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Resolve special assert vertices.
- *
- * The assert resolution algorithm proceeds by iterating over those edges with
- * assertion flags, considering source and target vertices of each edge. If a
- * vertex has a superset of the reachability demanded by the assertion on the
- * edge, it is split into alternatives providing the word and non-word paths
- * through that vertex.
- *
- * A great deal of the complexity in the resolveAsserts pass is devoted to
- * handling these assertions when the UCP flag is specified (meaning \\w and \\W
- * are implemented with Unicode properties, rather than their ASCII
- * interpretation) and the prefiltering flag is also used. Complete,
- * non-prefiltering UCP support is not available yet.
- */
-#include "ng_asserts.h"
-
-#include "ng.h"
-#include "ng_prune.h"
-#include "ng_redundancy.h"
-#include "ng_util.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Resolve special assert vertices.
+ *
+ * The assert resolution algorithm proceeds by iterating over those edges with
+ * assertion flags, considering source and target vertices of each edge. If a
+ * vertex has a superset of the reachability demanded by the assertion on the
+ * edge, it is split into alternatives providing the word and non-word paths
+ * through that vertex.
+ *
+ * A great deal of the complexity in the resolveAsserts pass is devoted to
+ * handling these assertions when the UCP flag is specified (meaning \\w and \\W
+ * are implemented with Unicode properties, rather than their ASCII
+ * interpretation) and the prefiltering flag is also used. Complete,
+ * non-prefiltering UCP support is not available yet.
+ */
+#include "ng_asserts.h"
+
+#include "ng.h"
+#include "ng_prune.h"
+#include "ng_redundancy.h"
+#include "ng_util.h"
#include "compiler/compiler.h"
-#include "parser/position.h" // for POS flags
-#include "util/bitutils.h" // for findAndClearLSB_32
-#include "util/boundary_reports.h"
-#include "util/container.h"
-#include "util/compile_context.h"
-#include "util/compile_error.h"
-#include "util/graph_range.h"
-#include "util/report_manager.h"
-#include "util/unicode_def.h"
-
-#include <queue>
-
-using namespace std;
-
-namespace ue2 {
-
-/** \brief Hard limit on the maximum number of vertices we'll clone before we
- * throw up our hands and report 'Pattern too large.' */
-static const size_t MAX_CLONED_VERTICES = 2048;
-
-/** \brief The definition of \\w, since we use it everywhere in here. */
-static const CharReach CHARREACH_WORD(CharReach('a', 'z') |
- CharReach('A', 'Z') | CharReach('0', '9') | CharReach('_'));
-
-/** \brief \\W is the inverse of \\w */
-static const CharReach CHARREACH_NONWORD(~CHARREACH_WORD);
-
-/** \brief Prefiltering definition of \\w for UCP mode.
- *
- * Includes all high bytes as to capture all non-ASCII, however depending on
- * direction only continuers or starters are strictly required - as the input
- * is well-formed, this laxness will not cost us. */
-static const CharReach CHARREACH_WORD_UCP_PRE(CHARREACH_WORD
- | CharReach(128, 255));
-
-/** \brief Prefiltering definition of \\W for UCP Mode.
- *
- * (non-word already includes high bytes) */
-static const CharReach CHARREACH_NONWORD_UCP_PRE(CHARREACH_NONWORD);
-
-/** \brief Find all the edges with assertion flags. */
-static
-vector<NFAEdge> getAsserts(const NGHolder &g) {
- vector<NFAEdge> out;
- for (const auto &e : edges_range(g)) {
- if (g[e].assert_flags) {
- out.push_back(e);
- }
- }
- return out;
-}
-
-static
-void addToSplit(const NGHolder &g, NFAVertex v, map<u32, NFAVertex> *to_split) {
+#include "parser/position.h" // for POS flags
+#include "util/bitutils.h" // for findAndClearLSB_32
+#include "util/boundary_reports.h"
+#include "util/container.h"
+#include "util/compile_context.h"
+#include "util/compile_error.h"
+#include "util/graph_range.h"
+#include "util/report_manager.h"
+#include "util/unicode_def.h"
+
+#include <queue>
+
+using namespace std;
+
+namespace ue2 {
+
+/** \brief Hard limit on the maximum number of vertices we'll clone before we
+ * throw up our hands and report 'Pattern too large.' */
+static const size_t MAX_CLONED_VERTICES = 2048;
+
+/** \brief The definition of \\w, since we use it everywhere in here. */
+static const CharReach CHARREACH_WORD(CharReach('a', 'z') |
+ CharReach('A', 'Z') | CharReach('0', '9') | CharReach('_'));
+
+/** \brief \\W is the inverse of \\w */
+static const CharReach CHARREACH_NONWORD(~CHARREACH_WORD);
+
+/** \brief Prefiltering definition of \\w for UCP mode.
+ *
+ * Includes all high bytes as to capture all non-ASCII, however depending on
+ * direction only continuers or starters are strictly required - as the input
+ * is well-formed, this laxness will not cost us. */
+static const CharReach CHARREACH_WORD_UCP_PRE(CHARREACH_WORD
+ | CharReach(128, 255));
+
+/** \brief Prefiltering definition of \\W for UCP Mode.
+ *
+ * (non-word already includes high bytes) */
+static const CharReach CHARREACH_NONWORD_UCP_PRE(CHARREACH_NONWORD);
+
+/** \brief Find all the edges with assertion flags. */
+static
+vector<NFAEdge> getAsserts(const NGHolder &g) {
+ vector<NFAEdge> out;
+ for (const auto &e : edges_range(g)) {
+ if (g[e].assert_flags) {
+ out.push_back(e);
+ }
+ }
+ return out;
+}
+
+static
+void addToSplit(const NGHolder &g, NFAVertex v, map<u32, NFAVertex> *to_split) {
DEBUG_PRINTF("%zu needs splitting\n", g[v].index);
- to_split->emplace(g[v].index, v);
-}
-
-/** \brief Find vertices that need to be split due to an assertion edge.
- *
- * A vertex needs to be split if has an edge to/from it with an assert with a
- * restriction on the relevant end. */
-static
-void findSplitters(const NGHolder &g, const vector<NFAEdge> &asserts,
- map<u32, NFAVertex> *to_split,
- map<u32, NFAVertex> *to_split_ucp) {
- for (const auto &e : asserts) {
- NFAVertex u = source(e, g);
- NFAVertex v = target(e, g);
- u32 flags = g[e].assert_flags;
- assert(flags);
-
- const CharReach &u_cr = g[u].char_reach;
- const CharReach &v_cr = g[v].char_reach;
-
- bool ucp_assert = flags & UCP_ASSERT_FLAGS;
- bool normal_assert = flags & NON_UCP_ASSERT_FLAGS;
- /* In reality, an expression can only be entirely ucp or not ucp */
- assert(ucp_assert != normal_assert);
-
- if (normal_assert) {
- /* assume any flag results in us have to split if the vertex is not
- * a subset of word or completely disjoint from it. We could be more
- * nuanced if flags is a disjunction of multiple assertions. */
- if (!u_cr.isSubsetOf(CHARREACH_WORD)
- && !u_cr.isSubsetOf(CHARREACH_NONWORD)
- && u != g.start) { /* start is always considered a nonword */
- addToSplit(g, u, to_split);
- }
-
- if (!v_cr.isSubsetOf(CHARREACH_WORD)
- && !v_cr.isSubsetOf(CHARREACH_NONWORD)
- && v != g.accept /* accept require special handling, done on a
- * per edge basis in resolve asserts
- */
- && v != g.acceptEod) { /* eod is always considered a nonword */
- addToSplit(g, v, to_split);
- }
- }
-
- if (ucp_assert) {
- /* note: the ucp prefilter crs overlap - requires a bit more care */
- if (u == g.start) { /* start never needs to be split,
- * treat nonword */
- } else if (flags & POS_FLAG_ASSERT_WORD_TO_ANY_UCP) {
- if (!u_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE)
- && !u_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) {
- addToSplit(g, u, to_split_ucp);
- }
- } else {
- assert(flags & POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP);
- if (!u_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE)
- && !u_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) {
- addToSplit(g, u, to_split_ucp);
- }
- }
-
- if (v == g.acceptEod /* eod is always considered a nonword */
- || v == g.accept) { /* accept require special handling, done on
- * a per edge basis in resolve asserts */
- } else if (flags & POS_FLAG_ASSERT_ANY_TO_WORD_UCP) {
- if (!v_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE)
- && !v_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) {
- addToSplit(g, v, to_split_ucp);
- }
- } else {
- assert(flags & POS_FLAG_ASSERT_ANY_TO_NONWORD_UCP);
- if (!v_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE)
- && !v_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) {
- addToSplit(g, v, to_split_ucp);
- }
- }
- }
- }
-}
-
-static
+ to_split->emplace(g[v].index, v);
+}
+
+/** \brief Find vertices that need to be split due to an assertion edge.
+ *
+ * A vertex needs to be split if has an edge to/from it with an assert with a
+ * restriction on the relevant end. */
+static
+void findSplitters(const NGHolder &g, const vector<NFAEdge> &asserts,
+ map<u32, NFAVertex> *to_split,
+ map<u32, NFAVertex> *to_split_ucp) {
+ for (const auto &e : asserts) {
+ NFAVertex u = source(e, g);
+ NFAVertex v = target(e, g);
+ u32 flags = g[e].assert_flags;
+ assert(flags);
+
+ const CharReach &u_cr = g[u].char_reach;
+ const CharReach &v_cr = g[v].char_reach;
+
+ bool ucp_assert = flags & UCP_ASSERT_FLAGS;
+ bool normal_assert = flags & NON_UCP_ASSERT_FLAGS;
+ /* In reality, an expression can only be entirely ucp or not ucp */
+ assert(ucp_assert != normal_assert);
+
+ if (normal_assert) {
+ /* assume any flag results in us have to split if the vertex is not
+ * a subset of word or completely disjoint from it. We could be more
+ * nuanced if flags is a disjunction of multiple assertions. */
+ if (!u_cr.isSubsetOf(CHARREACH_WORD)
+ && !u_cr.isSubsetOf(CHARREACH_NONWORD)
+ && u != g.start) { /* start is always considered a nonword */
+ addToSplit(g, u, to_split);
+ }
+
+ if (!v_cr.isSubsetOf(CHARREACH_WORD)
+ && !v_cr.isSubsetOf(CHARREACH_NONWORD)
+ && v != g.accept /* accept require special handling, done on a
+ * per edge basis in resolve asserts
+ */
+ && v != g.acceptEod) { /* eod is always considered a nonword */
+ addToSplit(g, v, to_split);
+ }
+ }
+
+ if (ucp_assert) {
+ /* note: the ucp prefilter crs overlap - requires a bit more care */
+ if (u == g.start) { /* start never needs to be split,
+ * treat nonword */
+ } else if (flags & POS_FLAG_ASSERT_WORD_TO_ANY_UCP) {
+ if (!u_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE)
+ && !u_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) {
+ addToSplit(g, u, to_split_ucp);
+ }
+ } else {
+ assert(flags & POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP);
+ if (!u_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE)
+ && !u_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) {
+ addToSplit(g, u, to_split_ucp);
+ }
+ }
+
+ if (v == g.acceptEod /* eod is always considered a nonword */
+ || v == g.accept) { /* accept require special handling, done on
+ * a per edge basis in resolve asserts */
+ } else if (flags & POS_FLAG_ASSERT_ANY_TO_WORD_UCP) {
+ if (!v_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE)
+ && !v_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) {
+ addToSplit(g, v, to_split_ucp);
+ }
+ } else {
+ assert(flags & POS_FLAG_ASSERT_ANY_TO_NONWORD_UCP);
+ if (!v_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE)
+ && !v_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) {
+ addToSplit(g, v, to_split_ucp);
+ }
+ }
+ }
+ }
+}
+
+static
void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
NFAVertex v, s32 adj) {
- // Don't try and set the report ID of a special vertex.
- assert(!is_special(v, g));
-
- // If there's a report set already, we're replacing it.
- g[v].reports.clear();
-
+ // Don't try and set the report ID of a special vertex.
+ assert(!is_special(v, g));
+
+ // If there's a report set already, we're replacing it.
+ g[v].reports.clear();
+
Report ir = rm.getBasicInternalReport(expr, adj);
-
- g[v].reports.insert(rm.getInternalId(ir));
+
+ g[v].reports.insert(rm.getInternalId(ir));
DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj);
-}
-
-static
+}
+
+static
NFAVertex makeClone(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
NFAVertex v, const CharReach &cr_mask) {
- NFAVertex clone = clone_vertex(g, v);
- g[clone].char_reach &= cr_mask;
- clone_out_edges(g, v, clone);
- clone_in_edges(g, v, clone);
-
- if (v == g.startDs) {
+ NFAVertex clone = clone_vertex(g, v);
+ g[clone].char_reach &= cr_mask;
+ clone_out_edges(g, v, clone);
+ clone_in_edges(g, v, clone);
+
+ if (v == g.startDs) {
if (expr.utf8) {
- g[clone].char_reach &= ~UTF_START_CR;
- }
-
- DEBUG_PRINTF("marked as virt\n");
- g[clone].assert_flags = POS_FLAG_VIRTUAL_START;
-
+ g[clone].char_reach &= ~UTF_START_CR;
+ }
+
+ DEBUG_PRINTF("marked as virt\n");
+ g[clone].assert_flags = POS_FLAG_VIRTUAL_START;
+
setReportId(rm, g, expr, clone, 0);
- }
-
- return clone;
-}
-
-static
+ }
+
+ return clone;
+}
+
+static
void splitVertex(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
NFAVertex v, bool ucp) {
- assert(v != g.start);
- assert(v != g.accept);
- assert(v != g.acceptEod);
+ assert(v != g.start);
+ assert(v != g.accept);
+ assert(v != g.acceptEod);
DEBUG_PRINTF("partitioning vertex %zu ucp:%d\n", g[v].index, (int)ucp);
-
- CharReach cr_word = ucp ? CHARREACH_WORD_UCP_PRE : CHARREACH_WORD;
- CharReach cr_nonword = ucp ? CHARREACH_NONWORD_UCP_PRE : CHARREACH_NONWORD;
-
- auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; };
-
- // Split v into word/nonword vertices with only asserting out-edges.
+
+ CharReach cr_word = ucp ? CHARREACH_WORD_UCP_PRE : CHARREACH_WORD;
+ CharReach cr_nonword = ucp ? CHARREACH_NONWORD_UCP_PRE : CHARREACH_NONWORD;
+
+ auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; };
+
+ // Split v into word/nonword vertices with only asserting out-edges.
NFAVertex w_out = makeClone(rm, g, expr, v, cr_word);
NFAVertex nw_out = makeClone(rm, g, expr, v, cr_nonword);
- remove_out_edge_if(w_out, has_no_assert, g);
- remove_out_edge_if(nw_out, has_no_assert, g);
-
- // Split v into word/nonword vertices with only asserting in-edges.
+ remove_out_edge_if(w_out, has_no_assert, g);
+ remove_out_edge_if(nw_out, has_no_assert, g);
+
+ // Split v into word/nonword vertices with only asserting in-edges.
NFAVertex w_in = makeClone(rm, g, expr, v, cr_word);
NFAVertex nw_in = makeClone(rm, g, expr, v, cr_nonword);
- remove_in_edge_if(w_in, has_no_assert, g);
- remove_in_edge_if(nw_in, has_no_assert, g);
-
- // Prune edges with asserts from original v.
- auto has_assert = [&g](const NFAEdge &e) { return g[e].assert_flags; };
- remove_in_edge_if(v, has_assert, g);
- remove_out_edge_if(v, has_assert, g);
-}
-
-static
+ remove_in_edge_if(w_in, has_no_assert, g);
+ remove_in_edge_if(nw_in, has_no_assert, g);
+
+ // Prune edges with asserts from original v.
+ auto has_assert = [&g](const NFAEdge &e) { return g[e].assert_flags; };
+ remove_in_edge_if(v, has_assert, g);
+ remove_out_edge_if(v, has_assert, g);
+}
+
+static
void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
set<NFAEdge> *dead) {
- for (const auto &e : edges_range(g)) {
- u32 flags = g[e].assert_flags;
- if (!flags) {
- continue;
- }
-
- NFAVertex u = source(e, g);
- NFAVertex v = target(e, g);
-
- assert(u != g.startDs);
-
- const CharReach &u_cr = g[u].char_reach;
- const CharReach &v_cr = g[v].char_reach;
-
- bool impassable = true;
- bool ucp = flags & UCP_ASSERT_FLAGS;
+ for (const auto &e : edges_range(g)) {
+ u32 flags = g[e].assert_flags;
+ if (!flags) {
+ continue;
+ }
+
+ NFAVertex u = source(e, g);
+ NFAVertex v = target(e, g);
+
+ assert(u != g.startDs);
+
+ const CharReach &u_cr = g[u].char_reach;
+ const CharReach &v_cr = g[v].char_reach;
+
+ bool impassable = true;
+ bool ucp = flags & UCP_ASSERT_FLAGS;
DEBUG_PRINTF("resolving edge %zu->%zu (flags=0x%x, ucp=%d)\n",
g[u].index, g[v].index, flags, (int)ucp);
- while (flags && impassable) {
- u32 flag = 1U << findAndClearLSB_32(&flags);
- switch (flag) {
- case POS_FLAG_ASSERT_NONWORD_TO_NONWORD:
- case POS_FLAG_ASSERT_NONWORD_TO_WORD:
- if ((u_cr & CHARREACH_NONWORD).none() && u != g.start) {
- continue;
- }
- break;
- case POS_FLAG_ASSERT_WORD_TO_NONWORD:
- case POS_FLAG_ASSERT_WORD_TO_WORD:
- if ((u_cr & CHARREACH_WORD).none() || u == g.start) {
- continue;
- }
- break;
- case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP:
- case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP:
- if ((u_cr & ~CHARREACH_NONWORD_UCP_PRE).any() && u != g.start) {
- continue;
- }
- break;
- case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP:
- case POS_FLAG_ASSERT_WORD_TO_WORD_UCP:
- if ((u_cr & ~CHARREACH_WORD_UCP_PRE).any() || u == g.start) {
- continue;
- }
- break;
- default:
- assert(0);
- }
-
- if (v == g.accept) {
- /* accept special will need to be treated specially later */
- impassable = false;
- continue;
- }
-
- switch (flag) {
- case POS_FLAG_ASSERT_NONWORD_TO_NONWORD:
- case POS_FLAG_ASSERT_WORD_TO_NONWORD:
- if ((v_cr & CHARREACH_NONWORD).none() && v != g.acceptEod) {
- continue;
- }
- break;
- case POS_FLAG_ASSERT_WORD_TO_WORD:
- case POS_FLAG_ASSERT_NONWORD_TO_WORD:
- if ((v_cr & CHARREACH_WORD).none() || v == g.acceptEod) {
- continue;
- }
- break;
- case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP:
- case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP:
- if ((v_cr & ~CHARREACH_NONWORD_UCP_PRE).any()
- && v != g.acceptEod) {
- continue;
- }
- break;
- case POS_FLAG_ASSERT_WORD_TO_WORD_UCP:
- case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP:
- if ((v_cr & ~CHARREACH_WORD_UCP_PRE).any()
- || v == g.acceptEod) {
- continue;
- }
- break;
- default:
- assert(0);
- }
- impassable = false;
- }
-
- if (impassable) {
- dead->insert(e);
- } else if (v == g.accept && !ucp) {
- bool u_w = (u_cr & CHARREACH_NONWORD).none() && u != g.start;
- UNUSED bool u_nw = (u_cr & CHARREACH_WORD).none() || u == g.start;
- assert(u_w != u_nw);
- bool v_w = false;
- bool v_nw = false;
-
- flags = g[e].assert_flags;
- if (u_w) {
- v_w = flags & POS_FLAG_ASSERT_WORD_TO_WORD;
- v_nw = flags & POS_FLAG_ASSERT_WORD_TO_NONWORD;
- } else {
- v_w = flags & POS_FLAG_ASSERT_NONWORD_TO_WORD;
- v_nw = flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD;
- }
- assert(v_w || v_nw);
- if (v_w && v_nw) {
- /* edge is effectively unconditional */
- g[e].assert_flags = 0;
- } else if (v_w) {
- /* need to add a word byte */
- NFAVertex vv = add_vertex(g);
+ while (flags && impassable) {
+ u32 flag = 1U << findAndClearLSB_32(&flags);
+ switch (flag) {
+ case POS_FLAG_ASSERT_NONWORD_TO_NONWORD:
+ case POS_FLAG_ASSERT_NONWORD_TO_WORD:
+ if ((u_cr & CHARREACH_NONWORD).none() && u != g.start) {
+ continue;
+ }
+ break;
+ case POS_FLAG_ASSERT_WORD_TO_NONWORD:
+ case POS_FLAG_ASSERT_WORD_TO_WORD:
+ if ((u_cr & CHARREACH_WORD).none() || u == g.start) {
+ continue;
+ }
+ break;
+ case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP:
+ case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP:
+ if ((u_cr & ~CHARREACH_NONWORD_UCP_PRE).any() && u != g.start) {
+ continue;
+ }
+ break;
+ case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP:
+ case POS_FLAG_ASSERT_WORD_TO_WORD_UCP:
+ if ((u_cr & ~CHARREACH_WORD_UCP_PRE).any() || u == g.start) {
+ continue;
+ }
+ break;
+ default:
+ assert(0);
+ }
+
+ if (v == g.accept) {
+ /* accept special will need to be treated specially later */
+ impassable = false;
+ continue;
+ }
+
+ switch (flag) {
+ case POS_FLAG_ASSERT_NONWORD_TO_NONWORD:
+ case POS_FLAG_ASSERT_WORD_TO_NONWORD:
+ if ((v_cr & CHARREACH_NONWORD).none() && v != g.acceptEod) {
+ continue;
+ }
+ break;
+ case POS_FLAG_ASSERT_WORD_TO_WORD:
+ case POS_FLAG_ASSERT_NONWORD_TO_WORD:
+ if ((v_cr & CHARREACH_WORD).none() || v == g.acceptEod) {
+ continue;
+ }
+ break;
+ case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP:
+ case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP:
+ if ((v_cr & ~CHARREACH_NONWORD_UCP_PRE).any()
+ && v != g.acceptEod) {
+ continue;
+ }
+ break;
+ case POS_FLAG_ASSERT_WORD_TO_WORD_UCP:
+ case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP:
+ if ((v_cr & ~CHARREACH_WORD_UCP_PRE).any()
+ || v == g.acceptEod) {
+ continue;
+ }
+ break;
+ default:
+ assert(0);
+ }
+ impassable = false;
+ }
+
+ if (impassable) {
+ dead->insert(e);
+ } else if (v == g.accept && !ucp) {
+ bool u_w = (u_cr & CHARREACH_NONWORD).none() && u != g.start;
+ UNUSED bool u_nw = (u_cr & CHARREACH_WORD).none() || u == g.start;
+ assert(u_w != u_nw);
+ bool v_w = false;
+ bool v_nw = false;
+
+ flags = g[e].assert_flags;
+ if (u_w) {
+ v_w = flags & POS_FLAG_ASSERT_WORD_TO_WORD;
+ v_nw = flags & POS_FLAG_ASSERT_WORD_TO_NONWORD;
+ } else {
+ v_w = flags & POS_FLAG_ASSERT_NONWORD_TO_WORD;
+ v_nw = flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD;
+ }
+ assert(v_w || v_nw);
+ if (v_w && v_nw) {
+ /* edge is effectively unconditional */
+ g[e].assert_flags = 0;
+ } else if (v_w) {
+ /* need to add a word byte */
+ NFAVertex vv = add_vertex(g);
setReportId(rm, g, expr, vv, -1);
- g[vv].char_reach = CHARREACH_WORD;
- add_edge(vv, g.accept, g);
- g[e].assert_flags = 0;
- add_edge(u, vv, g[e], g);
- dead->insert(e);
- } else {
- /* need to add a non word byte or see eod */
- NFAVertex vv = add_vertex(g);
+ g[vv].char_reach = CHARREACH_WORD;
+ add_edge(vv, g.accept, g);
+ g[e].assert_flags = 0;
+ add_edge(u, vv, g[e], g);
+ dead->insert(e);
+ } else {
+ /* need to add a non word byte or see eod */
+ NFAVertex vv = add_vertex(g);
setReportId(rm, g, expr, vv, -1);
- g[vv].char_reach = CHARREACH_NONWORD;
- add_edge(vv, g.accept, g);
- g[e].assert_flags = 0;
- add_edge(u, vv, g[e], g);
+ g[vv].char_reach = CHARREACH_NONWORD;
+ add_edge(vv, g.accept, g);
+ g[e].assert_flags = 0;
+ add_edge(u, vv, g[e], g);
/* there may already be a different edge from start to eod if so
* we need to make it unconditional and alive
*/
if (NFAEdge start_eod = edge(u, g.acceptEod, g)) {
- g[start_eod].assert_flags = 0;
- dead->erase(start_eod);
+ g[start_eod].assert_flags = 0;
+ dead->erase(start_eod);
} else {
add_edge(u, g.acceptEod, g[e], g);
- }
- dead->insert(e);
- }
- } else if (v == g.accept && ucp) {
- DEBUG_PRINTF("resolving ucp assert to accept\n");
- assert(u_cr.any());
- bool u_w = (u_cr & CHARREACH_WORD_UCP_PRE).any()
- && u != g.start;
- bool u_nw = (u_cr & CHARREACH_NONWORD_UCP_PRE).any()
- || u == g.start;
- assert(u_w || u_nw);
-
- bool v_w = false;
- bool v_nw = false;
-
- flags = g[e].assert_flags;
- if (u_w) {
- v_w |= flags & POS_FLAG_ASSERT_WORD_TO_WORD_UCP;
- v_nw |= flags & POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP;
- }
- if (u_nw) {
- v_w |= flags & POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP;
- v_nw |= flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP;
- }
- assert(v_w || v_nw);
- if (v_w && v_nw) {
- /* edge is effectively unconditional */
- g[e].assert_flags = 0;
- } else if (v_w) {
- /* need to add a word byte */
- NFAVertex vv = add_vertex(g);
+ }
+ dead->insert(e);
+ }
+ } else if (v == g.accept && ucp) {
+ DEBUG_PRINTF("resolving ucp assert to accept\n");
+ assert(u_cr.any());
+ bool u_w = (u_cr & CHARREACH_WORD_UCP_PRE).any()
+ && u != g.start;
+ bool u_nw = (u_cr & CHARREACH_NONWORD_UCP_PRE).any()
+ || u == g.start;
+ assert(u_w || u_nw);
+
+ bool v_w = false;
+ bool v_nw = false;
+
+ flags = g[e].assert_flags;
+ if (u_w) {
+ v_w |= flags & POS_FLAG_ASSERT_WORD_TO_WORD_UCP;
+ v_nw |= flags & POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP;
+ }
+ if (u_nw) {
+ v_w |= flags & POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP;
+ v_nw |= flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP;
+ }
+ assert(v_w || v_nw);
+ if (v_w && v_nw) {
+ /* edge is effectively unconditional */
+ g[e].assert_flags = 0;
+ } else if (v_w) {
+ /* need to add a word byte */
+ NFAVertex vv = add_vertex(g);
setReportId(rm, g, expr, vv, -1);
- g[vv].char_reach = CHARREACH_WORD_UCP_PRE;
- add_edge(vv, g.accept, g);
- g[e].assert_flags = 0;
- add_edge(u, vv, g[e], g);
- dead->insert(e);
- } else {
- /* need to add a non word byte or see eod */
- NFAVertex vv = add_vertex(g);
+ g[vv].char_reach = CHARREACH_WORD_UCP_PRE;
+ add_edge(vv, g.accept, g);
+ g[e].assert_flags = 0;
+ add_edge(u, vv, g[e], g);
+ dead->insert(e);
+ } else {
+ /* need to add a non word byte or see eod */
+ NFAVertex vv = add_vertex(g);
setReportId(rm, g, expr, vv, -1);
- g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE;
- add_edge(vv, g.accept, g);
- g[e].assert_flags = 0;
- add_edge(u, vv, g[e], g);
+ g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE;
+ add_edge(vv, g.accept, g);
+ g[e].assert_flags = 0;
+ add_edge(u, vv, g[e], g);
/* there may already be a different edge from start to eod if so
* we need to make it unconditional and alive
*/
if (NFAEdge start_eod = edge(u, g.acceptEod, g)) {
- g[start_eod].assert_flags = 0;
- dead->erase(start_eod);
+ g[start_eod].assert_flags = 0;
+ dead->erase(start_eod);
} else {
add_edge(u, g.acceptEod, g[e], g);
- }
- dead->insert(e);
- }
- } else {
- /* we can remove the asserts as we have partitioned the vertices
- * into w/nw around the assert edges
- */
- g[e].assert_flags = 0;
- }
- }
-}
-
+ }
+ dead->insert(e);
+ }
+ } else {
+ /* we can remove the asserts as we have partitioned the vertices
+ * into w/nw around the assert edges
+ */
+ g[e].assert_flags = 0;
+ }
+ }
+}
+
void resolveAsserts(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr) {
- vector<NFAEdge> asserts = getAsserts(g);
- if (asserts.empty()) {
- return;
- }
-
- map<u32, NFAVertex> to_split; /* by index, for determinism */
- map<u32, NFAVertex> to_split_ucp; /* by index, for determinism */
- findSplitters(g, asserts, &to_split, &to_split_ucp);
- if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) {
+ vector<NFAEdge> asserts = getAsserts(g);
+ if (asserts.empty()) {
+ return;
+ }
+
+ map<u32, NFAVertex> to_split; /* by index, for determinism */
+ map<u32, NFAVertex> to_split_ucp; /* by index, for determinism */
+ findSplitters(g, asserts, &to_split, &to_split_ucp);
+ if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) {
throw CompileError(expr.index, "Pattern is too large.");
- }
-
- for (const auto &m : to_split) {
- assert(!contains(to_split_ucp, m.first));
+ }
+
+ for (const auto &m : to_split) {
+ assert(!contains(to_split_ucp, m.first));
splitVertex(rm, g, expr, m.second, false);
- }
-
- for (const auto &m : to_split_ucp) {
+ }
+
+ for (const auto &m : to_split_ucp) {
splitVertex(rm, g, expr, m.second, true);
- }
-
- set<NFAEdge> dead;
+ }
+
+ set<NFAEdge> dead;
resolveEdges(rm, g, expr, &dead);
-
- remove_edges(dead, g);
+
+ remove_edges(dead, g);
renumber_vertices(g);
- pruneUseless(g);
- pruneEmptyVertices(g);
-
+ pruneUseless(g);
+ pruneEmptyVertices(g);
+
renumber_vertices(g);
renumber_edges(g);
- clearReports(g);
-}
-
+ clearReports(g);
+}
+
void ensureCodePointStart(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr) {
- /* In utf8 mode there is an implicit assertion that we start at codepoint
- * boundaries. Assert resolution handles the badness coming from asserts.
- * The only other source of trouble is startDs->accept connections.
- */
+ /* In utf8 mode there is an implicit assertion that we start at codepoint
+ * boundaries. Assert resolution handles the badness coming from asserts.
+ * The only other source of trouble is startDs->accept connections.
+ */
NFAEdge orig = edge(g.startDs, g.accept, g);
if (expr.utf8 && orig) {
DEBUG_PRINTF("rectifying %u\n", expr.report);
Report ir = rm.getBasicInternalReport(expr);
- ReportID rep = rm.getInternalId(ir);
-
- NFAVertex v_a = add_vertex(g);
- g[v_a].assert_flags = POS_FLAG_VIRTUAL_START;
- g[v_a].char_reach = UTF_ASCII_CR;
- add_edge(v_a, g.accept, g[orig], g);
-
- NFAVertex v_2 = add_vertex(g);
- g[v_2].assert_flags = POS_FLAG_VIRTUAL_START;
- g[v_2].char_reach = CharReach(UTF_TWO_BYTE_MIN, UTF_TWO_BYTE_MAX);
-
- NFAVertex v_3 = add_vertex(g);
- g[v_3].assert_flags = POS_FLAG_VIRTUAL_START;
- g[v_3].char_reach = CharReach(UTF_THREE_BYTE_MIN, UTF_THREE_BYTE_MAX);
-
- NFAVertex v_4 = add_vertex(g);
- g[v_4].assert_flags = POS_FLAG_VIRTUAL_START;
- g[v_4].char_reach = CharReach(UTF_FOUR_BYTE_MIN, UTF_FOUR_BYTE_MAX);
-
- NFAVertex v_c = add_vertex(g);
- g[v_c].assert_flags = POS_FLAG_VIRTUAL_START;
- g[v_c].char_reach = UTF_CONT_CR;
- add_edge(v_c, g.accept, g[orig], g);
-
- add_edge(v_2, v_c, g);
-
- NFAVertex v_3c = add_vertex(g);
- g[v_3c].assert_flags = POS_FLAG_VIRTUAL_START;
- g[v_3c].char_reach = UTF_CONT_CR;
- add_edge(v_3c, v_c, g);
- add_edge(v_3, v_3c, g);
-
- NFAVertex v_4c = add_vertex(g);
- g[v_4c].assert_flags = POS_FLAG_VIRTUAL_START;
- g[v_4c].char_reach = UTF_CONT_CR;
- add_edge(v_4c, v_3c, g);
- add_edge(v_4, v_4c, g);
-
- g[v_a].reports.insert(rep);
- g[v_c].reports.insert(rep);
-
- add_edge(g.start, v_a, g);
- add_edge(g.startDs, v_a, g);
- add_edge(g.start, v_2, g);
- add_edge(g.startDs, v_2, g);
- add_edge(g.start, v_3, g);
- add_edge(g.startDs, v_3, g);
- add_edge(g.start, v_4, g);
- add_edge(g.startDs, v_4, g);
- remove_edge(orig, g);
+ ReportID rep = rm.getInternalId(ir);
+
+ NFAVertex v_a = add_vertex(g);
+ g[v_a].assert_flags = POS_FLAG_VIRTUAL_START;
+ g[v_a].char_reach = UTF_ASCII_CR;
+ add_edge(v_a, g.accept, g[orig], g);
+
+ NFAVertex v_2 = add_vertex(g);
+ g[v_2].assert_flags = POS_FLAG_VIRTUAL_START;
+ g[v_2].char_reach = CharReach(UTF_TWO_BYTE_MIN, UTF_TWO_BYTE_MAX);
+
+ NFAVertex v_3 = add_vertex(g);
+ g[v_3].assert_flags = POS_FLAG_VIRTUAL_START;
+ g[v_3].char_reach = CharReach(UTF_THREE_BYTE_MIN, UTF_THREE_BYTE_MAX);
+
+ NFAVertex v_4 = add_vertex(g);
+ g[v_4].assert_flags = POS_FLAG_VIRTUAL_START;
+ g[v_4].char_reach = CharReach(UTF_FOUR_BYTE_MIN, UTF_FOUR_BYTE_MAX);
+
+ NFAVertex v_c = add_vertex(g);
+ g[v_c].assert_flags = POS_FLAG_VIRTUAL_START;
+ g[v_c].char_reach = UTF_CONT_CR;
+ add_edge(v_c, g.accept, g[orig], g);
+
+ add_edge(v_2, v_c, g);
+
+ NFAVertex v_3c = add_vertex(g);
+ g[v_3c].assert_flags = POS_FLAG_VIRTUAL_START;
+ g[v_3c].char_reach = UTF_CONT_CR;
+ add_edge(v_3c, v_c, g);
+ add_edge(v_3, v_3c, g);
+
+ NFAVertex v_4c = add_vertex(g);
+ g[v_4c].assert_flags = POS_FLAG_VIRTUAL_START;
+ g[v_4c].char_reach = UTF_CONT_CR;
+ add_edge(v_4c, v_3c, g);
+ add_edge(v_4, v_4c, g);
+
+ g[v_a].reports.insert(rep);
+ g[v_c].reports.insert(rep);
+
+ add_edge(g.start, v_a, g);
+ add_edge(g.startDs, v_a, g);
+ add_edge(g.start, v_2, g);
+ add_edge(g.startDs, v_2, g);
+ add_edge(g.start, v_3, g);
+ add_edge(g.startDs, v_3, g);
+ add_edge(g.start, v_4, g);
+ add_edge(g.startDs, v_4, g);
+ remove_edge(orig, g);
renumber_edges(g);
clearReports(g);
- }
-}
-
-} // namespace ue2
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_asserts.h b/contrib/libs/hyperscan/src/nfagraph/ng_asserts.h
index edbc3d5d80..2534f57147 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_asserts.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_asserts.h
@@ -1,50 +1,50 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Resolve special assert vertices.
- */
-
-#ifndef NG_ASSERTS_H
-#define NG_ASSERTS_H
-
-namespace ue2 {
-
-struct BoundaryReports;
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Resolve special assert vertices.
+ */
+
+#ifndef NG_ASSERTS_H
+#define NG_ASSERTS_H
+
+namespace ue2 {
+
+struct BoundaryReports;
class ExpressionInfo;
class NGHolder;
-class ReportManager;
-
+class ReportManager;
+
void resolveAsserts(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr);
-
+
void ensureCodePointStart(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr);
-
-} // namespace ue2
-
-#endif // NG_ASSERTS_H
+
+} // namespace ue2
+
+#endif // NG_ASSERTS_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_builder.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_builder.cpp
index 33edad8bef..60f667f491 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_builder.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_builder.cpp
@@ -1,278 +1,278 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief: NFA Graph Builder: used by Glushkov construction to construct an
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief: NFA Graph Builder: used by Glushkov construction to construct an
* NGHolder from a parsed expression.
- */
+ */
#include "ng_builder.h"
-#include "grey.h"
-#include "ng.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "compiler/compiler.h" // for ParsedExpression
-#include "util/compile_error.h"
-#include "util/make_unique.h"
-
-#include <cassert>
-
-using namespace std;
-
-namespace ue2 {
-
-namespace {
-
-/** Concrete implementation of NFABuilder interface. */
-class NFABuilderImpl : public NFABuilder {
-public:
- NFABuilderImpl(ReportManager &rm, const Grey &grey,
- const ParsedExpression &expr);
-
- ~NFABuilderImpl() override;
-
- Position makePositions(size_t nPositions) override;
- Position getStart() const override;
- Position getStartDotStar() const override;
- Position getAccept() const override;
- Position getAcceptEOD() const override;
-
- bool isSpecialState(Position p) const override;
-
- void setNodeReportID(Position position, int offsetAdjust) override;
- void addCharReach(Position position, const CharReach &cr) override;
- void setAssertFlag(Position position, u32 flag) override;
- u32 getAssertFlag(Position position) override;
-
- void addVertex(Position p) override;
-
- void addEdge(Position start, Position end) override;
-
- bool hasEdge(Position start, Position end) const override;
-
- u32 numVertices() const override { return vertIdx; }
-
- void cloneRegion(Position first, Position last,
- unsigned posOffset) override;
-
+#include "grey.h"
+#include "ng.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "compiler/compiler.h" // for ParsedExpression
+#include "util/compile_error.h"
+#include "util/make_unique.h"
+
+#include <cassert>
+
+using namespace std;
+
+namespace ue2 {
+
+namespace {
+
+/** Concrete implementation of NFABuilder interface. */
+class NFABuilderImpl : public NFABuilder {
+public:
+ NFABuilderImpl(ReportManager &rm, const Grey &grey,
+ const ParsedExpression &expr);
+
+ ~NFABuilderImpl() override;
+
+ Position makePositions(size_t nPositions) override;
+ Position getStart() const override;
+ Position getStartDotStar() const override;
+ Position getAccept() const override;
+ Position getAcceptEOD() const override;
+
+ bool isSpecialState(Position p) const override;
+
+ void setNodeReportID(Position position, int offsetAdjust) override;
+ void addCharReach(Position position, const CharReach &cr) override;
+ void setAssertFlag(Position position, u32 flag) override;
+ u32 getAssertFlag(Position position) override;
+
+ void addVertex(Position p) override;
+
+ void addEdge(Position start, Position end) override;
+
+ bool hasEdge(Position start, Position end) const override;
+
+ u32 numVertices() const override { return vertIdx; }
+
+ void cloneRegion(Position first, Position last,
+ unsigned posOffset) override;
+
BuiltExpression getGraph() override;
-
-private:
- /** fetch a vertex given its Position ID. */
- NFAVertex getVertex(Position pos) const;
-
- /** \brief Internal convenience function to add an edge (u, v). */
- pair<NFAEdge, bool> addEdge(NFAVertex u, NFAVertex v);
-
- /** \brief We use the ReportManager to hand out new internal reports. */
- ReportManager &rm;
-
- /** \brief Greybox: used for resource limits. */
- const Grey &grey;
-
+
+private:
+ /** fetch a vertex given its Position ID. */
+ NFAVertex getVertex(Position pos) const;
+
+ /** \brief Internal convenience function to add an edge (u, v). */
+ pair<NFAEdge, bool> addEdge(NFAVertex u, NFAVertex v);
+
+ /** \brief We use the ReportManager to hand out new internal reports. */
+ ReportManager &rm;
+
+ /** \brief Greybox: used for resource limits. */
+ const Grey &grey;
+
/** \brief Underlying graph. */
unique_ptr<NGHolder> graph;
-
+
/** \brief Underlying expression info. */
ExpressionInfo expr;
- /** \brief mapping from position to vertex. Use \ref getVertex for access.
- * */
- vector<NFAVertex> id2vertex;
-
- /** \brief Index of next vertex. */
- u32 vertIdx;
-}; // class NFABuilderImpl
-
-} // namespace
-
-NFABuilderImpl::NFABuilderImpl(ReportManager &rm_in, const Grey &grey_in,
+ /** \brief mapping from position to vertex. Use \ref getVertex for access.
+ * */
+ vector<NFAVertex> id2vertex;
+
+ /** \brief Index of next vertex. */
+ u32 vertIdx;
+}; // class NFABuilderImpl
+
+} // namespace
+
+NFABuilderImpl::NFABuilderImpl(ReportManager &rm_in, const Grey &grey_in,
const ParsedExpression &parsed)
: rm(rm_in), grey(grey_in), graph(ue2::make_unique<NGHolder>()),
expr(parsed.expr), vertIdx(N_SPECIALS) {
-
- // Reserve space for a reasonably-sized NFA
- id2vertex.reserve(64);
- id2vertex.resize(N_SPECIALS);
- id2vertex[NODE_START] = graph->start;
- id2vertex[NODE_START_DOTSTAR] = graph->startDs;
- id2vertex[NODE_ACCEPT] = graph->accept;
- id2vertex[NODE_ACCEPT_EOD] = graph->acceptEod;
-}
-
-NFABuilderImpl::~NFABuilderImpl() {
- // empty
-}
-
-NFAVertex NFABuilderImpl::getVertex(Position pos) const {
- assert(id2vertex.size() >= pos);
- const NFAVertex v = id2vertex[pos];
+
+ // Reserve space for a reasonably-sized NFA
+ id2vertex.reserve(64);
+ id2vertex.resize(N_SPECIALS);
+ id2vertex[NODE_START] = graph->start;
+ id2vertex[NODE_START_DOTSTAR] = graph->startDs;
+ id2vertex[NODE_ACCEPT] = graph->accept;
+ id2vertex[NODE_ACCEPT_EOD] = graph->acceptEod;
+}
+
+NFABuilderImpl::~NFABuilderImpl() {
+ // empty
+}
+
+NFAVertex NFABuilderImpl::getVertex(Position pos) const {
+ assert(id2vertex.size() >= pos);
+ const NFAVertex v = id2vertex[pos];
assert(v != NGHolder::null_vertex());
assert((*graph)[v].index == pos);
- return v;
-}
-
-void NFABuilderImpl::addVertex(Position pos) {
- // Enforce resource limit.
- if (pos > grey.limitGraphVertices) {
- throw CompileError("Pattern too large.");
- }
-
- NFAVertex v = add_vertex(*graph);
- if (id2vertex.size() <= pos) {
- id2vertex.resize(pos + 1);
- }
- id2vertex[pos] = v;
+ return v;
+}
+
+void NFABuilderImpl::addVertex(Position pos) {
+ // Enforce resource limit.
+ if (pos > grey.limitGraphVertices) {
+ throw CompileError("Pattern too large.");
+ }
+
+ NFAVertex v = add_vertex(*graph);
+ if (id2vertex.size() <= pos) {
+ id2vertex.resize(pos + 1);
+ }
+ id2vertex[pos] = v;
(*graph)[v].index = pos;
-}
-
+}
+
BuiltExpression NFABuilderImpl::getGraph() {
- DEBUG_PRINTF("built graph has %zu vertices and %zu edges\n",
- num_vertices(*graph), num_edges(*graph));
-
- if (num_edges(*graph) > grey.limitGraphEdges) {
- throw CompileError("Pattern too large.");
- }
- if (num_vertices(*graph) > grey.limitGraphVertices) {
- throw CompileError("Pattern too large.");
- }
-
+ DEBUG_PRINTF("built graph has %zu vertices and %zu edges\n",
+ num_vertices(*graph), num_edges(*graph));
+
+ if (num_edges(*graph) > grey.limitGraphEdges) {
+ throw CompileError("Pattern too large.");
+ }
+ if (num_vertices(*graph) > grey.limitGraphVertices) {
+ throw CompileError("Pattern too large.");
+ }
+
return { expr, move(graph) };
-}
-
-void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) {
+}
+
+void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) {
Report ir = rm.getBasicInternalReport(expr, offsetAdjust);
- DEBUG_PRINTF("setting report id on %u = (%u, %d, %u)\n",
+ DEBUG_PRINTF("setting report id on %u = (%u, %d, %u)\n",
pos, expr.report, offsetAdjust, ir.ekey);
-
- NFAVertex v = getVertex(pos);
- auto &reports = (*graph)[v].reports;
- reports.clear();
- reports.insert(rm.getInternalId(ir));
-}
-
-void NFABuilderImpl::addCharReach(Position pos, const CharReach &cr) {
- NFAVertex v = getVertex(pos);
+
+ NFAVertex v = getVertex(pos);
+ auto &reports = (*graph)[v].reports;
+ reports.clear();
+ reports.insert(rm.getInternalId(ir));
+}
+
+void NFABuilderImpl::addCharReach(Position pos, const CharReach &cr) {
+ NFAVertex v = getVertex(pos);
(*graph)[v].char_reach |= cr;
-}
-
-void NFABuilderImpl::setAssertFlag(Position pos, u32 flag) {
- NFAVertex v = getVertex(pos);
+}
+
+void NFABuilderImpl::setAssertFlag(Position pos, u32 flag) {
+ NFAVertex v = getVertex(pos);
(*graph)[v].assert_flags |= flag;
-}
-
-u32 NFABuilderImpl::getAssertFlag(Position pos) {
- NFAVertex v = getVertex(pos);
+}
+
+u32 NFABuilderImpl::getAssertFlag(Position pos) {
+ NFAVertex v = getVertex(pos);
return (*graph)[v].assert_flags;
-}
-
-pair<NFAEdge, bool> NFABuilderImpl::addEdge(NFAVertex u, NFAVertex v) {
- // assert that the edge doesn't already exist
+}
+
+pair<NFAEdge, bool> NFABuilderImpl::addEdge(NFAVertex u, NFAVertex v) {
+ // assert that the edge doesn't already exist
assert(edge(u, v, *graph).second == false);
-
+
return add_edge(u, v, *graph);
-}
-
-void NFABuilderImpl::addEdge(Position startPos, Position endPos) {
- DEBUG_PRINTF("%u -> %u\n", startPos, endPos);
- assert(startPos < vertIdx);
- assert(endPos < vertIdx);
-
- NFAVertex u = getVertex(startPos);
- NFAVertex v = getVertex(endPos);
-
- if ((u == graph->start || u == graph->startDs) && v == graph->startDs) {
- /* standard special -> special edges already exist */
+}
+
+void NFABuilderImpl::addEdge(Position startPos, Position endPos) {
+ DEBUG_PRINTF("%u -> %u\n", startPos, endPos);
+ assert(startPos < vertIdx);
+ assert(endPos < vertIdx);
+
+ NFAVertex u = getVertex(startPos);
+ NFAVertex v = getVertex(endPos);
+
+ if ((u == graph->start || u == graph->startDs) && v == graph->startDs) {
+ /* standard special -> special edges already exist */
assert(edge(u, v, *graph).second == true);
- return;
- }
-
+ return;
+ }
+
assert(edge(u, v, *graph).second == false);
- addEdge(u, v);
-}
-
-bool NFABuilderImpl::hasEdge(Position startPos, Position endPos) const {
+ addEdge(u, v);
+}
+
+bool NFABuilderImpl::hasEdge(Position startPos, Position endPos) const {
return edge(getVertex(startPos), getVertex(endPos), *graph).second;
-}
-
-Position NFABuilderImpl::getStart() const {
- return NODE_START;
-}
-
-Position NFABuilderImpl::getStartDotStar() const {
- return NODE_START_DOTSTAR;
-}
-
-Position NFABuilderImpl::getAccept() const {
- return NODE_ACCEPT;
-}
-
-Position NFABuilderImpl::getAcceptEOD() const {
- return NODE_ACCEPT_EOD;
-}
-
-bool NFABuilderImpl::isSpecialState(Position p) const {
- return (p == NODE_START || p == NODE_START_DOTSTAR ||
- p == NODE_ACCEPT || p == NODE_ACCEPT_EOD);
-}
-
-Position NFABuilderImpl::makePositions(size_t nPositions) {
- Position base = vertIdx;
- for (size_t i = 0; i < nPositions; i++) {
- addVertex(vertIdx++);
- }
- DEBUG_PRINTF("built %zu positions from base %u\n", nPositions, base);
- return base;
-}
-
-void NFABuilderImpl::cloneRegion(Position first, Position last, unsigned posOffset) {
+}
+
+Position NFABuilderImpl::getStart() const {
+ return NODE_START;
+}
+
+Position NFABuilderImpl::getStartDotStar() const {
+ return NODE_START_DOTSTAR;
+}
+
+Position NFABuilderImpl::getAccept() const {
+ return NODE_ACCEPT;
+}
+
+Position NFABuilderImpl::getAcceptEOD() const {
+ return NODE_ACCEPT_EOD;
+}
+
+bool NFABuilderImpl::isSpecialState(Position p) const {
+ return (p == NODE_START || p == NODE_START_DOTSTAR ||
+ p == NODE_ACCEPT || p == NODE_ACCEPT_EOD);
+}
+
+Position NFABuilderImpl::makePositions(size_t nPositions) {
+ Position base = vertIdx;
+ for (size_t i = 0; i < nPositions; i++) {
+ addVertex(vertIdx++);
+ }
+ DEBUG_PRINTF("built %zu positions from base %u\n", nPositions, base);
+ return base;
+}
+
+void NFABuilderImpl::cloneRegion(Position first, Position last, unsigned posOffset) {
NGHolder &g = *graph;
- assert(posOffset > 0);
-
- // walk the nodes between first and last and copy their vertex properties
- DEBUG_PRINTF("cloning nodes in [%u, %u], offset %u\n", first, last,
- posOffset);
- for (Position i = first; i <= last; ++i) {
- NFAVertex orig = getVertex(i);
- Position destIdx = i + posOffset;
- assert(destIdx < vertIdx);
- NFAVertex dest = getVertex(destIdx);
- g[dest] = g[orig]; // all properties
- g[dest].index = destIdx;
- }
-}
-
-unique_ptr<NFABuilder> makeNFABuilder(ReportManager &rm, const CompileContext &cc,
- const ParsedExpression &expr) {
- return ue2::make_unique<NFABuilderImpl>(rm, cc.grey, expr);
-}
-
-NFABuilder::~NFABuilder() { }
-
-} // namespace ue2
+ assert(posOffset > 0);
+
+ // walk the nodes between first and last and copy their vertex properties
+ DEBUG_PRINTF("cloning nodes in [%u, %u], offset %u\n", first, last,
+ posOffset);
+ for (Position i = first; i <= last; ++i) {
+ NFAVertex orig = getVertex(i);
+ Position destIdx = i + posOffset;
+ assert(destIdx < vertIdx);
+ NFAVertex dest = getVertex(destIdx);
+ g[dest] = g[orig]; // all properties
+ g[dest].index = destIdx;
+ }
+}
+
+unique_ptr<NFABuilder> makeNFABuilder(ReportManager &rm, const CompileContext &cc,
+ const ParsedExpression &expr) {
+ return ue2::make_unique<NFABuilderImpl>(rm, cc.grey, expr);
+}
+
+NFABuilder::~NFABuilder() { }
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_builder.h b/contrib/libs/hyperscan/src/nfagraph/ng_builder.h
index 7158620e70..9f71b62235 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_builder.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_builder.h
@@ -1,99 +1,99 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief: NFA Graph Builder: used by Glushkov construction to construct an
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief: NFA Graph Builder: used by Glushkov construction to construct an
* NGHolder from a parsed expression.
- */
-
-#ifndef NG_BUILDER_H
-#define NG_BUILDER_H
-
-#include "ue2common.h"
-
-#include "parser/position.h"
+ */
+
+#ifndef NG_BUILDER_H
+#define NG_BUILDER_H
+
+#include "ue2common.h"
+
+#include "parser/position.h"
#include "util/noncopyable.h"
-
-#include <memory>
-
-namespace ue2 {
-
-class CharReach;
-class ReportManager;
+
+#include <memory>
+
+namespace ue2 {
+
+class CharReach;
+class ReportManager;
struct BuiltExpression;
-struct CompileContext;
-
-class ParsedExpression;
-
-/** \brief Abstract builder interface. Use \ref makeNFABuilder to construct
- * one. Used by GlushkovBuildState. */
+struct CompileContext;
+
+class ParsedExpression;
+
+/** \brief Abstract builder interface. Use \ref makeNFABuilder to construct
+ * one. Used by GlushkovBuildState. */
class NFABuilder : noncopyable {
-public:
- virtual ~NFABuilder();
-
- virtual Position makePositions(size_t nPositions) = 0;
- virtual Position getStart() const = 0;
- virtual Position getStartDotStar() const = 0;
- virtual Position getAccept() const = 0;
- virtual Position getAcceptEOD() const = 0;
-
- virtual bool isSpecialState(Position p) const = 0;
-
- virtual void setNodeReportID(Position position, int offsetAdjust) = 0;
- virtual void addCharReach(Position position, const CharReach &cr) = 0;
-
- /* or-in vertex assertions */
- virtual void setAssertFlag(Position position, u32 flag) = 0;
- virtual u32 getAssertFlag(Position position) = 0;
-
- virtual void addVertex(Position p) = 0;
-
- virtual void addEdge(Position start, Position end) = 0;
-
- virtual bool hasEdge(Position start, Position end) const = 0;
-
- virtual u32 numVertices() const = 0;
-
- virtual void cloneRegion(Position first, Position last,
- unsigned posOffset) = 0;
-
- /**
+public:
+ virtual ~NFABuilder();
+
+ virtual Position makePositions(size_t nPositions) = 0;
+ virtual Position getStart() const = 0;
+ virtual Position getStartDotStar() const = 0;
+ virtual Position getAccept() const = 0;
+ virtual Position getAcceptEOD() const = 0;
+
+ virtual bool isSpecialState(Position p) const = 0;
+
+ virtual void setNodeReportID(Position position, int offsetAdjust) = 0;
+ virtual void addCharReach(Position position, const CharReach &cr) = 0;
+
+ /* or-in vertex assertions */
+ virtual void setAssertFlag(Position position, u32 flag) = 0;
+ virtual u32 getAssertFlag(Position position) = 0;
+
+ virtual void addVertex(Position p) = 0;
+
+ virtual void addEdge(Position start, Position end) = 0;
+
+ virtual bool hasEdge(Position start, Position end) const = 0;
+
+ virtual u32 numVertices() const = 0;
+
+ virtual void cloneRegion(Position first, Position last,
+ unsigned posOffset) = 0;
+
+ /**
* \brief Returns the built NGHolder graph and ExpressionInfo.
- * Note that this builder cannot be used after this call.
- */
+ * Note that this builder cannot be used after this call.
+ */
virtual BuiltExpression getGraph() = 0;
-};
-
-/** Construct a usable NFABuilder. */
-std::unique_ptr<NFABuilder> makeNFABuilder(ReportManager &rm,
- const CompileContext &cc,
- const ParsedExpression &expr);
-
-} // namespace ue2
-
-#endif
+};
+
+/** Construct a usable NFABuilder. */
+std::unique_ptr<NFABuilder> makeNFABuilder(ReportManager &rm,
+ const CompileContext &cc,
+ const ParsedExpression &expr);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.cpp
index 3474ca9875..3e9454eeed 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.cpp
@@ -1,232 +1,232 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Splits an NFA graph into its connected components.
- *
- * This pass takes a NGHolder and splits its graph into a set of connected
- * components, returning them as individual NGHolder graphs. For example, the
- * graph for the regex /foo.*bar|[a-z]{7,13}|hatstand|teakettle$/ will be split
- * into four NGHolders, representing these four components:
- *
- * - /foo.*bar/
- * - /[a-z]{7,13}/
- * - /hatstand/
- * - /teakettle$/
- *
- * The pass operates by creating an undirected graph from the input graph, and
- * then using the BGL's connected_components algorithm to do the work, cloning
- * the identified components into their own graphs. A "shell" of vertices
- * is identified and removed first from the head and tail of the graph, in
- * order to handle cases where there is a common head/tail region.
- *
- * Trivial cases, such as an alternation of single vertices like /a|b|c|d|e|f/,
- * are not split, as later optimisations will handle these cases efficiently.
- */
-#include "ng_calc_components.h"
-
-#include "ng_depth.h"
-#include "ng_holder.h"
-#include "ng_prune.h"
-#include "ng_util.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Splits an NFA graph into its connected components.
+ *
+ * This pass takes a NGHolder and splits its graph into a set of connected
+ * components, returning them as individual NGHolder graphs. For example, the
+ * graph for the regex /foo.*bar|[a-z]{7,13}|hatstand|teakettle$/ will be split
+ * into four NGHolders, representing these four components:
+ *
+ * - /foo.*bar/
+ * - /[a-z]{7,13}/
+ * - /hatstand/
+ * - /teakettle$/
+ *
+ * The pass operates by creating an undirected graph from the input graph, and
+ * then using the BGL's connected_components algorithm to do the work, cloning
+ * the identified components into their own graphs. A "shell" of vertices
+ * is identified and removed first from the head and tail of the graph, in
+ * order to handle cases where there is a common head/tail region.
+ *
+ * Trivial cases, such as an alternation of single vertices like /a|b|c|d|e|f/,
+ * are not split, as later optimisations will handle these cases efficiently.
+ */
+#include "ng_calc_components.h"
+
+#include "ng_depth.h"
+#include "ng_holder.h"
+#include "ng_prune.h"
+#include "ng_util.h"
#include "grey.h"
-#include "ue2common.h"
-#include "util/graph_range.h"
+#include "ue2common.h"
+#include "util/graph_range.h"
#include "util/graph_undirected.h"
-#include "util/make_unique.h"
-
-#include <map>
-#include <vector>
-
-#include <boost/graph/connected_components.hpp>
+#include "util/make_unique.h"
+
+#include <map>
+#include <vector>
+
+#include <boost/graph/connected_components.hpp>
#include <boost/graph/filtered_graph.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
-static constexpr u32 MAX_HEAD_SHELL_DEPTH = 3;
-static constexpr u32 MAX_TAIL_SHELL_DEPTH = 3;
-
-/**
- * \brief Returns true if the whole graph is just an alternation of character
- * classes.
- */
-bool isAlternationOfClasses(const NGHolder &g) {
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
- // Vertex must have in edges from starts only.
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (!is_any_start(u, g)) {
- return false;
- }
- }
- // Vertex must have out edges to accepts only.
- for (auto w : adjacent_vertices_range(v, g)) {
- if (!is_any_accept(w, g)) {
- return false;
- }
- }
- }
-
- DEBUG_PRINTF("alternation of single states, treating as one comp\n");
- return true;
-}
-
-/**
- * \brief Compute initial max distance to v from start (i.e. ignoring its own
- * self-loop).
- */
-static
-depth max_dist_from_start(const NGHolder &g,
- const vector<NFAVertexBidiDepth> &depths,
- NFAVertex v) {
- depth max_depth(0);
- for (const auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == v) {
- continue;
- }
- const auto &d = depths.at(g[u].index);
- if (d.fromStart.max.is_reachable()) {
- max_depth = max(max_depth, d.fromStart.max);
- }
- if (d.fromStartDotStar.max.is_reachable()) {
- max_depth = max(max_depth, d.fromStartDotStar.max);
- }
- }
- return max_depth + 1;
-}
-
-/**
- * \brief Compute initial max depth from v from accept (i.e. ignoring its own
- * self-loop).
- */
-static
-depth max_dist_to_accept(const NGHolder &g,
- const vector<NFAVertexBidiDepth> &depths,
- NFAVertex v) {
- depth max_depth(0);
- for (const auto w : adjacent_vertices_range(v, g)) {
- if (w == v) {
- continue;
- }
- const auto &d = depths.at(g[w].index);
- if (d.toAccept.max.is_reachable()) {
- max_depth = max(max_depth, d.toAccept.max);
- }
- if (d.toAcceptEod.max.is_reachable()) {
- max_depth = max(max_depth, d.toAcceptEod.max);
- }
- }
- return max_depth + 1;
-}
-
-static
-flat_set<NFAVertex> findHeadShell(const NGHolder &g,
- const vector<NFAVertexBidiDepth> &depths,
- const depth &max_dist) {
- flat_set<NFAVertex> shell;
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
- if (max_dist_from_start(g, depths, v) <= max_dist) {
- shell.insert(v);
- }
- }
-
- for (UNUSED auto v : shell) {
+
+using namespace std;
+
+namespace ue2 {
+
+static constexpr u32 MAX_HEAD_SHELL_DEPTH = 3;
+static constexpr u32 MAX_TAIL_SHELL_DEPTH = 3;
+
+/**
+ * \brief Returns true if the whole graph is just an alternation of character
+ * classes.
+ */
+bool isAlternationOfClasses(const NGHolder &g) {
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ // Vertex must have in edges from starts only.
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (!is_any_start(u, g)) {
+ return false;
+ }
+ }
+ // Vertex must have out edges to accepts only.
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (!is_any_accept(w, g)) {
+ return false;
+ }
+ }
+ }
+
+ DEBUG_PRINTF("alternation of single states, treating as one comp\n");
+ return true;
+}
+
+/**
+ * \brief Compute initial max distance to v from start (i.e. ignoring its own
+ * self-loop).
+ */
+static
+depth max_dist_from_start(const NGHolder &g,
+ const vector<NFAVertexBidiDepth> &depths,
+ NFAVertex v) {
+ depth max_depth(0);
+ for (const auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == v) {
+ continue;
+ }
+ const auto &d = depths.at(g[u].index);
+ if (d.fromStart.max.is_reachable()) {
+ max_depth = max(max_depth, d.fromStart.max);
+ }
+ if (d.fromStartDotStar.max.is_reachable()) {
+ max_depth = max(max_depth, d.fromStartDotStar.max);
+ }
+ }
+ return max_depth + 1;
+}
+
+/**
+ * \brief Compute initial max depth from v from accept (i.e. ignoring its own
+ * self-loop).
+ */
+static
+depth max_dist_to_accept(const NGHolder &g,
+ const vector<NFAVertexBidiDepth> &depths,
+ NFAVertex v) {
+ depth max_depth(0);
+ for (const auto w : adjacent_vertices_range(v, g)) {
+ if (w == v) {
+ continue;
+ }
+ const auto &d = depths.at(g[w].index);
+ if (d.toAccept.max.is_reachable()) {
+ max_depth = max(max_depth, d.toAccept.max);
+ }
+ if (d.toAcceptEod.max.is_reachable()) {
+ max_depth = max(max_depth, d.toAcceptEod.max);
+ }
+ }
+ return max_depth + 1;
+}
+
+static
+flat_set<NFAVertex> findHeadShell(const NGHolder &g,
+ const vector<NFAVertexBidiDepth> &depths,
+ const depth &max_dist) {
+ flat_set<NFAVertex> shell;
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ if (max_dist_from_start(g, depths, v) <= max_dist) {
+ shell.insert(v);
+ }
+ }
+
+ for (UNUSED auto v : shell) {
DEBUG_PRINTF("shell: %zu\n", g[v].index);
- }
-
- return shell;
-}
-
-static
-flat_set<NFAVertex> findTailShell(const NGHolder &g,
- const vector<NFAVertexBidiDepth> &depths,
- const depth &max_dist) {
- flat_set<NFAVertex> shell;
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
- if (max_dist_to_accept(g, depths, v) <= max_dist) {
- shell.insert(v);
- }
- }
-
- for (UNUSED auto v : shell) {
+ }
+
+ return shell;
+}
+
+static
+flat_set<NFAVertex> findTailShell(const NGHolder &g,
+ const vector<NFAVertexBidiDepth> &depths,
+ const depth &max_dist) {
+ flat_set<NFAVertex> shell;
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ if (max_dist_to_accept(g, depths, v) <= max_dist) {
+ shell.insert(v);
+ }
+ }
+
+ for (UNUSED auto v : shell) {
DEBUG_PRINTF("shell: %zu\n", g[v].index);
- }
-
- return shell;
-}
-
-static
-vector<NFAEdge> findShellEdges(const NGHolder &g,
- const flat_set<NFAVertex> &head_shell,
- const flat_set<NFAVertex> &tail_shell) {
- vector<NFAEdge> shell_edges;
-
- for (const auto &e : edges_range(g)) {
- auto u = source(e, g);
- auto v = target(e, g);
-
- if (v == g.startDs && is_any_start(u, g)) {
- continue;
- }
- if (u == g.accept && v == g.acceptEod) {
- continue;
- }
-
- if ((is_special(u, g) || contains(head_shell, u)) &&
- (is_special(v, g) || contains(tail_shell, v))) {
+ }
+
+ return shell;
+}
+
+static
+vector<NFAEdge> findShellEdges(const NGHolder &g,
+ const flat_set<NFAVertex> &head_shell,
+ const flat_set<NFAVertex> &tail_shell) {
+ vector<NFAEdge> shell_edges;
+
+ for (const auto &e : edges_range(g)) {
+ auto u = source(e, g);
+ auto v = target(e, g);
+
+ if (v == g.startDs && is_any_start(u, g)) {
+ continue;
+ }
+ if (u == g.accept && v == g.acceptEod) {
+ continue;
+ }
+
+ if ((is_special(u, g) || contains(head_shell, u)) &&
+ (is_special(v, g) || contains(tail_shell, v))) {
DEBUG_PRINTF("edge (%zu,%zu) is a shell edge\n", g[u].index,
g[v].index);
- shell_edges.push_back(e);
- }
- }
-
- return shell_edges;
-}
-
+ shell_edges.push_back(e);
+ }
+ }
+
+ return shell_edges;
+}
+
template<typename GetAdjRange>
bool shellHasOnePath(const NGHolder &g, const flat_set<NFAVertex> &shell,
GetAdjRange adj_range_func) {
if (shell.empty()) {
DEBUG_PRINTF("no shell\n");
return false;
- }
+ }
NFAVertex exit_vertex = NGHolder::null_vertex();
for (auto u : shell) {
@@ -246,62 +246,62 @@ bool shellHasOnePath(const NGHolder &g, const flat_set<NFAVertex> &shell,
}
return true;
-}
-
+}
+
/**
* True if all edges out of vertices in the head shell lead to at most a single
* outside vertex, or the inverse for the tail shell.
*/
-static
+static
bool shellHasOnePath(const NGHolder &g, const flat_set<NFAVertex> &head_shell,
const flat_set<NFAVertex> &tail_shell) {
if (shellHasOnePath(g, head_shell, adjacent_vertices_range<NGHolder>)) {
DEBUG_PRINTF("head shell has only one path through it\n");
return true;
- }
+ }
if (shellHasOnePath(g, tail_shell, inv_adjacent_vertices_range<NGHolder>)) {
DEBUG_PRINTF("tail shell has only one path into it\n");
return true;
}
return false;
-}
-
-/**
- * Common code called by calc- and recalc- below. Splits the given holder into
- * one or more connected components, adding them to the comps deque.
- */
-static
+}
+
+/**
+ * Common code called by calc- and recalc- below. Splits the given holder into
+ * one or more connected components, adding them to the comps deque.
+ */
+static
void splitIntoComponents(unique_ptr<NGHolder> g,
deque<unique_ptr<NGHolder>> &comps,
- const depth &max_head_depth,
- const depth &max_tail_depth, bool *shell_comp) {
+ const depth &max_head_depth,
+ const depth &max_tail_depth, bool *shell_comp) {
DEBUG_PRINTF("graph has %zu vertices\n", num_vertices(*g));
-
- assert(shell_comp);
- *shell_comp = false;
-
- // Compute "shell" head and tail subgraphs.
+
+ assert(shell_comp);
+ *shell_comp = false;
+
+ // Compute "shell" head and tail subgraphs.
auto depths = calcBidiDepths(*g);
auto head_shell = findHeadShell(*g, depths, max_head_depth);
auto tail_shell = findTailShell(*g, depths, max_tail_depth);
- for (auto v : head_shell) {
- tail_shell.erase(v);
- }
-
+ for (auto v : head_shell) {
+ tail_shell.erase(v);
+ }
+
if (head_shell.size() + tail_shell.size() + N_SPECIALS >=
num_vertices(*g)) {
- DEBUG_PRINTF("all in shell component\n");
+ DEBUG_PRINTF("all in shell component\n");
comps.push_back(std::move(g));
- *shell_comp = true;
- return;
- }
-
+ *shell_comp = true;
+ return;
+ }
+
// Find edges connecting the head and tail shells directly.
vector<NFAEdge> shell_edges = findShellEdges(*g, head_shell, tail_shell);
-
- DEBUG_PRINTF("%zu vertices in head, %zu in tail, %zu shell edges\n",
- head_shell.size(), tail_shell.size(), shell_edges.size());
-
+
+ DEBUG_PRINTF("%zu vertices in head, %zu in tail, %zu shell edges\n",
+ head_shell.size(), tail_shell.size(), shell_edges.size());
+
// If there are no shell edges and only one path out of the head shell or
// into the tail shell, we aren't going to find more than one component.
if (shell_edges.empty() && shellHasOnePath(*g, head_shell, tail_shell)) {
@@ -309,152 +309,152 @@ void splitIntoComponents(unique_ptr<NGHolder> g,
comps.push_back(std::move(g));
return;
}
-
+
auto ug = make_undirected_graph(*g);
-
+
// Filter specials and shell vertices from undirected graph.
unordered_set<NFAVertex> bad_vertices(
{g->start, g->startDs, g->accept, g->acceptEod});
bad_vertices.insert(head_shell.begin(), head_shell.end());
bad_vertices.insert(tail_shell.begin(), tail_shell.end());
-
+
auto filtered_ug = boost::make_filtered_graph(
ug, boost::keep_all(), make_bad_vertex_filter(&bad_vertices));
-
+
// Actually run the connected components algorithm.
map<NFAVertex, u32> split_components;
- const u32 num = connected_components(
+ const u32 num = connected_components(
filtered_ug, boost::make_assoc_property_map(split_components));
-
- assert(num > 0);
- if (num == 1 && shell_edges.empty()) {
- DEBUG_PRINTF("single component\n");
+
+ assert(num > 0);
+ if (num == 1 && shell_edges.empty()) {
+ DEBUG_PRINTF("single component\n");
comps.push_back(std::move(g));
- return;
- }
-
- DEBUG_PRINTF("broke graph into %u components\n", num);
-
- vector<deque<NFAVertex>> verts(num);
-
- // Collect vertex lists per component.
- for (const auto &m : split_components) {
+ return;
+ }
+
+ DEBUG_PRINTF("broke graph into %u components\n", num);
+
+ vector<deque<NFAVertex>> verts(num);
+
+ // Collect vertex lists per component.
+ for (const auto &m : split_components) {
NFAVertex v = m.first;
- u32 c = m.second;
- verts[c].push_back(v);
+ u32 c = m.second;
+ verts[c].push_back(v);
DEBUG_PRINTF("vertex %zu is in comp %u\n", (*g)[v].index, c);
- }
-
+ }
+
unordered_map<NFAVertex, NFAVertex> v_map; // temp map for fillHolder
- for (auto &vv : verts) {
- // Shells are in every component.
- vv.insert(vv.end(), begin(head_shell), end(head_shell));
- vv.insert(vv.end(), begin(tail_shell), end(tail_shell));
-
+ for (auto &vv : verts) {
+ // Shells are in every component.
+ vv.insert(vv.end(), begin(head_shell), end(head_shell));
+ vv.insert(vv.end(), begin(tail_shell), end(tail_shell));
+
/* Sort for determinism. Still required as NFAUndirectedVertex have
* no deterministic ordering (split_components map). */
sort(begin(vv), end(vv));
-
- auto gc = ue2::make_unique<NGHolder>();
- v_map.clear();
+
+ auto gc = ue2::make_unique<NGHolder>();
+ v_map.clear();
fillHolder(gc.get(), *g, vv, &v_map);
-
- // Remove shell edges, which will get their own component.
- for (const auto &e : shell_edges) {
+
+ // Remove shell edges, which will get their own component.
+ for (const auto &e : shell_edges) {
auto cu = v_map.at(source(e, *g));
auto cv = v_map.at(target(e, *g));
- assert(edge(cu, cv, *gc).second);
- remove_edge(cu, cv, *gc);
- }
-
- pruneUseless(*gc);
- DEBUG_PRINTF("component %zu has %zu vertices\n", comps.size(),
- num_vertices(*gc));
- comps.push_back(move(gc));
- }
-
- // Another component to handle the direct shell-to-shell edges.
- if (!shell_edges.empty()) {
- deque<NFAVertex> vv;
- vv.insert(vv.end(), begin(head_shell), end(head_shell));
- vv.insert(vv.end(), begin(tail_shell), end(tail_shell));
-
- auto gc = ue2::make_unique<NGHolder>();
- v_map.clear();
+ assert(edge(cu, cv, *gc).second);
+ remove_edge(cu, cv, *gc);
+ }
+
+ pruneUseless(*gc);
+ DEBUG_PRINTF("component %zu has %zu vertices\n", comps.size(),
+ num_vertices(*gc));
+ comps.push_back(move(gc));
+ }
+
+ // Another component to handle the direct shell-to-shell edges.
+ if (!shell_edges.empty()) {
+ deque<NFAVertex> vv;
+ vv.insert(vv.end(), begin(head_shell), end(head_shell));
+ vv.insert(vv.end(), begin(tail_shell), end(tail_shell));
+
+ auto gc = ue2::make_unique<NGHolder>();
+ v_map.clear();
fillHolder(gc.get(), *g, vv, &v_map);
-
- pruneUseless(*gc);
- DEBUG_PRINTF("shell edge component %zu has %zu vertices\n",
- comps.size(), num_vertices(*gc));
- comps.push_back(move(gc));
- *shell_comp = true;
- }
-
+
+ pruneUseless(*gc);
+ DEBUG_PRINTF("shell edge component %zu has %zu vertices\n",
+ comps.size(), num_vertices(*gc));
+ comps.push_back(move(gc));
+ *shell_comp = true;
+ }
+
// Ensure that only vertices with accept edges have reports.
for (auto &gc : comps) {
assert(gc);
clearReports(*gc);
}
- // We should never produce empty component graphs.
- assert(all_of(begin(comps), end(comps),
- [](const unique_ptr<NGHolder> &g_comp) {
- return num_vertices(*g_comp) > N_SPECIALS;
- }));
-}
-
+ // We should never produce empty component graphs.
+ assert(all_of(begin(comps), end(comps),
+ [](const unique_ptr<NGHolder> &g_comp) {
+ return num_vertices(*g_comp) > N_SPECIALS;
+ }));
+}
+
deque<unique_ptr<NGHolder>> calcComponents(unique_ptr<NGHolder> g,
const Grey &grey) {
- deque<unique_ptr<NGHolder>> comps;
-
- // For trivial cases, we needn't bother running the full
- // connected_components algorithm.
+ deque<unique_ptr<NGHolder>> comps;
+
+ // For trivial cases, we needn't bother running the full
+ // connected_components algorithm.
if (!grey.calcComponents || isAlternationOfClasses(*g)) {
comps.push_back(std::move(g));
- return comps;
- }
-
- bool shell_comp = false;
+ return comps;
+ }
+
+ bool shell_comp = false;
splitIntoComponents(std::move(g), comps, depth(MAX_HEAD_SHELL_DEPTH),
depth(MAX_TAIL_SHELL_DEPTH), &shell_comp);
-
- if (shell_comp) {
- DEBUG_PRINTF("re-running on shell comp\n");
- assert(!comps.empty());
+
+ if (shell_comp) {
+ DEBUG_PRINTF("re-running on shell comp\n");
+ assert(!comps.empty());
auto sc = std::move(comps.back());
- comps.pop_back();
+ comps.pop_back();
splitIntoComponents(std::move(sc), comps, depth(0), depth(0),
&shell_comp);
- }
-
- DEBUG_PRINTF("finished; split into %zu components\n", comps.size());
- return comps;
-}
-
+ }
+
+ DEBUG_PRINTF("finished; split into %zu components\n", comps.size());
+ return comps;
+}
+
void recalcComponents(deque<unique_ptr<NGHolder>> &comps, const Grey &grey) {
if (!grey.calcComponents) {
return;
}
- deque<unique_ptr<NGHolder>> out;
-
- for (auto &gc : comps) {
- if (!gc) {
- continue; // graph has been consumed already.
- }
-
- if (isAlternationOfClasses(*gc)) {
+ deque<unique_ptr<NGHolder>> out;
+
+ for (auto &gc : comps) {
+ if (!gc) {
+ continue; // graph has been consumed already.
+ }
+
+ if (isAlternationOfClasses(*gc)) {
out.push_back(std::move(gc));
- continue;
- }
-
+ continue;
+ }
+
auto gc_comps = calcComponents(std::move(gc), grey);
out.insert(end(out), std::make_move_iterator(begin(gc_comps)),
std::make_move_iterator(end(gc_comps)));
- }
-
- // Replace comps with our recalculated list.
- comps.swap(out);
-}
-
-} // namespace ue2
+ }
+
+ // Replace comps with our recalculated list.
+ comps.swap(out);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.h b/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.h
index 3c9cc08c24..1bcdc5f81e 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_calc_components.h
@@ -1,54 +1,54 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Splits an NFA graph into its connected components.
- */
-
-#ifndef NG_CALC_COMPONENTS_H
-#define NG_CALC_COMPONENTS_H
-
-#include <deque>
-#include <memory>
-
-namespace ue2 {
-
-class NGHolder;
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Splits an NFA graph into its connected components.
+ */
+
+#ifndef NG_CALC_COMPONENTS_H
+#define NG_CALC_COMPONENTS_H
+
+#include <deque>
+#include <memory>
+
+namespace ue2 {
+
+class NGHolder;
struct Grey;
-
-bool isAlternationOfClasses(const NGHolder &g);
-
+
+bool isAlternationOfClasses(const NGHolder &g);
+
std::deque<std::unique_ptr<NGHolder>>
calcComponents(std::unique_ptr<NGHolder> g, const Grey &grey);
-
+
void recalcComponents(std::deque<std::unique_ptr<NGHolder>> &comps,
const Grey &grey);
-
-} // namespace ue2
-
-#endif
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.cpp
index 8d84acfd9e..0b24bf07a8 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.cpp
@@ -1,213 +1,213 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Cyclic Path Redundancy pass. Removes redundant vertices on paths
- * leading to a cyclic repeat.
- *
- * This is a graph reduction pass intended to remove vertices that are
- * redundant because they lead solely to a cyclic vertex with a superset of
- * their character reachability. For example, in this pattern:
- *
- * /(abc|def|abcghi).*0123/s
- *
- * The vertices for 'ghi' can be removed due to the presence of the dot-star
- * repeat.
- *
- * Algorithm:
- *
- * for each cyclic vertex V:
- * for each proper predecessor U of V:
- * let S be the set of successors of U that are successors of V
- * (including V itself)
- * for each successor W of U not in S:
- * perform a DFS forward from W, stopping exploration when a vertex
- * in S is encountered;
- * if a vertex with reach not in reach(V) or an accept is encountered:
- * fail and continue to the next W.
- * else:
- * remove (U, W)
- *
- * NOTE: the following code is templated not just for fun, but so that we can
- * run this analysis both forward and in reverse over the graph.
- */
-#include "ng_cyclic_redundancy.h"
-
-#include "ng_holder.h"
-#include "ng_prune.h"
-#include "ng_util.h"
-#include "util/container.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Cyclic Path Redundancy pass. Removes redundant vertices on paths
+ * leading to a cyclic repeat.
+ *
+ * This is a graph reduction pass intended to remove vertices that are
+ * redundant because they lead solely to a cyclic vertex with a superset of
+ * their character reachability. For example, in this pattern:
+ *
+ * /(abc|def|abcghi).*0123/s
+ *
+ * The vertices for 'ghi' can be removed due to the presence of the dot-star
+ * repeat.
+ *
+ * Algorithm:
+ *
+ * for each cyclic vertex V:
+ * for each proper predecessor U of V:
+ * let S be the set of successors of U that are successors of V
+ * (including V itself)
+ * for each successor W of U not in S:
+ * perform a DFS forward from W, stopping exploration when a vertex
+ * in S is encountered;
+ * if a vertex with reach not in reach(V) or an accept is encountered:
+ * fail and continue to the next W.
+ * else:
+ * remove (U, W)
+ *
+ * NOTE: the following code is templated not just for fun, but so that we can
+ * run this analysis both forward and in reverse over the graph.
+ */
+#include "ng_cyclic_redundancy.h"
+
+#include "ng_holder.h"
+#include "ng_prune.h"
+#include "ng_util.h"
+#include "util/container.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
-
+
#include <algorithm>
-#include <boost/graph/depth_first_search.hpp>
-#include <boost/graph/reverse_graph.hpp>
-
-using namespace std;
-using boost::reverse_graph;
-
-namespace ue2 {
-
-namespace {
-
-// Terminator function for depth first traversal, tells us not to explore
-// beyond vertices in set S.
-template<class Vertex, class Graph>
-class VertexInSet {
- public:
- explicit VertexInSet(const flat_set<Vertex> &s) : verts(s) {}
- bool operator()(const Vertex &v, const Graph&) const {
- return contains(verts, v);
- }
-
- private:
- const flat_set<Vertex> &verts;
-};
-
-struct SearchFailed {};
-
-// Visitor for depth first traversal, throws an error if we encounter a vertex
-// with bad reach or a report.
-class SearchVisitor : public boost::default_dfs_visitor {
- public:
- explicit SearchVisitor(const CharReach &r) : cr(r) {}
-
- template<class Vertex, class Graph>
- void discover_vertex(const Vertex &v, const Graph &g) const {
+#include <boost/graph/depth_first_search.hpp>
+#include <boost/graph/reverse_graph.hpp>
+
+using namespace std;
+using boost::reverse_graph;
+
+namespace ue2 {
+
+namespace {
+
+// Terminator function for depth first traversal, tells us not to explore
+// beyond vertices in set S.
+template<class Vertex, class Graph>
+class VertexInSet {
+ public:
+ explicit VertexInSet(const flat_set<Vertex> &s) : verts(s) {}
+ bool operator()(const Vertex &v, const Graph&) const {
+ return contains(verts, v);
+ }
+
+ private:
+ const flat_set<Vertex> &verts;
+};
+
+struct SearchFailed {};
+
+// Visitor for depth first traversal, throws an error if we encounter a vertex
+// with bad reach or a report.
+class SearchVisitor : public boost::default_dfs_visitor {
+ public:
+ explicit SearchVisitor(const CharReach &r) : cr(r) {}
+
+ template<class Vertex, class Graph>
+ void discover_vertex(const Vertex &v, const Graph &g) const {
DEBUG_PRINTF("vertex %zu\n", g[v].index);
- if (is_special(v, g)) {
- DEBUG_PRINTF("start or accept\n");
- throw SearchFailed();
- }
-
- if (g[v].assert_flags) {
- DEBUG_PRINTF("assert flags\n");
- throw SearchFailed();
- }
-
- const CharReach &vcr = g[v].char_reach;
- if (vcr != (vcr & cr)) {
- DEBUG_PRINTF("bad reach\n");
- throw SearchFailed();
- }
- }
-
- private:
- const CharReach &cr;
-};
-
-} // namespace
-
+ if (is_special(v, g)) {
+ DEBUG_PRINTF("start or accept\n");
+ throw SearchFailed();
+ }
+
+ if (g[v].assert_flags) {
+ DEBUG_PRINTF("assert flags\n");
+ throw SearchFailed();
+ }
+
+ const CharReach &vcr = g[v].char_reach;
+ if (vcr != (vcr & cr)) {
+ DEBUG_PRINTF("bad reach\n");
+ throw SearchFailed();
+ }
+ }
+
+ private:
+ const CharReach &cr;
+};
+
+} // namespace
+
template<class Graph, class ColorMap>
-static
-bool searchForward(const Graph &g, const CharReach &reach,
+static
+bool searchForward(const Graph &g, const CharReach &reach,
ColorMap &colours,
- const flat_set<typename Graph::vertex_descriptor> &s,
- typename Graph::vertex_descriptor w) {
+ const flat_set<typename Graph::vertex_descriptor> &s,
+ typename Graph::vertex_descriptor w) {
colours.fill(small_color::white);
- try {
+ try {
depth_first_visit(g, w, SearchVisitor(reach), colours,
VertexInSet<typename Graph::vertex_descriptor, Graph>(s));
} catch (SearchFailed &) {
- return false;
- }
-
- return true;
-}
-
-static
+ return false;
+ }
+
+ return true;
+}
+
+static
NFAEdge to_raw(const NFAEdge &e, const NGHolder &) {
- return e;
-}
-
-static
+ return e;
+}
+
+static
NFAEdge to_raw(const reverse_graph<NGHolder, NGHolder &>::edge_descriptor &e,
const reverse_graph<NGHolder, NGHolder &> &g) {
return get(boost::edge_underlying, g, e);
-}
-
-/* returns true if we did stuff */
-template<class Graph>
-static
-bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v,
- NGHolder &raw) {
- bool did_stuff = false;
-
- const CharReach &reach = g[v].char_reach;
-
- typedef typename Graph::vertex_descriptor vertex_descriptor;
-
+}
+
+/* returns true if we did stuff */
+template<class Graph>
+static
+bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v,
+ NGHolder &raw) {
+ bool did_stuff = false;
+
+ const CharReach &reach = g[v].char_reach;
+
+ typedef typename Graph::vertex_descriptor vertex_descriptor;
+
// Colour map used for depth_first_visit().
auto colours = make_small_color_map(g);
- // precalc successors of v.
- flat_set<vertex_descriptor> succ_v;
- insert(&succ_v, adjacent_vertices(v, g));
-
- flat_set<vertex_descriptor> s;
-
- for (const auto &e : in_edges_range(v, g)) {
- vertex_descriptor u = source(e, g);
- if (u == v) {
- continue;
- }
- if (is_any_accept(u, g)) {
- continue;
- }
-
+ // precalc successors of v.
+ flat_set<vertex_descriptor> succ_v;
+ insert(&succ_v, adjacent_vertices(v, g));
+
+ flat_set<vertex_descriptor> s;
+
+ for (const auto &e : in_edges_range(v, g)) {
+ vertex_descriptor u = source(e, g);
+ if (u == v) {
+ continue;
+ }
+ if (is_any_accept(u, g)) {
+ continue;
+ }
+
DEBUG_PRINTF("- checking u %zu\n", g[u].index);
-
- // let s be intersection(succ(u), succ(v))
- s.clear();
- for (auto b : adjacent_vertices_range(u, g)) {
- if (contains(succ_v, b)) {
- s.insert(b);
- }
- }
-
- for (const auto &e_u : make_vector_from(out_edges(u, g))) {
- vertex_descriptor w = target(e_u, g);
- if (is_special(w, g) || contains(s, w)) {
- continue;
- }
-
- const CharReach &w_reach = g[w].char_reach;
- if (!w_reach.isSubsetOf(reach)) {
- continue;
- }
-
+
+ // let s be intersection(succ(u), succ(v))
+ s.clear();
+ for (auto b : adjacent_vertices_range(u, g)) {
+ if (contains(succ_v, b)) {
+ s.insert(b);
+ }
+ }
+
+ for (const auto &e_u : make_vector_from(out_edges(u, g))) {
+ vertex_descriptor w = target(e_u, g);
+ if (is_special(w, g) || contains(s, w)) {
+ continue;
+ }
+
+ const CharReach &w_reach = g[w].char_reach;
+ if (!w_reach.isSubsetOf(reach)) {
+ continue;
+ }
+
DEBUG_PRINTF(" - checking w %zu\n", g[w].index);
-
+
if (!searchForward(g, reach, colours, succ_v, w)) {
continue;
- }
+ }
DEBUG_PRINTF("removing edge (%zu,%zu)\n", g[u].index, g[w].index);
/* we are currently iterating over the in-edges of v, so it
@@ -215,50 +215,50 @@ bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v,
assert(w != v); /* as v is in s */
remove_edge(to_raw(e_u, g), raw);
did_stuff = true;
- }
- }
-
- return did_stuff;
-}
-
-template<class Graph>
-static
-bool cyclicPathRedundancyPass(Graph &g, NGHolder &raw) {
- bool did_stuff = false;
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g) || !edge(v, v, g).second) {
- continue;
- }
-
+ }
+ }
+
+ return did_stuff;
+}
+
+template<class Graph>
+static
+bool cyclicPathRedundancyPass(Graph &g, NGHolder &raw) {
+ bool did_stuff = false;
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g) || !edge(v, v, g).second) {
+ continue;
+ }
+
DEBUG_PRINTF("examining cyclic vertex %zu\n", g[v].index);
- did_stuff |= removeCyclicPathRedundancy(g, v, raw);
- }
-
- return did_stuff;
-}
-
-bool removeCyclicPathRedundancy(NGHolder &g) {
+ did_stuff |= removeCyclicPathRedundancy(g, v, raw);
+ }
+
+ return did_stuff;
+}
+
+bool removeCyclicPathRedundancy(NGHolder &g) {
assert(hasCorrectlyNumberedVertices(g));
- // Forward pass.
+ // Forward pass.
bool f_changed = cyclicPathRedundancyPass(g, g);
- if (f_changed) {
- DEBUG_PRINTF("edges removed by forward pass\n");
- pruneUseless(g);
- }
-
- // Reverse pass.
- DEBUG_PRINTF("REVERSE PASS\n");
+ if (f_changed) {
+ DEBUG_PRINTF("edges removed by forward pass\n");
+ pruneUseless(g);
+ }
+
+ // Reverse pass.
+ DEBUG_PRINTF("REVERSE PASS\n");
typedef reverse_graph<NGHolder, NGHolder &> RevGraph;
RevGraph revg(g);
- bool r_changed = cyclicPathRedundancyPass(revg, g);
- if (r_changed) {
- DEBUG_PRINTF("edges removed by reverse pass\n");
- pruneUseless(g);
- }
-
- return f_changed || r_changed;
-}
-
-} // namespace ue2
+ bool r_changed = cyclicPathRedundancyPass(revg, g);
+ if (r_changed) {
+ DEBUG_PRINTF("edges removed by reverse pass\n");
+ pruneUseless(g);
+ }
+
+ return f_changed || r_changed;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.h b/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.h
index 9a83c49361..3ce07c6688 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_cyclic_redundancy.h
@@ -1,45 +1,45 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Cyclic Path Redundancy pass. Removes redundant vertices on paths
- * leading to a cyclic repeat.
- */
-
-#ifndef NG_CYCLIC_REDUNDANCY_H
-#define NG_CYCLIC_REDUNDANCY_H
-
-namespace ue2 {
-
-class NGHolder;
-
-bool removeCyclicPathRedundancy(NGHolder &g);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Cyclic Path Redundancy pass. Removes redundant vertices on paths
+ * leading to a cyclic repeat.
+ */
+
+#ifndef NG_CYCLIC_REDUNDANCY_H
+#define NG_CYCLIC_REDUNDANCY_H
+
+namespace ue2 {
+
+class NGHolder;
+
+bool removeCyclicPathRedundancy(NGHolder &g);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_depth.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_depth.cpp
index e952ff445e..6c90326ce4 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_depth.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_depth.cpp
@@ -1,398 +1,398 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief NFA graph vertex depth calculations.
- */
-#include "ng_depth.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/graph_range.h"
+ * \brief NFA graph vertex depth calculations.
+ */
+#include "ng_depth.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
-
-#include <deque>
-#include <vector>
-
+
+#include <deque>
+#include <vector>
+
#include <boost/graph/breadth_first_search.hpp>
-#include <boost/graph/dag_shortest_paths.hpp>
-#include <boost/graph/depth_first_search.hpp>
-#include <boost/graph/filtered_graph.hpp>
+#include <boost/graph/dag_shortest_paths.hpp>
+#include <boost/graph/depth_first_search.hpp>
+#include <boost/graph/filtered_graph.hpp>
#include <boost/graph/property_maps/constant_property_map.hpp>
-#include <boost/graph/reverse_graph.hpp>
-#include <boost/graph/topological_sort.hpp>
+#include <boost/graph/reverse_graph.hpp>
+#include <boost/graph/topological_sort.hpp>
#include <boost/range/adaptor/reversed.hpp>
-
-using namespace std;
-using boost::filtered_graph;
+
+using namespace std;
+using boost::filtered_graph;
using boost::make_filtered_graph;
-using boost::make_constant_property;
-using boost::reverse_graph;
+using boost::make_constant_property;
+using boost::reverse_graph;
using boost::adaptors::reverse;
-
-namespace ue2 {
-
-namespace {
-
-/** Distance value used to indicate that the vertex can't be reached. */
-static constexpr int DIST_UNREACHABLE = INT_MAX;
-
-/**
- * Distance value used to indicate that the distance to a vertex is infinite
- * (for example, it's the max distance and there's a cycle in the path) or so
- * large that we should consider it effectively infinite.
- */
-static constexpr int DIST_INFINITY = INT_MAX - 1;
-
-//
-// Filters
-//
-
-template <class GraphT>
-struct NodeFilter {
- typedef typename GraphT::edge_descriptor EdgeT;
- NodeFilter() {} // BGL filters must be default-constructible.
- NodeFilter(const vector<bool> *bad_in, const GraphT *g_in)
- : bad(bad_in), g(g_in) { }
- bool operator()(const EdgeT &e) const {
- assert(g && bad);
-
- u32 src_idx = (*g)[source(e, *g)].index;
- u32 tar_idx = (*g)[target(e, *g)].index;
-
- if (tar_idx == NODE_START_DOTSTAR) {
- return false;
- }
-
- return !(*bad)[src_idx] && !(*bad)[tar_idx];
- }
-
-private:
- const vector<bool> *bad = nullptr;
- const GraphT *g = nullptr;
-};
-
-template <class GraphT>
-struct StartFilter {
- typedef typename GraphT::edge_descriptor EdgeT;
- StartFilter() {} // BGL filters must be default-constructible.
- explicit StartFilter(const GraphT *g_in) : g(g_in) { }
- bool operator()(const EdgeT &e) const {
- assert(g);
-
- u32 src_idx = (*g)[source(e, *g)].index;
- u32 tar_idx = (*g)[target(e, *g)].index;
-
- // Remove our stylised edges from anchored start to startDs.
- if (src_idx == NODE_START && tar_idx == NODE_START_DOTSTAR) {
- return false;
- }
- // Also remove the equivalent in the reversed direction.
- if (src_idx == NODE_ACCEPT_EOD && tar_idx == NODE_ACCEPT) {
- return false;
- }
- return true;
- }
-
-private:
- const GraphT *g = nullptr;
-};
-
-} // namespace
-
+
+namespace ue2 {
+
+namespace {
+
+/** Distance value used to indicate that the vertex can't be reached. */
+static constexpr int DIST_UNREACHABLE = INT_MAX;
+
+/**
+ * Distance value used to indicate that the distance to a vertex is infinite
+ * (for example, it's the max distance and there's a cycle in the path) or so
+ * large that we should consider it effectively infinite.
+ */
+static constexpr int DIST_INFINITY = INT_MAX - 1;
+
+//
+// Filters
+//
+
+template <class GraphT>
+struct NodeFilter {
+ typedef typename GraphT::edge_descriptor EdgeT;
+ NodeFilter() {} // BGL filters must be default-constructible.
+ NodeFilter(const vector<bool> *bad_in, const GraphT *g_in)
+ : bad(bad_in), g(g_in) { }
+ bool operator()(const EdgeT &e) const {
+ assert(g && bad);
+
+ u32 src_idx = (*g)[source(e, *g)].index;
+ u32 tar_idx = (*g)[target(e, *g)].index;
+
+ if (tar_idx == NODE_START_DOTSTAR) {
+ return false;
+ }
+
+ return !(*bad)[src_idx] && !(*bad)[tar_idx];
+ }
+
+private:
+ const vector<bool> *bad = nullptr;
+ const GraphT *g = nullptr;
+};
+
+template <class GraphT>
+struct StartFilter {
+ typedef typename GraphT::edge_descriptor EdgeT;
+ StartFilter() {} // BGL filters must be default-constructible.
+ explicit StartFilter(const GraphT *g_in) : g(g_in) { }
+ bool operator()(const EdgeT &e) const {
+ assert(g);
+
+ u32 src_idx = (*g)[source(e, *g)].index;
+ u32 tar_idx = (*g)[target(e, *g)].index;
+
+ // Remove our stylised edges from anchored start to startDs.
+ if (src_idx == NODE_START && tar_idx == NODE_START_DOTSTAR) {
+ return false;
+ }
+ // Also remove the equivalent in the reversed direction.
+ if (src_idx == NODE_ACCEPT_EOD && tar_idx == NODE_ACCEPT) {
+ return false;
+ }
+ return true;
+ }
+
+private:
+ const GraphT *g = nullptr;
+};
+
+} // namespace
+
template<class Graph>
-static
+static
vector<bool> findLoopReachable(const Graph &g,
const typename Graph::vertex_descriptor src) {
vector<bool> deadNodes(num_vertices(g));
-
+
using Edge = typename Graph::edge_descriptor;
using Vertex = typename Graph::vertex_descriptor;
using EdgeSet = set<Edge>;
- EdgeSet deadEdges;
- BackEdges<EdgeSet> be(deadEdges);
-
+ EdgeSet deadEdges;
+ BackEdges<EdgeSet> be(deadEdges);
+
auto colors = make_small_color_map(g);
-
+
depth_first_search(g, be, colors, src);
auto af = make_bad_edge_filter(&deadEdges);
auto acyclic_g = make_filtered_graph(g, af);
-
+
vector<Vertex> topoOrder; /* actually reverse topological order */
- topoOrder.reserve(deadNodes.size());
+ topoOrder.reserve(deadNodes.size());
topological_sort(acyclic_g, back_inserter(topoOrder), color_map(colors));
-
- for (const auto &e : deadEdges) {
+
+ for (const auto &e : deadEdges) {
size_t srcIdx = g[source(e, g)].index;
- if (srcIdx != NODE_START_DOTSTAR) {
- deadNodes[srcIdx] = true;
- }
- }
-
+ if (srcIdx != NODE_START_DOTSTAR) {
+ deadNodes[srcIdx] = true;
+ }
+ }
+
for (auto v : reverse(topoOrder)) {
- for (const auto &e : in_edges_range(v, g)) {
- if (deadNodes[g[source(e, g)].index]) {
- deadNodes[g[v].index] = true;
- break;
- }
- }
- }
+ for (const auto &e : in_edges_range(v, g)) {
+ if (deadNodes[g[source(e, g)].index]) {
+ deadNodes[g[v].index] = true;
+ break;
+ }
+ }
+ }
return deadNodes;
-}
-
-template <class GraphT>
-static
+}
+
+template <class GraphT>
+static
void calcDepthFromSource(const GraphT &g,
- typename GraphT::vertex_descriptor srcVertex,
+ typename GraphT::vertex_descriptor srcVertex,
const vector<bool> &deadNodes, vector<int> &dMin,
vector<int> &dMax) {
- typedef typename GraphT::edge_descriptor EdgeT;
-
+ typedef typename GraphT::edge_descriptor EdgeT;
+
const size_t numVerts = num_vertices(g);
-
- NodeFilter<GraphT> nf(&deadNodes, &g);
- StartFilter<GraphT> sf(&g);
-
- /* minimum distance needs to run on a graph with .*start unreachable
- * from start */
- typedef filtered_graph<GraphT, StartFilter<GraphT> > StartFilteredGraph;
- const StartFilteredGraph mindist_g(g, sf);
-
- /* maximum distance needs to run on a graph without cycles & nodes
- * reachable from cycles */
- typedef filtered_graph<GraphT, NodeFilter<GraphT> > NodeFilteredGraph;
- const NodeFilteredGraph maxdist_g(g, nf);
-
- // Record distance of each vertex from source using one of the following
- // algorithms.
-
- /* note: filtered graphs have same num_{vertices,edges} as base */
-
- dMin.assign(numVerts, DIST_UNREACHABLE);
- dMax.assign(numVerts, DIST_UNREACHABLE);
- dMin[mindist_g[srcVertex].index] = 0;
-
- using boost::make_iterator_property_map;
-
+
+ NodeFilter<GraphT> nf(&deadNodes, &g);
+ StartFilter<GraphT> sf(&g);
+
+ /* minimum distance needs to run on a graph with .*start unreachable
+ * from start */
+ typedef filtered_graph<GraphT, StartFilter<GraphT> > StartFilteredGraph;
+ const StartFilteredGraph mindist_g(g, sf);
+
+ /* maximum distance needs to run on a graph without cycles & nodes
+ * reachable from cycles */
+ typedef filtered_graph<GraphT, NodeFilter<GraphT> > NodeFilteredGraph;
+ const NodeFilteredGraph maxdist_g(g, nf);
+
+ // Record distance of each vertex from source using one of the following
+ // algorithms.
+
+ /* note: filtered graphs have same num_{vertices,edges} as base */
+
+ dMin.assign(numVerts, DIST_UNREACHABLE);
+ dMax.assign(numVerts, DIST_UNREACHABLE);
+ dMin[mindist_g[srcVertex].index] = 0;
+
+ using boost::make_iterator_property_map;
+
auto min_index_map = get(vertex_index, mindist_g);
-
- breadth_first_search(mindist_g, srcVertex,
- visitor(make_bfs_visitor(record_distances(
+
+ breadth_first_search(mindist_g, srcVertex,
+ visitor(make_bfs_visitor(record_distances(
make_iterator_property_map(dMin.begin(),
min_index_map),
boost::on_tree_edge())))
.color_map(make_small_color_map(mindist_g)));
-
+
auto max_index_map = get(vertex_index, maxdist_g);
-
- dag_shortest_paths(maxdist_g, srcVertex,
+
+ dag_shortest_paths(maxdist_g, srcVertex,
distance_map(make_iterator_property_map(dMax.begin(),
max_index_map))
.weight_map(make_constant_property<EdgeT>(-1))
.color_map(make_small_color_map(maxdist_g)));
-
- for (size_t i = 0; i < numVerts; i++) {
- if (dMin[i] > DIST_UNREACHABLE) {
- dMin[i] = DIST_UNREACHABLE;
- }
- DEBUG_PRINTF("%zu: dm %d %d\n", i, dMin[i], dMax[i]);
- if (dMax[i] >= DIST_UNREACHABLE && dMin[i] < DIST_UNREACHABLE) {
- dMax[i] = -DIST_INFINITY; /* max depths currently negative */
- DEBUG_PRINTF("bumping max to %d\n", dMax[i]);
- } else if (dMax[i] >= DIST_UNREACHABLE
- || dMax[i] < -DIST_UNREACHABLE) {
- dMax[i] = -DIST_UNREACHABLE;
- DEBUG_PRINTF("bumping max to %d\n", dMax[i]);
- }
- }
-}
-
-/**
- * \brief Convert the integer distance we use in our shortest path calculations
- * to a \ref depth value.
- */
-static
-depth depthFromDistance(int val) {
- assert(val >= 0);
- if (val >= DIST_UNREACHABLE) {
- return depth::unreachable();
- } else if (val == DIST_INFINITY) {
- return depth::infinity();
- }
- return depth((u32)val);
-}
-
-static
-DepthMinMax getDepths(u32 idx, const vector<int> &dMin,
- const vector<int> &dMax) {
- DepthMinMax d(depthFromDistance(dMin[idx]),
- depthFromDistance(-1 * dMax[idx]));
- DEBUG_PRINTF("idx=%u, depths=%s\n", idx, d.str().c_str());
- assert(d.min <= d.max);
- return d;
-}
-
-template<class Graph, class Output>
-static
+
+ for (size_t i = 0; i < numVerts; i++) {
+ if (dMin[i] > DIST_UNREACHABLE) {
+ dMin[i] = DIST_UNREACHABLE;
+ }
+ DEBUG_PRINTF("%zu: dm %d %d\n", i, dMin[i], dMax[i]);
+ if (dMax[i] >= DIST_UNREACHABLE && dMin[i] < DIST_UNREACHABLE) {
+ dMax[i] = -DIST_INFINITY; /* max depths currently negative */
+ DEBUG_PRINTF("bumping max to %d\n", dMax[i]);
+ } else if (dMax[i] >= DIST_UNREACHABLE
+ || dMax[i] < -DIST_UNREACHABLE) {
+ dMax[i] = -DIST_UNREACHABLE;
+ DEBUG_PRINTF("bumping max to %d\n", dMax[i]);
+ }
+ }
+}
+
+/**
+ * \brief Convert the integer distance we use in our shortest path calculations
+ * to a \ref depth value.
+ */
+static
+depth depthFromDistance(int val) {
+ assert(val >= 0);
+ if (val >= DIST_UNREACHABLE) {
+ return depth::unreachable();
+ } else if (val == DIST_INFINITY) {
+ return depth::infinity();
+ }
+ return depth((u32)val);
+}
+
+static
+DepthMinMax getDepths(u32 idx, const vector<int> &dMin,
+ const vector<int> &dMax) {
+ DepthMinMax d(depthFromDistance(dMin[idx]),
+ depthFromDistance(-1 * dMax[idx]));
+ DEBUG_PRINTF("idx=%u, depths=%s\n", idx, d.str().c_str());
+ assert(d.min <= d.max);
+ return d;
+}
+
+template<class Graph, class Output>
+static
void calcAndStoreDepth(const Graph &g,
- const typename Graph::vertex_descriptor src,
- const vector<bool> &deadNodes,
- vector<int> &dMin /* util */,
- vector<int> &dMax /* util */,
- vector<Output> &depths,
- DepthMinMax Output::*store) {
+ const typename Graph::vertex_descriptor src,
+ const vector<bool> &deadNodes,
+ vector<int> &dMin /* util */,
+ vector<int> &dMax /* util */,
+ vector<Output> &depths,
+ DepthMinMax Output::*store) {
calcDepthFromSource(g, src, deadNodes, dMin, dMax);
-
- for (auto v : vertices_range(g)) {
- u32 idx = g[v].index;
- assert(idx < depths.size());
- Output &d = depths.at(idx);
- d.*store = getDepths(idx, dMin, dMax);
- }
-}
-
+
+ for (auto v : vertices_range(g)) {
+ u32 idx = g[v].index;
+ assert(idx < depths.size());
+ Output &d = depths.at(idx);
+ d.*store = getDepths(idx, dMin, dMax);
+ }
+}
+
vector<NFAVertexDepth> calcDepths(const NGHolder &g) {
- assert(hasCorrectlyNumberedVertices(g));
- const size_t numVertices = num_vertices(g);
-
+ assert(hasCorrectlyNumberedVertices(g));
+ const size_t numVertices = num_vertices(g);
+
vector<NFAVertexDepth> depths(numVertices);
- vector<int> dMin;
- vector<int> dMax;
-
- /*
- * create a filtered graph for max depth calculations: all nodes/edges
- * reachable from a loop need to be removed
- */
+ vector<int> dMin;
+ vector<int> dMax;
+
+ /*
+ * create a filtered graph for max depth calculations: all nodes/edges
+ * reachable from a loop need to be removed
+ */
auto deadNodes = findLoopReachable(g, g.start);
-
- DEBUG_PRINTF("doing start\n");
+
+ DEBUG_PRINTF("doing start\n");
calcAndStoreDepth(g, g.start, deadNodes, dMin, dMax, depths,
&NFAVertexDepth::fromStart);
- DEBUG_PRINTF("doing startds\n");
+ DEBUG_PRINTF("doing startds\n");
calcAndStoreDepth(g, g.startDs, deadNodes, dMin, dMax, depths,
&NFAVertexDepth::fromStartDotStar);
return depths;
-}
-
+}
+
vector<NFAVertexRevDepth> calcRevDepths(const NGHolder &g) {
- assert(hasCorrectlyNumberedVertices(g));
- const size_t numVertices = num_vertices(g);
-
+ assert(hasCorrectlyNumberedVertices(g));
+ const size_t numVertices = num_vertices(g);
+
vector<NFAVertexRevDepth> depths(numVertices);
- vector<int> dMin;
- vector<int> dMax;
-
- /* reverse the graph before walking it */
+ vector<int> dMin;
+ vector<int> dMax;
+
+ /* reverse the graph before walking it */
typedef reverse_graph<NGHolder, const NGHolder &> RevNFAGraph;
const RevNFAGraph rg(g);
-
+
assert(num_vertices(g) == num_vertices(rg));
- /*
- * create a filtered graph for max depth calculations: all nodes/edges
- * reachable from a loop need to be removed
- */
+ /*
+ * create a filtered graph for max depth calculations: all nodes/edges
+ * reachable from a loop need to be removed
+ */
auto deadNodes = findLoopReachable(rg, g.acceptEod);
-
- DEBUG_PRINTF("doing accept\n");
- calcAndStoreDepth<RevNFAGraph, NFAVertexRevDepth>(
+
+ DEBUG_PRINTF("doing accept\n");
+ calcAndStoreDepth<RevNFAGraph, NFAVertexRevDepth>(
rg, g.accept, deadNodes, dMin, dMax, depths,
- &NFAVertexRevDepth::toAccept);
- DEBUG_PRINTF("doing accepteod\n");
- deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge.
- calcAndStoreDepth<RevNFAGraph, NFAVertexRevDepth>(
+ &NFAVertexRevDepth::toAccept);
+ DEBUG_PRINTF("doing accepteod\n");
+ deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge.
+ calcAndStoreDepth<RevNFAGraph, NFAVertexRevDepth>(
rg, g.acceptEod, deadNodes, dMin, dMax, depths,
- &NFAVertexRevDepth::toAcceptEod);
+ &NFAVertexRevDepth::toAcceptEod);
return depths;
-}
-
+}
+
vector<NFAVertexBidiDepth> calcBidiDepths(const NGHolder &g) {
- assert(hasCorrectlyNumberedVertices(g));
- const size_t numVertices = num_vertices(g);
-
+ assert(hasCorrectlyNumberedVertices(g));
+ const size_t numVertices = num_vertices(g);
+
vector<NFAVertexBidiDepth> depths(numVertices);
- vector<int> dMin;
- vector<int> dMax;
-
- /*
- * create a filtered graph for max depth calculations: all nodes/edges
- * reachable from a loop need to be removed
- */
+ vector<int> dMin;
+ vector<int> dMax;
+
+ /*
+ * create a filtered graph for max depth calculations: all nodes/edges
+ * reachable from a loop need to be removed
+ */
auto deadNodes = findLoopReachable(g, g.start);
-
- DEBUG_PRINTF("doing start\n");
+
+ DEBUG_PRINTF("doing start\n");
calcAndStoreDepth<NGHolder, NFAVertexBidiDepth>(
g, g.start, deadNodes, dMin, dMax, depths,
- &NFAVertexBidiDepth::fromStart);
- DEBUG_PRINTF("doing startds\n");
+ &NFAVertexBidiDepth::fromStart);
+ DEBUG_PRINTF("doing startds\n");
calcAndStoreDepth<NGHolder, NFAVertexBidiDepth>(
g, g.startDs, deadNodes, dMin, dMax, depths,
- &NFAVertexBidiDepth::fromStartDotStar);
-
- /* Now go backwards */
+ &NFAVertexBidiDepth::fromStartDotStar);
+
+ /* Now go backwards */
typedef reverse_graph<NGHolder, const NGHolder &> RevNFAGraph;
const RevNFAGraph rg(g);
deadNodes = findLoopReachable(rg, g.acceptEod);
-
- DEBUG_PRINTF("doing accept\n");
- calcAndStoreDepth<RevNFAGraph, NFAVertexBidiDepth>(
+
+ DEBUG_PRINTF("doing accept\n");
+ calcAndStoreDepth<RevNFAGraph, NFAVertexBidiDepth>(
rg, g.accept, deadNodes, dMin, dMax, depths,
- &NFAVertexBidiDepth::toAccept);
- DEBUG_PRINTF("doing accepteod\n");
- deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge.
- calcAndStoreDepth<RevNFAGraph, NFAVertexBidiDepth>(
+ &NFAVertexBidiDepth::toAccept);
+ DEBUG_PRINTF("doing accepteod\n");
+ deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge.
+ calcAndStoreDepth<RevNFAGraph, NFAVertexBidiDepth>(
rg, g.acceptEod, deadNodes, dMin, dMax, depths,
- &NFAVertexBidiDepth::toAcceptEod);
+ &NFAVertexBidiDepth::toAcceptEod);
return depths;
-}
-
+}
+
vector<DepthMinMax> calcDepthsFrom(const NGHolder &g, const NFAVertex src) {
- assert(hasCorrectlyNumberedVertices(g));
- const size_t numVertices = num_vertices(g);
-
+ assert(hasCorrectlyNumberedVertices(g));
+ const size_t numVertices = num_vertices(g);
+
auto deadNodes = findLoopReachable(g, g.start);
-
- vector<int> dMin, dMax;
+
+ vector<int> dMin, dMax;
calcDepthFromSource(g, src, deadNodes, dMin, dMax);
-
+
vector<DepthMinMax> depths(numVertices);
-
- for (auto v : vertices_range(g)) {
+
+ for (auto v : vertices_range(g)) {
auto idx = g[v].index;
- depths.at(idx) = getDepths(idx, dMin, dMax);
- }
+ depths.at(idx) = getDepths(idx, dMin, dMax);
+ }
return depths;
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_depth.h b/contrib/libs/hyperscan/src/nfagraph/ng_depth.h
index 418e5e4412..36cca87e84 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_depth.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_depth.h
@@ -1,99 +1,99 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief NFA graph vertex depth calculations.
- */
-
+ * \brief NFA graph vertex depth calculations.
+ */
+
#ifndef NG_DEPTH_H
#define NG_DEPTH_H
-
+
#include "ue2common.h"
-#include "nfagraph/ng_holder.h"
-#include "util/depth.h"
-
-#include <vector>
-
-namespace ue2 {
-
-/**
- * \brief Encapsulates min/max depths relative to the start and startDs
- * vertices.
- */
-struct NFAVertexDepth {
- DepthMinMax fromStart;
- DepthMinMax fromStartDotStar;
-};
-
-/**
- * \brief Encapsulates min/max depths relative to the accept and acceptEod
- * vertices.
- */
-struct NFAVertexRevDepth {
- DepthMinMax toAccept;
- DepthMinMax toAcceptEod;
-};
-
-/**
- * \brief Encapsulates min/max depths relative to all of our special vertices.
- */
+#include "nfagraph/ng_holder.h"
+#include "util/depth.h"
+
+#include <vector>
+
+namespace ue2 {
+
+/**
+ * \brief Encapsulates min/max depths relative to the start and startDs
+ * vertices.
+ */
+struct NFAVertexDepth {
+ DepthMinMax fromStart;
+ DepthMinMax fromStartDotStar;
+};
+
+/**
+ * \brief Encapsulates min/max depths relative to the accept and acceptEod
+ * vertices.
+ */
+struct NFAVertexRevDepth {
+ DepthMinMax toAccept;
+ DepthMinMax toAcceptEod;
+};
+
+/**
+ * \brief Encapsulates min/max depths relative to all of our special vertices.
+ */
struct NFAVertexBidiDepth {
DepthMinMax fromStart;
DepthMinMax fromStartDotStar;
DepthMinMax toAccept;
DepthMinMax toAcceptEod;
-};
-
-/**
+};
+
+/**
* \brief Calculate depths from start and startDs. Returns them in a vector,
* indexed by vertex index.
- */
+ */
std::vector<NFAVertexDepth> calcDepths(const NGHolder &g);
-
-/**
+
+/**
* \brief Calculate depths to accept and acceptEod. Returns them in a vector,
* indexed by vertex index.
- */
+ */
std::vector<NFAVertexRevDepth> calcRevDepths(const NGHolder &g);
-
-/**
+
+/**
* \brief Calculate depths to/from all special vertices. Returns them in a
* vector, indexed by vertex index.
- */
+ */
std::vector<NFAVertexBidiDepth> calcBidiDepths(const NGHolder &g);
-
+
/**
* \brief Calculate the (min, max) depths from the given \p src to every vertex
* in the graph and return them in a vector, indexed by \p vertex_index.
*/
std::vector<DepthMinMax> calcDepthsFrom(const NGHolder &g, const NFAVertex src);
-
-} // namespace ue2
-
+
+} // namespace ue2
+
#endif // NG_DEPTH_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_dominators.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_dominators.cpp
index 2589881009..d6a064d12f 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_dominators.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_dominators.cpp
@@ -1,73 +1,73 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Calculate dominator and post-dominator trees.
- *
- * A small wrapper around the BGL's lengauer_tarjan_dominator_tree algorithm.
- */
-#include "ng_dominators.h"
-
-#include "ue2common.h"
-#include "ng_holder.h"
-#include "ng_util.h"
-
-#include <boost-patched/graph/dominator_tree.hpp> // locally patched version
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Calculate dominator and post-dominator trees.
+ *
+ * A small wrapper around the BGL's lengauer_tarjan_dominator_tree algorithm.
+ */
+#include "ng_dominators.h"
+
+#include "ue2common.h"
+#include "ng_holder.h"
+#include "ng_util.h"
+
+#include <boost-patched/graph/dominator_tree.hpp> // locally patched version
#include <boost-patched/graph/reverse_graph.hpp>
-
-using namespace std;
-using boost::make_assoc_property_map;
-using boost::make_iterator_property_map;
-
-namespace ue2 {
-
-template <class Graph>
+
+using namespace std;
+using boost::make_assoc_property_map;
+using boost::make_iterator_property_map;
+
+namespace ue2 {
+
+template <class Graph>
unordered_map<NFAVertex, NFAVertex> calcDominators(const Graph &g,
typename Graph::vertex_descriptor source) {
using Vertex = typename Graph::vertex_descriptor;
- const size_t num_verts = num_vertices(g);
- auto index_map = get(&NFAGraphVertexProps::index, g);
-
- vector<size_t> dfnum(num_verts, 0);
+ const size_t num_verts = num_vertices(g);
+ auto index_map = get(&NFAGraphVertexProps::index, g);
+
+ vector<size_t> dfnum(num_verts, 0);
vector<Vertex> parents(num_verts, Graph::null_vertex());
-
- auto dfnum_map = make_iterator_property_map(dfnum.begin(), index_map);
- auto parent_map = make_iterator_property_map(parents.begin(), index_map);
+
+ auto dfnum_map = make_iterator_property_map(dfnum.begin(), index_map);
+ auto parent_map = make_iterator_property_map(parents.begin(), index_map);
vector<Vertex> vertices_by_dfnum(num_verts, Graph::null_vertex());
-
- // Output map.
+
+ // Output map.
vector<Vertex> doms(num_verts, Graph::null_vertex());
auto dom_map = make_iterator_property_map(doms.begin(), index_map);
-
- boost_ue2::lengauer_tarjan_dominator_tree(g, source, index_map, dfnum_map,
- parent_map, vertices_by_dfnum,
- dom_map);
-
+
+ boost_ue2::lengauer_tarjan_dominator_tree(g, source, index_map, dfnum_map,
+ parent_map, vertices_by_dfnum,
+ dom_map);
+
/* Translate back to an NFAVertex map */
unordered_map<NFAVertex, NFAVertex> doms2;
doms2.reserve(num_verts);
@@ -78,17 +78,17 @@ unordered_map<NFAVertex, NFAVertex> calcDominators(const Graph &g,
}
}
return doms2;
-}
-
+}
+
unordered_map<NFAVertex, NFAVertex> findDominators(const NGHolder &g) {
- assert(hasCorrectlyNumberedVertices(g));
+ assert(hasCorrectlyNumberedVertices(g));
return calcDominators(g, g.start);
-}
-
+}
+
unordered_map<NFAVertex, NFAVertex> findPostDominators(const NGHolder &g) {
- assert(hasCorrectlyNumberedVertices(g));
+ assert(hasCorrectlyNumberedVertices(g));
return calcDominators(boost::reverse_graph<NGHolder, const NGHolder &>(g),
- g.acceptEod);
-}
-
-} // namespace ue2
+ g.acceptEod);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_dominators.h b/contrib/libs/hyperscan/src/nfagraph/ng_dominators.h
index eefc7e93df..f505b7e471 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_dominators.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_dominators.h
@@ -1,50 +1,50 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Calculate dominator and post-dominator trees.
- *
- * A small wrapper around the BGL's lengauer_tarjan_dominator_tree algorithm.
- */
-
-#ifndef NG_DOMINATORS_H
-#define NG_DOMINATORS_H
-
-#include "ng_holder.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Calculate dominator and post-dominator trees.
+ *
+ * A small wrapper around the BGL's lengauer_tarjan_dominator_tree algorithm.
+ */
+
+#ifndef NG_DOMINATORS_H
+#define NG_DOMINATORS_H
+
+#include "ng_holder.h"
+
#include <unordered_map>
-namespace ue2 {
-
+namespace ue2 {
+
std::unordered_map<NFAVertex, NFAVertex> findDominators(const NGHolder &g);
-
+
std::unordered_map<NFAVertex, NFAVertex> findPostDominators(const NGHolder &g);
-
-} // namespace ue2
-
-#endif // NG_DOMINATORS_H
+
+} // namespace ue2
+
+#endif // NG_DOMINATORS_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_dump.h b/contrib/libs/hyperscan/src/nfagraph/ng_dump.h
index 6b22ac2e21..3e12d1d22e 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_dump.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_dump.h
@@ -1,175 +1,175 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Dump code for NFA graphs.
- */
-
-#ifndef NG_DUMP_H
-#define NG_DUMP_H
-
-#include "grey.h"
-#include "ng_holder.h" // for graph types
-#include "ue2common.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Dump code for NFA graphs.
+ */
+
+#ifndef NG_DUMP_H
+#define NG_DUMP_H
+
+#include "grey.h"
+#include "ng_holder.h" // for graph types
+#include "ue2common.h"
+
#include <unordered_map>
-#ifdef DUMP_SUPPORT
-#include <fstream>
-#endif
-
-struct RoseEngine;
-
-namespace ue2 {
-
-class NGHolder;
-class NG;
+#ifdef DUMP_SUPPORT
+#include <fstream>
+#endif
+
+struct RoseEngine;
+
+namespace ue2 {
+
+class NGHolder;
+class NG;
class ExpressionInfo;
-class ReportManager;
-
-// Implementations for stubs below -- all have the suffix "Impl".
-
-#ifdef DUMP_SUPPORT
-
-template <typename GraphT>
-void dumpGraphImpl(const char *name, const GraphT &g);
-
-template <typename GraphT>
-void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm);
-
+class ReportManager;
+
+// Implementations for stubs below -- all have the suffix "Impl".
+
+#ifdef DUMP_SUPPORT
+
+template <typename GraphT>
+void dumpGraphImpl(const char *name, const GraphT &g);
+
+template <typename GraphT>
+void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm);
+
void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr,
const char *name, const Grey &grey);
-
-void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr, u32 comp,
- const Grey &grey);
-
-void dumpSomSubComponentImpl(const NGHolder &g, const char *name, u32 expr,
- u32 comp, u32 plan, const Grey &grey);
-
-void dumpHolderImpl(const NGHolder &h, unsigned int stageNumber,
- const char *stageName, const Grey &grey);
-
-// Variant that takes a region map as well.
-void dumpHolderImpl(const NGHolder &h,
+
+void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr, u32 comp,
+ const Grey &grey);
+
+void dumpSomSubComponentImpl(const NGHolder &g, const char *name, u32 expr,
+ u32 comp, u32 plan, const Grey &grey);
+
+void dumpHolderImpl(const NGHolder &h, unsigned int stageNumber,
+ const char *stageName, const Grey &grey);
+
+// Variant that takes a region map as well.
+void dumpHolderImpl(const NGHolder &h,
const std::unordered_map<NFAVertex, u32> &region_map,
- unsigned int stageNumber, const char *stageName,
- const Grey &grey);
-
-template <typename GraphT>
-static inline void dumpGraph(UNUSED const char *name, UNUSED const GraphT &g) {
- dumpGraphImpl(name, g);
-}
-
-#endif // DUMP_SUPPORT
-
-// Stubs which call through to dump code if compiled in.
-
-UNUSED static inline
+ unsigned int stageNumber, const char *stageName,
+ const Grey &grey);
+
+template <typename GraphT>
+static inline void dumpGraph(UNUSED const char *name, UNUSED const GraphT &g) {
+ dumpGraphImpl(name, g);
+}
+
+#endif // DUMP_SUPPORT
+
+// Stubs which call through to dump code if compiled in.
+
+UNUSED static inline
void dumpDotWrapper(UNUSED const NGHolder &g, UNUSED const ExpressionInfo &expr,
UNUSED const char *name, UNUSED const Grey &grey) {
-#ifdef DUMP_SUPPORT
+#ifdef DUMP_SUPPORT
dumpDotWrapperImpl(g, expr, name, grey);
-#endif
-}
-
-UNUSED static inline
-void dumpComponent(UNUSED const NGHolder &h, UNUSED const char *name,
- UNUSED u32 expr, UNUSED u32 comp, UNUSED const Grey &grey) {
-#ifdef DUMP_SUPPORT
- dumpComponentImpl(h, name, expr, comp, grey);
-#endif
-}
-
-UNUSED static inline
-void dumpSomSubComponent(UNUSED const NGHolder &h, UNUSED const char *name,
- UNUSED u32 expr, UNUSED u32 comp, UNUSED u32 plan,
- UNUSED const Grey &grey) {
-#ifdef DUMP_SUPPORT
- dumpSomSubComponentImpl(h, name, expr, comp, plan, grey);
-#endif
-}
-
-UNUSED static inline
-void dumpHolder(UNUSED const NGHolder &h, UNUSED unsigned int stageNumber,
- UNUSED const char *name, UNUSED const Grey &grey) {
-#ifdef DUMP_SUPPORT
- dumpHolderImpl(h, stageNumber, name, grey);
-#endif
-}
-
-UNUSED static inline
-void dumpHolder(UNUSED const NGHolder &h,
+#endif
+}
+
+UNUSED static inline
+void dumpComponent(UNUSED const NGHolder &h, UNUSED const char *name,
+ UNUSED u32 expr, UNUSED u32 comp, UNUSED const Grey &grey) {
+#ifdef DUMP_SUPPORT
+ dumpComponentImpl(h, name, expr, comp, grey);
+#endif
+}
+
+UNUSED static inline
+void dumpSomSubComponent(UNUSED const NGHolder &h, UNUSED const char *name,
+ UNUSED u32 expr, UNUSED u32 comp, UNUSED u32 plan,
+ UNUSED const Grey &grey) {
+#ifdef DUMP_SUPPORT
+ dumpSomSubComponentImpl(h, name, expr, comp, plan, grey);
+#endif
+}
+
+UNUSED static inline
+void dumpHolder(UNUSED const NGHolder &h, UNUSED unsigned int stageNumber,
+ UNUSED const char *name, UNUSED const Grey &grey) {
+#ifdef DUMP_SUPPORT
+ dumpHolderImpl(h, stageNumber, name, grey);
+#endif
+}
+
+UNUSED static inline
+void dumpHolder(UNUSED const NGHolder &h,
UNUSED const std::unordered_map<NFAVertex, u32> &region_map,
- UNUSED unsigned int stageNumber, UNUSED const char *name,
- UNUSED const Grey &grey) {
-#ifdef DUMP_SUPPORT
- dumpHolderImpl(h, region_map, stageNumber, name, grey);
-#endif
-}
-
-#ifdef DUMP_SUPPORT
-void dumpReportManager(const ReportManager &rm, const Grey &grey);
-void dumpSmallWrite(const RoseEngine *rose, const Grey &grey);
-#else
-static UNUSED
-void dumpReportManager(const ReportManager &, const Grey &) {
-}
-static UNUSED
-void dumpSmallWrite(const RoseEngine *, const Grey &) {
-}
-#endif
-
-#ifdef DUMP_SUPPORT
-// replace boost's graphviz writer
-template <typename GraphT, typename WriterT, typename VertexID>
-static void writeGraphviz(std::ostream &out, const GraphT &g, WriterT w,
- const VertexID &vertex_id) {
- const std::string delimiter(" -> ");
- out << "digraph G {" << std::endl;
-
- typename boost::graph_traits<GraphT>::vertex_iterator i, end;
- for(boost::tie(i,end) = vertices(g); i != end; ++i) {
- out << get(vertex_id, *i);
- w(out, *i); // print vertex attributes
- out << ";" << std::endl;
- }
- typename boost::graph_traits<GraphT>::edge_iterator ei, edge_end;
- for(boost::tie(ei, edge_end) = edges(g); ei != edge_end; ++ei) {
- out << (get(vertex_id, source(*ei, g))) << delimiter
- << (get(vertex_id, target(*ei, g))) << " ";
- w(out, *ei); // print edge attributes
- out << ";" << std::endl;
- }
- out << "}" << std::endl;
-}
-
-#endif // DUMP_SUPPORT
-
-} // namespace ue2
-
-#endif // NG_DUMP_H
+ UNUSED unsigned int stageNumber, UNUSED const char *name,
+ UNUSED const Grey &grey) {
+#ifdef DUMP_SUPPORT
+ dumpHolderImpl(h, region_map, stageNumber, name, grey);
+#endif
+}
+
+#ifdef DUMP_SUPPORT
+void dumpReportManager(const ReportManager &rm, const Grey &grey);
+void dumpSmallWrite(const RoseEngine *rose, const Grey &grey);
+#else
+static UNUSED
+void dumpReportManager(const ReportManager &, const Grey &) {
+}
+static UNUSED
+void dumpSmallWrite(const RoseEngine *, const Grey &) {
+}
+#endif
+
+#ifdef DUMP_SUPPORT
+// replace boost's graphviz writer
+template <typename GraphT, typename WriterT, typename VertexID>
+static void writeGraphviz(std::ostream &out, const GraphT &g, WriterT w,
+ const VertexID &vertex_id) {
+ const std::string delimiter(" -> ");
+ out << "digraph G {" << std::endl;
+
+ typename boost::graph_traits<GraphT>::vertex_iterator i, end;
+ for(boost::tie(i,end) = vertices(g); i != end; ++i) {
+ out << get(vertex_id, *i);
+ w(out, *i); // print vertex attributes
+ out << ";" << std::endl;
+ }
+ typename boost::graph_traits<GraphT>::edge_iterator ei, edge_end;
+ for(boost::tie(ei, edge_end) = edges(g); ei != edge_end; ++ei) {
+ out << (get(vertex_id, source(*ei, g))) << delimiter
+ << (get(vertex_id, target(*ei, g))) << " ";
+ w(out, *ei); // print edge attributes
+ out << ";" << std::endl;
+ }
+ out << "}" << std::endl;
+}
+
+#endif // DUMP_SUPPORT
+
+} // namespace ue2
+
+#endif // NG_DUMP_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.cpp
index ed2de70598..b8354bd42a 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.cpp
@@ -1,186 +1,186 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Edge redundancy graph reductions.
- */
-#include "ng_edge_redundancy.h"
-
-#include "ng_holder.h"
-#include "ng_prune.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "parser/position.h"
-#include "util/compile_context.h"
-#include "util/container.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Edge redundancy graph reductions.
+ */
+#include "ng_edge_redundancy.h"
+
+#include "ng_holder.h"
+#include "ng_prune.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "parser/position.h"
+#include "util/compile_context.h"
+#include "util/container.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
-
-#include <set>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-/* reverse edge redundancy removal is possible but is not implemented as it
- * regressed rose pattern support in the regression suite: 19026 - 19027
- * (foo.{1,5}b?ar)
- *
- * If rose becomes smarter we can reimplement.
- */
-
-static never_inline
-bool checkVerticesFwd(const NGHolder &g, const set<NFAVertex> &sad,
- const set<NFAVertex> &happy) {
- /* need to check if for each vertex in sad if it has an edge to a happy
- * vertex */
- for (auto u : sad) {
- bool ok = false;
- for (auto v : adjacent_vertices_range(u, g)) {
- if (contains(happy, v)) {
- ok = true;
- break;
- }
- }
-
- if (!ok) {
- return false;
- }
- }
-
- return true;
-}
-
-static never_inline
-bool checkVerticesRev(const NGHolder &g, const set<NFAVertex> &sad,
- const set<NFAVertex> &happy) {
- /* need to check if for each vertex in sad if it has an edge to a happy
- * vertex */
- for (auto v : sad) {
- bool ok = false;
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (contains(happy, u)) {
- ok = true;
- break;
- }
- }
-
- if (!ok) {
- return false;
- }
- }
-
- return true;
-}
-
-/** \brief Redundant self-loop removal.
- *
- * A self loop on a vertex v can be removed if:
- *
- * For every vertex u in pred(v) either:
- * 1: u has a self loop and cr(v) subset of cr(u)
- * OR
- * 2: u has an edge to vertex satisfying criterion 1
- *
- * Note: we remove all dead loops at the end of the pass and do not check the
- * live status of the loops we are depending on during the analysis.
- *
- * We don't end up in situations where we remove a group of loops which depend
- * on each other as:
- *
- * - there must be at least one vertex not in the group which is a pred of some
- * member of the group (as we don't remove loops on specials)
- *
- * For each pred vertex of the group:
- * - the vertex must be 'sad' as it is not part of the group
- * - therefore it must have edges to each member of the group (to happy, trans)
- * - therefore the group is enabled simultaneously
- * - due to internal group edges, all members will still be active after the
- * next character.
- *
- * Actually, the vertex redundancy code will merge the entire group into one
- * cyclic state.
- */
-static
-bool removeEdgeRedundancyNearCyclesFwd(NGHolder &g, bool ignore_starts) {
- unsigned dead_count = 0;
-
- set<NFAVertex> happy;
- set<NFAVertex> sad;
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g) || !hasSelfLoop(v, g)) {
- continue;
- }
-
- const CharReach &cr_v = g[v].char_reach;
-
- happy.clear();
- sad.clear();
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == v) {
- continue;
- }
-
- if (!hasSelfLoop(u, g)) {
- sad.insert(u);
- continue;
- }
-
- if (ignore_starts) {
- if (u == g.startDs || is_virtual_start(u, g)) {
- sad.insert(u);
- continue;
- }
- }
-
- const CharReach &cr_u = g[u].char_reach;
-
- if ((cr_u & cr_v) != cr_v) {
- sad.insert(u);
- continue;
- }
-
- happy.insert(u);
- }
-
- if (!happy.empty() && checkVerticesFwd(g, sad, happy)) {
- dead_count++;
- remove_edge(v, v, g);
- }
- }
-
- DEBUG_PRINTF("found %u removable edges.\n", dead_count);
- return dead_count;
-}
-
+#include "util/graph_range.h"
+
+#include <set>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+/* reverse edge redundancy removal is possible but is not implemented as it
+ * regressed rose pattern support in the regression suite: 19026 - 19027
+ * (foo.{1,5}b?ar)
+ *
+ * If rose becomes smarter we can reimplement.
+ */
+
+static never_inline
+bool checkVerticesFwd(const NGHolder &g, const set<NFAVertex> &sad,
+ const set<NFAVertex> &happy) {
+ /* need to check if for each vertex in sad if it has an edge to a happy
+ * vertex */
+ for (auto u : sad) {
+ bool ok = false;
+ for (auto v : adjacent_vertices_range(u, g)) {
+ if (contains(happy, v)) {
+ ok = true;
+ break;
+ }
+ }
+
+ if (!ok) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static never_inline
+bool checkVerticesRev(const NGHolder &g, const set<NFAVertex> &sad,
+ const set<NFAVertex> &happy) {
+ /* need to check if for each vertex in sad if it has an edge to a happy
+ * vertex */
+ for (auto v : sad) {
+ bool ok = false;
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (contains(happy, u)) {
+ ok = true;
+ break;
+ }
+ }
+
+ if (!ok) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/** \brief Redundant self-loop removal.
+ *
+ * A self loop on a vertex v can be removed if:
+ *
+ * For every vertex u in pred(v) either:
+ * 1: u has a self loop and cr(v) subset of cr(u)
+ * OR
+ * 2: u has an edge to vertex satisfying criterion 1
+ *
+ * Note: we remove all dead loops at the end of the pass and do not check the
+ * live status of the loops we are depending on during the analysis.
+ *
+ * We don't end up in situations where we remove a group of loops which depend
+ * on each other as:
+ *
+ * - there must be at least one vertex not in the group which is a pred of some
+ * member of the group (as we don't remove loops on specials)
+ *
+ * For each pred vertex of the group:
+ * - the vertex must be 'sad' as it is not part of the group
+ * - therefore it must have edges to each member of the group (to happy, trans)
+ * - therefore the group is enabled simultaneously
+ * - due to internal group edges, all members will still be active after the
+ * next character.
+ *
+ * Actually, the vertex redundancy code will merge the entire group into one
+ * cyclic state.
+ */
+static
+bool removeEdgeRedundancyNearCyclesFwd(NGHolder &g, bool ignore_starts) {
+ unsigned dead_count = 0;
+
+ set<NFAVertex> happy;
+ set<NFAVertex> sad;
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g) || !hasSelfLoop(v, g)) {
+ continue;
+ }
+
+ const CharReach &cr_v = g[v].char_reach;
+
+ happy.clear();
+ sad.clear();
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == v) {
+ continue;
+ }
+
+ if (!hasSelfLoop(u, g)) {
+ sad.insert(u);
+ continue;
+ }
+
+ if (ignore_starts) {
+ if (u == g.startDs || is_virtual_start(u, g)) {
+ sad.insert(u);
+ continue;
+ }
+ }
+
+ const CharReach &cr_u = g[u].char_reach;
+
+ if ((cr_u & cr_v) != cr_v) {
+ sad.insert(u);
+ continue;
+ }
+
+ happy.insert(u);
+ }
+
+ if (!happy.empty() && checkVerticesFwd(g, sad, happy)) {
+ dead_count++;
+ remove_edge(v, v, g);
+ }
+ }
+
+ DEBUG_PRINTF("found %u removable edges.\n", dead_count);
+ return dead_count;
+}
+
static
bool checkReportsRev(const NGHolder &g, NFAVertex v,
const set<NFAVertex> &happy) {
@@ -203,336 +203,336 @@ bool checkReportsRev(const NGHolder &g, NFAVertex v,
return is_subset_of(g[v].reports, happy_reports);
}
-/** \brief Redundant self-loop removal (reverse version).
- *
- * A self loop on a vertex v can be removed if:
- *
- * For every vertex u in succ(v) either:
- * 1: u has a self loop and cr(v) is a subset of cr(u).
- * OR
- * 2: u is not an accept and u has an edge from a vertex satisfying
- * criterion 1.
- * OR
- * 3: u is in an accept and u has an edge from a vertex v' satisfying
- * criterion 1 and report(v) == report(v').
- */
-static
-bool removeEdgeRedundancyNearCyclesRev(NGHolder &g) {
- unsigned dead_count = 0;
-
- set<NFAVertex> happy;
- set<NFAVertex> sad;
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g) || !hasSelfLoop(v, g)) {
- continue;
- }
-
- const CharReach &cr_v = g[v].char_reach;
-
- happy.clear();
- sad.clear();
-
- for (auto u : adjacent_vertices_range(v, g)) {
- if (u == v) {
- continue;
- }
-
- if (!hasSelfLoop(u, g)) {
- sad.insert(u);
- continue;
- }
-
- assert(!is_special(u, g));
-
- const CharReach &cr_u = g[u].char_reach;
-
- if (!cr_v.isSubsetOf(cr_u)) {
- sad.insert(u);
- continue;
- }
-
- happy.insert(u);
- }
-
+/** \brief Redundant self-loop removal (reverse version).
+ *
+ * A self loop on a vertex v can be removed if:
+ *
+ * For every vertex u in succ(v) either:
+ * 1: u has a self loop and cr(v) is a subset of cr(u).
+ * OR
+ * 2: u is not an accept and u has an edge from a vertex satisfying
+ * criterion 1.
+ * OR
+ * 3: u is in an accept and u has an edge from a vertex v' satisfying
+ * criterion 1 and report(v) == report(v').
+ */
+static
+bool removeEdgeRedundancyNearCyclesRev(NGHolder &g) {
+ unsigned dead_count = 0;
+
+ set<NFAVertex> happy;
+ set<NFAVertex> sad;
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g) || !hasSelfLoop(v, g)) {
+ continue;
+ }
+
+ const CharReach &cr_v = g[v].char_reach;
+
+ happy.clear();
+ sad.clear();
+
+ for (auto u : adjacent_vertices_range(v, g)) {
+ if (u == v) {
+ continue;
+ }
+
+ if (!hasSelfLoop(u, g)) {
+ sad.insert(u);
+ continue;
+ }
+
+ assert(!is_special(u, g));
+
+ const CharReach &cr_u = g[u].char_reach;
+
+ if (!cr_v.isSubsetOf(cr_u)) {
+ sad.insert(u);
+ continue;
+ }
+
+ happy.insert(u);
+ }
+
if (!happy.empty() && checkVerticesRev(g, sad, happy)
&& checkReportsRev(g, v, happy)) {
- dead_count++;
- remove_edge(v, v, g);
- }
- }
-
- DEBUG_PRINTF("found %u removable edges.\n", dead_count);
- return dead_count;
-}
-
-static
-bool parentsSubsetOf(const NGHolder &g, NFAVertex v,
- const flat_set<NFAVertex> &other_parents, NFAVertex other,
- map<NFAVertex, bool> &done) {
- map<NFAVertex, bool>::const_iterator dit = done.find(v);
- if (dit != done.end()) {
- return dit->second;
- }
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == v && contains(other_parents, other)) {
- continue;
- }
-
- if (!contains(other_parents, u)) {
- done[v] = false;
- return false;
- }
- }
-
- done[v] = true;
- return true;
-}
-
-static
-bool checkFwdCandidate(const NGHolder &g, NFAVertex fixed_src,
- const flat_set<NFAVertex> &fixed_parents,
- const NFAEdge &candidate,
- map<NFAVertex, bool> &done) {
- NFAVertex w = source(candidate, g);
- NFAVertex v = target(candidate, g);
- const CharReach &cr_w = g[w].char_reach;
- const CharReach &cr_u = g[fixed_src].char_reach;
-
- /* There is no reason why self loops cannot be considered by this
- * transformation but the removal is already handled by many other
- * transformations. */
- if (w == v) {
- return false;
- }
-
- if (is_special(w, g)) {
- return false;
- }
-
- if (!cr_w.isSubsetOf(cr_u)) {
- return false;
- }
-
- /* check that each parent of w is also a parent of u */
- if (!parentsSubsetOf(g, w, fixed_parents, fixed_src, done)) {
- return false;
- }
-
+ dead_count++;
+ remove_edge(v, v, g);
+ }
+ }
+
+ DEBUG_PRINTF("found %u removable edges.\n", dead_count);
+ return dead_count;
+}
+
+static
+bool parentsSubsetOf(const NGHolder &g, NFAVertex v,
+ const flat_set<NFAVertex> &other_parents, NFAVertex other,
+ map<NFAVertex, bool> &done) {
+ map<NFAVertex, bool>::const_iterator dit = done.find(v);
+ if (dit != done.end()) {
+ return dit->second;
+ }
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == v && contains(other_parents, other)) {
+ continue;
+ }
+
+ if (!contains(other_parents, u)) {
+ done[v] = false;
+ return false;
+ }
+ }
+
+ done[v] = true;
+ return true;
+}
+
+static
+bool checkFwdCandidate(const NGHolder &g, NFAVertex fixed_src,
+ const flat_set<NFAVertex> &fixed_parents,
+ const NFAEdge &candidate,
+ map<NFAVertex, bool> &done) {
+ NFAVertex w = source(candidate, g);
+ NFAVertex v = target(candidate, g);
+ const CharReach &cr_w = g[w].char_reach;
+ const CharReach &cr_u = g[fixed_src].char_reach;
+
+ /* There is no reason why self loops cannot be considered by this
+ * transformation but the removal is already handled by many other
+ * transformations. */
+ if (w == v) {
+ return false;
+ }
+
+ if (is_special(w, g)) {
+ return false;
+ }
+
+ if (!cr_w.isSubsetOf(cr_u)) {
+ return false;
+ }
+
+ /* check that each parent of w is also a parent of u */
+ if (!parentsSubsetOf(g, w, fixed_parents, fixed_src, done)) {
+ return false;
+ }
+
DEBUG_PRINTF("edge (%zu, %zu) killed by edge (%zu, %zu)\n",
g[w].index, g[v].index, g[fixed_src].index, g[v].index);
- return true;
-}
-
-static never_inline
-void checkLargeOutU(const NGHolder &g, NFAVertex u,
- const flat_set<NFAVertex> &parents_u,
- flat_set<NFAVertex> &possible_w,
- map<NFAVertex, bool> &done,
- set<NFAEdge> *dead) {
- /* only vertices with at least one parent in common with u need to be
- * considered, and we also only consider potential siblings with subset
- * reach. */
- possible_w.clear();
- const CharReach &cr_u = g[u].char_reach;
- for (auto p : parents_u) {
- for (auto v : adjacent_vertices_range(p, g)) {
- const CharReach &cr_w = g[v].char_reach;
- if (cr_w.isSubsetOf(cr_u)) {
- possible_w.insert(v);
- }
- }
- }
-
- // If there's only one, it's us, and we have no work to do.
- if (possible_w.size() <= 1) {
- assert(possible_w.empty() || *possible_w.begin() == u);
- return;
- }
-
- for (const auto &e : out_edges_range(u, g)) {
- const NFAVertex v = target(e, g);
-
- if (is_special(v, g)) {
- continue;
- }
-
- if (contains(*dead, e)) {
- continue;
- }
-
- /* Now need check to find any edges which can be removed due to the
- * existence of edge e */
- for (const auto &e2 : in_edges_range(v, g)) {
- if (e == e2 || contains(*dead, e2)) {
- continue;
- }
-
- const NFAVertex w = source(e2, g);
- if (!contains(possible_w, w)) {
- continue;
- }
-
- if (checkFwdCandidate(g, u, parents_u, e2, done)) {
- dead->insert(e2);
- }
- }
- }
-}
-
-static never_inline
-void checkSmallOutU(const NGHolder &g, NFAVertex u,
- const flat_set<NFAVertex> &parents_u,
- map<NFAVertex, bool> &done,
- set<NFAEdge> *dead) {
- for (const auto &e : out_edges_range(u, g)) {
- const NFAVertex v = target(e, g);
-
- if (is_special(v, g)) {
- continue;
- }
-
- if (contains(*dead, e)) {
- continue;
- }
-
- /* Now need check to find any edges which can be removed due to the
- * existence of edge e */
- for (const auto &e2 : in_edges_range(v, g)) {
- if (e == e2 || contains(*dead, e2)) {
- continue;
- }
-
- if (checkFwdCandidate(g, u, parents_u, e2, done)) {
- dead->insert(e2);
- }
- }
- }
-}
-
-/** \brief Forward edge redundancy pass.
- *
- * An edge e from w to v is redundant if there exists an edge e' such that:
- * e' is from u to v
- * and: reach(w) is a subset of reach(u)
- * and: proper_pred(w) is a subset of pred(u)
- * and: self_loop(w) implies self_loop(u) or edge from (w to u)
- *
- * Note: edges to accepts also require report ID checks.
- */
-static
-bool removeEdgeRedundancyFwd(NGHolder &g, bool ignore_starts) {
- set<NFAEdge> dead;
- map<NFAVertex, bool> done;
- flat_set<NFAVertex> parents_u;
- flat_set<NFAVertex> possible_w;
-
- for (auto u : vertices_range(g)) {
- if (ignore_starts && (u == g.startDs || is_virtual_start(u, g))) {
- continue;
- }
-
- parents_u.clear();
- pred(g, u, &parents_u);
-
- done.clear();
+ return true;
+}
+
+static never_inline
+void checkLargeOutU(const NGHolder &g, NFAVertex u,
+ const flat_set<NFAVertex> &parents_u,
+ flat_set<NFAVertex> &possible_w,
+ map<NFAVertex, bool> &done,
+ set<NFAEdge> *dead) {
+ /* only vertices with at least one parent in common with u need to be
+ * considered, and we also only consider potential siblings with subset
+ * reach. */
+ possible_w.clear();
+ const CharReach &cr_u = g[u].char_reach;
+ for (auto p : parents_u) {
+ for (auto v : adjacent_vertices_range(p, g)) {
+ const CharReach &cr_w = g[v].char_reach;
+ if (cr_w.isSubsetOf(cr_u)) {
+ possible_w.insert(v);
+ }
+ }
+ }
+
+ // If there's only one, it's us, and we have no work to do.
+ if (possible_w.size() <= 1) {
+ assert(possible_w.empty() || *possible_w.begin() == u);
+ return;
+ }
+
+ for (const auto &e : out_edges_range(u, g)) {
+ const NFAVertex v = target(e, g);
+
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ if (contains(*dead, e)) {
+ continue;
+ }
+
+ /* Now need check to find any edges which can be removed due to the
+ * existence of edge e */
+ for (const auto &e2 : in_edges_range(v, g)) {
+ if (e == e2 || contains(*dead, e2)) {
+ continue;
+ }
+
+ const NFAVertex w = source(e2, g);
+ if (!contains(possible_w, w)) {
+ continue;
+ }
+
+ if (checkFwdCandidate(g, u, parents_u, e2, done)) {
+ dead->insert(e2);
+ }
+ }
+ }
+}
+
+static never_inline
+void checkSmallOutU(const NGHolder &g, NFAVertex u,
+ const flat_set<NFAVertex> &parents_u,
+ map<NFAVertex, bool> &done,
+ set<NFAEdge> *dead) {
+ for (const auto &e : out_edges_range(u, g)) {
+ const NFAVertex v = target(e, g);
+
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ if (contains(*dead, e)) {
+ continue;
+ }
+
+ /* Now need check to find any edges which can be removed due to the
+ * existence of edge e */
+ for (const auto &e2 : in_edges_range(v, g)) {
+ if (e == e2 || contains(*dead, e2)) {
+ continue;
+ }
+
+ if (checkFwdCandidate(g, u, parents_u, e2, done)) {
+ dead->insert(e2);
+ }
+ }
+ }
+}
+
+/** \brief Forward edge redundancy pass.
+ *
+ * An edge e from w to v is redundant if there exists an edge e' such that:
+ * e' is from u to v
+ * and: reach(w) is a subset of reach(u)
+ * and: proper_pred(w) is a subset of pred(u)
+ * and: self_loop(w) implies self_loop(u) or edge from (w to u)
+ *
+ * Note: edges to accepts also require report ID checks.
+ */
+static
+bool removeEdgeRedundancyFwd(NGHolder &g, bool ignore_starts) {
+ set<NFAEdge> dead;
+ map<NFAVertex, bool> done;
+ flat_set<NFAVertex> parents_u;
+ flat_set<NFAVertex> possible_w;
+
+ for (auto u : vertices_range(g)) {
+ if (ignore_starts && (u == g.startDs || is_virtual_start(u, g))) {
+ continue;
+ }
+
+ parents_u.clear();
+ pred(g, u, &parents_u);
+
+ done.clear();
if (out_degree(u, g) > 1) {
- checkLargeOutU(g, u, parents_u, possible_w, done, &dead);
- } else {
- checkSmallOutU(g, u, parents_u, done, &dead);
- }
- }
-
- if (dead.empty()) {
- return false;
- }
-
- DEBUG_PRINTF("found %zu removable non-selfloops.\n", dead.size());
- remove_edges(dead, g);
- pruneUseless(g);
- return true;
-}
-
-/** Entry point: Runs all the edge redundancy passes. If SoM is tracked,
- * don't consider startDs or virtual starts as cyclic vertices. */
-bool removeEdgeRedundancy(NGHolder &g, som_type som, const CompileContext &cc) {
- if (!cc.grey.removeEdgeRedundancy) {
- return false;
- }
-
- bool changed = false;
- changed |= removeEdgeRedundancyNearCyclesFwd(g, som);
- changed |= removeEdgeRedundancyNearCyclesRev(g);
- changed |= removeEdgeRedundancyFwd(g, som);
- return changed;
-}
-
-/** \brief Removes optional stuff from the front of floating patterns, since it's
- * redundant with startDs.
- *
- * For each successor of startDs, remove any in-edges that aren't from either
- * start or startDs. This allows us to prune redundant vertices at the start of
- * a pattern:
- *
- * /(hat)?stand --> /stand/
- *
- */
-bool removeSiblingsOfStartDotStar(NGHolder &g) {
- vector<NFAEdge> dead;
-
- for (auto v : adjacent_vertices_range(g.startDs, g)) {
+ checkLargeOutU(g, u, parents_u, possible_w, done, &dead);
+ } else {
+ checkSmallOutU(g, u, parents_u, done, &dead);
+ }
+ }
+
+ if (dead.empty()) {
+ return false;
+ }
+
+ DEBUG_PRINTF("found %zu removable non-selfloops.\n", dead.size());
+ remove_edges(dead, g);
+ pruneUseless(g);
+ return true;
+}
+
+/** Entry point: Runs all the edge redundancy passes. If SoM is tracked,
+ * don't consider startDs or virtual starts as cyclic vertices. */
+bool removeEdgeRedundancy(NGHolder &g, som_type som, const CompileContext &cc) {
+ if (!cc.grey.removeEdgeRedundancy) {
+ return false;
+ }
+
+ bool changed = false;
+ changed |= removeEdgeRedundancyNearCyclesFwd(g, som);
+ changed |= removeEdgeRedundancyNearCyclesRev(g);
+ changed |= removeEdgeRedundancyFwd(g, som);
+ return changed;
+}
+
+/** \brief Removes optional stuff from the front of floating patterns, since it's
+ * redundant with startDs.
+ *
+ * For each successor of startDs, remove any in-edges that aren't from either
+ * start or startDs. This allows us to prune redundant vertices at the start of
+ * a pattern:
+ *
+ * /(hat)?stand --> /stand/
+ *
+ */
+bool removeSiblingsOfStartDotStar(NGHolder &g) {
+ vector<NFAEdge> dead;
+
+ for (auto v : adjacent_vertices_range(g.startDs, g)) {
DEBUG_PRINTF("checking %zu\n", g[v].index);
- if (is_special(v, g)) {
- continue;
- }
-
- for (const auto &e : in_edges_range(v, g)) {
- NFAVertex u = source(e, g);
- if (is_special(u, g)) {
- continue;
- }
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ for (const auto &e : in_edges_range(v, g)) {
+ NFAVertex u = source(e, g);
+ if (is_special(u, g)) {
+ continue;
+ }
DEBUG_PRINTF("removing %zu->%zu\n", g[u].index, g[v].index);
- dead.push_back(e);
- }
- }
-
- if (dead.empty()) {
- return false;
- }
-
- DEBUG_PRINTF("found %zu removable edges.\n", dead.size());
- remove_edges(dead, g);
- pruneUseless(g);
- return true;
-}
-
-/** Removes all edges into virtual starts other than those from start/startDs,
- * providing there is an edge from startDs. This operation is an optimisation
- * for SOM mode. (see UE-1544) */
-bool optimiseVirtualStarts(NGHolder &g) {
- vector<NFAEdge> dead;
- for (auto v : adjacent_vertices_range(g.startDs, g)) {
- u32 flags = g[v].assert_flags;
- if (!(flags & POS_FLAG_VIRTUAL_START)) {
- continue;
- }
-
- for (const auto &e : in_edges_range(v, g)) {
- if (!is_any_start(source(e, g), g)) {
- dead.push_back(e);
- }
- }
- }
-
- if (dead.empty()) {
- return false;
- }
-
- DEBUG_PRINTF("removing %zu edges into virtual starts\n", dead.size());
- remove_edges(dead, g);
- pruneUseless(g);
- return true;
-}
-
-} // namespace ue2
+ dead.push_back(e);
+ }
+ }
+
+ if (dead.empty()) {
+ return false;
+ }
+
+ DEBUG_PRINTF("found %zu removable edges.\n", dead.size());
+ remove_edges(dead, g);
+ pruneUseless(g);
+ return true;
+}
+
+/** Removes all edges into virtual starts other than those from start/startDs,
+ * providing there is an edge from startDs. This operation is an optimisation
+ * for SOM mode. (see UE-1544) */
+bool optimiseVirtualStarts(NGHolder &g) {
+ vector<NFAEdge> dead;
+ for (auto v : adjacent_vertices_range(g.startDs, g)) {
+ u32 flags = g[v].assert_flags;
+ if (!(flags & POS_FLAG_VIRTUAL_START)) {
+ continue;
+ }
+
+ for (const auto &e : in_edges_range(v, g)) {
+ if (!is_any_start(source(e, g), g)) {
+ dead.push_back(e);
+ }
+ }
+ }
+
+ if (dead.empty()) {
+ return false;
+ }
+
+ DEBUG_PRINTF("removing %zu edges into virtual starts\n", dead.size());
+ remove_edges(dead, g);
+ pruneUseless(g);
+ return true;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.h b/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.h
index f589ff727e..08cf31f26c 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_edge_redundancy.h
@@ -1,65 +1,65 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Edge redundancy graph reductions.
- */
-#ifndef NG_EDGE_REDUNDANCY_H
-#define NG_EDGE_REDUNDANCY_H
-
-#include "som/som.h"
-
-namespace ue2 {
-
-class NGHolder;
-struct CompileContext;
-
-/** \brief Entry point: Runs all the edge redundancy passes. */
-bool removeEdgeRedundancy(NGHolder &g, som_type som, const CompileContext &cc);
-
-/** \brief Removes optional stuff from the front of floating patterns, since
- * it's redundant with startDs.
- *
- * For each successor of startDs, remove any in-edges that aren't from either
- * start or startDs. This allows us to prune redundant vertices at the start of
- * a pattern:
- *
- * /(hat)?stand --> /stand/
- *
- */
-bool removeSiblingsOfStartDotStar(NGHolder &g);
-
-/** \brief Removes all edges into virtual starts other than those from
- * start/startDs, providing there is an edge from startDs.
- *
- * This operation is an optimisation for SOM mode. (see UE-1544) */
-bool optimiseVirtualStarts(NGHolder &g);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Edge redundancy graph reductions.
+ */
+#ifndef NG_EDGE_REDUNDANCY_H
+#define NG_EDGE_REDUNDANCY_H
+
+#include "som/som.h"
+
+namespace ue2 {
+
+class NGHolder;
+struct CompileContext;
+
+/** \brief Entry point: Runs all the edge redundancy passes. */
+bool removeEdgeRedundancy(NGHolder &g, som_type som, const CompileContext &cc);
+
+/** \brief Removes optional stuff from the front of floating patterns, since
+ * it's redundant with startDs.
+ *
+ * For each successor of startDs, remove any in-edges that aren't from either
+ * start or startDs. This allows us to prune redundant vertices at the start of
+ * a pattern:
+ *
+ * /(hat)?stand --> /stand/
+ *
+ */
+bool removeSiblingsOfStartDotStar(NGHolder &g);
+
+/** \brief Removes all edges into virtual starts other than those from
+ * start/startDs, providing there is an edge from startDs.
+ *
+ * This operation is an optimisation for SOM mode. (see UE-1544) */
+bool optimiseVirtualStarts(NGHolder &g);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.cpp
index 90d6fd8b75..fba8ce7b74 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.cpp
@@ -1,317 +1,317 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Equivalence class graph reduction pass.
- */
-
-#include "ng_equivalence.h"
-
-#include "grey.h"
-#include "ng_depth.h"
-#include "ng_holder.h"
-#include "ng_util.h"
-#include "util/compile_context.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Equivalence class graph reduction pass.
+ */
+
+#include "ng_equivalence.h"
+
+#include "grey.h"
+#include "ng_depth.h"
+#include "ng_holder.h"
+#include "ng_util.h"
+#include "util/compile_context.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
+#include "util/graph_range.h"
#include "util/make_unique.h"
#include "util/unordered.h"
-
-#include <algorithm>
+
+#include <algorithm>
#include <memory>
-#include <set>
-#include <stack>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-enum EquivalenceType {
+#include <set>
+#include <stack>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+enum EquivalenceType {
LEFT_EQUIVALENCE,
- RIGHT_EQUIVALENCE,
-};
-
-namespace {
-class VertexInfo;
-
-// custom comparison functor for unordered_set and flat_set
-struct VertexInfoPtrCmp {
- // for flat_set
- bool operator()(const VertexInfo *a, const VertexInfo *b) const;
-};
-
+ RIGHT_EQUIVALENCE,
+};
+
+namespace {
+class VertexInfo;
+
+// custom comparison functor for unordered_set and flat_set
+struct VertexInfoPtrCmp {
+ // for flat_set
+ bool operator()(const VertexInfo *a, const VertexInfo *b) const;
+};
+
using VertexInfoSet = flat_set<VertexInfo *, VertexInfoPtrCmp>;
-/** Precalculated (and maintained) information about a vertex. */
-class VertexInfo {
-public:
- VertexInfo(NFAVertex v_in, const NGHolder &g)
+/** Precalculated (and maintained) information about a vertex. */
+class VertexInfo {
+public:
+ VertexInfo(NFAVertex v_in, const NGHolder &g)
: v(v_in), vert_index(g[v].index), cr(g[v].char_reach),
- equivalence_class(~0), vertex_flags(g[v].assert_flags) {}
-
+ equivalence_class(~0), vertex_flags(g[v].assert_flags) {}
+
VertexInfoSet pred; //!< predecessors of this vertex
VertexInfoSet succ; //!< successors of this vertex
- NFAVertex v;
+ NFAVertex v;
size_t vert_index;
- CharReach cr;
- CharReach pred_cr;
- CharReach succ_cr;
+ CharReach cr;
+ CharReach pred_cr;
+ CharReach succ_cr;
flat_set<u32> edge_tops; /**< tops on edge from start */
- unsigned equivalence_class;
- unsigned vertex_flags;
-};
-
-// compare two vertex info pointers on their vertex index
-bool VertexInfoPtrCmp::operator()(const VertexInfo *a,
- const VertexInfo *b) const {
- return a->vert_index < b->vert_index;
-}
-
-// to avoid traversing infomap each time we need to check the class during
-// partitioning, we will cache the information pertaining to a particular class
-class ClassInfo {
-public:
- struct ClassDepth {
- ClassDepth() {}
- ClassDepth(const NFAVertexDepth &d)
- : d1(d.fromStart), d2(d.fromStartDotStar) {}
- ClassDepth(const NFAVertexRevDepth &rd)
- : d1(rd.toAccept), d2(rd.toAcceptEod) {}
- DepthMinMax d1;
- DepthMinMax d2;
- };
+ unsigned equivalence_class;
+ unsigned vertex_flags;
+};
+
+// compare two vertex info pointers on their vertex index
+bool VertexInfoPtrCmp::operator()(const VertexInfo *a,
+ const VertexInfo *b) const {
+ return a->vert_index < b->vert_index;
+}
+
+// to avoid traversing infomap each time we need to check the class during
+// partitioning, we will cache the information pertaining to a particular class
+class ClassInfo {
+public:
+ struct ClassDepth {
+ ClassDepth() {}
+ ClassDepth(const NFAVertexDepth &d)
+ : d1(d.fromStart), d2(d.fromStartDotStar) {}
+ ClassDepth(const NFAVertexRevDepth &rd)
+ : d1(rd.toAccept), d2(rd.toAcceptEod) {}
+ DepthMinMax d1;
+ DepthMinMax d2;
+ };
ClassInfo(const NGHolder &g, const VertexInfo &vi, const ClassDepth &d_in,
- EquivalenceType eq)
+ EquivalenceType eq)
: /* reports only matter for right-equiv */
rs(eq == RIGHT_EQUIVALENCE ? g[vi.v].reports : flat_set<ReportID>()),
vertex_flags(vi.vertex_flags), edge_tops(vi.edge_tops), cr(vi.cr),
adjacent_cr(eq == LEFT_EQUIVALENCE ? vi.pred_cr : vi.succ_cr),
/* treat non-special vertices the same */
node_type(min(g[vi.v].index, size_t{N_SPECIALS})), depth(d_in) {}
-
+
bool operator==(const ClassInfo &b) const {
return node_type == b.node_type && depth.d1 == b.depth.d1 &&
depth.d2 == b.depth.d2 && cr == b.cr &&
adjacent_cr == b.adjacent_cr && edge_tops == b.edge_tops &&
vertex_flags == b.vertex_flags && rs == b.rs;
}
-
+
size_t hash() const {
return hash_all(rs, vertex_flags, cr, adjacent_cr, node_type, depth.d1,
depth.d2);
- }
-
-private:
- flat_set<ReportID> rs; /* for right equiv only */
- unsigned vertex_flags;
+ }
+
+private:
+ flat_set<ReportID> rs; /* for right equiv only */
+ unsigned vertex_flags;
flat_set<u32> edge_tops;
- CharReach cr;
- CharReach adjacent_cr;
- unsigned node_type;
- ClassDepth depth;
-};
-
-// work queue class. this contraption has two goals:
-// 1. uniqueness of elements
-// 2. FILO operation
-class WorkQueue {
-public:
- explicit WorkQueue(unsigned c) {
- q.reserve(c);
- }
- // unique push
- void push(unsigned id) {
- if (ids.insert(id).second) {
- q.push_back(id);
- }
- }
-
- // pop
- unsigned pop() {
- unsigned id = q.back();
- ids.erase(id);
- q.pop_back();
- return id;
- }
-
- void append(WorkQueue &other) {
- for (const auto &e : other) {
- push(e);
- }
- }
-
- void clear() {
- ids.clear();
- q.clear();
- }
-
- bool empty() const {
- return ids.empty();
- }
-
- vector<unsigned>::const_iterator begin() const {
- return q.begin();
- }
-
- vector<unsigned>::const_iterator end() const {
- return q.end();
- }
-
- size_t capacity() const {
- return q.capacity();
- }
-private:
+ CharReach cr;
+ CharReach adjacent_cr;
+ unsigned node_type;
+ ClassDepth depth;
+};
+
+// work queue class. this contraption has two goals:
+// 1. uniqueness of elements
+// 2. FILO operation
+class WorkQueue {
+public:
+ explicit WorkQueue(unsigned c) {
+ q.reserve(c);
+ }
+ // unique push
+ void push(unsigned id) {
+ if (ids.insert(id).second) {
+ q.push_back(id);
+ }
+ }
+
+ // pop
+ unsigned pop() {
+ unsigned id = q.back();
+ ids.erase(id);
+ q.pop_back();
+ return id;
+ }
+
+ void append(WorkQueue &other) {
+ for (const auto &e : other) {
+ push(e);
+ }
+ }
+
+ void clear() {
+ ids.clear();
+ q.clear();
+ }
+
+ bool empty() const {
+ return ids.empty();
+ }
+
+ vector<unsigned>::const_iterator begin() const {
+ return q.begin();
+ }
+
+ vector<unsigned>::const_iterator end() const {
+ return q.end();
+ }
+
+ size_t capacity() const {
+ return q.capacity();
+ }
+private:
unordered_set<unsigned> ids; //!< stores id's, for uniqueness
- vector<unsigned> q; //!< vector of id's that we use as FILO.
-};
-
-}
-
-static
-bool outIsIrreducible(NFAVertex &v, const NGHolder &g) {
- unsigned nonSpecialVertices = 0;
- for (auto w : adjacent_vertices_range(v, g)) {
- if (!is_special(w, g) && w != v) {
- nonSpecialVertices++;
- }
- }
- return nonSpecialVertices == 1;
-}
-
-static
-bool inIsIrreducible(NFAVertex &v, const NGHolder &g) {
- unsigned nonSpecialVertices = 0;
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (!is_special(u, g) && u != v) {
- nonSpecialVertices++;
- }
- }
- return nonSpecialVertices == 1;
-}
-
-/** Cheaply check whether this graph can't be reduced at all, because it is
- * just a chain of vertices with no other edges. */
-static
-bool isIrreducible(const NGHolder &g) {
- for (auto v : vertices_range(g)) {
- // skip specials
- if (is_special(v, g)) {
- continue;
- }
-
- // we want meaningful in_degree to be 1. we also want to make sure we
- // don't count self-loop + 1 incoming edge as not irreducible
- if (in_degree(v, g) != 1 && !inIsIrreducible(v, g)) {
- return false;
- }
- // we want meaningful out_degree to be 1. we also want to make sure we
- // don't count self-loop + 1 outgoing edge as not irreducible
- if (out_degree(v, g) != 1 && !outIsIrreducible(v, g)) {
- return false;
- }
- }
-
- return true;
-}
-
-#ifndef NDEBUG
-static
-bool hasEdgeAsserts(NFAVertex v, const NGHolder &g) {
- for (const auto &e : in_edges_range(v, g)) {
- if (g[e].assert_flags != 0) {
- return true;
- }
- }
- for (const auto &e : out_edges_range(v, g)) {
- if (g[e].assert_flags != 0) {
- return true;
- }
- }
- return false;
-}
-#endif
-
-// populate VertexInfo table
-static
+ vector<unsigned> q; //!< vector of id's that we use as FILO.
+};
+
+}
+
+static
+bool outIsIrreducible(NFAVertex &v, const NGHolder &g) {
+ unsigned nonSpecialVertices = 0;
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (!is_special(w, g) && w != v) {
+ nonSpecialVertices++;
+ }
+ }
+ return nonSpecialVertices == 1;
+}
+
+static
+bool inIsIrreducible(NFAVertex &v, const NGHolder &g) {
+ unsigned nonSpecialVertices = 0;
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (!is_special(u, g) && u != v) {
+ nonSpecialVertices++;
+ }
+ }
+ return nonSpecialVertices == 1;
+}
+
+/** Cheaply check whether this graph can't be reduced at all, because it is
+ * just a chain of vertices with no other edges. */
+static
+bool isIrreducible(const NGHolder &g) {
+ for (auto v : vertices_range(g)) {
+ // skip specials
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ // we want meaningful in_degree to be 1. we also want to make sure we
+ // don't count self-loop + 1 incoming edge as not irreducible
+ if (in_degree(v, g) != 1 && !inIsIrreducible(v, g)) {
+ return false;
+ }
+ // we want meaningful out_degree to be 1. we also want to make sure we
+ // don't count self-loop + 1 outgoing edge as not irreducible
+ if (out_degree(v, g) != 1 && !outIsIrreducible(v, g)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+#ifndef NDEBUG
+static
+bool hasEdgeAsserts(NFAVertex v, const NGHolder &g) {
+ for (const auto &e : in_edges_range(v, g)) {
+ if (g[e].assert_flags != 0) {
+ return true;
+ }
+ }
+ for (const auto &e : out_edges_range(v, g)) {
+ if (g[e].assert_flags != 0) {
+ return true;
+ }
+ }
+ return false;
+}
+#endif
+
+// populate VertexInfo table
+static
vector<unique_ptr<VertexInfo>> getVertexInfos(const NGHolder &g) {
const size_t num_verts = num_vertices(g);
vector<unique_ptr<VertexInfo>> infos;
infos.reserve(num_verts * 2);
- vector<VertexInfo *> vertex_map; // indexed by vertex_index property
+ vector<VertexInfo *> vertex_map; // indexed by vertex_index property
vertex_map.resize(num_verts);
-
- for (auto v : vertices_range(g)) {
+
+ for (auto v : vertices_range(g)) {
infos.push_back(std::make_unique<VertexInfo>(v, g));
vertex_map[g[v].index] = infos.back().get();
}
-
+
// now, go through each vertex and populate its predecessor and successor
// lists
for (auto &vi : infos) {
assert(vi);
NFAVertex v = vi->v;
-
- // find predecessors
+
+ // find predecessors
for (const auto &e : in_edges_range(v, g)) {
- NFAVertex u = source(e, g);
+ NFAVertex u = source(e, g);
VertexInfo *u_vi = vertex_map[g[u].index];
-
+
vi->pred_cr |= u_vi->cr;
vi->pred.insert(u_vi);
-
- // also set up edge tops
- if (is_triggered(g) && u == g.start) {
+
+ // also set up edge tops
+ if (is_triggered(g) && u == g.start) {
vi->edge_tops = g[e].tops;
- }
- }
-
- // find successors
+ }
+ }
+
+ // find successors
for (auto w : adjacent_vertices_range(v, g)) {
VertexInfo *w_vi = vertex_map[g[w].index];
vi->succ_cr |= w_vi->cr;
vi->succ.insert(w_vi);
- }
+ }
assert(!hasEdgeAsserts(vi->v, g));
- }
+ }
return infos;
-}
-
-// store equivalence class in VertexInfo for each vertex
-static
+}
+
+// store equivalence class in VertexInfo for each vertex
+static
vector<VertexInfoSet> partitionGraph(vector<unique_ptr<VertexInfo>> &infos,
WorkQueue &work_queue, const NGHolder &g,
EquivalenceType eq) {
const size_t num_verts = infos.size();
-
+
vector<VertexInfoSet> classes;
ue2_unordered_map<ClassInfo, unsigned> classinfomap;
@@ -320,323 +320,323 @@ vector<VertexInfoSet> partitionGraph(vector<unique_ptr<VertexInfo>> &infos,
classes.reserve(num_verts);
classinfomap.reserve(num_verts);
- // get distances from start (or accept) for all vertices
- // only one of them is used at a time, never both
- vector<NFAVertexDepth> depths;
- vector<NFAVertexRevDepth> rdepths;
-
- if (eq == LEFT_EQUIVALENCE) {
+ // get distances from start (or accept) for all vertices
+ // only one of them is used at a time, never both
+ vector<NFAVertexDepth> depths;
+ vector<NFAVertexRevDepth> rdepths;
+
+ if (eq == LEFT_EQUIVALENCE) {
depths = calcDepths(g);
- } else {
+ } else {
rdepths = calcRevDepths(g);
- }
-
- // partition the graph based on CharReach
+ }
+
+ // partition the graph based on CharReach
for (auto &vi : infos) {
assert(vi);
- ClassInfo::ClassDepth depth;
-
- if (eq == LEFT_EQUIVALENCE) {
+ ClassInfo::ClassDepth depth;
+
+ if (eq == LEFT_EQUIVALENCE) {
depth = depths[vi->vert_index];
- } else {
+ } else {
depth = rdepths[vi->vert_index];
- }
+ }
ClassInfo ci(g, *vi, depth, eq);
-
- auto ii = classinfomap.find(ci);
- if (ii == classinfomap.end()) {
+
+ auto ii = classinfomap.find(ci);
+ if (ii == classinfomap.end()) {
// vertex is in a new equivalence class by itself.
unsigned eq_class = classes.size();
vi->equivalence_class = eq_class;
classes.push_back({vi.get()});
classinfomap.emplace(move(ci), eq_class);
- } else {
+ } else {
// vertex is added to an existing class.
- unsigned eq_class = ii->second;
+ unsigned eq_class = ii->second;
vi->equivalence_class = eq_class;
classes.at(eq_class).insert(vi.get());
-
- // we now know that this particular class has more than one
- // vertex, so we add it to the work queue
- work_queue.push(eq_class);
- }
- }
+
+ // we now know that this particular class has more than one
+ // vertex, so we add it to the work queue
+ work_queue.push(eq_class);
+ }
+ }
DEBUG_PRINTF("partitioned, %zu equivalence classes\n", classes.size());
return classes;
-}
-
-// generalized equivalence processing (left and right)
-// basically, goes through every vertex in a class and checks if all successor or
-// predecessor classes match in all vertices. if classes mismatch, a vertex is
-// split into a separate class, along with all vertices having the same set of
-// successor/predecessor classes. the opposite side (successors for left
-// equivalence, predecessors for right equivalence) classes get revalidated in
-// case of a split.
-static
+}
+
+// generalized equivalence processing (left and right)
+// basically, goes through every vertex in a class and checks if all successor or
+// predecessor classes match in all vertices. if classes mismatch, a vertex is
+// split into a separate class, along with all vertices having the same set of
+// successor/predecessor classes. the opposite side (successors for left
+// equivalence, predecessors for right equivalence) classes get revalidated in
+// case of a split.
+static
void equivalence(vector<VertexInfoSet> &classes, WorkQueue &work_queue,
- EquivalenceType eq_type) {
- // now, go through the work queue until it's empty
- map<flat_set<unsigned>, VertexInfoSet> tentative_classmap;
- flat_set<unsigned> cur_classes;
- // local work queue, to store classes we want to revalidate in case of split
- WorkQueue reval_queue(work_queue.capacity());
-
- while (!work_queue.empty()) {
- // dequeue our class from the work queue
- unsigned cur_class = work_queue.pop();
-
- // get all vertices in current equivalence class
+ EquivalenceType eq_type) {
+ // now, go through the work queue until it's empty
+ map<flat_set<unsigned>, VertexInfoSet> tentative_classmap;
+ flat_set<unsigned> cur_classes;
+ // local work queue, to store classes we want to revalidate in case of split
+ WorkQueue reval_queue(work_queue.capacity());
+
+ while (!work_queue.empty()) {
+ // dequeue our class from the work queue
+ unsigned cur_class = work_queue.pop();
+
+ // get all vertices in current equivalence class
VertexInfoSet &cur_class_vertices = classes.at(cur_class);
-
- if (cur_class_vertices.size() < 2) {
- continue;
- }
-
- // clear data from previous iterations
- tentative_classmap.clear();
-
- DEBUG_PRINTF("doing equivalence pass for class %u, %zd vertices\n",
- cur_class, cur_class_vertices.size());
-
- // go through vertices in this class
- for (VertexInfo *vi : cur_class_vertices) {
- cur_classes.clear();
-
- // get vertex lists for equivalence vertices and vertices for
- // revalidation in case of split
- const auto &eq_vertices =
- (eq_type == LEFT_EQUIVALENCE) ? vi->pred : vi->succ;
- const auto &reval_vertices =
- (eq_type == LEFT_EQUIVALENCE) ? vi->succ : vi->pred;
-
- // go through equivalence and note the classes
- for (const VertexInfo *tmp : eq_vertices) {
- cur_classes.insert(tmp->equivalence_class);
- }
-
- // note all the classes that need to be reevaluated
- for (const VertexInfo *tmp : reval_vertices) {
- reval_queue.push(tmp->equivalence_class);
- }
-
- VertexInfoSet &tentative_classes = tentative_classmap[cur_classes];
- tentative_classes.insert(vi);
- }
-
- // if we found more than one class, split and revalidate everything
- if (tentative_classmap.size() > 1) {
- auto tmi = tentative_classmap.begin();
-
- // start from the second class
- for (++tmi; tmi != tentative_classmap.end(); ++tmi) {
- const VertexInfoSet &vertices_to_split = tmi->second;
+
+ if (cur_class_vertices.size() < 2) {
+ continue;
+ }
+
+ // clear data from previous iterations
+ tentative_classmap.clear();
+
+ DEBUG_PRINTF("doing equivalence pass for class %u, %zd vertices\n",
+ cur_class, cur_class_vertices.size());
+
+ // go through vertices in this class
+ for (VertexInfo *vi : cur_class_vertices) {
+ cur_classes.clear();
+
+ // get vertex lists for equivalence vertices and vertices for
+ // revalidation in case of split
+ const auto &eq_vertices =
+ (eq_type == LEFT_EQUIVALENCE) ? vi->pred : vi->succ;
+ const auto &reval_vertices =
+ (eq_type == LEFT_EQUIVALENCE) ? vi->succ : vi->pred;
+
+ // go through equivalence and note the classes
+ for (const VertexInfo *tmp : eq_vertices) {
+ cur_classes.insert(tmp->equivalence_class);
+ }
+
+ // note all the classes that need to be reevaluated
+ for (const VertexInfo *tmp : reval_vertices) {
+ reval_queue.push(tmp->equivalence_class);
+ }
+
+ VertexInfoSet &tentative_classes = tentative_classmap[cur_classes];
+ tentative_classes.insert(vi);
+ }
+
+ // if we found more than one class, split and revalidate everything
+ if (tentative_classmap.size() > 1) {
+ auto tmi = tentative_classmap.begin();
+
+ // start from the second class
+ for (++tmi; tmi != tentative_classmap.end(); ++tmi) {
+ const VertexInfoSet &vertices_to_split = tmi->second;
unsigned new_class = classes.size();
VertexInfoSet new_class_vertices;
-
- for (VertexInfo *vi : vertices_to_split) {
- vi->equivalence_class = new_class;
+
+ for (VertexInfo *vi : vertices_to_split) {
+ vi->equivalence_class = new_class;
// note: we cannot use the cur_class_vertices ref, as it is
// invalidated by modifications to the classes vector.
classes[cur_class].erase(vi);
- new_class_vertices.insert(vi);
- }
+ new_class_vertices.insert(vi);
+ }
classes.push_back(move(new_class_vertices));
if (contains(tmi->first, cur_class)) {
- reval_queue.push(new_class);
- }
- }
- work_queue.append(reval_queue);
- }
- reval_queue.clear();
- }
-}
-
-static
-bool require_separate_eod_vertex(const VertexInfoSet &vert_infos,
- const NGHolder &g) {
- /* We require separate eod and normal accept vertices for a class if we have
- * both normal accepts and eod accepts AND the reports are different for eod
- * and non-eod reports. */
-
- flat_set<ReportID> non_eod;
- flat_set<ReportID> eod;
-
- for (const VertexInfo *vi : vert_infos) {
- NFAVertex v = vi->v;
-
- if (edge(v, g.accept, g).second) {
- insert(&non_eod, g[v].reports);
- }
-
- if (edge(v, g.acceptEod, g).second) {
- insert(&eod, g[v].reports);
- }
- }
-
- if (non_eod.empty() || eod.empty()) {
- return false;
- }
-
- return non_eod != eod;
-
-}
-
-static
+ reval_queue.push(new_class);
+ }
+ }
+ work_queue.append(reval_queue);
+ }
+ reval_queue.clear();
+ }
+}
+
+static
+bool require_separate_eod_vertex(const VertexInfoSet &vert_infos,
+ const NGHolder &g) {
+ /* We require separate eod and normal accept vertices for a class if we have
+ * both normal accepts and eod accepts AND the reports are different for eod
+ * and non-eod reports. */
+
+ flat_set<ReportID> non_eod;
+ flat_set<ReportID> eod;
+
+ for (const VertexInfo *vi : vert_infos) {
+ NFAVertex v = vi->v;
+
+ if (edge(v, g.accept, g).second) {
+ insert(&non_eod, g[v].reports);
+ }
+
+ if (edge(v, g.acceptEod, g).second) {
+ insert(&eod, g[v].reports);
+ }
+ }
+
+ if (non_eod.empty() || eod.empty()) {
+ return false;
+ }
+
+ return non_eod != eod;
+
+}
+
+static
void mergeClass(vector<unique_ptr<VertexInfo>> &infos, NGHolder &g,
unsigned eq_class, VertexInfoSet &cur_class_vertices,
set<NFAVertex> *toRemove) {
- DEBUG_PRINTF("Replacing %zd vertices from equivalence class %u with a "
- "single vertex.\n", cur_class_vertices.size(), eq_class);
-
- // replace equivalence class with a single vertex:
- // 1. create new vertex with matching properties
- // 2. wire all predecessors to new vertex
- // 2a. update info for new vertex with new predecessors
- // 2b. update each predecessor's successor list
- // 3. wire all successors to new vertex
- // 3a. update info for new vertex with new successors
- // 3b. update each successor's predecessor list
- // 4. remove old vertex
-
- // any differences between vertex properties were resolved during
- // initial partitioning, so we assume that every vertex in equivalence
- // class has the same CharReach et al.
- // so, we find the first vertex in our class and get all its properties
-
- /* For left equivalence, if the members have different reporting behaviour
- * we sometimes require two vertices to be created (one connected to accept
- * and one to accepteod) */
-
- NFAVertex old_v = (*cur_class_vertices.begin())->v;
- NFAVertex new_v = clone_vertex(g, old_v); /* set up new vertex with same
- * props */
- g[new_v].reports.clear(); /* populated as we pull in succs */
-
- // store this vertex in our global vertex list
+ DEBUG_PRINTF("Replacing %zd vertices from equivalence class %u with a "
+ "single vertex.\n", cur_class_vertices.size(), eq_class);
+
+ // replace equivalence class with a single vertex:
+ // 1. create new vertex with matching properties
+ // 2. wire all predecessors to new vertex
+ // 2a. update info for new vertex with new predecessors
+ // 2b. update each predecessor's successor list
+ // 3. wire all successors to new vertex
+ // 3a. update info for new vertex with new successors
+ // 3b. update each successor's predecessor list
+ // 4. remove old vertex
+
+ // any differences between vertex properties were resolved during
+ // initial partitioning, so we assume that every vertex in equivalence
+ // class has the same CharReach et al.
+ // so, we find the first vertex in our class and get all its properties
+
+ /* For left equivalence, if the members have different reporting behaviour
+ * we sometimes require two vertices to be created (one connected to accept
+ * and one to accepteod) */
+
+ NFAVertex old_v = (*cur_class_vertices.begin())->v;
+ NFAVertex new_v = clone_vertex(g, old_v); /* set up new vertex with same
+ * props */
+ g[new_v].reports.clear(); /* populated as we pull in succs */
+
+ // store this vertex in our global vertex list
infos.push_back(std::make_unique<VertexInfo>(new_v, g));
VertexInfo *new_vertex_info = infos.back().get();
-
- NFAVertex new_v_eod = NGHolder::null_vertex();
- VertexInfo *new_vertex_info_eod = nullptr;
-
- if (require_separate_eod_vertex(cur_class_vertices, g)) {
- new_v_eod = clone_vertex(g, old_v);
- g[new_v_eod].reports.clear();
+
+ NFAVertex new_v_eod = NGHolder::null_vertex();
+ VertexInfo *new_vertex_info_eod = nullptr;
+
+ if (require_separate_eod_vertex(cur_class_vertices, g)) {
+ new_v_eod = clone_vertex(g, old_v);
+ g[new_v_eod].reports.clear();
infos.push_back(std::make_unique<VertexInfo>(new_v_eod, g));
new_vertex_info_eod = infos.back().get();
- }
-
+ }
+
const auto &edgetops = (*cur_class_vertices.begin())->edge_tops;
- for (VertexInfo *old_vertex_info : cur_class_vertices) {
- assert(old_vertex_info->equivalence_class == eq_class);
-
- // mark this vertex for removal
- toRemove->insert(old_vertex_info->v);
-
- // for each predecessor, add edge to new vertex and update info
- for (VertexInfo *pred_info : old_vertex_info->pred) {
- // update info for new vertex
- new_vertex_info->pred.insert(pred_info);
- if (new_vertex_info_eod) {
- new_vertex_info_eod->pred.insert(pred_info);
- }
-
- // update info for predecessor
- pred_info->succ.erase(old_vertex_info);
-
- // if edge doesn't exist, create it
+ for (VertexInfo *old_vertex_info : cur_class_vertices) {
+ assert(old_vertex_info->equivalence_class == eq_class);
+
+ // mark this vertex for removal
+ toRemove->insert(old_vertex_info->v);
+
+ // for each predecessor, add edge to new vertex and update info
+ for (VertexInfo *pred_info : old_vertex_info->pred) {
+ // update info for new vertex
+ new_vertex_info->pred.insert(pred_info);
+ if (new_vertex_info_eod) {
+ new_vertex_info_eod->pred.insert(pred_info);
+ }
+
+ // update info for predecessor
+ pred_info->succ.erase(old_vertex_info);
+
+ // if edge doesn't exist, create it
NFAEdge e = add_edge_if_not_present(pred_info->v, new_v, g);
-
+
// put edge tops, if applicable
if (!edgetops.empty()) {
assert(g[e].tops.empty() || g[e].tops == edgetops);
g[e].tops = edgetops;
- }
-
- pred_info->succ.insert(new_vertex_info);
-
- if (new_v_eod) {
- NFAEdge ee = add_edge_if_not_present(pred_info->v, new_v_eod,
+ }
+
+ pred_info->succ.insert(new_vertex_info);
+
+ if (new_v_eod) {
+ NFAEdge ee = add_edge_if_not_present(pred_info->v, new_v_eod,
g);
-
+
// put edge tops, if applicable
if (!edgetops.empty()) {
assert(g[e].tops.empty() || g[e].tops == edgetops);
g[ee].tops = edgetops;
- }
-
- pred_info->succ.insert(new_vertex_info_eod);
- }
- }
-
- // for each successor, add edge from new vertex and update info
- for (VertexInfo *succ_info : old_vertex_info->succ) {
- NFAVertex succ_v = succ_info->v;
-
- // update info for successor
- succ_info->pred.erase(old_vertex_info);
-
- if (new_v_eod && succ_v == g.acceptEod) {
- // update info for new vertex
- new_vertex_info_eod->succ.insert(succ_info);
- insert(&g[new_v_eod].reports,
- g[old_vertex_info->v].reports);
-
- add_edge_if_not_present(new_v_eod, succ_v, g);
- succ_info->pred.insert(new_vertex_info_eod);
- } else {
- // update info for new vertex
- new_vertex_info->succ.insert(succ_info);
-
- // if edge doesn't exist, create it
- add_edge_if_not_present(new_v, succ_v, g);
- succ_info->pred.insert(new_vertex_info);
-
- if (is_any_accept(succ_v, g)) {
- insert(&g[new_v].reports,
- g[old_vertex_info->v].reports);
- }
- }
- }
- }
-
- // update classmap
- new_vertex_info->equivalence_class = eq_class;
- cur_class_vertices.insert(new_vertex_info);
-}
-
-// walk through vertices of an equivalence class and replace them with a single
-// vertex (or, in rare cases for left equiv, a pair if we cannot satisfy the
-// report behaviour with a single vertex).
-static
+ }
+
+ pred_info->succ.insert(new_vertex_info_eod);
+ }
+ }
+
+ // for each successor, add edge from new vertex and update info
+ for (VertexInfo *succ_info : old_vertex_info->succ) {
+ NFAVertex succ_v = succ_info->v;
+
+ // update info for successor
+ succ_info->pred.erase(old_vertex_info);
+
+ if (new_v_eod && succ_v == g.acceptEod) {
+ // update info for new vertex
+ new_vertex_info_eod->succ.insert(succ_info);
+ insert(&g[new_v_eod].reports,
+ g[old_vertex_info->v].reports);
+
+ add_edge_if_not_present(new_v_eod, succ_v, g);
+ succ_info->pred.insert(new_vertex_info_eod);
+ } else {
+ // update info for new vertex
+ new_vertex_info->succ.insert(succ_info);
+
+ // if edge doesn't exist, create it
+ add_edge_if_not_present(new_v, succ_v, g);
+ succ_info->pred.insert(new_vertex_info);
+
+ if (is_any_accept(succ_v, g)) {
+ insert(&g[new_v].reports,
+ g[old_vertex_info->v].reports);
+ }
+ }
+ }
+ }
+
+ // update classmap
+ new_vertex_info->equivalence_class = eq_class;
+ cur_class_vertices.insert(new_vertex_info);
+}
+
+// walk through vertices of an equivalence class and replace them with a single
+// vertex (or, in rare cases for left equiv, a pair if we cannot satisfy the
+// report behaviour with a single vertex).
+static
bool mergeEquivalentClasses(vector<VertexInfoSet> &classes,
vector<unique_ptr<VertexInfo>> &infos,
- NGHolder &g) {
- bool merged = false;
- set<NFAVertex> toRemove;
-
- // go through all classes and merge classes with more than one vertex
+ NGHolder &g) {
+ bool merged = false;
+ set<NFAVertex> toRemove;
+
+ // go through all classes and merge classes with more than one vertex
for (unsigned eq_class = 0; eq_class < classes.size(); eq_class++) {
- // get all vertices in current equivalence class
+ // get all vertices in current equivalence class
VertexInfoSet &cur_class_vertices = classes[eq_class];
-
- // we don't care for single-vertex classes
- if (cur_class_vertices.size() > 1) {
- merged = true;
- mergeClass(infos, g, eq_class, cur_class_vertices, &toRemove);
- }
- }
-
- // remove all dead vertices
- DEBUG_PRINTF("removing %zd vertices.\n", toRemove.size());
- remove_vertices(toRemove, g);
-
- return merged;
-}
-
+
+ // we don't care for single-vertex classes
+ if (cur_class_vertices.size() > 1) {
+ merged = true;
+ mergeClass(infos, g, eq_class, cur_class_vertices, &toRemove);
+ }
+ }
+
+ // remove all dead vertices
+ DEBUG_PRINTF("removing %zd vertices.\n", toRemove.size());
+ remove_vertices(toRemove, g);
+
+ return merged;
+}
+
static
bool reduceGraphEquivalences(NGHolder &g, EquivalenceType eq_type) {
// create a list of equivalence classes to check
@@ -657,26 +657,26 @@ bool reduceGraphEquivalences(NGHolder &g, EquivalenceType eq_type) {
return mergeEquivalentClasses(classes, infos, g);
}
-bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) {
- if (!cc.grey.equivalenceEnable) {
- DEBUG_PRINTF("equivalence processing disabled in grey box\n");
- return false;
- }
+bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) {
+ if (!cc.grey.equivalenceEnable) {
+ DEBUG_PRINTF("equivalence processing disabled in grey box\n");
+ return false;
+ }
renumber_vertices(g);
-
- // Cheap check: if all the non-special vertices have in-degree one and
- // out-degree one, there's no redundancy in this here graph and we can
- // vamoose.
- if (isIrreducible(g)) {
- DEBUG_PRINTF("skipping equivalence processing, graph is irreducible\n");
- return false;
- }
-
- // take note if we have merged any vertices
- bool merge = false;
+
+ // Cheap check: if all the non-special vertices have in-degree one and
+ // out-degree one, there's no redundancy in this here graph and we can
+ // vamoose.
+ if (isIrreducible(g)) {
+ DEBUG_PRINTF("skipping equivalence processing, graph is irreducible\n");
+ return false;
+ }
+
+ // take note if we have merged any vertices
+ bool merge = false;
merge |= reduceGraphEquivalences(g, LEFT_EQUIVALENCE);
merge |= reduceGraphEquivalences(g, RIGHT_EQUIVALENCE);
- return merge;
-}
-
-} // namespace ue2
+ return merge;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.h b/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.h
index d716841e94..ef8f92e7e3 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_equivalence.h
@@ -1,47 +1,47 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Equivalence class graph reduction pass.
- */
-
-#ifndef NG_EQUIVALENCE_H_
-#define NG_EQUIVALENCE_H_
-
-namespace ue2 {
-
-class NGHolder;
-struct CompileContext;
-
-/** Attempt to make the NFA graph \p g smaller by performing a number of local
- * transformations. */
-bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc);
-
-} // namespace ue2
-
-#endif /* NG_EQUIVALENCE_H_ */
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Equivalence class graph reduction pass.
+ */
+
+#ifndef NG_EQUIVALENCE_H_
+#define NG_EQUIVALENCE_H_
+
+namespace ue2 {
+
+class NGHolder;
+struct CompileContext;
+
+/** Attempt to make the NFA graph \p g smaller by performing a number of local
+ * transformations. */
+bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc);
+
+} // namespace ue2
+
+#endif /* NG_EQUIVALENCE_H_ */
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_execute.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_execute.cpp
index 9ef0f01ce7..9d90489471 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_execute.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_execute.cpp
@@ -1,328 +1,328 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Execute an NFA over a given input, returning the set of states that
- * are active afterwards.
- *
- * Note: although our external interfaces for execute_graph() use std::set, we
- * use a dynamic bitset containing the vertex indices internally for
- * performance.
- */
-#include "ng_execute.h"
-
-#include "ng_holder.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph_range.h"
-#include "util/ue2string.h"
-
-#include <sstream>
-#include <string>
-
-#include <boost/dynamic_bitset.hpp>
-#include <boost/graph/depth_first_search.hpp>
-#include <boost/graph/reverse_graph.hpp>
-
-using namespace std;
-using boost::dynamic_bitset;
-
-namespace ue2 {
-
-struct StateInfo {
- StateInfo(NFAVertex v, const CharReach &cr) : vertex(v), reach(cr) {}
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Execute an NFA over a given input, returning the set of states that
+ * are active afterwards.
+ *
+ * Note: although our external interfaces for execute_graph() use std::set, we
+ * use a dynamic bitset containing the vertex indices internally for
+ * performance.
+ */
+#include "ng_execute.h"
+
+#include "ng_holder.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
+#include "util/ue2string.h"
+
+#include <sstream>
+#include <string>
+
+#include <boost/dynamic_bitset.hpp>
+#include <boost/graph/depth_first_search.hpp>
+#include <boost/graph/reverse_graph.hpp>
+
+using namespace std;
+using boost::dynamic_bitset;
+
+namespace ue2 {
+
+struct StateInfo {
+ StateInfo(NFAVertex v, const CharReach &cr) : vertex(v), reach(cr) {}
StateInfo() : vertex(NGHolder::null_vertex()) {}
- NFAVertex vertex;
- CharReach reach;
-};
-
-#ifdef DEBUG
-static
-std::string dumpStates(const dynamic_bitset<> &s) {
- std::ostringstream oss;
- for (size_t i = s.find_first(); i != s.npos; i = s.find_next(i)) {
- oss << i << " ";
- }
- return oss.str();
-}
-#endif
-
-static
-void step(const NGHolder &g, const vector<StateInfo> &info,
- const dynamic_bitset<> &in, dynamic_bitset<> *out) {
- out->reset();
- for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
- NFAVertex u = info[i].vertex;
- for (auto v : adjacent_vertices_range(u, g)) {
- out->set(g[v].index);
- }
- }
-}
-
-static
-void filter_by_reach(const vector<StateInfo> &info, dynamic_bitset<> *states,
- const CharReach &cr) {
- for (size_t i = states->find_first(); i != states->npos;
- i = states->find_next(i)) {
- if ((info[i].reach & cr).none()) {
- states->reset(i);
- }
- }
-}
-
-template<typename inputT>
-static
-void execute_graph_i(const NGHolder &g, const vector<StateInfo> &info,
- const inputT &input, dynamic_bitset<> *states,
- bool kill_sds) {
- dynamic_bitset<> &curr = *states;
- dynamic_bitset<> next(curr.size());
- DEBUG_PRINTF("%zu states in\n", states->count());
-
- for (const auto &e : input) {
- DEBUG_PRINTF("processing %s\n", describeClass(e).c_str());
- step(g, info, curr, &next);
- if (kill_sds) {
- next.reset(NODE_START_DOTSTAR);
- }
- filter_by_reach(info, &next, e);
- next.swap(curr);
-
- if (curr.empty()) {
- DEBUG_PRINTF("went dead\n");
- break;
- }
- }
-
- DEBUG_PRINTF("%zu states out\n", states->size());
-}
-
-static
-dynamic_bitset<> makeStateBitset(const NGHolder &g,
- const flat_set<NFAVertex> &in) {
- dynamic_bitset<> work_states(num_vertices(g));
- for (const auto &v : in) {
- u32 idx = g[v].index;
- work_states.set(idx);
- }
- return work_states;
-}
-
-static
-flat_set<NFAVertex> getVertices(const dynamic_bitset<> &in,
- const vector<StateInfo> &info) {
- flat_set<NFAVertex> out;
- for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
- out.insert(info[i].vertex);
- }
- return out;
-}
-
-static
-vector<StateInfo> makeInfoTable(const NGHolder &g) {
- vector<StateInfo> info(num_vertices(g));
- for (auto v : vertices_range(g)) {
- u32 idx = g[v].index;
- const CharReach &cr = g[v].char_reach;
- assert(idx < info.size());
- info[idx] = StateInfo(v, cr);
- }
- return info;
-}
-
-flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input,
- const flat_set<NFAVertex> &initial_states,
- bool kill_sds) {
- assert(hasCorrectlyNumberedVertices(g));
-
- auto info = makeInfoTable(g);
- auto work_states = makeStateBitset(g, initial_states);
-
- execute_graph_i(g, info, input, &work_states, kill_sds);
-
- return getVertices(work_states, info);
-}
-
-flat_set<NFAVertex> execute_graph(const NGHolder &g,
- const vector<CharReach> &input,
- const flat_set<NFAVertex> &initial_states) {
- assert(hasCorrectlyNumberedVertices(g));
-
- auto info = makeInfoTable(g);
- auto work_states = makeStateBitset(g, initial_states);
-
- execute_graph_i(g, info, input, &work_states, false);
-
- return getVertices(work_states, info);
-}
-
-namespace {
-class eg_visitor : public boost::default_dfs_visitor {
-public:
- eg_visitor(const NGHolder &running_g_in, const vector<StateInfo> &info_in,
- const NGHolder &input_g_in,
- map<NFAVertex, dynamic_bitset<> > &states_in)
- : vertex_count(num_vertices(running_g_in)), running_g(running_g_in),
- info(info_in), input_g(input_g_in), states(states_in),
- succs(vertex_count) {}
-
+ NFAVertex vertex;
+ CharReach reach;
+};
+
+#ifdef DEBUG
+static
+std::string dumpStates(const dynamic_bitset<> &s) {
+ std::ostringstream oss;
+ for (size_t i = s.find_first(); i != s.npos; i = s.find_next(i)) {
+ oss << i << " ";
+ }
+ return oss.str();
+}
+#endif
+
+static
+void step(const NGHolder &g, const vector<StateInfo> &info,
+ const dynamic_bitset<> &in, dynamic_bitset<> *out) {
+ out->reset();
+ for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
+ NFAVertex u = info[i].vertex;
+ for (auto v : adjacent_vertices_range(u, g)) {
+ out->set(g[v].index);
+ }
+ }
+}
+
+static
+void filter_by_reach(const vector<StateInfo> &info, dynamic_bitset<> *states,
+ const CharReach &cr) {
+ for (size_t i = states->find_first(); i != states->npos;
+ i = states->find_next(i)) {
+ if ((info[i].reach & cr).none()) {
+ states->reset(i);
+ }
+ }
+}
+
+template<typename inputT>
+static
+void execute_graph_i(const NGHolder &g, const vector<StateInfo> &info,
+ const inputT &input, dynamic_bitset<> *states,
+ bool kill_sds) {
+ dynamic_bitset<> &curr = *states;
+ dynamic_bitset<> next(curr.size());
+ DEBUG_PRINTF("%zu states in\n", states->count());
+
+ for (const auto &e : input) {
+ DEBUG_PRINTF("processing %s\n", describeClass(e).c_str());
+ step(g, info, curr, &next);
+ if (kill_sds) {
+ next.reset(NODE_START_DOTSTAR);
+ }
+ filter_by_reach(info, &next, e);
+ next.swap(curr);
+
+ if (curr.empty()) {
+ DEBUG_PRINTF("went dead\n");
+ break;
+ }
+ }
+
+ DEBUG_PRINTF("%zu states out\n", states->size());
+}
+
+static
+dynamic_bitset<> makeStateBitset(const NGHolder &g,
+ const flat_set<NFAVertex> &in) {
+ dynamic_bitset<> work_states(num_vertices(g));
+ for (const auto &v : in) {
+ u32 idx = g[v].index;
+ work_states.set(idx);
+ }
+ return work_states;
+}
+
+static
+flat_set<NFAVertex> getVertices(const dynamic_bitset<> &in,
+ const vector<StateInfo> &info) {
+ flat_set<NFAVertex> out;
+ for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
+ out.insert(info[i].vertex);
+ }
+ return out;
+}
+
+static
+vector<StateInfo> makeInfoTable(const NGHolder &g) {
+ vector<StateInfo> info(num_vertices(g));
+ for (auto v : vertices_range(g)) {
+ u32 idx = g[v].index;
+ const CharReach &cr = g[v].char_reach;
+ assert(idx < info.size());
+ info[idx] = StateInfo(v, cr);
+ }
+ return info;
+}
+
+flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input,
+ const flat_set<NFAVertex> &initial_states,
+ bool kill_sds) {
+ assert(hasCorrectlyNumberedVertices(g));
+
+ auto info = makeInfoTable(g);
+ auto work_states = makeStateBitset(g, initial_states);
+
+ execute_graph_i(g, info, input, &work_states, kill_sds);
+
+ return getVertices(work_states, info);
+}
+
+flat_set<NFAVertex> execute_graph(const NGHolder &g,
+ const vector<CharReach> &input,
+ const flat_set<NFAVertex> &initial_states) {
+ assert(hasCorrectlyNumberedVertices(g));
+
+ auto info = makeInfoTable(g);
+ auto work_states = makeStateBitset(g, initial_states);
+
+ execute_graph_i(g, info, input, &work_states, false);
+
+ return getVertices(work_states, info);
+}
+
+namespace {
+class eg_visitor : public boost::default_dfs_visitor {
+public:
+ eg_visitor(const NGHolder &running_g_in, const vector<StateInfo> &info_in,
+ const NGHolder &input_g_in,
+ map<NFAVertex, dynamic_bitset<> > &states_in)
+ : vertex_count(num_vertices(running_g_in)), running_g(running_g_in),
+ info(info_in), input_g(input_g_in), states(states_in),
+ succs(vertex_count) {}
+
void finish_vertex(NFAVertex input_v,
const boost::reverse_graph<NGHolder, const NGHolder &> &) {
- if (input_v == input_g.accept) {
- return;
- }
- assert(input_v != input_g.acceptEod);
-
+ if (input_v == input_g.accept) {
+ return;
+ }
+ assert(input_v != input_g.acceptEod);
+
DEBUG_PRINTF("finished p%zu\n", input_g[input_v].index);
-
- /* finish vertex is called on vertex --> implies that all its parents
- * (in the forward graph) are also finished. Our parents will have
- * pushed all of their successors for us into our stateset. */
- states[input_v].resize(vertex_count);
- dynamic_bitset<> our_states = states[input_v];
- states[input_v].reset();
-
- filter_by_reach(info, &our_states,
- input_g[input_v].char_reach);
-
- if (input_v != input_g.startDs &&
- edge(input_v, input_v, input_g).second) {
- bool changed;
- do {
- DEBUG_PRINTF("actually not finished -> have self loop\n");
- succs.reset();
- step(running_g, info, our_states, &succs);
- filter_by_reach(info, &succs,
- input_g[input_v].char_reach);
- dynamic_bitset<> our_states2 = our_states | succs;
- changed = our_states2 != our_states;
- our_states.swap(our_states2);
- } while (changed);
- }
-
- DEBUG_PRINTF(" active rstates: %s\n", dumpStates(our_states).c_str());
-
- succs.reset();
- step(running_g, info, our_states, &succs);
-
- /* we need to push into all our (forward) children their successors
- * from us. */
- for (auto v : adjacent_vertices_range(input_v, input_g)) {
+
+ /* finish vertex is called on vertex --> implies that all its parents
+ * (in the forward graph) are also finished. Our parents will have
+ * pushed all of their successors for us into our stateset. */
+ states[input_v].resize(vertex_count);
+ dynamic_bitset<> our_states = states[input_v];
+ states[input_v].reset();
+
+ filter_by_reach(info, &our_states,
+ input_g[input_v].char_reach);
+
+ if (input_v != input_g.startDs &&
+ edge(input_v, input_v, input_g).second) {
+ bool changed;
+ do {
+ DEBUG_PRINTF("actually not finished -> have self loop\n");
+ succs.reset();
+ step(running_g, info, our_states, &succs);
+ filter_by_reach(info, &succs,
+ input_g[input_v].char_reach);
+ dynamic_bitset<> our_states2 = our_states | succs;
+ changed = our_states2 != our_states;
+ our_states.swap(our_states2);
+ } while (changed);
+ }
+
+ DEBUG_PRINTF(" active rstates: %s\n", dumpStates(our_states).c_str());
+
+ succs.reset();
+ step(running_g, info, our_states, &succs);
+
+ /* we need to push into all our (forward) children their successors
+ * from us. */
+ for (auto v : adjacent_vertices_range(input_v, input_g)) {
DEBUG_PRINTF("pushing our states to pstate %zu\n",
- input_g[v].index);
- if (v == input_g.startDs) {
- /* no need for intra start edges */
- continue;
- }
-
- states[v].resize(vertex_count); // May not yet exist
-
- if (v != input_g.accept) {
- states[v] |= succs;
- } else {
- /* accept is a magical pseudo state which does not consume
- * characters and we are using to collect the output states. We
- * must fill it with our states rather than our succs. */
- DEBUG_PRINTF("prev outputted rstates: %s\n",
- dumpStates(states[v]).c_str());
- DEBUG_PRINTF("outputted rstates: %s\n",
- dumpStates(our_states).c_str());
-
- states[v] |= our_states;
-
- DEBUG_PRINTF("new outputted rstates: %s\n",
- dumpStates(states[v]).c_str());
- }
- }
-
- /* note: the states at this vertex are no longer required */
- }
-
-private:
- const size_t vertex_count;
- const NGHolder &running_g;
- const vector<StateInfo> &info;
- const NGHolder &input_g;
- map<NFAVertex, dynamic_bitset<> > &states; /* vertex in input_g -> set of
- states in running_g */
- dynamic_bitset<> succs; // temp use internally
-};
-} // namespace
-
-flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
- const NGHolder &input_dag,
- const flat_set<NFAVertex> &input_start_states,
- const flat_set<NFAVertex> &initial_states) {
- DEBUG_PRINTF("g has %zu vertices, input_dag has %zu vertices\n",
- num_vertices(running_g), num_vertices(input_dag));
- assert(hasCorrectlyNumberedVertices(running_g));
- assert(in_degree(input_dag.acceptEod, input_dag) == 1);
-
- map<NFAVertex, boost::default_color_type> colours;
- /* could just a topo order, but really it is time to pull a slightly bigger
- * gun: DFS */
+ input_g[v].index);
+ if (v == input_g.startDs) {
+ /* no need for intra start edges */
+ continue;
+ }
+
+ states[v].resize(vertex_count); // May not yet exist
+
+ if (v != input_g.accept) {
+ states[v] |= succs;
+ } else {
+ /* accept is a magical pseudo state which does not consume
+ * characters and we are using to collect the output states. We
+ * must fill it with our states rather than our succs. */
+ DEBUG_PRINTF("prev outputted rstates: %s\n",
+ dumpStates(states[v]).c_str());
+ DEBUG_PRINTF("outputted rstates: %s\n",
+ dumpStates(our_states).c_str());
+
+ states[v] |= our_states;
+
+ DEBUG_PRINTF("new outputted rstates: %s\n",
+ dumpStates(states[v]).c_str());
+ }
+ }
+
+ /* note: the states at this vertex are no longer required */
+ }
+
+private:
+ const size_t vertex_count;
+ const NGHolder &running_g;
+ const vector<StateInfo> &info;
+ const NGHolder &input_g;
+ map<NFAVertex, dynamic_bitset<> > &states; /* vertex in input_g -> set of
+ states in running_g */
+ dynamic_bitset<> succs; // temp use internally
+};
+} // namespace
+
+flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
+ const NGHolder &input_dag,
+ const flat_set<NFAVertex> &input_start_states,
+ const flat_set<NFAVertex> &initial_states) {
+ DEBUG_PRINTF("g has %zu vertices, input_dag has %zu vertices\n",
+ num_vertices(running_g), num_vertices(input_dag));
+ assert(hasCorrectlyNumberedVertices(running_g));
+ assert(in_degree(input_dag.acceptEod, input_dag) == 1);
+
+ map<NFAVertex, boost::default_color_type> colours;
+ /* could just a topo order, but really it is time to pull a slightly bigger
+ * gun: DFS */
boost::reverse_graph<NGHolder, const NGHolder &> revg(input_dag);
- map<NFAVertex, dynamic_bitset<> > dfs_states;
-
- auto info = makeInfoTable(running_g);
- auto input_fs = makeStateBitset(running_g, initial_states);
-
- for (auto v : input_start_states) {
- dfs_states[v] = input_fs;
- }
-
- depth_first_visit(revg, input_dag.accept,
- eg_visitor(running_g, info, input_dag, dfs_states),
- make_assoc_property_map(colours));
-
- auto states = getVertices(dfs_states[input_dag.accept], info);
-
-#ifdef DEBUG
- DEBUG_PRINTF(" output rstates:");
- for (const auto &v : states) {
+ map<NFAVertex, dynamic_bitset<> > dfs_states;
+
+ auto info = makeInfoTable(running_g);
+ auto input_fs = makeStateBitset(running_g, initial_states);
+
+ for (auto v : input_start_states) {
+ dfs_states[v] = input_fs;
+ }
+
+ depth_first_visit(revg, input_dag.accept,
+ eg_visitor(running_g, info, input_dag, dfs_states),
+ make_assoc_property_map(colours));
+
+ auto states = getVertices(dfs_states[input_dag.accept], info);
+
+#ifdef DEBUG
+ DEBUG_PRINTF(" output rstates:");
+ for (const auto &v : states) {
printf(" %zu", running_g[v].index);
- }
- printf("\n");
-#endif
-
- return states;
-}
-
-flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
- const NGHolder &input_dag,
- const flat_set<NFAVertex> &initial_states) {
- auto input_start_states = {input_dag.start, input_dag.startDs};
- return execute_graph(running_g, input_dag, input_start_states,
- initial_states);
-}
-
+ }
+ printf("\n");
+#endif
+
+ return states;
+}
+
+flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
+ const NGHolder &input_dag,
+ const flat_set<NFAVertex> &initial_states) {
+ auto input_start_states = {input_dag.start, input_dag.startDs};
+ return execute_graph(running_g, input_dag, input_start_states,
+ initial_states);
+}
+
static
bool can_die_early(const NGHolder &g, const vector<StateInfo> &info,
const dynamic_bitset<> &s,
@@ -368,4 +368,4 @@ bool can_die_early(const NGHolder &g, u32 age_limit) {
age_limit);
}
-} // namespace ue2
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_execute.h b/contrib/libs/hyperscan/src/nfagraph/ng_execute.h
index 17625b2aa3..32f5520d33 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_execute.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_execute.h
@@ -1,72 +1,72 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Execute an NFA over a given input, returning the set of states that
- * are active afterwards.
- */
-
-#ifndef NG_EXECUTE_H
-#define NG_EXECUTE_H
-
-#include "ng_holder.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Execute an NFA over a given input, returning the set of states that
+ * are active afterwards.
+ */
+
+#ifndef NG_EXECUTE_H
+#define NG_EXECUTE_H
+
+#include "ng_holder.h"
#include "util/flat_containers.h"
-
-#include <vector>
-
-namespace ue2 {
-
-class CharReach;
-struct ue2_literal;
-
-flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input,
- const flat_set<NFAVertex> &initial,
- bool kill_sds = false);
-
-flat_set<NFAVertex> execute_graph(const NGHolder &g,
- const std::vector<CharReach> &input,
- const flat_set<NFAVertex> &initial);
-
-/** on exit, states contains any state which may still be enabled after
- * receiving an input which corresponds to some path through the input_dag from
- * start or startDs to accept. input_dag MUST be acyclic aside from self-loops.
- */
-flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
- const flat_set<NFAVertex> &initial);
-
-/* as above, but able to specify the source states for the input graph */
-flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
- const flat_set<NFAVertex> &input_start_states,
- const flat_set<NFAVertex> &initial);
-
+
+#include <vector>
+
+namespace ue2 {
+
+class CharReach;
+struct ue2_literal;
+
+flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input,
+ const flat_set<NFAVertex> &initial,
+ bool kill_sds = false);
+
+flat_set<NFAVertex> execute_graph(const NGHolder &g,
+ const std::vector<CharReach> &input,
+ const flat_set<NFAVertex> &initial);
+
+/** on exit, states contains any state which may still be enabled after
+ * receiving an input which corresponds to some path through the input_dag from
+ * start or startDs to accept. input_dag MUST be acyclic aside from self-loops.
+ */
+flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
+ const flat_set<NFAVertex> &initial);
+
+/* as above, but able to specify the source states for the input graph */
+flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
+ const flat_set<NFAVertex> &input_start_states,
+ const flat_set<NFAVertex> &initial);
+
/* returns true if it is possible for the nfa to die within age_limit bytes */
bool can_die_early(const NGHolder &g, u32 age_limit);
-} // namespace ue2
-
-#endif
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.cpp
index 378c22bf82..f8abbd04a2 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.cpp
@@ -1,102 +1,102 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
* \brief Code for discovering properties of an NFA graph used by
* hs_expression_info().
- */
-#include "ng_expr_info.h"
-
-#include "hs_internal.h"
-#include "ng.h"
-#include "ng_asserts.h"
-#include "ng_depth.h"
-#include "ng_edge_redundancy.h"
+ */
+#include "ng_expr_info.h"
+
+#include "hs_internal.h"
+#include "ng.h"
+#include "ng_asserts.h"
+#include "ng_depth.h"
+#include "ng_edge_redundancy.h"
#include "ng_extparam.h"
#include "ng_fuzzy.h"
-#include "ng_holder.h"
+#include "ng_holder.h"
#include "ng_prune.h"
-#include "ng_reports.h"
-#include "ng_util.h"
-#include "ue2common.h"
+#include "ng_reports.h"
+#include "ng_util.h"
+#include "ue2common.h"
#include "compiler/expression_info.h"
-#include "parser/position.h" // for POS flags
-#include "util/boundary_reports.h"
-#include "util/compile_context.h"
-#include "util/depth.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-#include "util/report_manager.h"
-
-#include <limits.h>
-#include <set>
-
-using namespace std;
-
-namespace ue2 {
-
-/* get rid of leading \b and multiline ^ vertices */
-static
+#include "parser/position.h" // for POS flags
+#include "util/boundary_reports.h"
+#include "util/compile_context.h"
+#include "util/depth.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+#include "util/report_manager.h"
+
+#include <limits.h>
+#include <set>
+
+using namespace std;
+
+namespace ue2 {
+
+/* get rid of leading \b and multiline ^ vertices */
+static
void removeLeadingVirtualVerticesFromRoot(NGHolder &g, NFAVertex root) {
- vector<NFAVertex> victims;
-
+ vector<NFAVertex> victims;
+
for (auto v : adjacent_vertices_range(root, g)) {
if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
- DEBUG_PRINTF("(?m)^ vertex or leading \\[bB] vertex\n");
- victims.push_back(v);
- }
- }
-
- for (auto u : victims) {
+ DEBUG_PRINTF("(?m)^ vertex or leading \\[bB] vertex\n");
+ victims.push_back(v);
+ }
+ }
+
+ for (auto u : victims) {
for (auto v : adjacent_vertices_range(u, g)) {
add_edge_if_not_present(root, v, g);
- }
- }
-
+ }
+ }
+
remove_vertices(victims, g);
-}
-
-static
+}
+
+static
void checkVertex(const ReportManager &rm, const NGHolder &g, NFAVertex v,
- const vector<DepthMinMax> &depths, DepthMinMax &info) {
+ const vector<DepthMinMax> &depths, DepthMinMax &info) {
if (is_any_accept(v, g)) {
- return;
- }
+ return;
+ }
if (is_any_start(v, g)) {
info.min = depth(0);
- info.max = max(info.max, depth(0));
- return;
- }
-
+ info.max = max(info.max, depth(0));
+ return;
+ }
+
u32 idx = g[v].index;
- assert(idx < depths.size());
- const DepthMinMax &d = depths.at(idx);
-
+ assert(idx < depths.size());
+ const DepthMinMax &d = depths.at(idx);
+
for (ReportID report_id : g[v].reports) {
const Report &report = rm.getReport(report_id);
assert(report.type == EXTERNAL_CALLBACK);
@@ -126,24 +126,24 @@ void checkVertex(const ReportManager &rm, const NGHolder &g, NFAVertex v,
rd.str().c_str());
info = unionDepthMinMax(info, rd);
- }
-}
-
-static
+ }
+}
+
+static
bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g) {
for (const auto &report_id : all_reports(g)) {
- if (rm.getReport(report_id).offsetAdjust) {
- return true;
- }
- }
- return false;
-}
-
+ if (rm.getReport(report_id).offsetAdjust) {
+ return true;
+ }
+ }
+ return false;
+}
+
void fillExpressionInfo(ReportManager &rm, const CompileContext &cc,
NGHolder &g, ExpressionInfo &expr,
hs_expr_info *info) {
- assert(info);
-
+ assert(info);
+
// remove reports that aren't on vertices connected to accept.
clearReports(g);
@@ -154,16 +154,16 @@ void fillExpressionInfo(ReportManager &rm, const CompileContext &cc,
* match those in NG::addGraph().
*/
- /* ensure utf8 starts at cp boundary */
+ /* ensure utf8 starts at cp boundary */
ensureCodePointStart(rm, g, expr);
-
+
if (can_never_match(g)) {
throw CompileError(expr.index, "Pattern can never match.");
}
-
+
bool hamming = expr.hamm_distance > 0;
u32 e_dist = hamming ? expr.hamm_distance : expr.edit_distance;
-
+
// validate graph's suitability for fuzzing
validate_fuzzy_compile(g, e_dist, hamming, expr.utf8, cc.grey);
@@ -189,30 +189,30 @@ void fillExpressionInfo(ReportManager &rm, const CompileContext &cc,
auto depths = calcDepthsFrom(g, g.start);
- DepthMinMax d;
-
+ DepthMinMax d;
+
for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
checkVertex(rm, g, u, depths, d);
- }
-
+ }
+
for (auto u : inv_adjacent_vertices_range(g.acceptEod, g)) {
checkVertex(rm, g, u, depths, d);
- }
-
- if (d.max.is_finite()) {
- info->max_width = d.max;
- } else {
- info->max_width = UINT_MAX;
- }
- if (d.min.is_finite()) {
- info->min_width = d.min;
- } else {
- info->min_width = UINT_MAX;
- }
-
+ }
+
+ if (d.max.is_finite()) {
+ info->max_width = d.max;
+ } else {
+ info->max_width = UINT_MAX;
+ }
+ if (d.min.is_finite()) {
+ info->min_width = d.min;
+ } else {
+ info->min_width = UINT_MAX;
+ }
+
info->unordered_matches = hasOffsetAdjust(rm, g);
info->matches_at_eod = can_match_at_eod(g);
info->matches_only_at_eod = can_only_match_at_eod(g);
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.h b/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.h
index 9500338f55..f9bd680939 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_expr_info.h
@@ -1,51 +1,51 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
* \brief Code for discovering properties of an expression used by
- * hs_expression_info.
- */
-
-#ifndef NG_EXPR_INFO_H
-#define NG_EXPR_INFO_H
-
-struct hs_expr_info;
-
-namespace ue2 {
-
+ * hs_expression_info.
+ */
+
+#ifndef NG_EXPR_INFO_H
+#define NG_EXPR_INFO_H
+
+struct hs_expr_info;
+
+namespace ue2 {
+
class ExpressionInfo;
class NGHolder;
-class ReportManager;
+class ReportManager;
struct CompileContext;
-
+
void fillExpressionInfo(ReportManager &rm, const CompileContext &cc,
NGHolder &g, ExpressionInfo &expr, hs_expr_info *info);
-
-} // namespace ue2
-
-#endif // NG_EXPR_INFO_H
+
+} // namespace ue2
+
+#endif // NG_EXPR_INFO_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp
index cee47ffe70..6eb23113f3 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_extparam.cpp
@@ -1,74 +1,74 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Propagate extended parameters to vertex reports and reduce graph if
- * possible.
- *
- * This code handles the propagation of the extension parameters specified by
+ * \brief Propagate extended parameters to vertex reports and reduce graph if
+ * possible.
+ *
+ * This code handles the propagation of the extension parameters specified by
* the user with the \ref hs_expr_ext structure into the reports on the graph's
- * vertices.
- *
- * There are also some analyses that prune edges that cannot contribute to a
- * match given these constraints, or transform the graph in order to make a
- * constraint implicit.
- */
+ * vertices.
+ *
+ * There are also some analyses that prune edges that cannot contribute to a
+ * match given these constraints, or transform the graph in order to make a
+ * constraint implicit.
+ */
#include "ng_extparam.h"
-#include "ng.h"
-#include "ng_depth.h"
-#include "ng_dump.h"
-#include "ng_prune.h"
-#include "ng_reports.h"
-#include "ng_som_util.h"
-#include "ng_width.h"
-#include "ng_util.h"
-#include "ue2common.h"
+#include "ng.h"
+#include "ng_depth.h"
+#include "ng_dump.h"
+#include "ng_prune.h"
+#include "ng_reports.h"
+#include "ng_som_util.h"
+#include "ng_width.h"
+#include "ng_util.h"
+#include "ue2common.h"
#include "compiler/compiler.h"
-#include "parser/position.h"
-#include "util/compile_context.h"
-#include "util/compile_error.h"
-#include "util/container.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-
-#include <sstream>
-#include <string>
-
-using namespace std;
-
-namespace ue2 {
-
-static const u32 MAX_MAXOFFSET_TO_ANCHOR = 2000;
-static const u32 MAX_MINLENGTH_TO_CONVERT = 2000;
-
+#include "parser/position.h"
+#include "util/compile_context.h"
+#include "util/compile_error.h"
+#include "util/container.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+
+#include <sstream>
+#include <string>
+
+using namespace std;
+
+namespace ue2 {
+
+static const u32 MAX_MAXOFFSET_TO_ANCHOR = 2000;
+static const u32 MAX_MINLENGTH_TO_CONVERT = 2000;
+
/** True if all the given reports have the same extparam bounds. */
template<typename Container>
bool hasSameBounds(const Container &reports, const ReportManager &rm) {
@@ -91,82 +91,82 @@ bool hasSameBounds(const Container &reports, const ReportManager &rm) {
* \brief Find the (min, max) offset adjustment for the reports on a given
* vertex.
*/
-static
-pair<s32,s32> getMinMaxOffsetAdjust(const ReportManager &rm,
- const NGHolder &g, NFAVertex v) {
- s32 minAdj = 0, maxAdj = 0;
- const auto &reports = g[v].reports;
- for (auto ri = reports.begin(), re = reports.end(); ri != re; ++ri) {
- const Report &ir = rm.getReport(*ri);
- if (ri == reports.begin()) {
- minAdj = ir.offsetAdjust;
- maxAdj = ir.offsetAdjust;
- } else {
- minAdj = min(minAdj, ir.offsetAdjust);
- maxAdj = max(maxAdj, ir.offsetAdjust);
- }
- }
-
- return make_pair(minAdj, maxAdj);
-}
-
-/** \brief Find the (min, max) length of any match for the given holder. */
-static
-DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) {
- DepthMinMax match_depths;
-
- vector<DepthMinMax> depths = getDistancesFromSOM(g);
-
- pair<s32, s32> adj;
-
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- u32 idx = g[v].index;
- DepthMinMax d = depths[idx]; // copy
- adj = getMinMaxOffsetAdjust(rm, g, v);
- DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
- d.str().c_str(), adj.first, adj.second);
- d.min += adj.first;
- d.max += adj.second;
- match_depths = unionDepthMinMax(match_depths, d);
- }
-
- for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
- if (v == g.accept) {
- continue;
- }
- u32 idx = g[v].index;
- DepthMinMax d = depths[idx]; // copy
- adj = getMinMaxOffsetAdjust(rm, g, v);
- DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
- d.str().c_str(), adj.first, adj.second);
- d.min += adj.first;
- d.max += adj.second;
- match_depths = unionDepthMinMax(match_depths, d);
- }
-
- DEBUG_PRINTF("match_depths=%s\n", match_depths.str().c_str());
-
- assert(match_depths.min.is_reachable());
- assert(match_depths.max.is_reachable());
- return match_depths;
-}
-
+static
+pair<s32,s32> getMinMaxOffsetAdjust(const ReportManager &rm,
+ const NGHolder &g, NFAVertex v) {
+ s32 minAdj = 0, maxAdj = 0;
+ const auto &reports = g[v].reports;
+ for (auto ri = reports.begin(), re = reports.end(); ri != re; ++ri) {
+ const Report &ir = rm.getReport(*ri);
+ if (ri == reports.begin()) {
+ minAdj = ir.offsetAdjust;
+ maxAdj = ir.offsetAdjust;
+ } else {
+ minAdj = min(minAdj, ir.offsetAdjust);
+ maxAdj = max(maxAdj, ir.offsetAdjust);
+ }
+ }
+
+ return make_pair(minAdj, maxAdj);
+}
+
+/** \brief Find the (min, max) length of any match for the given holder. */
+static
+DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) {
+ DepthMinMax match_depths;
+
+ vector<DepthMinMax> depths = getDistancesFromSOM(g);
+
+ pair<s32, s32> adj;
+
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ u32 idx = g[v].index;
+ DepthMinMax d = depths[idx]; // copy
+ adj = getMinMaxOffsetAdjust(rm, g, v);
+ DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
+ d.str().c_str(), adj.first, adj.second);
+ d.min += adj.first;
+ d.max += adj.second;
+ match_depths = unionDepthMinMax(match_depths, d);
+ }
+
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ if (v == g.accept) {
+ continue;
+ }
+ u32 idx = g[v].index;
+ DepthMinMax d = depths[idx]; // copy
+ adj = getMinMaxOffsetAdjust(rm, g, v);
+ DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
+ d.str().c_str(), adj.first, adj.second);
+ d.min += adj.first;
+ d.max += adj.second;
+ match_depths = unionDepthMinMax(match_depths, d);
+ }
+
+ DEBUG_PRINTF("match_depths=%s\n", match_depths.str().c_str());
+
+ assert(match_depths.min.is_reachable());
+ assert(match_depths.max.is_reachable());
+ return match_depths;
+}
+
template<typename Function>
void replaceReports(NGHolder &g, NFAVertex accept, flat_set<NFAVertex> &seen,
Function func) {
- for (auto v : inv_adjacent_vertices_range(accept, g)) {
- if (v == g.accept) {
+ for (auto v : inv_adjacent_vertices_range(accept, g)) {
+ if (v == g.accept) {
// Don't operate on accept: the accept->acceptEod edge is stylised.
- assert(accept == g.acceptEod);
+ assert(accept == g.acceptEod);
assert(g[v].reports.empty());
- continue;
- }
-
+ continue;
+ }
+
if (!seen.insert(v).second) {
continue; // We have already processed v.
- }
-
- auto &reports = g[v].reports;
+ }
+
+ auto &reports = g[v].reports;
if (reports.empty()) {
continue;
}
@@ -177,7 +177,7 @@ void replaceReports(NGHolder &g, NFAVertex accept, flat_set<NFAVertex> &seen,
reports = std::move(new_reports);
}
}
-
+
/**
* Generic function for replacing all the reports in the graph.
*
@@ -190,7 +190,7 @@ void replaceReports(NGHolder &g, Function func) {
replaceReports(g, g.accept, seen, func);
replaceReports(g, g.acceptEod, seen, func);
}
-
+
/** \brief Replace the graph's reports with new reports that specify bounds. */
static
void updateReportBounds(ReportManager &rm, NGHolder &g,
@@ -199,9 +199,9 @@ void updateReportBounds(ReportManager &rm, NGHolder &g,
replaceReports(g, [&](NFAVertex, ReportID id) {
Report report = rm.getReport(id); // make a copy
assert(!report.hasBounds());
-
+
// Note that we need to cope with offset adjustment here.
-
+
report.minOffset = expr.min_offset - report.offsetAdjust;
if (expr.max_offset == MAX_OFFSET) {
report.maxOffset = MAX_OFFSET;
@@ -209,30 +209,30 @@ void updateReportBounds(ReportManager &rm, NGHolder &g,
report.maxOffset = expr.max_offset - report.offsetAdjust;
}
assert(report.maxOffset >= report.minOffset);
-
+
report.minLength = expr.min_length;
if (expr.min_length && !expr.som) {
report.quashSom = true;
- }
-
+ }
+
DEBUG_PRINTF("id %u -> min_offset=%llu, max_offset=%llu, "
"min_length=%llu\n", id, report.minOffset,
report.maxOffset, report.minLength);
return rm.getInternalId(report);
});
-}
-
-static
-bool hasVirtualStarts(const NGHolder &g) {
- for (auto v : adjacent_vertices_range(g.start, g)) {
- if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
- return true;
- }
- }
- return false;
-}
-
+}
+
+static
+bool hasVirtualStarts(const NGHolder &g) {
+ for (auto v : adjacent_vertices_range(g.start, g)) {
+ if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
+ return true;
+ }
+ }
+ return false;
+}
+
/** Set the min_length param for all reports to zero. */
static
void clearMinLengthParam(NGHolder &g, ReportManager &rm) {
@@ -272,11 +272,11 @@ void clearOffsetParams(NGHolder &g, ReportManager &rm) {
* can use that knowledge to anchor it which will limit its lifespan. Note that
* we can't use this transformation if there's a min_length, as it's currently
* handled using "sly SOM".
- *
- * Note that it is possible to handle graphs that have a combination of
- * anchored and unanchored paths, but it's too tricky for the moment.
- */
-static
+ *
+ * Note that it is possible to handle graphs that have a combination of
+ * anchored and unanchored paths, but it's too tricky for the moment.
+ */
+static
bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) {
if (!isFloating(g)) {
return false;
@@ -303,99 +303,99 @@ bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) {
const depth minWidth = findMinWidth(g);
const depth maxWidth = findMaxWidth(g);
- assert(minWidth <= maxWidth);
- assert(maxWidth.is_reachable());
-
+ assert(minWidth <= maxWidth);
+ assert(maxWidth.is_reachable());
+
const auto &first_report = rm.getReport(*reports.begin());
const auto min_offset = first_report.minOffset;
const auto max_offset = first_report.maxOffset;
assert(max_offset < MAX_OFFSET);
- DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n",
+ DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n",
minWidth.str().c_str(), maxWidth.str().c_str(),
min_offset, max_offset);
-
+
if (max_offset > MAX_MAXOFFSET_TO_ANCHOR) {
- return false;
- }
-
+ return false;
+ }
+
if (max_offset < minWidth) {
- assert(0);
- return false;
- }
-
- // If the pattern has virtual starts, we probably don't want to touch it.
- if (hasVirtualStarts(g)) {
- DEBUG_PRINTF("virtual starts, bailing\n");
- return false;
- }
-
- // Similarly, bail if the pattern is vacuous. TODO: this could be done, we
- // would just need to be a little careful with reports.
- if (isVacuous(g)) {
- DEBUG_PRINTF("vacuous, bailing\n");
- return false;
- }
-
- u32 min_bound, max_bound;
- if (maxWidth.is_infinite()) {
- min_bound = 0;
+ assert(0);
+ return false;
+ }
+
+ // If the pattern has virtual starts, we probably don't want to touch it.
+ if (hasVirtualStarts(g)) {
+ DEBUG_PRINTF("virtual starts, bailing\n");
+ return false;
+ }
+
+ // Similarly, bail if the pattern is vacuous. TODO: this could be done, we
+ // would just need to be a little careful with reports.
+ if (isVacuous(g)) {
+ DEBUG_PRINTF("vacuous, bailing\n");
+ return false;
+ }
+
+ u32 min_bound, max_bound;
+ if (maxWidth.is_infinite()) {
+ min_bound = 0;
max_bound = max_offset - minWidth;
- } else {
+ } else {
min_bound = min_offset > maxWidth ? min_offset - maxWidth : 0;
max_bound = max_offset - minWidth;
- }
-
- DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound);
-
- vector<NFAVertex> initials;
- for (auto v : adjacent_vertices_range(g.startDs, g)) {
- if (v == g.startDs) {
- continue;
- }
- initials.push_back(v);
- }
- if (initials.empty()) {
- DEBUG_PRINTF("no initial vertices\n");
- return false;
- }
-
- // Wire up 'min_offset' mandatory dots from anchored start.
- NFAVertex u = g.start;
- for (u32 i = 0; i < min_bound; i++) {
- NFAVertex v = add_vertex(g);
- g[v].char_reach.setall();
- add_edge(u, v, g);
- u = v;
- }
-
- NFAVertex head = u;
-
- // Wire up optional dots for (max_offset - min_offset).
- for (u32 i = 0; i < max_bound - min_bound; i++) {
- NFAVertex v = add_vertex(g);
- g[v].char_reach.setall();
- if (head != u) {
- add_edge(head, v, g);
- }
- add_edge(u, v, g);
- u = v;
- }
-
- // Remove edges from starts and wire both head and u to our initials.
- for (auto v : initials) {
- remove_edge(g.startDs, v, g);
- remove_edge(g.start, v, g);
-
- if (head != u) {
- add_edge(head, v, g);
- }
- add_edge(u, v, g);
- }
-
+ }
+
+ DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound);
+
+ vector<NFAVertex> initials;
+ for (auto v : adjacent_vertices_range(g.startDs, g)) {
+ if (v == g.startDs) {
+ continue;
+ }
+ initials.push_back(v);
+ }
+ if (initials.empty()) {
+ DEBUG_PRINTF("no initial vertices\n");
+ return false;
+ }
+
+ // Wire up 'min_offset' mandatory dots from anchored start.
+ NFAVertex u = g.start;
+ for (u32 i = 0; i < min_bound; i++) {
+ NFAVertex v = add_vertex(g);
+ g[v].char_reach.setall();
+ add_edge(u, v, g);
+ u = v;
+ }
+
+ NFAVertex head = u;
+
+ // Wire up optional dots for (max_offset - min_offset).
+ for (u32 i = 0; i < max_bound - min_bound; i++) {
+ NFAVertex v = add_vertex(g);
+ g[v].char_reach.setall();
+ if (head != u) {
+ add_edge(head, v, g);
+ }
+ add_edge(u, v, g);
+ u = v;
+ }
+
+ // Remove edges from starts and wire both head and u to our initials.
+ for (auto v : initials) {
+ remove_edge(g.startDs, v, g);
+ remove_edge(g.start, v, g);
+
+ if (head != u) {
+ add_edge(head, v, g);
+ }
+ add_edge(u, v, g);
+ }
+
renumber_vertices(g);
renumber_edges(g);
-
+
if (minWidth == maxWidth) {
// For a fixed width pattern, we can retire the offsets as
// they are implicit in the graph now.
@@ -403,68 +403,68 @@ bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) {
}
clearReports(g);
- return true;
-}
-
-static
-NFAVertex findSingleCyclic(const NGHolder &g) {
+ return true;
+}
+
+static
+NFAVertex findSingleCyclic(const NGHolder &g) {
NFAVertex v = NGHolder::null_vertex();
- for (const auto &e : edges_range(g)) {
- if (source(e, g) == target(e, g)) {
- if (source(e, g) == g.startDs) {
- continue;
- }
+ for (const auto &e : edges_range(g)) {
+ if (source(e, g) == target(e, g)) {
+ if (source(e, g) == g.startDs) {
+ continue;
+ }
if (v != NGHolder::null_vertex()) {
- // More than one cyclic vertex.
+ // More than one cyclic vertex.
return NGHolder::null_vertex();
- }
- v = source(e, g);
- }
- }
-
+ }
+ v = source(e, g);
+ }
+ }
+
if (v != NGHolder::null_vertex()) {
DEBUG_PRINTF("cyclic is %zu\n", g[v].index);
- assert(!is_special(v, g));
- }
- return v;
-}
-
-static
+ assert(!is_special(v, g));
+ }
+ return v;
+}
+
+static
bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g,
- int *adjust) {
- const auto &reports = all_reports(g);
- if (reports.empty()) {
- assert(0);
- return false;
- }
-
- int offsetAdjust = rm.getReport(*reports.begin()).offsetAdjust;
- for (auto report : reports) {
- const Report &ir = rm.getReport(report);
- if (ir.offsetAdjust != offsetAdjust) {
- DEBUG_PRINTF("different adjusts!\n");
- return false;
- }
- }
-
- *adjust = offsetAdjust;
- return true;
-}
-
+ int *adjust) {
+ const auto &reports = all_reports(g);
+ if (reports.empty()) {
+ assert(0);
+ return false;
+ }
+
+ int offsetAdjust = rm.getReport(*reports.begin()).offsetAdjust;
+ for (auto report : reports) {
+ const Report &ir = rm.getReport(report);
+ if (ir.offsetAdjust != offsetAdjust) {
+ DEBUG_PRINTF("different adjusts!\n");
+ return false;
+ }
+ }
+
+ *adjust = offsetAdjust;
+ return true;
+}
+
/**
* If the pattern has a min_length and is of "ratchet" form with one unbounded
- * repeat, that repeat can become a bounded repeat.
- *
- * /foo.*bar/{min_length=100} --> /foo.{94,}bar/
- */
-static
+ * repeat, that repeat can become a bounded repeat.
+ *
+ * /foo.*bar/{min_length=100} --> /foo.{94,}bar/
+ */
+static
bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) {
const auto &reports = all_reports(g);
-
+
if (reports.empty()) {
- return false;
- }
-
+ return false;
+ }
+
if (!hasSameBounds(reports, rm)) {
DEBUG_PRINTF("mixed report bounds\n");
return false;
@@ -475,249 +475,249 @@ bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) {
return false;
}
- // If the pattern has virtual starts, we probably don't want to touch it.
- if (hasVirtualStarts(g)) {
- DEBUG_PRINTF("virtual starts, bailing\n");
- return false;
- }
-
- // The graph must contain a single cyclic vertex (other than startDs), and
- // that vertex can have one pred and one successor.
- NFAVertex cyclic = findSingleCyclic(g);
+ // If the pattern has virtual starts, we probably don't want to touch it.
+ if (hasVirtualStarts(g)) {
+ DEBUG_PRINTF("virtual starts, bailing\n");
+ return false;
+ }
+
+ // The graph must contain a single cyclic vertex (other than startDs), and
+ // that vertex can have one pred and one successor.
+ NFAVertex cyclic = findSingleCyclic(g);
if (cyclic == NGHolder::null_vertex()) {
- return false;
- }
-
+ return false;
+ }
+
NGHolder::adjacency_iterator ai, ae;
- tie(ai, ae) = adjacent_vertices(g.start, g);
- if (*ai == g.startDs) {
- ++ai;
- }
- NFAVertex v = *ai;
- if (++ai != ae) {
- DEBUG_PRINTF("more than one initial vertex\n");
- return false;
- }
-
- u32 width = 0;
-
- // Walk from the start vertex to the cyclic state and ensure we have a
- // chain of vertices.
- while (v != cyclic) {
+ tie(ai, ae) = adjacent_vertices(g.start, g);
+ if (*ai == g.startDs) {
+ ++ai;
+ }
+ NFAVertex v = *ai;
+ if (++ai != ae) {
+ DEBUG_PRINTF("more than one initial vertex\n");
+ return false;
+ }
+
+ u32 width = 0;
+
+ // Walk from the start vertex to the cyclic state and ensure we have a
+ // chain of vertices.
+ while (v != cyclic) {
DEBUG_PRINTF("vertex %zu\n", g[v].index);
- width++;
+ width++;
auto succ = succs(v, g);
- if (contains(succ, cyclic)) {
- if (succ.size() == 1) {
- v = cyclic;
- } else if (succ.size() == 2) {
- // Cyclic and jump edge.
- succ.erase(cyclic);
- NFAVertex v2 = *succ.begin();
- if (!edge(cyclic, v2, g).second) {
- DEBUG_PRINTF("bad form\n");
- return false;
- }
- v = cyclic;
- } else {
- DEBUG_PRINTF("bad form\n");
- return false;
- }
- } else {
- if (succ.size() != 1) {
- DEBUG_PRINTF("bad form\n");
- return false;
- }
- v = *succ.begin();
- }
- }
-
- // Check the cyclic state is A-OK.
- v = getSoleDestVertex(g, cyclic);
+ if (contains(succ, cyclic)) {
+ if (succ.size() == 1) {
+ v = cyclic;
+ } else if (succ.size() == 2) {
+ // Cyclic and jump edge.
+ succ.erase(cyclic);
+ NFAVertex v2 = *succ.begin();
+ if (!edge(cyclic, v2, g).second) {
+ DEBUG_PRINTF("bad form\n");
+ return false;
+ }
+ v = cyclic;
+ } else {
+ DEBUG_PRINTF("bad form\n");
+ return false;
+ }
+ } else {
+ if (succ.size() != 1) {
+ DEBUG_PRINTF("bad form\n");
+ return false;
+ }
+ v = *succ.begin();
+ }
+ }
+
+ // Check the cyclic state is A-OK.
+ v = getSoleDestVertex(g, cyclic);
if (v == NGHolder::null_vertex()) {
- DEBUG_PRINTF("cyclic has more than one successor\n");
- return false;
- }
-
- // Walk from the cyclic state to an accept and ensure we have a chain of
- // vertices.
- while (!is_any_accept(v, g)) {
+ DEBUG_PRINTF("cyclic has more than one successor\n");
+ return false;
+ }
+
+ // Walk from the cyclic state to an accept and ensure we have a chain of
+ // vertices.
+ while (!is_any_accept(v, g)) {
DEBUG_PRINTF("vertex %zu\n", g[v].index);
- width++;
+ width++;
auto succ = succs(v, g);
- if (succ.size() != 1) {
- DEBUG_PRINTF("bad form\n");
- return false;
- }
- v = *succ.begin();
- }
-
- int offsetAdjust = 0;
- if (!hasOffsetAdjust(rm, g, &offsetAdjust)) {
- return false;
- }
- DEBUG_PRINTF("adjusting width by %d\n", offsetAdjust);
- width += offsetAdjust;
-
+ if (succ.size() != 1) {
+ DEBUG_PRINTF("bad form\n");
+ return false;
+ }
+ v = *succ.begin();
+ }
+
+ int offsetAdjust = 0;
+ if (!hasOffsetAdjust(rm, g, &offsetAdjust)) {
+ return false;
+ }
+ DEBUG_PRINTF("adjusting width by %d\n", offsetAdjust);
+ width += offsetAdjust;
+
DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width,
- g[cyclic].index);
-
+ g[cyclic].index);
+
if (width >= min_length) {
- DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n",
+ DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n",
min_length, width);
clearMinLengthParam(g, rm);
- return true;
- }
-
- vector<NFAVertex> preds;
- vector<NFAEdge> dead;
- for (auto u : inv_adjacent_vertices_range(cyclic, g)) {
+ return true;
+ }
+
+ vector<NFAVertex> preds;
+ vector<NFAEdge> dead;
+ for (auto u : inv_adjacent_vertices_range(cyclic, g)) {
DEBUG_PRINTF("pred %zu\n", g[u].index);
- if (u == cyclic) {
- continue;
- }
- preds.push_back(u);
-
- // We want to delete the out-edges of each predecessor, but need to
- // make sure we don't delete the startDs self loop.
- for (const auto &e : out_edges_range(u, g)) {
- if (target(e, g) != g.startDs) {
- dead.push_back(e);
- }
- }
- }
-
- remove_edges(dead, g);
-
- assert(!preds.empty());
-
- const CharReach &cr = g[cyclic].char_reach;
-
+ if (u == cyclic) {
+ continue;
+ }
+ preds.push_back(u);
+
+ // We want to delete the out-edges of each predecessor, but need to
+ // make sure we don't delete the startDs self loop.
+ for (const auto &e : out_edges_range(u, g)) {
+ if (target(e, g) != g.startDs) {
+ dead.push_back(e);
+ }
+ }
+ }
+
+ remove_edges(dead, g);
+
+ assert(!preds.empty());
+
+ const CharReach &cr = g[cyclic].char_reach;
+
for (u32 i = 0; i < min_length - width - 1; ++i) {
- v = add_vertex(g);
- g[v].char_reach = cr;
-
- for (auto u : preds) {
- add_edge(u, v, g);
- }
- preds.clear();
- preds.push_back(v);
- }
- assert(!preds.empty());
- for (auto u : preds) {
- add_edge(u, cyclic, g);
- }
-
+ v = add_vertex(g);
+ g[v].char_reach = cr;
+
+ for (auto u : preds) {
+ add_edge(u, v, g);
+ }
+ preds.clear();
+ preds.push_back(v);
+ }
+ assert(!preds.empty());
+ for (auto u : preds) {
+ add_edge(u, cyclic, g);
+ }
+
renumber_vertices(g);
renumber_edges(g);
clearMinLengthParam(g, rm);
- clearReports(g);
- return true;
-}
-
-static
+ clearReports(g);
+ return true;
+}
+
+static
bool hasExtParams(const ExpressionInfo &expr) {
if (expr.min_length != 0) {
- return true;
- }
+ return true;
+ }
if (expr.min_offset != 0) {
- return true;
- }
+ return true;
+ }
if (expr.max_offset != MAX_OFFSET) {
- return true;
- }
- return false;
-}
-
-static
-const depth& maxDistToAccept(const NFAVertexBidiDepth &d) {
- if (d.toAccept.max.is_unreachable()) {
- return d.toAcceptEod.max;
- } else if (d.toAcceptEod.max.is_unreachable()) {
- return d.toAccept.max;
- }
- return max(d.toAccept.max, d.toAcceptEod.max);
-}
-
-static
-const depth& minDistFromStart(const NFAVertexBidiDepth &d) {
- return min(d.fromStartDotStar.min, d.fromStart.min);
-}
-
-static
-const depth& minDistToAccept(const NFAVertexBidiDepth &d) {
- return min(d.toAccept.min, d.toAcceptEod.min);
-}
-
-static
+ return true;
+ }
+ return false;
+}
+
+static
+const depth& maxDistToAccept(const NFAVertexBidiDepth &d) {
+ if (d.toAccept.max.is_unreachable()) {
+ return d.toAcceptEod.max;
+ } else if (d.toAcceptEod.max.is_unreachable()) {
+ return d.toAccept.max;
+ }
+ return max(d.toAccept.max, d.toAcceptEod.max);
+}
+
+static
+const depth& minDistFromStart(const NFAVertexBidiDepth &d) {
+ return min(d.fromStartDotStar.min, d.fromStart.min);
+}
+
+static
+const depth& minDistToAccept(const NFAVertexBidiDepth &d) {
+ return min(d.toAccept.min, d.toAcceptEod.min);
+}
+
+static
bool isEdgePrunable(const NGHolder &g, const Report &report,
- const vector<NFAVertexBidiDepth> &depths,
- const NFAEdge &e) {
- const NFAVertex u = source(e, g);
- const NFAVertex v = target(e, g);
-
+ const vector<NFAVertexBidiDepth> &depths,
+ const NFAEdge &e) {
+ const NFAVertex u = source(e, g);
+ const NFAVertex v = target(e, g);
+
DEBUG_PRINTF("edge (%zu,%zu)\n", g[u].index, g[v].index);
-
- // Leave our special-to-special edges alone.
- if (is_special(u, g) && is_special(v, g)) {
- DEBUG_PRINTF("ignoring special-to-special\n");
- return false;
- }
-
- // We must be careful around start: we don't want to remove (start, v) if
- // (startDs, v) exists as well, since later code will assume the presence
- // of both edges, but other cases are OK.
- if (u == g.start && edge(g.startDs, v, g).second) {
- DEBUG_PRINTF("ignoring unanchored start edge\n");
- return false;
- }
-
- u32 u_idx = g[u].index;
- u32 v_idx = g[v].index;
- assert(u_idx < depths.size() && v_idx < depths.size());
-
- const NFAVertexBidiDepth &du = depths.at(u_idx);
- const NFAVertexBidiDepth &dv = depths.at(v_idx);
-
+
+ // Leave our special-to-special edges alone.
+ if (is_special(u, g) && is_special(v, g)) {
+ DEBUG_PRINTF("ignoring special-to-special\n");
+ return false;
+ }
+
+ // We must be careful around start: we don't want to remove (start, v) if
+ // (startDs, v) exists as well, since later code will assume the presence
+ // of both edges, but other cases are OK.
+ if (u == g.start && edge(g.startDs, v, g).second) {
+ DEBUG_PRINTF("ignoring unanchored start edge\n");
+ return false;
+ }
+
+ u32 u_idx = g[u].index;
+ u32 v_idx = g[v].index;
+ assert(u_idx < depths.size() && v_idx < depths.size());
+
+ const NFAVertexBidiDepth &du = depths.at(u_idx);
+ const NFAVertexBidiDepth &dv = depths.at(v_idx);
+
if (report.minOffset) {
depth max_offset = maxDistFromStartOfData(du) + maxDistToAccept(dv);
if (max_offset.is_finite() && max_offset < report.minOffset) {
- DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str());
- return true;
- }
- }
-
+ DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str());
+ return true;
+ }
+ }
+
if (report.maxOffset != MAX_OFFSET) {
- depth min_offset = minDistFromStart(du) + minDistToAccept(dv);
- assert(min_offset.is_finite());
-
+ depth min_offset = minDistFromStart(du) + minDistToAccept(dv);
+ assert(min_offset.is_finite());
+
if (min_offset > report.maxOffset) {
- DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str());
- return true;
- }
- }
-
+ DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str());
+ return true;
+ }
+ }
+
if (report.minLength && is_any_accept(v, g)) {
- // Simple take on min_length. If we're an edge to accept and our max
- // dist from start is too small, we can be pruned.
+ // Simple take on min_length. If we're an edge to accept and our max
+ // dist from start is too small, we can be pruned.
const depth &width = maxDistFromInit(du);
if (width.is_finite() && width < report.minLength) {
- DEBUG_PRINTF("max width %s from start too small for min_length\n",
- width.str().c_str());
- return true;
- }
- }
-
- return false;
-}
-
-static
+ DEBUG_PRINTF("max width %s from start too small for min_length\n",
+ width.str().c_str());
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static
void pruneExtUnreachable(NGHolder &g, const ReportManager &rm) {
const auto &reports = all_reports(g);
if (reports.empty()) {
return;
}
-
+
if (!hasSameBounds(reports, rm)) {
DEBUG_PRINTF("report bounds vary\n");
return;
@@ -727,32 +727,32 @@ void pruneExtUnreachable(NGHolder &g, const ReportManager &rm) {
auto depths = calcBidiDepths(g);
- vector<NFAEdge> dead;
-
- for (const auto &e : edges_range(g)) {
+ vector<NFAEdge> dead;
+
+ for (const auto &e : edges_range(g)) {
if (isEdgePrunable(g, report, depths, e)) {
- DEBUG_PRINTF("pruning\n");
- dead.push_back(e);
- }
- }
-
- if (dead.empty()) {
- return;
- }
-
- remove_edges(dead, g);
- pruneUseless(g);
+ DEBUG_PRINTF("pruning\n");
+ dead.push_back(e);
+ }
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
+ remove_edges(dead, g);
+ pruneUseless(g);
clearReports(g);
-}
-
+}
+
/**
* Remove vacuous edges in graphs where the min_offset or min_length
* constraints dictate that they can never produce a match.
*/
-static
+static
void pruneVacuousEdges(NGHolder &g, const ReportManager &rm) {
- vector<NFAEdge> dead;
-
+ vector<NFAEdge> dead;
+
auto has_min_offset = [&](NFAVertex v) {
assert(!g[v].reports.empty()); // must be reporter
return all_of_in(g[v].reports, [&](ReportID id) {
@@ -767,157 +767,157 @@ void pruneVacuousEdges(NGHolder &g, const ReportManager &rm) {
});
};
- for (const auto &e : edges_range(g)) {
- const NFAVertex u = source(e, g);
- const NFAVertex v = target(e, g);
-
+ for (const auto &e : edges_range(g)) {
+ const NFAVertex u = source(e, g);
+ const NFAVertex v = target(e, g);
+
// Special case: Crudely remove vacuous edges from start in graphs with
// a min_offset.
if (u == g.start && is_any_accept(v, g) && has_min_offset(u)) {
- DEBUG_PRINTF("vacuous edge in graph with min_offset!\n");
- dead.push_back(e);
- continue;
- }
-
- // If a min_length is set, vacuous edges can be removed.
+ DEBUG_PRINTF("vacuous edge in graph with min_offset!\n");
+ dead.push_back(e);
+ continue;
+ }
+
+ // If a min_length is set, vacuous edges can be removed.
if (is_any_start(u, g) && is_any_accept(v, g) && has_min_length(u)) {
- DEBUG_PRINTF("vacuous edge in graph with min_length!\n");
- dead.push_back(e);
- continue;
- }
- }
-
- if (dead.empty()) {
- return;
- }
-
+ DEBUG_PRINTF("vacuous edge in graph with min_length!\n");
+ dead.push_back(e);
+ continue;
+ }
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
DEBUG_PRINTF("removing %zu vacuous edges\n", dead.size());
- remove_edges(dead, g);
- pruneUseless(g);
+ remove_edges(dead, g);
+ pruneUseless(g);
clearReports(g);
-}
-
-static
+}
+
+static
void pruneUnmatchable(NGHolder &g, const vector<DepthMinMax> &depths,
- const ReportManager &rm, NFAVertex accept) {
- vector<NFAEdge> dead;
-
- for (const auto &e : in_edges_range(accept, g)) {
- NFAVertex v = source(e, g);
- if (v == g.accept) {
- assert(accept == g.acceptEod); // stylised edge
- continue;
- }
-
+ const ReportManager &rm, NFAVertex accept) {
+ vector<NFAEdge> dead;
+
+ for (const auto &e : in_edges_range(accept, g)) {
+ NFAVertex v = source(e, g);
+ if (v == g.accept) {
+ assert(accept == g.acceptEod); // stylised edge
+ continue;
+ }
+
if (!hasSameBounds(g[v].reports, rm)) {
continue;
}
const auto &report = rm.getReport(*g[v].reports.begin());
- u32 idx = g[v].index;
- DepthMinMax d = depths[idx]; // copy
- pair<s32, s32> adj = getMinMaxOffsetAdjust(rm, g, v);
- DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
- d.str().c_str(), adj.first, adj.second);
- d.min += adj.first;
- d.max += adj.second;
-
+ u32 idx = g[v].index;
+ DepthMinMax d = depths[idx]; // copy
+ pair<s32, s32> adj = getMinMaxOffsetAdjust(rm, g, v);
+ DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
+ d.str().c_str(), adj.first, adj.second);
+ d.min += adj.first;
+ d.max += adj.second;
+
if (d.max.is_finite() && d.max < report.minLength) {
- DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n",
+ DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n",
d.max.str().c_str(), report.minLength);
- dead.push_back(e);
- continue;
- }
-
+ dead.push_back(e);
+ continue;
+ }
+
if (report.maxOffset != MAX_OFFSET && d.min > report.maxOffset) {
- DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n",
+ DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n",
d.min.str().c_str(), report.maxOffset);
- dead.push_back(e);
- continue;
- }
- }
-
- remove_edges(dead, g);
-}
-
+ dead.push_back(e);
+ continue;
+ }
+ }
+
+ remove_edges(dead, g);
+}
+
/**
* Remove edges to accepts that can never produce a match long enough to
* satisfy our min_length and max_offset constraints.
*/
-static
+static
void pruneUnmatchable(NGHolder &g, const ReportManager &rm) {
if (!any_of_in(all_reports(g), [&](ReportID id) {
return rm.getReport(id).minLength > 0;
})) {
- return;
- }
-
- vector<DepthMinMax> depths = getDistancesFromSOM(g);
-
- pruneUnmatchable(g, depths, rm, g.accept);
- pruneUnmatchable(g, depths, rm, g.acceptEod);
-
- pruneUseless(g);
+ return;
+ }
+
+ vector<DepthMinMax> depths = getDistancesFromSOM(g);
+
+ pruneUnmatchable(g, depths, rm, g.accept);
+ pruneUnmatchable(g, depths, rm, g.acceptEod);
+
+ pruneUseless(g);
clearReports(g);
-}
-
-static
-bool hasOffsetAdjustments(const ReportManager &rm, const NGHolder &g) {
+}
+
+static
+bool hasOffsetAdjustments(const ReportManager &rm, const NGHolder &g) {
return any_of_in(all_reports(g), [&rm](ReportID id) {
return rm.getReport(id).offsetAdjust != 0;
});
-}
-
+}
+
void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr,
ReportManager &rm) {
if (!hasExtParams(expr)) {
- return;
- }
-
- depth minWidth = findMinWidth(g);
- depth maxWidth = findMaxWidth(g);
- bool is_anchored = !has_proper_successor(g.startDs, g)
- && out_degree(g.start, g);
-
- DepthMinMax match_depths = findMatchLengths(rm, g);
- DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str());
-
+ return;
+ }
+
+ depth minWidth = findMinWidth(g);
+ depth maxWidth = findMaxWidth(g);
+ bool is_anchored = !has_proper_successor(g.startDs, g)
+ && out_degree(g.start, g);
+
+ DepthMinMax match_depths = findMatchLengths(rm, g);
+ DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str());
+
if (is_anchored && maxWidth.is_finite() && expr.min_offset > maxWidth) {
- ostringstream oss;
- oss << "Expression is anchored and cannot satisfy min_offset="
+ ostringstream oss;
+ oss << "Expression is anchored and cannot satisfy min_offset="
<< expr.min_offset << " as it can only produce matches of length "
- << maxWidth << " bytes at most.";
+ << maxWidth << " bytes at most.";
throw CompileError(expr.index, oss.str());
- }
-
+ }
+
if (minWidth > expr.max_offset) {
- ostringstream oss;
+ ostringstream oss;
oss << "Expression has max_offset=" << expr.max_offset
<< " but requires " << minWidth << " bytes to match.";
throw CompileError(expr.index, oss.str());
- }
-
+ }
+
if (maxWidth.is_finite() && match_depths.max < expr.min_length) {
- ostringstream oss;
+ ostringstream oss;
oss << "Expression has min_length=" << expr.min_length << " but can "
- "only produce matches of length " << match_depths.max <<
- " bytes at most.";
+ "only produce matches of length " << match_depths.max <<
+ " bytes at most.";
throw CompileError(expr.index, oss.str());
- }
-
+ }
+
if (expr.min_length && expr.min_length <= match_depths.min) {
- DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n",
+ DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n",
expr.min_length);
expr.min_length = 0;
- }
-
+ }
+
if (!hasExtParams(expr)) {
- return;
- }
-
+ return;
+ }
+
updateReportBounds(rm, g, expr);
}
-
+
/**
* If the pattern is completely anchored and has a min_length set, this can
* be converted to a min_offset.
@@ -926,8 +926,8 @@ static
void replaceMinLengthWithOffset(NGHolder &g, ReportManager &rm) {
if (has_proper_successor(g.startDs, g)) {
return; // not wholly anchored
- }
-
+ }
+
replaceReports(g, [&rm](NFAVertex, ReportID id) {
const auto &report = rm.getReport(id);
if (report.minLength) {
@@ -984,52 +984,52 @@ void reduceExtendedParams(NGHolder &g, ReportManager &rm, som_type som) {
[&](ReportID id) { return rm.getReport(id).hasBounds(); })) {
DEBUG_PRINTF("no extparam bounds\n");
return;
- }
-
+ }
+
DEBUG_PRINTF("graph has extparam bounds\n");
-
+
pruneVacuousEdges(g, rm);
if (can_never_match(g)) {
return;
}
-
+
pruneUnmatchable(g, rm);
if (can_never_match(g)) {
return;
- }
-
+ }
+
if (!hasOffsetAdjustments(rm, g)) {
pruneExtUnreachable(g, rm);
if (can_never_match(g)) {
return;
}
- }
-
+ }
+
replaceMinLengthWithOffset(g, rm);
if (can_never_match(g)) {
- return;
- }
-
- // If the pattern has a min_length and is of "ratchet" form with one
- // unbounded repeat, that repeat can become a bounded repeat.
- // e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/
+ return;
+ }
+
+ // If the pattern has a min_length and is of "ratchet" form with one
+ // unbounded repeat, that repeat can become a bounded repeat.
+ // e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/
transformMinLengthToRepeat(g, rm);
if (can_never_match(g)) {
return;
- }
-
- // If the pattern is unanchored, has a max_offset and has not asked for
- // SOM, we can use that knowledge to anchor it which will limit its
- // lifespan. Note that we can't use this transformation if there's a
- // min_length, as it's currently handled using "sly SOM".
+ }
+
+ // If the pattern is unanchored, has a max_offset and has not asked for
+ // SOM, we can use that knowledge to anchor it which will limit its
+ // lifespan. Note that we can't use this transformation if there's a
+ // min_length, as it's currently handled using "sly SOM".
if (som == SOM_NONE) {
anchorPatternWithBoundedRepeat(g, rm);
if (can_never_match(g)) {
return;
- }
- }
-
+ }
+ }
+
removeUnneededOffsetBounds(g, rm);
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_extparam.h b/contrib/libs/hyperscan/src/nfagraph/ng_extparam.h
index 43543b1255..ae818075c0 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_extparam.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_extparam.h
@@ -1,47 +1,47 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Propagate extended parameters to vertex reports and reduce graph if
- * possible.
- */
-
-#ifndef NG_EXTPARAM_H
-#define NG_EXTPARAM_H
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Propagate extended parameters to vertex reports and reduce graph if
+ * possible.
+ */
+
+#ifndef NG_EXTPARAM_H
+#define NG_EXTPARAM_H
+
#include "som/som.h"
-namespace ue2 {
-
+namespace ue2 {
+
class ExpressionInfo;
class NGHolder;
-class ReportManager;
-
+class ReportManager;
+
/**
* \brief Propagate extended parameter information to vertex reports. Will
* throw CompileError if this expression's extended parameters are not
@@ -52,13 +52,13 @@ class ReportManager;
*/
void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr,
ReportManager &rm);
-
+
/**
* \brief Perform graph reductions (if possible) to do with extended parameter
* constraints on reports.
*/
void reduceExtendedParams(NGHolder &g, ReportManager &rm, som_type som);
-} // namespace ue2
-
-#endif
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.cpp
index 01fb0090c6..8fb264d8a9 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.cpp
@@ -1,142 +1,142 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose mask construction from NGHolder.
- */
-#include "ng_fixed_width.h"
-
-#include "grey.h"
-#include "ng_holder.h"
-#include "ng_util.h"
-#include "rose/rose_build.h"
-#include "util/container.h"
-#include "ue2common.h"
-
-#include <algorithm>
-#include <iterator>
-#include <set>
-
-using namespace std;
-
-namespace ue2 {
-
-static
-bool findMask(const NGHolder &g, vector<CharReach> *mask, bool *anchored,
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose mask construction from NGHolder.
+ */
+#include "ng_fixed_width.h"
+
+#include "grey.h"
+#include "ng_holder.h"
+#include "ng_util.h"
+#include "rose/rose_build.h"
+#include "util/container.h"
+#include "ue2common.h"
+
+#include <algorithm>
+#include <iterator>
+#include <set>
+
+using namespace std;
+
+namespace ue2 {
+
+static
+bool findMask(const NGHolder &g, vector<CharReach> *mask, bool *anchored,
flat_set<ReportID> *reports) {
- DEBUG_PRINTF("looking for a mask pattern\n");
- set<NFAVertex> s_succ;
- insert(&s_succ, adjacent_vertices(g.start, g));
-
- set<NFAVertex> sds_succ;
- insert(&sds_succ, adjacent_vertices(g.startDs, g));
-
- *anchored = sds_succ.size() == 1; /* sds itself */
- bool floating = is_subset_of(s_succ, sds_succ);
-
- DEBUG_PRINTF("sds %zu s %zu%s%s\n", sds_succ.size(), s_succ.size(),
- *anchored ? " anchored" : "", floating ? " floating" : "");
- if (!*anchored && !floating) {
- DEBUG_PRINTF("semi-anchored\n");
- return false;
- }
-
- set<NFAVertex> &succs = *anchored ? s_succ : sds_succ;
- succs.erase(g.startDs);
- if (succs.size() != 1) {
- DEBUG_PRINTF("branchy root\n");
- return false;
- }
-
- NFAVertex u = *anchored ? g.start : g.startDs;
- NFAVertex v = *succs.begin();
-
- while (true) {
+ DEBUG_PRINTF("looking for a mask pattern\n");
+ set<NFAVertex> s_succ;
+ insert(&s_succ, adjacent_vertices(g.start, g));
+
+ set<NFAVertex> sds_succ;
+ insert(&sds_succ, adjacent_vertices(g.startDs, g));
+
+ *anchored = sds_succ.size() == 1; /* sds itself */
+ bool floating = is_subset_of(s_succ, sds_succ);
+
+ DEBUG_PRINTF("sds %zu s %zu%s%s\n", sds_succ.size(), s_succ.size(),
+ *anchored ? " anchored" : "", floating ? " floating" : "");
+ if (!*anchored && !floating) {
+ DEBUG_PRINTF("semi-anchored\n");
+ return false;
+ }
+
+ set<NFAVertex> &succs = *anchored ? s_succ : sds_succ;
+ succs.erase(g.startDs);
+ if (succs.size() != 1) {
+ DEBUG_PRINTF("branchy root\n");
+ return false;
+ }
+
+ NFAVertex u = *anchored ? g.start : g.startDs;
+ NFAVertex v = *succs.begin();
+
+ while (true) {
DEBUG_PRINTF("validating vertex %zu\n", g[v].index);
-
- assert(v != g.acceptEod);
-
- // If we've reached an accept, we MAY have found a valid Rose pattern
- if (v == g.accept) {
- DEBUG_PRINTF("accept\n");
- insert(reports, g[u].reports);
- return true;
- }
-
- mask->push_back(g[v].char_reach);
-
- if (out_degree(v, g) != 1) {
- DEBUG_PRINTF("out_degree != 1\n");
- return false; /* not a chain */
- }
-
- u = v;
- v = *adjacent_vertices(v, g).first;
-
- if (in_degree(v, g) != 1) {
- DEBUG_PRINTF("blargh\n"); /* picks up cases where there is no path
- * to case accept (large cycles),
- * ensures term */
- return false;
- }
- }
-}
-
-bool handleFixedWidth(RoseBuild &rose, const NGHolder &g, const Grey &grey) {
- if (!grey.roseMasks) {
- return false;
- }
-
- if (in_degree(g.acceptEod,g) != 1) {
- DEBUG_PRINTF("EOD anchoring not supported\n");
- return false;
- }
-
+
+ assert(v != g.acceptEod);
+
+ // If we've reached an accept, we MAY have found a valid Rose pattern
+ if (v == g.accept) {
+ DEBUG_PRINTF("accept\n");
+ insert(reports, g[u].reports);
+ return true;
+ }
+
+ mask->push_back(g[v].char_reach);
+
+ if (out_degree(v, g) != 1) {
+ DEBUG_PRINTF("out_degree != 1\n");
+ return false; /* not a chain */
+ }
+
+ u = v;
+ v = *adjacent_vertices(v, g).first;
+
+ if (in_degree(v, g) != 1) {
+ DEBUG_PRINTF("blargh\n"); /* picks up cases where there is no path
+ * to case accept (large cycles),
+ * ensures term */
+ return false;
+ }
+ }
+}
+
+bool handleFixedWidth(RoseBuild &rose, const NGHolder &g, const Grey &grey) {
+ if (!grey.roseMasks) {
+ return false;
+ }
+
+ if (in_degree(g.acceptEod,g) != 1) {
+ DEBUG_PRINTF("EOD anchoring not supported\n");
+ return false;
+ }
+
flat_set<ReportID> reports;
- bool anchored = false;
- vector<CharReach> mask;
-
- if (!findMask(g, &mask, &anchored, &reports)) {
- return false;
- }
-
- DEBUG_PRINTF("%smasky masky\n", anchored ? "anchored " : "");
-
- assert(!mask.empty());
- assert(!reports.empty());
-
- if (rose.add(anchored, mask, reports)) {
- DEBUG_PRINTF("added as rose mask\n");
- return true;
- } else {
- DEBUG_PRINTF("failed to add masky\n");
- return false;
- }
-}
-
-} // namespace ue2
+ bool anchored = false;
+ vector<CharReach> mask;
+
+ if (!findMask(g, &mask, &anchored, &reports)) {
+ return false;
+ }
+
+ DEBUG_PRINTF("%smasky masky\n", anchored ? "anchored " : "");
+
+ assert(!mask.empty());
+ assert(!reports.empty());
+
+ if (rose.add(anchored, mask, reports)) {
+ DEBUG_PRINTF("added as rose mask\n");
+ return true;
+ } else {
+ DEBUG_PRINTF("failed to add masky\n");
+ return false;
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.h b/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.h
index 7a2d0fff3b..d8286742cd 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_fixed_width.h
@@ -1,46 +1,46 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose mask construction from NGHolder.
- */
-
-#ifndef NG_FIXED_WIDTH_H
-#define NG_FIXED_WIDTH_H
-
-namespace ue2 {
-
-class RoseBuild;
-class NGHolder;
-struct Grey;
-
-bool handleFixedWidth(RoseBuild &build, const NGHolder &g, const Grey &grey);
-
-} // namespace ue2
-
-#endif // NG_FIXED_WIDTH_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose mask construction from NGHolder.
+ */
+
+#ifndef NG_FIXED_WIDTH_H
+#define NG_FIXED_WIDTH_H
+
+namespace ue2 {
+
+class RoseBuild;
+class NGHolder;
+struct Grey;
+
+bool handleFixedWidth(RoseBuild &build, const NGHolder &g, const Grey &grey);
+
+} // namespace ue2
+
+#endif // NG_FIXED_WIDTH_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_haig.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_haig.cpp
index f6594616c4..8054544772 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_haig.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_haig.cpp
@@ -1,124 +1,124 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Build code for Haig SOM DFA.
- */
-#include "ng_haig.h"
-
-#include "grey.h"
-#include "nfa/goughcompile.h"
-#include "ng_holder.h"
-#include "ng_mcclellan_internal.h"
-#include "ng_som_util.h"
-#include "ng_squash.h"
-#include "util/bitfield.h"
-#include "util/container.h"
-#include "util/determinise.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Build code for Haig SOM DFA.
+ */
+#include "ng_haig.h"
+
+#include "grey.h"
+#include "nfa/goughcompile.h"
+#include "ng_holder.h"
+#include "ng_mcclellan_internal.h"
+#include "ng_som_util.h"
+#include "ng_squash.h"
+#include "util/bitfield.h"
+#include "util/container.h"
+#include "util/determinise.h"
#include "util/flat_containers.h"
#include "util/graph.h"
-#include "util/graph_range.h"
+#include "util/graph_range.h"
#include "util/hash_dynamic_bitset.h"
-#include "util/make_unique.h"
+#include "util/make_unique.h"
#include "util/unordered.h"
-
-#include <algorithm>
-#include <functional>
-#include <map>
-#include <set>
-#include <vector>
-#include <boost/dynamic_bitset.hpp>
-
-using namespace std;
-using boost::dynamic_bitset;
-
-namespace ue2 {
-
-#define NFA_STATE_LIMIT 256
-
-#define HAIG_MAX_NFA_STATE 600
-#define HAIG_MAX_LIVE_SOM_SLOTS 32
-
-namespace {
-struct haig_too_wide {
-};
-
-template<typename stateset>
-static
+
+#include <algorithm>
+#include <functional>
+#include <map>
+#include <set>
+#include <vector>
+#include <boost/dynamic_bitset.hpp>
+
+using namespace std;
+using boost::dynamic_bitset;
+
+namespace ue2 {
+
+#define NFA_STATE_LIMIT 256
+
+#define HAIG_MAX_NFA_STATE 600
+#define HAIG_MAX_LIVE_SOM_SLOTS 32
+
+namespace {
+struct haig_too_wide {
+};
+
+template<typename stateset>
+static
void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused,
- stateset *init, stateset *initDS,
- vector<NFAVertex> *v_by_index) {
+ stateset *init, stateset *initDS,
+ vector<NFAVertex> *v_by_index) {
DEBUG_PRINTF("graph kind: %s\n", to_string(g.kind).c_str());
- for (auto v : vertices_range(g)) {
+ for (auto v : vertices_range(g)) {
if (contains(unused, v)) {
- continue;
- }
+ continue;
+ }
u32 v_index = g[v].index;
- if (is_any_start(v, g)) {
- init->set(v_index);
- if (hasSelfLoop(v, g) || is_triggered(g)) {
- DEBUG_PRINTF("setting %u\n", v_index);
- initDS->set(v_index);
- }
- }
- assert(v_index < init->size());
- }
-
- v_by_index->clear();
+ if (is_any_start(v, g)) {
+ init->set(v_index);
+ if (hasSelfLoop(v, g) || is_triggered(g)) {
+ DEBUG_PRINTF("setting %u\n", v_index);
+ initDS->set(v_index);
+ }
+ }
+ assert(v_index < init->size());
+ }
+
+ v_by_index->clear();
v_by_index->resize(num_vertices(g), NGHolder::null_vertex());
-
- for (auto v : vertices_range(g)) {
- u32 v_index = g[v].index;
+
+ for (auto v : vertices_range(g)) {
+ u32 v_index = g[v].index;
assert((*v_by_index)[v_index] == NGHolder::null_vertex());
- (*v_by_index)[v_index] = v;
- }
-}
-
-template<typename StateSet>
-void populateAccepts(const NGHolder &g, StateSet *accept, StateSet *acceptEod) {
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- accept->set(g[v].index);
- }
- for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
- if (v == g.accept) {
- continue;
- }
- acceptEod->set(g[v].index);
- }
-}
-
+ (*v_by_index)[v_index] = v;
+ }
+}
+
+template<typename StateSet>
+void populateAccepts(const NGHolder &g, StateSet *accept, StateSet *acceptEod) {
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ accept->set(g[v].index);
+ }
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ if (v == g.accept) {
+ continue;
+ }
+ acceptEod->set(g[v].index);
+ }
+}
+
template<typename Automaton_Traits>
-class Automaton_Base {
+class Automaton_Base {
public:
using StateSet = typename Automaton_Traits::StateSet;
using StateMap = typename Automaton_Traits::StateMap;
-protected:
+protected:
Automaton_Base(const NGHolder &graph_in, som_type som,
const vector<vector<CharReach>> &triggers,
bool unordered_som)
@@ -131,50 +131,50 @@ protected:
acceptEod(Automaton_Traits::init_states(numStates)),
toppable(Automaton_Traits::init_states(numStates)),
dead(Automaton_Traits::init_states(numStates)) {
- calculateAlphabet(graph, alpha, unalpha, &alphasize);
- assert(alphasize <= ALPHABET_SIZE);
-
+ calculateAlphabet(graph, alpha, unalpha, &alphasize);
+ assert(alphasize <= ALPHABET_SIZE);
+
populateInit(graph, unused, &init, &initDS, &v_by_index);
- populateAccepts(graph, &accept, &acceptEod);
-
- start_anchored = DEAD_STATE + 1;
- if (initDS == init) {
- start_floating = start_anchored;
- } else if (initDS.any()) {
- start_floating = start_anchored + 1;
- } else {
- start_floating = DEAD_STATE;
- }
-
+ populateAccepts(graph, &accept, &acceptEod);
+
+ start_anchored = DEAD_STATE + 1;
+ if (initDS == init) {
+ start_floating = start_anchored;
+ } else if (initDS.any()) {
+ start_floating = start_anchored + 1;
+ } else {
+ start_floating = DEAD_STATE;
+ }
+
cr_by_index = populateCR(graph, v_by_index, alpha);
- if (!unordered_som) {
- for (const auto &sq : findSquashers(graph, som)) {
- NFAVertex v = sq.first;
- u32 vert_id = graph[v].index;
- squash.set(vert_id);
- squash_mask[vert_id] = shrinkStateSet(sq.second);
- }
- }
-
- if (is_triggered(graph)) {
+ if (!unordered_som) {
+ for (const auto &sq : findSquashers(graph, som)) {
+ NFAVertex v = sq.first;
+ u32 vert_id = graph[v].index;
+ squash.set(vert_id);
+ squash_mask[vert_id] = shrinkStateSet(sq.second);
+ }
+ }
+
+ if (is_triggered(graph)) {
dynamic_bitset<> temp(numStates);
markToppableStarts(graph, unused, false, triggers, &temp);
toppable = Automaton_Traits::copy_states(temp, numStates);
- }
- }
-
-private:
- // Convert an NFAStateSet (as used by the squash code) into a StateSet.
- StateSet shrinkStateSet(const NFAStateSet &in) const {
+ }
+ }
+
+private:
+ // Convert an NFAStateSet (as used by the squash code) into a StateSet.
+ StateSet shrinkStateSet(const NFAStateSet &in) const {
StateSet out = Automaton_Traits::init_states(numStates);
- for (size_t i = in.find_first(); i != in.npos && i < out.size();
- i = in.find_next(i)) {
- out.set(i);
- }
- return out;
- }
-
+ for (size_t i = in.find_first(); i != in.npos && i < out.size();
+ i = in.find_next(i)) {
+ out.set(i);
+ }
+ return out;
+ }
+
void reports_i(const StateSet &in, bool eod, flat_set<ReportID> &rv) {
StateSet acc = in & (eod ? acceptEod : accept);
for (size_t i = acc.find_first(); i != StateSet::npos;
@@ -186,27 +186,27 @@ private:
}
}
-public:
- void transition(const StateSet &in, StateSet *next) {
- transition_graph(*this, v_by_index, in, next);
- }
-
- const vector<StateSet> initial() {
+public:
+ void transition(const StateSet &in, StateSet *next) {
+ transition_graph(*this, v_by_index, in, next);
+ }
+
+ const vector<StateSet> initial() {
vector<StateSet> rv = {init};
- if (start_floating != DEAD_STATE && start_floating != start_anchored) {
- rv.push_back(initDS);
- }
- return rv;
- }
-
- void reports(const StateSet &in, flat_set<ReportID> &rv) {
- reports_i(in, false, rv);
- }
-
- void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
- reports_i(in, true, rv);
- }
-
+ if (start_floating != DEAD_STATE && start_floating != start_anchored) {
+ rv.push_back(initDS);
+ }
+ return rv;
+ }
+
+ void reports(const StateSet &in, flat_set<ReportID> &rv) {
+ reports_i(in, false, rv);
+ }
+
+ void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
+ reports_i(in, true, rv);
+ }
+
static bool canPrune(const flat_set<ReportID> &) { return false; }
const NGHolder &graph;
@@ -223,40 +223,40 @@ public:
u16 start_anchored;
u16 start_floating;
- vector<NFAVertex> v_by_index;
- vector<CharReach> cr_by_index; /* pre alpha'ed */
- StateSet init;
- StateSet initDS;
- StateSet squash; /* states which allow us to mask out other states */
- StateSet accept;
- StateSet acceptEod;
- StateSet toppable; /* states which are allowed to be on when a top arrives,
- * triggered dfas only */
- map<u32, StateSet> squash_mask;
- StateSet dead;
-};
-
+ vector<NFAVertex> v_by_index;
+ vector<CharReach> cr_by_index; /* pre alpha'ed */
+ StateSet init;
+ StateSet initDS;
+ StateSet squash; /* states which allow us to mask out other states */
+ StateSet accept;
+ StateSet acceptEod;
+ StateSet toppable; /* states which are allowed to be on when a top arrives,
+ * triggered dfas only */
+ map<u32, StateSet> squash_mask;
+ StateSet dead;
+};
+
struct Big_Traits {
using StateSet = dynamic_bitset<>;
using StateMap = unordered_map<StateSet, dstate_id_t, hash_dynamic_bitset>;
-
+
static StateSet init_states(u32 num) {
return StateSet(num);
}
-
+
static StateSet copy_states(const dynamic_bitset<> &in, UNUSED u32 num) {
assert(in.size() == num);
return in;
}
};
-
+
class Automaton_Big : public Automaton_Base<Big_Traits> {
public:
Automaton_Big(const NGHolder &graph_in, som_type som,
const vector<vector<CharReach>> &triggers, bool unordered_som)
: Automaton_Base(graph_in, som, triggers, unordered_som) {}
};
-
+
struct Graph_Traits {
using StateSet = bitfield<NFA_STATE_LIMIT>;
using StateMap = unordered_map<StateSet, dstate_id_t>;
@@ -264,520 +264,520 @@ struct Graph_Traits {
static StateSet init_states(UNUSED u32 num) {
assert(num <= NFA_STATE_LIMIT);
return StateSet();
- }
-
+ }
+
static StateSet copy_states(const dynamic_bitset<> &in, u32 num) {
StateSet out = init_states(num);
- for (size_t i = in.find_first(); i != in.npos && i < out.size();
- i = in.find_next(i)) {
- out.set(i);
- }
- return out;
- }
+ for (size_t i = in.find_first(); i != in.npos && i < out.size();
+ i = in.find_next(i)) {
+ out.set(i);
+ }
+ return out;
+ }
};
-
+
class Automaton_Graph : public Automaton_Base<Graph_Traits> {
-public:
+public:
Automaton_Graph(const NGHolder &graph_in, som_type som,
const vector<vector<CharReach>> &triggers,
bool unordered_som)
: Automaton_Base(graph_in, som, triggers, unordered_som) {}
-};
-
-class Automaton_Haig_Merge {
-public:
+};
+
+class Automaton_Haig_Merge {
+public:
using StateSet = vector<u16>;
using StateMap = ue2_unordered_map<StateSet, dstate_id_t>;
-
- explicit Automaton_Haig_Merge(const vector<const raw_som_dfa *> &in)
- : nfas(in.begin(), in.end()), dead(in.size()) {
- calculateAlphabet();
- populateAsFs();
- }
-
- void populateAsFs(void) {
- bool fs_same = true;
- bool fs_dead = true;
-
- as.resize(nfas.size());
- fs.resize(nfas.size());
- for (u32 i = 0; i < nfas.size(); i++) {
- as[i] = nfas[i]->start_anchored;
- fs[i] = nfas[i]->start_floating;
-
- if (fs[i]) {
- fs_dead = false;
- }
-
- if (as[i] != fs[i]) {
- fs_same = false;
- }
- }
-
- start_anchored = DEAD_STATE + 1;
- if (fs_same) {
- start_floating = start_anchored;
- } else if (fs_dead) {
- start_floating = DEAD_STATE;
- } else {
- start_floating = start_anchored + 1;
- }
- }
-
- void calculateAlphabet(void) {
- DEBUG_PRINTF("calculating alphabet\n");
- vector<CharReach> esets(1, CharReach::dot());
-
- for (const auto &haig : nfas) {
- DEBUG_PRINTF("...next dfa alphabet\n");
- assert(haig);
- const auto &alpha_remap = haig->alpha_remap;
-
- for (size_t i = 0; i < esets.size(); i++) {
- assert(esets[i].any());
- if (esets[i].count() == 1) {
- DEBUG_PRINTF("skipping singleton eq set\n");
- continue;
- }
-
- CharReach t;
- u8 leader_s = alpha_remap[esets[i].find_first()];
-
- DEBUG_PRINTF("checking eq set, leader %02hhx \n", leader_s);
-
- for (size_t s = esets[i].find_first();
- s != CharReach::npos; s = esets[i].find_next(s)) {
- if (alpha_remap[s] != leader_s) {
- t.set(s);
- }
- }
-
- if (t.any() && t != esets[i]) {
- esets[i] &= ~t;
- esets.push_back(t);
- }
- }
- }
-
- alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
- }
-
- void transition(const StateSet &in, StateSet *next) {
- u16 t[ALPHABET_SIZE];
-
- for (u32 i = 0; i < alphasize; i++) {
- next[i].resize(nfas.size());
- }
-
- for (u32 j = 0; j < nfas.size(); j++) {
- getFullTransitionFromState(*nfas[j], in[j], t);
- for (u32 i = 0; i < alphasize; i++) {
- next[i][j]= t[unalpha[i]];
- }
- }
- }
-
- const vector<StateSet> initial() {
- vector<StateSet> rv(1, as);
- if (start_floating != DEAD_STATE && start_floating != start_anchored) {
- rv.push_back(fs);
- }
- return rv;
- }
-
-private:
- void reports_i(const StateSet &in, flat_set<ReportID> dstate::*r_set,
- flat_set<ReportID> &r) {
- for (u32 i = 0; i < nfas.size(); i++) {
- const auto &rs = nfas[i]->states[in[i]].*r_set;
- insert(&r, rs);
- }
- }
-
-public:
- void reports(const StateSet &in, flat_set<ReportID> &rv) {
- reports_i(in, &dstate::reports, rv);
- }
- void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
- reports_i(in, &dstate::reports_eod, rv);
- }
-
- static bool canPrune(const flat_set<ReportID> &) { return false; }
-
-private:
- vector<const raw_som_dfa *> nfas;
- vector<dstate_id_t> as;
- vector<dstate_id_t> fs;
-public:
- array<u16, ALPHABET_SIZE> alpha;
- array<u16, ALPHABET_SIZE> unalpha;
- u16 alphasize;
- StateSet dead;
-
- u16 start_anchored;
- u16 start_floating;
-};
-}
-
-enum bslm_mode {
- ONLY_EXISTING,
- INCLUDE_INVALID
-};
-
-static
-bool is_any_start_inc_virtual(NFAVertex v, const NGHolder &g) {
- return is_virtual_start(v, g) || is_any_start(v, g);
-}
-
-static
+
+ explicit Automaton_Haig_Merge(const vector<const raw_som_dfa *> &in)
+ : nfas(in.begin(), in.end()), dead(in.size()) {
+ calculateAlphabet();
+ populateAsFs();
+ }
+
+ void populateAsFs(void) {
+ bool fs_same = true;
+ bool fs_dead = true;
+
+ as.resize(nfas.size());
+ fs.resize(nfas.size());
+ for (u32 i = 0; i < nfas.size(); i++) {
+ as[i] = nfas[i]->start_anchored;
+ fs[i] = nfas[i]->start_floating;
+
+ if (fs[i]) {
+ fs_dead = false;
+ }
+
+ if (as[i] != fs[i]) {
+ fs_same = false;
+ }
+ }
+
+ start_anchored = DEAD_STATE + 1;
+ if (fs_same) {
+ start_floating = start_anchored;
+ } else if (fs_dead) {
+ start_floating = DEAD_STATE;
+ } else {
+ start_floating = start_anchored + 1;
+ }
+ }
+
+ void calculateAlphabet(void) {
+ DEBUG_PRINTF("calculating alphabet\n");
+ vector<CharReach> esets(1, CharReach::dot());
+
+ for (const auto &haig : nfas) {
+ DEBUG_PRINTF("...next dfa alphabet\n");
+ assert(haig);
+ const auto &alpha_remap = haig->alpha_remap;
+
+ for (size_t i = 0; i < esets.size(); i++) {
+ assert(esets[i].any());
+ if (esets[i].count() == 1) {
+ DEBUG_PRINTF("skipping singleton eq set\n");
+ continue;
+ }
+
+ CharReach t;
+ u8 leader_s = alpha_remap[esets[i].find_first()];
+
+ DEBUG_PRINTF("checking eq set, leader %02hhx \n", leader_s);
+
+ for (size_t s = esets[i].find_first();
+ s != CharReach::npos; s = esets[i].find_next(s)) {
+ if (alpha_remap[s] != leader_s) {
+ t.set(s);
+ }
+ }
+
+ if (t.any() && t != esets[i]) {
+ esets[i] &= ~t;
+ esets.push_back(t);
+ }
+ }
+ }
+
+ alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
+ }
+
+ void transition(const StateSet &in, StateSet *next) {
+ u16 t[ALPHABET_SIZE];
+
+ for (u32 i = 0; i < alphasize; i++) {
+ next[i].resize(nfas.size());
+ }
+
+ for (u32 j = 0; j < nfas.size(); j++) {
+ getFullTransitionFromState(*nfas[j], in[j], t);
+ for (u32 i = 0; i < alphasize; i++) {
+ next[i][j]= t[unalpha[i]];
+ }
+ }
+ }
+
+ const vector<StateSet> initial() {
+ vector<StateSet> rv(1, as);
+ if (start_floating != DEAD_STATE && start_floating != start_anchored) {
+ rv.push_back(fs);
+ }
+ return rv;
+ }
+
+private:
+ void reports_i(const StateSet &in, flat_set<ReportID> dstate::*r_set,
+ flat_set<ReportID> &r) {
+ for (u32 i = 0; i < nfas.size(); i++) {
+ const auto &rs = nfas[i]->states[in[i]].*r_set;
+ insert(&r, rs);
+ }
+ }
+
+public:
+ void reports(const StateSet &in, flat_set<ReportID> &rv) {
+ reports_i(in, &dstate::reports, rv);
+ }
+ void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
+ reports_i(in, &dstate::reports_eod, rv);
+ }
+
+ static bool canPrune(const flat_set<ReportID> &) { return false; }
+
+private:
+ vector<const raw_som_dfa *> nfas;
+ vector<dstate_id_t> as;
+ vector<dstate_id_t> fs;
+public:
+ array<u16, ALPHABET_SIZE> alpha;
+ array<u16, ALPHABET_SIZE> unalpha;
+ u16 alphasize;
+ StateSet dead;
+
+ u16 start_anchored;
+ u16 start_floating;
+};
+}
+
+enum bslm_mode {
+ ONLY_EXISTING,
+ INCLUDE_INVALID
+};
+
+static
+bool is_any_start_inc_virtual(NFAVertex v, const NGHolder &g) {
+ return is_virtual_start(v, g) || is_any_start(v, g);
+}
+
+static
s32 getSlotID(const NGHolder &g, UNUSED const flat_set<NFAVertex> &unused,
- NFAVertex v) {
- if (is_triggered(g) && v == g.start) {
+ NFAVertex v) {
+ if (is_triggered(g) && v == g.start) {
assert(!contains(unused, v));
- } else if (is_any_start_inc_virtual(v, g)) {
- return CREATE_NEW_SOM;
- }
-
- return g[v].index;
-}
-
-template<typename stateset>
-static
-void haig_do_preds(const NGHolder &g, const stateset &nfa_states,
- const vector<NFAVertex> &state_mapping,
- som_tran_info &preds) {
- for (size_t i = nfa_states.find_first(); i != stateset::npos;
- i = nfa_states.find_next(i)) {
- NFAVertex v = state_mapping[i];
- s32 slot_id = g[v].index;
-
+ } else if (is_any_start_inc_virtual(v, g)) {
+ return CREATE_NEW_SOM;
+ }
+
+ return g[v].index;
+}
+
+template<typename stateset>
+static
+void haig_do_preds(const NGHolder &g, const stateset &nfa_states,
+ const vector<NFAVertex> &state_mapping,
+ som_tran_info &preds) {
+ for (size_t i = nfa_states.find_first(); i != stateset::npos;
+ i = nfa_states.find_next(i)) {
+ NFAVertex v = state_mapping[i];
+ s32 slot_id = g[v].index;
+
DEBUG_PRINTF("d vertex %zu\n", g[v].index);
- vector<u32> &out_map = preds[slot_id];
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- out_map.push_back(g[u].index);
- }
-
- sort(out_map.begin(), out_map.end());
- assert(!out_map.empty() || v == g.start);
- }
-}
-
-template<typename stateset>
-static
+ vector<u32> &out_map = preds[slot_id];
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ out_map.push_back(g[u].index);
+ }
+
+ sort(out_map.begin(), out_map.end());
+ assert(!out_map.empty() || v == g.start);
+ }
+}
+
+template<typename stateset>
+static
void haig_do_report(const NGHolder &g, const flat_set<NFAVertex> &unused,
- NFAVertex accept_v, const stateset &source_nfa_states,
- const vector<NFAVertex> &state_mapping,
- set<som_report> &out) {
- for (size_t i = source_nfa_states.find_first(); i != stateset::npos;
- i = source_nfa_states.find_next(i)) {
- NFAVertex v = state_mapping[i];
- if (!edge(v, accept_v, g).second) {
- continue;
- }
- for (ReportID report_id : g[v].reports) {
+ NFAVertex accept_v, const stateset &source_nfa_states,
+ const vector<NFAVertex> &state_mapping,
+ set<som_report> &out) {
+ for (size_t i = source_nfa_states.find_first(); i != stateset::npos;
+ i = source_nfa_states.find_next(i)) {
+ NFAVertex v = state_mapping[i];
+ if (!edge(v, accept_v, g).second) {
+ continue;
+ }
+ for (ReportID report_id : g[v].reports) {
out.insert(som_report(report_id, getSlotID(g, unused, v)));
- }
- }
-}
-
-static
-void haig_note_starts(const NGHolder &g, map<u32, u32> *out) {
- if (is_triggered(g)) {
- return;
- }
-
- DEBUG_PRINTF("seeing who creates new som values\n");
-
- vector<DepthMinMax> depths = getDistancesFromSOM(g);
-
- for (auto v : vertices_range(g)) {
- if (is_any_start_inc_virtual(v, g)) {
+ }
+ }
+}
+
+static
+void haig_note_starts(const NGHolder &g, map<u32, u32> *out) {
+ if (is_triggered(g)) {
+ return;
+ }
+
+ DEBUG_PRINTF("seeing who creates new som values\n");
+
+ vector<DepthMinMax> depths = getDistancesFromSOM(g);
+
+ for (auto v : vertices_range(g)) {
+ if (is_any_start_inc_virtual(v, g)) {
DEBUG_PRINTF("%zu creates new som value\n", g[v].index);
- out->emplace(g[v].index, 0U);
- continue;
- }
-
- if (is_any_accept(v, g)) {
- continue;
- }
-
- const DepthMinMax &d = depths[g[v].index];
- if (d.min == d.max && d.min.is_finite()) {
+ out->emplace(g[v].index, 0U);
+ continue;
+ }
+
+ if (is_any_accept(v, g)) {
+ continue;
+ }
+
+ const DepthMinMax &d = depths[g[v].index];
+ if (d.min == d.max && d.min.is_finite()) {
DEBUG_PRINTF("%zu is fixed at %u\n", g[v].index, (u32)d.min);
- out->emplace(g[v].index, d.min);
- }
- }
-}
-
-template<class Auto>
-static
+ out->emplace(g[v].index, d.min);
+ }
+ }
+}
+
+template<class Auto>
+static
bool doHaig(const NGHolder &g, som_type som,
const vector<vector<CharReach>> &triggers, bool unordered_som,
raw_som_dfa *rdfa) {
- u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from
- a fight */
+ u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from
+ a fight */
using StateSet = typename Auto::StateSet;
- vector<StateSet> nfa_state_map;
+ vector<StateSet> nfa_state_map;
Auto n(g, som, triggers, unordered_som);
- try {
+ try {
if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
- DEBUG_PRINTF("state limit exceeded\n");
- return false;
- }
- } catch (haig_too_wide &) {
- DEBUG_PRINTF("too many live som states\n");
- return false;
- }
-
- rdfa->start_anchored = n.start_anchored;
- rdfa->start_floating = n.start_floating;
- rdfa->alpha_size = n.alphasize;
- rdfa->alpha_remap = n.alpha;
-
- rdfa->state_som.reserve(rdfa->states.size());
- for (u32 i = 0; i < rdfa->states.size(); i++) {
- rdfa->state_som.push_back(dstate_som());
- const StateSet &source_states = nfa_state_map[i];
- if (source_states.count() > HAIG_MAX_LIVE_SOM_SLOTS) {
- DEBUG_PRINTF("too many live states\n");
- return false;
- }
-
- DEBUG_PRINTF("generating som info for %u\n", i);
-
- haig_do_preds(g, source_states, n.v_by_index,
- rdfa->state_som.back().preds);
-
+ DEBUG_PRINTF("state limit exceeded\n");
+ return false;
+ }
+ } catch (haig_too_wide &) {
+ DEBUG_PRINTF("too many live som states\n");
+ return false;
+ }
+
+ rdfa->start_anchored = n.start_anchored;
+ rdfa->start_floating = n.start_floating;
+ rdfa->alpha_size = n.alphasize;
+ rdfa->alpha_remap = n.alpha;
+
+ rdfa->state_som.reserve(rdfa->states.size());
+ for (u32 i = 0; i < rdfa->states.size(); i++) {
+ rdfa->state_som.push_back(dstate_som());
+ const StateSet &source_states = nfa_state_map[i];
+ if (source_states.count() > HAIG_MAX_LIVE_SOM_SLOTS) {
+ DEBUG_PRINTF("too many live states\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("generating som info for %u\n", i);
+
+ haig_do_preds(g, source_states, n.v_by_index,
+ rdfa->state_som.back().preds);
+
haig_do_report(g, n.unused, g.accept, source_states, n.v_by_index,
- rdfa->state_som.back().reports);
+ rdfa->state_som.back().reports);
haig_do_report(g, n.unused, g.acceptEod, source_states, n.v_by_index,
- rdfa->state_som.back().reports_eod);
- }
-
- haig_note_starts(g, &rdfa->new_som_nfa_states);
-
- return true;
-}
-
+ rdfa->state_som.back().reports_eod);
+ }
+
+ haig_note_starts(g, &rdfa->new_som_nfa_states);
+
+ return true;
+}
+
unique_ptr<raw_som_dfa>
attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision,
const vector<vector<CharReach>> &triggers, const Grey &grey,
bool unordered_som) {
- assert(is_triggered(g) != triggers.empty());
- assert(!unordered_som || is_triggered(g));
-
- if (!grey.allowGough) {
- /* must be at least one engine capable of handling raw som dfas */
- return nullptr;
- }
-
- DEBUG_PRINTF("attempting to build haig \n");
- assert(allMatchStatesHaveReports(g));
- assert(hasCorrectlyNumberedVertices(g));
-
- u32 numStates = num_vertices(g);
- if (numStates > HAIG_MAX_NFA_STATE) {
- DEBUG_PRINTF("giving up... looks too big\n");
- return nullptr;
- }
-
- auto rdfa = ue2::make_unique<raw_som_dfa>(g.kind, unordered_som, NODE_START,
- somPrecision);
-
- DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates);
- bool rv;
- if (numStates <= NFA_STATE_LIMIT) {
- /* fast path */
+ assert(is_triggered(g) != triggers.empty());
+ assert(!unordered_som || is_triggered(g));
+
+ if (!grey.allowGough) {
+ /* must be at least one engine capable of handling raw som dfas */
+ return nullptr;
+ }
+
+ DEBUG_PRINTF("attempting to build haig \n");
+ assert(allMatchStatesHaveReports(g));
+ assert(hasCorrectlyNumberedVertices(g));
+
+ u32 numStates = num_vertices(g);
+ if (numStates > HAIG_MAX_NFA_STATE) {
+ DEBUG_PRINTF("giving up... looks too big\n");
+ return nullptr;
+ }
+
+ auto rdfa = ue2::make_unique<raw_som_dfa>(g.kind, unordered_som, NODE_START,
+ somPrecision);
+
+ DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates);
+ bool rv;
+ if (numStates <= NFA_STATE_LIMIT) {
+ /* fast path */
rv = doHaig<Automaton_Graph>(g, som, triggers, unordered_som,
- rdfa.get());
- } else {
- /* not the fast path */
+ rdfa.get());
+ } else {
+ /* not the fast path */
rv = doHaig<Automaton_Big>(g, som, triggers, unordered_som, rdfa.get());
- }
-
- if (!rv) {
- return nullptr;
- }
-
- DEBUG_PRINTF("determinised, building impl dfa (a,f) = (%hu,%hu)\n",
- rdfa->start_anchored, rdfa->start_floating);
-
- assert(rdfa->kind == g.kind);
- return rdfa;
-}
-
-static
-void haig_merge_do_preds(const vector<const raw_som_dfa *> &dfas,
- const vector<u32> &per_dfa_adj,
- const vector<dstate_id_t> &source_nfa_states,
- som_tran_info &som_tran) {
- for (u32 d = 0; d < dfas.size(); ++d) {
- u32 adj = per_dfa_adj[d];
-
- const som_tran_info &som_tran_d
- = dfas[d]->state_som[source_nfa_states[d]].preds;
- for (som_tran_info::const_iterator it = som_tran_d.begin();
- it != som_tran_d.end(); ++it) {
- assert(it->first != CREATE_NEW_SOM);
- u32 dest_slot = it->first < N_SPECIALS ? it->first
- : it->first + adj;
- vector<u32> &out = som_tran[dest_slot];
-
- if (!out.empty()) {
- /* stylised specials already done; it does not matter who builds
- the preds */
- assert(dest_slot < N_SPECIALS);
- continue;
- }
- for (vector<u32>::const_iterator jt = it->second.begin();
- jt != it->second.end(); ++jt) {
- if (*jt < N_SPECIALS || *jt == CREATE_NEW_SOM) {
- out.push_back(*jt);
- } else {
- out.push_back(*jt + adj);
- }
- }
- }
- }
-}
-
-static
-void haig_merge_note_starts(const vector<const raw_som_dfa *> &dfas,
- const vector<u32> &per_dfa_adj,
- map<u32, u32> *out) {
- for (u32 d = 0; d < dfas.size(); ++d) {
- u32 adj = per_dfa_adj[d];
- const map<u32, u32> &new_soms = dfas[d]->new_som_nfa_states;
- for (map<u32, u32>::const_iterator it = new_soms.begin();
- it != new_soms.end(); ++it) {
- if (it->first < N_SPECIALS) {
- assert(!it->second);
- out->emplace(it->first, 0U);
- } else {
- assert(d + 1 >= per_dfa_adj.size()
- || it->first + adj < per_dfa_adj[d + 1]);
- out->emplace(it->first + adj, it->second);
- }
- }
- }
-}
-
-static never_inline
-void haig_merge_do_report(const vector<const raw_som_dfa *> &dfas,
- const vector<u32> &per_dfa_adj,
- const vector<dstate_id_t> &source_nfa_states,
- bool eod, set<som_report> &out) {
- for (u32 d = 0; d < dfas.size(); ++d) {
- u32 adj = per_dfa_adj[d];
-
- const set<som_report> &reps = eod
- ? dfas[d]->state_som[source_nfa_states[d]].reports_eod
- : dfas[d]->state_som[source_nfa_states[d]].reports;
- for (set<som_report>::const_iterator it = reps.begin();
- it != reps.end(); ++it) {
- u32 slot = it->slot;
- if (slot != CREATE_NEW_SOM && slot >= N_SPECIALS) {
- slot += adj;
- }
- out.insert(som_report(it->report, slot));
- }
- }
-}
-
-static
-u32 total_slots_used(const raw_som_dfa &rdfa) {
- u32 rv = 0;
- for (vector<dstate_som>::const_iterator it = rdfa.state_som.begin();
- it != rdfa.state_som.end(); ++it) {
- for (som_tran_info::const_iterator jt = it->preds.begin();
- jt != it->preds.end(); ++jt) {
- assert(jt->first != CREATE_NEW_SOM);
- ENSURE_AT_LEAST(&rv, jt->first + 1);
- }
- }
- const map<u32, u32> &new_soms = rdfa.new_som_nfa_states;
- for (map<u32, u32>::const_iterator it = new_soms.begin();
- it != new_soms.end(); ++it) {
- ENSURE_AT_LEAST(&rv, it->first + 1);
- }
- return rv;
-}
-
-unique_ptr<raw_som_dfa> attemptToMergeHaig(const vector<const raw_som_dfa *> &dfas,
- u32 limit) {
- assert(!dfas.empty());
-
- Automaton_Haig_Merge n(dfas);
-
- DEBUG_PRINTF("merging %zu dfas\n", dfas.size());
-
- bool unordered_som = false;
- for (const auto &haig : dfas) {
- assert(haig);
- assert(haig->kind == dfas.front()->kind);
- unordered_som |= haig->unordered_som_triggers;
- if (haig->states.size() > limit) {
- DEBUG_PRINTF("too many states!\n");
- return nullptr;
- }
- }
-
+ }
+
+ if (!rv) {
+ return nullptr;
+ }
+
+ DEBUG_PRINTF("determinised, building impl dfa (a,f) = (%hu,%hu)\n",
+ rdfa->start_anchored, rdfa->start_floating);
+
+ assert(rdfa->kind == g.kind);
+ return rdfa;
+}
+
+static
+void haig_merge_do_preds(const vector<const raw_som_dfa *> &dfas,
+ const vector<u32> &per_dfa_adj,
+ const vector<dstate_id_t> &source_nfa_states,
+ som_tran_info &som_tran) {
+ for (u32 d = 0; d < dfas.size(); ++d) {
+ u32 adj = per_dfa_adj[d];
+
+ const som_tran_info &som_tran_d
+ = dfas[d]->state_som[source_nfa_states[d]].preds;
+ for (som_tran_info::const_iterator it = som_tran_d.begin();
+ it != som_tran_d.end(); ++it) {
+ assert(it->first != CREATE_NEW_SOM);
+ u32 dest_slot = it->first < N_SPECIALS ? it->first
+ : it->first + adj;
+ vector<u32> &out = som_tran[dest_slot];
+
+ if (!out.empty()) {
+ /* stylised specials already done; it does not matter who builds
+ the preds */
+ assert(dest_slot < N_SPECIALS);
+ continue;
+ }
+ for (vector<u32>::const_iterator jt = it->second.begin();
+ jt != it->second.end(); ++jt) {
+ if (*jt < N_SPECIALS || *jt == CREATE_NEW_SOM) {
+ out.push_back(*jt);
+ } else {
+ out.push_back(*jt + adj);
+ }
+ }
+ }
+ }
+}
+
+static
+void haig_merge_note_starts(const vector<const raw_som_dfa *> &dfas,
+ const vector<u32> &per_dfa_adj,
+ map<u32, u32> *out) {
+ for (u32 d = 0; d < dfas.size(); ++d) {
+ u32 adj = per_dfa_adj[d];
+ const map<u32, u32> &new_soms = dfas[d]->new_som_nfa_states;
+ for (map<u32, u32>::const_iterator it = new_soms.begin();
+ it != new_soms.end(); ++it) {
+ if (it->first < N_SPECIALS) {
+ assert(!it->second);
+ out->emplace(it->first, 0U);
+ } else {
+ assert(d + 1 >= per_dfa_adj.size()
+ || it->first + adj < per_dfa_adj[d + 1]);
+ out->emplace(it->first + adj, it->second);
+ }
+ }
+ }
+}
+
+static never_inline
+void haig_merge_do_report(const vector<const raw_som_dfa *> &dfas,
+ const vector<u32> &per_dfa_adj,
+ const vector<dstate_id_t> &source_nfa_states,
+ bool eod, set<som_report> &out) {
+ for (u32 d = 0; d < dfas.size(); ++d) {
+ u32 adj = per_dfa_adj[d];
+
+ const set<som_report> &reps = eod
+ ? dfas[d]->state_som[source_nfa_states[d]].reports_eod
+ : dfas[d]->state_som[source_nfa_states[d]].reports;
+ for (set<som_report>::const_iterator it = reps.begin();
+ it != reps.end(); ++it) {
+ u32 slot = it->slot;
+ if (slot != CREATE_NEW_SOM && slot >= N_SPECIALS) {
+ slot += adj;
+ }
+ out.insert(som_report(it->report, slot));
+ }
+ }
+}
+
+static
+u32 total_slots_used(const raw_som_dfa &rdfa) {
+ u32 rv = 0;
+ for (vector<dstate_som>::const_iterator it = rdfa.state_som.begin();
+ it != rdfa.state_som.end(); ++it) {
+ for (som_tran_info::const_iterator jt = it->preds.begin();
+ jt != it->preds.end(); ++jt) {
+ assert(jt->first != CREATE_NEW_SOM);
+ ENSURE_AT_LEAST(&rv, jt->first + 1);
+ }
+ }
+ const map<u32, u32> &new_soms = rdfa.new_som_nfa_states;
+ for (map<u32, u32>::const_iterator it = new_soms.begin();
+ it != new_soms.end(); ++it) {
+ ENSURE_AT_LEAST(&rv, it->first + 1);
+ }
+ return rv;
+}
+
+unique_ptr<raw_som_dfa> attemptToMergeHaig(const vector<const raw_som_dfa *> &dfas,
+ u32 limit) {
+ assert(!dfas.empty());
+
+ Automaton_Haig_Merge n(dfas);
+
+ DEBUG_PRINTF("merging %zu dfas\n", dfas.size());
+
+ bool unordered_som = false;
+ for (const auto &haig : dfas) {
+ assert(haig);
+ assert(haig->kind == dfas.front()->kind);
+ unordered_som |= haig->unordered_som_triggers;
+ if (haig->states.size() > limit) {
+ DEBUG_PRINTF("too many states!\n");
+ return nullptr;
+ }
+ }
+
using StateSet = Automaton_Haig_Merge::StateSet;
- vector<StateSet> nfa_state_map;
- auto rdfa = ue2::make_unique<raw_som_dfa>(dfas[0]->kind, unordered_som,
- NODE_START,
- dfas[0]->stream_som_loc_width);
-
+ vector<StateSet> nfa_state_map;
+ auto rdfa = ue2::make_unique<raw_som_dfa>(dfas[0]->kind, unordered_som,
+ NODE_START,
+ dfas[0]->stream_som_loc_width);
+
if (!determinise(n, rdfa->states, limit, &nfa_state_map)) {
DEBUG_PRINTF("state limit (%u) exceeded\n", limit);
- return nullptr; /* over state limit */
- }
-
- rdfa->start_anchored = n.start_anchored;
- rdfa->start_floating = n.start_floating;
- rdfa->alpha_size = n.alphasize;
- rdfa->alpha_remap = n.alpha;
-
- vector<u32> per_dfa_adj;
- u32 curr_adj = 0;
- for (const auto &haig : dfas) {
- per_dfa_adj.push_back(curr_adj);
- curr_adj += total_slots_used(*haig);
- if (curr_adj < per_dfa_adj.back()) {
- /* overflowed our som slot count */
- return nullptr;
- }
- }
-
- rdfa->state_som.reserve(rdfa->states.size());
- for (u32 i = 0; i < rdfa->states.size(); i++) {
- rdfa->state_som.push_back(dstate_som());
- const vector<dstate_id_t> &source_nfa_states = nfa_state_map[i];
- DEBUG_PRINTF("finishing state %u\n", i);
-
- haig_merge_do_preds(dfas, per_dfa_adj, source_nfa_states,
- rdfa->state_som.back().preds);
-
- if (rdfa->state_som.back().preds.size() > HAIG_MAX_LIVE_SOM_SLOTS) {
- DEBUG_PRINTF("som slot limit exceeded (%zu)\n",
- rdfa->state_som.back().preds.size());
- return nullptr;
- }
-
- haig_merge_do_report(dfas, per_dfa_adj, source_nfa_states,
- false /* not eod */,
- rdfa->state_som.back().reports);
- haig_merge_do_report(dfas, per_dfa_adj, source_nfa_states,
- true /* eod */,
- rdfa->state_som.back().reports_eod);
- }
-
- haig_merge_note_starts(dfas, per_dfa_adj, &rdfa->new_som_nfa_states);
-
- DEBUG_PRINTF("merged, building impl dfa (a,f) = (%hu,%hu)\n",
- rdfa->start_anchored, rdfa->start_floating);
-
- return rdfa;
-}
-
-} // namespace ue2
+ return nullptr; /* over state limit */
+ }
+
+ rdfa->start_anchored = n.start_anchored;
+ rdfa->start_floating = n.start_floating;
+ rdfa->alpha_size = n.alphasize;
+ rdfa->alpha_remap = n.alpha;
+
+ vector<u32> per_dfa_adj;
+ u32 curr_adj = 0;
+ for (const auto &haig : dfas) {
+ per_dfa_adj.push_back(curr_adj);
+ curr_adj += total_slots_used(*haig);
+ if (curr_adj < per_dfa_adj.back()) {
+ /* overflowed our som slot count */
+ return nullptr;
+ }
+ }
+
+ rdfa->state_som.reserve(rdfa->states.size());
+ for (u32 i = 0; i < rdfa->states.size(); i++) {
+ rdfa->state_som.push_back(dstate_som());
+ const vector<dstate_id_t> &source_nfa_states = nfa_state_map[i];
+ DEBUG_PRINTF("finishing state %u\n", i);
+
+ haig_merge_do_preds(dfas, per_dfa_adj, source_nfa_states,
+ rdfa->state_som.back().preds);
+
+ if (rdfa->state_som.back().preds.size() > HAIG_MAX_LIVE_SOM_SLOTS) {
+ DEBUG_PRINTF("som slot limit exceeded (%zu)\n",
+ rdfa->state_som.back().preds.size());
+ return nullptr;
+ }
+
+ haig_merge_do_report(dfas, per_dfa_adj, source_nfa_states,
+ false /* not eod */,
+ rdfa->state_som.back().reports);
+ haig_merge_do_report(dfas, per_dfa_adj, source_nfa_states,
+ true /* eod */,
+ rdfa->state_som.back().reports_eod);
+ }
+
+ haig_merge_note_starts(dfas, per_dfa_adj, &rdfa->new_som_nfa_states);
+
+ DEBUG_PRINTF("merged, building impl dfa (a,f) = (%hu,%hu)\n",
+ rdfa->start_anchored, rdfa->start_floating);
+
+ return rdfa;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_haig.h b/contrib/libs/hyperscan/src/nfagraph/ng_haig.h
index 136c2a7ddf..baff2f5866 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_haig.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_haig.h
@@ -1,68 +1,68 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Build code for Haig SOM DFA.
- */
-
-#ifndef NG_HAIG_H
-#define NG_HAIG_H
-
-#include "ue2common.h"
-#include "som/som.h"
-
-#include <memory>
-#include <vector>
-
-namespace ue2 {
-
-class CharReach;
-class NGHolder;
-struct Grey;
-struct raw_som_dfa;
-
-#define HAIG_FINAL_DFA_STATE_LIMIT 16383
-#define HAIG_HARD_DFA_STATE_LIMIT 8192
-
-/* unordered_som_triggers being true indicates that a live haig may be subjected
- * to later tops arriving with earlier soms (without the haig going dead in
- * between)
- */
-
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Build code for Haig SOM DFA.
+ */
+
+#ifndef NG_HAIG_H
+#define NG_HAIG_H
+
+#include "ue2common.h"
+#include "som/som.h"
+
+#include <memory>
+#include <vector>
+
+namespace ue2 {
+
+class CharReach;
+class NGHolder;
+struct Grey;
+struct raw_som_dfa;
+
+#define HAIG_FINAL_DFA_STATE_LIMIT 16383
+#define HAIG_HARD_DFA_STATE_LIMIT 8192
+
+/* unordered_som_triggers being true indicates that a live haig may be subjected
+ * to later tops arriving with earlier soms (without the haig going dead in
+ * between)
+ */
+
std::unique_ptr<raw_som_dfa>
attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision,
const std::vector<std::vector<CharReach>> &triggers,
const Grey &grey, bool unordered_som_triggers = false);
-
-std::unique_ptr<raw_som_dfa>
-attemptToMergeHaig(const std::vector<const raw_som_dfa *> &dfas,
- u32 limit = HAIG_HARD_DFA_STATE_LIMIT);
-
-} // namespace ue2
-
-#endif
+
+std::unique_ptr<raw_som_dfa>
+attemptToMergeHaig(const std::vector<const raw_som_dfa *> &dfas,
+ u32 limit = HAIG_HARD_DFA_STATE_LIMIT);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_holder.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_holder.cpp
index cbe2aadc25..a2fbb28863 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_holder.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_holder.cpp
@@ -1,98 +1,98 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "ng_holder.h"
-
-#include "ng_util.h"
-#include "ue2common.h"
-
-using namespace std;
-
-namespace ue2 {
-
-// internal use only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ng_holder.h"
+
+#include "ng_util.h"
+#include "ue2common.h"
+
+using namespace std;
+
+namespace ue2 {
+
+// internal use only
static NFAVertex addSpecialVertex(NGHolder &g, SpecialNodes id) {
NFAVertex v(add_vertex(g));
- g[v].index = id;
- return v;
-}
-
-NGHolder::NGHolder(nfa_kind k)
+ g[v].index = id;
+ return v;
+}
+
+NGHolder::NGHolder(nfa_kind k)
: kind (k),
- // add initial special nodes
+ // add initial special nodes
start(addSpecialVertex(*this, NODE_START)),
startDs(addSpecialVertex(*this, NODE_START_DOTSTAR)),
accept(addSpecialVertex(*this, NODE_ACCEPT)),
acceptEod(addSpecialVertex(*this, NODE_ACCEPT_EOD)) {
-
- // wire up some fake edges for the stylized bits of the NFA
- add_edge(start, startDs, *this);
- add_edge(startDs, startDs, *this);
- add_edge(accept, acceptEod, *this);
-
+
+ // wire up some fake edges for the stylized bits of the NFA
+ add_edge(start, startDs, *this);
+ add_edge(startDs, startDs, *this);
+ add_edge(accept, acceptEod, *this);
+
(*this)[start].char_reach.setall();
(*this)[startDs].char_reach.setall();
-}
-
-NGHolder::~NGHolder(void) {
- DEBUG_PRINTF("destroying holder @ %p\n", this);
-}
-
-void clear_graph(NGHolder &h) {
+}
+
+NGHolder::~NGHolder(void) {
+ DEBUG_PRINTF("destroying holder @ %p\n", this);
+}
+
+void clear_graph(NGHolder &h) {
NGHolder::vertex_iterator vi, ve;
- for (tie(vi, ve) = vertices(h); vi != ve;) {
- NFAVertex v = *vi;
- ++vi;
-
- clear_vertex(v, h);
- if (!is_special(v, h)) {
- remove_vertex(v, h);
- }
- }
-
- assert(num_vertices(h) == N_SPECIALS);
+ for (tie(vi, ve) = vertices(h); vi != ve;) {
+ NFAVertex v = *vi;
+ ++vi;
+
+ clear_vertex(v, h);
+ if (!is_special(v, h)) {
+ remove_vertex(v, h);
+ }
+ }
+
+ assert(num_vertices(h) == N_SPECIALS);
renumber_vertices(h); /* ensure that we reset our next allocated index */
renumber_edges(h);
-
- // Recreate special stylised edges.
- add_edge(h.start, h.startDs, h);
- add_edge(h.startDs, h.startDs, h);
- add_edge(h.accept, h.acceptEod, h);
-}
-
-NFAVertex NGHolder::getSpecialVertex(u32 id) const {
- switch (id) {
+
+ // Recreate special stylised edges.
+ add_edge(h.start, h.startDs, h);
+ add_edge(h.startDs, h.startDs, h);
+ add_edge(h.accept, h.acceptEod, h);
+}
+
+NFAVertex NGHolder::getSpecialVertex(u32 id) const {
+ switch (id) {
case NODE_START: return start;
case NODE_START_DOTSTAR: return startDs;
case NODE_ACCEPT: return accept;
case NODE_ACCEPT_EOD: return acceptEod;
default: return null_vertex();
- }
-}
-
-}
+ }
+}
+
+}
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_holder.h b/contrib/libs/hyperscan/src/nfagraph/ng_holder.h
index 8edc534835..36cf62447b 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_holder.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_holder.h
@@ -1,31 +1,31 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/** \file
* \brief Definition of the NGHolder type used for to represent general nfa
* graphs as well as all associated types (vertex and edge properties, etc).
@@ -34,17 +34,17 @@
* accepts.
*/
-#ifndef NG_HOLDER_H
-#define NG_HOLDER_H
-
-#include "ue2common.h"
-#include "nfa/nfa_kind.h"
+#ifndef NG_HOLDER_H
+#define NG_HOLDER_H
+
+#include "ue2common.h"
+#include "nfa/nfa_kind.h"
#include "util/charreach.h"
#include "util/flat_containers.h"
#include "util/ue2_graph.h"
-
-namespace ue2 {
-
+
+namespace ue2 {
+
/** \brief Properties associated with each vertex in an NFAGraph. */
struct NFAGraphVertexProps {
/** \brief Set of characters on which this vertex is reachable. */
@@ -95,139 +95,139 @@ enum SpecialNodes {
N_SPECIALS
};
-/** \brief Encapsulates an NFAGraph, stores special vertices and other
- * metadata.
- *
- * When constructed, the graph will have the following stylised "special"
- * edges:
- *
- * - (start, startDs)
- * - (startDs, startDs) (self-loop)
- * - (accept, acceptEod)
- */
+/** \brief Encapsulates an NFAGraph, stores special vertices and other
+ * metadata.
+ *
+ * When constructed, the graph will have the following stylised "special"
+ * edges:
+ *
+ * - (start, startDs)
+ * - (startDs, startDs) (self-loop)
+ * - (accept, acceptEod)
+ */
class NGHolder : public ue2_graph<NGHolder, NFAGraphVertexProps,
NFAGraphEdgeProps> {
-public:
- explicit NGHolder(nfa_kind kind);
+public:
+ explicit NGHolder(nfa_kind kind);
NGHolder(void) : NGHolder(NFA_OUTFIX) {};
- virtual ~NGHolder(void);
-
+ virtual ~NGHolder(void);
+
nfa_kind kind; /* Role that this plays in Rose */
-
+
static const size_t N_SPECIAL_VERTICES = N_SPECIALS;
public:
const vertex_descriptor start; //!< Anchored start vertex.
const vertex_descriptor startDs; //!< Unanchored start-dotstar vertex.
const vertex_descriptor accept; //!< Accept vertex.
const vertex_descriptor acceptEod; //!< Accept at EOD vertex.
-
+
vertex_descriptor getSpecialVertex(u32 id) const;
};
-
+
typedef NGHolder::vertex_descriptor NFAVertex;
typedef NGHolder::edge_descriptor NFAEdge;
-
-/** \brief True if the vertex \p v is one of our special vertices. */
-template <typename GraphT>
+
+/** \brief True if the vertex \p v is one of our special vertices. */
+template <typename GraphT>
bool is_special(const typename GraphT::vertex_descriptor v, const GraphT &g) {
- return g[v].index < N_SPECIALS;
-}
-
-/**
- * \brief Clears all non-special vertices and edges from the graph.
- *
- * Note: not the same as the BGL's clear() function, which removes all vertices
- * and edges.
- */
-void clear_graph(NGHolder &h);
-
-/*
- * \brief Clear and remove all of the vertices pointed to by the given iterator
- * range.
- *
- * If renumber is false, no renumbering of vertex indices is done.
- *
- * Note: should not be called with iterators that will be invalidated by vertex
- * removal (such as NFAGraph::vertex_iterator).
- */
-template <class Iter>
-void remove_vertices(Iter begin, Iter end, NGHolder &h, bool renumber = true) {
- if (begin == end) {
- return;
- }
-
- for (Iter it = begin; it != end; ++it) {
- NFAVertex v = *it;
- if (!is_special(v, h)) {
- clear_vertex(v, h);
- remove_vertex(v, h);
- } else {
- assert(0);
- }
- }
-
- if (renumber) {
+ return g[v].index < N_SPECIALS;
+}
+
+/**
+ * \brief Clears all non-special vertices and edges from the graph.
+ *
+ * Note: not the same as the BGL's clear() function, which removes all vertices
+ * and edges.
+ */
+void clear_graph(NGHolder &h);
+
+/*
+ * \brief Clear and remove all of the vertices pointed to by the given iterator
+ * range.
+ *
+ * If renumber is false, no renumbering of vertex indices is done.
+ *
+ * Note: should not be called with iterators that will be invalidated by vertex
+ * removal (such as NFAGraph::vertex_iterator).
+ */
+template <class Iter>
+void remove_vertices(Iter begin, Iter end, NGHolder &h, bool renumber = true) {
+ if (begin == end) {
+ return;
+ }
+
+ for (Iter it = begin; it != end; ++it) {
+ NFAVertex v = *it;
+ if (!is_special(v, h)) {
+ clear_vertex(v, h);
+ remove_vertex(v, h);
+ } else {
+ assert(0);
+ }
+ }
+
+ if (renumber) {
renumber_edges(h);
renumber_vertices(h);
- }
-}
-
-/** \brief Clear and remove all of the vertices pointed to by the vertex
- * descriptors in the given container.
- *
- * This is a convenience wrapper around the iterator variant above.
- */
-template <class Container>
-void remove_vertices(const Container &c, NGHolder &h, bool renumber = true) {
- remove_vertices(c.begin(), c.end(), h, renumber);
-}
-
-/*
- * \brief Clear and remove all of the edges pointed to by the given iterator
- * range.
- *
- * If renumber is false, no renumbering of vertex indices is done.
- *
- * Note: should not be called with iterators that will be invalidated by vertex
- * removal (such as NFAGraph::edge_iterator).
- */
-template <class Iter>
-void remove_edges(Iter begin, Iter end, NGHolder &h, bool renumber = true) {
- if (begin == end) {
- return;
- }
-
- for (Iter it = begin; it != end; ++it) {
- const NFAEdge &e = *it;
- remove_edge(e, h);
- }
-
- if (renumber) {
+ }
+}
+
+/** \brief Clear and remove all of the vertices pointed to by the vertex
+ * descriptors in the given container.
+ *
+ * This is a convenience wrapper around the iterator variant above.
+ */
+template <class Container>
+void remove_vertices(const Container &c, NGHolder &h, bool renumber = true) {
+ remove_vertices(c.begin(), c.end(), h, renumber);
+}
+
+/*
+ * \brief Clear and remove all of the edges pointed to by the given iterator
+ * range.
+ *
+ * If renumber is false, no renumbering of vertex indices is done.
+ *
+ * Note: should not be called with iterators that will be invalidated by vertex
+ * removal (such as NFAGraph::edge_iterator).
+ */
+template <class Iter>
+void remove_edges(Iter begin, Iter end, NGHolder &h, bool renumber = true) {
+ if (begin == end) {
+ return;
+ }
+
+ for (Iter it = begin; it != end; ++it) {
+ const NFAEdge &e = *it;
+ remove_edge(e, h);
+ }
+
+ if (renumber) {
renumber_edges(h);
- }
-}
-
+ }
+}
+
#define DEFAULT_TOP 0U
-/** \brief Clear and remove all of the edges pointed to by the edge descriptors
- * in the given container.
- *
- * This is a convenience wrapper around the iterator variant above.
- */
-template <class Container>
-void remove_edges(const Container &c, NGHolder &h, bool renumber = true) {
- remove_edges(c.begin(), c.end(), h, renumber);
-}
-
+/** \brief Clear and remove all of the edges pointed to by the edge descriptors
+ * in the given container.
+ *
+ * This is a convenience wrapper around the iterator variant above.
+ */
+template <class Container>
+void remove_edges(const Container &c, NGHolder &h, bool renumber = true) {
+ remove_edges(c.begin(), c.end(), h, renumber);
+}
+
inline
-bool is_triggered(const NGHolder &g) {
- return is_triggered(g.kind);
-}
-
+bool is_triggered(const NGHolder &g) {
+ return is_triggered(g.kind);
+}
+
inline
-bool generates_callbacks(const NGHolder &g) {
- return generates_callbacks(g.kind);
-}
+bool generates_callbacks(const NGHolder &g) {
+ return generates_callbacks(g.kind);
+}
inline
bool has_managed_reports(const NGHolder &g) {
@@ -239,6 +239,6 @@ bool inspects_states_for_accepts(const NGHolder &g) {
return inspects_states_for_accepts(g.kind);
}
-} // namespace ue2
-
-#endif
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.cpp
index 3e013ad5cc..35a09d0ea2 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.cpp
@@ -1,78 +1,78 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Loose equality testing for NGHolder graphs.
- *
- * Loose equality check for holders' graph structure and vertex_index,
- * vertex_char_reach and (optionally reports).
- */
-#include "ng_is_equal.h"
-
-#include "grey.h"
-#include "ng_holder.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/container.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Loose equality testing for NGHolder graphs.
+ *
+ * Loose equality check for holders' graph structure and vertex_index,
+ * vertex_char_reach and (optionally reports).
+ */
+#include "ng_is_equal.h"
+
+#include "grey.h"
+#include "ng_holder.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/container.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
-#include "util/make_unique.h"
-
-using namespace std;
-
-namespace ue2 {
-
-namespace {
-struct check_report {
- virtual ~check_report() {}
- virtual bool operator()(const flat_set<ReportID> &reports_a,
- const flat_set<ReportID> &reports_b) const = 0;
-};
-
-struct full_check_report : public check_report {
- bool operator()(const flat_set<ReportID> &reports_a,
- const flat_set<ReportID> &reports_b) const override {
- return reports_a == reports_b;
- }
-};
-
-struct equiv_check_report : public check_report {
- equiv_check_report(ReportID a_in, ReportID b_in)
- : a_rep(a_in), b_rep(b_in) {}
-
- bool operator()(const flat_set<ReportID> &reports_a,
- const flat_set<ReportID> &reports_b) const override {
- return contains(reports_a, a_rep) == contains(reports_b, b_rep);
- }
-private:
- ReportID a_rep;
- ReportID b_rep;
-};
+#include "util/graph_range.h"
+#include "util/make_unique.h"
+
+using namespace std;
+
+namespace ue2 {
+
+namespace {
+struct check_report {
+ virtual ~check_report() {}
+ virtual bool operator()(const flat_set<ReportID> &reports_a,
+ const flat_set<ReportID> &reports_b) const = 0;
+};
+
+struct full_check_report : public check_report {
+ bool operator()(const flat_set<ReportID> &reports_a,
+ const flat_set<ReportID> &reports_b) const override {
+ return reports_a == reports_b;
+ }
+};
+
+struct equiv_check_report : public check_report {
+ equiv_check_report(ReportID a_in, ReportID b_in)
+ : a_rep(a_in), b_rep(b_in) {}
+
+ bool operator()(const flat_set<ReportID> &reports_a,
+ const flat_set<ReportID> &reports_b) const override {
+ return contains(reports_a, a_rep) == contains(reports_b, b_rep);
+ }
+private:
+ ReportID a_rep;
+ ReportID b_rep;
+};
/** Comparison functor used to sort by vertex_index. */
template<typename Graph>
@@ -91,141 +91,141 @@ template<typename Graph>
static
VertexIndexOrdering<Graph> make_index_ordering(const Graph &g) {
return VertexIndexOrdering<Graph>(g);
-}
-
}
-static
-bool is_equal_i(const NGHolder &a, const NGHolder &b,
- const check_report &check_rep) {
- assert(hasCorrectlyNumberedVertices(a));
- assert(hasCorrectlyNumberedVertices(b));
-
- size_t num_verts = num_vertices(a);
- if (num_verts != num_vertices(b)) {
- return false;
- }
-
- vector<NFAVertex> vert_a;
- vector<NFAVertex> vert_b;
- vector<NFAVertex> adj_a;
- vector<NFAVertex> adj_b;
-
- vert_a.reserve(num_verts);
- vert_b.reserve(num_verts);
- adj_a.reserve(num_verts);
- adj_b.reserve(num_verts);
-
- insert(&vert_a, vert_a.end(), vertices(a));
- insert(&vert_b, vert_b.end(), vertices(b));
-
- sort(vert_a.begin(), vert_a.end(), make_index_ordering(a));
- sort(vert_b.begin(), vert_b.end(), make_index_ordering(b));
-
- for (size_t i = 0; i < vert_a.size(); i++) {
- NFAVertex va = vert_a[i];
- NFAVertex vb = vert_b[i];
+}
+
+static
+bool is_equal_i(const NGHolder &a, const NGHolder &b,
+ const check_report &check_rep) {
+ assert(hasCorrectlyNumberedVertices(a));
+ assert(hasCorrectlyNumberedVertices(b));
+
+ size_t num_verts = num_vertices(a);
+ if (num_verts != num_vertices(b)) {
+ return false;
+ }
+
+ vector<NFAVertex> vert_a;
+ vector<NFAVertex> vert_b;
+ vector<NFAVertex> adj_a;
+ vector<NFAVertex> adj_b;
+
+ vert_a.reserve(num_verts);
+ vert_b.reserve(num_verts);
+ adj_a.reserve(num_verts);
+ adj_b.reserve(num_verts);
+
+ insert(&vert_a, vert_a.end(), vertices(a));
+ insert(&vert_b, vert_b.end(), vertices(b));
+
+ sort(vert_a.begin(), vert_a.end(), make_index_ordering(a));
+ sort(vert_b.begin(), vert_b.end(), make_index_ordering(b));
+
+ for (size_t i = 0; i < vert_a.size(); i++) {
+ NFAVertex va = vert_a[i];
+ NFAVertex vb = vert_b[i];
DEBUG_PRINTF("vertex %zu\n", a[va].index);
-
- // Vertex index must be the same.
- if (a[va].index != b[vb].index) {
- DEBUG_PRINTF("bad index\n");
- return false;
- }
-
- // Reach must be the same.
- if (a[va].char_reach != b[vb].char_reach) {
- DEBUG_PRINTF("bad reach\n");
- return false;
- }
-
- if (!check_rep(a[va].reports, b[vb].reports)) {
- DEBUG_PRINTF("bad reports\n");
- return false;
- }
-
- // Other vertex properties may vary.
-
- /* Check successors */
- adj_a.clear();
- adj_b.clear();
- insert(&adj_a, adj_a.end(), adjacent_vertices(va, a));
- insert(&adj_b, adj_b.end(), adjacent_vertices(vb, b));
-
- if (adj_a.size() != adj_b.size()) {
- DEBUG_PRINTF("bad adj\n");
- return false;
- }
-
- sort(adj_a.begin(), adj_a.end(), make_index_ordering(a));
- sort(adj_b.begin(), adj_b.end(), make_index_ordering(b));
-
- for (size_t j = 0; j < adj_a.size(); j++) {
- if (a[adj_a[j]].index != b[adj_b[j]].index) {
- DEBUG_PRINTF("bad adj\n");
- return false;
- }
- }
- }
-
- /* check top for edges out of start */
+
+ // Vertex index must be the same.
+ if (a[va].index != b[vb].index) {
+ DEBUG_PRINTF("bad index\n");
+ return false;
+ }
+
+ // Reach must be the same.
+ if (a[va].char_reach != b[vb].char_reach) {
+ DEBUG_PRINTF("bad reach\n");
+ return false;
+ }
+
+ if (!check_rep(a[va].reports, b[vb].reports)) {
+ DEBUG_PRINTF("bad reports\n");
+ return false;
+ }
+
+ // Other vertex properties may vary.
+
+ /* Check successors */
+ adj_a.clear();
+ adj_b.clear();
+ insert(&adj_a, adj_a.end(), adjacent_vertices(va, a));
+ insert(&adj_b, adj_b.end(), adjacent_vertices(vb, b));
+
+ if (adj_a.size() != adj_b.size()) {
+ DEBUG_PRINTF("bad adj\n");
+ return false;
+ }
+
+ sort(adj_a.begin(), adj_a.end(), make_index_ordering(a));
+ sort(adj_b.begin(), adj_b.end(), make_index_ordering(b));
+
+ for (size_t j = 0; j < adj_a.size(); j++) {
+ if (a[adj_a[j]].index != b[adj_b[j]].index) {
+ DEBUG_PRINTF("bad adj\n");
+ return false;
+ }
+ }
+ }
+
+ /* check top for edges out of start */
vector<pair<u32, flat_set<u32>>> top_a;
vector<pair<u32, flat_set<u32>>> top_b;
-
- for (const auto &e : out_edges_range(a.start, a)) {
+
+ for (const auto &e : out_edges_range(a.start, a)) {
top_a.emplace_back(a[target(e, a)].index, a[e].tops);
- }
- for (const auto &e : out_edges_range(b.start, b)) {
+ }
+ for (const auto &e : out_edges_range(b.start, b)) {
top_b.emplace_back(b[target(e, b)].index, b[e].tops);
- }
-
- sort(top_a.begin(), top_a.end());
- sort(top_b.begin(), top_b.end());
-
- if (top_a != top_b) {
- DEBUG_PRINTF("bad top\n");
- return false;
- }
-
- DEBUG_PRINTF("good\n");
- return true;
-}
-
-/** \brief loose hash of an NGHolder; equal if is_equal would return true. */
-u64a hash_holder(const NGHolder &g) {
- size_t rv = 0;
-
- for (auto v : vertices_range(g)) {
+ }
+
+ sort(top_a.begin(), top_a.end());
+ sort(top_b.begin(), top_b.end());
+
+ if (top_a != top_b) {
+ DEBUG_PRINTF("bad top\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("good\n");
+ return true;
+}
+
+/** \brief loose hash of an NGHolder; equal if is_equal would return true. */
+u64a hash_holder(const NGHolder &g) {
+ size_t rv = 0;
+
+ for (auto v : vertices_range(g)) {
hash_combine(rv, g[v].index);
hash_combine(rv, g[v].char_reach);
-
- for (auto w : adjacent_vertices_range(v, g)) {
+
+ for (auto w : adjacent_vertices_range(v, g)) {
hash_combine(rv, g[w].index);
- }
- }
-
- return rv;
-}
-
-bool is_equal(const NGHolder &a, const NGHolder &b) {
- DEBUG_PRINTF("testing %p %p\n", &a, &b);
-
- if (&a == &b) {
- return true;
- }
-
- return is_equal_i(a, b, full_check_report());
-}
-
-bool is_equal(const NGHolder &a, ReportID a_rep,
- const NGHolder &b, ReportID b_rep) {
- DEBUG_PRINTF("testing %p %p\n", &a, &b);
-
- if (&a == &b && a_rep == b_rep) {
- return true;
- }
-
- return is_equal_i(a, b, equiv_check_report(a_rep, b_rep));
-}
-
-} // namespace ue2
+ }
+ }
+
+ return rv;
+}
+
+bool is_equal(const NGHolder &a, const NGHolder &b) {
+ DEBUG_PRINTF("testing %p %p\n", &a, &b);
+
+ if (&a == &b) {
+ return true;
+ }
+
+ return is_equal_i(a, b, full_check_report());
+}
+
+bool is_equal(const NGHolder &a, ReportID a_rep,
+ const NGHolder &b, ReportID b_rep) {
+ DEBUG_PRINTF("testing %p %p\n", &a, &b);
+
+ if (&a == &b && a_rep == b_rep) {
+ return true;
+ }
+
+ return is_equal_i(a, b, equiv_check_report(a_rep, b_rep));
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.h b/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.h
index baddc494b0..d8046270ff 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_is_equal.h
@@ -1,54 +1,54 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Loose equality testing for NGHolder graphs.
- *
- * Loose equality check for holders' graph structure and vertex_index,
- * vertex_char_reach and (optionally reports).
- */
-
-#ifndef NG_IS_EQUAL_H
-#define NG_IS_EQUAL_H
-
-#include "ue2common.h"
-
-#include <memory>
-
-namespace ue2 {
-
-class NGHolder;
-
-bool is_equal(const NGHolder &a, const NGHolder &b);
-bool is_equal(const NGHolder &a, ReportID a_r, const NGHolder &b, ReportID b_r);
-
-u64a hash_holder(const NGHolder &g);
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Loose equality testing for NGHolder graphs.
+ *
+ * Loose equality check for holders' graph structure and vertex_index,
+ * vertex_char_reach and (optionally reports).
+ */
+
+#ifndef NG_IS_EQUAL_H
+#define NG_IS_EQUAL_H
+
+#include "ue2common.h"
+
+#include <memory>
+
+namespace ue2 {
+
+class NGHolder;
+
+bool is_equal(const NGHolder &a, const NGHolder &b);
+bool is_equal(const NGHolder &a, ReportID a_r, const NGHolder &b, ReportID b_r);
+
+u64a hash_holder(const NGHolder &g);
+
// Util Functors
struct NGHolderHasher {
size_t operator()(const std::shared_ptr<const NGHolder> &h) const {
@@ -59,13 +59,13 @@ struct NGHolderHasher {
}
};
-struct NGHolderEqual {
- bool operator()(const std::shared_ptr<const NGHolder> &a,
- const std::shared_ptr<const NGHolder> &b) const {
- return is_equal(*a, *b);
- }
-};
-
-} // namespace ue2
-
-#endif // NG_IS_EQUAL_H
+struct NGHolderEqual {
+ bool operator()(const std::shared_ptr<const NGHolder> &a,
+ const std::shared_ptr<const NGHolder> &b) const {
+ return is_equal(*a, *b);
+ }
+};
+
+} // namespace ue2
+
+#endif // NG_IS_EQUAL_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp
index e6526a2414..d8ba503ce6 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp
@@ -1,349 +1,349 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Large Bounded Repeat (LBR) engine build code.
- */
-
-#include "ng_lbr.h"
-
-#include "grey.h"
-#include "ng_holder.h"
-#include "ng_repeat.h"
-#include "ng_reports.h"
+ * \brief Large Bounded Repeat (LBR) engine build code.
+ */
+
+#include "ng_lbr.h"
+
+#include "grey.h"
+#include "ng_holder.h"
+#include "ng_repeat.h"
+#include "ng_reports.h"
#include "nfa/castlecompile.h"
-#include "nfa/lbr_internal.h"
-#include "nfa/nfa_internal.h"
-#include "nfa/repeatcompile.h"
+#include "nfa/lbr_internal.h"
+#include "nfa/nfa_internal.h"
+#include "nfa/repeatcompile.h"
#include "nfa/shufticompile.h"
#include "nfa/trufflecompile.h"
-#include "util/alloc.h"
-#include "util/bitutils.h" // for lg2
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/depth.h"
-#include "util/dump_charclass.h"
+#include "util/alloc.h"
+#include "util/bitutils.h" // for lg2
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/depth.h"
+#include "util/dump_charclass.h"
#include "util/report_manager.h"
-#include "util/verify_types.h"
-
-using namespace std;
-
-namespace ue2 {
-
-static
-u32 depth_to_u32(const depth &d) {
- assert(d.is_reachable());
- if (d.is_infinite()) {
- return REPEAT_INF;
- }
-
- u32 d_val = d;
- assert(d_val < REPEAT_INF);
- return d_val;
-}
-
-template<class LbrStruct> static
-u64a* getTable(NFA *nfa) {
- char *ptr = (char *)nfa + sizeof(struct NFA) + sizeof(LbrStruct) +
- sizeof(RepeatInfo);
- ptr = ROUNDUP_PTR(ptr, alignof(u64a));
- return (u64a *)ptr;
-}
-
-template <class LbrStruct> static
-void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin,
- const depth &repeatMax, u32 minPeriod, enum RepeatType rtype) {
- assert(nfa);
-
- RepeatStateInfo rsi(rtype, repeatMin, repeatMax, minPeriod);
-
- DEBUG_PRINTF("selected %s model for {%s,%s} repeat\n",
- repeatTypeName(rtype), repeatMin.str().c_str(),
- repeatMax.str().c_str());
-
- // Fill the lbr_common structure first. Note that the RepeatInfo structure
- // directly follows the LbrStruct.
- const u32 info_offset = sizeof(LbrStruct);
- c->repeatInfoOffset = info_offset;
- c->report = report;
-
- RepeatInfo *info = (RepeatInfo *)((char *)c + info_offset);
- info->type = verify_u8(rtype);
- info->repeatMin = depth_to_u32(repeatMin);
- info->repeatMax = depth_to_u32(repeatMax);
- info->stateSize = rsi.stateSize;
- info->packedCtrlSize = rsi.packedCtrlSize;
- info->horizon = rsi.horizon;
- info->minPeriod = minPeriod;
- copy_bytes(&info->packedFieldSizes, rsi.packedFieldSizes);
- info->patchCount = rsi.patchCount;
- info->patchSize = rsi.patchSize;
- info->encodingSize = rsi.encodingSize;
- info->patchesOffset = rsi.patchesOffset;
-
- // Fill the NFA structure.
- nfa->nPositions = repeatMin;
- nfa->streamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize);
- nfa->scratchStateSize = (u32)sizeof(lbr_state);
- nfa->minWidth = verify_u32(repeatMin);
- nfa->maxWidth = repeatMax.is_finite() ? verify_u32(repeatMax) : 0;
-
- // Fill the lbr table for sparse lbr model.
- if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
- u64a *table = getTable<LbrStruct>(nfa);
- // Adjust table length according to the optimal patch length.
- size_t len = nfa->length;
- assert((u32)repeatMax >= rsi.patchSize);
- len -= sizeof(u64a) * ((u32)repeatMax - rsi.patchSize);
- nfa->length = verify_u32(len);
- info->length = verify_u32(sizeof(RepeatInfo)
- + sizeof(u64a) * (rsi.patchSize + 1));
- copy_bytes(table, rsi.table);
- }
-}
-
-template <class LbrStruct> static
+#include "util/verify_types.h"
+
+using namespace std;
+
+namespace ue2 {
+
+static
+u32 depth_to_u32(const depth &d) {
+ assert(d.is_reachable());
+ if (d.is_infinite()) {
+ return REPEAT_INF;
+ }
+
+ u32 d_val = d;
+ assert(d_val < REPEAT_INF);
+ return d_val;
+}
+
+template<class LbrStruct> static
+u64a* getTable(NFA *nfa) {
+ char *ptr = (char *)nfa + sizeof(struct NFA) + sizeof(LbrStruct) +
+ sizeof(RepeatInfo);
+ ptr = ROUNDUP_PTR(ptr, alignof(u64a));
+ return (u64a *)ptr;
+}
+
+template <class LbrStruct> static
+void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin,
+ const depth &repeatMax, u32 minPeriod, enum RepeatType rtype) {
+ assert(nfa);
+
+ RepeatStateInfo rsi(rtype, repeatMin, repeatMax, minPeriod);
+
+ DEBUG_PRINTF("selected %s model for {%s,%s} repeat\n",
+ repeatTypeName(rtype), repeatMin.str().c_str(),
+ repeatMax.str().c_str());
+
+ // Fill the lbr_common structure first. Note that the RepeatInfo structure
+ // directly follows the LbrStruct.
+ const u32 info_offset = sizeof(LbrStruct);
+ c->repeatInfoOffset = info_offset;
+ c->report = report;
+
+ RepeatInfo *info = (RepeatInfo *)((char *)c + info_offset);
+ info->type = verify_u8(rtype);
+ info->repeatMin = depth_to_u32(repeatMin);
+ info->repeatMax = depth_to_u32(repeatMax);
+ info->stateSize = rsi.stateSize;
+ info->packedCtrlSize = rsi.packedCtrlSize;
+ info->horizon = rsi.horizon;
+ info->minPeriod = minPeriod;
+ copy_bytes(&info->packedFieldSizes, rsi.packedFieldSizes);
+ info->patchCount = rsi.patchCount;
+ info->patchSize = rsi.patchSize;
+ info->encodingSize = rsi.encodingSize;
+ info->patchesOffset = rsi.patchesOffset;
+
+ // Fill the NFA structure.
+ nfa->nPositions = repeatMin;
+ nfa->streamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize);
+ nfa->scratchStateSize = (u32)sizeof(lbr_state);
+ nfa->minWidth = verify_u32(repeatMin);
+ nfa->maxWidth = repeatMax.is_finite() ? verify_u32(repeatMax) : 0;
+
+ // Fill the lbr table for sparse lbr model.
+ if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
+ u64a *table = getTable<LbrStruct>(nfa);
+ // Adjust table length according to the optimal patch length.
+ size_t len = nfa->length;
+ assert((u32)repeatMax >= rsi.patchSize);
+ len -= sizeof(u64a) * ((u32)repeatMax - rsi.patchSize);
+ nfa->length = verify_u32(len);
+ info->length = verify_u32(sizeof(RepeatInfo)
+ + sizeof(u64a) * (rsi.patchSize + 1));
+ copy_bytes(table, rsi.table);
+ }
+}
+
+template <class LbrStruct> static
bytecode_ptr<NFA> makeLbrNfa(NFAEngineType nfa_type, enum RepeatType rtype,
const depth &repeatMax) {
- size_t tableLen = 0;
- if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
- tableLen = sizeof(u64a) * (repeatMax + 1);
- }
- size_t len = sizeof(NFA) + sizeof(LbrStruct) + sizeof(RepeatInfo) +
- tableLen + sizeof(u64a);
+ size_t tableLen = 0;
+ if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
+ tableLen = sizeof(u64a) * (repeatMax + 1);
+ }
+ size_t len = sizeof(NFA) + sizeof(LbrStruct) + sizeof(RepeatInfo) +
+ tableLen + sizeof(u64a);
auto nfa = make_zeroed_bytecode_ptr<NFA>(len);
- nfa->type = verify_u8(nfa_type);
- nfa->length = verify_u32(len);
- return nfa;
-}
-
-static
+ nfa->type = verify_u8(nfa_type);
+ nfa->length = verify_u32(len);
+ return nfa;
+}
+
+static
bytecode_ptr<NFA> buildLbrDot(const CharReach &cr, const depth &repeatMin,
const depth &repeatMax, u32 minPeriod,
bool is_reset, ReportID report) {
- if (!cr.all()) {
- return nullptr;
- }
-
- enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
- is_reset);
+ if (!cr.all()) {
+ return nullptr;
+ }
+
+ enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
+ is_reset);
auto nfa = makeLbrNfa<lbr_dot>(LBR_NFA_DOT, rtype, repeatMax);
- struct lbr_dot *ld = (struct lbr_dot *)getMutableImplNfa(nfa.get());
-
- fillNfa<lbr_dot>(nfa.get(), &ld->common, report, repeatMin, repeatMax,
- minPeriod, rtype);
-
- DEBUG_PRINTF("built dot lbr\n");
- return nfa;
-}
-
-static
+ struct lbr_dot *ld = (struct lbr_dot *)getMutableImplNfa(nfa.get());
+
+ fillNfa<lbr_dot>(nfa.get(), &ld->common, report, repeatMin, repeatMax,
+ minPeriod, rtype);
+
+ DEBUG_PRINTF("built dot lbr\n");
+ return nfa;
+}
+
+static
bytecode_ptr<NFA> buildLbrVerm(const CharReach &cr, const depth &repeatMin,
const depth &repeatMax, u32 minPeriod,
bool is_reset, ReportID report) {
- const CharReach escapes(~cr);
-
- if (escapes.count() != 1) {
- return nullptr;
- }
-
- enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
- is_reset);
+ const CharReach escapes(~cr);
+
+ if (escapes.count() != 1) {
+ return nullptr;
+ }
+
+ enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
+ is_reset);
auto nfa = makeLbrNfa<lbr_verm>(LBR_NFA_VERM, rtype, repeatMax);
- struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get());
- lv->c = escapes.find_first();
-
- fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
- minPeriod, rtype);
-
- DEBUG_PRINTF("built verm lbr\n");
- return nfa;
-}
-
-static
+ struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get());
+ lv->c = escapes.find_first();
+
+ fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
+ minPeriod, rtype);
+
+ DEBUG_PRINTF("built verm lbr\n");
+ return nfa;
+}
+
+static
bytecode_ptr<NFA> buildLbrNVerm(const CharReach &cr, const depth &repeatMin,
const depth &repeatMax, u32 minPeriod,
bool is_reset, ReportID report) {
- const CharReach escapes(cr);
-
- if (escapes.count() != 1) {
- return nullptr;
- }
-
- enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
- is_reset);
+ const CharReach escapes(cr);
+
+ if (escapes.count() != 1) {
+ return nullptr;
+ }
+
+ enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
+ is_reset);
auto nfa = makeLbrNfa<lbr_verm>(LBR_NFA_NVERM, rtype, repeatMax);
- struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get());
- lv->c = escapes.find_first();
-
- fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
- minPeriod, rtype);
-
- DEBUG_PRINTF("built negated verm lbr\n");
- return nfa;
-}
-
-static
+ struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get());
+ lv->c = escapes.find_first();
+
+ fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
+ minPeriod, rtype);
+
+ DEBUG_PRINTF("built negated verm lbr\n");
+ return nfa;
+}
+
+static
bytecode_ptr<NFA> buildLbrShuf(const CharReach &cr, const depth &repeatMin,
const depth &repeatMax, u32 minPeriod,
bool is_reset, ReportID report) {
- enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
- is_reset);
+ enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
+ is_reset);
auto nfa = makeLbrNfa<lbr_shuf>(LBR_NFA_SHUF, rtype, repeatMax);
- struct lbr_shuf *ls = (struct lbr_shuf *)getMutableImplNfa(nfa.get());
-
- fillNfa<lbr_shuf>(nfa.get(), &ls->common, report, repeatMin, repeatMax,
- minPeriod, rtype);
-
+ struct lbr_shuf *ls = (struct lbr_shuf *)getMutableImplNfa(nfa.get());
+
+ fillNfa<lbr_shuf>(nfa.get(), &ls->common, report, repeatMin, repeatMax,
+ minPeriod, rtype);
+
if (shuftiBuildMasks(~cr, (u8 *)&ls->mask_lo, (u8 *)&ls->mask_hi) == -1) {
- return nullptr;
- }
-
- DEBUG_PRINTF("built shuf lbr\n");
- return nfa;
-}
-
-static
+ return nullptr;
+ }
+
+ DEBUG_PRINTF("built shuf lbr\n");
+ return nfa;
+}
+
+static
bytecode_ptr<NFA> buildLbrTruf(const CharReach &cr, const depth &repeatMin,
const depth &repeatMax, u32 minPeriod,
bool is_reset, ReportID report) {
- enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
- is_reset);
+ enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
+ is_reset);
auto nfa = makeLbrNfa<lbr_truf>(LBR_NFA_TRUF, rtype, repeatMax);
- struct lbr_truf *lc = (struct lbr_truf *)getMutableImplNfa(nfa.get());
-
- fillNfa<lbr_truf>(nfa.get(), &lc->common, report, repeatMin, repeatMax,
- minPeriod, rtype);
-
+ struct lbr_truf *lc = (struct lbr_truf *)getMutableImplNfa(nfa.get());
+
+ fillNfa<lbr_truf>(nfa.get(), &lc->common, report, repeatMin, repeatMax,
+ minPeriod, rtype);
+
truffleBuildMasks(~cr, (u8 *)&lc->mask1, (u8 *)&lc->mask2);
-
- DEBUG_PRINTF("built truffle lbr\n");
- return nfa;
-}
-
-static
+
+ DEBUG_PRINTF("built truffle lbr\n");
+ return nfa;
+}
+
+static
bytecode_ptr<NFA> constructLBR(const CharReach &cr, const depth &repeatMin,
const depth &repeatMax, u32 minPeriod,
bool is_reset, ReportID report) {
- DEBUG_PRINTF("bounds={%s,%s}, cr=%s (count %zu), report=%u\n",
- repeatMin.str().c_str(), repeatMax.str().c_str(),
- describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count(),
- report);
- assert(repeatMin <= repeatMax);
- assert(repeatMax.is_reachable());
-
+ DEBUG_PRINTF("bounds={%s,%s}, cr=%s (count %zu), report=%u\n",
+ repeatMin.str().c_str(), repeatMax.str().c_str(),
+ describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count(),
+ report);
+ assert(repeatMin <= repeatMax);
+ assert(repeatMax.is_reachable());
+
auto nfa =
buildLbrDot(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
-
- if (!nfa) {
- nfa = buildLbrVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
- report);
- }
- if (!nfa) {
- nfa = buildLbrNVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
- report);
- }
- if (!nfa) {
- nfa = buildLbrShuf(cr, repeatMin, repeatMax, minPeriod, is_reset,
- report);
- }
- if (!nfa) {
- nfa = buildLbrTruf(cr, repeatMin, repeatMax, minPeriod, is_reset,
- report);
- }
-
- if (!nfa) {
- assert(0);
- return nullptr;
- }
-
- return nfa;
-}
-
+
+ if (!nfa) {
+ nfa = buildLbrVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
+ report);
+ }
+ if (!nfa) {
+ nfa = buildLbrNVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
+ report);
+ }
+ if (!nfa) {
+ nfa = buildLbrShuf(cr, repeatMin, repeatMax, minPeriod, is_reset,
+ report);
+ }
+ if (!nfa) {
+ nfa = buildLbrTruf(cr, repeatMin, repeatMax, minPeriod, is_reset,
+ report);
+ }
+
+ if (!nfa) {
+ assert(0);
+ return nullptr;
+ }
+
+ return nfa;
+}
+
bytecode_ptr<NFA> constructLBR(const CastleProto &proto,
const vector<vector<CharReach>> &triggers,
const CompileContext &cc,
const ReportManager &rm) {
- if (!cc.grey.allowLbr) {
- return nullptr;
- }
-
+ if (!cc.grey.allowLbr) {
+ return nullptr;
+ }
+
if (proto.repeats.size() != 1) {
return nullptr;
}
const PureRepeat &repeat = proto.repeats.begin()->second;
- assert(!repeat.reach.none());
-
- if (repeat.reports.size() != 1) {
- DEBUG_PRINTF("too many reports\n");
- return nullptr;
- }
-
- bool is_reset;
- u32 min_period = minPeriod(triggers, repeat.reach, &is_reset);
-
- if (depth(min_period) > repeat.bounds.max) {
- DEBUG_PRINTF("trigger is longer than repeat; only need one offset\n");
- is_reset = true;
- }
-
- ReportID report = *repeat.reports.begin();
+ assert(!repeat.reach.none());
+
+ if (repeat.reports.size() != 1) {
+ DEBUG_PRINTF("too many reports\n");
+ return nullptr;
+ }
+
+ bool is_reset;
+ u32 min_period = minPeriod(triggers, repeat.reach, &is_reset);
+
+ if (depth(min_period) > repeat.bounds.max) {
+ DEBUG_PRINTF("trigger is longer than repeat; only need one offset\n");
+ is_reset = true;
+ }
+
+ ReportID report = *repeat.reports.begin();
if (has_managed_reports(proto.kind)) {
report = rm.getProgramOffset(report);
}
-
- DEBUG_PRINTF("building LBR %s\n", repeat.bounds.str().c_str());
- return constructLBR(repeat.reach, repeat.bounds.min, repeat.bounds.max,
- min_period, is_reset, report);
-}
-
-/** \brief Construct an LBR engine from the given graph \p g. */
+
+ DEBUG_PRINTF("building LBR %s\n", repeat.bounds.str().c_str());
+ return constructLBR(repeat.reach, repeat.bounds.min, repeat.bounds.max,
+ min_period, is_reset, report);
+}
+
+/** \brief Construct an LBR engine from the given graph \p g. */
bytecode_ptr<NFA> constructLBR(const NGHolder &g,
const vector<vector<CharReach>> &triggers,
const CompileContext &cc,
const ReportManager &rm) {
- if (!cc.grey.allowLbr) {
- return nullptr;
- }
-
- PureRepeat repeat;
- if (!isPureRepeat(g, repeat)) {
- return nullptr;
- }
- if (repeat.reports.size() != 1) {
- DEBUG_PRINTF("too many reports\n");
+ if (!cc.grey.allowLbr) {
+ return nullptr;
+ }
+
+ PureRepeat repeat;
+ if (!isPureRepeat(g, repeat)) {
return nullptr;
- }
-
+ }
+ if (repeat.reports.size() != 1) {
+ DEBUG_PRINTF("too many reports\n");
+ return nullptr;
+ }
+
CastleProto proto(g.kind, repeat);
return constructLBR(proto, triggers, cc, rm);
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_lbr.h b/contrib/libs/hyperscan/src/nfagraph/ng_lbr.h
index 55a77fcd1e..c181dbb9e7 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_lbr.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_lbr.h
@@ -1,71 +1,71 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Large Bounded Repeat (LBR) engine build code.
- */
-
-#ifndef NG_LBR_H
-#define NG_LBR_H
-
-#include "ue2common.h"
+ * \brief Large Bounded Repeat (LBR) engine build code.
+ */
+
+#ifndef NG_LBR_H
+#define NG_LBR_H
+
+#include "ue2common.h"
#include "util/bytecode_ptr.h"
-
-#include <memory>
-#include <vector>
-
-struct NFA;
-
-namespace ue2 {
-
-class CharReach;
-class NGHolder;
-class ReportManager;
+
+#include <memory>
+#include <vector>
+
+struct NFA;
+
+namespace ue2 {
+
+class CharReach;
+class NGHolder;
+class ReportManager;
struct CastleProto;
-struct CompileContext;
-struct Grey;
-
-/** \brief Construct an LBR engine from the given graph \p g. */
+struct CompileContext;
+struct Grey;
+
+/** \brief Construct an LBR engine from the given graph \p g. */
bytecode_ptr<NFA>
-constructLBR(const NGHolder &g,
- const std::vector<std::vector<CharReach>> &triggers,
+constructLBR(const NGHolder &g,
+ const std::vector<std::vector<CharReach>> &triggers,
const CompileContext &cc, const ReportManager &rm);
-
+
/**
* \brief Construct an LBR engine from the given CastleProto, which should
* contain only one repeat.
*/
bytecode_ptr<NFA>
constructLBR(const CastleProto &proto,
- const std::vector<std::vector<CharReach>> &triggers,
+ const std::vector<std::vector<CharReach>> &triggers,
const CompileContext &cc, const ReportManager &rm);
-
-} // namespace ue2
-
-#endif // NG_LBR_H
+
+} // namespace ue2
+
+#endif // NG_LBR_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_limex.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_limex.cpp
index 0f939f122f..2f0a55eab9 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_limex.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_limex.cpp
@@ -1,198 +1,198 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Limex NFA construction code.
- */
-
-#include "ng_limex.h"
-
-#include "grey.h"
-#include "ng_equivalence.h"
-#include "ng_holder.h"
-#include "ng_misc_opt.h"
-#include "ng_prune.h"
-#include "ng_redundancy.h"
-#include "ng_repeat.h"
-#include "ng_reports.h"
-#include "ng_restructuring.h"
-#include "ng_squash.h"
-#include "ng_util.h"
-#include "ng_width.h"
-#include "ue2common.h"
-#include "nfa/limex_compile.h"
-#include "nfa/limex_limits.h"
-#include "nfa/nfa_internal.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/graph_range.h"
+ * \brief Limex NFA construction code.
+ */
+
+#include "ng_limex.h"
+
+#include "grey.h"
+#include "ng_equivalence.h"
+#include "ng_holder.h"
+#include "ng_misc_opt.h"
+#include "ng_prune.h"
+#include "ng_redundancy.h"
+#include "ng_repeat.h"
+#include "ng_reports.h"
+#include "ng_restructuring.h"
+#include "ng_squash.h"
+#include "ng_util.h"
+#include "ng_width.h"
+#include "ue2common.h"
+#include "nfa/limex_compile.h"
+#include "nfa/limex_limits.h"
+#include "nfa/nfa_internal.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/graph_range.h"
#include "util/report_manager.h"
#include "util/flat_containers.h"
-#include "util/verify_types.h"
-
+#include "util/verify_types.h"
+
#include <algorithm>
-#include <map>
+#include <map>
#include <unordered_map>
#include <unordered_set>
-#include <vector>
-
+#include <vector>
+
#include <boost/range/adaptor/map.hpp>
-using namespace std;
+using namespace std;
using boost::adaptors::map_values;
using boost::adaptors::map_keys;
-
-namespace ue2 {
-
-#ifndef NDEBUG
-// Some sanity checking for the graph; returns false if something is wrong.
-// Only used in assertions.
-static
-bool sanityCheckGraph(const NGHolder &g,
+
+namespace ue2 {
+
+#ifndef NDEBUG
+// Some sanity checking for the graph; returns false if something is wrong.
+// Only used in assertions.
+static
+bool sanityCheckGraph(const NGHolder &g,
const unordered_map<NFAVertex, u32> &state_ids) {
unordered_set<u32> seen_states;
-
- for (auto v : vertices_range(g)) {
- // Non-specials should have non-empty reachability.
- if (!is_special(v, g)) {
- if (g[v].char_reach.none()) {
+
+ for (auto v : vertices_range(g)) {
+ // Non-specials should have non-empty reachability.
+ if (!is_special(v, g)) {
+ if (g[v].char_reach.none()) {
DEBUG_PRINTF("vertex %zu has empty reach\n", g[v].index);
- return false;
- }
- }
-
+ return false;
+ }
+ }
+
// Vertices with edges to accept or acceptEod must have reports and
// other vertices must not have them.
- if (is_match_vertex(v, g) && v != g.accept) {
- if (g[v].reports.empty()) {
+ if (is_match_vertex(v, g) && v != g.accept) {
+ if (g[v].reports.empty()) {
DEBUG_PRINTF("vertex %zu has no reports\n", g[v].index);
- return false;
- }
+ return false;
+ }
} else if (!g[v].reports.empty()) {
DEBUG_PRINTF("vertex %zu has reports but no accept edge\n",
g[v].index);
return false;
- }
-
- // Participant vertices should have distinct state indices.
- if (!contains(state_ids, v)) {
+ }
+
+ // Participant vertices should have distinct state indices.
+ if (!contains(state_ids, v)) {
DEBUG_PRINTF("vertex %zu has no state index!\n", g[v].index);
- return false;
- }
- u32 s = state_ids.at(v);
- if (s != NO_STATE && !seen_states.insert(s).second) {
+ return false;
+ }
+ u32 s = state_ids.at(v);
+ if (s != NO_STATE && !seen_states.insert(s).second) {
DEBUG_PRINTF("vertex %zu has dupe state %u\n", g[v].index, s);
- return false;
- }
- }
-
- return true;
-}
-#endif
-
-static
+ return false;
+ }
+ }
+
+ return true;
+}
+#endif
+
+static
unordered_map<NFAVertex, NFAStateSet> findSquashStates(const NGHolder &g,
const vector<BoundedRepeatData> &repeats) {
auto squashMap = findSquashers(g);
- filterSquashers(g, squashMap);
-
- /* We also filter out the cyclic states representing bounded repeats, as
+ filterSquashers(g, squashMap);
+
+ /* We also filter out the cyclic states representing bounded repeats, as
* they are not really cyclic -- they may turn off unexpectedly. */
- for (const auto &br : repeats) {
+ for (const auto &br : repeats) {
if (br.repeatMax.is_finite()) {
squashMap.erase(br.cyclic);
}
- }
+ }
return squashMap;
-}
-
-/**
- * \brief Drop edges from start to vertices that also have an edge from
- * startDs.
- *
- * Note that this also includes the (start, startDs) edge, which is not
- * necessary for actual NFA implementation (and is actually something we don't
- * want to affect state numbering, etc).
- */
-static
-void dropRedundantStartEdges(NGHolder &g) {
- remove_out_edge_if(g.start, [&](const NFAEdge &e) {
- return edge(g.startDs, target(e, g), g).second;
- }, g);
-
- // Ensure that we always remove (start, startDs), even if startDs has had
- // its self-loop removed as an optimization.
- remove_edge(g.start, g.startDs, g);
-}
-
-static
+}
+
+/**
+ * \brief Drop edges from start to vertices that also have an edge from
+ * startDs.
+ *
+ * Note that this also includes the (start, startDs) edge, which is not
+ * necessary for actual NFA implementation (and is actually something we don't
+ * want to affect state numbering, etc).
+ */
+static
+void dropRedundantStartEdges(NGHolder &g) {
+ remove_out_edge_if(g.start, [&](const NFAEdge &e) {
+ return edge(g.startDs, target(e, g), g).second;
+ }, g);
+
+ // Ensure that we always remove (start, startDs), even if startDs has had
+ // its self-loop removed as an optimization.
+ remove_edge(g.start, g.startDs, g);
+}
+
+static
CharReach calcTopVertexReach(const flat_set<u32> &tops,
const map<u32, CharReach> &top_reach) {
CharReach top_cr;
for (u32 t : tops) {
- if (contains(top_reach, t)) {
+ if (contains(top_reach, t)) {
top_cr |= top_reach.at(t);
- } else {
- top_cr = CharReach::dot();
+ } else {
+ top_cr = CharReach::dot();
break;
- }
+ }
}
return top_cr;
}
-
+
static
NFAVertex makeTopStartVertex(NGHolder &g, const flat_set<u32> &tops,
const flat_set<NFAVertex> &succs,
const map<u32, CharReach> &top_reach) {
assert(!succs.empty());
assert(!tops.empty());
-
+
bool reporter = false;
-
+
NFAVertex u = add_vertex(g[g.start], g);
CharReach top_cr = calcTopVertexReach(tops, top_reach);
g[u].char_reach = top_cr;
-
+
for (auto v : succs) {
if (v == g.accept || v == g.acceptEod) {
reporter = true;
}
add_edge(u, v, g);
}
-
+
// Only retain reports (which we copied on add_vertex above) for new top
// vertices connected to accepts.
if (!reporter) {
g[u].reports.clear();
}
-
+
return u;
}
@@ -208,11 +208,11 @@ void pickNextTopStateToHandle(const map<u32, flat_set<NFAVertex>> &top_succs,
if (best == top_succs.end()
|| it->second.size() < best->second.size()) {
best = it;
- }
- }
+ }
+ }
assert(best != top_succs.end());
assert(!best->second.empty()); /* should already been pruned */
-
+
*picked_tops = { best->first };
*picked_succs = best->second;
} else {
@@ -224,16 +224,16 @@ void pickNextTopStateToHandle(const map<u32, flat_set<NFAVertex>> &top_succs,
|| (it->second.size() == best->second.size()
&& it->second < best->second)) {
best = it;
- }
- }
+ }
+ }
assert(best != succ_tops.end());
assert(!best->second.empty()); /* should already been pruned */
*picked_succs = { best->first };
*picked_tops = best->second;
- }
+ }
}
-
+
static
void expandCbsByTops(const map<u32, flat_set<NFAVertex>> &unhandled_top_succs,
const map<u32, flat_set<NFAVertex>> &top_succs,
@@ -462,153 +462,153 @@ void makeTopStates(NGHolder &g, map<u32, set<NFAVertex>> &tops_out,
}
assert(unhandled_top_succs.empty());
- // We are completely replacing the start vertex, so clear its reports.
- clear_out_edges(g.start, g);
- add_edge(g.start, g.startDs, g);
- g[g.start].reports.clear();
-}
-
-static
-set<NFAVertex> findZombies(const NGHolder &h,
- const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
+ // We are completely replacing the start vertex, so clear its reports.
+ clear_out_edges(g.start, g);
+ add_edge(g.start, g.startDs, g);
+ g[g.start].reports.clear();
+}
+
+static
+set<NFAVertex> findZombies(const NGHolder &h,
+ const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
const unordered_map<NFAVertex, u32> &state_ids,
- const CompileContext &cc) {
- set<NFAVertex> zombies;
- if (!cc.grey.allowZombies) {
- return zombies;
- }
-
- // We only use zombie masks in streaming mode.
- if (!cc.streaming) {
- return zombies;
- }
-
- if (in_degree(h.acceptEod, h) != 1 || all_reports(h).size() != 1) {
+ const CompileContext &cc) {
+ set<NFAVertex> zombies;
+ if (!cc.grey.allowZombies) {
+ return zombies;
+ }
+
+ // We only use zombie masks in streaming mode.
+ if (!cc.streaming) {
+ return zombies;
+ }
+
+ if (in_degree(h.acceptEod, h) != 1 || all_reports(h).size() != 1) {
DEBUG_PRINTF("cannot be made undead - bad reports\n");
- return zombies;
- }
-
- for (auto u : inv_adjacent_vertices_range(h.accept, h)) {
- assert(h[u].reports.size() == 1);
- for (auto v : adjacent_vertices_range(u, h)) {
- if (edge(v, h.accept, h).second
- && h[v].char_reach.all()) {
- if (!contains(br_cyclic, v)) {
- goto ok;
- }
-
- const BoundedRepeatSummary &sum = br_cyclic.at(v);
-
- if (u == v && sum.repeatMax.is_infinite()) {
- goto ok;
- }
-
- }
- }
- DEBUG_PRINTF("does not go to dot accept\n");
- return zombies;
- ok:;
- }
-
- for (const auto &v : inv_adjacent_vertices_range(h.accept, h)) {
- if (state_ids.at(v) != NO_STATE) {
- zombies.insert(v);
- }
- }
- return zombies;
-}
-
-static
+ return zombies;
+ }
+
+ for (auto u : inv_adjacent_vertices_range(h.accept, h)) {
+ assert(h[u].reports.size() == 1);
+ for (auto v : adjacent_vertices_range(u, h)) {
+ if (edge(v, h.accept, h).second
+ && h[v].char_reach.all()) {
+ if (!contains(br_cyclic, v)) {
+ goto ok;
+ }
+
+ const BoundedRepeatSummary &sum = br_cyclic.at(v);
+
+ if (u == v && sum.repeatMax.is_infinite()) {
+ goto ok;
+ }
+
+ }
+ }
+ DEBUG_PRINTF("does not go to dot accept\n");
+ return zombies;
+ ok:;
+ }
+
+ for (const auto &v : inv_adjacent_vertices_range(h.accept, h)) {
+ if (state_ids.at(v) != NO_STATE) {
+ zombies.insert(v);
+ }
+ }
+ return zombies;
+}
+
+static
void reverseStateOrdering(unordered_map<NFAVertex, u32> &state_ids) {
- vector<NFAVertex> ordering;
- for (auto &e : state_ids) {
- if (e.second == NO_STATE) {
- continue;
- }
- ordering.push_back(e.first);
- }
-
- // Sort in reverse order by state ID.
- sort(ordering.begin(), ordering.end(),
- [&state_ids](NFAVertex a, NFAVertex b) {
- return state_ids.at(a) > state_ids.at(b);
- });
-
- u32 stateNum = 0;
-
- for (const auto &v : ordering) {
- DEBUG_PRINTF("renumber, %u -> %u\n", state_ids.at(v), stateNum);
- state_ids[v] = stateNum++;
- }
-}
-
-static
-map<u32, CharReach>
-findTopReach(const map<u32, vector<vector<CharReach>>> &triggers) {
- map<u32, CharReach> top_reach;
-
- for (const auto &m : triggers) {
- const auto top = m.first;
- CharReach cr;
- for (const auto &trigger : m.second) {
- if (trigger.empty()) {
- // We don't know anything about this trigger. Assume it can
- // have any reach.
- cr.setall();
- break;
- }
- cr |= *trigger.rbegin();
- }
-
- top_reach.emplace(top, cr);
- }
-
- return top_reach;
-}
-
-static
-unique_ptr<NGHolder>
-prepareGraph(const NGHolder &h_in, const ReportManager *rm,
- const map<u32, u32> &fixed_depth_tops,
- const map<u32, vector<vector<CharReach>>> &triggers,
- bool impl_test_only, const CompileContext &cc,
+ vector<NFAVertex> ordering;
+ for (auto &e : state_ids) {
+ if (e.second == NO_STATE) {
+ continue;
+ }
+ ordering.push_back(e.first);
+ }
+
+ // Sort in reverse order by state ID.
+ sort(ordering.begin(), ordering.end(),
+ [&state_ids](NFAVertex a, NFAVertex b) {
+ return state_ids.at(a) > state_ids.at(b);
+ });
+
+ u32 stateNum = 0;
+
+ for (const auto &v : ordering) {
+ DEBUG_PRINTF("renumber, %u -> %u\n", state_ids.at(v), stateNum);
+ state_ids[v] = stateNum++;
+ }
+}
+
+static
+map<u32, CharReach>
+findTopReach(const map<u32, vector<vector<CharReach>>> &triggers) {
+ map<u32, CharReach> top_reach;
+
+ for (const auto &m : triggers) {
+ const auto top = m.first;
+ CharReach cr;
+ for (const auto &trigger : m.second) {
+ if (trigger.empty()) {
+ // We don't know anything about this trigger. Assume it can
+ // have any reach.
+ cr.setall();
+ break;
+ }
+ cr |= *trigger.rbegin();
+ }
+
+ top_reach.emplace(top, cr);
+ }
+
+ return top_reach;
+}
+
+static
+unique_ptr<NGHolder>
+prepareGraph(const NGHolder &h_in, const ReportManager *rm,
+ const map<u32, u32> &fixed_depth_tops,
+ const map<u32, vector<vector<CharReach>>> &triggers,
+ bool impl_test_only, const CompileContext &cc,
unordered_map<NFAVertex, u32> &state_ids,
vector<BoundedRepeatData> &repeats,
map<u32, set<NFAVertex>> &tops) {
- assert(is_triggered(h_in) || fixed_depth_tops.empty());
-
- unique_ptr<NGHolder> h = cloneHolder(h_in);
-
- // Bounded repeat handling.
- analyseRepeats(*h, rm, fixed_depth_tops, triggers, &repeats, cc.streaming,
- impl_test_only, cc.grey);
-
- // If we're building a rose/suffix, do the top dance.
+ assert(is_triggered(h_in) || fixed_depth_tops.empty());
+
+ unique_ptr<NGHolder> h = cloneHolder(h_in);
+
+ // Bounded repeat handling.
+ analyseRepeats(*h, rm, fixed_depth_tops, triggers, &repeats, cc.streaming,
+ impl_test_only, cc.grey);
+
+ // If we're building a rose/suffix, do the top dance.
flat_set<NFAVertex> topVerts;
- if (is_triggered(*h)) {
- makeTopStates(*h, tops, findTopReach(triggers));
+ if (is_triggered(*h)) {
+ makeTopStates(*h, tops, findTopReach(triggers));
for (const auto &vv : tops | map_values) {
insert(&topVerts, vv);
}
- }
-
- dropRedundantStartEdges(*h);
-
- // Do state numbering
+ }
+
+ dropRedundantStartEdges(*h);
+
+ // Do state numbering
state_ids = numberStates(*h, topVerts);
-
- // In debugging, we sometimes like to reverse the state numbering to stress
- // the NFA construction code.
- if (cc.grey.numberNFAStatesWrong) {
- reverseStateOrdering(state_ids);
- }
-
- assert(sanityCheckGraph(*h, state_ids));
- return h;
-}
-
-static
+
+ // In debugging, we sometimes like to reverse the state numbering to stress
+ // the NFA construction code.
+ if (cc.grey.numberNFAStatesWrong) {
+ reverseStateOrdering(state_ids);
+ }
+
+ assert(sanityCheckGraph(*h, state_ids));
+ return h;
+}
+
+static
void remapReportsToPrograms(NGHolder &h, const ReportManager &rm) {
for (const auto &v : vertices_range(h)) {
auto &reports = h[v].reports;
@@ -629,234 +629,234 @@ void remapReportsToPrograms(NGHolder &h, const ReportManager &rm) {
static
bytecode_ptr<NFA>
-constructNFA(const NGHolder &h_in, const ReportManager *rm,
- const map<u32, u32> &fixed_depth_tops,
- const map<u32, vector<vector<CharReach>>> &triggers,
+constructNFA(const NGHolder &h_in, const ReportManager *rm,
+ const map<u32, u32> &fixed_depth_tops,
+ const map<u32, vector<vector<CharReach>>> &triggers,
bool compress_state, bool do_accel, bool impl_test_only,
bool &fast, u32 hint, const CompileContext &cc) {
if (!has_managed_reports(h_in)) {
- rm = nullptr;
- } else {
- assert(rm);
- }
-
+ rm = nullptr;
+ } else {
+ assert(rm);
+ }
+
unordered_map<NFAVertex, u32> state_ids;
- vector<BoundedRepeatData> repeats;
+ vector<BoundedRepeatData> repeats;
map<u32, set<NFAVertex>> tops;
- unique_ptr<NGHolder> h
- = prepareGraph(h_in, rm, fixed_depth_tops, triggers, impl_test_only, cc,
- state_ids, repeats, tops);
-
- // Quick exit: if we've got an embarrassment of riches, i.e. more states
- // than we can implement in our largest NFA model, bail here.
+ unique_ptr<NGHolder> h
+ = prepareGraph(h_in, rm, fixed_depth_tops, triggers, impl_test_only, cc,
+ state_ids, repeats, tops);
+
+ // Quick exit: if we've got an embarrassment of riches, i.e. more states
+ // than we can implement in our largest NFA model, bail here.
u32 numStates = countStates(state_ids);
- if (numStates > NFA_MAX_STATES) {
- DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
- return nullptr;
- }
-
- map<NFAVertex, BoundedRepeatSummary> br_cyclic;
- for (const auto &br : repeats) {
- br_cyclic[br.cyclic] = BoundedRepeatSummary(br.repeatMin, br.repeatMax);
- }
-
+ if (numStates > NFA_MAX_STATES) {
+ DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
+ return nullptr;
+ }
+
+ map<NFAVertex, BoundedRepeatSummary> br_cyclic;
+ for (const auto &br : repeats) {
+ br_cyclic[br.cyclic] = BoundedRepeatSummary(br.repeatMin, br.repeatMax);
+ }
+
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
unordered_map<NFAVertex, NFAStateSet> squashMap;
-
- // build map of squashed and squashers
- if (cc.grey.squashNFA) {
+
+ // build map of squashed and squashers
+ if (cc.grey.squashNFA) {
squashMap = findSquashStates(*h, repeats);
-
- if (rm && cc.grey.highlanderSquash) {
- reportSquashMap = findHighlanderSquashers(*h, *rm);
- }
- }
-
- set<NFAVertex> zombies = findZombies(*h, br_cyclic, state_ids, cc);
-
+
+ if (rm && cc.grey.highlanderSquash) {
+ reportSquashMap = findHighlanderSquashers(*h, *rm);
+ }
+ }
+
+ set<NFAVertex> zombies = findZombies(*h, br_cyclic, state_ids, cc);
+
if (has_managed_reports(*h)) {
assert(rm);
remapReportsToPrograms(*h, *rm);
}
- if (!cc.streaming || !cc.grey.compressNFAState) {
- compress_state = false;
- }
-
- return generate(*h, state_ids, repeats, reportSquashMap, squashMap, tops,
+ if (!cc.streaming || !cc.grey.compressNFAState) {
+ compress_state = false;
+ }
+
+ return generate(*h, state_ids, repeats, reportSquashMap, squashMap, tops,
zombies, do_accel, compress_state, fast, hint, cc);
-}
-
+}
+
bytecode_ptr<NFA>
-constructNFA(const NGHolder &h_in, const ReportManager *rm,
- const map<u32, u32> &fixed_depth_tops,
- const map<u32, vector<vector<CharReach>>> &triggers,
+constructNFA(const NGHolder &h_in, const ReportManager *rm,
+ const map<u32, u32> &fixed_depth_tops,
+ const map<u32, vector<vector<CharReach>>> &triggers,
bool compress_state, bool &fast, const CompileContext &cc) {
- const u32 hint = INVALID_NFA;
- const bool do_accel = cc.grey.accelerateNFA;
- const bool impl_test_only = false;
- return constructNFA(h_in, rm, fixed_depth_tops, triggers, compress_state,
+ const u32 hint = INVALID_NFA;
+ const bool do_accel = cc.grey.accelerateNFA;
+ const bool impl_test_only = false;
+ return constructNFA(h_in, rm, fixed_depth_tops, triggers, compress_state,
do_accel, impl_test_only, fast, hint, cc);
-}
-
-#ifndef RELEASE_BUILD
-// Variant that allows a hint to be specified.
+}
+
+#ifndef RELEASE_BUILD
+// Variant that allows a hint to be specified.
bytecode_ptr<NFA>
-constructNFA(const NGHolder &h_in, const ReportManager *rm,
- const map<u32, u32> &fixed_depth_tops,
- const map<u32, vector<vector<CharReach>>> &triggers,
+constructNFA(const NGHolder &h_in, const ReportManager *rm,
+ const map<u32, u32> &fixed_depth_tops,
+ const map<u32, vector<vector<CharReach>>> &triggers,
bool compress_state, bool &fast, u32 hint, const CompileContext &cc) {
- const bool do_accel = cc.grey.accelerateNFA;
- const bool impl_test_only = false;
+ const bool do_accel = cc.grey.accelerateNFA;
+ const bool impl_test_only = false;
return constructNFA(h_in, rm, fixed_depth_tops, triggers, compress_state,
do_accel, impl_test_only, fast, hint, cc);
-}
-#endif // RELEASE_BUILD
-
-static
+}
+#endif // RELEASE_BUILD
+
+static
bytecode_ptr<NFA> constructReversedNFA_i(const NGHolder &h_in, u32 hint,
const CompileContext &cc) {
- // Make a mutable copy of the graph that we can renumber etc.
- NGHolder h;
- cloneHolder(h, h_in);
- assert(h.kind == NFA_REV_PREFIX); /* triggered, raises internal callbacks */
-
- // Do state numbering.
+ // Make a mutable copy of the graph that we can renumber etc.
+ NGHolder h;
+ cloneHolder(h, h_in);
+ assert(h.kind == NFA_REV_PREFIX); /* triggered, raises internal callbacks */
+
+ // Do state numbering.
auto state_ids = numberStates(h, {});
-
- // Quick exit: if we've got an embarrassment of riches, i.e. more states
- // than we can implement in our largest NFA model, bail here.
+
+ // Quick exit: if we've got an embarrassment of riches, i.e. more states
+ // than we can implement in our largest NFA model, bail here.
u32 numStates = countStates(state_ids);
- if (numStates > NFA_MAX_STATES) {
- DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
- return nullptr;
- }
-
- assert(sanityCheckGraph(h, state_ids));
-
+ if (numStates > NFA_MAX_STATES) {
+ DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
+ return nullptr;
+ }
+
+ assert(sanityCheckGraph(h, state_ids));
+
map<u32, set<NFAVertex>> tops; /* only the standards tops for nfas */
- set<NFAVertex> zombies;
- vector<BoundedRepeatData> repeats;
+ set<NFAVertex> zombies;
+ vector<BoundedRepeatData> repeats;
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
unordered_map<NFAVertex, NFAStateSet> squashMap;
UNUSED bool fast = false;
-
- return generate(h, state_ids, repeats, reportSquashMap, squashMap, tops,
+
+ return generate(h, state_ids, repeats, reportSquashMap, squashMap, tops,
zombies, false, false, fast, hint, cc);
-}
-
+}
+
bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h_in,
const CompileContext &cc) {
- u32 hint = INVALID_NFA; // no hint
- return constructReversedNFA_i(h_in, hint, cc);
-}
-
-#ifndef RELEASE_BUILD
-// Variant that allows a hint to be specified.
+ u32 hint = INVALID_NFA; // no hint
+ return constructReversedNFA_i(h_in, hint, cc);
+}
+
+#ifndef RELEASE_BUILD
+// Variant that allows a hint to be specified.
bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h_in, u32 hint,
const CompileContext &cc) {
- return constructReversedNFA_i(h_in, hint, cc);
-}
-#endif // RELEASE_BUILD
-
-u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm,
- const CompileContext &cc) {
+ return constructReversedNFA_i(h_in, hint, cc);
+}
+#endif // RELEASE_BUILD
+
+u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm,
+ const CompileContext &cc) {
if (!cc.grey.allowLimExNFA) {
return false;
}
assert(!can_never_match(g));
- // Quick check: we can always implement an NFA with less than NFA_MAX_STATES
- // states. Note that top masks can generate extra states, so we account for
- // those here too.
+ // Quick check: we can always implement an NFA with less than NFA_MAX_STATES
+ // states. Note that top masks can generate extra states, so we account for
+ // those here too.
if (num_vertices(g) + getTops(g).size() < NFA_MAX_STATES) {
- return true;
- }
-
+ return true;
+ }
+
if (!has_managed_reports(g)) {
- rm = nullptr;
- } else {
- assert(rm);
- }
-
- // The BEST way to tell if an NFA is implementable is to implement it!
- const bool impl_test_only = true;
- const map<u32, u32> fixed_depth_tops; // empty
- const map<u32, vector<vector<CharReach>>> triggers; // empty
-
- /* Perform the first part of the construction process and see if the
- * resultant NGHolder has <= NFA_MAX_STATES. If it does, we know we can
- * implement it as an NFA. */
-
+ rm = nullptr;
+ } else {
+ assert(rm);
+ }
+
+ // The BEST way to tell if an NFA is implementable is to implement it!
+ const bool impl_test_only = true;
+ const map<u32, u32> fixed_depth_tops; // empty
+ const map<u32, vector<vector<CharReach>>> triggers; // empty
+
+ /* Perform the first part of the construction process and see if the
+ * resultant NGHolder has <= NFA_MAX_STATES. If it does, we know we can
+ * implement it as an NFA. */
+
unordered_map<NFAVertex, u32> state_ids;
- vector<BoundedRepeatData> repeats;
+ vector<BoundedRepeatData> repeats;
map<u32, set<NFAVertex>> tops;
- unique_ptr<NGHolder> h
- = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc,
- state_ids, repeats, tops);
- assert(h);
+ unique_ptr<NGHolder> h
+ = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc,
+ state_ids, repeats, tops);
+ assert(h);
u32 numStates = countStates(state_ids);
- if (numStates <= NFA_MAX_STATES) {
- return numStates;
- }
-
- return 0;
-}
-
-void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm,
- const CompileContext &cc) {
- NGHolder g_pristine;
- cloneHolder(g_pristine, g);
-
- reduceGraphEquivalences(g, cc);
-
- removeRedundancy(g, som);
-
+ if (numStates <= NFA_MAX_STATES) {
+ return numStates;
+ }
+
+ return 0;
+}
+
+void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm,
+ const CompileContext &cc) {
+ NGHolder g_pristine;
+ cloneHolder(g_pristine, g);
+
+ reduceGraphEquivalences(g, cc);
+
+ removeRedundancy(g, som);
+
if (rm && has_managed_reports(g)) {
- pruneHighlanderDominated(g, *rm);
- }
-
- if (!isImplementableNFA(g, rm, cc)) {
- DEBUG_PRINTF("reductions made graph unimplementable, roll back\n");
- clear_graph(g);
- cloneHolder(g, g_pristine);
- }
-}
-
-u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
- const CompileContext &cc) {
+ pruneHighlanderDominated(g, *rm);
+ }
+
+ if (!isImplementableNFA(g, rm, cc)) {
+ DEBUG_PRINTF("reductions made graph unimplementable, roll back\n");
+ clear_graph(g);
+ cloneHolder(g, g_pristine);
+ }
+}
+
+u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
+ const CompileContext &cc) {
if (!has_managed_reports(g)) {
- rm = nullptr;
- } else {
- assert(rm);
- }
-
- const bool impl_test_only = true;
- const map<u32, u32> fixed_depth_tops; // empty
- const map<u32, vector<vector<CharReach>>> triggers; // empty
-
+ rm = nullptr;
+ } else {
+ assert(rm);
+ }
+
+ const bool impl_test_only = true;
+ const map<u32, u32> fixed_depth_tops; // empty
+ const map<u32, vector<vector<CharReach>>> triggers; // empty
+
unordered_map<NFAVertex, u32> state_ids;
- vector<BoundedRepeatData> repeats;
+ vector<BoundedRepeatData> repeats;
map<u32, set<NFAVertex>> tops;
- unique_ptr<NGHolder> h
- = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc,
- state_ids, repeats, tops);
-
+ unique_ptr<NGHolder> h
+ = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc,
+ state_ids, repeats, tops);
+
if (!h || countStates(state_ids) > NFA_MAX_STATES) {
- DEBUG_PRINTF("not constructible\n");
- return NFA_MAX_ACCEL_STATES + 1;
- }
-
- assert(h->kind == g.kind);
-
- // Should have no bearing on accel calculation, so we leave these empty.
- const set<NFAVertex> zombies;
+ DEBUG_PRINTF("not constructible\n");
+ return NFA_MAX_ACCEL_STATES + 1;
+ }
+
+ assert(h->kind == g.kind);
+
+ // Should have no bearing on accel calculation, so we leave these empty.
+ const set<NFAVertex> zombies;
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
unordered_map<NFAVertex, NFAStateSet> squashMap;
-
- return countAccelStates(*h, state_ids, repeats, reportSquashMap, squashMap,
- tops, zombies, cc);
-}
-
-} // namespace ue2
+
+ return countAccelStates(*h, state_ids, repeats, reportSquashMap, squashMap,
+ tops, zombies, cc);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_limex.h b/contrib/libs/hyperscan/src/nfagraph/ng_limex.h
index 58a05ecb3e..7eba2eff06 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_limex.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_limex.h
@@ -1,147 +1,147 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Limex NFA construction code.
- */
-
-#ifndef NG_LIMEX_H
-#define NG_LIMEX_H
-
-#include "ue2common.h"
-#include "som/som.h"
+ * \brief Limex NFA construction code.
+ */
+
+#ifndef NG_LIMEX_H
+#define NG_LIMEX_H
+
+#include "ue2common.h"
+#include "som/som.h"
#include "util/bytecode_ptr.h"
-
-#include <map>
-#include <memory>
-#include <vector>
-
-struct NFA;
-
-namespace ue2 {
-
-class CharReach;
-class NG;
-class NGHolder;
-class ReportManager;
-struct CompileContext;
-
+
+#include <map>
+#include <memory>
+#include <vector>
+
+struct NFA;
+
+namespace ue2 {
+
+class CharReach;
+class NG;
+class NGHolder;
+class ReportManager;
+struct CompileContext;
+
/**
* \brief Determine if the given graph is implementable as an NFA.
- *
- * Returns zero if the NFA is not implementable (usually because it has too
- * many states for any of our models). Otherwise returns the number of states.
- *
- * ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and
- * NFA_INFIX use unmanaged rose-local reports.
- */
-u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm,
- const CompileContext &cc);
-
+ *
+ * Returns zero if the NFA is not implementable (usually because it has too
+ * many states for any of our models). Otherwise returns the number of states.
+ *
+ * ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and
+ * NFA_INFIX use unmanaged rose-local reports.
+ */
+u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm,
+ const CompileContext &cc);
+
/**
* \brief Late-stage graph reductions.
- *
- * This will call \ref removeRedundancy and apply its changes to the given
+ *
+ * This will call \ref removeRedundancy and apply its changes to the given
* holder only if it is implementable afterwards.
*/
void reduceImplementableGraph(NGHolder &g, som_type som,
const ReportManager *rm,
- const CompileContext &cc);
-
-/**
- * \brief For a given graph, count the number of accel states it will have in
- * an implementation.
- *
- * \return the number of accel states, or NFA_MAX_ACCEL_STATES + 1 if an
- * implementation would not be constructible.
- */
-u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
- const CompileContext &cc);
-
+ const CompileContext &cc);
+
+/**
+ * \brief For a given graph, count the number of accel states it will have in
+ * an implementation.
+ *
+ * \return the number of accel states, or NFA_MAX_ACCEL_STATES + 1 if an
+ * implementation would not be constructible.
+ */
+u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
+ const CompileContext &cc);
+
/**
* \brief Construct an NFA from the given graph.
- *
- * Returns zero if the NFA is not implementable (usually because it has too
- * many states for any of our models). Otherwise returns the number of states.
- *
- * ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and
- * NFA_INFIX use unmanaged rose-local reports.
- *
- * Note: this variant of the function allows a model to be specified with the
- * \a hint parameter.
- */
+ *
+ * Returns the constructed NFA, or a null pointer if it is not implementable
+ * (usually because it has too many states for any of our models).
+ *
+ * ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and
+ * NFA_INFIX use unmanaged rose-local reports.
+ *
+ * Note: this variant takes no model hint; the overload below (non-release
+ * builds only) allows a model to be specified with a \a hint parameter.
+ */
bytecode_ptr<NFA>
-constructNFA(const NGHolder &g, const ReportManager *rm,
- const std::map<u32, u32> &fixed_depth_tops,
- const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
+constructNFA(const NGHolder &g, const ReportManager *rm,
+ const std::map<u32, u32> &fixed_depth_tops,
+ const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
bool compress_state, bool &fast, const CompileContext &cc);
-
+
/**
* \brief Build a reverse NFA from the graph given, which should have already
- * been reversed.
- *
- * Used for reverse NFAs used in SOM mode.
- */
+ * been reversed.
+ *
+ * Used for reverse NFAs used in SOM mode.
+ */
bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h,
const CompileContext &cc);
-
-#ifndef RELEASE_BUILD
-
+
+#ifndef RELEASE_BUILD
+
/**
* \brief Construct an NFA (with model type hint) from the given graph.
- *
- * Returns zero if the NFA is not implementable (usually because it has too
- * many states for any of our models). Otherwise returns the number of states.
- *
- * ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and
- * NFA_INFIX use unmanaged rose-local reports.
- *
- * Note: this variant of the function allows a model to be specified with the
- * \a hint parameter.
- */
+ *
+ * Returns the constructed NFA, or a null pointer if it is not implementable
+ * (usually because it has too many states for any of our models).
+ *
+ * ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and
+ * NFA_INFIX use unmanaged rose-local reports.
+ *
+ * Note: this variant of the function allows a model to be specified with the
+ * \a hint parameter.
+ */
bytecode_ptr<NFA>
-constructNFA(const NGHolder &g, const ReportManager *rm,
- const std::map<u32, u32> &fixed_depth_tops,
- const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
+constructNFA(const NGHolder &g, const ReportManager *rm,
+ const std::map<u32, u32> &fixed_depth_tops,
+ const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
bool compress_state, bool &fast, u32 hint, const CompileContext &cc);
-
+
/**
* \brief Build a reverse NFA (with model type hint) from the graph given,
- * which should have already been reversed.
- *
- * Used for reverse NFAs used in SOM mode.
- */
+ * which should have already been reversed.
+ *
+ * Used for reverse NFAs used in SOM mode.
+ */
bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h, u32 hint,
const CompileContext &cc);
-
-#endif // RELEASE_BUILD
-
-} // namespace ue2
-
-#endif // NG_METEOR_H
+
+#endif // RELEASE_BUILD
+
+} // namespace ue2
+
+#endif // NG_LIMEX_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp
index ca393131bc..f1f829f2c1 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp
@@ -1,141 +1,141 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief NFA acceleration analysis code.
- */
-#include "ng_limex_accel.h"
-
-#include "ng_holder.h"
-#include "ng_misc_opt.h"
-#include "ng_util.h"
-#include "ue2common.h"
-
-#include "nfa/accel.h"
-
-#include "util/bitutils.h" // for CASE_CLEAR
-#include "util/charreach.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief NFA acceleration analysis code.
+ */
+#include "ng_limex_accel.h"
+
+#include "ng_holder.h"
+#include "ng_misc_opt.h"
+#include "ng_util.h"
+#include "ue2common.h"
+
+#include "nfa/accel.h"
+
+#include "util/bitutils.h" // for CASE_CLEAR
+#include "util/charreach.h"
#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph_range.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
#include "util/small_vector.h"
#include "util/target_info.h"
-
-#include <algorithm>
-#include <map>
-
+
+#include <algorithm>
+#include <map>
+
#include <boost/range/adaptor/map.hpp>
-using namespace std;
+using namespace std;
using boost::adaptors::map_keys;
-
-namespace ue2 {
-
-#define WIDE_FRIEND_MIN 200
-
-static
-void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
- const flat_set<NFAVertex> &cands,
- const flat_set<NFAVertex> &preds,
- flat_set<NFAVertex> *next_cands,
- flat_set<NFAVertex> *next_preds,
- flat_set<NFAVertex> *friends) {
- for (auto v : cands) {
- if (contains(preds, v)) {
- continue;
- }
-
- const CharReach &acr = g[v].char_reach;
+
+namespace ue2 {
+
+#define WIDE_FRIEND_MIN 200
+
+static
+void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
+ const flat_set<NFAVertex> &cands,
+ const flat_set<NFAVertex> &preds,
+ flat_set<NFAVertex> *next_cands,
+ flat_set<NFAVertex> *next_preds,
+ flat_set<NFAVertex> *friends) {
+ for (auto v : cands) {
+ if (contains(preds, v)) {
+ continue;
+ }
+
+ const CharReach &acr = g[v].char_reach;
DEBUG_PRINTF("checking %zu\n", g[v].index);
-
- if (acr.count() < WIDE_FRIEND_MIN || !acr.isSubsetOf(cr)) {
- DEBUG_PRINTF("bad reach %zu\n", acr.count());
- continue;
- }
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (!contains(preds, u)) {
- DEBUG_PRINTF("bad pred\n");
- goto next_cand;
- }
- }
-
- next_preds->insert(v);
- insert(next_cands, adjacent_vertices(v, g));
-
+
+ if (acr.count() < WIDE_FRIEND_MIN || !acr.isSubsetOf(cr)) {
+ DEBUG_PRINTF("bad reach %zu\n", acr.count());
+ continue;
+ }
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (!contains(preds, u)) {
+ DEBUG_PRINTF("bad pred\n");
+ goto next_cand;
+ }
+ }
+
+ next_preds->insert(v);
+ insert(next_cands, adjacent_vertices(v, g));
+
DEBUG_PRINTF("%zu is a friend indeed\n", g[v].index);
- friends->insert(v);
- next_cand:;
- }
-}
-
-void findAccelFriends(const NGHolder &g, NFAVertex v,
- const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
- u32 offset, flat_set<NFAVertex> *friends) {
- /* A friend of an accel state is a successor state which can only be on when
- * the accel is on. This requires that it has a subset of the accel state's
- * preds and a charreach which is a subset of the accel state.
- *
- * A friend can be safely ignored when accelerating provided there is
- * sufficient back-off. A friend is useful if it has a wide reach.
- */
-
- /* BR cyclic states which may go stale cannot have friends as they may
- * suddenly turn off leading their so-called friends stranded and alone.
- * TODO: restrict to only stale going BR cyclics
- */
- if (contains(br_cyclic, v) && !br_cyclic.at(v).unbounded()) {
- return;
- }
-
- u32 friend_depth = offset + 1;
-
- flat_set<NFAVertex> preds;
- insert(&preds, inv_adjacent_vertices(v, g));
- const CharReach &cr = g[v].char_reach;
-
- flat_set<NFAVertex> cands;
- insert(&cands, adjacent_vertices(v, g));
-
- flat_set<NFAVertex> next_preds;
- flat_set<NFAVertex> next_cands;
- for (u32 i = 0; i < friend_depth; i++) {
- findAccelFriendGeneration(g, cr, cands, preds, &next_cands, &next_preds,
- friends);
- preds.insert(next_preds.begin(), next_preds.end());
- next_preds.clear();
- cands.swap(next_cands);
- next_cands.clear();
- }
-}
-
-static
+ friends->insert(v);
+ next_cand:;
+ }
+}
+
+void findAccelFriends(const NGHolder &g, NFAVertex v,
+ const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
+ u32 offset, flat_set<NFAVertex> *friends) {
+ /* A friend of an accel state is a successor state which can only be on when
+ * the accel is on. This requires that it has a subset of the accel state's
+ * preds and a charreach which is a subset of the accel state.
+ *
+ * A friend can be safely ignored when accelerating provided there is
+ * sufficient back-off. A friend is useful if it has a wide reach.
+ */
+
+ /* BR cyclic states which may go stale cannot have friends as they may
+ * suddenly turn off, leaving their so-called friends stranded and alone.
+ * TODO: restrict to only stale going BR cyclics
+ */
+ if (contains(br_cyclic, v) && !br_cyclic.at(v).unbounded()) {
+ return;
+ }
+
+ u32 friend_depth = offset + 1;
+
+ flat_set<NFAVertex> preds;
+ insert(&preds, inv_adjacent_vertices(v, g));
+ const CharReach &cr = g[v].char_reach;
+
+ flat_set<NFAVertex> cands;
+ insert(&cands, adjacent_vertices(v, g));
+
+ flat_set<NFAVertex> next_preds;
+ flat_set<NFAVertex> next_cands;
+ for (u32 i = 0; i < friend_depth; i++) {
+ findAccelFriendGeneration(g, cr, cands, preds, &next_cands, &next_preds,
+ friends);
+ preds.insert(next_preds.begin(), next_preds.end());
+ next_preds.clear();
+ cands.swap(next_cands);
+ next_cands.clear();
+ }
+}
+
+static
void findPaths(const NGHolder &g, NFAVertex v,
const vector<CharReach> &refined_cr,
vector<vector<CharReach>> *paths,
@@ -149,30 +149,30 @@ void findPaths(const NGHolder &g, NFAVertex v,
paths->push_back({});
if (!generates_callbacks(g) || v == g.acceptEod) {
paths->back().push_back(CharReach()); /* red tape options */
- }
+ }
return;
- }
-
+ }
+
/* for the escape 'literals' we want to use the minimal cr so we
* can be more selective */
const CharReach &cr = refined_cr[g[v].index];
-
+
if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER
|| hasSelfLoop(v, g)) {
/* give up on pushing past this point */
paths->push_back({cr});
return;
- }
-
+ }
+
vector<vector<CharReach>> curr;
- for (auto w : adjacent_vertices_range(v, g)) {
+ for (auto w : adjacent_vertices_range(v, g)) {
if (contains(forbidden, w)) {
/* path has looped back to one of the active+boring acceleration
* states. We can ignore this path if we have sufficient back-
* off. */
paths->push_back({cr});
- continue;
- }
+ continue;
+ }
u32 new_depth = depth - 1;
do {
@@ -183,55 +183,55 @@ void findPaths(const NGHolder &g, NFAVertex v,
for (auto &c : curr) {
c.push_back(cr);
paths->push_back(std::move(c));
- }
- }
-}
-
+ }
+ }
+}
+
namespace {
struct SAccelScheme {
SAccelScheme(CharReach cr_in, u32 offset_in)
: cr(std::move(cr_in)), offset(offset_in) {
assert(offset <= MAX_ACCEL_DEPTH);
- }
-
+ }
+
SAccelScheme() {}
-
+
bool operator<(const SAccelScheme &b) const {
const SAccelScheme &a = *this;
const size_t a_count = cr.count(), b_count = b.cr.count();
if (a_count != b_count) {
return a_count < b_count;
- }
-
+ }
+
/* TODO: give bonus if one is a 'caseless' character */
ORDER_CHECK(offset);
ORDER_CHECK(cr);
- return false;
- }
-
+ return false;
+ }
+
CharReach cr = CharReach::dot();
u32 offset = MAX_ACCEL_DEPTH + 1;
};
-}
-
+}
+
/**
* \brief Limit on the number of (recursive) calls to findBestInternal().
*/
static constexpr size_t MAX_FINDBEST_CALLS = 1000000;
-static
+static
void findBestInternal(vector<vector<CharReach>>::const_iterator pb,
vector<vector<CharReach>>::const_iterator pe,
size_t *num_calls, const SAccelScheme &curr,
SAccelScheme *best) {
assert(curr.offset <= MAX_ACCEL_DEPTH);
-
+
if (++(*num_calls) > MAX_FINDBEST_CALLS) {
DEBUG_PRINTF("hit num_calls limit %zu\n", *num_calls);
return;
- }
-
+ }
+
DEBUG_PRINTF("paths left %zu\n", pe - pb);
if (pb == pe) {
if (curr < *best) {
@@ -241,10 +241,10 @@ void findBestInternal(vector<vector<CharReach>>::const_iterator pb,
best->offset);
}
return;
- }
-
+ }
+
DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
-
+
small_vector<SAccelScheme, 10> priority_path;
priority_path.reserve(pb->size());
u32 i = 0;
@@ -255,8 +255,8 @@ void findBestInternal(vector<vector<CharReach>>::const_iterator pb,
continue;
}
priority_path.push_back(move(as));
- }
-
+ }
+
sort(priority_path.begin(), priority_path.end());
for (auto it = priority_path.begin(); it != priority_path.end(); ++it) {
auto jt = next(it);
@@ -267,9 +267,9 @@ void findBestInternal(vector<vector<CharReach>>::const_iterator pb,
}
priority_path.erase(next(it), jt);
DEBUG_PRINTF("||%zu\n", it->cr.count());
- }
+ }
DEBUG_PRINTF("---\n");
-
+
for (const SAccelScheme &in : priority_path) {
DEBUG_PRINTF("in: count %zu\n", in.cr.count());
if (*best < in) {
@@ -277,14 +277,14 @@ void findBestInternal(vector<vector<CharReach>>::const_iterator pb,
continue;
}
findBestInternal(pb + 1, pe, num_calls, in, best);
-
+
if (curr.cr == best->cr) {
return; /* could only get better by offset */
}
- }
-}
-
-static
+ }
+}
+
+static
SAccelScheme findBest(const vector<vector<CharReach>> &paths,
const CharReach &terminating) {
SAccelScheme curr(terminating, 0U);
@@ -296,52 +296,52 @@ SAccelScheme findBest(const vector<vector<CharReach>> &paths,
best.cr.count(), describeClass(best.cr).c_str(), best.offset);
return best;
}
-
+
namespace {
struct DAccelScheme {
DAccelScheme(CharReach cr_in, u32 offset_in)
: double_cr(std::move(cr_in)), double_offset(offset_in) {
assert(double_offset <= MAX_ACCEL_DEPTH);
}
-
+
bool operator<(const DAccelScheme &b) const {
const DAccelScheme &a = *this;
-
+
size_t a_dcount = a.double_cr.count();
size_t b_dcount = b.double_cr.count();
-
+
assert(!a.double_byte.empty() || a_dcount || a.double_offset);
assert(!b.double_byte.empty() || b_dcount || b.double_offset);
-
+
if (a_dcount != b_dcount) {
return a_dcount < b_dcount;
}
-
+
if (!a_dcount) {
bool cd_a = buildDvermMask(a.double_byte);
bool cd_b = buildDvermMask(b.double_byte);
if (cd_a != cd_b) {
return cd_a > cd_b;
- }
- }
-
+ }
+ }
+
ORDER_CHECK(double_byte.size());
ORDER_CHECK(double_offset);
-
+
/* TODO: give bonus if one is a 'caseless' character */
ORDER_CHECK(double_byte);
ORDER_CHECK(double_cr);
-
+
return false;
- }
-
+ }
+
flat_set<pair<u8, u8>> double_byte;
CharReach double_cr;
u32 double_offset = 0;
};
-}
-
-static
+}
+
+static
DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1,
const CharReach &cr_2_in, u32 offset_in) {
cr_1 &= ~as.double_cr;
@@ -352,29 +352,29 @@ DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1,
DEBUG_PRINTF("empty first element\n");
ENSURE_AT_LEAST(&as.double_offset, offset);
return as;
- }
+ }
if (cr_2_in != cr_2 || cr_2.none()) {
offset = offset_in + 1;
- }
-
+ }
+
size_t two_count = cr_1.count() * cr_2.count();
-
+
DEBUG_PRINTF("will generate raw %zu pairs\n", two_count);
if (!two_count) {
DEBUG_PRINTF("empty element\n");
ENSURE_AT_LEAST(&as.double_offset, offset);
return as;
- }
-
+ }
+
if (two_count > DOUBLE_SHUFTI_LIMIT) {
if (cr_2.count() < cr_1.count()) {
as.double_cr |= cr_2;
offset = offset_in + 1;
} else {
as.double_cr |= cr_1;
- }
+ }
} else {
for (auto i = cr_1.find_first(); i != CharReach::npos;
i = cr_1.find_next(i)) {
@@ -382,145 +382,145 @@ DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1,
j = cr_2.find_next(j)) {
as.double_byte.emplace(i, j);
}
- }
- }
-
+ }
+ }
+
ENSURE_AT_LEAST(&as.double_offset, offset);
DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n",
as.double_byte.size(), as.double_cr.count(), as.double_offset);
return as;
-}
-
-static
+}
+
+static
void findDoubleBest(vector<vector<CharReach> >::const_iterator pb,
- vector<vector<CharReach> >::const_iterator pe,
+ vector<vector<CharReach> >::const_iterator pe,
const DAccelScheme &curr, DAccelScheme *best) {
assert(curr.double_offset <= MAX_ACCEL_DEPTH);
- DEBUG_PRINTF("paths left %zu\n", pe - pb);
+ DEBUG_PRINTF("paths left %zu\n", pe - pb);
DEBUG_PRINTF("current base: %zu pairs, %zu singles, offset %u\n",
curr.double_byte.size(), curr.double_cr.count(),
curr.double_offset);
- if (pb == pe) {
+ if (pb == pe) {
if (curr < *best) {
*best = curr;
DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n",
best->double_byte.size(), best->double_cr.count(),
best->double_offset);
}
- return;
- }
-
- DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
-
+ return;
+ }
+
+ DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
+
small_vector<DAccelScheme, 10> priority_path;
priority_path.reserve(pb->size());
- u32 i = 0;
+ u32 i = 0;
for (auto p = pb->begin(); p != pb->end() && next(p) != pb->end();
- ++p, i++) {
+ ++p, i++) {
DAccelScheme as = make_double_accel(curr, *p, *next(p), i);
if (*best < as) {
DEBUG_PRINTF("worse\n");
continue;
}
priority_path.push_back(move(as));
- }
-
- sort(priority_path.begin(), priority_path.end());
+ }
+
+ sort(priority_path.begin(), priority_path.end());
DEBUG_PRINTF("%zu candidates for this path\n", priority_path.size());
DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n",
best->double_byte.size(), best->double_cr.count(),
best->double_offset);
-
+
for (const DAccelScheme &in : priority_path) {
DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n",
in.double_byte.size(), in.double_cr.count(),
in.double_offset);
if (*best < in) {
- DEBUG_PRINTF("worse\n");
- continue;
- }
+ DEBUG_PRINTF("worse\n");
+ continue;
+ }
findDoubleBest(pb + 1, pe, in, best);
- }
-}
-
-#ifdef DEBUG
-static
+ }
+}
+
+#ifdef DEBUG
+static
void dumpPaths(const vector<vector<CharReach>> &paths) {
for (const auto &path : paths) {
- DEBUG_PRINTF("path: [");
+ DEBUG_PRINTF("path: [");
for (const auto &cr : path) {
- printf(" [");
+ printf(" [");
describeClass(stdout, cr, 20, CC_OUT_TEXT);
- printf("]");
- }
- printf(" ]\n");
- }
-}
-#endif
-
-static
+ printf("]");
+ }
+ printf(" ]\n");
+ }
+}
+#endif
+
+static
void blowoutPathsLessStrictSegment(vector<vector<CharReach> > &paths) {
- /* paths segments which are a superset of an earlier segment should never be
- * picked as an acceleration segment -> to improve processing just replace
- * with dot */
+ /* paths segments which are a superset of an earlier segment should never be
+ * picked as an acceleration segment -> to improve processing just replace
+ * with dot */
for (auto &p : paths) {
for (auto it = p.begin(); it != p.end(); ++it) {
for (auto jt = next(it); jt != p.end(); ++jt) {
- if (it->isSubsetOf(*jt)) {
- *jt = CharReach::dot();
- }
- }
- }
- }
-}
-
-static
+ if (it->isSubsetOf(*jt)) {
+ *jt = CharReach::dot();
+ }
+ }
+ }
+ }
+}
+
+static
void unifyPathsLastSegment(vector<vector<CharReach> > &paths) {
- /* try to unify paths which only differ in the last segment */
+ /* try to unify paths which only differ in the last segment */
for (vector<vector<CharReach> >::iterator p = paths.begin();
p != paths.end() && p + 1 != paths.end();) {
- vector<CharReach> &a = *p;
- vector<CharReach> &b = *(p + 1);
-
- if (a.size() != b.size()) {
- ++p;
- continue;
- }
-
- u32 i = 0;
- for (; i < a.size() - 1; i++) {
- if (a[i] != b[i]) {
- break;
- }
- }
- if (i == a.size() - 1) {
- /* we can unify these paths */
- a[i] |= b[i];
+ vector<CharReach> &a = *p;
+ vector<CharReach> &b = *(p + 1);
+
+ if (a.size() != b.size()) {
+ ++p;
+ continue;
+ }
+
+ u32 i = 0;
+ for (; i < a.size() - 1; i++) {
+ if (a[i] != b[i]) {
+ break;
+ }
+ }
+ if (i == a.size() - 1) {
+ /* we can unify these paths */
+ a[i] |= b[i];
paths.erase(p + 1);
- } else {
- ++p;
- }
- }
-}
-
-static
+ } else {
+ ++p;
+ }
+ }
+}
+
+static
void improvePaths(vector<vector<CharReach> > &paths) {
-#ifdef DEBUG
- DEBUG_PRINTF("orig paths\n");
+#ifdef DEBUG
+ DEBUG_PRINTF("orig paths\n");
dumpPaths(paths);
-#endif
- blowoutPathsLessStrictSegment(paths);
-
+#endif
+ blowoutPathsLessStrictSegment(paths);
+
sort(paths.begin(), paths.end());
-
- unifyPathsLastSegment(paths);
-
-#ifdef DEBUG
- DEBUG_PRINTF("opt paths\n");
+
+ unifyPathsLastSegment(paths);
+
+#ifdef DEBUG
+ DEBUG_PRINTF("opt paths\n");
dumpPaths(paths);
-#endif
-}
-
+#endif
+}
+
#define MAX_DOUBLE_ACCEL_PATHS 10
static
@@ -611,227 +611,227 @@ AccelScheme findBestAccelScheme(vector<vector<CharReach>> paths,
return rv;
}
-AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
- const vector<CharReach> &refined_cr,
- const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
+AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
+ const vector<CharReach> &refined_cr,
+ const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
bool allow_wide, bool look_for_double_byte) {
- CharReach terminating;
- for (auto v : verts) {
- if (!hasSelfLoop(v, g)) {
- DEBUG_PRINTF("no self loop\n");
- return AccelScheme(); /* invalid scheme */
- }
-
- // check that this state is reachable on most characters
- terminating |= ~g[v].char_reach;
- }
-
- DEBUG_PRINTF("set vertex has %zu stop chars\n", terminating.count());
- size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
- : ACCEL_MAX_STOP_CHAR;
- if (terminating.count() > limit) {
- return AccelScheme(); /* invalid scheme */
- }
-
+ CharReach terminating;
+ for (auto v : verts) {
+ if (!hasSelfLoop(v, g)) {
+ DEBUG_PRINTF("no self loop\n");
+ return AccelScheme(); /* invalid scheme */
+ }
+
+ // check that this state is reachable on most characters
+ terminating |= ~g[v].char_reach;
+ }
+
+ DEBUG_PRINTF("set vertex has %zu stop chars\n", terminating.count());
+ size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
+ : ACCEL_MAX_STOP_CHAR;
+ if (terminating.count() > limit) {
+ return AccelScheme(); /* invalid scheme */
+ }
+
vector<vector<CharReach>> paths;
- flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end());
-
- /* Note: we can not in general (TODO: ignore when possible) ignore entries
- * into the bounded repeat cyclic states as that is when the magic happens
- */
+ flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end());
+
+ /* Note: we can not in general (TODO: ignore when possible) ignore entries
+ * into the bounded repeat cyclic states as that is when the magic happens
+ */
for (auto v : br_cyclic | map_keys) {
- /* TODO: can allow if repeatMin <= 1 ? */
+ /* TODO: can allow if repeatMin <= 1 ? */
ignore_vert_set.erase(v);
- }
-
- for (auto v : verts) {
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w != v) {
- findPaths(g, w, refined_cr, &paths, ignore_vert_set,
- MAX_ACCEL_DEPTH);
- }
- }
- }
-
- /* paths built wrong: reverse them */
+ }
+
+ for (auto v : verts) {
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (w != v) {
+ findPaths(g, w, refined_cr, &paths, ignore_vert_set,
+ MAX_ACCEL_DEPTH);
+ }
+ }
+ }
+
+ /* paths built wrong: reverse them */
for (auto &path : paths) {
reverse(path.begin(), path.end());
- }
-
+ }
+
return findBestAccelScheme(std::move(paths), terminating,
look_for_double_byte);
-}
-
-NFAVertex get_sds_or_proxy(const NGHolder &g) {
- DEBUG_PRINTF("looking for sds proxy\n");
- if (proper_out_degree(g.startDs, g)) {
- return g.startDs;
- }
-
+}
+
+NFAVertex get_sds_or_proxy(const NGHolder &g) {
+ DEBUG_PRINTF("looking for sds proxy\n");
+ if (proper_out_degree(g.startDs, g)) {
+ return g.startDs;
+ }
+
NFAVertex v = NGHolder::null_vertex();
- for (auto w : adjacent_vertices_range(g.start, g)) {
- if (w != g.startDs) {
- if (!v) {
- v = w;
- } else {
- return g.startDs;
- }
- }
- }
-
- if (!v) {
- return g.startDs;
- }
-
- while (true) {
- if (hasSelfLoop(v, g)) {
+ for (auto w : adjacent_vertices_range(g.start, g)) {
+ if (w != g.startDs) {
+ if (!v) {
+ v = w;
+ } else {
+ return g.startDs;
+ }
+ }
+ }
+
+ if (!v) {
+ return g.startDs;
+ }
+
+ while (true) {
+ if (hasSelfLoop(v, g)) {
DEBUG_PRINTF("woot %zu\n", g[v].index);
- return v;
- }
- if (out_degree(v, g) != 1) {
- break;
- }
- NFAVertex u = getSoleDestVertex(g, v);
- if (!g[u].char_reach.all()) {
- break;
- }
- v = u;
- }
-
- return g.startDs;
-}
-
-/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */
-bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
- const vector<CharReach> &refined_cr,
- const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
- AccelScheme *as, bool allow_wide) {
- // For a state to be accelerable, our current criterion is that it be a
- // large character class with a self-loop and narrow set of possible other
- // successors (i.e. no special successors, union of successor reachability
- // is small).
- if (!hasSelfLoop(v, g)) {
- return false;
- }
-
- // check that this state is reachable on most characters
- /* we want to use the maximal reach here (in the graph) */
- CharReach terminating = g[v].char_reach;
- terminating.flip();
-
+ return v;
+ }
+ if (out_degree(v, g) != 1) {
+ break;
+ }
+ NFAVertex u = getSoleDestVertex(g, v);
+ if (!g[u].char_reach.all()) {
+ break;
+ }
+ v = u;
+ }
+
+ return g.startDs;
+}
+
+/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */
+bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
+ const vector<CharReach> &refined_cr,
+ const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
+ AccelScheme *as, bool allow_wide) {
+ // For a state to be accelerable, our current criterion is that it be a
+ // large character class with a self-loop and narrow set of possible other
+ // successors (i.e. no special successors, union of successor reachability
+ // is small).
+ if (!hasSelfLoop(v, g)) {
+ return false;
+ }
+
+ // check that this state is reachable on most characters
+ /* we want to use the maximal reach here (in the graph) */
+ CharReach terminating = g[v].char_reach;
+ terminating.flip();
+
DEBUG_PRINTF("vertex %zu is cyclic and has %zu stop chars%s\n",
- g[v].index, terminating.count(),
- allow_wide ? " (w)" : "");
-
- size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
- : ACCEL_MAX_STOP_CHAR;
- if (terminating.count() > limit) {
- DEBUG_PRINTF("too leaky\n");
- return false;
- }
-
- flat_set<NFAVertex> curr, next;
-
- insert(&curr, adjacent_vertices(v, g));
- curr.erase(v); // erase self-loop
-
- // We consider offsets of zero through three; this is fairly arbitrary at
- // present and could probably be increased (FIXME)
- /* WARNING: would/could do horrible things to compile time */
- bool stop = false;
- vector<CharReach> depthReach(MAX_ACCEL_DEPTH);
- unsigned int depth;
- for (depth = 0; !stop && depth < MAX_ACCEL_DEPTH; depth++) {
- CharReach &cr = depthReach[depth];
- for (auto t : curr) {
- if (is_special(t, g)) {
- // We've bumped into the edge of the graph, so we should stop
- // searching.
- // Exception: iff our cyclic state is not a dot, than we can
- // safely accelerate towards an EOD accept.
-
- /* Exception: nfas that don't generate callbacks so accepts are
- * fine too */
- if (t == g.accept && !generates_callbacks(g)) {
- stop = true; // don't search beyond this depth
- continue;
- } else if (t == g.accept) {
- goto depth_done;
- }
-
- assert(t == g.acceptEod);
- stop = true; // don't search beyond this depth
- } else {
- // Non-special vertex
- insert(&next, adjacent_vertices(t, g));
- /* for the escape 'literals' we want to use the minimal cr so we
- * can be more selective */
- cr |= refined_cr[g[t].index];
- }
- }
-
- cr |= terminating;
- DEBUG_PRINTF("depth %u has unioned reach %zu\n", depth, cr.count());
-
- curr.swap(next);
- next.clear();
- }
-
-depth_done:
-
- if (depth == 0) {
- return false;
- }
-
- DEBUG_PRINTF("selecting from depth 0..%u\n", depth);
-
- /* Look for the most awesome acceleration evar */
- for (unsigned int i = 0; i < depth; i++) {
- if (depthReach[i].none()) {
- DEBUG_PRINTF("red tape acceleration engine depth %u\n", i);
+ g[v].index, terminating.count(),
+ allow_wide ? " (w)" : "");
+
+ size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
+ : ACCEL_MAX_STOP_CHAR;
+ if (terminating.count() > limit) {
+ DEBUG_PRINTF("too leaky\n");
+ return false;
+ }
+
+ flat_set<NFAVertex> curr, next;
+
+ insert(&curr, adjacent_vertices(v, g));
+ curr.erase(v); // erase self-loop
+
+ // We consider offsets of zero through three; this is fairly arbitrary at
+ // present and could probably be increased (FIXME)
+ /* WARNING: would/could do horrible things to compile time */
+ bool stop = false;
+ vector<CharReach> depthReach(MAX_ACCEL_DEPTH);
+ unsigned int depth;
+ for (depth = 0; !stop && depth < MAX_ACCEL_DEPTH; depth++) {
+ CharReach &cr = depthReach[depth];
+ for (auto t : curr) {
+ if (is_special(t, g)) {
+ // We've bumped into the edge of the graph, so we should stop
+ // searching.
+ // Exception: iff our cyclic state is not a dot, than we can
+ // safely accelerate towards an EOD accept.
+
+ /* Exception: nfas that don't generate callbacks so accepts are
+ * fine too */
+ if (t == g.accept && !generates_callbacks(g)) {
+ stop = true; // don't search beyond this depth
+ continue;
+ } else if (t == g.accept) {
+ goto depth_done;
+ }
+
+ assert(t == g.acceptEod);
+ stop = true; // don't search beyond this depth
+ } else {
+ // Non-special vertex
+ insert(&next, adjacent_vertices(t, g));
+ /* for the escape 'literals' we want to use the minimal cr so we
+ * can be more selective */
+ cr |= refined_cr[g[t].index];
+ }
+ }
+
+ cr |= terminating;
+ DEBUG_PRINTF("depth %u has unioned reach %zu\n", depth, cr.count());
+
+ curr.swap(next);
+ next.clear();
+ }
+
+depth_done:
+
+ if (depth == 0) {
+ return false;
+ }
+
+ DEBUG_PRINTF("selecting from depth 0..%u\n", depth);
+
+ /* Look for the most awesome acceleration evar */
+ for (unsigned int i = 0; i < depth; i++) {
+ if (depthReach[i].none()) {
+ DEBUG_PRINTF("red tape acceleration engine depth %u\n", i);
*as = AccelScheme();
as->offset = i;
as->cr = CharReach();
- return true;
- }
- }
-
- // First, loop over our depths and see if we have a suitable 2-byte
- // caseful vermicelli option: this is the (second) fastest accel we have
- if (depth > 1) {
- for (unsigned int i = 0; i < (depth - 1); i++) {
- const CharReach &cra = depthReach[i];
- const CharReach &crb = depthReach[i + 1];
- if ((cra.count() == 1 && crb.count() == 1)
- || (cra.count() == 2 && crb.count() == 2
- && cra.isBit5Insensitive() && crb.isBit5Insensitive())) {
- DEBUG_PRINTF("two-byte vermicelli, depth %u\n", i);
+ return true;
+ }
+ }
+
+ // First, loop over our depths and see if we have a suitable 2-byte
+ // caseful vermicelli option: this is the (second) fastest accel we have
+ if (depth > 1) {
+ for (unsigned int i = 0; i < (depth - 1); i++) {
+ const CharReach &cra = depthReach[i];
+ const CharReach &crb = depthReach[i + 1];
+ if ((cra.count() == 1 && crb.count() == 1)
+ || (cra.count() == 2 && crb.count() == 2
+ && cra.isBit5Insensitive() && crb.isBit5Insensitive())) {
+ DEBUG_PRINTF("two-byte vermicelli, depth %u\n", i);
*as = AccelScheme();
as->offset = i;
- return true;
- }
- }
- }
-
- // Second option: a two-byte shufti (i.e. less than eight 2-byte
- // literals)
- if (depth > 1) {
- for (unsigned int i = 0; i < (depth - 1); i++) {
+ return true;
+ }
+ }
+ }
+
+ // Second option: a two-byte shufti (i.e. less than eight 2-byte
+ // literals)
+ if (depth > 1) {
+ for (unsigned int i = 0; i < (depth - 1); i++) {
if (depthReach[i].count() * depthReach[i+1].count()
<= DOUBLE_SHUFTI_LIMIT) {
- DEBUG_PRINTF("two-byte shufti, depth %u\n", i);
+ DEBUG_PRINTF("two-byte shufti, depth %u\n", i);
*as = AccelScheme();
as->offset = i;
- return true;
- }
- }
- }
-
+ return true;
+ }
+ }
+ }
+
// Look for offset accel schemes verm/shufti;
- vector<NFAVertex> verts(1, v);
+ vector<NFAVertex> verts(1, v);
*as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide, true);
- DEBUG_PRINTF("as width %zu\n", as->cr.count());
- return as->cr.count() <= ACCEL_MAX_STOP_CHAR || allow_wide;
-}
-
-} // namespace ue2
+ DEBUG_PRINTF("as width %zu\n", as->cr.count());
+ return as->cr.count() <= ACCEL_MAX_STOP_CHAR || allow_wide;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.h b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.h
index 766cfabbe6..f6f7f1b3cb 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.h
@@ -1,73 +1,73 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief NFA acceleration analysis code.
- */
-
-#ifndef NG_LIMEX_ACCEL_H
-#define NG_LIMEX_ACCEL_H
-
-#include "ng_holder.h"
-#include "ng_misc_opt.h"
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief NFA acceleration analysis code.
+ */
+
+#ifndef NG_LIMEX_ACCEL_H
+#define NG_LIMEX_ACCEL_H
+
+#include "ng_holder.h"
+#include "ng_misc_opt.h"
+#include "ue2common.h"
#include "nfa/accelcompile.h"
#include "util/accel_scheme.h"
-#include "util/charreach.h"
+#include "util/charreach.h"
#include "util/flat_containers.h"
-#include "util/order_check.h"
-
-#include <map>
-#include <vector>
-
-namespace ue2 {
-
-/* compile time accel defs */
-#define MAX_MERGED_ACCEL_STOPS 200
-#define ACCEL_MAX_STOP_CHAR 24
-#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */
-
+#include "util/order_check.h"
+
+#include <map>
+#include <vector>
+
+namespace ue2 {
+
+/* compile time accel defs */
+#define MAX_MERGED_ACCEL_STOPS 200
+#define ACCEL_MAX_STOP_CHAR 24
+#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */
+
// forward-declaration of CompileContext
struct CompileContext;
-void findAccelFriends(const NGHolder &g, NFAVertex v,
- const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
+void findAccelFriends(const NGHolder &g, NFAVertex v,
+ const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
u32 offset, flat_set<NFAVertex> *friends);
-
+
#define DOUBLE_SHUFTI_LIMIT 20
-
-NFAVertex get_sds_or_proxy(const NGHolder &g);
-
-AccelScheme nfaFindAccel(const NGHolder &g, const std::vector<NFAVertex> &verts,
- const std::vector<CharReach> &refined_cr,
- const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
+
+NFAVertex get_sds_or_proxy(const NGHolder &g);
+
+AccelScheme nfaFindAccel(const NGHolder &g, const std::vector<NFAVertex> &verts,
+ const std::vector<CharReach> &refined_cr,
+ const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
bool allow_wide, bool look_for_double_byte = false);
-
+
AccelScheme findBestAccelScheme(std::vector<std::vector<CharReach> > paths,
const CharReach &terminating,
bool look_for_double_byte = false);
@@ -75,12 +75,12 @@ AccelScheme findBestAccelScheme(std::vector<std::vector<CharReach> > paths,
/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). If a
* single byte accel scheme is found it is placed into *as
*/
-bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
- const std::vector<CharReach> &refined_cr,
- const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
- AccelScheme *as, bool allow_wide);
-
-
-} // namespace ue2
-
-#endif
+bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
+ const std::vector<CharReach> &refined_cr,
+ const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
+ AccelScheme *as, bool allow_wide);
+
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp
index 3b8c17eaf9..d25ac43e87 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.cpp
@@ -1,87 +1,87 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Literal analysis and scoring.
- */
-#include "ng_literal_analysis.h"
-
-#include "ng_holder.h"
-#include "ng_split.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "rose/rose_common.h"
-#include "util/compare.h"
-#include "util/depth.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Literal analysis and scoring.
+ */
+#include "ng_literal_analysis.h"
+
+#include "ng_holder.h"
+#include "ng_split.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "rose/rose_common.h"
+#include "util/compare.h"
+#include "util/depth.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
#include "util/ue2_graph.h"
-#include "util/ue2string.h"
-
-#include <algorithm>
-#include <fstream>
-#include <queue>
-
-#include <boost/graph/boykov_kolmogorov_max_flow.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
-/** Maximum number of paths to generate. */
-static const u32 MAX_WIDTH = 11;
-
-/** Scoring adjustment for 'uniqueness' in literal. */
-static const u64a WEIGHT_OF_UNIQUENESS = 250;
-
-namespace {
-
-/* Small literal graph type used for the suffix tree used in
- * compressAndScore. */
-
-struct LitGraphVertexProps {
+#include "util/ue2string.h"
+
+#include <algorithm>
+#include <fstream>
+#include <queue>
+
+#include <boost/graph/boykov_kolmogorov_max_flow.hpp>
+
+using namespace std;
+
+namespace ue2 {
+
+/** Maximum number of paths to generate. */
+static const u32 MAX_WIDTH = 11;
+
+/** Scoring adjustment for 'uniqueness' in literal. */
+static const u64a WEIGHT_OF_UNIQUENESS = 250;
+
+namespace {
+
+/* Small literal graph type used for the suffix tree used in
+ * compressAndScore. */
+
+struct LitGraphVertexProps {
LitGraphVertexProps() = default;
explicit LitGraphVertexProps(ue2_literal::elem c_in) : c(move(c_in)) {}
- ue2_literal::elem c; // string element (char + bool)
+ ue2_literal::elem c; // string element (char + bool)
size_t index = 0; // managed by ue2_graph
-};
-
-struct LitGraphEdgeProps {
+};
+
+struct LitGraphEdgeProps {
LitGraphEdgeProps() = default;
- explicit LitGraphEdgeProps(u64a score_in) : score(score_in) {}
- u64a score = NO_LITERAL_AT_EDGE_SCORE;
+ explicit LitGraphEdgeProps(u64a score_in) : score(score_in) {}
+ u64a score = NO_LITERAL_AT_EDGE_SCORE;
size_t index = 0; // managed by ue2_graph
-};
-
+};
+
struct LitGraph
: public ue2_graph<LitGraph, LitGraphVertexProps, LitGraphEdgeProps> {
-
+
LitGraph() : root(add_vertex(*this)), sink(add_vertex(*this)) {}
const vertex_descriptor root;
@@ -91,399 +91,399 @@ struct LitGraph
typedef LitGraph::vertex_descriptor LitVertex;
typedef LitGraph::edge_descriptor LitEdge;
-typedef pair<LitVertex, NFAVertex> VertexPair;
-typedef std::queue<VertexPair> LitVertexQ;
-
-} // namespace
-
-#ifdef DUMP_SUPPORT
-
-/** \brief Dump the literal graph in Graphviz format. */
-static UNUSED
+typedef pair<LitVertex, NFAVertex> VertexPair;
+typedef std::queue<VertexPair> LitVertexQ;
+
+} // namespace
+
+#ifdef DUMP_SUPPORT
+
+/** \brief Dump the literal graph in Graphviz format. */
+static UNUSED
void dumpGraph(const char *filename, const LitGraph &lg) {
- ofstream fout(filename);
-
- fout << "digraph G {" << endl;
-
- for (auto v : vertices_range(lg)) {
+ ofstream fout(filename);
+
+ fout << "digraph G {" << endl;
+
+ for (auto v : vertices_range(lg)) {
fout << lg[v].index;
if (v == lg.root) {
- fout << "[label=\"ROOT\"];";
+ fout << "[label=\"ROOT\"];";
} else if (v == lg.sink) {
- fout << "[label=\"SINK\"];";
- } else {
- ue2_literal s;
- s.push_back(lg[v].c);
- fout << "[label=\"" << dumpString(s) << "\"];";
- }
- fout << endl;
- }
-
- for (const auto &e : edges_range(lg)) {
- LitVertex u = source(e, lg), v = target(e, lg);
+ fout << "[label=\"SINK\"];";
+ } else {
+ ue2_literal s;
+ s.push_back(lg[v].c);
+ fout << "[label=\"" << dumpString(s) << "\"];";
+ }
+ fout << endl;
+ }
+
+ for (const auto &e : edges_range(lg)) {
+ LitVertex u = source(e, lg), v = target(e, lg);
fout << lg[u].index << " -> " << lg[v].index << "[label=\""
<< lg[e].score << "\"]"
<< ";" << endl;
- }
-
- fout << "}" << endl;
-}
-
-#endif // DUMP_SUPPORT
-
-static
-bool allowExpand(size_t numItems, size_t totalPathsSoFar) {
- if (numItems == 0) {
- return false;
- }
-
- if (numItems + totalPathsSoFar > MAX_WIDTH) {
- return false;
- }
-
- return true;
-}
-
-static
+ }
+
+ fout << "}" << endl;
+}
+
+#endif // DUMP_SUPPORT
+
+static
+bool allowExpand(size_t numItems, size_t totalPathsSoFar) {
+ if (numItems == 0) {
+ return false;
+ }
+
+ if (numItems + totalPathsSoFar > MAX_WIDTH) {
+ return false;
+ }
+
+ return true;
+}
+
+static
LitVertex addToLitGraph(LitGraph &lg, LitVertex pred,
const ue2_literal::elem &c) {
- // Check if we already have this in the graph.
- for (auto v : adjacent_vertices_range(pred, lg)) {
+ // Check if we already have this in the graph.
+ for (auto v : adjacent_vertices_range(pred, lg)) {
if (v == lg.sink) {
- continue;
- }
- if (lg[v].c == c) {
- return v;
- }
- }
-
- LitVertex lv = add_vertex(LitGraphVertexProps(c), lg);
- add_edge(pred, lv, lg);
- return lv;
-}
-
-static
+ continue;
+ }
+ if (lg[v].c == c) {
+ return v;
+ }
+ }
+
+ LitVertex lv = add_vertex(LitGraphVertexProps(c), lg);
+ add_edge(pred, lv, lg);
+ return lv;
+}
+
+static
void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex pred,
const CharReach &cr, NFAVertex v) {
for (size_t i = cr.find_first(); i != CharReach::npos;
i = cr.find_next(i)) {
- if (myisupper(i) && cr.test(mytolower(i))) {
- // ignore upper half of a nocase pair
- continue;
- }
-
- bool nocase = myislower(i) && cr.test(mytoupper(i));
- ue2_literal::elem c((char)i, nocase);
+ if (myisupper(i) && cr.test(mytolower(i))) {
+ // ignore upper half of a nocase pair
+ continue;
+ }
+
+ bool nocase = myislower(i) && cr.test(mytoupper(i));
+ ue2_literal::elem c((char)i, nocase);
LitVertex lv = addToLitGraph(lg, pred, c);
- workQ.push(VertexPair(lv, v));
- }
-}
-
-static
+ workQ.push(VertexPair(lv, v));
+ }
+}
+
+static
void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, const NGHolder &g,
const NFAEdge &e) {
- NFAVertex u = source(e, g);
- NFAVertex v = target(e, g);
- const CharReach &cr = g[v].char_reach;
-
- if (!allowExpand(cr.count(), 0)) {
- return;
- }
-
+ NFAVertex u = source(e, g);
+ NFAVertex v = target(e, g);
+ const CharReach &cr = g[v].char_reach;
+
+ if (!allowExpand(cr.count(), 0)) {
+ return;
+ }
+
addToQueue(workQ, lg, lg.root, cr, u);
-}
-
-static
-u32 crCardinality(const CharReach &cr) {
- // Special-case for handling dots, much faster than running the find_next
- // loop below.
- if (cr.all()) {
- return 230; // [^A-Z]
- }
-
- u32 rv = 0;
+}
+
+static
+u32 crCardinality(const CharReach &cr) {
+ // Special-case for handling dots, much faster than running the find_next
+ // loop below.
+ if (cr.all()) {
+ return 230; // [^A-Z]
+ }
+
+ u32 rv = 0;
for (size_t i = cr.find_first(); i != CharReach::npos;
i = cr.find_next(i)) {
- if (myisupper(i) && cr.test(mytolower(i))) {
- // ignore upper half of a nocase pair
- continue;
- }
- rv++;
- }
-
- return rv;
-}
-
-/** Filter out literals that include other literals as suffixes. We do this by
- * identifying vertices connected to the sink and removing their other
- * out-edges. */
-static
+ if (myisupper(i) && cr.test(mytolower(i))) {
+ // ignore upper half of a nocase pair
+ continue;
+ }
+ rv++;
+ }
+
+ return rv;
+}
+
+/** Filter out literals that include other literals as suffixes. We do this by
+ * identifying vertices connected to the sink and removing their other
+ * out-edges. */
+static
void filterLitGraph(LitGraph &lg) {
for (auto v : inv_adjacent_vertices_range(lg.sink, lg)) {
remove_out_edge_if(v, [&lg](const LitEdge &e) {
return target(e, lg) != lg.sink;
- }, lg);
- }
-
- // We could do a DFS-and-prune here, if we wanted. Right now, we just
- // handle it in extractLiterals by throwing away paths that don't run all
- // the way from sink to root.
-}
-
-/** Extracts all the literals from the given literal graph. Walks the graph
- * from each predecessor of the sink (note: it's a suffix tree except for this
- * convenience) towards the source, storing each string as we go. */
-static
+ }, lg);
+ }
+
+ // We could do a DFS-and-prune here, if we wanted. Right now, we just
+ // handle it in extractLiterals by throwing away paths that don't run all
+ // the way from sink to root.
+}
+
+/** Extracts all the literals from the given literal graph. Walks the graph
+ * from each predecessor of the sink (note: it's a suffix tree except for this
+ * convenience) towards the source, storing each string as we go. */
+static
void extractLiterals(const LitGraph &lg, set<ue2_literal> &s) {
- ue2_literal lit;
-
+ ue2_literal lit;
+
for (auto u : inv_adjacent_vertices_range(lg.sink, lg)) {
- lit.clear();
+ lit.clear();
while (u != lg.root) {
- lit.push_back(lg[u].c);
- assert(in_degree(u, lg) <= 1);
- LitGraph::inv_adjacency_iterator ai2, ae2;
- tie(ai2, ae2) = inv_adjacent_vertices(u, lg);
- if (ai2 == ae2) {
- // Path has been cut, time for the next literal.
- goto next_literal;
- }
- u = *ai2;
- }
- s.insert(lit);
-next_literal:
- ;
- }
-}
-
-#ifndef NDEBUG
-static
-bool hasSuffixLiterals(const set<ue2_literal> &s) {
- for (auto it = s.begin(), ite = s.end(); it != ite; ++it) {
- for (auto jt = std::next(it); jt != ite; ++jt) {
- if (isSuffix(*it, *jt) || isSuffix(*jt, *it)) {
- DEBUG_PRINTF("'%s' and '%s' have suffix issues\n",
- dumpString(*it).c_str(),
- dumpString(*jt).c_str());
- return true;
- }
- }
- }
- return false;
-}
-#endif
-
-static
-void processWorkQueue(const NGHolder &g, const NFAEdge &e,
- set<ue2_literal> &s) {
- if (is_special(target(e, g), g)) {
- return;
- }
-
- LitGraph lg;
-
- LitVertexQ workQ;
+ lit.push_back(lg[u].c);
+ assert(in_degree(u, lg) <= 1);
+ LitGraph::inv_adjacency_iterator ai2, ae2;
+ tie(ai2, ae2) = inv_adjacent_vertices(u, lg);
+ if (ai2 == ae2) {
+ // Path has been cut, time for the next literal.
+ goto next_literal;
+ }
+ u = *ai2;
+ }
+ s.insert(lit);
+next_literal:
+ ;
+ }
+}
+
+#ifndef NDEBUG
+static
+bool hasSuffixLiterals(const set<ue2_literal> &s) {
+ for (auto it = s.begin(), ite = s.end(); it != ite; ++it) {
+ for (auto jt = std::next(it); jt != ite; ++jt) {
+ if (isSuffix(*it, *jt) || isSuffix(*jt, *it)) {
+ DEBUG_PRINTF("'%s' and '%s' have suffix issues\n",
+ dumpString(*it).c_str(),
+ dumpString(*jt).c_str());
+ return true;
+ }
+ }
+ }
+ return false;
+}
+#endif
+
+static
+void processWorkQueue(const NGHolder &g, const NFAEdge &e,
+ set<ue2_literal> &s) {
+ if (is_special(target(e, g), g)) {
+ return;
+ }
+
+ LitGraph lg;
+
+ LitVertexQ workQ;
initWorkQueue(workQ, lg, g, e);
-
- while (!workQ.empty()) {
- const LitVertex lv = workQ.front().first;
- const NFAVertex &t = workQ.front().second;
- const CharReach &cr = g[t].char_reach;
-
- u32 cr_card = crCardinality(cr);
- size_t numItems = cr_card * in_degree(t, g);
+
+ while (!workQ.empty()) {
+ const LitVertex lv = workQ.front().first;
+ const NFAVertex &t = workQ.front().second;
+ const CharReach &cr = g[t].char_reach;
+
+ u32 cr_card = crCardinality(cr);
+ size_t numItems = cr_card * in_degree(t, g);
size_t committed_count = workQ.size() + in_degree(lg.sink, lg) - 1;
-
- if (g[t].index == NODE_START) {
- // reached start, add to literal set
+
+ if (g[t].index == NODE_START) {
+ // reached start, add to literal set
add_edge_if_not_present(lv, lg.sink, lg);
- goto next_work_elem;
- }
-
- // Expand next vertex
- if (allowExpand(numItems, committed_count)) {
- for (auto u : inv_adjacent_vertices_range(t, g)) {
+ goto next_work_elem;
+ }
+
+ // Expand next vertex
+ if (allowExpand(numItems, committed_count)) {
+ for (auto u : inv_adjacent_vertices_range(t, g)) {
addToQueue(workQ, lg, lv, cr, u);
- }
- goto next_work_elem;
- }
-
- // Expand this vertex
- if (allowExpand(cr_card, committed_count)) {
- for (size_t i = cr.find_first(); i != CharReach::npos;
- i = cr.find_next(i)) {
- if (myisupper(i) && cr.test(mytolower(i))) {
- // ignore upper half of a nocase pair
- continue;
- }
-
- bool nocase = myislower(i) && cr.test(mytoupper(i));
- ue2_literal::elem c((char)i, nocase);
+ }
+ goto next_work_elem;
+ }
+
+ // Expand this vertex
+ if (allowExpand(cr_card, committed_count)) {
+ for (size_t i = cr.find_first(); i != CharReach::npos;
+ i = cr.find_next(i)) {
+ if (myisupper(i) && cr.test(mytolower(i))) {
+ // ignore upper half of a nocase pair
+ continue;
+ }
+
+ bool nocase = myislower(i) && cr.test(mytoupper(i));
+ ue2_literal::elem c((char)i, nocase);
LitVertex lt = addToLitGraph(lg, lv, c);
add_edge_if_not_present(lt, lg.sink, lg);
- }
- goto next_work_elem;
- }
-
- // add to literal set
+ }
+ goto next_work_elem;
+ }
+
+ // add to literal set
add_edge_if_not_present(lv, lg.sink, lg);
- next_work_elem:
- workQ.pop();
- }
-
+ next_work_elem:
+ workQ.pop();
+ }
+
filterLitGraph(lg);
//dumpGraph("litgraph.dot", lg);
extractLiterals(lg, s);
-
- // Our literal set should contain no literal that is a suffix of another.
- assert(!hasSuffixLiterals(s));
-
+
+ // Our literal set should contain no literal that is a suffix of another.
+ assert(!hasSuffixLiterals(s));
+
DEBUG_PRINTF("edge %zu (%zu->%zu) produced %zu literals\n", g[e].index,
- g[source(e, g)].index, g[target(e, g)].index, s.size());
-}
-
+ g[source(e, g)].index, g[target(e, g)].index, s.size());
+}
+
bool bad_mixed_sensitivity(const ue2_literal &s) {
/* TODO: if the mixed cases is entirely within MAX_MASK2_WIDTH of the end,
* we should be able to handle it */
return mixed_sensitivity(s) && s.length() > MAX_MASK2_WIDTH;
}
-static
-u64a litUniqueness(const string &s) {
- CharReach seen(s);
- return seen.count();
-}
-
-/** Count the significant bits of this literal (i.e. seven for nocase alpha,
- * eight for everything else). */
-static
-u64a litCountBits(const ue2_literal &lit) {
- u64a n = 0;
- for (const auto &c : lit) {
- n += c.nocase ? 7 : 8;
- }
- return n;
-}
-
-/** Returns a fairly arbitrary score for the given literal, used to compare the
- * suitability of different candidates. */
-static
-u64a scoreLiteral(const ue2_literal &s) {
- // old scoring scheme: SUM(s in S: 1/s.len()^2)
- // now weight (currently 75/25) with number of unique chars
- // in the string
- u64a len = litCountBits(s);
- u64a lenUnique = litUniqueness(s.get_string()) * 8;
-
- u64a weightedLen = (1000ULL - WEIGHT_OF_UNIQUENESS) * len +
- WEIGHT_OF_UNIQUENESS * lenUnique;
- weightedLen /= 8;
-
- DEBUG_PRINTF("scored literal '%s' %llu\n",
- escapeString(s.get_string()).c_str(), weightedLen);
-
- return weightedLen;
-}
-
-
-/**
- * calculateScore has the following properties:
- * - score of literal is the same as the score of the reversed literal;
- * - score of substring of literal is worse than the original literal's score;
- * - score of any literal should be non-zero.
- */
-static
-u64a calculateScore(const ue2_literal &s) {
- if (s.empty()) {
- return NO_LITERAL_AT_EDGE_SCORE;
- }
-
- u64a weightedLen = scoreLiteral(s);
-
- DEBUG_PRINTF("len %zu, wl %llu\n", s.length(), weightedLen);
- u64a rv = 1000000000000000ULL/(weightedLen * weightedLen * weightedLen);
-
- if (!rv) {
- rv = 1;
- }
- DEBUG_PRINTF("len %zu, score %llu\n", s.length(), rv);
- return rv;
-}
-
-/** Adds a literal in reverse order, building up a suffix tree. */
-static
+static
+u64a litUniqueness(const string &s) {
+ CharReach seen(s);
+ return seen.count();
+}
+
+/** Count the significant bits of this literal (i.e. seven for nocase alpha,
+ * eight for everything else). */
+static
+u64a litCountBits(const ue2_literal &lit) {
+ u64a n = 0;
+ for (const auto &c : lit) {
+ n += c.nocase ? 7 : 8;
+ }
+ return n;
+}
+
+/** Returns a fairly arbitrary score for the given literal, used to compare the
+ * suitability of different candidates. */
+static
+u64a scoreLiteral(const ue2_literal &s) {
+ // old scoring scheme: SUM(s in S: 1/s.len()^2)
+ // now weight (currently 75/25) with number of unique chars
+ // in the string
+ u64a len = litCountBits(s);
+ u64a lenUnique = litUniqueness(s.get_string()) * 8;
+
+ u64a weightedLen = (1000ULL - WEIGHT_OF_UNIQUENESS) * len +
+ WEIGHT_OF_UNIQUENESS * lenUnique;
+ weightedLen /= 8;
+
+ DEBUG_PRINTF("scored literal '%s' %llu\n",
+ escapeString(s.get_string()).c_str(), weightedLen);
+
+ return weightedLen;
+}
+
+
+/**
+ * calculateScore has the following properties:
+ * - score of literal is the same as the score of the reversed literal;
+ * - score of substring of literal is worse than the original literal's score;
+ * - score of any literal should be non-zero.
+ */
+static
+u64a calculateScore(const ue2_literal &s) {
+ if (s.empty()) {
+ return NO_LITERAL_AT_EDGE_SCORE;
+ }
+
+ u64a weightedLen = scoreLiteral(s);
+
+ DEBUG_PRINTF("len %zu, wl %llu\n", s.length(), weightedLen);
+ u64a rv = 1000000000000000ULL/(weightedLen * weightedLen * weightedLen);
+
+ if (!rv) {
+ rv = 1;
+ }
+ DEBUG_PRINTF("len %zu, score %llu\n", s.length(), rv);
+ return rv;
+}
+
+/** Adds a literal in reverse order, building up a suffix tree. */
+static
void addReversedLiteral(const ue2_literal &lit, LitGraph &lg) {
- DEBUG_PRINTF("literal: '%s'\n", escapeString(lit).c_str());
- ue2_literal suffix;
+ DEBUG_PRINTF("literal: '%s'\n", escapeString(lit).c_str());
+ ue2_literal suffix;
LitVertex v = lg.root;
- for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
- suffix.push_back(*it);
- LitVertex w;
- for (auto v2 : adjacent_vertices_range(v, lg)) {
+ for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
+ suffix.push_back(*it);
+ LitVertex w;
+ for (auto v2 : adjacent_vertices_range(v, lg)) {
if (v2 != lg.sink && lg[v2].c == *it) {
- w = v2;
- goto next_char;
- }
- }
- w = add_vertex(LitGraphVertexProps(*it), lg);
- add_edge(v, w, LitGraphEdgeProps(calculateScore(suffix)), lg);
-next_char:
- v = w;
- }
-
- // Wire the last vertex to the sink.
+ w = v2;
+ goto next_char;
+ }
+ }
+ w = add_vertex(LitGraphVertexProps(*it), lg);
+ add_edge(v, w, LitGraphEdgeProps(calculateScore(suffix)), lg);
+next_char:
+ v = w;
+ }
+
+ // Wire the last vertex to the sink.
add_edge(v, lg.sink, lg);
-}
-
-static
-void extractLiterals(const vector<LitEdge> &cutset, const LitGraph &lg,
+}
+
+static
+void extractLiterals(const vector<LitEdge> &cutset, const LitGraph &lg,
set<ue2_literal> &s) {
- for (const auto &e : cutset) {
+ for (const auto &e : cutset) {
LitVertex u = source(e, lg);
LitVertex v = target(e, lg);
- ue2_literal lit;
- lit.push_back(lg[v].c);
+ ue2_literal lit;
+ lit.push_back(lg[v].c);
while (u != lg.root) {
- lit.push_back(lg[u].c);
- assert(in_degree(u, lg) == 1);
- LitGraph::inv_adjacency_iterator ai, ae;
- tie(ai, ae) = inv_adjacent_vertices(u, lg);
- if (ai == ae) {
- // Path has been cut, time for the next literal.
- goto next_literal;
- }
- u = *ai;
- }
- DEBUG_PRINTF("extracted: '%s'\n", escapeString(lit).c_str());
- s.insert(lit);
-next_literal:
- ;
- }
-}
-
-#ifdef DEBUG
-static UNUSED
+ lit.push_back(lg[u].c);
+ assert(in_degree(u, lg) == 1);
+ LitGraph::inv_adjacency_iterator ai, ae;
+ tie(ai, ae) = inv_adjacent_vertices(u, lg);
+ if (ai == ae) {
+ // Path has been cut, time for the next literal.
+ goto next_literal;
+ }
+ u = *ai;
+ }
+ DEBUG_PRINTF("extracted: '%s'\n", escapeString(lit).c_str());
+ s.insert(lit);
+next_literal:
+ ;
+ }
+}
+
+#ifdef DEBUG
+static UNUSED
const char *describeColor(small_color c) {
- switch (c) {
+ switch (c) {
case small_color::white:
- return "white";
+ return "white";
case small_color::gray:
- return "gray";
+ return "gray";
case small_color::black:
- return "black";
- default:
- return "unknown";
- }
-}
-#endif
-
-/**
- * The BGL's boykov_kolmogorov_max_flow requires that all edges have their
+ return "black";
+ default:
+ return "unknown";
+ }
+}
+#endif
+
+/**
+ * The BGL's boykov_kolmogorov_max_flow requires that all edges have their
* reverse edge in the graph. This function adds them, returning a vector
* mapping edge index to reverse edge. Note: LitGraph should be a DAG so there
* should be no existing reverse_edges.
- */
-static
+ */
+static
vector<LitEdge> add_reverse_edges_and_index(LitGraph &lg) {
const size_t edge_count = num_edges(lg);
vector<LitEdge> fwd_edges;
@@ -491,137 +491,137 @@ vector<LitEdge> add_reverse_edges_and_index(LitGraph &lg) {
for (const auto &e : edges_range(lg)) {
fwd_edges.push_back(e);
}
-
+
vector<LitEdge> rev_map(2 * edge_count);
-
+
for (const auto &e : fwd_edges) {
LitVertex u = source(e, lg);
LitVertex v = target(e, lg);
-
+
assert(!edge(v, u, lg).second);
-
+
LitEdge rev = add_edge(v, u, LitGraphEdgeProps(0), lg).first;
rev_map[lg[e].index] = rev;
rev_map[lg[rev].index] = e;
- }
-
+ }
+
return rev_map;
-}
-
-static
+}
+
+static
void findMinCut(LitGraph &lg, vector<LitEdge> &cutset) {
- cutset.clear();
-
+ cutset.clear();
+
//dumpGraph("litgraph.dot", lg);
-
+
assert(!in_degree(lg.root, lg));
assert(!out_degree(lg.sink, lg));
size_t num_real_edges = num_edges(lg);
-
- // Add reverse edges for the convenience of the BGL's max flow algorithm.
+
+ // Add reverse edges for the convenience of the BGL's max flow algorithm.
vector<LitEdge> rev_edges = add_reverse_edges_and_index(lg);
-
+
const auto v_index_map = get(&LitGraphVertexProps::index, lg);
const auto e_index_map = get(&LitGraphEdgeProps::index, lg);
- const size_t num_verts = num_vertices(lg);
+ const size_t num_verts = num_vertices(lg);
auto colors = make_small_color_map(lg);
- vector<s32> distances(num_verts);
- vector<LitEdge> predecessors(num_verts);
+ vector<s32> distances(num_verts);
+ vector<LitEdge> predecessors(num_verts);
vector<u64a> residuals(num_edges(lg));
-
- UNUSED u64a flow = boykov_kolmogorov_max_flow(lg,
- get(&LitGraphEdgeProps::score, lg),
+
+ UNUSED u64a flow = boykov_kolmogorov_max_flow(lg,
+ get(&LitGraphEdgeProps::score, lg),
make_iterator_property_map(residuals.begin(), e_index_map),
make_iterator_property_map(rev_edges.begin(), e_index_map),
- make_iterator_property_map(predecessors.begin(), v_index_map),
+ make_iterator_property_map(predecessors.begin(), v_index_map),
colors,
- make_iterator_property_map(distances.begin(), v_index_map),
+ make_iterator_property_map(distances.begin(), v_index_map),
v_index_map, lg.root, lg.sink);
- DEBUG_PRINTF("done, flow = %llu\n", flow);
-
+ DEBUG_PRINTF("done, flow = %llu\n", flow);
+
/* remove reverse edges */
remove_edge_if([&](const LitEdge &e) {
return lg[e].index >= num_real_edges;
}, lg);
-
- vector<LitEdge> white_cut, black_cut;
- u64a white_flow = 0, black_flow = 0;
-
- for (const auto &e : edges_range(lg)) {
- const LitVertex u = source(e, lg), v = target(e, lg);
+
+ vector<LitEdge> white_cut, black_cut;
+ u64a white_flow = 0, black_flow = 0;
+
+ for (const auto &e : edges_range(lg)) {
+ const LitVertex u = source(e, lg), v = target(e, lg);
const auto ucolor = get(colors, u);
const auto vcolor = get(colors, v);
-
+
DEBUG_PRINTF("edge %zu:%s -> %zu:%s score %llu\n", lg[u].index,
describeColor(ucolor), lg[v].index, describeColor(vcolor),
- lg[e].score);
-
+ lg[e].score);
+
if (ucolor != small_color::white && vcolor == small_color::white) {
assert(v != lg.sink);
- white_cut.push_back(e);
- white_flow += lg[e].score;
- }
+ white_cut.push_back(e);
+ white_flow += lg[e].score;
+ }
if (ucolor == small_color::black && vcolor != small_color::black) {
assert(v != lg.sink);
- black_cut.push_back(e);
- black_flow += lg[e].score;
- }
- }
-
- DEBUG_PRINTF("white flow = %llu, black flow = %llu\n",
- white_flow, black_flow);
- assert(white_flow && black_flow);
-
- if (white_flow <= black_flow) {
- DEBUG_PRINTF("selected white cut\n");
- cutset.swap(white_cut);
- } else {
- DEBUG_PRINTF("selected black cut\n");
- cutset.swap(black_cut);
- }
-
- DEBUG_PRINTF("min cut has %zu edges\n", cutset.size());
- assert(!cutset.empty());
-}
-
-/** Takes a set of literals and derives a better one from them, returning its
- * score. Literals with a common suffix S will be replaced with S. (for
- * example, {foobar, fooobar} -> {oobar}).
- */
-u64a compressAndScore(set<ue2_literal> &s) {
- if (s.empty()) {
- return NO_LITERAL_AT_EDGE_SCORE;
- }
-
- if (s.size() == 1) {
- return calculateScore(*s.begin());
- }
-
- UNUSED u64a initialScore = scoreSet(s);
- DEBUG_PRINTF("begin, initial literals have score %llu\n",
- initialScore);
-
- LitGraph lg;
-
- for (const auto &lit : s) {
+ black_cut.push_back(e);
+ black_flow += lg[e].score;
+ }
+ }
+
+ DEBUG_PRINTF("white flow = %llu, black flow = %llu\n",
+ white_flow, black_flow);
+ assert(white_flow && black_flow);
+
+ if (white_flow <= black_flow) {
+ DEBUG_PRINTF("selected white cut\n");
+ cutset.swap(white_cut);
+ } else {
+ DEBUG_PRINTF("selected black cut\n");
+ cutset.swap(black_cut);
+ }
+
+ DEBUG_PRINTF("min cut has %zu edges\n", cutset.size());
+ assert(!cutset.empty());
+}
+
+/** Takes a set of literals and derives a better one from them, returning its
+ * score. Literals with a common suffix S will be replaced with S. (for
+ * example, {foobar, fooobar} -> {oobar}).
+ */
+u64a compressAndScore(set<ue2_literal> &s) {
+ if (s.empty()) {
+ return NO_LITERAL_AT_EDGE_SCORE;
+ }
+
+ if (s.size() == 1) {
+ return calculateScore(*s.begin());
+ }
+
+ UNUSED u64a initialScore = scoreSet(s);
+ DEBUG_PRINTF("begin, initial literals have score %llu\n",
+ initialScore);
+
+ LitGraph lg;
+
+ for (const auto &lit : s) {
addReversedLiteral(lit, lg);
- }
-
- DEBUG_PRINTF("suffix tree has %zu vertices and %zu edges\n",
- num_vertices(lg), num_edges(lg));
-
- vector<LitEdge> cutset;
+ }
+
+ DEBUG_PRINTF("suffix tree has %zu vertices and %zu edges\n",
+ num_vertices(lg), num_edges(lg));
+
+ vector<LitEdge> cutset;
findMinCut(lg, cutset);
-
- s.clear();
+
+ s.clear();
extractLiterals(cutset, lg, s);
-
- u64a score = scoreSet(s);
- DEBUG_PRINTF("compressed score is %llu\n", score);
- assert(score <= initialScore);
- return score;
-}
-
+
+ u64a score = scoreSet(s);
+ DEBUG_PRINTF("compressed score is %llu\n", score);
+ assert(score <= initialScore);
+ return score;
+}
+
/* like compressAndScore, but replaces long mixed sensitivity literals with
* something weaker. */
u64a sanitizeAndCompressAndScore(set<ue2_literal> &lits) {
@@ -664,191 +664,191 @@ u64a sanitizeAndCompressAndScore(set<ue2_literal> &lits) {
return compressAndScore(lits);
}
-u64a scoreSet(const set<ue2_literal> &s) {
- if (s.empty()) {
- return NO_LITERAL_AT_EDGE_SCORE;
- }
-
- u64a score = 1ULL;
-
- for (const auto &lit : s) {
- score += calculateScore(lit);
- }
-
- return score;
-}
-
-set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAEdge &e) {
- set<ue2_literal> s;
- processWorkQueue(g, e, s);
- return s;
-}
-
-set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v,
- bool only_first_encounter) {
- set<ue2_literal> s;
-
- if (is_special(v, g)) {
- return s;
- }
-
- set<ue2_literal> ls;
-
- for (const auto &e : in_edges_range(v, g)) {
- if (source(e, g) == v && only_first_encounter) {
- continue; /* ignore self loop on root vertex as we are interested in
- * the first time we visit the vertex on the way to
- * accept. In fact, we can ignore any back edges - but
- * they would require a bit of effort to discover. */
- }
-
- ls = getLiteralSet(g, e);
- if (ls.empty()) {
- s.clear();
- return s;
- } else {
- s.insert(ls.begin(), ls.end());
- }
- }
-
- return s;
-}
-
+u64a scoreSet(const set<ue2_literal> &s) {
+ if (s.empty()) {
+ return NO_LITERAL_AT_EDGE_SCORE;
+ }
+
+ u64a score = 1ULL;
+
+ for (const auto &lit : s) {
+ score += calculateScore(lit);
+ }
+
+ return score;
+}
+
+set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAEdge &e) {
+ set<ue2_literal> s;
+ processWorkQueue(g, e, s);
+ return s;
+}
+
+set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v,
+ bool only_first_encounter) {
+ set<ue2_literal> s;
+
+ if (is_special(v, g)) {
+ return s;
+ }
+
+ set<ue2_literal> ls;
+
+ for (const auto &e : in_edges_range(v, g)) {
+ if (source(e, g) == v && only_first_encounter) {
+ continue; /* ignore self loop on root vertex as we are interested in
+ * the first time we visit the vertex on the way to
+ * accept. In fact, we can ignore any back edges - but
+ * they would require a bit of effort to discover. */
+ }
+
+ ls = getLiteralSet(g, e);
+ if (ls.empty()) {
+ s.clear();
+ return s;
+ } else {
+ s.insert(ls.begin(), ls.end());
+ }
+ }
+
+ return s;
+}
+
vector<u64a> scoreEdges(const NGHolder &g, const flat_set<NFAEdge> &known_bad) {
- assert(hasCorrectlyNumberedEdges(g));
-
- vector<u64a> scores(num_edges(g));
-
- for (const auto &e : edges_range(g)) {
- u32 eidx = g[e].index;
- assert(eidx < scores.size());
+ assert(hasCorrectlyNumberedEdges(g));
+
+ vector<u64a> scores(num_edges(g));
+
+ for (const auto &e : edges_range(g)) {
+ u32 eidx = g[e].index;
+ assert(eidx < scores.size());
if (contains(known_bad, e)) {
scores[eidx] = NO_LITERAL_AT_EDGE_SCORE;
} else {
set<ue2_literal> ls = getLiteralSet(g, e);
scores[eidx] = compressAndScore(ls);
}
- }
-
- return scores;
-}
-
+ }
+
+ return scores;
+}
+
bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out,
NGHolder *rhs) {
DEBUG_PRINTF("looking for leading floating literal\n");
set<NFAVertex> s_succ;
insert(&s_succ, adjacent_vertices(g.start, g));
-
+
set<NFAVertex> sds_succ;
insert(&sds_succ, adjacent_vertices(g.startDs, g));
-
+
bool floating = is_subset_of(s_succ, sds_succ);
if (!floating) {
DEBUG_PRINTF("not floating\n");
return false;
}
-
+
sds_succ.erase(g.startDs);
if (sds_succ.size() != 1) {
DEBUG_PRINTF("branchy root\n");
return false;
}
-
+
NFAVertex u = g.startDs;
NFAVertex v = *sds_succ.begin();
-
- while (true) {
+
+ while (true) {
DEBUG_PRINTF("validating vertex %zu\n", g[v].index);
-
- assert(v != g.acceptEod && v != g.accept);
-
- const CharReach &cr = g[v].char_reach;
- if (cr.count() != 1 && !cr.isCaselessChar()) {
- break;
- }
-
- // Rose can only handle mixed-sensitivity literals up to the max mask
- // length.
- if (lit_out->length() >= MAX_MASK2_WIDTH) {
- if (mixed_sensitivity(*lit_out)) {
- DEBUG_PRINTF("long and mixed sensitivity\n");
- break;
- }
- if (ourisalpha((char)cr.find_first())) {
- if (cr.isCaselessChar() != lit_out->any_nocase()) {
- DEBUG_PRINTF("stop at mixed sensitivity on '%c'\n",
- (char)cr.find_first());
- break;
- }
- }
- }
-
- if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
- DEBUG_PRINTF("connection to accept\n");
- break;
- }
-
- lit_out->push_back(cr.find_first(), cr.isCaselessChar());
- u = v;
-
- if (out_degree(v, g) != 1) {
- DEBUG_PRINTF("out_degree != 1\n");
- break;
- }
-
- v = *adjacent_vertices(v, g).first;
-
- if (in_degree(v, g) != 1) {
- DEBUG_PRINTF("blargh\n"); /* picks up cases where there is no path
- * to case accept (large cycles),
- * ensures term */
- break;
- }
- }
-
- if (lit_out->empty()) {
- return false;
- }
- assert(u != g.startDs);
-
+
+ assert(v != g.acceptEod && v != g.accept);
+
+ const CharReach &cr = g[v].char_reach;
+ if (cr.count() != 1 && !cr.isCaselessChar()) {
+ break;
+ }
+
+ // Rose can only handle mixed-sensitivity literals up to the max mask
+ // length.
+ if (lit_out->length() >= MAX_MASK2_WIDTH) {
+ if (mixed_sensitivity(*lit_out)) {
+ DEBUG_PRINTF("long and mixed sensitivity\n");
+ break;
+ }
+ if (ourisalpha((char)cr.find_first())) {
+ if (cr.isCaselessChar() != lit_out->any_nocase()) {
+ DEBUG_PRINTF("stop at mixed sensitivity on '%c'\n",
+ (char)cr.find_first());
+ break;
+ }
+ }
+ }
+
+ if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
+ DEBUG_PRINTF("connection to accept\n");
+ break;
+ }
+
+ lit_out->push_back(cr.find_first(), cr.isCaselessChar());
+ u = v;
+
+ if (out_degree(v, g) != 1) {
+ DEBUG_PRINTF("out_degree != 1\n");
+ break;
+ }
+
+ v = *adjacent_vertices(v, g).first;
+
+ if (in_degree(v, g) != 1) {
+ DEBUG_PRINTF("blargh\n"); /* picks up cases where there is no path
+ * to case accept (large cycles),
+ * ensures term */
+ break;
+ }
+ }
+
+ if (lit_out->empty()) {
+ return false;
+ }
+ assert(u != g.startDs);
+
unordered_map<NFAVertex, NFAVertex> rhs_map;
vector<NFAVertex> pivots = make_vector_from(adjacent_vertices(u, g));
- splitRHS(g, pivots, rhs, &rhs_map);
-
- DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(*lit_out).c_str(),
- lit_out->length());
- assert(is_triggered(*rhs));
- return true;
-}
-
-bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out) {
- if (in_degree(g.acceptEod, g) != 1) {
- return false;
- }
-
- NFAVertex v = getSoleSourceVertex(g, g.accept);
-
- if (!v) {
- return false;
- }
-
- set<ue2_literal> s = getLiteralSet(g, v, false);
-
- if (s.size() != 1) {
- return false;
- }
-
- const ue2_literal &lit = *s.begin();
-
- if (lit.length() > MAX_MASK2_WIDTH && mixed_sensitivity(lit)) {
- DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this.\n");
- return false;
- }
-
- *lit_out = lit;
- return true;
-}
-
+ splitRHS(g, pivots, rhs, &rhs_map);
+
+ DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(*lit_out).c_str(),
+ lit_out->length());
+ assert(is_triggered(*rhs));
+ return true;
+}
+
+bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out) {
+ if (in_degree(g.acceptEod, g) != 1) {
+ return false;
+ }
+
+ NFAVertex v = getSoleSourceVertex(g, g.accept);
+
+ if (!v) {
+ return false;
+ }
+
+ set<ue2_literal> s = getLiteralSet(g, v, false);
+
+ if (s.size() != 1) {
+ return false;
+ }
+
+ const ue2_literal &lit = *s.begin();
+
+ if (lit.length() > MAX_MASK2_WIDTH && mixed_sensitivity(lit)) {
+ DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this.\n");
+ return false;
+ }
+
+ *lit_out = lit;
+ return true;
+}
+
bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit) {
NFAVertex v = g.accept;
@@ -894,4 +894,4 @@ bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit) {
return true;
}
-} // namespace ue2
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.h b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.h
index 943a6d33c9..6bb8755610 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_analysis.h
@@ -1,62 +1,62 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Literal analysis and scoring.
- */
-
-#ifndef NG_LITERAL_ANALYSIS_H
-#define NG_LITERAL_ANALYSIS_H
-
-#include <set>
-#include <vector>
-
-#include "ng_holder.h"
-#include "util/ue2string.h"
-
-namespace ue2 {
-
-#define NO_LITERAL_AT_EDGE_SCORE 10000000ULL
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Literal analysis and scoring.
+ */
+
+#ifndef NG_LITERAL_ANALYSIS_H
+#define NG_LITERAL_ANALYSIS_H
+
+#include <set>
+#include <vector>
+
+#include "ng_holder.h"
+#include "util/ue2string.h"
+
+namespace ue2 {
+
+#define NO_LITERAL_AT_EDGE_SCORE 10000000ULL
#define INVALID_EDGE_CAP 100000000ULL /* special-to-special score */
-
-class NGHolder;
-
-/**
- * Fetch the literal set for a given vertex, returning it in \p s. Note: does
- * NOT take into account any constraints due to streaming mode requirements.
- *
- * if only_first_encounter is requested, the output set may drop literals
- * generated by revisiting the destination vertex.
- */
-std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v,
- bool only_first_encounter = true);
-std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAEdge &e);
-
+
+class NGHolder;
+
+/**
+ * Fetch the literal set for a given vertex, returning it in \p s. Note: does
+ * NOT take into account any constraints due to streaming mode requirements.
+ *
+ * if only_first_encounter is requested, the output set may drop literals
+ * generated by revisiting the destination vertex.
+ */
+std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v,
+ bool only_first_encounter = true);
+std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAEdge &e);
+
/**
* Returns true if we are unable to use a mixed sensitivity literal in rose (as
* our literal matchers are generally either case sensitive or not).
@@ -68,31 +68,31 @@ bool bad_mixed_sensitivity(const ue2_literal &s);
/**
* Score all the edges in the given graph, returning them in \p scores indexed
- * by edge_index. */
+ * by edge_index. */
std::vector<u64a> scoreEdges(const NGHolder &h,
const flat_set<NFAEdge> &known_bad = {});
-
-/** Returns a score for a literal set. Lower scores are better. */
-u64a scoreSet(const std::set<ue2_literal> &s);
-
-/** Compress a literal set to fewer literals. */
-u64a compressAndScore(std::set<ue2_literal> &s);
-
+
+/** Returns a score for a literal set. Lower scores are better. */
+u64a scoreSet(const std::set<ue2_literal> &s);
+
+/** Compress a literal set to fewer literals. */
+u64a compressAndScore(std::set<ue2_literal> &s);
+
/**
* Compress a literal set to fewer literals and replace any long mixed
* sensitivity literals with supported literals.
*/
u64a sanitizeAndCompressAndScore(std::set<ue2_literal> &s);
-bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out,
- NGHolder *rhs);
-
-bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out);
-
+bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out,
+ NGHolder *rhs);
+
+bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out);
+
/** \brief Returns true if the given literal is the only thing in the graph,
* from (start or startDs) to accept. */
bool literalIsWholeGraph(const NGHolder &g, const ue2_literal &lit);
-} // namespace ue2
-
-#endif
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.cpp
index 4e085d9913..4d3965dfe2 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.cpp
@@ -1,227 +1,227 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Literal Component Splitting. Identifies literals that span the
- * graph and moves them into Rose.
- */
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Literal Component Splitting. Identifies literals that span the
+ * graph and moves them into Rose.
+ */
#include "ng_literal_component.h"
-#include "grey.h"
-#include "ng.h"
-#include "ng_prune.h"
-#include "ng_util.h"
-#include "ue2common.h"
+#include "grey.h"
+#include "ng.h"
+#include "ng_prune.h"
+#include "ng_util.h"
+#include "ue2common.h"
#include "compiler/compiler.h"
-#include "rose/rose_build.h"
-#include "util/container.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-#include "util/ue2string.h"
-
+#include "rose/rose_build.h"
+#include "util/container.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+#include "util/ue2string.h"
+
#include <unordered_set>
-using namespace std;
-
-namespace ue2 {
-
-static
+using namespace std;
+
+namespace ue2 {
+
+static
bool isLiteralChar(const NGHolder &g, NFAVertex v, bool &nocase,
bool &casefixed) {
- const CharReach &cr = g[v].char_reach;
- const size_t num = cr.count();
- if (num > 2) {
- return false; // char class
- }
-
- if (!casefixed) {
- if (num == 2 && cr.isCaselessChar()) {
- nocase = true;
- casefixed = true;
- return true;
- } else if (num == 1) {
- if (cr.isAlpha()) {
- nocase = false;
- casefixed = true;
- }
- // otherwise, still acceptable but we can't fix caselessness yet
- return true;
- }
- } else {
- // nocase property is fixed
- if (nocase) {
- if ((num == 2 && cr.isCaselessChar()) ||
- (num == 1 && !cr.isAlpha())) {
- return true;
- }
- } else {
- return (num == 1);
- }
- }
-
- return false;
-}
-
-static
-void addToString(string &s, const NGHolder &g, NFAVertex v) {
- const CharReach &cr = g[v].char_reach;
- assert(cr.count() == 1 || cr.isCaselessChar());
-
- char c = (char)cr.find_first();
- s.push_back(c);
-}
-
-static
+ const CharReach &cr = g[v].char_reach;
+ const size_t num = cr.count();
+ if (num > 2) {
+ return false; // char class
+ }
+
+ if (!casefixed) {
+ if (num == 2 && cr.isCaselessChar()) {
+ nocase = true;
+ casefixed = true;
+ return true;
+ } else if (num == 1) {
+ if (cr.isAlpha()) {
+ nocase = false;
+ casefixed = true;
+ }
+ // otherwise, still acceptable but we can't fix caselessness yet
+ return true;
+ }
+ } else {
+ // nocase property is fixed
+ if (nocase) {
+ if ((num == 2 && cr.isCaselessChar()) ||
+ (num == 1 && !cr.isAlpha())) {
+ return true;
+ }
+ } else {
+ return (num == 1);
+ }
+ }
+
+ return false;
+}
+
+static
+void addToString(string &s, const NGHolder &g, NFAVertex v) {
+ const CharReach &cr = g[v].char_reach;
+ assert(cr.count() == 1 || cr.isCaselessChar());
+
+ char c = (char)cr.find_first();
+ s.push_back(c);
+}
+
+static
bool splitOffLiteral(NG &ng, NGHolder &g, NFAVertex v, const bool anchored,
- set<NFAVertex> &dead) {
+ set<NFAVertex> &dead) {
DEBUG_PRINTF("examine vertex %zu\n", g[v].index);
- bool nocase = false, casefixed = false;
-
- assert(!is_special(v, g));
-
- size_t reqInDegree;
- if (anchored) {
- reqInDegree = 1;
- assert(edge(g.start, v, g).second);
- } else {
- reqInDegree = 2;
- assert(edge(g.start, v, g).second);
- assert(edge(g.startDs, v, g).second);
- }
+ bool nocase = false, casefixed = false;
+
+ assert(!is_special(v, g));
+
+ size_t reqInDegree;
+ if (anchored) {
+ reqInDegree = 1;
+ assert(edge(g.start, v, g).second);
+ } else {
+ reqInDegree = 2;
+ assert(edge(g.start, v, g).second);
+ assert(edge(g.startDs, v, g).second);
+ }
if (in_degree(v, g) > reqInDegree) {
- DEBUG_PRINTF("extra in-edges\n");
- return false;
- }
-
- if (!isLiteralChar(g, v, nocase, casefixed)) {
- DEBUG_PRINTF("not literal\n");
- return false;
- }
-
- string literal;
- addToString(literal, g, v);
-
- // Remaining vertices must come in a chain, each with one in-edge and one
- // out-edge only.
- NFAVertex u;
- while (1) {
- if (out_degree(v, g) != 1) {
- DEBUG_PRINTF("branches, not literal\n");
- return false;
- }
-
- u = v; // previous vertex
- v = *(adjacent_vertices(v, g).first);
-
+ DEBUG_PRINTF("extra in-edges\n");
+ return false;
+ }
+
+ if (!isLiteralChar(g, v, nocase, casefixed)) {
+ DEBUG_PRINTF("not literal\n");
+ return false;
+ }
+
+ string literal;
+ addToString(literal, g, v);
+
+ // Remaining vertices must come in a chain, each with one in-edge and one
+ // out-edge only.
+ NFAVertex u;
+ while (1) {
+ if (out_degree(v, g) != 1) {
+ DEBUG_PRINTF("branches, not literal\n");
+ return false;
+ }
+
+ u = v; // previous vertex
+ v = *(adjacent_vertices(v, g).first);
+
DEBUG_PRINTF("loop, v=%zu\n", g[v].index);
-
- if (is_special(v, g)) {
- if (v == g.accept || v == g.acceptEod) {
- break; // OK
- } else {
- assert(0); // start?
- return false;
- }
- } else {
- // Ordinary, must be literal
- if (!isLiteralChar(g, v, nocase, casefixed)) {
- DEBUG_PRINTF("not literal\n");
- return false;
- }
- if (in_degree(v, g) != 1) {
- DEBUG_PRINTF("branches, not literal\n");
- return false;
- }
- }
-
- addToString(literal, g, v);
- }
-
- // Successfully found a literal; there might be multiple report IDs, in
- // which case we add all the reports.
- assert(!is_special(u, g));
- bool eod = v == g.acceptEod;
- assert(eod || v == g.accept);
-
- DEBUG_PRINTF("success: found %s literal '%s'\n",
- anchored ? "anchored" : "unanchored",
- escapeString(literal).c_str());
-
- // Literals of length 1 are better served going through later optimisation
- // passes, where they might be combined together into a character class.
- if (literal.length() == 1) {
- DEBUG_PRINTF("skipping literal of length 1\n");
- return false;
- }
-
- ng.rose->add(anchored, eod, ue2_literal(literal, nocase), g[u].reports);
-
- // Remove the terminal vertex. Later, we rely on pruneUseless to remove the
- // other vertices in this chain, since they'll no longer lead to an accept.
- dead.insert(u);
-
- return true;
-}
-
-/** \brief Split off literals. True if any changes were made to the graph. */
+
+ if (is_special(v, g)) {
+ if (v == g.accept || v == g.acceptEod) {
+ break; // OK
+ } else {
+ assert(0); // start?
+ return false;
+ }
+ } else {
+ // Ordinary, must be literal
+ if (!isLiteralChar(g, v, nocase, casefixed)) {
+ DEBUG_PRINTF("not literal\n");
+ return false;
+ }
+ if (in_degree(v, g) != 1) {
+ DEBUG_PRINTF("branches, not literal\n");
+ return false;
+ }
+ }
+
+ addToString(literal, g, v);
+ }
+
+ // Successfully found a literal; there might be multiple report IDs, in
+ // which case we add all the reports.
+ assert(!is_special(u, g));
+ bool eod = v == g.acceptEod;
+ assert(eod || v == g.accept);
+
+ DEBUG_PRINTF("success: found %s literal '%s'\n",
+ anchored ? "anchored" : "unanchored",
+ escapeString(literal).c_str());
+
+ // Literals of length 1 are better served going through later optimisation
+ // passes, where they might be combined together into a character class.
+ if (literal.length() == 1) {
+ DEBUG_PRINTF("skipping literal of length 1\n");
+ return false;
+ }
+
+ ng.rose->add(anchored, eod, ue2_literal(literal, nocase), g[u].reports);
+
+ // Remove the terminal vertex. Later, we rely on pruneUseless to remove the
+ // other vertices in this chain, since they'll no longer lead to an accept.
+ dead.insert(u);
+
+ return true;
+}
+
+/** \brief Split off literals. True if any changes were made to the graph. */
bool splitOffLiterals(NG &ng, NGHolder &g) {
if (!ng.cc.grey.allowLiteral) {
- return false;
- }
-
- bool changed = false;
- set<NFAVertex> dead;
-
+ return false;
+ }
+
+ bool changed = false;
+ set<NFAVertex> dead;
+
unordered_set<NFAVertex> unanchored; // for faster lookup.
- insert(&unanchored, adjacent_vertices(g.startDs, g));
-
- // Anchored literals.
- for (auto v : adjacent_vertices_range(g.start, g)) {
- if (!is_special(v, g) && !contains(unanchored, v)) {
- changed |= splitOffLiteral(ng, g, v, true, dead);
- }
- }
-
- // Unanchored literals.
- for (auto v : adjacent_vertices_range(g.startDs, g)) {
- if (!is_special(v, g)) {
- changed |= splitOffLiteral(ng, g, v, false, dead);
- }
- }
-
- if (changed) {
- remove_vertices(dead, g);
- pruneUseless(g);
- return true;
- }
-
- return false;
-}
-
-} // namespace ue2
+ insert(&unanchored, adjacent_vertices(g.startDs, g));
+
+ // Anchored literals.
+ for (auto v : adjacent_vertices_range(g.start, g)) {
+ if (!is_special(v, g) && !contains(unanchored, v)) {
+ changed |= splitOffLiteral(ng, g, v, true, dead);
+ }
+ }
+
+ // Unanchored literals.
+ for (auto v : adjacent_vertices_range(g.startDs, g)) {
+ if (!is_special(v, g)) {
+ changed |= splitOffLiteral(ng, g, v, false, dead);
+ }
+ }
+
+ if (changed) {
+ remove_vertices(dead, g);
+ pruneUseless(g);
+ return true;
+ }
+
+ return false;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.h b/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.h
index 0cd8422ae7..1f284ce367 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_component.h
@@ -1,47 +1,47 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Literal Component Splitting. Identifies literals that span the
- * graph and moves them into Rose.
- */
-
-#ifndef NG_LITERAL_COMPONENT_H
-#define NG_LITERAL_COMPONENT_H
-
-namespace ue2 {
-
-class NG;
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Literal Component Splitting. Identifies literals that span the
+ * graph and moves them into Rose.
+ */
+
+#ifndef NG_LITERAL_COMPONENT_H
+#define NG_LITERAL_COMPONENT_H
+
+namespace ue2 {
+
+class NG;
class NGHolder;
-
-/** \brief Split off literals. True if any changes were made to the graph. */
+
+/** \brief Split off literals. True if any changes were made to the graph. */
bool splitOffLiterals(NG &ng, NGHolder &g);
-
-} // namespace ue2
-
-#endif // NG_LITERAL_COMPONENT_H
+
+} // namespace ue2
+
+#endif // NG_LITERAL_COMPONENT_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.cpp
index 5d2f4ca5df..61a31dbf34 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.cpp
@@ -1,252 +1,252 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Analysis for literals decorated by leading/trailing assertions or
- * character classes.
- */
-#include "ng_literal_decorated.h"
-
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_util.h"
-#include "rose/rose_build.h"
-#include "rose/rose_in_graph.h"
-#include "rose/rose_in_util.h"
-#include "util/compile_context.h"
-#include "util/dump_charclass.h"
-#include "util/make_unique.h"
-
-#include <algorithm>
-#include <memory>
-#include <sstream>
-
-using namespace std;
-
-namespace ue2 {
-
-namespace {
-
-/** \brief Max fixed-width paths to generate from a graph. */
-static constexpr size_t MAX_PATHS = 10;
-
-/** \brief Max degree for any non-special vertex in the graph. */
-static constexpr size_t MAX_VERTEX_DEGREE = 6;
-
-using Path = vector<NFAVertex>;
-
-} // namespace
-
-static
-bool findPaths(const NGHolder &g, vector<Path> &paths) {
- vector<NFAVertex> order = getTopoOrdering(g);
-
- vector<size_t> read_count(num_vertices(g));
- vector<vector<Path>> built(num_vertices(g));
-
- for (auto it = order.rbegin(); it != order.rend(); ++it) {
- NFAVertex v = *it;
- auto &out = built[g[v].index];
- assert(out.empty());
-
- read_count[g[v].index] = out_degree(v, g);
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Analysis for literals decorated by leading/trailing assertions or
+ * character classes.
+ */
+#include "ng_literal_decorated.h"
+
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_util.h"
+#include "rose/rose_build.h"
+#include "rose/rose_in_graph.h"
+#include "rose/rose_in_util.h"
+#include "util/compile_context.h"
+#include "util/dump_charclass.h"
+#include "util/make_unique.h"
+
+#include <algorithm>
+#include <memory>
+#include <sstream>
+
+using namespace std;
+
+namespace ue2 {
+
+namespace {
+
+/** \brief Max fixed-width paths to generate from a graph. */
+static constexpr size_t MAX_PATHS = 10;
+
+/** \brief Max degree for any non-special vertex in the graph. */
+static constexpr size_t MAX_VERTEX_DEGREE = 6;
+
+using Path = vector<NFAVertex>;
+
+} // namespace
+
+static
+bool findPaths(const NGHolder &g, vector<Path> &paths) {
+ vector<NFAVertex> order = getTopoOrdering(g);
+
+ vector<size_t> read_count(num_vertices(g));
+ vector<vector<Path>> built(num_vertices(g));
+
+ for (auto it = order.rbegin(); it != order.rend(); ++it) {
+ NFAVertex v = *it;
+ auto &out = built[g[v].index];
+ assert(out.empty());
+
+ read_count[g[v].index] = out_degree(v, g);
+
DEBUG_PRINTF("setting read_count to %zu for %zu\n",
- read_count[g[v].index], g[v].index);
-
- if (v == g.start || v == g.startDs) {
- out.push_back({v});
- continue;
- }
-
- // The paths to v are the paths to v's predecessors, with v added to
- // the end of each.
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- // We have a stylized connection from start -> startDs, but we
- // don't need anchored and unanchored versions of the same path.
- if (u == g.start && edge(g.startDs, v, g).second) {
- continue;
- }
-
- // Similarly, avoid the accept->acceptEod edge.
- if (u == g.accept) {
- assert(v == g.acceptEod);
- continue;
- }
-
- assert(!built[g[u].index].empty());
- assert(read_count[g[u].index]);
-
- for (const auto &p : built[g[u].index]) {
- out.push_back(p);
- out.back().push_back(v);
-
- if (out.size() > MAX_PATHS) {
- // All these paths should eventually end up at a sink, so
- // we've blown past our limit.
- DEBUG_PRINTF("path limit exceeded\n");
- return false;
- }
- }
-
- read_count[g[u].index]--;
- if (!read_count[g[u].index]) {
+ read_count[g[v].index], g[v].index);
+
+ if (v == g.start || v == g.startDs) {
+ out.push_back({v});
+ continue;
+ }
+
+ // The paths to v are the paths to v's predecessors, with v added to
+ // the end of each.
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ // We have a stylized connection from start -> startDs, but we
+ // don't need anchored and unanchored versions of the same path.
+ if (u == g.start && edge(g.startDs, v, g).second) {
+ continue;
+ }
+
+ // Similarly, avoid the accept->acceptEod edge.
+ if (u == g.accept) {
+ assert(v == g.acceptEod);
+ continue;
+ }
+
+ assert(!built[g[u].index].empty());
+ assert(read_count[g[u].index]);
+
+ for (const auto &p : built[g[u].index]) {
+ out.push_back(p);
+ out.back().push_back(v);
+
+ if (out.size() > MAX_PATHS) {
+ // All these paths should eventually end up at a sink, so
+ // we've blown past our limit.
+ DEBUG_PRINTF("path limit exceeded\n");
+ return false;
+ }
+ }
+
+ read_count[g[u].index]--;
+ if (!read_count[g[u].index]) {
DEBUG_PRINTF("clearing %zu as finished reading\n", g[u].index);
- built[g[u].index].clear();
- built[g[u].index].shrink_to_fit();
- }
- }
- }
-
- insert(&paths, paths.end(), built[NODE_ACCEPT]);
- insert(&paths, paths.end(), built[NODE_ACCEPT_EOD]);
-
- DEBUG_PRINTF("%zu paths generated\n", paths.size());
-
- return paths.size() <= MAX_PATHS;
-}
-
-static
-bool hasLargeDegreeVertex(const NGHolder &g) {
- for (const auto &v : vertices_range(g)) {
- if (is_special(v, g)) { // specials can have large degree
- continue;
- }
+ built[g[u].index].clear();
+ built[g[u].index].shrink_to_fit();
+ }
+ }
+ }
+
+ insert(&paths, paths.end(), built[NODE_ACCEPT]);
+ insert(&paths, paths.end(), built[NODE_ACCEPT_EOD]);
+
+ DEBUG_PRINTF("%zu paths generated\n", paths.size());
+
+ return paths.size() <= MAX_PATHS;
+}
+
+static
+bool hasLargeDegreeVertex(const NGHolder &g) {
+ for (const auto &v : vertices_range(g)) {
+ if (is_special(v, g)) { // specials can have large degree
+ continue;
+ }
if (degree(v, g) > MAX_VERTEX_DEGREE) {
DEBUG_PRINTF("vertex %zu has degree %zu\n", g[v].index,
degree(v, g));
- return true;
- }
- }
- return false;
-}
-
-#if defined(DEBUG) || defined(DUMP_SUPPORT)
-static UNUSED
-string dumpPath(const NGHolder &g, const Path &path) {
- ostringstream oss;
- for (const auto &v : path) {
- switch (g[v].index) {
- case NODE_START:
- oss << "<start>";
- break;
- case NODE_START_DOTSTAR:
- oss << "<startDs>";
- break;
- case NODE_ACCEPT:
- oss << "<accept>";
- break;
- case NODE_ACCEPT_EOD:
- oss << "<acceptEod>";
- break;
- default:
- oss << describeClass(g[v].char_reach);
- break;
- }
- }
- return oss.str();
-}
-#endif
-
-struct PathMask {
- PathMask(const NGHolder &g, const Path &path)
- : is_anchored(path.front() == g.start),
- is_eod(path.back() == g.acceptEod) {
- assert(path.size() >= 2);
- mask.reserve(path.size() - 2);
- for (const auto &v : path) {
- if (is_special(v, g)) {
- continue;
- }
- mask.push_back(g[v].char_reach);
- }
-
- // Reports are attached to the second-to-last vertex.
+ return true;
+ }
+ }
+ return false;
+}
+
+#if defined(DEBUG) || defined(DUMP_SUPPORT)
+static UNUSED
+string dumpPath(const NGHolder &g, const Path &path) {
+ ostringstream oss;
+ for (const auto &v : path) {
+ switch (g[v].index) {
+ case NODE_START:
+ oss << "<start>";
+ break;
+ case NODE_START_DOTSTAR:
+ oss << "<startDs>";
+ break;
+ case NODE_ACCEPT:
+ oss << "<accept>";
+ break;
+ case NODE_ACCEPT_EOD:
+ oss << "<acceptEod>";
+ break;
+ default:
+ oss << describeClass(g[v].char_reach);
+ break;
+ }
+ }
+ return oss.str();
+}
+#endif
+
+struct PathMask {
+ PathMask(const NGHolder &g, const Path &path)
+ : is_anchored(path.front() == g.start),
+ is_eod(path.back() == g.acceptEod) {
+ assert(path.size() >= 2);
+ mask.reserve(path.size() - 2);
+ for (const auto &v : path) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ mask.push_back(g[v].char_reach);
+ }
+
+ // Reports are attached to the second-to-last vertex.
NFAVertex u = *std::next(path.rbegin());
reports = g[u].reports;
- assert(!reports.empty());
- }
-
- vector<CharReach> mask;
+ assert(!reports.empty());
+ }
+
+ vector<CharReach> mask;
flat_set<ReportID> reports;
- bool is_anchored;
- bool is_eod;
-};
-
-bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g,
- const CompileContext &cc) {
- if (!cc.grey.allowDecoratedLiteral) {
- return false;
- }
-
- if (!isAcyclic(g)) {
- DEBUG_PRINTF("not acyclic\n");
- return false;
- }
-
+ bool is_anchored;
+ bool is_eod;
+};
+
+bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g,
+ const CompileContext &cc) {
+ if (!cc.grey.allowDecoratedLiteral) {
+ return false;
+ }
+
+ if (!isAcyclic(g)) {
+ DEBUG_PRINTF("not acyclic\n");
+ return false;
+ }
+
if (!hasNarrowReachVertex(g)) {
DEBUG_PRINTF("no narrow reach vertices\n");
return false;
}
- if (hasLargeDegreeVertex(g)) {
- DEBUG_PRINTF("large degree\n");
- return false;
- }
-
- vector<Path> paths;
- if (!findPaths(g, paths)) {
- DEBUG_PRINTF("couldn't split into a small number of paths\n");
- return false;
- }
-
- assert(!paths.empty());
- assert(paths.size() <= MAX_PATHS);
-
- vector<PathMask> masks;
- masks.reserve(paths.size());
-
- for (const auto &path : paths) {
- DEBUG_PRINTF("path: %s\n", dumpPath(g, path).c_str());
- PathMask pm(g, path);
- if (!rose.validateMask(pm.mask, pm.reports, pm.is_anchored,
- pm.is_eod)) {
- DEBUG_PRINTF("failed validation\n");
- return false;
- }
- masks.push_back(move(pm));
- }
-
- for (const auto &pm : masks) {
- rose.addMask(pm.mask, pm.reports, pm.is_anchored, pm.is_eod);
- }
-
- DEBUG_PRINTF("all ok, %zu masks added\n", masks.size());
- return true;
-}
-
-} // namespace ue2
+ if (hasLargeDegreeVertex(g)) {
+ DEBUG_PRINTF("large degree\n");
+ return false;
+ }
+
+ vector<Path> paths;
+ if (!findPaths(g, paths)) {
+ DEBUG_PRINTF("couldn't split into a small number of paths\n");
+ return false;
+ }
+
+ assert(!paths.empty());
+ assert(paths.size() <= MAX_PATHS);
+
+ vector<PathMask> masks;
+ masks.reserve(paths.size());
+
+ for (const auto &path : paths) {
+ DEBUG_PRINTF("path: %s\n", dumpPath(g, path).c_str());
+ PathMask pm(g, path);
+ if (!rose.validateMask(pm.mask, pm.reports, pm.is_anchored,
+ pm.is_eod)) {
+ DEBUG_PRINTF("failed validation\n");
+ return false;
+ }
+ masks.push_back(move(pm));
+ }
+
+ for (const auto &pm : masks) {
+ rose.addMask(pm.mask, pm.reports, pm.is_anchored, pm.is_eod);
+ }
+
+ DEBUG_PRINTF("all ok, %zu masks added\n", masks.size());
+ return true;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.h b/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.h
index 603679e809..ff18c7d746 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_literal_decorated.h
@@ -1,52 +1,52 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Analysis for literals decorated by leading/trailing assertions or
- * character classes.
- */
-
-#ifndef NFAGRAPH_NG_LITERAL_DECORATED_H
-#define NFAGRAPH_NG_LITERAL_DECORATED_H
-
-namespace ue2 {
-
-class RoseBuild;
-class NGHolder;
-struct CompileContext;
-
-/**
- * \brief If the graph contains only a decorated literal, feed it to the Rose
- * builder. Returns true on success.
- */
-bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g,
- const CompileContext &cc);
-
-} // namespace ue2
-
-#endif // NFAGRAPH_NG_LITERAL_DECORATED_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Analysis for literals decorated by leading/trailing assertions or
+ * character classes.
+ */
+
+#ifndef NFAGRAPH_NG_LITERAL_DECORATED_H
+#define NFAGRAPH_NG_LITERAL_DECORATED_H
+
+namespace ue2 {
+
+class RoseBuild;
+class NGHolder;
+struct CompileContext;
+
+/**
+ * \brief If the graph contains only a decorated literal, feed it to the Rose
+ * builder. Returns true on success.
+ */
+bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g,
+ const CompileContext &cc);
+
+} // namespace ue2
+
+#endif // NFAGRAPH_NG_LITERAL_DECORATED_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.cpp
index 7d84aabe30..4ce5dc153b 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.cpp
@@ -1,352 +1,352 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Build code for McClellan DFA.
- */
-#include "ng_mcclellan.h"
-
-#include "grey.h"
-#include "nfa/dfa_min.h"
-#include "nfa/rdfa.h"
-#include "ng_holder.h"
-#include "ng_mcclellan_internal.h"
-#include "ng_squash.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/bitfield.h"
-#include "util/determinise.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Build code for McClellan DFA.
+ */
+#include "ng_mcclellan.h"
+
+#include "grey.h"
+#include "nfa/dfa_min.h"
+#include "nfa/rdfa.h"
+#include "ng_holder.h"
+#include "ng_mcclellan_internal.h"
+#include "ng_squash.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/bitfield.h"
+#include "util/determinise.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
+#include "util/graph_range.h"
#include "util/hash.h"
#include "util/hash_dynamic_bitset.h"
-#include "util/make_unique.h"
-#include "util/report_manager.h"
-
-#include <algorithm>
-#include <functional>
-#include <map>
-#include <set>
+#include "util/make_unique.h"
+#include "util/report_manager.h"
+
+#include <algorithm>
+#include <functional>
+#include <map>
+#include <set>
#include <unordered_map>
-#include <vector>
-
-#include <boost/dynamic_bitset.hpp>
-
-using namespace std;
-using boost::dynamic_bitset;
-
-namespace ue2 {
-
-#define FINAL_DFA_STATE_LIMIT 16383
-#define DFA_STATE_LIMIT 1024
-#define NFA_STATE_LIMIT 256
-
-u16 buildAlphabetFromEquivSets(const std::vector<CharReach> &esets,
- array<u16, ALPHABET_SIZE> &alpha,
- array<u16, ALPHABET_SIZE> &unalpha) {
- u16 i = 0;
- for (; i < esets.size(); i++) {
- const CharReach &cr = esets[i];
-
-#ifdef DEBUG
- DEBUG_PRINTF("eq set: ");
- for (size_t s = cr.find_first(); s != CharReach::npos;
- s = cr.find_next(s)) {
- printf("%02hhx ", (u8)s);
- }
- printf("-> %u\n", i);
-#endif
- u16 leader = cr.find_first();
- for (size_t s = cr.find_first(); s != CharReach::npos;
- s = cr.find_next(s)) {
- alpha[s] = i;
- }
- unalpha[i] = leader;
- }
-
- for (u16 j = N_CHARS; j < ALPHABET_SIZE; j++, i++) {
- alpha[j] = i;
- unalpha[i] = j;
- }
-
- return i; // alphabet size
-}
-
-void calculateAlphabet(const NGHolder &g, array<u16, ALPHABET_SIZE> &alpha,
- array<u16, ALPHABET_SIZE> &unalpha, u16 *alphasize) {
- vector<CharReach> esets(1, CharReach::dot());
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
-
- const CharReach &cr = g[v].char_reach;
-
- for (size_t i = 0; i < esets.size(); i++) {
- if (esets[i].count() == 1) {
- continue;
- }
-
- CharReach t = cr & esets[i];
- if (t.any() && t != esets[i]) {
- esets[i] &= ~t;
- esets.push_back(t);
- }
- }
- }
- // for deterministic compiles
- sort(esets.begin(), esets.end());
-
- assert(alphasize);
- *alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
-}
-
-static
-bool allExternalReports(const ReportManager &rm,
- const flat_set<ReportID> &reports) {
- for (auto report_id : reports) {
- if (!isExternalReport(rm.getReport(report_id))) {
- return false;
- }
- }
-
- return true;
-}
-
-static
-dstate_id_t successor(const vector<dstate> &dstates, dstate_id_t c,
- const array<u16, ALPHABET_SIZE> &alpha, symbol_t s) {
- return dstates[c].next[alpha[s]];
-}
-
-void getFullTransitionFromState(const raw_dfa &n, dstate_id_t state,
- dstate_id_t *out_table) {
- for (u32 i = 0; i < ALPHABET_SIZE; i++) {
- out_table[i] = successor(n.states, state, n.alpha_remap, i);
- }
-}
-
-template<typename stateset>
-static
+#include <vector>
+
+#include <boost/dynamic_bitset.hpp>
+
+using namespace std;
+using boost::dynamic_bitset;
+
+namespace ue2 {
+
+#define FINAL_DFA_STATE_LIMIT 16383
+#define DFA_STATE_LIMIT 1024
+#define NFA_STATE_LIMIT 256
+
+u16 buildAlphabetFromEquivSets(const std::vector<CharReach> &esets,
+ array<u16, ALPHABET_SIZE> &alpha,
+ array<u16, ALPHABET_SIZE> &unalpha) {
+ u16 i = 0;
+ for (; i < esets.size(); i++) {
+ const CharReach &cr = esets[i];
+
+#ifdef DEBUG
+ DEBUG_PRINTF("eq set: ");
+ for (size_t s = cr.find_first(); s != CharReach::npos;
+ s = cr.find_next(s)) {
+ printf("%02hhx ", (u8)s);
+ }
+ printf("-> %u\n", i);
+#endif
+ u16 leader = cr.find_first();
+ for (size_t s = cr.find_first(); s != CharReach::npos;
+ s = cr.find_next(s)) {
+ alpha[s] = i;
+ }
+ unalpha[i] = leader;
+ }
+
+ for (u16 j = N_CHARS; j < ALPHABET_SIZE; j++, i++) {
+ alpha[j] = i;
+ unalpha[i] = j;
+ }
+
+ return i; // alphabet size
+}
+
+void calculateAlphabet(const NGHolder &g, array<u16, ALPHABET_SIZE> &alpha,
+ array<u16, ALPHABET_SIZE> &unalpha, u16 *alphasize) {
+ vector<CharReach> esets(1, CharReach::dot());
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ const CharReach &cr = g[v].char_reach;
+
+ for (size_t i = 0; i < esets.size(); i++) {
+ if (esets[i].count() == 1) {
+ continue;
+ }
+
+ CharReach t = cr & esets[i];
+ if (t.any() && t != esets[i]) {
+ esets[i] &= ~t;
+ esets.push_back(t);
+ }
+ }
+ }
+ // for deterministic compiles
+ sort(esets.begin(), esets.end());
+
+ assert(alphasize);
+ *alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
+}
+
+static
+bool allExternalReports(const ReportManager &rm,
+ const flat_set<ReportID> &reports) {
+ for (auto report_id : reports) {
+ if (!isExternalReport(rm.getReport(report_id))) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static
+dstate_id_t successor(const vector<dstate> &dstates, dstate_id_t c,
+ const array<u16, ALPHABET_SIZE> &alpha, symbol_t s) {
+ return dstates[c].next[alpha[s]];
+}
+
+void getFullTransitionFromState(const raw_dfa &n, dstate_id_t state,
+ dstate_id_t *out_table) {
+ for (u32 i = 0; i < ALPHABET_SIZE; i++) {
+ out_table[i] = successor(n.states, state, n.alpha_remap, i);
+ }
+}
+
+template<typename stateset>
+static
void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused,
- stateset *init, stateset *init_deep,
- vector<NFAVertex> *v_by_index) {
- for (auto v : vertices_range(g)) {
+ stateset *init, stateset *init_deep,
+ vector<NFAVertex> *v_by_index) {
+ for (auto v : vertices_range(g)) {
if (contains(unused, v)) {
- continue;
- }
-
- u32 vert_id = g[v].index;
- assert(vert_id < init->size());
-
- if (is_any_start(v, g)) {
- init->set(vert_id);
- if (hasSelfLoop(v, g) || is_triggered(g)) {
- DEBUG_PRINTF("setting %u\n", vert_id);
- init_deep->set(vert_id);
- }
- }
- }
-
- v_by_index->clear();
+ continue;
+ }
+
+ u32 vert_id = g[v].index;
+ assert(vert_id < init->size());
+
+ if (is_any_start(v, g)) {
+ init->set(vert_id);
+ if (hasSelfLoop(v, g) || is_triggered(g)) {
+ DEBUG_PRINTF("setting %u\n", vert_id);
+ init_deep->set(vert_id);
+ }
+ }
+ }
+
+ v_by_index->clear();
v_by_index->resize(num_vertices(g), NGHolder::null_vertex());
-
- for (auto v : vertices_range(g)) {
- u32 vert_id = g[v].index;
+
+ for (auto v : vertices_range(g)) {
+ u32 vert_id = g[v].index;
assert((*v_by_index)[vert_id] == NGHolder::null_vertex());
- (*v_by_index)[vert_id] = v;
- }
-
- if (is_triggered(g)) {
- *init_deep = *init;
- }
-}
-
-template<typename StateSet>
+ (*v_by_index)[vert_id] = v;
+ }
+
+ if (is_triggered(g)) {
+ *init_deep = *init;
+ }
+}
+
+template<typename StateSet>
void populateAccepts(const NGHolder &g, const flat_set<NFAVertex> &unused,
- StateSet *accept, StateSet *acceptEod) {
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ StateSet *accept, StateSet *acceptEod) {
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
if (contains(unused, v)) {
continue;
- }
+ }
accept->set(g[v].index);
- }
- for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
- if (v == g.accept) {
- continue;
- }
+ }
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ if (v == g.accept) {
+ continue;
+ }
if (contains(unused, v)) {
continue;
- }
+ }
acceptEod->set(g[v].index);
- }
-}
-
-static
-bool canPruneEdgesFromAccept(const ReportManager &rm, const NGHolder &g) {
- bool seen = false;
- u32 ekey = 0;
-
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- if (is_special(v, g)) {
- continue;
- }
-
- for (auto report_id : g[v].reports) {
- const Report &ir = rm.getReport(report_id);
-
- if (!isSimpleExhaustible(ir)) {
- return false;
- }
-
- if (!seen) {
- seen = true;
- ekey = ir.ekey;
- } else if (ekey != ir.ekey) {
- return false;
- }
- }
- }
-
- /* need to check accept eod does not have any unseen reports as well */
- for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
- if (is_special(v, g)) {
- continue;
- }
-
- for (auto report_id : g[v].reports) {
- const Report &ir = rm.getReport(report_id);
-
- if (!isSimpleExhaustible(ir)) {
- return false;
- }
-
- if (!seen) {
- seen = true;
- ekey = ir.ekey;
- } else if (ekey != ir.ekey) {
- return false;
- }
- }
- }
-
- return true;
-}
-
-static
-bool overhangMatchesTrigger(const vector<vector<CharReach> > &all_triggers,
- vector<CharReach>::const_reverse_iterator itb,
- vector<CharReach>::const_reverse_iterator ite) {
- for (const auto &trigger : all_triggers) {
- vector<CharReach>::const_reverse_iterator it = itb;
- vector<CharReach>::const_reverse_iterator kt = trigger.rbegin();
- for (; it != ite && kt != trigger.rend(); ++it, ++kt) {
- if ((*it & *kt).none()) {
- /* this trigger does not match the overhang, try next */
- goto try_next_trigger;
- }
- }
-
- return true;
- try_next_trigger:;
- }
-
- return false; /* no trigger matches the over hang */
-}
-
-static
-bool triggerAllowed(const NGHolder &g, const NFAVertex v,
- const vector<vector<CharReach> > &all_triggers,
- const vector<CharReach> &trigger) {
+ }
+}
+
+static
+bool canPruneEdgesFromAccept(const ReportManager &rm, const NGHolder &g) {
+ bool seen = false;
+ u32 ekey = 0;
+
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ for (auto report_id : g[v].reports) {
+ const Report &ir = rm.getReport(report_id);
+
+ if (!isSimpleExhaustible(ir)) {
+ return false;
+ }
+
+ if (!seen) {
+ seen = true;
+ ekey = ir.ekey;
+ } else if (ekey != ir.ekey) {
+ return false;
+ }
+ }
+ }
+
+ /* need to check accept eod does not have any unseen reports as well */
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ for (auto report_id : g[v].reports) {
+ const Report &ir = rm.getReport(report_id);
+
+ if (!isSimpleExhaustible(ir)) {
+ return false;
+ }
+
+ if (!seen) {
+ seen = true;
+ ekey = ir.ekey;
+ } else if (ekey != ir.ekey) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+static
+bool overhangMatchesTrigger(const vector<vector<CharReach> > &all_triggers,
+ vector<CharReach>::const_reverse_iterator itb,
+ vector<CharReach>::const_reverse_iterator ite) {
+ for (const auto &trigger : all_triggers) {
+ vector<CharReach>::const_reverse_iterator it = itb;
+ vector<CharReach>::const_reverse_iterator kt = trigger.rbegin();
+ for (; it != ite && kt != trigger.rend(); ++it, ++kt) {
+ if ((*it & *kt).none()) {
+ /* this trigger does not match the overhang, try next */
+ goto try_next_trigger;
+ }
+ }
+
+ return true;
+ try_next_trigger:;
+ }
+
+ return false; /* no trigger matches the over hang */
+}
+
+static
+bool triggerAllowed(const NGHolder &g, const NFAVertex v,
+ const vector<vector<CharReach> > &all_triggers,
+ const vector<CharReach> &trigger) {
flat_set<NFAVertex> curr({v});
flat_set<NFAVertex> next;
-
- for (auto it = trigger.rbegin(); it != trigger.rend(); ++it) {
- next.clear();
-
- for (auto u : curr) {
- assert(u != g.startDs); /* triggered graphs should not use sds */
- if (u == g.start) {
- if (overhangMatchesTrigger(all_triggers, it, trigger.rend())) {
- return true;
- }
- continue;
- }
-
- if ((g[u].char_reach & *it).none()) {
- continue;
- }
- insert(&next, inv_adjacent_vertices(u, g));
- }
-
- if (next.empty()) {
- return false;
- }
-
- next.swap(curr);
- }
-
- return true;
-}
-
+
+ for (auto it = trigger.rbegin(); it != trigger.rend(); ++it) {
+ next.clear();
+
+ for (auto u : curr) {
+ assert(u != g.startDs); /* triggered graphs should not use sds */
+ if (u == g.start) {
+ if (overhangMatchesTrigger(all_triggers, it, trigger.rend())) {
+ return true;
+ }
+ continue;
+ }
+
+ if ((g[u].char_reach & *it).none()) {
+ continue;
+ }
+ insert(&next, inv_adjacent_vertices(u, g));
+ }
+
+ if (next.empty()) {
+ return false;
+ }
+
+ next.swap(curr);
+ }
+
+ return true;
+}
+
void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused,
- bool single_trigger,
- const vector<vector<CharReach>> &triggers,
- dynamic_bitset<> *out) {
- if (single_trigger) {
- return; /* no live states can lead to new states */
- }
-
- for (auto v : vertices_range(g)) {
+ bool single_trigger,
+ const vector<vector<CharReach>> &triggers,
+ dynamic_bitset<> *out) {
+ if (single_trigger) {
+ return; /* no live states can lead to new states */
+ }
+
+ for (auto v : vertices_range(g)) {
if (contains(unused, v)) {
- continue;
- }
- for (const auto &trigger : triggers) {
- if (triggerAllowed(g, v, triggers, trigger)) {
+ continue;
+ }
+ for (const auto &trigger : triggers) {
+ if (triggerAllowed(g, v, triggers, trigger)) {
DEBUG_PRINTF("idx %zu is valid location for top\n", g[v].index);
out->set(g[v].index);
- break;
- }
- }
- }
-
- assert(out->test(g[g.start].index));
-}
-
-namespace {
-
+ break;
+ }
+ }
+ }
+
+ assert(out->test(g[g.start].index));
+}
+
+namespace {
+
template<typename Automaton_Traits>
class Automaton_Base {
-public:
+public:
using StateSet = typename Automaton_Traits::StateSet;
using StateMap = typename Automaton_Traits::StateMap;
-
+
Automaton_Base(const ReportManager *rm_in, const NGHolder &graph_in,
bool single_trigger,
const vector<vector<CharReach>> &triggers, bool prunable_in)
@@ -362,117 +362,117 @@ public:
prunable(prunable_in) {
populateInit(graph, unused, &init, &initDS, &v_by_index);
populateAccepts(graph, unused, &accept, &acceptEod);
-
- start_anchored = DEAD_STATE + 1;
- if (initDS == init) {
- start_floating = start_anchored;
- } else if (initDS.any()) {
- start_floating = start_anchored + 1;
- } else {
- start_floating = DEAD_STATE;
- }
-
- calculateAlphabet(graph, alpha, unalpha, &alphasize);
-
- for (const auto &sq : findSquashers(graph)) {
- NFAVertex v = sq.first;
- u32 vert_id = graph[v].index;
- squash.set(vert_id);
+
+ start_anchored = DEAD_STATE + 1;
+ if (initDS == init) {
+ start_floating = start_anchored;
+ } else if (initDS.any()) {
+ start_floating = start_anchored + 1;
+ } else {
+ start_floating = DEAD_STATE;
+ }
+
+ calculateAlphabet(graph, alpha, unalpha, &alphasize);
+
+ for (const auto &sq : findSquashers(graph)) {
+ NFAVertex v = sq.first;
+ u32 vert_id = graph[v].index;
+ squash.set(vert_id);
squash_mask[vert_id]
= Automaton_Traits::copy_states(std::move(sq.second),
numStates);
- }
-
- cr_by_index = populateCR(graph, v_by_index, alpha);
- if (is_triggered(graph)) {
+ }
+
+ cr_by_index = populateCR(graph, v_by_index, alpha);
+ if (is_triggered(graph)) {
dynamic_bitset<> temp(numStates);
markToppableStarts(graph, unused, single_trigger, triggers,
&temp);
toppable = Automaton_Traits::copy_states(std::move(temp),
numStates);
- }
- }
-
-public:
- void transition(const StateSet &in, StateSet *next) {
- transition_graph(*this, v_by_index, in, next);
- }
-
- const vector<StateSet> initial() {
+ }
+ }
+
+public:
+ void transition(const StateSet &in, StateSet *next) {
+ transition_graph(*this, v_by_index, in, next);
+ }
+
+ const vector<StateSet> initial() {
vector<StateSet> rv = {init};
- if (start_floating != DEAD_STATE && start_floating != start_anchored) {
- rv.push_back(initDS);
- }
- return rv;
- }
-
-private:
- void reports_i(const StateSet &in, bool eod, flat_set<ReportID> &rv) {
- StateSet acc = in & (eod ? acceptEod : accept);
- for (size_t i = acc.find_first(); i != StateSet::npos;
- i = acc.find_next(i)) {
- NFAVertex v = v_by_index[i];
- DEBUG_PRINTF("marking report\n");
- const auto &my_reports = graph[v].reports;
- rv.insert(my_reports.begin(), my_reports.end());
- }
- }
-
-public:
- void reports(const StateSet &in, flat_set<ReportID> &rv) {
- reports_i(in, false, rv);
- }
- void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
- reports_i(in, true, rv);
- }
-
- bool canPrune(const flat_set<ReportID> &test_reports) const {
- if (!rm || !prunable || !canPruneEdgesFromAccept(*rm, graph)) {
- return false;
- }
- return allExternalReports(*rm, test_reports);
- }
-
-private:
- const ReportManager *rm;
-public:
- const NGHolder &graph;
- u32 numStates;
+ if (start_floating != DEAD_STATE && start_floating != start_anchored) {
+ rv.push_back(initDS);
+ }
+ return rv;
+ }
+
+private:
+ void reports_i(const StateSet &in, bool eod, flat_set<ReportID> &rv) {
+ StateSet acc = in & (eod ? acceptEod : accept);
+ for (size_t i = acc.find_first(); i != StateSet::npos;
+ i = acc.find_next(i)) {
+ NFAVertex v = v_by_index[i];
+ DEBUG_PRINTF("marking report\n");
+ const auto &my_reports = graph[v].reports;
+ rv.insert(my_reports.begin(), my_reports.end());
+ }
+ }
+
+public:
+ void reports(const StateSet &in, flat_set<ReportID> &rv) {
+ reports_i(in, false, rv);
+ }
+ void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
+ reports_i(in, true, rv);
+ }
+
+ bool canPrune(const flat_set<ReportID> &test_reports) const {
+ if (!rm || !prunable || !canPruneEdgesFromAccept(*rm, graph)) {
+ return false;
+ }
+ return allExternalReports(*rm, test_reports);
+ }
+
+private:
+ const ReportManager *rm;
+public:
+ const NGHolder &graph;
+ u32 numStates;
const flat_set<NFAVertex> unused;
- vector<NFAVertex> v_by_index;
- vector<CharReach> cr_by_index; /* pre alpha'ed */
- StateSet init;
- StateSet initDS;
- StateSet squash; /* states which allow us to mask out other states */
- StateSet accept;
- StateSet acceptEod;
- StateSet toppable; /* states which are allowed to be on when a top arrives,
- * triggered dfas only */
+ vector<NFAVertex> v_by_index;
+ vector<CharReach> cr_by_index; /* pre alpha'ed */
+ StateSet init;
+ StateSet initDS;
+ StateSet squash; /* states which allow us to mask out other states */
+ StateSet accept;
+ StateSet acceptEod;
+ StateSet toppable; /* states which are allowed to be on when a top arrives,
+ * triggered dfas only */
StateSet dead;
- map<u32, StateSet> squash_mask;
- bool prunable;
- array<u16, ALPHABET_SIZE> alpha;
- array<u16, ALPHABET_SIZE> unalpha;
- u16 alphasize;
-
- u16 start_anchored;
- u16 start_floating;
-};
-
+ map<u32, StateSet> squash_mask;
+ bool prunable;
+ array<u16, ALPHABET_SIZE> alpha;
+ array<u16, ALPHABET_SIZE> unalpha;
+ u16 alphasize;
+
+ u16 start_anchored;
+ u16 start_floating;
+};
+
struct Big_Traits {
using StateSet = dynamic_bitset<>;
using StateMap = unordered_map<StateSet, dstate_id_t, hash_dynamic_bitset>;
-
+
static StateSet init_states(u32 num) {
return StateSet(num);
}
-
+
static StateSet copy_states(dynamic_bitset<> in, UNUSED u32 num) {
assert(in.size() == num);
return in;
}
};
-
+
class Automaton_Big : public Automaton_Base<Big_Traits> {
public:
Automaton_Big(const ReportManager *rm_in, const NGHolder &graph_in,
@@ -481,42 +481,42 @@ public:
: Automaton_Base(rm_in, graph_in, single_trigger, triggers,
prunable_in) {}
};
-
+
struct Graph_Traits {
using StateSet = bitfield<NFA_STATE_LIMIT>;
using StateMap = unordered_map<StateSet, dstate_id_t>;
-
+
static StateSet init_states(UNUSED u32 num) {
assert(num <= NFA_STATE_LIMIT);
return StateSet();
- }
-
+ }
+
static StateSet copy_states(const dynamic_bitset<> &in, u32 num) {
StateSet out = init_states(num);
- for (size_t i = in.find_first(); i != in.npos && i < out.size();
- i = in.find_next(i)) {
- out.set(i);
- }
- return out;
- }
+ for (size_t i = in.find_first(); i != in.npos && i < out.size();
+ i = in.find_next(i)) {
+ out.set(i);
+ }
+ return out;
+ }
};
-
+
class Automaton_Graph : public Automaton_Base<Graph_Traits> {
-public:
+public:
Automaton_Graph(const ReportManager *rm_in, const NGHolder &graph_in,
bool single_trigger,
const vector<vector<CharReach>> &triggers, bool prunable_in)
: Automaton_Base(rm_in, graph_in, single_trigger, triggers,
prunable_in) {}
};
-
+
} // namespace
-
+
static
bool startIsRedundant(const NGHolder &g) {
set<NFAVertex> start;
set<NFAVertex> startDs;
-
+
insert(&start, adjacent_vertices(g.start, g));
insert(&startDs, adjacent_vertices(g.startDs, g));
@@ -527,42 +527,42 @@ flat_set<NFAVertex> getRedundantStarts(const NGHolder &g) {
flat_set<NFAVertex> dead;
if (startIsRedundant(g)) {
dead.insert(g.start);
- }
+ }
if (proper_out_degree(g.startDs, g) == 0) {
dead.insert(g.startDs);
- }
+ }
return dead;
}
-
+
unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph,
const ReportManager *rm, bool single_trigger,
- const vector<vector<CharReach>> &triggers,
- const Grey &grey, bool finalChance) {
- if (!grey.allowMcClellan) {
- return nullptr;
- }
-
+ const vector<vector<CharReach>> &triggers,
+ const Grey &grey, bool finalChance) {
+ if (!grey.allowMcClellan) {
+ return nullptr;
+ }
+
DEBUG_PRINTF("attempting to build %s mcclellan\n",
to_string(graph.kind).c_str());
- assert(allMatchStatesHaveReports(graph));
-
+ assert(allMatchStatesHaveReports(graph));
+
bool prunable = grey.highlanderPruneDFA && has_managed_reports(graph);
assert(rm || !has_managed_reports(graph));
if (!has_managed_reports(graph)) {
- rm = nullptr;
- }
-
- assert(triggers.empty() == !is_triggered(graph));
-
- /* We must be getting desperate if it is an outfix, so use the final chance
- * state limit logic */
- u32 state_limit
- = (graph.kind == NFA_OUTFIX || finalChance) ? FINAL_DFA_STATE_LIMIT
- : DFA_STATE_LIMIT;
-
- const u32 numStates = num_vertices(graph);
- DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates);
-
+ rm = nullptr;
+ }
+
+ assert(triggers.empty() == !is_triggered(graph));
+
+ /* We must be getting desperate if it is an outfix, so use the final chance
+ * state limit logic */
+ u32 state_limit
+ = (graph.kind == NFA_OUTFIX || finalChance) ? FINAL_DFA_STATE_LIMIT
+ : DFA_STATE_LIMIT;
+
+ const u32 numStates = num_vertices(graph);
+ DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates);
+
if (numStates > FINAL_DFA_STATE_LIMIT) {
DEBUG_PRINTF("rejecting nfa as too many vertices\n");
return nullptr;
@@ -570,47 +570,47 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph,
auto rdfa = ue2::make_unique<raw_dfa>(graph.kind);
- if (numStates <= NFA_STATE_LIMIT) {
- /* Fast path. Automaton_Graph uses a bitfield internally to represent
- * states and is quicker than Automaton_Big. */
+ if (numStates <= NFA_STATE_LIMIT) {
+ /* Fast path. Automaton_Graph uses a bitfield internally to represent
+ * states and is quicker than Automaton_Big. */
Automaton_Graph n(rm, graph, single_trigger, triggers, prunable);
if (!determinise(n, rdfa->states, state_limit)) {
- DEBUG_PRINTF("state limit exceeded\n");
- return nullptr; /* over state limit */
- }
-
- rdfa->start_anchored = n.start_anchored;
- rdfa->start_floating = n.start_floating;
- rdfa->alpha_size = n.alphasize;
- rdfa->alpha_remap = n.alpha;
- } else {
- /* Slow path. Too many states to use Automaton_Graph. */
+ DEBUG_PRINTF("state limit exceeded\n");
+ return nullptr; /* over state limit */
+ }
+
+ rdfa->start_anchored = n.start_anchored;
+ rdfa->start_floating = n.start_floating;
+ rdfa->alpha_size = n.alphasize;
+ rdfa->alpha_remap = n.alpha;
+ } else {
+ /* Slow path. Too many states to use Automaton_Graph. */
Automaton_Big n(rm, graph, single_trigger, triggers, prunable);
if (!determinise(n, rdfa->states, state_limit)) {
- DEBUG_PRINTF("state limit exceeded\n");
- return nullptr; /* over state limit */
- }
-
- rdfa->start_anchored = n.start_anchored;
- rdfa->start_floating = n.start_floating;
- rdfa->alpha_size = n.alphasize;
- rdfa->alpha_remap = n.alpha;
- }
-
- minimize_hopcroft(*rdfa, grey);
-
- DEBUG_PRINTF("after determinised into %zu states, building impl dfa "
- "(a,f) = (%hu,%hu)\n", rdfa->states.size(),
- rdfa->start_anchored, rdfa->start_floating);
-
- return rdfa;
-}
-
-unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g, const ReportManager *rm,
- const Grey &grey) {
- assert(!is_triggered(g));
- vector<vector<CharReach>> triggers;
- return buildMcClellan(g, rm, false, triggers, grey);
-}
-
-} // namespace ue2
+ DEBUG_PRINTF("state limit exceeded\n");
+ return nullptr; /* over state limit */
+ }
+
+ rdfa->start_anchored = n.start_anchored;
+ rdfa->start_floating = n.start_floating;
+ rdfa->alpha_size = n.alphasize;
+ rdfa->alpha_remap = n.alpha;
+ }
+
+ minimize_hopcroft(*rdfa, grey);
+
+ DEBUG_PRINTF("after determinised into %zu states, building impl dfa "
+ "(a,f) = (%hu,%hu)\n", rdfa->states.size(),
+ rdfa->start_anchored, rdfa->start_floating);
+
+ return rdfa;
+}
+
+unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g, const ReportManager *rm,
+ const Grey &grey) {
+ assert(!is_triggered(g));
+ vector<vector<CharReach>> triggers;
+ return buildMcClellan(g, rm, false, triggers, grey);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.h b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.h
index 8183a0d2e7..1a4042ce66 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan.h
@@ -1,81 +1,81 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Build code for McClellan DFA.
- */
-
-#ifndef NG_MCCLELLAN_H
-#define NG_MCCLELLAN_H
-
-#include "ue2common.h"
-
-#include <memory>
-#include <vector>
-
-namespace ue2 {
-
-class CharReach;
-class NGHolder;
-class ReportManager;
-struct Grey;
-struct raw_dfa;
-
-/**
- * \brief Determinises an NFA Graph into a raw_dfa.
- *
- * \param g
- * The NGHolder.
- * \param rm
- * A pointer to the ReportManager, if managed reports are used (e.g.
- * for outfixes/suffixes). Otherwise nullptr.
- * \param single_trigger
- * True if it is known that the nfa will only ever be trigger once.
- * \param triggers
- * Representing when tops may arrive. Only used by NFA_INFIX and
- * NFA_SUFFIX, should be empty for other types.
- * \param grey
- * Grey box object.
- * \param finalChance
- * Allows us to build bigger DFAs as the only alternative is an outfix.
- *
- * \return A raw_dfa, or nullptr on failure (state limit blown).
- */
-std::unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g,
- const ReportManager *rm, bool single_trigger,
- const std::vector<std::vector<CharReach>> &triggers,
- const Grey &grey, bool finalChance = false);
-
-/** Convenience wrapper for non-triggered engines */
-std::unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g,
- const ReportManager *rm,
- const Grey &grey);
-
-} // namespace ue2
-
-#endif // NG_MCCLELLAN_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Build code for McClellan DFA.
+ */
+
+#ifndef NG_MCCLELLAN_H
+#define NG_MCCLELLAN_H
+
+#include "ue2common.h"
+
+#include <memory>
+#include <vector>
+
+namespace ue2 {
+
+class CharReach;
+class NGHolder;
+class ReportManager;
+struct Grey;
+struct raw_dfa;
+
+/**
+ * \brief Determinises an NFA Graph into a raw_dfa.
+ *
+ * \param g
+ * The NGHolder.
+ * \param rm
+ * A pointer to the ReportManager, if managed reports are used (e.g.
+ * for outfixes/suffixes). Otherwise nullptr.
+ * \param single_trigger
+ * True if it is known that the nfa will only ever be trigger once.
+ * \param triggers
+ * Representing when tops may arrive. Only used by NFA_INFIX and
+ * NFA_SUFFIX, should be empty for other types.
+ * \param grey
+ * Grey box object.
+ * \param finalChance
+ * Allows us to build bigger DFAs as the only alternative is an outfix.
+ *
+ * \return A raw_dfa, or nullptr on failure (state limit blown).
+ */
+std::unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g,
+ const ReportManager *rm, bool single_trigger,
+ const std::vector<std::vector<CharReach>> &triggers,
+ const Grey &grey, bool finalChance = false);
+
+/** Convenience wrapper for non-triggered engines */
+std::unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g,
+ const ReportManager *rm,
+ const Grey &grey);
+
+} // namespace ue2
+
+#endif // NG_MCCLELLAN_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan_internal.h b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan_internal.h
index bfe030b0aa..f069d7336f 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan_internal.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_mcclellan_internal.h
@@ -1,73 +1,73 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Shared build code for DFAs (McClellan, Haig).
- */
-
-#ifndef NG_MCCLELLAN_INTERNAL_H
-#define NG_MCCLELLAN_INTERNAL_H
-
-#include "ue2common.h"
-#include "nfa/mcclellancompile.h"
-#include "nfagraph/ng_holder.h"
-#include "util/charreach.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Shared build code for DFAs (McClellan, Haig).
+ */
+
+#ifndef NG_MCCLELLAN_INTERNAL_H
+#define NG_MCCLELLAN_INTERNAL_H
+
+#include "ue2common.h"
+#include "nfa/mcclellancompile.h"
+#include "nfagraph/ng_holder.h"
+#include "util/charreach.h"
+#include "util/graph_range.h"
#include "util/flat_containers.h"
-
-#include <boost/dynamic_bitset.hpp>
-
-#include <map>
-#include <vector>
-
-namespace ue2 {
-
-struct raw_dfa;
-
-/** Fills alpha, unalpha and returns alphabet size. */
-u16 buildAlphabetFromEquivSets(const std::vector<CharReach> &esets,
- std::array<u16, ALPHABET_SIZE> &alpha,
- std::array<u16, ALPHABET_SIZE> &unalpha);
-
-/** \brief Calculates an alphabet remapping based on the symbols which the
- * graph discriminates on. Throws in some special DFA symbols as well. */
-void calculateAlphabet(const NGHolder &g, std::array<u16, ALPHABET_SIZE> &alpha,
- std::array<u16, ALPHABET_SIZE> &unalpha, u16 *alphasize);
-
-void getFullTransitionFromState(const raw_dfa &n, u16 state,
- u16 *out_table);
-
-/** produce a map of states on which it is valid to receive tops */
+
+#include <boost/dynamic_bitset.hpp>
+
+#include <map>
+#include <vector>
+
+namespace ue2 {
+
+struct raw_dfa;
+
+/** Fills alpha, unalpha and returns alphabet size. */
+u16 buildAlphabetFromEquivSets(const std::vector<CharReach> &esets,
+ std::array<u16, ALPHABET_SIZE> &alpha,
+ std::array<u16, ALPHABET_SIZE> &unalpha);
+
+/** \brief Calculates an alphabet remapping based on the symbols which the
+ * graph discriminates on. Throws in some special DFA symbols as well. */
+void calculateAlphabet(const NGHolder &g, std::array<u16, ALPHABET_SIZE> &alpha,
+ std::array<u16, ALPHABET_SIZE> &unalpha, u16 *alphasize);
+
+void getFullTransitionFromState(const raw_dfa &n, u16 state,
+ u16 *out_table);
+
+/** produce a map of states on which it is valid to receive tops */
void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused,
- bool single_trigger,
- const std::vector<std::vector<CharReach>> &triggers,
- boost::dynamic_bitset<> *out);
-
+ bool single_trigger,
+ const std::vector<std::vector<CharReach>> &triggers,
+ boost::dynamic_bitset<> *out);
+
/**
* \brief Returns a set of start vertices that will not participate in an
* implementation of this graph. These are either starts with no successors or
@@ -75,75 +75,75 @@ void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused,
*/
flat_set<NFAVertex> getRedundantStarts(const NGHolder &g);
-template<typename autom>
-void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId,
- const typename autom::StateSet &in,
- typename autom::StateSet *next) {
- typedef typename autom::StateSet StateSet;
- const NGHolder &graph = nfa.graph;
+template<typename autom>
+void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId,
+ const typename autom::StateSet &in,
+ typename autom::StateSet *next) {
+ typedef typename autom::StateSet StateSet;
+ const NGHolder &graph = nfa.graph;
const auto &unused = nfa.unused;
- const auto &alpha = nfa.alpha;
- const StateSet &squash = nfa.squash;
- const std::map<u32, StateSet> &squash_mask = nfa.squash_mask;
- const std::vector<CharReach> &cr_by_index = nfa.cr_by_index;
-
- for (symbol_t s = 0; s < nfa.alphasize; s++) {
- next[s].reset();
- }
-
- /* generate top transitions, false -> top = selfloop */
- bool top_allowed = is_triggered(graph);
-
- StateSet succ = nfa.dead;
- for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
- NFAVertex u = vByStateId[i];
-
- for (const auto &v : adjacent_vertices_range(u, graph)) {
+ const auto &alpha = nfa.alpha;
+ const StateSet &squash = nfa.squash;
+ const std::map<u32, StateSet> &squash_mask = nfa.squash_mask;
+ const std::vector<CharReach> &cr_by_index = nfa.cr_by_index;
+
+ for (symbol_t s = 0; s < nfa.alphasize; s++) {
+ next[s].reset();
+ }
+
+ /* generate top transitions, false -> top = selfloop */
+ bool top_allowed = is_triggered(graph);
+
+ StateSet succ = nfa.dead;
+ for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
+ NFAVertex u = vByStateId[i];
+
+ for (const auto &v : adjacent_vertices_range(u, graph)) {
if (contains(unused, v)) {
- continue;
- }
- succ.set(graph[v].index);
- }
-
- if (top_allowed && !nfa.toppable.test(i)) {
- /* we don't need to generate a top at this location as we are in
- * an nfa state which cannot be on when a trigger arrives. */
- top_allowed = false;
- }
- }
-
- StateSet active_squash = succ & squash;
- if (active_squash.any()) {
- for (size_t j = active_squash.find_first(); j != active_squash.npos;
- j = active_squash.find_next(j)) {
- succ &= squash_mask.find(j)->second;
- }
- }
-
- for (size_t j = succ.find_first(); j != succ.npos; j = succ.find_next(j)) {
- const CharReach &cr = cr_by_index[j];
- for (size_t s = cr.find_first(); s != cr.npos; s = cr.find_next(s)) {
- next[s].set(j); /* already alpha'ed */
- }
- }
-
- next[alpha[TOP]] = in;
-
- if (top_allowed) {
- /* we don't add in the anchored starts as the only case as the only
- * time it is appropriate is if no characters have been consumed.*/
- next[alpha[TOP]] |= nfa.initDS;
-
- active_squash = next[alpha[TOP]] & squash;
- if (active_squash.any()) {
- for (size_t j = active_squash.find_first(); j != active_squash.npos;
- j = active_squash.find_next(j)) {
- next[alpha[TOP]] &= squash_mask.find(j)->second;
- }
- }
- }
-}
-
-} // namespace ue2
-
-#endif
+ continue;
+ }
+ succ.set(graph[v].index);
+ }
+
+ if (top_allowed && !nfa.toppable.test(i)) {
+ /* we don't need to generate a top at this location as we are in
+ * an nfa state which cannot be on when a trigger arrives. */
+ top_allowed = false;
+ }
+ }
+
+ StateSet active_squash = succ & squash;
+ if (active_squash.any()) {
+ for (size_t j = active_squash.find_first(); j != active_squash.npos;
+ j = active_squash.find_next(j)) {
+ succ &= squash_mask.find(j)->second;
+ }
+ }
+
+ for (size_t j = succ.find_first(); j != succ.npos; j = succ.find_next(j)) {
+ const CharReach &cr = cr_by_index[j];
+ for (size_t s = cr.find_first(); s != cr.npos; s = cr.find_next(s)) {
+ next[s].set(j); /* already alpha'ed */
+ }
+ }
+
+ next[alpha[TOP]] = in;
+
+ if (top_allowed) {
+ /* we don't add in the anchored starts as the only case as the only
+ * time it is appropriate is if no characters have been consumed.*/
+ next[alpha[TOP]] |= nfa.initDS;
+
+ active_squash = next[alpha[TOP]] & squash;
+ if (active_squash.any()) {
+ for (size_t j = active_squash.find_first(); j != active_squash.npos;
+ j = active_squash.find_next(j)) {
+ next[alpha[TOP]] &= squash_mask.find(j)->second;
+ }
+ }
+ }
+}
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.cpp
index b1c8b9b001..8aaaf99fde 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.cpp
@@ -1,556 +1,556 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Miscellaneous optimisations.
- *
- * We sometimes see patterns of the form:
- *
- * /^.*<[^<]*foobaz/s
- *
- * This is bad for Rose as the escapes from the cyclic state are the same as
- * the trigger. However, we can transform this into:
- *
- * /^.*<.*foobaz/s
- *
- * ... as the first dot star can eat all but the last '<'.
- *
- * Slightly more formally:
- *
- * Given a cyclic state v with character reachability v_cr and proper preds
- * {p1 .. pn} with character reachability {p1_cr .. pn_cr}.
- *
- * let v_cr' = union(intersection(p1_cr .. pn_cr), v_cr)
- *
- * v_cr can be replaced with v_cr' without changing the behaviour of the system
- * if:
- *
- * for any given proper pred pi: if pi is set in the nfa then after consuming
- * any symbol in v_cr', pi will still be set in the nfa and every successor of
- * v is a successor of pi.
- *
- * The easiest way for this condition to be satisfied is for each proper pred
- * pi to have all its preds all have an edge to a pred of pi with a character
- * reachability containing v_cr'. There are, however, other ways to establish
- * the condition holds.
- *
- * Note: a similar transformation can be applied in reverse, details left as an
- * exercise for the interested reader. */
-#include "ng_misc_opt.h"
-
-#include "ng_holder.h"
-#include "ng_prune.h"
-#include "ng_util.h"
-#include "util/charreach.h"
-#include "util/container.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Miscellaneous optimisations.
+ *
+ * We sometimes see patterns of the form:
+ *
+ * /^.*<[^<]*foobaz/s
+ *
+ * This is bad for Rose as the escapes from the cyclic state are the same as
+ * the trigger. However, we can transform this into:
+ *
+ * /^.*<.*foobaz/s
+ *
+ * ... as the first dot star can eat all but the last '<'.
+ *
+ * Slightly more formally:
+ *
+ * Given a cyclic state v with character reachability v_cr and proper preds
+ * {p1 .. pn} with character reachability {p1_cr .. pn_cr}.
+ *
+ * let v_cr' = union(intersection(p1_cr .. pn_cr), v_cr)
+ *
+ * v_cr can be replaced with v_cr' without changing the behaviour of the system
+ * if:
+ *
+ * for any given proper pred pi: if pi is set in the nfa then after consuming
+ * any symbol in v_cr', pi will still be set in the nfa and every successor of
+ * v is a successor of pi.
+ *
+ * The easiest way for this condition to be satisfied is for each proper pred
+ * pi to have all its preds all have an edge to a pred of pi with a character
+ * reachability containing v_cr'. There are, however, other ways to establish
+ * the condition holds.
+ *
+ * Note: a similar transformation can be applied in reverse, details left as an
+ * exercise for the interested reader. */
+#include "ng_misc_opt.h"
+
+#include "ng_holder.h"
+#include "ng_prune.h"
+#include "ng_util.h"
+#include "util/charreach.h"
+#include "util/container.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
#include "util/flat_containers.h"
-#include "ue2common.h"
-
+#include "ue2common.h"
+
#include <boost/dynamic_bitset.hpp>
#include <boost/graph/depth_first_search.hpp>
#include <boost/graph/filtered_graph.hpp>
-#include <map>
-#include <set>
-#include <vector>
-
-using namespace std;
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace std;
using boost::make_filtered_graph;
-
-namespace ue2 {
-
-static
-void findCandidates(NGHolder &g, const vector<NFAVertex> &ordering,
- vector<NFAVertex> *cand) {
- for (auto it = ordering.rbegin(), ite = ordering.rend(); it != ite; ++it) {
- NFAVertex v = *it;
-
- if (is_special(v, g)
- || !hasSelfLoop(v, g)
- || g[v].char_reach.all()) {
- continue;
- }
-
- // For `v' to be a candidate, its predecessors must all have the same
- // successor set as `v'.
-
+
+namespace ue2 {
+
+static
+void findCandidates(NGHolder &g, const vector<NFAVertex> &ordering,
+ vector<NFAVertex> *cand) {
+ for (auto it = ordering.rbegin(), ite = ordering.rend(); it != ite; ++it) {
+ NFAVertex v = *it;
+
+ if (is_special(v, g)
+ || !hasSelfLoop(v, g)
+ || g[v].char_reach.all()) {
+ continue;
+ }
+
+ // For `v' to be a candidate, its predecessors must all have the same
+ // successor set as `v'.
+
auto succ_v = succs(v, g);
flat_set<NFAVertex> succ_u;
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- succ_u.clear();
- succ(g, u, &succ_u);
- if (succ_v != succ_u) {
- goto next_cand;
- }
- }
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ succ_u.clear();
+ succ(g, u, &succ_u);
+ if (succ_v != succ_u) {
+ goto next_cand;
+ }
+ }
DEBUG_PRINTF("vertex %zu is a candidate\n", g[v].index);
- cand->push_back(v);
- next_cand:;
- }
-}
-
-static
-void findCandidates_rev(NGHolder &g, const vector<NFAVertex> &ordering,
- vector<NFAVertex> *cand) {
- for (auto it = ordering.begin(), ite = ordering.end(); it != ite; ++it) {
- NFAVertex v = *it;
-
- if (is_special(v, g)
- || !hasSelfLoop(v, g)
- || g[v].char_reach.all()) {
- continue;
- }
-
- // For `v' to be a candidate, its predecessors must all have the same
- // successor set as `v'.
-
+ cand->push_back(v);
+ next_cand:;
+ }
+}
+
+static
+void findCandidates_rev(NGHolder &g, const vector<NFAVertex> &ordering,
+ vector<NFAVertex> *cand) {
+ for (auto it = ordering.begin(), ite = ordering.end(); it != ite; ++it) {
+ NFAVertex v = *it;
+
+ if (is_special(v, g)
+ || !hasSelfLoop(v, g)
+ || g[v].char_reach.all()) {
+ continue;
+ }
+
+ // For `v' to be a candidate, its predecessors must all have the same
+ // successor set as `v'.
+
auto pred_v = preds(v, g);
flat_set<NFAVertex> pred_u;
-
- for (auto u : adjacent_vertices_range(v, g)) {
- pred_u.clear();
- pred(g, u, &pred_u);
- if (pred_v != pred_u) {
- goto next_cand;
- }
- }
+
+ for (auto u : adjacent_vertices_range(v, g)) {
+ pred_u.clear();
+ pred(g, u, &pred_u);
+ if (pred_v != pred_u) {
+ goto next_cand;
+ }
+ }
DEBUG_PRINTF("vertex %zu is a candidate\n", g[v].index);
- cand->push_back(v);
- next_cand:;
- }
-}
-
-/** Find the intersection of the reachability of the predecessors of \p v. */
-static
-void predCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) {
- add.setall();
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u != v) {
- add &= g[u].char_reach;
- }
- }
-}
-
-/** Find the intersection of the reachability of the successors of \p v. */
-static
-void succCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) {
- add.setall();
- for (auto u : adjacent_vertices_range(v, g)) {
- if (u != v) {
- add &= g[u].char_reach;
- }
- }
-}
-
-/** The sustain set is used to show that once vertex p is on it stays on given
- * the alphabet new_cr. Every vertex pp in the sustain set has the following
- * properties:
- * -# an edge to p
- * -# enough edges to vertices in the sustain set to ensure that a vertex in
- * the sustain set will be on after consuming a character. */
-static
-set<NFAVertex> findSustainSet(const NGHolder &g, NFAVertex p,
- bool ignore_starts, const CharReach &new_cr) {
+ cand->push_back(v);
+ next_cand:;
+ }
+}
+
+/** Find the intersection of the reachability of the predecessors of \p v. */
+static
+void predCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) {
+ add.setall();
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u != v) {
+ add &= g[u].char_reach;
+ }
+ }
+}
+
+/** Find the intersection of the reachability of the successors of \p v. */
+static
+void succCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) {
+ add.setall();
+ for (auto u : adjacent_vertices_range(v, g)) {
+ if (u != v) {
+ add &= g[u].char_reach;
+ }
+ }
+}
+
+/** The sustain set is used to show that once vertex p is on it stays on given
+ * the alphabet new_cr. Every vertex pp in the sustain set has the following
+ * properties:
+ * -# an edge to p
+ * -# enough edges to vertices in the sustain set to ensure that a vertex in
+ * the sustain set will be on after consuming a character. */
+static
+set<NFAVertex> findSustainSet(const NGHolder &g, NFAVertex p,
+ bool ignore_starts, const CharReach &new_cr) {
auto cand = preds<set<NFAVertex>>(p, g);
- if (ignore_starts) {
- cand.erase(g.startDs);
- }
- /* remove elements from cand until the sustain set property holds */
- bool changed;
- do {
- DEBUG_PRINTF("|cand| %zu\n", cand.size());
- changed = false;
- set<NFAVertex>::const_iterator it = cand.begin();
- while (it != cand.end()) {
- NFAVertex u = *it;
- ++it;
- CharReach sus_cr;
- for (auto v : adjacent_vertices_range(u, g)) {
- if (contains(cand, v)) {
- sus_cr |= g[v].char_reach;
- }
- }
-
- if (!new_cr.isSubsetOf(sus_cr)) {
- cand.erase(u);
- changed = true;
- }
- }
- } while (changed);
-
- /* Note: it may be possible to find a (larger) sustain set for a smaller
- * new_cr */
- return cand;
-}
-
-/** Finds the reverse version of the sustain set.. whatever that means. */
-static
-set<NFAVertex> findSustainSet_rev(const NGHolder &g, NFAVertex p,
- const CharReach &new_cr) {
+ if (ignore_starts) {
+ cand.erase(g.startDs);
+ }
+ /* remove elements from cand until the sustain set property holds */
+ bool changed;
+ do {
+ DEBUG_PRINTF("|cand| %zu\n", cand.size());
+ changed = false;
+ set<NFAVertex>::const_iterator it = cand.begin();
+ while (it != cand.end()) {
+ NFAVertex u = *it;
+ ++it;
+ CharReach sus_cr;
+ for (auto v : adjacent_vertices_range(u, g)) {
+ if (contains(cand, v)) {
+ sus_cr |= g[v].char_reach;
+ }
+ }
+
+ if (!new_cr.isSubsetOf(sus_cr)) {
+ cand.erase(u);
+ changed = true;
+ }
+ }
+ } while (changed);
+
+ /* Note: it may be possible to find a (larger) sustain set for a smaller
+ * new_cr */
+ return cand;
+}
+
+/** Finds the reverse version of the sustain set.. whatever that means. */
+static
+set<NFAVertex> findSustainSet_rev(const NGHolder &g, NFAVertex p,
+ const CharReach &new_cr) {
auto cand = succs<set<NFAVertex>>(p, g);
- /* remove elements from cand until the sustain set property holds */
- bool changed;
- do {
- changed = false;
- set<NFAVertex>::const_iterator it = cand.begin();
- while (it != cand.end()) {
- NFAVertex u = *it;
- ++it;
- CharReach sus_cr;
- for (auto v : inv_adjacent_vertices_range(u, g)) {
- if (contains(cand, v)) {
- sus_cr |= g[v].char_reach;
- }
- }
-
- if (!new_cr.isSubsetOf(sus_cr)) {
- cand.erase(u);
- changed = true;
- }
- }
- } while (changed);
-
- /* Note: it may be possible to find a (larger) sustain set for a smaller
- * new_cr */
- return cand;
-}
-
-static
-bool enlargeCyclicVertex(NGHolder &g, som_type som, NFAVertex v) {
+ /* remove elements from cand until the sustain set property holds */
+ bool changed;
+ do {
+ changed = false;
+ set<NFAVertex>::const_iterator it = cand.begin();
+ while (it != cand.end()) {
+ NFAVertex u = *it;
+ ++it;
+ CharReach sus_cr;
+ for (auto v : inv_adjacent_vertices_range(u, g)) {
+ if (contains(cand, v)) {
+ sus_cr |= g[v].char_reach;
+ }
+ }
+
+ if (!new_cr.isSubsetOf(sus_cr)) {
+ cand.erase(u);
+ changed = true;
+ }
+ }
+ } while (changed);
+
+ /* Note: it may be possible to find a (larger) sustain set for a smaller
+ * new_cr */
+ return cand;
+}
+
+static
+bool enlargeCyclicVertex(NGHolder &g, som_type som, NFAVertex v) {
DEBUG_PRINTF("considering vertex %zu\n", g[v].index);
- const CharReach &v_cr = g[v].char_reach;
-
- CharReach add;
- predCRIntersection(g, v, add);
-
- add |= v_cr;
-
- if (add == v_cr) {
- DEBUG_PRINTF("no benefit\n");
- return false;
- }
-
- DEBUG_PRINTF("cr of width %zu up for grabs\n", add.count() - v_cr.count());
-
- for (auto p : inv_adjacent_vertices_range(v, g)) {
- if (p == v) {
- continue;
- }
+ const CharReach &v_cr = g[v].char_reach;
+
+ CharReach add;
+ predCRIntersection(g, v, add);
+
+ add |= v_cr;
+
+ if (add == v_cr) {
+ DEBUG_PRINTF("no benefit\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("cr of width %zu up for grabs\n", add.count() - v_cr.count());
+
+ for (auto p : inv_adjacent_vertices_range(v, g)) {
+ if (p == v) {
+ continue;
+ }
DEBUG_PRINTF("looking at pred %zu\n", g[p].index);
-
- bool ignore_sds = som; /* if we are tracking som, entries into a state
- from sds are significant. */
-
- set<NFAVertex> sustain = findSustainSet(g, p, ignore_sds, add);
- DEBUG_PRINTF("sustain set is %zu\n", sustain.size());
- if (sustain.empty()) {
- DEBUG_PRINTF("yawn\n");
- }
-
- for (auto pp : inv_adjacent_vertices_range(p, g)) {
- /* we need to ensure that whenever pp sets p, that a member of the
- sustain set is set. Note: p's cr may be not be a subset of
- new_cr */
- CharReach sustain_cr;
- for (auto pv : adjacent_vertices_range(pp, g)) {
- if (contains(sustain, pv)) {
- sustain_cr |= g[pv].char_reach;
- }
- }
- if (!g[p].char_reach.isSubsetOf(sustain_cr)) {
- DEBUG_PRINTF("unable to establish that preds are forced on\n");
- return false;
- }
- }
- }
-
- /* the cr can be increased */
- g[v].char_reach = add;
+
+ bool ignore_sds = som; /* if we are tracking som, entries into a state
+ from sds are significant. */
+
+ set<NFAVertex> sustain = findSustainSet(g, p, ignore_sds, add);
+ DEBUG_PRINTF("sustain set is %zu\n", sustain.size());
+ if (sustain.empty()) {
+ DEBUG_PRINTF("yawn\n");
+ }
+
+ for (auto pp : inv_adjacent_vertices_range(p, g)) {
+ /* we need to ensure that whenever pp sets p, that a member of the
+ sustain set is set. Note: p's cr may be not be a subset of
+ new_cr */
+ CharReach sustain_cr;
+ for (auto pv : adjacent_vertices_range(pp, g)) {
+ if (contains(sustain, pv)) {
+ sustain_cr |= g[pv].char_reach;
+ }
+ }
+ if (!g[p].char_reach.isSubsetOf(sustain_cr)) {
+ DEBUG_PRINTF("unable to establish that preds are forced on\n");
+ return false;
+ }
+ }
+ }
+
+ /* the cr can be increased */
+ g[v].char_reach = add;
DEBUG_PRINTF("vertex %zu was widened\n", g[v].index);
- return true;
-}
-
-static
-bool enlargeCyclicVertex_rev(NGHolder &g, NFAVertex v) {
+ return true;
+}
+
+static
+bool enlargeCyclicVertex_rev(NGHolder &g, NFAVertex v) {
DEBUG_PRINTF("considering vertex %zu\n", g[v].index);
- const CharReach &v_cr = g[v].char_reach;
-
- CharReach add;
- succCRIntersection(g, v, add);
-
- add |= v_cr;
-
- if (add == v_cr) {
- DEBUG_PRINTF("no benefit\n");
- return false;
- }
-
- DEBUG_PRINTF("cr of width %zu up for grabs\n", add.count() - v_cr.count());
-
- for (auto p : adjacent_vertices_range(v, g)) {
- if (p == v) {
- continue;
- }
+ const CharReach &v_cr = g[v].char_reach;
+
+ CharReach add;
+ succCRIntersection(g, v, add);
+
+ add |= v_cr;
+
+ if (add == v_cr) {
+ DEBUG_PRINTF("no benefit\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("cr of width %zu up for grabs\n", add.count() - v_cr.count());
+
+ for (auto p : adjacent_vertices_range(v, g)) {
+ if (p == v) {
+ continue;
+ }
DEBUG_PRINTF("looking at succ %zu\n", g[p].index);
-
- set<NFAVertex> sustain = findSustainSet_rev(g, p, add);
- DEBUG_PRINTF("sustain set is %zu\n", sustain.size());
- if (sustain.empty()) {
- DEBUG_PRINTF("yawn\n");
- }
-
- for (auto pp : adjacent_vertices_range(p, g)) {
- /* we need to ensure something - see fwd ver */
- CharReach sustain_cr;
- for (auto pv : inv_adjacent_vertices_range(pp, g)) {
- if (contains(sustain, pv)) {
- sustain_cr |= g[pv].char_reach;
- }
- }
- if (!g[p].char_reach.isSubsetOf(sustain_cr)) {
- DEBUG_PRINTF("unable to establish that succs are thingy\n");
- return false;
- }
- }
- }
-
- /* the cr can be increased */
- g[v].char_reach = add;
+
+ set<NFAVertex> sustain = findSustainSet_rev(g, p, add);
+ DEBUG_PRINTF("sustain set is %zu\n", sustain.size());
+ if (sustain.empty()) {
+ DEBUG_PRINTF("yawn\n");
+ }
+
+ for (auto pp : adjacent_vertices_range(p, g)) {
+ /* we need to ensure something - see fwd ver */
+ CharReach sustain_cr;
+ for (auto pv : inv_adjacent_vertices_range(pp, g)) {
+ if (contains(sustain, pv)) {
+ sustain_cr |= g[pv].char_reach;
+ }
+ }
+ if (!g[p].char_reach.isSubsetOf(sustain_cr)) {
+ DEBUG_PRINTF("unable to establish that succs are thingy\n");
+ return false;
+ }
+ }
+ }
+
+ /* the cr can be increased */
+ g[v].char_reach = add;
DEBUG_PRINTF("vertex %zu was widened\n", g[v].index);
- return true;
-}
-
-static
-bool enlargeCyclicCR(NGHolder &g, som_type som,
- const vector<NFAVertex> &ordering) {
- DEBUG_PRINTF("hello\n");
-
- vector<NFAVertex> candidates;
- findCandidates(g, ordering, &candidates);
-
- bool rv = false;
- for (auto v : candidates) {
- rv |= enlargeCyclicVertex(g, som, v);
- }
-
- return rv;
-}
-
-static
-bool enlargeCyclicCR_rev(NGHolder &g, const vector<NFAVertex> &ordering) {
- DEBUG_PRINTF("olleh\n");
-
- vector<NFAVertex> candidates;
- findCandidates_rev(g, ordering, &candidates);
-
- bool rv = false;
- for (auto v : candidates) {
- rv |= enlargeCyclicVertex_rev(g, v);
- }
-
- return rv;
-}
-
-bool improveGraph(NGHolder &g, som_type som) {
- /* use a topo ordering so that we can get chains of cyclic states
- * done in one sweep */
-
- const vector<NFAVertex> ordering = getTopoOrdering(g);
-
- return enlargeCyclicCR(g, som, ordering)
- | enlargeCyclicCR_rev(g, ordering);
-}
-
-/** finds a smaller reachability for a state by the reverse transformation of
- * enlargeCyclicCR. */
-CharReach reduced_cr(NFAVertex v, const NGHolder &g,
- const map<NFAVertex, BoundedRepeatSummary> &br_cyclic) {
+ return true;
+}
+
+static
+bool enlargeCyclicCR(NGHolder &g, som_type som,
+ const vector<NFAVertex> &ordering) {
+ DEBUG_PRINTF("hello\n");
+
+ vector<NFAVertex> candidates;
+ findCandidates(g, ordering, &candidates);
+
+ bool rv = false;
+ for (auto v : candidates) {
+ rv |= enlargeCyclicVertex(g, som, v);
+ }
+
+ return rv;
+}
+
+static
+bool enlargeCyclicCR_rev(NGHolder &g, const vector<NFAVertex> &ordering) {
+ DEBUG_PRINTF("olleh\n");
+
+ vector<NFAVertex> candidates;
+ findCandidates_rev(g, ordering, &candidates);
+
+ bool rv = false;
+ for (auto v : candidates) {
+ rv |= enlargeCyclicVertex_rev(g, v);
+ }
+
+ return rv;
+}
+
+bool improveGraph(NGHolder &g, som_type som) {
+ /* use a topo ordering so that we can get chains of cyclic states
+ * done in one sweep */
+
+ const vector<NFAVertex> ordering = getTopoOrdering(g);
+
+ return enlargeCyclicCR(g, som, ordering)
+ | enlargeCyclicCR_rev(g, ordering);
+}
+
+/** finds a smaller reachability for a state by the reverse transformation of
+ * enlargeCyclicCR. */
+CharReach reduced_cr(NFAVertex v, const NGHolder &g,
+ const map<NFAVertex, BoundedRepeatSummary> &br_cyclic) {
DEBUG_PRINTF("find minimal cr for %zu\n", g[v].index);
- CharReach v_cr = g[v].char_reach;
- if (proper_in_degree(v, g) != 1) {
- return v_cr;
- }
-
- NFAVertex pred = getSoleSourceVertex(g, v);
- assert(pred);
-
- /* require pred to be fed by one vertex OR (start + startDS) */
- NFAVertex predpred;
- size_t idp = in_degree(pred, g);
- if (hasSelfLoop(pred, g)) {
- return v_cr; /* not cliche */
- } else if (idp == 1) {
- predpred = getSoleSourceVertex(g, pred);
- } else if (idp == 2
- && edge(g.start, pred, g).second
- && edge(g.startDs, pred, g).second) {
- predpred = g.startDs;
- } else {
- return v_cr; /* not cliche */
- }
-
- assert(predpred);
-
- /* require predpred to be cyclic and its cr to be a superset of
- pred and v */
- if (!hasSelfLoop(predpred, g)) {
- return v_cr; /* not cliche */
- }
-
- if (contains(br_cyclic, predpred)
- && !br_cyclic.at(predpred).unbounded()) {
- return v_cr; /* fake cyclic */
- }
-
- const CharReach &p_cr = g[pred].char_reach;
- const CharReach &pp_cr = g[predpred].char_reach;
- if (!v_cr.isSubsetOf(pp_cr) || !p_cr.isSubsetOf(pp_cr)) {
- return v_cr; /* not cliche */
- }
-
- DEBUG_PRINTF("confirming [x]* prop\n");
- /* we require all of v succs to be succ of p */
- set<NFAVertex> v_succ;
- insert(&v_succ, adjacent_vertices(v, g));
- set<NFAVertex> p_succ;
- insert(&p_succ, adjacent_vertices(pred, g));
-
- if (!is_subset_of(v_succ, p_succ)) {
- DEBUG_PRINTF("fail\n");
- return v_cr; /* not cliche */
- }
-
- if (contains(v_succ, g.accept) || contains(v_succ, g.acceptEod)) {
- /* need to check that reports of v are a subset of p's */
- if (!is_subset_of(g[v].reports,
- g[pred].reports)) {
- DEBUG_PRINTF("fail - reports not subset\n");
- return v_cr; /* not cliche */
- }
- }
-
- DEBUG_PRINTF("woot success\n");
- v_cr &= ~p_cr;
- return v_cr;
-}
-
-vector<CharReach> reduced_cr(const NGHolder &g,
- const map<NFAVertex, BoundedRepeatSummary> &br_cyclic) {
- assert(hasCorrectlyNumberedVertices(g));
- vector<CharReach> refined_cr(num_vertices(g), CharReach());
-
- for (auto v : vertices_range(g)) {
- u32 v_idx = g[v].index;
- refined_cr[v_idx] = reduced_cr(v, g, br_cyclic);
- }
-
- return refined_cr;
-}
-
-static
-bool anyOutSpecial(NFAVertex v, const NGHolder &g) {
- for (auto w : adjacent_vertices_range(v, g)) {
- if (is_special(w, g) && w != v) {
- return true;
- }
- }
- return false;
-}
-
-bool mergeCyclicDotStars(NGHolder &g) {
- set<NFAVertex> verticesToRemove;
- set<NFAEdge> edgesToRemove;
-
- // avoid graphs where startDs is not a free spirit
- if (out_degree(g.startDs, g) > 1) {
- return false;
- }
-
- // check if any of the connected vertices are dots
- for (auto v : adjacent_vertices_range(g.start, g)) {
- if (is_special(v, g)) {
- continue;
- }
- const CharReach &cr = g[v].char_reach;
-
- // if this is a cyclic dot
- if (cr.all() && edge(v, v, g).second) {
- // prevent insane graphs
- if (anyOutSpecial(v, g)) {
- continue;
- }
- // we don't know if we're going to remove this vertex yet
- vector<NFAEdge> deadEdges;
-
- // check if all adjacent vertices have edges from start
- for (const auto &e : out_edges_range(v, g)) {
- NFAVertex t = target(e, g);
- // skip self
- if (t == v) {
- continue;
- }
- // skip vertices that don't have edges from start
- if (!edge(g.start, t, g).second) {
- continue;
- }
- // add an edge from startDs to this vertex
- add_edge_if_not_present(g.startDs, t, g);
-
- // mark this edge for removal
- deadEdges.push_back(e);
- }
- // if the number of edges to be removed equals out degree, vertex
- // needs to be removed; else, only remove the edges
- if (deadEdges.size() == proper_out_degree(v, g)) {
- verticesToRemove.insert(v);
- } else {
- edgesToRemove.insert(deadEdges.begin(), deadEdges.end());
- }
- }
- }
-
- if (verticesToRemove.empty() && edgesToRemove.empty()) {
- return false;
- }
-
- DEBUG_PRINTF("removing %zu edges and %zu vertices\n", edgesToRemove.size(),
- verticesToRemove.size());
- remove_edges(edgesToRemove, g);
- remove_vertices(verticesToRemove, g);
- /* some predecessors to the cyclic vertices may no longer be useful (no out
- * edges), so we can remove them */
- pruneUseless(g);
- return true;
-}
-
+ CharReach v_cr = g[v].char_reach;
+ if (proper_in_degree(v, g) != 1) {
+ return v_cr;
+ }
+
+ NFAVertex pred = getSoleSourceVertex(g, v);
+ assert(pred);
+
+ /* require pred to be fed by one vertex OR (start + startDS) */
+ NFAVertex predpred;
+ size_t idp = in_degree(pred, g);
+ if (hasSelfLoop(pred, g)) {
+ return v_cr; /* not cliche */
+ } else if (idp == 1) {
+ predpred = getSoleSourceVertex(g, pred);
+ } else if (idp == 2
+ && edge(g.start, pred, g).second
+ && edge(g.startDs, pred, g).second) {
+ predpred = g.startDs;
+ } else {
+ return v_cr; /* not cliche */
+ }
+
+ assert(predpred);
+
+ /* require predpred to be cyclic and its cr to be a superset of
+ pred and v */
+ if (!hasSelfLoop(predpred, g)) {
+ return v_cr; /* not cliche */
+ }
+
+ if (contains(br_cyclic, predpred)
+ && !br_cyclic.at(predpred).unbounded()) {
+ return v_cr; /* fake cyclic */
+ }
+
+ const CharReach &p_cr = g[pred].char_reach;
+ const CharReach &pp_cr = g[predpred].char_reach;
+ if (!v_cr.isSubsetOf(pp_cr) || !p_cr.isSubsetOf(pp_cr)) {
+ return v_cr; /* not cliche */
+ }
+
+ DEBUG_PRINTF("confirming [x]* prop\n");
+ /* we require all of v succs to be succ of p */
+ set<NFAVertex> v_succ;
+ insert(&v_succ, adjacent_vertices(v, g));
+ set<NFAVertex> p_succ;
+ insert(&p_succ, adjacent_vertices(pred, g));
+
+ if (!is_subset_of(v_succ, p_succ)) {
+ DEBUG_PRINTF("fail\n");
+ return v_cr; /* not cliche */
+ }
+
+ if (contains(v_succ, g.accept) || contains(v_succ, g.acceptEod)) {
+ /* need to check that reports of v are a subset of p's */
+ if (!is_subset_of(g[v].reports,
+ g[pred].reports)) {
+ DEBUG_PRINTF("fail - reports not subset\n");
+ return v_cr; /* not cliche */
+ }
+ }
+
+ DEBUG_PRINTF("woot success\n");
+ v_cr &= ~p_cr;
+ return v_cr;
+}
+
+vector<CharReach> reduced_cr(const NGHolder &g,
+ const map<NFAVertex, BoundedRepeatSummary> &br_cyclic) {
+ assert(hasCorrectlyNumberedVertices(g));
+ vector<CharReach> refined_cr(num_vertices(g), CharReach());
+
+ for (auto v : vertices_range(g)) {
+ u32 v_idx = g[v].index;
+ refined_cr[v_idx] = reduced_cr(v, g, br_cyclic);
+ }
+
+ return refined_cr;
+}
+
+static
+bool anyOutSpecial(NFAVertex v, const NGHolder &g) {
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (is_special(w, g) && w != v) {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool mergeCyclicDotStars(NGHolder &g) {
+ set<NFAVertex> verticesToRemove;
+ set<NFAEdge> edgesToRemove;
+
+ // avoid graphs where startDs is not a free spirit
+ if (out_degree(g.startDs, g) > 1) {
+ return false;
+ }
+
+ // check if any of the connected vertices are dots
+ for (auto v : adjacent_vertices_range(g.start, g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ const CharReach &cr = g[v].char_reach;
+
+ // if this is a cyclic dot
+ if (cr.all() && edge(v, v, g).second) {
+ // prevent insane graphs
+ if (anyOutSpecial(v, g)) {
+ continue;
+ }
+ // we don't know if we're going to remove this vertex yet
+ vector<NFAEdge> deadEdges;
+
+ // check if all adjacent vertices have edges from start
+ for (const auto &e : out_edges_range(v, g)) {
+ NFAVertex t = target(e, g);
+ // skip self
+ if (t == v) {
+ continue;
+ }
+ // skip vertices that don't have edges from start
+ if (!edge(g.start, t, g).second) {
+ continue;
+ }
+ // add an edge from startDs to this vertex
+ add_edge_if_not_present(g.startDs, t, g);
+
+ // mark this edge for removal
+ deadEdges.push_back(e);
+ }
+ // if the number of edges to be removed equals out degree, vertex
+ // needs to be removed; else, only remove the edges
+ if (deadEdges.size() == proper_out_degree(v, g)) {
+ verticesToRemove.insert(v);
+ } else {
+ edgesToRemove.insert(deadEdges.begin(), deadEdges.end());
+ }
+ }
+ }
+
+ if (verticesToRemove.empty() && edgesToRemove.empty()) {
+ return false;
+ }
+
+ DEBUG_PRINTF("removing %zu edges and %zu vertices\n", edgesToRemove.size(),
+ verticesToRemove.size());
+ remove_edges(edgesToRemove, g);
+ remove_vertices(verticesToRemove, g);
+ /* some predecessors to the cyclic vertices may no longer be useful (no out
+ * edges), so we can remove them */
+ pruneUseless(g);
+ return true;
+}
+
struct PrunePathsInfo {
explicit PrunePathsInfo(const NGHolder &g)
: color_map(make_small_color_map(g)), bad(num_vertices(g)) {}
@@ -725,4 +725,4 @@ bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &g, som_type som) {
return changed;
}
-} // namespace ue2
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.h b/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.h
index 70bc7741cb..5ed089dc05 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_misc_opt.h
@@ -1,77 +1,77 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Miscellaneous optimisations.
- */
-
-#ifndef NG_MISC_OPT_H
-#define NG_MISC_OPT_H
-
-#include <map>
-#include <vector>
-
-#include "ng_holder.h"
-#include "som/som.h"
-#include "util/depth.h"
-
-namespace ue2 {
-
-/** Small structure describing the bounds on a repeat. */
-struct BoundedRepeatSummary {
- BoundedRepeatSummary(void) : repeatMin(0), repeatMax(depth::infinity()) {}
- BoundedRepeatSummary(const depth &min_in, const depth &max_in)
- : repeatMin(min_in), repeatMax(max_in) {
- assert(repeatMin <= repeatMax);
- assert(repeatMax.is_reachable());
- }
- bool unbounded(void) const { return repeatMax.is_infinite(); }
-
- depth repeatMin; //!< minimum repeat bound.
- depth repeatMax; //!< maximum repeat bound.
-};
-
-/* returns true if anything changed */
-bool improveGraph(NGHolder &g, som_type som);
-
-/** Sometimes the reach of a vertex is greater than it needs to be to reduce
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Miscellaneous optimisations.
+ */
+
+#ifndef NG_MISC_OPT_H
+#define NG_MISC_OPT_H
+
+#include <map>
+#include <vector>
+
+#include "ng_holder.h"
+#include "som/som.h"
+#include "util/depth.h"
+
+namespace ue2 {
+
+/** Small structure describing the bounds on a repeat. */
+struct BoundedRepeatSummary {
+ BoundedRepeatSummary(void) : repeatMin(0), repeatMax(depth::infinity()) {}
+ BoundedRepeatSummary(const depth &min_in, const depth &max_in)
+ : repeatMin(min_in), repeatMax(max_in) {
+ assert(repeatMin <= repeatMax);
+ assert(repeatMax.is_reachable());
+ }
+ bool unbounded(void) const { return repeatMax.is_infinite(); }
+
+ depth repeatMin; //!< minimum repeat bound.
+ depth repeatMax; //!< maximum repeat bound.
+};
+
+/* returns true if anything changed */
+bool improveGraph(NGHolder &g, som_type som);
+
+/** Sometimes the reach of a vertex is greater than it needs to be to reduce
* stop chars for the benefit of the rest of our code base (accel, etc). In
* these circumstances, we can treat the reach as the smaller one as
- * the graphs are equivalent. */
-CharReach reduced_cr(NFAVertex v, const NGHolder &g,
- const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic);
-
-std::vector<CharReach> reduced_cr(const NGHolder &g,
- const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic);
-
-/** Remove cyclic stars connected to start */
-bool mergeCyclicDotStars(NGHolder &g);
-
+ * the graphs are equivalent. */
+CharReach reduced_cr(NFAVertex v, const NGHolder &g,
+ const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic);
+
+std::vector<CharReach> reduced_cr(const NGHolder &g,
+ const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic);
+
+/** Remove cyclic stars connected to start */
+bool mergeCyclicDotStars(NGHolder &g);
+
/**
* Given a cyclic state 'c' with a broad reach and a later state 'v' that is
* only reachable if c is still on, then any edges to a successor of a direct
@@ -79,6 +79,6 @@ bool mergeCyclicDotStars(NGHolder &g);
*/
bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &h, som_type som);
-} // namespace ue2
-
-#endif
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_netflow.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_netflow.cpp
index b81b397bd2..780a319f5d 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_netflow.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_netflow.cpp
@@ -1,220 +1,220 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Network flow (min flow, max cut) algorithms.
- */
-#include "ng_netflow.h"
-
-#include "ng_holder.h"
-#include "ng_literal_analysis.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/container.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Network flow (min flow, max cut) algorithms.
+ */
+#include "ng_netflow.h"
+
+#include "ng_holder.h"
+#include "ng_literal_analysis.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/container.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
-
-#include <algorithm>
-#include <boost/graph/boykov_kolmogorov_max_flow.hpp>
-
-using namespace std;
-using boost::default_color_type;
-
-namespace ue2 {
-
-static
-void addReverseEdge(const NGHolder &g, vector<NFAEdge> &reverseEdge,
- NFAEdge fwd, NFAEdge rev) {
- u32 fwdIndex = g[fwd].index;
- u32 revIndex = g[rev].index;
-
- // Make sure our vector is big enough.
- size_t sz = max(fwdIndex, revIndex) + 1;
- if (reverseEdge.size() < sz) {
- reverseEdge.resize(sz);
- }
-
- // Add entries to list.
- reverseEdge[fwdIndex] = rev;
- reverseEdge[revIndex] = fwd;
-}
-
-/** Add temporary reverse edges to the graph \p g, as they are required by the
- * BGL's boykov_kolmogorov_max_flow algorithm. */
-static
-void addReverseEdges(NGHolder &g, vector<NFAEdge> &reverseEdge,
- vector<u64a> &capacityMap) {
- // We're probably going to need space for 2x edge count.
- const size_t numEdges = num_edges(g);
- reverseEdge.reserve(numEdges * 2);
- capacityMap.reserve(numEdges * 2);
-
- // To avoid walking the graph for _ages_, we build a temporary map of all
- // edges indexed by vertex pair for existence checks.
- map<pair<size_t, size_t>, NFAEdge> allEdges;
- for (const auto &e : edges_range(g)) {
- NFAVertex u = source(e, g), v = target(e, g);
- size_t uidx = g[u].index, vidx = g[v].index;
- allEdges[make_pair(uidx, vidx)] = e;
- }
-
- // Now we walk over all edges and add their reverse edges to the reverseEdge
- // vector, also adding them to the graph when they don't already exist.
- for (const auto &m : allEdges) {
- const NFAEdge &fwd = m.second;
- const size_t uidx = m.first.first, vidx = m.first.second;
-
- auto it = allEdges.find(make_pair(vidx, uidx));
- if (it == allEdges.end()) {
- // No reverse edge, add one.
- NFAVertex u = source(fwd, g), v = target(fwd, g);
+
+#include <algorithm>
+#include <boost/graph/boykov_kolmogorov_max_flow.hpp>
+
+using namespace std;
+using boost::default_color_type;
+
+namespace ue2 {
+
+static
+void addReverseEdge(const NGHolder &g, vector<NFAEdge> &reverseEdge,
+ NFAEdge fwd, NFAEdge rev) {
+ u32 fwdIndex = g[fwd].index;
+ u32 revIndex = g[rev].index;
+
+ // Make sure our vector is big enough.
+ size_t sz = max(fwdIndex, revIndex) + 1;
+ if (reverseEdge.size() < sz) {
+ reverseEdge.resize(sz);
+ }
+
+ // Add entries to list.
+ reverseEdge[fwdIndex] = rev;
+ reverseEdge[revIndex] = fwd;
+}
+
+/** Add temporary reverse edges to the graph \p g, as they are required by the
+ * BGL's boykov_kolmogorov_max_flow algorithm. */
+static
+void addReverseEdges(NGHolder &g, vector<NFAEdge> &reverseEdge,
+ vector<u64a> &capacityMap) {
+ // We're probably going to need space for 2x edge count.
+ const size_t numEdges = num_edges(g);
+ reverseEdge.reserve(numEdges * 2);
+ capacityMap.reserve(numEdges * 2);
+
+ // To avoid walking the graph for _ages_, we build a temporary map of all
+ // edges indexed by vertex pair for existence checks.
+ map<pair<size_t, size_t>, NFAEdge> allEdges;
+ for (const auto &e : edges_range(g)) {
+ NFAVertex u = source(e, g), v = target(e, g);
+ size_t uidx = g[u].index, vidx = g[v].index;
+ allEdges[make_pair(uidx, vidx)] = e;
+ }
+
+ // Now we walk over all edges and add their reverse edges to the reverseEdge
+ // vector, also adding them to the graph when they don't already exist.
+ for (const auto &m : allEdges) {
+ const NFAEdge &fwd = m.second;
+ const size_t uidx = m.first.first, vidx = m.first.second;
+
+ auto it = allEdges.find(make_pair(vidx, uidx));
+ if (it == allEdges.end()) {
+ // No reverse edge, add one.
+ NFAVertex u = source(fwd, g), v = target(fwd, g);
NFAEdge rev = add_edge(v, u, g);
- it = allEdges.insert(make_pair(make_pair(vidx, uidx), rev)).first;
- // Add to capacity map.
- u32 revIndex = g[rev].index;
- if (capacityMap.size() < revIndex + 1) {
- capacityMap.resize(revIndex + 1);
- }
- capacityMap[revIndex] = 0;
- }
-
- addReverseEdge(g, reverseEdge, fwd, it->second);
- }
-}
-
-/** Remove all edges with indices >= \p idx. */
-static
-void removeEdgesFromIndex(NGHolder &g, vector<u64a> &capacityMap, u32 idx) {
- remove_edge_if([&](const NFAEdge &e) { return g[e].index >= idx; }, g);
- capacityMap.resize(idx);
+ it = allEdges.insert(make_pair(make_pair(vidx, uidx), rev)).first;
+ // Add to capacity map.
+ u32 revIndex = g[rev].index;
+ if (capacityMap.size() < revIndex + 1) {
+ capacityMap.resize(revIndex + 1);
+ }
+ capacityMap[revIndex] = 0;
+ }
+
+ addReverseEdge(g, reverseEdge, fwd, it->second);
+ }
+}
+
+/** Remove all edges with indices >= \p idx. */
+static
+void removeEdgesFromIndex(NGHolder &g, vector<u64a> &capacityMap, u32 idx) {
+ remove_edge_if([&](const NFAEdge &e) { return g[e].index >= idx; }, g);
+ capacityMap.resize(idx);
renumber_edges(g);
-}
-
-/** A wrapper around boykov_kolmogorov_max_flow, returns the max flow and
- * colour map (from which we can find the min cut). */
-static
-u64a getMaxFlow(NGHolder &h, const vector<u64a> &capacityMap_in,
+}
+
+/** A wrapper around boykov_kolmogorov_max_flow, returns the max flow and
+ * colour map (from which we can find the min cut). */
+static
+u64a getMaxFlow(NGHolder &h, const vector<u64a> &capacityMap_in,
decltype(make_small_color_map(NGHolder())) &colorMap) {
- vector<u64a> capacityMap = capacityMap_in;
- NFAVertex src = h.start;
- NFAVertex sink = h.acceptEod;
-
- // netflow relies on these stylised edges, as all starts should be covered
- // by our source and all accepts by our sink.
- assert(edge(h.start, h.startDs, h).second);
- assert(edge(h.accept, h.acceptEod, h).second);
-
- // The boykov_kolmogorov_max_flow algorithm requires us to have reverse
- // edges for all edges in the graph, so we create them here (and remove
- // them after the call).
- const unsigned int numRealEdges = num_edges(h);
- vector<NFAEdge> reverseEdges;
- addReverseEdges(h, reverseEdges, capacityMap);
-
- const unsigned int numTotalEdges = num_edges(h);
- const unsigned int numVertices = num_vertices(h);
-
- vector<u64a> edgeResiduals(numTotalEdges);
- vector<NFAEdge> predecessors(numVertices);
- vector<s32> distances(numVertices);
-
+ vector<u64a> capacityMap = capacityMap_in;
+ NFAVertex src = h.start;
+ NFAVertex sink = h.acceptEod;
+
+ // netflow relies on these stylised edges, as all starts should be covered
+ // by our source and all accepts by our sink.
+ assert(edge(h.start, h.startDs, h).second);
+ assert(edge(h.accept, h.acceptEod, h).second);
+
+ // The boykov_kolmogorov_max_flow algorithm requires us to have reverse
+ // edges for all edges in the graph, so we create them here (and remove
+ // them after the call).
+ const unsigned int numRealEdges = num_edges(h);
+ vector<NFAEdge> reverseEdges;
+ addReverseEdges(h, reverseEdges, capacityMap);
+
+ const unsigned int numTotalEdges = num_edges(h);
+ const unsigned int numVertices = num_vertices(h);
+
+ vector<u64a> edgeResiduals(numTotalEdges);
+ vector<NFAEdge> predecessors(numVertices);
+ vector<s32> distances(numVertices);
+
auto v_index_map = get(vertex_index, h);
auto e_index_map = get(edge_index, h);
-
+
u64a flow = boykov_kolmogorov_max_flow(h,
- make_iterator_property_map(capacityMap.begin(), e_index_map),
- make_iterator_property_map(edgeResiduals.begin(), e_index_map),
- make_iterator_property_map(reverseEdges.begin(), e_index_map),
- make_iterator_property_map(predecessors.begin(), v_index_map),
+ make_iterator_property_map(capacityMap.begin(), e_index_map),
+ make_iterator_property_map(edgeResiduals.begin(), e_index_map),
+ make_iterator_property_map(reverseEdges.begin(), e_index_map),
+ make_iterator_property_map(predecessors.begin(), v_index_map),
colorMap,
- make_iterator_property_map(distances.begin(), v_index_map),
- v_index_map,
- src, sink);
-
- // Remove reverse edges from graph.
- removeEdgesFromIndex(h, capacityMap, numRealEdges);
+ make_iterator_property_map(distances.begin(), v_index_map),
+ v_index_map,
+ src, sink);
+
+ // Remove reverse edges from graph.
+ removeEdgesFromIndex(h, capacityMap, numRealEdges);
assert(num_edges(h) == numRealEdges);
-
- DEBUG_PRINTF("flow = %llu\n", flow);
- return flow;
-}
-
-/** Returns a min cut (in \p cutset) for the graph in \p h. */
-vector<NFAEdge> findMinCut(NGHolder &h, const vector<u64a> &scores) {
- assert(hasCorrectlyNumberedEdges(h));
- assert(hasCorrectlyNumberedVertices(h));
-
+
+ DEBUG_PRINTF("flow = %llu\n", flow);
+ return flow;
+}
+
+/** Returns a min cut (in \p cutset) for the graph in \p h. */
+vector<NFAEdge> findMinCut(NGHolder &h, const vector<u64a> &scores) {
+ assert(hasCorrectlyNumberedEdges(h));
+ assert(hasCorrectlyNumberedVertices(h));
+
auto colors = make_small_color_map(h);
u64a flow = getMaxFlow(h, scores, colors);
-
- vector<NFAEdge> picked_white;
- vector<NFAEdge> picked_black;
- u64a observed_black_flow = 0;
- u64a observed_white_flow = 0;
-
- for (const auto &e : edges_range(h)) {
- NFAVertex from = source(e, h);
- NFAVertex to = target(e, h);
- u64a ec = scores[h[e].index];
- if (ec == 0) {
- continue; // skips, among other things, reverse edges
- }
-
+
+ vector<NFAEdge> picked_white;
+ vector<NFAEdge> picked_black;
+ u64a observed_black_flow = 0;
+ u64a observed_white_flow = 0;
+
+ for (const auto &e : edges_range(h)) {
+ NFAVertex from = source(e, h);
+ NFAVertex to = target(e, h);
+ u64a ec = scores[h[e].index];
+ if (ec == 0) {
+ continue; // skips, among other things, reverse edges
+ }
+
auto fromColor = get(colors, from);
auto toColor = get(colors, to);
-
+
if (fromColor != small_color::white && toColor == small_color::white) {
- assert(ec <= INVALID_EDGE_CAP);
+ assert(ec <= INVALID_EDGE_CAP);
DEBUG_PRINTF("found white cut edge %zu->%zu cap %llu\n",
- h[from].index, h[to].index, ec);
- observed_white_flow += ec;
- picked_white.push_back(e);
- }
+ h[from].index, h[to].index, ec);
+ observed_white_flow += ec;
+ picked_white.push_back(e);
+ }
if (fromColor == small_color::black && toColor != small_color::black) {
- assert(ec <= INVALID_EDGE_CAP);
+ assert(ec <= INVALID_EDGE_CAP);
DEBUG_PRINTF("found black cut edge %zu->%zu cap %llu\n",
- h[from].index, h[to].index, ec);
- observed_black_flow += ec;
- picked_black.push_back(e);
- }
- }
-
- DEBUG_PRINTF("min flow = %llu b flow = %llu w flow %llu\n", flow,
- observed_black_flow, observed_white_flow);
+ h[from].index, h[to].index, ec);
+ observed_black_flow += ec;
+ picked_black.push_back(e);
+ }
+ }
+
+ DEBUG_PRINTF("min flow = %llu b flow = %llu w flow %llu\n", flow,
+ observed_black_flow, observed_white_flow);
if (min(observed_white_flow, observed_black_flow) != flow) {
- DEBUG_PRINTF("bad cut\n");
- }
-
- if (observed_white_flow < observed_black_flow) {
- return picked_white;
- } else {
- return picked_black;
- }
-}
-
-} // namespace ue2
+ DEBUG_PRINTF("bad cut\n");
+ }
+
+ if (observed_white_flow < observed_black_flow) {
+ return picked_white;
+ } else {
+ return picked_black;
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_netflow.h b/contrib/libs/hyperscan/src/nfagraph/ng_netflow.h
index 9e9b32e2b3..d8e00b8e17 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_netflow.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_netflow.h
@@ -1,49 +1,49 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Network flow (min flow, max cut) algorithms.
- */
-#ifndef NG_NETFLOW_H
-#define NG_NETFLOW_H
-
-#include "ng_holder.h"
-#include "ue2common.h"
-
-#include <vector>
-
-namespace ue2 {
-
-class NGHolder;
-
-/** Returns a min cut (in \p cutset) for the graph in \p h. */
-std::vector<NFAEdge> findMinCut(NGHolder &h, const std::vector<u64a> &scores);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Network flow (min flow, max cut) algorithms.
+ */
+#ifndef NG_NETFLOW_H
+#define NG_NETFLOW_H
+
+#include "ng_holder.h"
+#include "ue2common.h"
+
+#include <vector>
+
+namespace ue2 {
+
+class NGHolder;
+
+/** Returns a min cut (in \p cutset) for the graph in \p h. */
+std::vector<NFAEdge> findMinCut(NGHolder &h, const std::vector<u64a> &scores);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.cpp
index 9ad642ad09..04611872a4 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.cpp
@@ -1,240 +1,240 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Prefilter Reductions.
- *
- * This file contains routines for reducing the size of an NFA graph that we
- * know will be used as a prefilter.
- *
- * The approach used is to consider the graph as a chain of region subgraphs,
- * and to reduce the size of the graph by replacing regions with constructs
- * that can be implemented in fewer states.
- *
- * Right now, the approach used is to replace a region with a bounded repeat of
- * vertices (with bounds derived from the min/max width of the region
- * subgraph). These vertices are given the union of the region's character
- * reachability.
- *
- * For regions with bounded max width, this strategy is quite dependent on the
- * LimEx NFA's bounded repeat functionality.
- */
-#include "ng_prefilter.h"
-
-#include "ng_holder.h"
-#include "ng_region.h"
-#include "ng_util.h"
-#include "ng_width.h"
-#include "ue2common.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph_range.h"
-
-#include <queue>
+ * \brief Prefilter Reductions.
+ *
+ * This file contains routines for reducing the size of an NFA graph that we
+ * know will be used as a prefilter.
+ *
+ * The approach used is to consider the graph as a chain of region subgraphs,
+ * and to reduce the size of the graph by replacing regions with constructs
+ * that can be implemented in fewer states.
+ *
+ * Right now, the approach used is to replace a region with a bounded repeat of
+ * vertices (with bounds derived from the min/max width of the region
+ * subgraph). These vertices are given the union of the region's character
+ * reachability.
+ *
+ * For regions with bounded max width, this strategy is quite dependent on the
+ * LimEx NFA's bounded repeat functionality.
+ */
+#include "ng_prefilter.h"
+
+#include "ng_holder.h"
+#include "ng_region.h"
+#include "ng_util.h"
+#include "ng_width.h"
+#include "ue2common.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
+
+#include <queue>
#include <unordered_map>
#include <unordered_set>
-
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_values;
-
-namespace ue2 {
-
-/** Keep attempting to reduce the size of the graph until the number of
- * vertices falls below this value. */
-static const size_t MAX_COMPONENT_VERTICES = 128;
-
-/** Only replace a region with at least this many vertices. */
-static const size_t MIN_REPLACE_VERTICES = 2;
-
-/** Estimate of how many vertices are required to represent a bounded repeat in
- * the implementation NFA. */
-static const size_t BOUNDED_REPEAT_COUNT = 4;
-
-/** Scoring penalty for boundary regions. */
-static const size_t PENALTY_BOUNDARY = 32;
-
+
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_values;
+
+namespace ue2 {
+
+/** Keep attempting to reduce the size of the graph until the number of
+ * vertices falls below this value. */
+static const size_t MAX_COMPONENT_VERTICES = 128;
+
+/** Only replace a region with at least this many vertices. */
+static const size_t MIN_REPLACE_VERTICES = 2;
+
+/** Estimate of how many vertices are required to represent a bounded repeat in
+ * the implementation NFA. */
+static const size_t BOUNDED_REPEAT_COUNT = 4;
+
+/** Scoring penalty for boundary regions. */
+static const size_t PENALTY_BOUNDARY = 32;
+
/** Regions with max bounds greater than this value will have their max bound
* replaced with inf. */
static const size_t MAX_REPLACE_BOUND = 10000;
-namespace {
-
-/** Information describing a region. */
-struct RegionInfo {
- explicit RegionInfo(u32 id_in) : id(id_in) {}
- u32 id; //!< region id
- deque<NFAVertex> vertices; //!< vertices in the region
- CharReach reach; //!< union of region reach
+namespace {
+
+/** Information describing a region. */
+struct RegionInfo {
+ explicit RegionInfo(u32 id_in) : id(id_in) {}
+ u32 id; //!< region id
+ deque<NFAVertex> vertices; //!< vertices in the region
+ CharReach reach; //!< union of region reach
depth minWidth{0}; //!< min width of region subgraph
depth maxWidth{depth::infinity()}; //!< max width of region subgraph
- bool atBoundary = false; //!< region is next to an accept
-
- // Bigger score is better.
- size_t score() const {
+ bool atBoundary = false; //!< region is next to an accept
+
+ // Bigger score is better.
+ size_t score() const {
// TODO: charreach should be a signal?
- size_t numVertices = vertices.size();
- if (atBoundary) {
- return numVertices - min(PENALTY_BOUNDARY, numVertices);
- } else {
- return numVertices;
- }
- }
-};
-
-/** Comparator used to order regions for consideration in a priority queue. */
-struct RegionInfoQueueComp {
- bool operator()(const RegionInfo &r1, const RegionInfo &r2) const {
- size_t score1 = r1.score(), score2 = r2.score();
- if (score1 != score2) {
- return score1 < score2;
- }
- if (r1.reach.count() != r2.reach.count()) {
- return r1.reach.count() < r2.reach.count();
- }
- return r1.id < r2.id;
- }
-};
-
-} // namespace
-
-static
-void findWidths(const NGHolder &g,
+ size_t numVertices = vertices.size();
+ if (atBoundary) {
+ return numVertices - min(PENALTY_BOUNDARY, numVertices);
+ } else {
+ return numVertices;
+ }
+ }
+};
+
+/** Comparator used to order regions for consideration in a priority queue. */
+struct RegionInfoQueueComp {
+ bool operator()(const RegionInfo &r1, const RegionInfo &r2) const {
+ size_t score1 = r1.score(), score2 = r2.score();
+ if (score1 != score2) {
+ return score1 < score2;
+ }
+ if (r1.reach.count() != r2.reach.count()) {
+ return r1.reach.count() < r2.reach.count();
+ }
+ return r1.id < r2.id;
+ }
+};
+
+} // namespace
+
+static
+void findWidths(const NGHolder &g,
const unordered_map<NFAVertex, u32> &region_map,
- RegionInfo &ri) {
- NGHolder rg;
+ RegionInfo &ri) {
+ NGHolder rg;
unordered_map<NFAVertex, NFAVertex> mapping;
- fillHolder(&rg, g, ri.vertices, &mapping);
-
- // Wire our entries to start and our exits to accept.
- for (auto v : ri.vertices) {
- NFAVertex v_new = mapping[v];
+ fillHolder(&rg, g, ri.vertices, &mapping);
+
+ // Wire our entries to start and our exits to accept.
+ for (auto v : ri.vertices) {
+ NFAVertex v_new = mapping[v];
assert(v_new != NGHolder::null_vertex());
-
- if (isRegionEntry(g, v, region_map) &&
- !edge(rg.start, v_new, rg).second) {
- add_edge(rg.start, v_new, rg);
- }
- if (isRegionExit(g, v, region_map) &&
- !edge(v_new, rg.accept, rg).second) {
- add_edge(v_new, rg.accept, rg);
- }
- }
-
- ri.minWidth = findMinWidth(rg);
- ri.maxWidth = findMaxWidth(rg);
-}
-
-// acc can be either h.accept or h.acceptEod.
-static
-void markBoundaryRegions(const NGHolder &h,
+
+ if (isRegionEntry(g, v, region_map) &&
+ !edge(rg.start, v_new, rg).second) {
+ add_edge(rg.start, v_new, rg);
+ }
+ if (isRegionExit(g, v, region_map) &&
+ !edge(v_new, rg.accept, rg).second) {
+ add_edge(v_new, rg.accept, rg);
+ }
+ }
+
+ ri.minWidth = findMinWidth(rg);
+ ri.maxWidth = findMaxWidth(rg);
+}
+
+// acc can be either h.accept or h.acceptEod.
+static
+void markBoundaryRegions(const NGHolder &h,
const unordered_map<NFAVertex, u32> &region_map,
- map<u32, RegionInfo> &regions, NFAVertex acc) {
- for (auto v : inv_adjacent_vertices_range(acc, h)) {
- if (is_special(v, h)) {
- continue;
- }
- u32 id = region_map.at(v);
-
+ map<u32, RegionInfo> &regions, NFAVertex acc) {
+ for (auto v : inv_adjacent_vertices_range(acc, h)) {
+ if (is_special(v, h)) {
+ continue;
+ }
+ u32 id = region_map.at(v);
+
auto ri = regions.find(id);
- if (ri == regions.end()) {
- continue; // Not tracking this region as it's too small.
- }
-
- ri->second.atBoundary = true;
- }
-}
-
-static
-map<u32, RegionInfo> findRegionInfo(const NGHolder &h,
+ if (ri == regions.end()) {
+ continue; // Not tracking this region as it's too small.
+ }
+
+ ri->second.atBoundary = true;
+ }
+}
+
+static
+map<u32, RegionInfo> findRegionInfo(const NGHolder &h,
const unordered_map<NFAVertex, u32> &region_map) {
- map<u32, RegionInfo> regions;
- for (auto v : vertices_range(h)) {
- if (is_special(v, h)) {
- continue;
- }
- u32 id = region_map.at(v);
+ map<u32, RegionInfo> regions;
+ for (auto v : vertices_range(h)) {
+ if (is_special(v, h)) {
+ continue;
+ }
+ u32 id = region_map.at(v);
RegionInfo &ri = regions.emplace(id, RegionInfo(id)).first->second;
- ri.vertices.push_back(v);
- ri.reach |= h[v].char_reach;
- }
-
- // There's no point tracking more information about regions that we won't
- // consider replacing, so we remove them from the region map.
+ ri.vertices.push_back(v);
+ ri.reach |= h[v].char_reach;
+ }
+
+ // There's no point tracking more information about regions that we won't
+ // consider replacing, so we remove them from the region map.
for (auto it = regions.begin(); it != regions.end();) {
- if (it->second.vertices.size() < MIN_REPLACE_VERTICES) {
- regions.erase(it++);
- } else {
- ++it;
- }
- }
-
- DEBUG_PRINTF("%zu regions\n", regions.size());
-
- markBoundaryRegions(h, region_map, regions, h.accept);
- markBoundaryRegions(h, region_map, regions, h.acceptEod);
-
- // Determine min/max widths.
- for (RegionInfo &ri : regions | map_values) {
- findWidths(h, region_map, ri);
- DEBUG_PRINTF("region %u %shas widths [%s,%s]\n", ri.id,
- ri.atBoundary ? "(boundary) " : "",
- ri.minWidth.str().c_str(), ri.maxWidth.str().c_str());
- }
-
- return regions;
-}
-
-static
+ if (it->second.vertices.size() < MIN_REPLACE_VERTICES) {
+ regions.erase(it++);
+ } else {
+ ++it;
+ }
+ }
+
+ DEBUG_PRINTF("%zu regions\n", regions.size());
+
+ markBoundaryRegions(h, region_map, regions, h.accept);
+ markBoundaryRegions(h, region_map, regions, h.acceptEod);
+
+ // Determine min/max widths.
+ for (RegionInfo &ri : regions | map_values) {
+ findWidths(h, region_map, ri);
+ DEBUG_PRINTF("region %u %shas widths [%s,%s]\n", ri.id,
+ ri.atBoundary ? "(boundary) " : "",
+ ri.minWidth.str().c_str(), ri.maxWidth.str().c_str());
+ }
+
+ return regions;
+}
+
+static
void copyInEdges(NGHolder &g, NFAVertex from, NFAVertex to) {
- for (const auto &e : in_edges_range(from, g)) {
- NFAVertex u = source(e, g);
+ for (const auto &e : in_edges_range(from, g)) {
+ NFAVertex u = source(e, g);
add_edge_if_not_present(u, to, g[e], g);
- }
-}
-
-static
+ }
+}
+
+static
void copyOutEdges(NGHolder &g, NFAVertex from, NFAVertex to) {
- for (const auto &e : out_edges_range(from, g)) {
- NFAVertex t = target(e, g);
- add_edge_if_not_present(to, t, g[e], g);
-
- if (is_any_accept(t, g)) {
- const auto &reports = g[from].reports;
- g[to].reports.insert(reports.begin(), reports.end());
- }
- }
-}
-
-static
+ for (const auto &e : out_edges_range(from, g)) {
+ NFAVertex t = target(e, g);
+ add_edge_if_not_present(to, t, g[e], g);
+
+ if (is_any_accept(t, g)) {
+ const auto &reports = g[from].reports;
+ g[to].reports.insert(reports.begin(), reports.end());
+ }
+ }
+}
+
+static
void removeInteriorEdges(NGHolder &g, const RegionInfo &ri) {
// Set of vertices in region, for quick lookups.
const unordered_set<NFAVertex> rverts(ri.vertices.begin(),
@@ -250,12 +250,12 @@ void removeInteriorEdges(NGHolder &g, const RegionInfo &ri) {
}
static
-void replaceRegion(NGHolder &g, const RegionInfo &ri,
- size_t *verticesAdded, size_t *verticesRemoved) {
- // TODO: more complex replacements.
- assert(ri.vertices.size() >= MIN_REPLACE_VERTICES);
- assert(ri.minWidth.is_finite());
-
+void replaceRegion(NGHolder &g, const RegionInfo &ri,
+ size_t *verticesAdded, size_t *verticesRemoved) {
+ // TODO: more complex replacements.
+ assert(ri.vertices.size() >= MIN_REPLACE_VERTICES);
+ assert(ri.minWidth.is_finite());
+
depth minWidth = ri.minWidth;
depth maxWidth = ri.maxWidth;
@@ -265,129 +265,129 @@ void replaceRegion(NGHolder &g, const RegionInfo &ri,
maxWidth = depth::infinity();
}
- size_t replacementSize;
+ size_t replacementSize;
if (minWidth == maxWidth || maxWidth.is_infinite()) {
replacementSize = minWidth; // {N} or {N,}
- } else {
+ } else {
replacementSize = maxWidth; // {N,M} case
- }
-
- DEBUG_PRINTF("orig size %zu, replace size %zu\n", ri.vertices.size(),
- replacementSize);
-
+ }
+
+ DEBUG_PRINTF("orig size %zu, replace size %zu\n", ri.vertices.size(),
+ replacementSize);
+
vector<NFAVertex> verts;
verts.reserve(replacementSize);
- for (size_t i = 0; i < replacementSize; i++) {
- NFAVertex v = add_vertex(g);
- g[v].char_reach = ri.reach;
- if (i > 0) {
- add_edge(verts.back(), v, g);
- }
- verts.push_back(v);
- }
-
+ for (size_t i = 0; i < replacementSize; i++) {
+ NFAVertex v = add_vertex(g);
+ g[v].char_reach = ri.reach;
+ if (i > 0) {
+ add_edge(verts.back(), v, g);
+ }
+ verts.push_back(v);
+ }
+
if (maxWidth.is_infinite()) {
- add_edge(verts.back(), verts.back(), g);
- }
-
+ add_edge(verts.back(), verts.back(), g);
+ }
+
removeInteriorEdges(g, ri);
-
- for (size_t i = 0; i < replacementSize; i++) {
- NFAVertex v_new = verts[i];
-
- for (auto v_old : ri.vertices) {
- if (i == 0) {
+
+ for (size_t i = 0; i < replacementSize; i++) {
+ NFAVertex v_new = verts[i];
+
+ for (auto v_old : ri.vertices) {
+ if (i == 0) {
copyInEdges(g, v_old, v_new);
- }
- if (i + 1 >= ri.minWidth) {
+ }
+ if (i + 1 >= ri.minWidth) {
copyOutEdges(g, v_old, v_new);
- }
- }
- }
-
- remove_vertices(ri.vertices, g, false);
-
- *verticesAdded = verts.size();
- *verticesRemoved = ri.vertices.size();
-}
-
-namespace {
-struct SourceHasEdgeToAccept {
- explicit SourceHasEdgeToAccept(const NGHolder &g_in) : g(g_in) {}
- bool operator()(const NFAEdge &e) const {
- return edge(source(e, g), g.accept, g).second;
- }
- const NGHolder &g;
-};
-}
-
-static
-void reduceRegions(NGHolder &h) {
- map<u32, RegionInfo> regions = findRegionInfo(h, assignRegions(h));
-
- RegionInfoQueueComp cmp;
- priority_queue<RegionInfo, deque<RegionInfo>, RegionInfoQueueComp> pq(cmp);
-
- size_t numVertices = 0;
- for (const RegionInfo &ri : regions | map_values) {
- numVertices += ri.vertices.size();
- pq.push(ri);
- }
-
- while (numVertices > MAX_COMPONENT_VERTICES && !pq.empty()) {
- const RegionInfo &ri = pq.top();
- DEBUG_PRINTF("region %u: vertices=%zu reach=%s score=%zu, "
- "widths=[%s,%s]\n",
- ri.id, ri.vertices.size(), describeClass(ri.reach).c_str(),
- ri.score(), ri.minWidth.str().c_str(),
- ri.maxWidth.str().c_str());
-
- size_t verticesAdded = 0;
- size_t verticesRemoved = 0;
- replaceRegion(h, ri, &verticesAdded, &verticesRemoved);
- DEBUG_PRINTF("%zu vertices removed, %zu vertices added\n",
- verticesRemoved, verticesAdded);
-
- // We are trusting that implementation NFAs will be able to use the
- // LimEx bounded repeat code here.
- numVertices -= verticesRemoved;
- numVertices += BOUNDED_REPEAT_COUNT;
-
- DEBUG_PRINTF("numVertices is now %zu\n", numVertices);
- pq.pop();
- }
-
- // We may have vertices that have edges to both accept and acceptEod: in
- // this case, we can optimize for performance by removing the acceptEod
- // edges.
+ }
+ }
+ }
+
+ remove_vertices(ri.vertices, g, false);
+
+ *verticesAdded = verts.size();
+ *verticesRemoved = ri.vertices.size();
+}
+
+namespace {
+struct SourceHasEdgeToAccept {
+ explicit SourceHasEdgeToAccept(const NGHolder &g_in) : g(g_in) {}
+ bool operator()(const NFAEdge &e) const {
+ return edge(source(e, g), g.accept, g).second;
+ }
+ const NGHolder &g;
+};
+}
+
+static
+void reduceRegions(NGHolder &h) {
+ map<u32, RegionInfo> regions = findRegionInfo(h, assignRegions(h));
+
+ RegionInfoQueueComp cmp;
+ priority_queue<RegionInfo, deque<RegionInfo>, RegionInfoQueueComp> pq(cmp);
+
+ size_t numVertices = 0;
+ for (const RegionInfo &ri : regions | map_values) {
+ numVertices += ri.vertices.size();
+ pq.push(ri);
+ }
+
+ while (numVertices > MAX_COMPONENT_VERTICES && !pq.empty()) {
+ const RegionInfo &ri = pq.top();
+ DEBUG_PRINTF("region %u: vertices=%zu reach=%s score=%zu, "
+ "widths=[%s,%s]\n",
+ ri.id, ri.vertices.size(), describeClass(ri.reach).c_str(),
+ ri.score(), ri.minWidth.str().c_str(),
+ ri.maxWidth.str().c_str());
+
+ size_t verticesAdded = 0;
+ size_t verticesRemoved = 0;
+ replaceRegion(h, ri, &verticesAdded, &verticesRemoved);
+ DEBUG_PRINTF("%zu vertices removed, %zu vertices added\n",
+ verticesRemoved, verticesAdded);
+
+ // We are trusting that implementation NFAs will be able to use the
+ // LimEx bounded repeat code here.
+ numVertices -= verticesRemoved;
+ numVertices += BOUNDED_REPEAT_COUNT;
+
+ DEBUG_PRINTF("numVertices is now %zu\n", numVertices);
+ pq.pop();
+ }
+
+ // We may have vertices that have edges to both accept and acceptEod: in
+ // this case, we can optimize for performance by removing the acceptEod
+ // edges.
remove_in_edge_if(h.acceptEod, SourceHasEdgeToAccept(h), h);
-}
-
-void prefilterReductions(NGHolder &h, const CompileContext &cc) {
- if (!cc.grey.prefilterReductions) {
- return;
- }
-
- if (num_vertices(h) <= MAX_COMPONENT_VERTICES) {
- DEBUG_PRINTF("graph is already small enough (%zu vertices)\n",
- num_vertices(h));
- return;
- }
-
+}
+
+void prefilterReductions(NGHolder &h, const CompileContext &cc) {
+ if (!cc.grey.prefilterReductions) {
+ return;
+ }
+
+ if (num_vertices(h) <= MAX_COMPONENT_VERTICES) {
+ DEBUG_PRINTF("graph is already small enough (%zu vertices)\n",
+ num_vertices(h));
+ return;
+ }
+
DEBUG_PRINTF("before: graph with %zu vertices, %zu edges\n",
num_vertices(h), num_edges(h));
-
+
renumber_vertices(h);
renumber_edges(h);
-
- reduceRegions(h);
-
+
+ reduceRegions(h);
+
renumber_vertices(h);
renumber_edges(h);
DEBUG_PRINTF("after: graph with %zu vertices, %zu edges\n",
num_vertices(h), num_edges(h));
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.h b/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.h
index e1f5c13f37..88cbefd2de 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_prefilter.h
@@ -1,45 +1,45 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Prefilter Reductions.
- */
-
-#ifndef NG_PREFILTER_H
-#define NG_PREFILTER_H
-
-namespace ue2 {
-
-class NGHolder;
-struct CompileContext;
-
-void prefilterReductions(NGHolder &h, const CompileContext &cc);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Prefilter Reductions.
+ */
+
+#ifndef NG_PREFILTER_H
+#define NG_PREFILTER_H
+
+namespace ue2 {
+
+class NGHolder;
+struct CompileContext;
+
+void prefilterReductions(NGHolder &h, const CompileContext &cc);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp
index 997f652d0d..adda70312f 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_prune.cpp
@@ -1,434 +1,434 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Functions for pruning unreachable vertices or reports from the graph.
- */
-#include "ng_prune.h"
-
-#include "ng_dominators.h"
-#include "ng_holder.h"
-#include "ng_reports.h"
-#include "ng_util.h"
-#include "util/container.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Functions for pruning unreachable vertices or reports from the graph.
+ */
+#include "ng_prune.h"
+
+#include "ng_dominators.h"
+#include "ng_holder.h"
+#include "ng_reports.h"
+#include "ng_util.h"
+#include "util/container.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
-#include "util/report_manager.h"
-
-#include <deque>
-#include <map>
-
-#include <boost/graph/depth_first_search.hpp>
-#include <boost/graph/reverse_graph.hpp>
-
-using namespace std;
-using boost::default_color_type;
-using boost::reverse_graph;
-
-namespace ue2 {
-
-/** Remove any vertices that can't be reached by traversing the graph in
- * reverse from acceptEod. */
-void pruneUnreachable(NGHolder &g) {
- deque<NFAVertex> dead;
-
+#include "util/report_manager.h"
+
+#include <deque>
+#include <map>
+
+#include <boost/graph/depth_first_search.hpp>
+#include <boost/graph/reverse_graph.hpp>
+
+using namespace std;
+using boost::default_color_type;
+using boost::reverse_graph;
+
+namespace ue2 {
+
+/** Remove any vertices that can't be reached by traversing the graph in
+ * reverse from acceptEod. */
+void pruneUnreachable(NGHolder &g) {
+ deque<NFAVertex> dead;
+
if (in_degree(g.acceptEod, g) == 1 && !in_degree(g.accept, g)
&& edge(g.accept, g.acceptEod, g).second) {
- // Trivial case: there are no in-edges to our accepts (other than
- // accept->acceptEod), so all non-specials are unreachable.
- for (auto v : vertices_range(g)) {
- if (!is_special(v, g)) {
- dead.push_back(v);
- }
- }
- } else {
- // Walk a reverse graph from acceptEod with Boost's depth_first_visit
- // call.
+ // Trivial case: there are no in-edges to our accepts (other than
+ // accept->acceptEod), so all non-specials are unreachable.
+ for (auto v : vertices_range(g)) {
+ if (!is_special(v, g)) {
+ dead.push_back(v);
+ }
+ }
+ } else {
+ // Walk a reverse graph from acceptEod with Boost's depth_first_visit
+ // call.
typedef reverse_graph<NGHolder, NGHolder &> RevNFAGraph;
RevNFAGraph revg(g);
-
+
map<RevNFAGraph::vertex_descriptor, default_color_type> colours;
-
- depth_first_visit(revg, g.acceptEod,
- make_dfs_visitor(boost::null_visitor()),
- make_assoc_property_map(colours));
-
- DEBUG_PRINTF("color map has %zu entries after DFV\n", colours.size());
-
- // All non-special vertices that aren't in the colour map (because they
- // weren't reached) can be removed.
- for (auto v : vertices_range(revg)) {
- if (is_special(v, revg)) {
- continue;
- }
- if (!contains(colours, v)) {
- dead.push_back(v);
- }
- }
- }
-
- if (dead.empty()) {
- DEBUG_PRINTF("no unreachable vertices\n");
- return;
- }
-
- remove_vertices(dead, g, false);
- DEBUG_PRINTF("removed %zu unreachable vertices\n", dead.size());
-}
-
-template<class nfag_t>
-static
+
+ depth_first_visit(revg, g.acceptEod,
+ make_dfs_visitor(boost::null_visitor()),
+ make_assoc_property_map(colours));
+
+ DEBUG_PRINTF("color map has %zu entries after DFV\n", colours.size());
+
+ // All non-special vertices that aren't in the colour map (because they
+ // weren't reached) can be removed.
+ for (auto v : vertices_range(revg)) {
+ if (is_special(v, revg)) {
+ continue;
+ }
+ if (!contains(colours, v)) {
+ dead.push_back(v);
+ }
+ }
+ }
+
+ if (dead.empty()) {
+ DEBUG_PRINTF("no unreachable vertices\n");
+ return;
+ }
+
+ remove_vertices(dead, g, false);
+ DEBUG_PRINTF("removed %zu unreachable vertices\n", dead.size());
+}
+
+template<class nfag_t>
+static
bool pruneForwardUseless(NGHolder &h, const nfag_t &g,
typename nfag_t::vertex_descriptor s,
decltype(make_small_color_map(NGHolder())) &colors) {
- // Begin with all vertices set to white, as DFV only marks visited
- // vertices.
+ // Begin with all vertices set to white, as DFV only marks visited
+ // vertices.
colors.fill(small_color::white);
-
+
depth_first_visit(g, s, make_dfs_visitor(boost::null_visitor()), colors);
-
- vector<NFAVertex> dead;
-
- // All non-special vertices that are still white can be removed.
- for (auto v : vertices_range(g)) {
+
+ vector<NFAVertex> dead;
+
+ // All non-special vertices that are still white can be removed.
+ for (auto v : vertices_range(g)) {
if (!is_special(v, g) && get(colors, v) == small_color::white) {
DEBUG_PRINTF("vertex %zu is unreachable from %zu\n",
- g[v].index, g[s].index);
+ g[v].index, g[s].index);
dead.push_back(NFAVertex(v));
- }
- }
-
- if (dead.empty()) {
- return false;
- }
-
- DEBUG_PRINTF("removing %zu vertices\n", dead.size());
- remove_vertices(dead, h, false);
- return true;
-}
-
-/** Remove any vertices which can't be reached by traversing the graph forward
- * from start or in reverse from acceptEod. If \p renumber is false, no
- * vertex/edge renumbering is done. */
-void pruneUseless(NGHolder &g, bool renumber) {
- DEBUG_PRINTF("pruning useless vertices\n");
- assert(hasCorrectlyNumberedVertices(g));
+ }
+ }
+
+ if (dead.empty()) {
+ return false;
+ }
+
+ DEBUG_PRINTF("removing %zu vertices\n", dead.size());
+ remove_vertices(dead, h, false);
+ return true;
+}
+
+/** Remove any vertices which can't be reached by traversing the graph forward
+ * from start or in reverse from acceptEod. If \p renumber is false, no
+ * vertex/edge renumbering is done. */
+void pruneUseless(NGHolder &g, bool renumber) {
+ DEBUG_PRINTF("pruning useless vertices\n");
+ assert(hasCorrectlyNumberedVertices(g));
auto colors = make_small_color_map(g);
-
+
bool work_done = pruneForwardUseless(g, g, g.start, colors);
work_done |= pruneForwardUseless(g, reverse_graph<NGHolder, NGHolder &>(g),
g.acceptEod, colors);
-
- if (!work_done) {
- return;
- }
-
- if (renumber) {
+
+ if (!work_done) {
+ return;
+ }
+
+ if (renumber) {
renumber_edges(g);
renumber_vertices(g);
- }
-}
-
-/** This code removes any vertices which do not accept any symbols. Any
- * vertices which no longer lie on a path from a start to an accept are also
- * pruned. */
-void pruneEmptyVertices(NGHolder &g) {
- DEBUG_PRINTF("pruning empty vertices\n");
- vector<NFAVertex> dead;
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
-
- const CharReach &cr = g[v].char_reach;
- if (cr.none()) {
+ }
+}
+
+/** This code removes any vertices which do not accept any symbols. Any
+ * vertices which no longer lie on a path from a start to an accept are also
+ * pruned. */
+void pruneEmptyVertices(NGHolder &g) {
+ DEBUG_PRINTF("pruning empty vertices\n");
+ vector<NFAVertex> dead;
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ const CharReach &cr = g[v].char_reach;
+ if (cr.none()) {
DEBUG_PRINTF("empty: %zu\n", g[v].index);
- dead.push_back(v);
- }
- }
-
- if (dead.empty()) {
- return;
- }
-
- remove_vertices(dead, g);
- pruneUseless(g);
-}
-
-/** Remove any edges from vertices that generate accepts (for Highlander
- * graphs). */
-void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) {
- // Safety check: all reports must be simple exhaustible reports, or this is
- // not safe. This optimisation should be called early enough that no
- // internal reports have been added.
- for (auto report_id : all_reports(g)) {
- const Report &ir = rm.getReport(report_id);
-
- if (ir.ekey == INVALID_EKEY || ir.hasBounds() ||
- !isExternalReport(ir)) {
- DEBUG_PRINTF("report %u is not external highlander with "
- "no bounds\n", report_id);
- return;
- }
- }
-
- vector<NFAEdge> dead;
- for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
- if (is_special(u, g)) {
- continue;
- }
-
- // We can prune any out-edges that aren't accepts
- for (const auto &e : out_edges_range(u, g)) {
- if (!is_any_accept(target(e, g), g)) {
- dead.push_back(e);
- }
- }
- }
-
- if (dead.empty()) {
- return;
- }
-
- DEBUG_PRINTF("found %zu removable edges due to single match\n", dead.size());
- remove_edges(dead, g);
- pruneUseless(g);
-}
-
-static
-bool isDominatedByReporter(const NGHolder &g,
+ dead.push_back(v);
+ }
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
+ remove_vertices(dead, g);
+ pruneUseless(g);
+}
+
+/** Remove any edges from vertices that generate accepts (for Highlander
+ * graphs). */
+void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) {
+ // Safety check: all reports must be simple exhaustible reports, or this is
+ // not safe. This optimisation should be called early enough that no
+ // internal reports have been added.
+ for (auto report_id : all_reports(g)) {
+ const Report &ir = rm.getReport(report_id);
+
+ if (ir.ekey == INVALID_EKEY || ir.hasBounds() ||
+ !isExternalReport(ir)) {
+ DEBUG_PRINTF("report %u is not external highlander with "
+ "no bounds\n", report_id);
+ return;
+ }
+ }
+
+ vector<NFAEdge> dead;
+ for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
+ if (is_special(u, g)) {
+ continue;
+ }
+
+ // We can prune any out-edges that aren't accepts
+ for (const auto &e : out_edges_range(u, g)) {
+ if (!is_any_accept(target(e, g), g)) {
+ dead.push_back(e);
+ }
+ }
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
+ DEBUG_PRINTF("found %zu removable edges due to single match\n", dead.size());
+ remove_edges(dead, g);
+ pruneUseless(g);
+}
+
+static
+bool isDominatedByReporter(const NGHolder &g,
const unordered_map<NFAVertex, NFAVertex> &dom,
- NFAVertex v, ReportID report_id) {
- for (auto it = dom.find(v); it != end(dom); it = dom.find(v)) {
- NFAVertex u = it->second;
- // Note: reporters with edges only to acceptEod are not considered to
- // dominate.
- if (edge(u, g.accept, g).second && contains(g[u].reports, report_id)) {
+ NFAVertex v, ReportID report_id) {
+ for (auto it = dom.find(v); it != end(dom); it = dom.find(v)) {
+ NFAVertex u = it->second;
+ // Note: reporters with edges only to acceptEod are not considered to
+ // dominate.
+ if (edge(u, g.accept, g).second && contains(g[u].reports, report_id)) {
DEBUG_PRINTF("%zu is dominated by %zu, and both report %u\n",
- g[v].index, g[u].index, report_id);
- return true;
- }
- v = u;
- }
- return false;
-}
-
-/**
- * True if the vertex has (a) a self-loop, (b) only out-edges to accept and
- * itself and (c) only simple exhaustible reports.
- */
-static
-bool hasOnlySelfLoopAndExhaustibleAccepts(const NGHolder &g,
- const ReportManager &rm,
- NFAVertex v) {
- if (!edge(v, v, g).second) {
- return false;
- }
-
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w != v && w != g.accept) {
- return false;
- }
- }
-
- for (const auto &report_id : g[v].reports) {
- if (!isSimpleExhaustible(rm.getReport(report_id))) {
- return false;
- }
- }
-
- return true;
-}
-
-void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) {
- vector<NFAVertex> reporters;
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- for (const auto &report_id : g[v].reports) {
- const Report &r = rm.getReport(report_id);
- if (isSimpleExhaustible(r)) {
- reporters.push_back(v);
- break;
- }
- }
- }
- for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
- for (const auto &report_id : g[v].reports) {
- const Report &r = rm.getReport(report_id);
- if (isSimpleExhaustible(r)) {
- reporters.push_back(v);
- break;
- }
- }
- }
-
- if (reporters.empty()) {
- return;
- }
-
-
+ g[v].index, g[u].index, report_id);
+ return true;
+ }
+ v = u;
+ }
+ return false;
+}
+
+/**
+ * True if the vertex has (a) a self-loop, (b) only out-edges to accept and
+ * itself and (c) only simple exhaustible reports.
+ */
+static
+bool hasOnlySelfLoopAndExhaustibleAccepts(const NGHolder &g,
+ const ReportManager &rm,
+ NFAVertex v) {
+ if (!edge(v, v, g).second) {
+ return false;
+ }
+
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (w != v && w != g.accept) {
+ return false;
+ }
+ }
+
+ for (const auto &report_id : g[v].reports) {
+ if (!isSimpleExhaustible(rm.getReport(report_id))) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) {
+ vector<NFAVertex> reporters;
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ for (const auto &report_id : g[v].reports) {
+ const Report &r = rm.getReport(report_id);
+ if (isSimpleExhaustible(r)) {
+ reporters.push_back(v);
+ break;
+ }
+ }
+ }
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ for (const auto &report_id : g[v].reports) {
+ const Report &r = rm.getReport(report_id);
+ if (isSimpleExhaustible(r)) {
+ reporters.push_back(v);
+ break;
+ }
+ }
+ }
+
+ if (reporters.empty()) {
+ return;
+ }
+
+
sort(begin(reporters), end(reporters));
- reporters.erase(unique(begin(reporters), end(reporters)), end(reporters));
-
- DEBUG_PRINTF("%zu vertices have simple exhaustible reports\n",
- reporters.size());
-
- const auto &dom = findDominators(g);
- bool modified = false;
-
- // If a reporter vertex is dominated by another with the same report, we
- // can remove that report; if all reports are removed, we can remove the
- // vertex entirely.
- for (const auto v : reporters) {
- const auto reports = g[v].reports; // copy, as we're going to mutate
- for (const auto &report_id : reports) {
- if (!isSimpleExhaustible(rm.getReport(report_id))) {
- continue;
- }
- if (isDominatedByReporter(g, dom, v, report_id)) {
+ reporters.erase(unique(begin(reporters), end(reporters)), end(reporters));
+
+ DEBUG_PRINTF("%zu vertices have simple exhaustible reports\n",
+ reporters.size());
+
+ const auto &dom = findDominators(g);
+ bool modified = false;
+
+ // If a reporter vertex is dominated by another with the same report, we
+ // can remove that report; if all reports are removed, we can remove the
+ // vertex entirely.
+ for (const auto v : reporters) {
+ const auto reports = g[v].reports; // copy, as we're going to mutate
+ for (const auto &report_id : reports) {
+ if (!isSimpleExhaustible(rm.getReport(report_id))) {
+ continue;
+ }
+ if (isDominatedByReporter(g, dom, v, report_id)) {
DEBUG_PRINTF("removed dominated report %u from vertex %zu\n",
- report_id, g[v].index);
- g[v].reports.erase(report_id);
- }
- }
-
- if (g[v].reports.empty()) {
+ report_id, g[v].index);
+ g[v].reports.erase(report_id);
+ }
+ }
+
+ if (g[v].reports.empty()) {
DEBUG_PRINTF("removed edges to accepts from %zu, no reports left\n",
- g[v].index);
- remove_edge(v, g.accept, g);
- remove_edge(v, g.acceptEod, g);
- modified = true;
- }
- }
-
- // If a reporter vertex has a self-loop, but otherwise only leads to accept
- // (note: NOT acceptEod) and has simple exhaustible reports, we can delete
- // the self-loop.
- for (const auto v : reporters) {
- if (hasOnlySelfLoopAndExhaustibleAccepts(g, rm, v)) {
- remove_edge(v, v, g);
- modified = true;
+ g[v].index);
+ remove_edge(v, g.accept, g);
+ remove_edge(v, g.acceptEod, g);
+ modified = true;
+ }
+ }
+
+ // If a reporter vertex has a self-loop, but otherwise only leads to accept
+ // (note: NOT acceptEod) and has simple exhaustible reports, we can delete
+ // the self-loop.
+ for (const auto v : reporters) {
+ if (hasOnlySelfLoopAndExhaustibleAccepts(g, rm, v)) {
+ remove_edge(v, v, g);
+ modified = true;
DEBUG_PRINTF("removed self-loop on %zu\n", g[v].index);
- }
- }
-
- if (!modified) {
- return;
- }
-
- pruneUseless(g);
-
- // We may have only removed self-loops, in which case pruneUseless wouldn't
- // renumber, so we do edge renumbering explicitly here.
+ }
+ }
+
+ if (!modified) {
+ return;
+ }
+
+ pruneUseless(g);
+
+ // We may have only removed self-loops, in which case pruneUseless wouldn't
+ // renumber, so we do edge renumbering explicitly here.
renumber_edges(g);
-}
-
-/** Removes the given Report ID from vertices connected to accept, and then
- * prunes useless vertices that have had their report sets reduced to empty. */
-void pruneReport(NGHolder &g, ReportID report) {
- set<NFAEdge> dead;
-
- for (const auto &e : in_edges_range(g.accept, g)) {
- NFAVertex u = source(e, g);
- auto &reports = g[u].reports;
- if (contains(reports, report)) {
- reports.erase(report);
- if (reports.empty()) {
- dead.insert(e);
- }
- }
- }
-
- for (const auto &e : in_edges_range(g.acceptEod, g)) {
- NFAVertex u = source(e, g);
- if (u == g.accept) {
- continue;
- }
- auto &reports = g[u].reports;
- if (contains(reports, report)) {
- reports.erase(report);
- if (reports.empty()) {
- dead.insert(e);
- }
- }
- }
-
- if (dead.empty()) {
- return;
- }
-
- remove_edges(dead, g);
- pruneUnreachable(g);
+}
+
+/** Removes the given Report ID from vertices connected to accept, and then
+ * prunes useless vertices that have had their report sets reduced to empty. */
+void pruneReport(NGHolder &g, ReportID report) {
+ set<NFAEdge> dead;
+
+ for (const auto &e : in_edges_range(g.accept, g)) {
+ NFAVertex u = source(e, g);
+ auto &reports = g[u].reports;
+ if (contains(reports, report)) {
+ reports.erase(report);
+ if (reports.empty()) {
+ dead.insert(e);
+ }
+ }
+ }
+
+ for (const auto &e : in_edges_range(g.acceptEod, g)) {
+ NFAVertex u = source(e, g);
+ if (u == g.accept) {
+ continue;
+ }
+ auto &reports = g[u].reports;
+ if (contains(reports, report)) {
+ reports.erase(report);
+ if (reports.empty()) {
+ dead.insert(e);
+ }
+ }
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
+ remove_edges(dead, g);
+ pruneUnreachable(g);
renumber_vertices(g);
renumber_edges(g);
-}
-
-/** Removes all Report IDs bar the given one from vertices connected to accept,
- * and then prunes useless vertices that have had their report sets reduced to
- * empty. */
-void pruneAllOtherReports(NGHolder &g, ReportID report) {
- set<NFAEdge> dead;
-
- for (const auto &e : in_edges_range(g.accept, g)) {
- NFAVertex u = source(e, g);
- auto &reports = g[u].reports;
- if (contains(reports, report)) {
- reports.clear();
- reports.insert(report);
- } else {
- reports.clear();
- dead.insert(e);
- }
- }
-
- for (const auto &e : in_edges_range(g.acceptEod, g)) {
- NFAVertex u = source(e, g);
- if (u == g.accept) {
- continue;
- }
- auto &reports = g[u].reports;
- if (contains(reports, report)) {
- reports.clear();
- reports.insert(report);
- } else {
- reports.clear();
- dead.insert(e);
- }
- }
-
- if (dead.empty()) {
- return;
- }
-
- remove_edges(dead, g);
- pruneUnreachable(g);
+}
+
+/** Removes all Report IDs bar the given one from vertices connected to accept,
+ * and then prunes useless vertices that have had their report sets reduced to
+ * empty. */
+void pruneAllOtherReports(NGHolder &g, ReportID report) {
+ set<NFAEdge> dead;
+
+ for (const auto &e : in_edges_range(g.accept, g)) {
+ NFAVertex u = source(e, g);
+ auto &reports = g[u].reports;
+ if (contains(reports, report)) {
+ reports.clear();
+ reports.insert(report);
+ } else {
+ reports.clear();
+ dead.insert(e);
+ }
+ }
+
+ for (const auto &e : in_edges_range(g.acceptEod, g)) {
+ NFAVertex u = source(e, g);
+ if (u == g.accept) {
+ continue;
+ }
+ auto &reports = g[u].reports;
+ if (contains(reports, report)) {
+ reports.clear();
+ reports.insert(report);
+ } else {
+ reports.clear();
+ dead.insert(e);
+ }
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
+ remove_edges(dead, g);
+ pruneUnreachable(g);
renumber_vertices(g);
renumber_edges(g);
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_prune.h b/contrib/libs/hyperscan/src/nfagraph/ng_prune.h
index 0dcef7c8d5..475953be3c 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_prune.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_prune.h
@@ -1,75 +1,75 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Functions for pruning unreachable vertices or reports from the graph.
- */
-
-#ifndef NG_PRUNE_H
-#define NG_PRUNE_H
-
-#include "ue2common.h"
-
-namespace ue2 {
-
-class NGHolder;
-class ReportManager;
-
-/** Remove any vertices that can't be reached by traversing the graph in
- * reverse from acceptEod. */
-void pruneUnreachable(NGHolder &g);
-
-/** Remove any vertices which can't be reached by traversing the graph forward
- * from start or in reverse from acceptEod. If \p renumber is false, no
- * vertex/edge renumbering is done. */
-void pruneUseless(NGHolder &g, bool renumber = true);
-
-/** Remove any vertices with empty reachability. */
-void pruneEmptyVertices(NGHolder &g);
-
-/** Remove any edges from vertices that generate accepts (for Highlander
- * graphs). */
-void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm);
-
-/**
- * Prune highlander reports that are dominated by earlier ones in the graph.
- */
-void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm);
-
-/** Removes the given Report ID from vertices connected to accept, and then
- * prunes useless vertices that have had their report sets reduced to empty. */
-void pruneReport(NGHolder &g, ReportID report);
-
-/** Removes all Report IDs bar the given one from vertices connected to accept,
- * and then prunes useless vertices that have had their report sets reduced to
- * empty. */
-void pruneAllOtherReports(NGHolder &g, ReportID report);
-
-} // namespace ue2
-
-#endif // NG_PRUNE_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Functions for pruning unreachable vertices or reports from the graph.
+ */
+
+#ifndef NG_PRUNE_H
+#define NG_PRUNE_H
+
+#include "ue2common.h"
+
+namespace ue2 {
+
+class NGHolder;
+class ReportManager;
+
+/** Remove any vertices that can't be reached by traversing the graph in
+ * reverse from acceptEod. */
+void pruneUnreachable(NGHolder &g);
+
+/** Remove any vertices which can't be reached by traversing the graph forward
+ * from start or in reverse from acceptEod. If \p renumber is false, no
+ * vertex/edge renumbering is done. */
+void pruneUseless(NGHolder &g, bool renumber = true);
+
+/** Remove any vertices with empty reachability. */
+void pruneEmptyVertices(NGHolder &g);
+
+/** Remove any edges from vertices that generate accepts (for Highlander
+ * graphs). */
+void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm);
+
+/**
+ * Prune highlander reports that are dominated by earlier ones in the graph.
+ */
+void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm);
+
+/** Removes the given Report ID from vertices connected to accept, and then
+ * prunes useless vertices that have had their report sets reduced to empty. */
+void pruneReport(NGHolder &g, ReportID report);
+
+/** Removes all Report IDs bar the given one from vertices connected to accept,
+ * and then prunes useless vertices that have had their report sets reduced to
+ * empty. */
+void pruneAllOtherReports(NGHolder &g, ReportID report);
+
+} // namespace ue2
+
+#endif // NG_PRUNE_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_puff.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_puff.cpp
index eb1f7114f6..984518b0fc 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_puff.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_puff.cpp
@@ -1,578 +1,578 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Puff construction from NGHolder.
- */
-#include "ng_puff.h"
-
-#include "grey.h"
-#include "ng_depth.h"
-#include "ng_holder.h"
-#include "ng_prune.h"
-#include "ng_repeat.h"
-#include "ng_reports.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "nfa/nfa_api_queue.h"
-#include "nfa/mpvcompile.h"
-#include "rose/rose_build.h"
-#include "util/compile_context.h"
-#include "util/graph_range.h"
-#include "util/report_manager.h"
-
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-static const unsigned MIN_PUFF_LENGTH = 16;
-static const unsigned HEAD_BACKOFF = 16;
-
-static
-size_t countChain(const NGHolder &g, NFAVertex v) {
- size_t count = 0;
- while (v) {
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Puff construction from NGHolder.
+ */
+#include "ng_puff.h"
+
+#include "grey.h"
+#include "ng_depth.h"
+#include "ng_holder.h"
+#include "ng_prune.h"
+#include "ng_repeat.h"
+#include "ng_reports.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "nfa/nfa_api_queue.h"
+#include "nfa/mpvcompile.h"
+#include "rose/rose_build.h"
+#include "util/compile_context.h"
+#include "util/graph_range.h"
+#include "util/report_manager.h"
+
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+static const unsigned MIN_PUFF_LENGTH = 16;
+static const unsigned HEAD_BACKOFF = 16;
+
+static
+size_t countChain(const NGHolder &g, NFAVertex v) {
+ size_t count = 0;
+ while (v) {
DEBUG_PRINTF("counting vertex %zu\n", g[v].index);
- if (is_special(v, g)) {
- break;
- }
-
- count++;
- v = getSoleDestVertex(g, v);
- }
- DEBUG_PRINTF("done %zu\n", count);
- return count;
-}
-
-static
-void wireNewAccepts(NGHolder &g, NFAVertex head,
- const flat_set<ReportID> &chain_reports) {
- for (auto u : inv_adjacent_vertices_range(head, g)) {
- if (is_special(u, g)) {
- continue;
- }
-
+ if (is_special(v, g)) {
+ break;
+ }
+
+ count++;
+ v = getSoleDestVertex(g, v);
+ }
+ DEBUG_PRINTF("done %zu\n", count);
+ return count;
+}
+
+static
+void wireNewAccepts(NGHolder &g, NFAVertex head,
+ const flat_set<ReportID> &chain_reports) {
+ for (auto u : inv_adjacent_vertices_range(head, g)) {
+ if (is_special(u, g)) {
+ continue;
+ }
+
DEBUG_PRINTF("adding edge: %zu -> accept\n", g[u].index);
- assert(!edge(u, g.accept, g).second);
- assert(!edge(u, g.acceptEod, g).second);
- add_edge(u, g.accept, g);
-
- // Replace reports with our chain reports.
- auto &u_reports = g[u].reports;
- u_reports.clear();
- u_reports.insert(chain_reports.begin(), chain_reports.end());
- }
-}
-
-static
-bool isFixedDepth(const NGHolder &g, NFAVertex v) {
- // If the vertex is reachable from startDs, it can't be fixed depth.
+ assert(!edge(u, g.accept, g).second);
+ assert(!edge(u, g.acceptEod, g).second);
+ add_edge(u, g.accept, g);
+
+ // Replace reports with our chain reports.
+ auto &u_reports = g[u].reports;
+ u_reports.clear();
+ u_reports.insert(chain_reports.begin(), chain_reports.end());
+ }
+}
+
+static
+bool isFixedDepth(const NGHolder &g, NFAVertex v) {
+ // If the vertex is reachable from startDs, it can't be fixed depth.
auto depthFromStartDs = calcDepthsFrom(g, g.startDs);
-
- u32 idx = g[v].index;
- const DepthMinMax &ds = depthFromStartDs.at(idx);
- if (ds.min.is_reachable()) {
- DEBUG_PRINTF("vertex reachable from startDs\n");
- return false;
- }
-
+
+ u32 idx = g[v].index;
+ const DepthMinMax &ds = depthFromStartDs.at(idx);
+ if (ds.min.is_reachable()) {
+ DEBUG_PRINTF("vertex reachable from startDs\n");
+ return false;
+ }
+
auto depthFromStart = calcDepthsFrom(g, g.start);
-
- /* we can still consider the head of a puff chain as at fixed depth if
- * it has a self-loop: so we look at all the preds of v (other than v
- * itself) */
-
- assert(v && !is_special(v, g));
-
- u32 count = 0;
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == v) {
- continue; // self-loop
- }
- count++;
-
- idx = g[u].index;
- const DepthMinMax &d = depthFromStart.at(idx);
- if (d.min != d.max) {
- return false;
- }
- }
-
- return count != 0; // at least one fixed-depth pred
-}
-
-static
-bool singleStart(const NGHolder &g) {
- set<NFAVertex> seen;
-
- for (auto v : adjacent_vertices_range(g.start, g)) {
- if (!is_special(v, g)) {
+
+ /* we can still consider the head of a puff chain as at fixed depth if
+ * it has a self-loop: so we look at all the preds of v (other than v
+ * itself) */
+
+ assert(v && !is_special(v, g));
+
+ u32 count = 0;
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == v) {
+ continue; // self-loop
+ }
+ count++;
+
+ idx = g[u].index;
+ const DepthMinMax &d = depthFromStart.at(idx);
+ if (d.min != d.max) {
+ return false;
+ }
+ }
+
+ return count != 0; // at least one fixed-depth pred
+}
+
+static
+bool singleStart(const NGHolder &g) {
+ set<NFAVertex> seen;
+
+ for (auto v : adjacent_vertices_range(g.start, g)) {
+ if (!is_special(v, g)) {
DEBUG_PRINTF("saw %zu\n", g[v].index);
- seen.insert(v);
- }
- }
- for (auto v : adjacent_vertices_range(g.startDs, g)) {
- if (!is_special(v, g)) {
+ seen.insert(v);
+ }
+ }
+ for (auto v : adjacent_vertices_range(g.startDs, g)) {
+ if (!is_special(v, g)) {
DEBUG_PRINTF("saw %zu\n", g[v].index);
- seen.insert(v);
- }
- }
-
- DEBUG_PRINTF("comp has %zu starts\n", seen.size());
-
- return seen.size() == 1;
-}
-
-static
-bool triggerResetsPuff(const NGHolder &g, NFAVertex head) {
- const CharReach puff_escapes = ~g[head].char_reach;
-
- for (auto u : inv_adjacent_vertices_range(head, g)) {
- if (!g[u].char_reach.isSubsetOf(puff_escapes)) {
+ seen.insert(v);
+ }
+ }
+
+ DEBUG_PRINTF("comp has %zu starts\n", seen.size());
+
+ return seen.size() == 1;
+}
+
+static
+bool triggerResetsPuff(const NGHolder &g, NFAVertex head) {
+ const CharReach puff_escapes = ~g[head].char_reach;
+
+ for (auto u : inv_adjacent_vertices_range(head, g)) {
+ if (!g[u].char_reach.isSubsetOf(puff_escapes)) {
DEBUG_PRINTF("no reset on trigger %zu %zu\n", g[u].index,
- g[head].index);
- return false;
- }
- }
-
- DEBUG_PRINTF("reset on trigger\n");
- return true;
-}
-
-/** ".*[X]{N}" can be treated as ".*[X]{N,}" (misc_opt does reverse transform)
- * */
-static
-bool triggerFloodsPuff(const NGHolder &g, NFAVertex head) {
+ g[head].index);
+ return false;
+ }
+ }
+
+ DEBUG_PRINTF("reset on trigger\n");
+ return true;
+}
+
+/** ".*[X]{N}" can be treated as ".*[X]{N,}" (misc_opt does reverse transform)
+ * */
+static
+bool triggerFloodsPuff(const NGHolder &g, NFAVertex head) {
DEBUG_PRINTF("head = %zu\n", g[head].index);
-
- const CharReach &puff_cr = g[head].char_reach;
-
- /* we can use the pred of the head as the base of our check if it the cr
- * matches as if
- * head cr subsetof pred cr: if head is being pushed on then puff must
- * still being pushed on
- * pred cr subsetof head cr: if the puff matches then head must be also
- * always be on if the is connected to a wide enough cyclic
- */
- if (proper_in_degree(head, g) == 1
- && puff_cr == g[getSoleSourceVertex(g, head)].char_reach) {
- head = getSoleSourceVertex(g, head);
+
+ const CharReach &puff_cr = g[head].char_reach;
+
+ /* we can use the pred of the head as the base of our check if it the cr
+ * matches as if
+ * head cr subsetof pred cr: if head is being pushed on then puff must
+ * still being pushed on
+ * pred cr subsetof head cr: if the puff matches then head must be also
+ * always be on if the is connected to a wide enough cyclic
+ */
+ if (proper_in_degree(head, g) == 1
+ && puff_cr == g[getSoleSourceVertex(g, head)].char_reach) {
+ head = getSoleSourceVertex(g, head);
DEBUG_PRINTF("temp new head = %zu\n", g[head].index);
- }
-
- for (auto s : inv_adjacent_vertices_range(head, g)) {
+ }
+
+ for (auto s : inv_adjacent_vertices_range(head, g)) {
DEBUG_PRINTF("s = %zu\n", g[s].index);
- if (!puff_cr.isSubsetOf(g[s].char_reach)) {
+ if (!puff_cr.isSubsetOf(g[s].char_reach)) {
DEBUG_PRINTF("no flood on trigger %zu %zu\n", g[s].index,
g[head].index);
- return false;
- }
-
- if (!hasSelfLoop(s, g) && s != g.start) {
- DEBUG_PRINTF("no self loop\n");
- return false;
- }
-
- if (s == g.start && !edge(g.startDs, head, g).second) {
- DEBUG_PRINTF("not float\n");
- return false;
- }
- }
-
- DEBUG_PRINTF("reset on trigger\n");
- return true;
-}
-
-static
-u32 allowedSquashDistance(const CharReach &cr, u32 min_width, const NGHolder &g,
- NFAVertex pv, bool prefilter) {
- CharReach accept_cr;
- DEBUG_PRINTF("hello |cr|=%zu %d\n", cr.count(), (int)cr.find_first());
-
- if (prefilter) {
- /* a later prefilter stage make weaken the lead up so we can't be sure
- * that all the triggers will be squashing the puffette. */
- return 0;
- }
-
- /* TODO: inspect further back in the pattern */
- for (auto u : inv_adjacent_vertices_range(pv, g)) {
- accept_cr |= g[u].char_reach;
- }
-
- DEBUG_PRINTF("|accept_cr|=%zu\n", accept_cr.count());
-
- if ((accept_cr & cr).any()) {
- return 0; /* the accept byte doesn't always kill the puffette. TODO:
- * maybe if we look further back we could find something that
- * would kill the puffette... */
- }
- DEBUG_PRINTF("returning squash distance of %u\n", min_width);
- return min_width;
-}
-
-/** Gives a stronger puff trigger when the trigger is connected to a wide
- * cyclic state (aside from sds) */
-static
-void improveHead(NGHolder &g, NFAVertex *a, vector<NFAVertex> *nodes) {
- DEBUG_PRINTF("attempting to improve puff trigger\n");
- assert(!nodes->empty());
- const CharReach &puff_cr = g[nodes->back()].char_reach;
- if (puff_cr.all()) {
- return; /* we can't really do much with this one */
- }
-
- /* add the runway */
- DEBUG_PRINTF("backing off - allowing a decent header\n");
- assert(nodes->size() > HEAD_BACKOFF);
- for (u32 i = 0; i < HEAD_BACKOFF - 1; i++) {
- nodes->pop_back();
- }
- *a = nodes->back();
- nodes->pop_back();
-}
-
-static
-void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv,
- const CharReach &cr, const ReportID report, u32 width,
- bool fixed_depth, bool unbounded, bool auto_restart,
- RoseBuild &rose, ReportManager &rm,
- flat_set<ReportID> &chain_reports, bool prefilter) {
- DEBUG_PRINTF("constructing Puff for report %u\n", report);
+ return false;
+ }
+
+ if (!hasSelfLoop(s, g) && s != g.start) {
+ DEBUG_PRINTF("no self loop\n");
+ return false;
+ }
+
+ if (s == g.start && !edge(g.startDs, head, g).second) {
+ DEBUG_PRINTF("not float\n");
+ return false;
+ }
+ }
+
+ DEBUG_PRINTF("reset on trigger\n");
+ return true;
+}
+
+static
+u32 allowedSquashDistance(const CharReach &cr, u32 min_width, const NGHolder &g,
+ NFAVertex pv, bool prefilter) {
+ CharReach accept_cr;
+ DEBUG_PRINTF("hello |cr|=%zu %d\n", cr.count(), (int)cr.find_first());
+
+ if (prefilter) {
+ /* a later prefilter stage make weaken the lead up so we can't be sure
+ * that all the triggers will be squashing the puffette. */
+ return 0;
+ }
+
+ /* TODO: inspect further back in the pattern */
+ for (auto u : inv_adjacent_vertices_range(pv, g)) {
+ accept_cr |= g[u].char_reach;
+ }
+
+ DEBUG_PRINTF("|accept_cr|=%zu\n", accept_cr.count());
+
+ if ((accept_cr & cr).any()) {
+ return 0; /* the accept byte doesn't always kill the puffette. TODO:
+ * maybe if we look further back we could find something that
+ * would kill the puffette... */
+ }
+ DEBUG_PRINTF("returning squash distance of %u\n", min_width);
+ return min_width;
+}
+
+/** Gives a stronger puff trigger when the trigger is connected to a wide
+ * cyclic state (aside from sds) */
+static
+void improveHead(NGHolder &g, NFAVertex *a, vector<NFAVertex> *nodes) {
+ DEBUG_PRINTF("attempting to improve puff trigger\n");
+ assert(!nodes->empty());
+ const CharReach &puff_cr = g[nodes->back()].char_reach;
+ if (puff_cr.all()) {
+ return; /* we can't really do much with this one */
+ }
+
+ /* add the runway */
+ DEBUG_PRINTF("backing off - allowing a decent header\n");
+ assert(nodes->size() > HEAD_BACKOFF);
+ for (u32 i = 0; i < HEAD_BACKOFF - 1; i++) {
+ nodes->pop_back();
+ }
+ *a = nodes->back();
+ nodes->pop_back();
+}
+
+static
+void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv,
+ const CharReach &cr, const ReportID report, u32 width,
+ bool fixed_depth, bool unbounded, bool auto_restart,
+ RoseBuild &rose, ReportManager &rm,
+ flat_set<ReportID> &chain_reports, bool prefilter) {
+ DEBUG_PRINTF("constructing Puff for report %u\n", report);
DEBUG_PRINTF("a = %zu\n", g[a].index);
-
+
const Report &puff_report = rm.getReport(report);
const bool simple_exhaust = isSimpleExhaustible(puff_report);
- const bool pureAnchored = a == g.start && singleStart(g);
- if (!pureAnchored) {
- if (a == g.startDs || a == g.start) {
- DEBUG_PRINTF("add outfix ar(false)\n");
-
+ const bool pureAnchored = a == g.start && singleStart(g);
+ if (!pureAnchored) {
+ if (a == g.startDs || a == g.start) {
+ DEBUG_PRINTF("add outfix ar(false)\n");
+
raw_puff rp(width, unbounded, report, cr, auto_restart,
simple_exhaust);
- rose.addOutfix(rp);
- return;
- }
-
- DEBUG_PRINTF("add chain tail\n");
- u32 qi = ~0U;
- u32 event = MQE_TOP;
- raw_puff rp(width, unbounded, report, cr);
- rose.addChainTail(rp, &qi, &event);
- assert(qi != ~0U);
- u32 squashDistance = allowedSquashDistance(cr, width, g, puffv,
- prefilter);
-
+ rose.addOutfix(rp);
+ return;
+ }
+
+ DEBUG_PRINTF("add chain tail\n");
+ u32 qi = ~0U;
+ u32 event = MQE_TOP;
+ raw_puff rp(width, unbounded, report, cr);
+ rose.addChainTail(rp, &qi, &event);
+ assert(qi != ~0U);
+ u32 squashDistance = allowedSquashDistance(cr, width, g, puffv,
+ prefilter);
+
Report ir = makeMpvTrigger(event, squashDistance);
- /* only need to trigger once if floatingUnboundedDot */
- bool floatingUnboundedDot = unbounded && cr.all() && !fixed_depth;
- if (floatingUnboundedDot) {
- ir.ekey = rm.getUnassociatedExhaustibleKey();
- }
- ReportID id = rm.getInternalId(ir);
- chain_reports.insert(id);
- } else {
- DEBUG_PRINTF("add outfix ar(%d)\n", (int)auto_restart);
- assert(!auto_restart || unbounded);
+ /* only need to trigger once if floatingUnboundedDot */
+ bool floatingUnboundedDot = unbounded && cr.all() && !fixed_depth;
+ if (floatingUnboundedDot) {
+ ir.ekey = rm.getUnassociatedExhaustibleKey();
+ }
+ ReportID id = rm.getInternalId(ir);
+ chain_reports.insert(id);
+ } else {
+ DEBUG_PRINTF("add outfix ar(%d)\n", (int)auto_restart);
+ assert(!auto_restart || unbounded);
raw_puff rp(width, unbounded, report, cr, auto_restart, simple_exhaust);
- rose.addOutfix(rp);
- }
-}
-
-static
-bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a,
- set<NFAVertex> &dead, const CompileContext &cc,
- bool prefilter) {
- DEBUG_PRINTF("hello\n");
- vector<NFAVertex> nodes;
- const CharReach &cr = g[a].char_reach;
- bool isDot = cr.all();
- bool unbounded = false;
- bool exhaustible = can_exhaust(g, rm);
-
- while (true) {
- if (is_special(a, g)) {
- DEBUG_PRINTF("stopped puffing due to special vertex\n");
- break;
- }
-
- if (g[a].char_reach != cr) {
- DEBUG_PRINTF("stopped puffing due to change in character "
- "reachability\n");
- break;
- }
-
- if (proper_in_degree(a, g) != 1) {
- DEBUG_PRINTF("stopped puffing due to in degree != 1\n");
- break;
- }
-
- size_t outDegree = out_degree(a, g);
- if (outDegree != 1 && (!hasSelfLoop(a, g) || outDegree != 2)) {
- DEBUG_PRINTF("stopping puffing due to out degree\n");
- break;
- }
-
- if (hasSelfLoop(a, g)) {
- DEBUG_PRINTF("has self-loop, marking unbounded\n");
- unbounded = true;
- }
-
- nodes.push_back(a);
+ rose.addOutfix(rp);
+ }
+}
+
+static
+bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a,
+ set<NFAVertex> &dead, const CompileContext &cc,
+ bool prefilter) {
+ DEBUG_PRINTF("hello\n");
+ vector<NFAVertex> nodes;
+ const CharReach &cr = g[a].char_reach;
+ bool isDot = cr.all();
+ bool unbounded = false;
+ bool exhaustible = can_exhaust(g, rm);
+
+ while (true) {
+ if (is_special(a, g)) {
+ DEBUG_PRINTF("stopped puffing due to special vertex\n");
+ break;
+ }
+
+ if (g[a].char_reach != cr) {
+ DEBUG_PRINTF("stopped puffing due to change in character "
+ "reachability\n");
+ break;
+ }
+
+ if (proper_in_degree(a, g) != 1) {
+ DEBUG_PRINTF("stopped puffing due to in degree != 1\n");
+ break;
+ }
+
+ size_t outDegree = out_degree(a, g);
+ if (outDegree != 1 && (!hasSelfLoop(a, g) || outDegree != 2)) {
+ DEBUG_PRINTF("stopping puffing due to out degree\n");
+ break;
+ }
+
+ if (hasSelfLoop(a, g)) {
+ DEBUG_PRINTF("has self-loop, marking unbounded\n");
+ unbounded = true;
+ }
+
+ nodes.push_back(a);
DEBUG_PRINTF("vertex %zu has in_degree %zu\n", g[a].index,
- in_degree(a, g));
-
- a = getSoleSourceVertex(g, a);
-
- assert(a); /* already checked that old a had a proper in degree of 1 */
-
- // Snark: we can't handle this case, because we can only handle a
- // single report ID on a vertex
- if (is_match_vertex(a, g)) {
- DEBUG_PRINTF("stop puffing due to vertex that leads to accept\n");
- if (!nodes.empty()) {
- nodes.pop_back();
- }
- break;
- }
- }
-
- if (!nodes.empty() && proper_in_degree(nodes.back(), g) != 1) {
- for (auto u : inv_adjacent_vertices_range(nodes.back(), g)) {
- if (is_special(u, g)) {
- DEBUG_PRINTF("pop\n");
- a = nodes.back();
- nodes.pop_back();
- break;
- }
- }
- }
-
- if (a != g.startDs && edge(g.startDs, a, g).second
- && proper_out_degree(a, g) == 1
- && g[a].char_reach == cr) {
- nodes.push_back(a);
- a = g.startDs;
- }
-
- bool auto_restart = false;
-
+ in_degree(a, g));
+
+ a = getSoleSourceVertex(g, a);
+
+ assert(a); /* already checked that old a had a proper in degree of 1 */
+
+ // Snark: we can't handle this case, because we can only handle a
+ // single report ID on a vertex
+ if (is_match_vertex(a, g)) {
+ DEBUG_PRINTF("stop puffing due to vertex that leads to accept\n");
+ if (!nodes.empty()) {
+ nodes.pop_back();
+ }
+ break;
+ }
+ }
+
+ if (!nodes.empty() && proper_in_degree(nodes.back(), g) != 1) {
+ for (auto u : inv_adjacent_vertices_range(nodes.back(), g)) {
+ if (is_special(u, g)) {
+ DEBUG_PRINTF("pop\n");
+ a = nodes.back();
+ nodes.pop_back();
+ break;
+ }
+ }
+ }
+
+ if (a != g.startDs && edge(g.startDs, a, g).second
+ && proper_out_degree(a, g) == 1
+ && g[a].char_reach == cr) {
+ nodes.push_back(a);
+ a = g.startDs;
+ }
+
+ bool auto_restart = false;
+
DEBUG_PRINTF("a = %zu\n", g[a].index);
-
- if (nodes.size() < MIN_PUFF_LENGTH || a == g.startDs) {
+
+ if (nodes.size() < MIN_PUFF_LENGTH || a == g.startDs) {
DEBUG_PRINTF("bad %zu %zu\n", nodes.size(), g[a].index);
- if (nodes.size() < MIN_PUFF_LENGTH) {
- return false;
- } else {
- DEBUG_PRINTF("mark unbounded\n");
- unbounded = true;
- a = g.start;
- auto_restart = !isDot;
- }
- }
-
- bool supported = false;
- bool fixed_depth = isFixedDepth(g, nodes.back());
-
- if (exhaustible) {
- supported = true;
- } else if (fixed_depth) {
- supported = true;
- } else if (unbounded) {
- /* any C{n, } can be supported as all ranges will be squashed together
- * only need to track the first */
- supported = true;
- } else if (triggerResetsPuff(g, nodes.back())) {
- supported = true;
- } else if (triggerFloodsPuff(g, nodes.back())) {
- DEBUG_PRINTF("trigger floods puff\n");
- supported = true;
- unbounded = true;
- }
-
- if (!supported) {
- DEBUG_PRINTF("not supported\n");
- return false;
- }
-
- if (cc.grey.puffImproveHead && a != g.start) {
- if (edge(g.startDs, a, g).second) {
- goto skip_improve; /* direct sds cases are better handled by auto
- * restarting puffettes */
- }
-
- if (fixed_depth) {
- goto skip_improve; /* no danger of trigger floods */
- }
-
- /* if we come after something literalish don't bother */
- if (g[a].char_reach.count() <= 2
- && in_degree(a, g) == 1
- && g[getSoleSourceVertex(g, a)].char_reach.count() <= 2) {
- goto skip_improve;
- }
-
- if (nodes.size() < MIN_PUFF_LENGTH + HEAD_BACKOFF) {
- return false; /* not enough of the puff left to worth bothering
- about */
- }
-
- improveHead(g, &a, &nodes);
- skip_improve:;
- }
-
- assert(!nodes.empty());
- const auto &reports = g[nodes[0]].reports;
- assert(!reports.empty());
-
- for (auto report : reports) {
- const Report &ir = rm.getReport(report);
- const bool highlander = ir.ekey != INVALID_EKEY;
- if (!unbounded && highlander && !isSimpleExhaustible(ir)) {
- DEBUG_PRINTF("report %u is bounded highlander but not simple "
- "exhaustible\n",
- report);
- return false;
- }
-
- if (ir.type == INTERNAL_ROSE_CHAIN) {
- DEBUG_PRINTF("puffettes cannot be chained together\n");
- return false;
- }
- }
-
- NFAVertex puffv = nodes.back();
+ if (nodes.size() < MIN_PUFF_LENGTH) {
+ return false;
+ } else {
+ DEBUG_PRINTF("mark unbounded\n");
+ unbounded = true;
+ a = g.start;
+ auto_restart = !isDot;
+ }
+ }
+
+ bool supported = false;
+ bool fixed_depth = isFixedDepth(g, nodes.back());
+
+ if (exhaustible) {
+ supported = true;
+ } else if (fixed_depth) {
+ supported = true;
+ } else if (unbounded) {
+ /* any C{n, } can be supported as all ranges will be squashed together
+ * only need to track the first */
+ supported = true;
+ } else if (triggerResetsPuff(g, nodes.back())) {
+ supported = true;
+ } else if (triggerFloodsPuff(g, nodes.back())) {
+ DEBUG_PRINTF("trigger floods puff\n");
+ supported = true;
+ unbounded = true;
+ }
+
+ if (!supported) {
+ DEBUG_PRINTF("not supported\n");
+ return false;
+ }
+
+ if (cc.grey.puffImproveHead && a != g.start) {
+ if (edge(g.startDs, a, g).second) {
+ goto skip_improve; /* direct sds cases are better handled by auto
+ * restarting puffettes */
+ }
+
+ if (fixed_depth) {
+ goto skip_improve; /* no danger of trigger floods */
+ }
+
+ /* if we come after something literalish don't bother */
+ if (g[a].char_reach.count() <= 2
+ && in_degree(a, g) == 1
+ && g[getSoleSourceVertex(g, a)].char_reach.count() <= 2) {
+ goto skip_improve;
+ }
+
+ if (nodes.size() < MIN_PUFF_LENGTH + HEAD_BACKOFF) {
+ return false; /* not enough of the puff left to worth bothering
+ about */
+ }
+
+ improveHead(g, &a, &nodes);
+ skip_improve:;
+ }
+
+ assert(!nodes.empty());
+ const auto &reports = g[nodes[0]].reports;
+ assert(!reports.empty());
+
+ for (auto report : reports) {
+ const Report &ir = rm.getReport(report);
+ const bool highlander = ir.ekey != INVALID_EKEY;
+ if (!unbounded && highlander && !isSimpleExhaustible(ir)) {
+ DEBUG_PRINTF("report %u is bounded highlander but not simple "
+ "exhaustible\n",
+ report);
+ return false;
+ }
+
+ if (ir.type == INTERNAL_ROSE_CHAIN) {
+ DEBUG_PRINTF("puffettes cannot be chained together\n");
+ return false;
+ }
+ }
+
+ NFAVertex puffv = nodes.back();
assert(puffv != NGHolder::null_vertex());
- u32 width = countChain(g, nodes.back());
-
- flat_set<ReportID> chain_reports;
-
- for (auto report : reports) {
- constructPuff(g, a, puffv, cr, report, width, fixed_depth, unbounded,
- auto_restart, rose, rm, chain_reports, prefilter);
- }
-
- if (!chain_reports.empty()) {
- wireNewAccepts(g, puffv, chain_reports);
- }
-
- dead.insert(nodes.begin(), nodes.end());
- return true;
-}
-
-bool splitOffPuffs(RoseBuild &rose, ReportManager &rm, NGHolder &g,
- bool prefilter, const CompileContext &cc) {
- if (!cc.grey.allowPuff) {
- return false;
- }
-
- size_t count = 0;
- set<NFAVertex> dead;
-
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- if (doComponent(rose, rm, g, v, dead, cc, prefilter)) {
- count++;
- }
- }
-
- if (!dead.empty()) {
- remove_vertices(dead, g);
- pruneUseless(g);
- }
-
- DEBUG_PRINTF("puffs: %zu\n", count);
- return num_vertices(g) <= N_SPECIALS;
-}
-
-bool isPuffable(const NGHolder &g, bool fixed_depth,
- const ReportManager &rm, const Grey &grey) {
- if (!grey.allowPuff) {
- return false;
- }
-
- if (!onlyOneTop(g)) {
- DEBUG_PRINTF("more than one top\n");
- return false;
- }
-
- const set<ReportID> reports = all_reports(g);
- if (reports.size() != 1) {
- DEBUG_PRINTF("too many reports\n");
- return false;
- }
-
- const Report &ir = rm.getReport(*reports.begin());
-
- if (ir.type == INTERNAL_ROSE_CHAIN) {
- DEBUG_PRINTF("puffettes cannot be chained together\n");
- return false;
- }
-
- PureRepeat repeat;
- if (!isPureRepeat(g, repeat)) {
- DEBUG_PRINTF("not pure bounded repeat\n");
- return false;
- }
-
- if (repeat.bounds.min == depth(0)) {
- DEBUG_PRINTF("repeat min bound is zero\n");
- return false;
- }
-
- // We can puff if:
- // (a) repeat is {N,}; or
- // (b) repeat is {N} and fixed-depth, or highlander (and will accept the
- // first match)
-
- DEBUG_PRINTF("repeat is %s\n", repeat.bounds.str().c_str());
-
- if (repeat.bounds.max.is_infinite()) {
- return true;
- }
-
- if (repeat.bounds.min == repeat.bounds.max) {
- if (fixed_depth) {
- DEBUG_PRINTF("fixed depth\n");
- return true;
- }
-
- const bool highlander = ir.ekey != INVALID_EKEY;
-
- // If we're highlander, we must be simple-exhaustible as well.
- if (highlander && isSimpleExhaustible(ir)) {
- return true;
- }
- }
-
- return false;
-}
-
-} // namespace ue2
+ u32 width = countChain(g, nodes.back());
+
+ flat_set<ReportID> chain_reports;
+
+ for (auto report : reports) {
+ constructPuff(g, a, puffv, cr, report, width, fixed_depth, unbounded,
+ auto_restart, rose, rm, chain_reports, prefilter);
+ }
+
+ if (!chain_reports.empty()) {
+ wireNewAccepts(g, puffv, chain_reports);
+ }
+
+ dead.insert(nodes.begin(), nodes.end());
+ return true;
+}
+
+bool splitOffPuffs(RoseBuild &rose, ReportManager &rm, NGHolder &g,
+ bool prefilter, const CompileContext &cc) {
+ if (!cc.grey.allowPuff) {
+ return false;
+ }
+
+ size_t count = 0;
+ set<NFAVertex> dead;
+
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ if (doComponent(rose, rm, g, v, dead, cc, prefilter)) {
+ count++;
+ }
+ }
+
+ if (!dead.empty()) {
+ remove_vertices(dead, g);
+ pruneUseless(g);
+ }
+
+ DEBUG_PRINTF("puffs: %zu\n", count);
+ return num_vertices(g) <= N_SPECIALS;
+}
+
+bool isPuffable(const NGHolder &g, bool fixed_depth,
+ const ReportManager &rm, const Grey &grey) {
+ if (!grey.allowPuff) {
+ return false;
+ }
+
+ if (!onlyOneTop(g)) {
+ DEBUG_PRINTF("more than one top\n");
+ return false;
+ }
+
+ const set<ReportID> reports = all_reports(g);
+ if (reports.size() != 1) {
+ DEBUG_PRINTF("too many reports\n");
+ return false;
+ }
+
+ const Report &ir = rm.getReport(*reports.begin());
+
+ if (ir.type == INTERNAL_ROSE_CHAIN) {
+ DEBUG_PRINTF("puffettes cannot be chained together\n");
+ return false;
+ }
+
+ PureRepeat repeat;
+ if (!isPureRepeat(g, repeat)) {
+ DEBUG_PRINTF("not pure bounded repeat\n");
+ return false;
+ }
+
+ if (repeat.bounds.min == depth(0)) {
+ DEBUG_PRINTF("repeat min bound is zero\n");
+ return false;
+ }
+
+ // We can puff if:
+ // (a) repeat is {N,}; or
+ // (b) repeat is {N} and fixed-depth, or highlander (and will accept the
+ // first match)
+
+ DEBUG_PRINTF("repeat is %s\n", repeat.bounds.str().c_str());
+
+ if (repeat.bounds.max.is_infinite()) {
+ return true;
+ }
+
+ if (repeat.bounds.min == repeat.bounds.max) {
+ if (fixed_depth) {
+ DEBUG_PRINTF("fixed depth\n");
+ return true;
+ }
+
+ const bool highlander = ir.ekey != INVALID_EKEY;
+
+ // If we're highlander, we must be simple-exhaustible as well.
+ if (highlander && isSimpleExhaustible(ir)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_puff.h b/contrib/libs/hyperscan/src/nfagraph/ng_puff.h
index af0237a594..c31e7540ba 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_puff.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_puff.h
@@ -1,56 +1,56 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Puff construction from NGHolder.
- */
-
-#ifndef NG_PUFF_H
-#define NG_PUFF_H
-
-namespace ue2 {
-
-struct CompileContext;
-struct Grey;
-class RoseBuild;
-class NGHolder;
-class ReportManager;
-
-/** \brief Split off portions of the graph that are implementable as Puff
- * engines. Returns true if the entire graph is consumed. */
-bool splitOffPuffs(RoseBuild &rose, ReportManager &rm, NGHolder &g,
- bool prefilter, const CompileContext &cc);
-
-/** \brief True if the entire graph in \a g could be constructed as a Puff
- * engine. */
-bool isPuffable(const NGHolder &g, bool fixed_depth, const ReportManager &rm,
- const Grey &grey);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Puff construction from NGHolder.
+ */
+
+#ifndef NG_PUFF_H
+#define NG_PUFF_H
+
+namespace ue2 {
+
+struct CompileContext;
+struct Grey;
+class RoseBuild;
+class NGHolder;
+class ReportManager;
+
+/** \brief Split off portions of the graph that are implementable as Puff
+ * engines. Returns true if the entire graph is consumed. */
+bool splitOffPuffs(RoseBuild &rose, ReportManager &rm, NGHolder &g,
+ bool prefilter, const CompileContext &cc);
+
+/** \brief True if the entire graph in \a g could be constructed as a Puff
+ * engine. */
+bool isPuffable(const NGHolder &g, bool fixed_depth, const ReportManager &rm,
+ const Grey &grey);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.cpp
index fc46907024..06b9daeeca 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.cpp
@@ -1,899 +1,899 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief NFA graph reductions.
- *
- * This code attempts to make the NFA graph smaller by performing a number of
- * local transformations:
- *
- * ### (1) removal of redundant vertices:
- *
- * v is redundant wrt to u if succ(v) is a subset of succ(u)
- * AND pred(v) is a subset of pred(u)
- * AND cr(v) is a subset of cr(u)
- *
- * ### (2) 'diamond' transformation:
- *
- * given succ(v) == succ(u) and pred(v) == pred(u),
- * v and u can be replaced by w with succ(w) = succ(v), pred(w) = pred(v),
- * and cr(w) = union(cr(v), cr(u))
- *
- * ### (3) locally identifiable left equivalence:
- *
- * given pred(v) == pred(u) (**) and cr(v) == cr(u),
- * v and u can be replaced by w with pred(w) = pred(v), cr(w) = cr(v),
- * and succ(w) = union(succ(v), succ(u))
- *
- * ### (4) locally identifiable right equivalence:
- *
- * given succ(v) == succ(u) (**) and cr(v) == cr(u),
- * v and u can be replaced by w with succ(w) = succ(v), cr(w) = cr(v),
- * and pred(w) = union(pred(v), pred(u))
- *
- * NOTE (**): for left and right equivalence, we can also do the transform if
- * set(u) contains u, set(v) contains v and the sets are otherwise equal. This
- * enables equivalent vertices with self-loops to be merged.
- *
- * If v and u raise accepts, they can only be merged if they raise the same
- * report IDs.
- *
- * Transformations are applied repeatedly until the graph stops changing.
- *
- * Note that the final graph may depend on the order in which these
- * transformations are applied. In order to reduce the non-determinism the
- * following order is imposed: (1); (2); (3) + (4).
- */
-#include "ng_redundancy.h"
-
-#include "ng_holder.h"
-#include "ng_calc_components.h"
-#include "ng_dominators.h"
-#include "ng_prune.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/container.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief NFA graph reductions.
+ *
+ * This code attempts to make the NFA graph smaller by performing a number of
+ * local transformations:
+ *
+ * ### (1) removal of redundant vertices:
+ *
+ * v is redundant wrt to u if succ(v) is a subset of succ(u)
+ * AND pred(v) is a subset of pred(u)
+ * AND cr(v) is a subset of cr(u)
+ *
+ * ### (2) 'diamond' transformation:
+ *
+ * given succ(v) == succ(u) and pred(v) == pred(u),
+ * v and u can be replaced by w with succ(w) = succ(v), pred(w) = pred(v),
+ * and cr(w) = union(cr(v), cr(u))
+ *
+ * ### (3) locally identifiable left equivalence:
+ *
+ * given pred(v) == pred(u) (**) and cr(v) == cr(u),
+ * v and u can be replaced by w with pred(w) = pred(v), cr(w) = cr(v),
+ * and succ(w) = union(succ(v), succ(u))
+ *
+ * ### (4) locally identifiable right equivalence:
+ *
+ * given succ(v) == succ(u) (**) and cr(v) == cr(u),
+ * v and u can be replaced by w with succ(w) = succ(v), cr(w) = cr(v),
+ * and pred(w) = union(pred(v), pred(u))
+ *
+ * NOTE (**): for left and right equivalence, we can also do the transform if
+ * set(u) contains u, set(v) contains v and the sets are otherwise equal. This
+ * enables equivalent vertices with self-loops to be merged.
+ *
+ * If v and u raise accepts, they can only be merged if they raise the same
+ * report IDs.
+ *
+ * Transformations are applied repeatedly until the graph stops changing.
+ *
+ * Note that the final graph may depend on the order in which these
+ * transformations are applied. In order to reduce the non-determinism the
+ * following order is imposed: (1); (2); (3) + (4).
+ */
+#include "ng_redundancy.h"
+
+#include "ng_holder.h"
+#include "ng_calc_components.h"
+#include "ng_dominators.h"
+#include "ng_prune.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/container.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
-
-#include <algorithm>
-#include <cassert>
-#include <map>
-#include <set>
-#include <vector>
-
-#include <boost/graph/depth_first_search.hpp>
-#include <boost/graph/reverse_graph.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
-namespace {
-
-/** Precalculated (and maintained) information about a vertex. */
-class VertexInfo {
-public:
- flat_set<NFAVertex> pred; //!< predecessors of this vertex
- flat_set<NFAVertex> succ; //!< successors of this vertex
- bool isAccept = false; //!< does this vertex lead to accept?
- bool isRemoved = false; //!< have we already removed this vertex?
-
- size_t inDegree() const { return pred.size(); }
- size_t outDegree() const { return succ.size(); }
-};
-
-class VertexInfoMap {
-public:
- explicit VertexInfoMap(const NGHolder &gg)
- : g(gg), infos(num_vertices(gg)) {}
- VertexInfo &operator[](NFAVertex v) {
- u32 i = g[v].index;
- assert(i < infos.size());
- return infos[i];
- }
-
- const VertexInfo &operator[](NFAVertex v) const {
- u32 i = g[v].index;
- assert(i < infos.size());
- return infos[i];
- }
-
-private:
- const NGHolder &g;
- vector<VertexInfo> infos;
-};
-
-} // namespace
-
-/** Populates the info map with their predecessor and successor states, and
- * whether they are accept states. */
-static
-void populateContainers(const NGHolder &g, VertexInfoMap &infoMap) {
- for (auto v : vertices_range(g)) {
- VertexInfo &info = infoMap[v];
- assert(info.pred.empty() && info.succ.empty());
-
- // Build successor and predecessor sets
- insert(&info.pred, inv_adjacent_vertices(v, g));
- insert(&info.succ, adjacent_vertices(v, g));
-
- // Note whether the vertex is an accept state
- if (!is_special(v, g)) {
- if (contains(info.succ, g.accept)
- || contains(info.succ, g.acceptEod)) {
- info.isAccept = true;
- }
- }
- }
-}
-
-/** Helper function to take the intersection of two sorted vertex sets
- * in-place. */
-static
-void inplaceIntersection(vector<NFAVertex> &vset1,
- const flat_set<NFAVertex> &vset2) {
+#include "util/graph_range.h"
+
+#include <algorithm>
+#include <cassert>
+#include <map>
+#include <set>
+#include <vector>
+
+#include <boost/graph/depth_first_search.hpp>
+#include <boost/graph/reverse_graph.hpp>
+
+using namespace std;
+
+namespace ue2 {
+
+namespace {
+
+/** Precalculated (and maintained) information about a vertex. */
+class VertexInfo {
+public:
+ flat_set<NFAVertex> pred; //!< predecessors of this vertex
+ flat_set<NFAVertex> succ; //!< successors of this vertex
+ bool isAccept = false; //!< does this vertex lead to accept?
+ bool isRemoved = false; //!< have we already removed this vertex?
+
+ size_t inDegree() const { return pred.size(); }
+ size_t outDegree() const { return succ.size(); }
+};
+
+class VertexInfoMap {
+public:
+ explicit VertexInfoMap(const NGHolder &gg)
+ : g(gg), infos(num_vertices(gg)) {}
+ VertexInfo &operator[](NFAVertex v) {
+ u32 i = g[v].index;
+ assert(i < infos.size());
+ return infos[i];
+ }
+
+ const VertexInfo &operator[](NFAVertex v) const {
+ u32 i = g[v].index;
+ assert(i < infos.size());
+ return infos[i];
+ }
+
+private:
+ const NGHolder &g;
+ vector<VertexInfo> infos;
+};
+
+} // namespace
+
+/** Populates the info map with their predecessor and successor states, and
+ * whether they are accept states. */
+static
+void populateContainers(const NGHolder &g, VertexInfoMap &infoMap) {
+ for (auto v : vertices_range(g)) {
+ VertexInfo &info = infoMap[v];
+ assert(info.pred.empty() && info.succ.empty());
+
+ // Build successor and predecessor sets
+ insert(&info.pred, inv_adjacent_vertices(v, g));
+ insert(&info.succ, adjacent_vertices(v, g));
+
+ // Note whether the vertex is an accept state
+ if (!is_special(v, g)) {
+ if (contains(info.succ, g.accept)
+ || contains(info.succ, g.acceptEod)) {
+ info.isAccept = true;
+ }
+ }
+ }
+}
+
+/** Helper function to take the intersection of two sorted vertex sets
+ * in-place. */
+static
+void inplaceIntersection(vector<NFAVertex> &vset1,
+ const flat_set<NFAVertex> &vset2) {
const NFAVertex GONE = NGHolder::null_vertex();
-
- vector<NFAVertex>::iterator it = vset1.begin(), ite = vset1.end();
- flat_set<NFAVertex>::const_iterator jt = vset2.begin(), jte = vset2.end();
-
- while ((it != ite) && (jt != jte)) {
- assert(*it != GONE);
-
- if (*it < *jt) {
- // present in vset1 but not in vset2. Set to null, remove in a
- // second pass.
- *it = GONE;
- ++it;
- } else if (*jt < *it) {
- // present in vset2 but not in vset1, skip.
- ++jt;
- } else {
- // present in both sets.
- ++it; ++jt;
- }
- }
-
- // Left overs are only in that set.
- vset1.erase(it, ite);
-
- // Remove nulls created above.
- vset1.erase(remove(vset1.begin(), vset1.end(), GONE), vset1.end());
-}
-
-/** Find the intersection of the successors of our predecessors. */
-static
-void succPredIntersection(const NFAVertex v, const flat_set<NFAVertex> &predSet,
- const VertexInfoMap &infoMap,
- vector<NFAVertex> &intersection,
- bool considerSelf = true /* follow self loops */) {
- /* find a good seed for the intersection */
- const flat_set<NFAVertex> *best = nullptr;
- for (auto u : predSet) {
- if (!considerSelf && u == v) {
- continue;
- }
-
- const flat_set<NFAVertex> &succSet = infoMap[u].succ;
- if (!best || succSet.size() <= best->size()) {
- best = &succSet;
-
- // Break out if we've reduced our intersection to [v]
- if (best->size() == 1) {
- assert(*(best->begin()) == v);
- intersection.push_back(v);
- return;
- }
- }
- }
-
- if (best) {
- insert(&intersection, intersection.end(), *best);
- }
-
- for (auto u : predSet) {
- if (!considerSelf && u == v) {
- continue;
- }
-
- inplaceIntersection(intersection, infoMap[u].succ);
-
- // Check: intersection should always be at least size 1
- assert(!intersection.empty());
-
- // Break out if we've reduced our intersection to [v]
- if (intersection.size() == 1) {
- assert(*intersection.begin() == v);
- return;
- }
- }
-}
-
-/** Find the intersection of the predecessors of our successors. */
-static
-void predSuccIntersection(const NFAVertex v,
- const flat_set<NFAVertex> &succSet,
- const VertexInfoMap &infoMap,
- vector<NFAVertex> &intersection,
- bool considerSelf = true /* follow self loops */) {
- /* find a good seed for the intersection */
- const flat_set<NFAVertex> *best = nullptr;
- for (auto w : succSet) {
- if (!considerSelf && w == v) {
- continue;
- }
-
- const flat_set<NFAVertex> &predSet = infoMap[w].pred;
- if (!best || predSet.size() <= best->size()) {
- best = &predSet;
-
- // Break out if we've reduced our intersection to [v]
- if (best->size() == 1) {
- assert(*(best->begin()) == v);
- intersection.push_back(v);
- return;
- }
- }
- }
-
- if (best) {
- insert(&intersection, intersection.end(), *best);
- }
-
- for (auto w : succSet) {
- if (!considerSelf && w == v) {
- continue;
- }
-
- inplaceIntersection(intersection, infoMap[w].pred);
-
- // Check: intersection should always be at least size 1
- assert(!intersection.empty());
-
- // Break out if we've reduced our intersection to [v]
- if (intersection.size() == 1) {
- assert(*intersection.begin() == v);
- return;
- }
- }
-}
-
-/** Update containers to take into account the removal of vertex v. */
-static
-void markForRemoval(const NFAVertex v, VertexInfoMap &infoMap,
- set<NFAVertex> &removable) {
- VertexInfo &info = infoMap[v];
- assert(!info.isRemoved);
- assert(!contains(removable, v));
- info.isRemoved = true;
- removable.insert(v);
-
- // remove v from its predecessors' successors
- for (auto u : info.pred) {
- infoMap[u].succ.erase(v);
- }
-
- // remove v from its successors' predecessors
- for (auto w : info.succ) {
- infoMap[w].pred.erase(v);
- }
-}
-
-static
-bool hasInEdgeTops(const NGHolder &g, NFAVertex v) {
+
+ vector<NFAVertex>::iterator it = vset1.begin(), ite = vset1.end();
+ flat_set<NFAVertex>::const_iterator jt = vset2.begin(), jte = vset2.end();
+
+ while ((it != ite) && (jt != jte)) {
+ assert(*it != GONE);
+
+ if (*it < *jt) {
+ // present in vset1 but not in vset2. Set to null, remove in a
+ // second pass.
+ *it = GONE;
+ ++it;
+ } else if (*jt < *it) {
+ // present in vset2 but not in vset1, skip.
+ ++jt;
+ } else {
+ // present in both sets.
+ ++it; ++jt;
+ }
+ }
+
+ // Leftovers in vset1 have no counterpart in vset2, so drop them.
+ vset1.erase(it, ite);
+
+ // Remove nulls created above.
+ vset1.erase(remove(vset1.begin(), vset1.end(), GONE), vset1.end());
+}
+
+/** Find the intersection of the successors of our predecessors. */
+static
+void succPredIntersection(const NFAVertex v, const flat_set<NFAVertex> &predSet,
+ const VertexInfoMap &infoMap,
+ vector<NFAVertex> &intersection,
+ bool considerSelf = true /* follow self loops */) {
+ /* find a good seed for the intersection */
+ const flat_set<NFAVertex> *best = nullptr;
+ for (auto u : predSet) {
+ if (!considerSelf && u == v) {
+ continue;
+ }
+
+ const flat_set<NFAVertex> &succSet = infoMap[u].succ;
+ if (!best || succSet.size() <= best->size()) {
+ best = &succSet;
+
+ // Break out if we've reduced our intersection to [v]
+ if (best->size() == 1) {
+ assert(*(best->begin()) == v);
+ intersection.push_back(v);
+ return;
+ }
+ }
+ }
+
+ if (best) {
+ insert(&intersection, intersection.end(), *best);
+ }
+
+ for (auto u : predSet) {
+ if (!considerSelf && u == v) {
+ continue;
+ }
+
+ inplaceIntersection(intersection, infoMap[u].succ);
+
+ // Check: intersection should always be at least size 1
+ assert(!intersection.empty());
+
+ // Break out if we've reduced our intersection to [v]
+ if (intersection.size() == 1) {
+ assert(*intersection.begin() == v);
+ return;
+ }
+ }
+}
+
+/** Find the intersection of the predecessors of our successors. */
+static
+void predSuccIntersection(const NFAVertex v,
+ const flat_set<NFAVertex> &succSet,
+ const VertexInfoMap &infoMap,
+ vector<NFAVertex> &intersection,
+ bool considerSelf = true /* follow self loops */) {
+ /* find a good seed for the intersection */
+ const flat_set<NFAVertex> *best = nullptr;
+ for (auto w : succSet) {
+ if (!considerSelf && w == v) {
+ continue;
+ }
+
+ const flat_set<NFAVertex> &predSet = infoMap[w].pred;
+ if (!best || predSet.size() <= best->size()) {
+ best = &predSet;
+
+ // Break out if we've reduced our intersection to [v]
+ if (best->size() == 1) {
+ assert(*(best->begin()) == v);
+ intersection.push_back(v);
+ return;
+ }
+ }
+ }
+
+ if (best) {
+ insert(&intersection, intersection.end(), *best);
+ }
+
+ for (auto w : succSet) {
+ if (!considerSelf && w == v) {
+ continue;
+ }
+
+ inplaceIntersection(intersection, infoMap[w].pred);
+
+ // Check: intersection should always be at least size 1
+ assert(!intersection.empty());
+
+ // Break out if we've reduced our intersection to [v]
+ if (intersection.size() == 1) {
+ assert(*intersection.begin() == v);
+ return;
+ }
+ }
+}
+
+/** Update containers to take into account the removal of vertex v. */
+static
+void markForRemoval(const NFAVertex v, VertexInfoMap &infoMap,
+ set<NFAVertex> &removable) {
+ VertexInfo &info = infoMap[v];
+ assert(!info.isRemoved);
+ assert(!contains(removable, v));
+ info.isRemoved = true;
+ removable.insert(v);
+
+ // remove v from its predecessors' successors
+ for (auto u : info.pred) {
+ infoMap[u].succ.erase(v);
+ }
+
+ // remove v from its successors' predecessors
+ for (auto w : info.succ) {
+ infoMap[w].pred.erase(v);
+ }
+}
+
+static
+bool hasInEdgeTops(const NGHolder &g, NFAVertex v) {
NFAEdge e = edge(g.start, v, g);
return e && !g[e].tops.empty();
-}
-
-/** Transform (1), removal of redundant vertices. */
-static
-bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
- set<NFAVertex> &removable) {
- /* useless merges can be done in any order, no need to take any care with
- * ordering */
-
- // Temporary vectors used for intersections below
- vector<NFAVertex> succPredSet, predSuccSet, intersection;
-
- bool changed = false;
- for (auto v : vertices_range(g)) {
- VertexInfo &info = infoMap[v];
-
- if (info.isRemoved) {
- continue;
- }
-
- assert(!contains(removable, v));
-
- if (is_special(v, g)) {
- continue;
- }
-
- /* we do not need to check for out edge tops - as only specials (start)
- * can have tops and they are already disqualified. */
- if (hasInEdgeTops(g, v)) {
- continue; // Conservatively skip anything with nonzero tops.
- }
-
- if (info.pred.empty() || info.succ.empty()) {
+}
+
+/** Transform (1), removal of redundant vertices. */
+static
+bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
+ set<NFAVertex> &removable) {
+ /* useless merges can be done in any order, no need to take any care with
+ * ordering */
+
+ // Temporary vectors used for intersections below
+ vector<NFAVertex> succPredSet, predSuccSet, intersection;
+
+ bool changed = false;
+ for (auto v : vertices_range(g)) {
+ VertexInfo &info = infoMap[v];
+
+ if (info.isRemoved) {
+ continue;
+ }
+
+ assert(!contains(removable, v));
+
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ /* we do not need to check for out edge tops - as only specials (start)
+ * can have tops and they are already disqualified. */
+ if (hasInEdgeTops(g, v)) {
+ continue; // Conservatively skip anything with nonzero tops.
+ }
+
+ if (info.pred.empty() || info.succ.empty()) {
DEBUG_PRINTF("vertex %zu has empty pred/succ list\n", g[v].index);
- assert(0); // non-special states should always have succ/pred lists
- continue;
- }
-
- // The following cases are more complex and rely on the intersection of
- // Succ(Pred(v)) and Pred(Succ(v))
-
- // Compute intersections, operating on the smaller set first
- // Note that we use vectors here, as set_intersection underneath
- // guarantees sorted output, and vectors were quite a bit
- // faster than sets or lists.
-
- succPredSet.clear();
- predSuccSet.clear();
-
- if (info.pred.size() <= info.succ.size()) {
- succPredIntersection(v, info.pred, infoMap, succPredSet);
- if (succPredSet.size() == 1) {
- // nobody in here but us chickens
- assert(*succPredSet.begin() == v);
- continue;
- }
- predSuccIntersection(v, info.succ, infoMap, predSuccSet);
- if (predSuccSet.size() == 1) {
- assert(*predSuccSet.begin() == v);
- continue;
- }
- } else {
- predSuccIntersection(v, info.succ, infoMap, predSuccSet);
- if (predSuccSet.size() == 1) {
- assert(*predSuccSet.begin() == v);
- continue;
- }
- succPredIntersection(v, info.pred, infoMap, succPredSet);
- if (succPredSet.size() == 1) {
- assert(*succPredSet.begin() == v);
- continue;
- }
- }
-
- // Find the intersection of Succ(Pred(v)) and Pred(Succ(v))
- intersection.clear();
- set_intersection(succPredSet.begin(), succPredSet.end(),
- predSuccSet.begin(), predSuccSet.end(),
- back_inserter(intersection));
-
- /* Boring if it is just us in the intersection */
- if (intersection.size() < 2) {
- continue;
- }
-
- // Compare char_reach, mark v for removal if any members of
- // the intersection have an equal or greater reach
- const CharReach &currReach = g[v].char_reach;
- const auto &currReports = g[v].reports;
- for (auto t : intersection) {
- const VertexInfo &info2 = infoMap[t];
-
- /* start is never a succ of a state, so will never be in the
- * predsucc/succpred intersection */
- assert(t != g.start);
-
- if (t == v || info2.isRemoved) {
- continue;
- }
-
- // For each candidate C to make V redundant, check:
- // if V is an accept state, C must be an accept state for
- // the same pattern
- // pred(C) is a superset of pred(V)
- // succ(C) is a superset of succ(V)
- // reach(C) is a superset of reach(V)
- //
- // Note: pred/sec tests are covered by the intersections
- // calculated above.
-
- /* note: links to accepts are also tracked in succs */
- if (info.isAccept && currReports != g[t].reports) {
- continue;
- }
-
- if (som) {
- if (t == g.startDs) {
- continue;
- }
- if (is_virtual_start(t, g) != is_virtual_start(v, g)) {
- continue;
- }
- }
-
- /* we do not need to check for out edge tops - as only start
- * can have tops and it has already been ruled out. */
- if (hasInEdgeTops(g, t)) {
- continue; // Conservatively skip anything with nonzero tops.
- }
-
- CharReach &otherReach = g[t].char_reach;
- if (currReach.isSubsetOf(otherReach)) {
+ assert(0); // non-special states should always have succ/pred lists
+ continue;
+ }
+
+ // The following cases are more complex and rely on the intersection of
+ // Succ(Pred(v)) and Pred(Succ(v))
+
+ // Compute intersections, operating on the smaller set first
+ // Note that we use vectors here, as set_intersection underneath
+ // guarantees sorted output, and vectors were quite a bit
+ // faster than sets or lists.
+
+ succPredSet.clear();
+ predSuccSet.clear();
+
+ if (info.pred.size() <= info.succ.size()) {
+ succPredIntersection(v, info.pred, infoMap, succPredSet);
+ if (succPredSet.size() == 1) {
+ // nobody in here but us chickens
+ assert(*succPredSet.begin() == v);
+ continue;
+ }
+ predSuccIntersection(v, info.succ, infoMap, predSuccSet);
+ if (predSuccSet.size() == 1) {
+ assert(*predSuccSet.begin() == v);
+ continue;
+ }
+ } else {
+ predSuccIntersection(v, info.succ, infoMap, predSuccSet);
+ if (predSuccSet.size() == 1) {
+ assert(*predSuccSet.begin() == v);
+ continue;
+ }
+ succPredIntersection(v, info.pred, infoMap, succPredSet);
+ if (succPredSet.size() == 1) {
+ assert(*succPredSet.begin() == v);
+ continue;
+ }
+ }
+
+ // Find the intersection of Succ(Pred(v)) and Pred(Succ(v))
+ intersection.clear();
+ set_intersection(succPredSet.begin(), succPredSet.end(),
+ predSuccSet.begin(), predSuccSet.end(),
+ back_inserter(intersection));
+
+ /* Boring if it is just us in the intersection */
+ if (intersection.size() < 2) {
+ continue;
+ }
+
+ // Compare char_reach, mark v for removal if any members of
+ // the intersection have an equal or greater reach
+ const CharReach &currReach = g[v].char_reach;
+ const auto &currReports = g[v].reports;
+ for (auto t : intersection) {
+ const VertexInfo &info2 = infoMap[t];
+
+ /* start is never a succ of a state, so will never be in the
+ * predsucc/succpred intersection */
+ assert(t != g.start);
+
+ if (t == v || info2.isRemoved) {
+ continue;
+ }
+
+ // For each candidate C to make V redundant, check:
+ // if V is an accept state, C must be an accept state for
+ // the same pattern
+ // pred(C) is a superset of pred(V)
+ // succ(C) is a superset of succ(V)
+ // reach(C) is a superset of reach(V)
+ //
+ // Note: pred/succ tests are covered by the intersections
+ // calculated above.
+
+ /* note: links to accepts are also tracked in succs */
+ if (info.isAccept && currReports != g[t].reports) {
+ continue;
+ }
+
+ if (som) {
+ if (t == g.startDs) {
+ continue;
+ }
+ if (is_virtual_start(t, g) != is_virtual_start(v, g)) {
+ continue;
+ }
+ }
+
+ /* we do not need to check for out edge tops - as only start
+ * can have tops and it has already been ruled out. */
+ if (hasInEdgeTops(g, t)) {
+ continue; // Conservatively skip anything with nonzero tops.
+ }
+
+ CharReach &otherReach = g[t].char_reach;
+ if (currReach.isSubsetOf(otherReach)) {
DEBUG_PRINTF("removing redundant vertex %zu (keeping %zu)\n",
- g[v].index, g[t].index);
- markForRemoval(v, infoMap, removable);
- changed = true;
- break;
- }
- }
- }
-
- return changed;
-}
-
-/** Transform (2), diamond merge pass. */
-static
-bool doDiamondMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
- set<NFAVertex> &removable) {
- // Temporary vectors used for intersections below
- vector<NFAVertex> succPredSet, predSuccSet, intersection;
-
- bool changed = false;
- for (auto v : vertices_range(g)) {
- VertexInfo &info = infoMap[v];
-
- if (info.isRemoved) {
- continue;
- }
-
- assert(!contains(removable, v));
-
- if (is_special(v, g)) {
- continue;
- }
-
- /* we do not need to check for out edge tops - as only specials (start)
- * can have tops and they are already disqualified. */
- if (hasInEdgeTops(g, v)) {
- continue; // Conservatively skip anything with nonzero tops.
- }
-
- if (info.pred.empty() || info.succ.empty()) {
- assert(0); // non-special states should always have succ/pred lists
- continue;
- }
-
- // The following cases are more complex and rely on the intersection of
- // Succ(Pred(v)) and Pred(Succ(v))
-
- // Compute intersections, operating on the smaller set first
- // Note that we use vectors here, as set_intersection underneath
- // guarantees sorted output, and vectors were quite a bit faster than
- // sets or lists.
-
- succPredSet.clear();
- predSuccSet.clear();
-
- if (info.pred.size() <= info.succ.size()) {
- succPredIntersection(v, info.pred, infoMap, succPredSet);
- if (succPredSet.size() == 1) {
- // nobody in here but us chickens
- assert(*succPredSet.begin() == v);
- continue;
- }
- predSuccIntersection(v, info.succ, infoMap, predSuccSet);
- if (predSuccSet.size() == 1) {
- assert(*predSuccSet.begin() == v);
- continue;
- }
- } else {
- predSuccIntersection(v, info.succ, infoMap, predSuccSet);
- if (predSuccSet.size() == 1) {
- assert(*predSuccSet.begin() == v);
- continue;
- }
- succPredIntersection(v, info.pred, infoMap, succPredSet);
- if (succPredSet.size() == 1) {
- assert(*succPredSet.begin() == v);
- continue;
- }
- }
-
- // Find the intersection of Succ(Pred(v)) and Pred(Succ(v))
- intersection.clear();
- set_intersection(succPredSet.begin(), succPredSet.end(),
- predSuccSet.begin(), predSuccSet.end(),
- back_inserter(intersection));
-
- /* Boring if it is just us in the intersection */
- if (intersection.size() < 2) {
- continue;
- }
-
- const CharReach &currReach = g[v].char_reach;
- const auto &currReports = g[v].reports;
- for (auto t : intersection) {
- const VertexInfo &info2 = infoMap[t];
-
- if (t == v || info2.isRemoved || is_special(t, g)) {
- continue;
- }
-
- /* note: links to accepts are also tracked in succs */
- if (info.isAccept && currReports != g[t].reports) {
- continue;
- }
-
- /* we do not need to check for out edge tops - as only specials
- * (start) can have tops and they are already disqualified. */
- if (hasInEdgeTops(g, t)) {
- continue; // Conservatively skip anything with nonzero tops.
- }
-
- if (som) {
- if (is_virtual_start(v, g) != is_virtual_start(t, g)) {
- continue; // can only merge like with like.
- }
- }
-
- // If in-degree of v == in-degree of target
- // and out-degree of v == out-degree of target
- // (because pred and succ are supersets)
- // then combine charreach of v into target and remove v
- if (info.inDegree() == info2.inDegree()
- && info.outDegree() == info2.outDegree()) {
- // add character reachability of v into target
- CharReach &otherReach = g[t].char_reach;
- otherReach |= currReach;
- // v can be removed
+ g[v].index, g[t].index);
+ markForRemoval(v, infoMap, removable);
+ changed = true;
+ break;
+ }
+ }
+ }
+
+ return changed;
+}
+
+/** Transform (2), diamond merge pass. */
+static
+bool doDiamondMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
+ set<NFAVertex> &removable) {
+ // Temporary vectors used for intersections below
+ vector<NFAVertex> succPredSet, predSuccSet, intersection;
+
+ bool changed = false;
+ for (auto v : vertices_range(g)) {
+ VertexInfo &info = infoMap[v];
+
+ if (info.isRemoved) {
+ continue;
+ }
+
+ assert(!contains(removable, v));
+
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ /* we do not need to check for out edge tops - as only specials (start)
+ * can have tops and they are already disqualified. */
+ if (hasInEdgeTops(g, v)) {
+ continue; // Conservatively skip anything with nonzero tops.
+ }
+
+ if (info.pred.empty() || info.succ.empty()) {
+ assert(0); // non-special states should always have succ/pred lists
+ continue;
+ }
+
+ // The following cases are more complex and rely on the intersection of
+ // Succ(Pred(v)) and Pred(Succ(v))
+
+ // Compute intersections, operating on the smaller set first
+ // Note that we use vectors here, as set_intersection underneath
+ // guarantees sorted output, and vectors were quite a bit faster than
+ // sets or lists.
+
+ succPredSet.clear();
+ predSuccSet.clear();
+
+ if (info.pred.size() <= info.succ.size()) {
+ succPredIntersection(v, info.pred, infoMap, succPredSet);
+ if (succPredSet.size() == 1) {
+ // nobody in here but us chickens
+ assert(*succPredSet.begin() == v);
+ continue;
+ }
+ predSuccIntersection(v, info.succ, infoMap, predSuccSet);
+ if (predSuccSet.size() == 1) {
+ assert(*predSuccSet.begin() == v);
+ continue;
+ }
+ } else {
+ predSuccIntersection(v, info.succ, infoMap, predSuccSet);
+ if (predSuccSet.size() == 1) {
+ assert(*predSuccSet.begin() == v);
+ continue;
+ }
+ succPredIntersection(v, info.pred, infoMap, succPredSet);
+ if (succPredSet.size() == 1) {
+ assert(*succPredSet.begin() == v);
+ continue;
+ }
+ }
+
+ // Find the intersection of Succ(Pred(v)) and Pred(Succ(v))
+ intersection.clear();
+ set_intersection(succPredSet.begin(), succPredSet.end(),
+ predSuccSet.begin(), predSuccSet.end(),
+ back_inserter(intersection));
+
+ /* Boring if it is just us in the intersection */
+ if (intersection.size() < 2) {
+ continue;
+ }
+
+ const CharReach &currReach = g[v].char_reach;
+ const auto &currReports = g[v].reports;
+ for (auto t : intersection) {
+ const VertexInfo &info2 = infoMap[t];
+
+ if (t == v || info2.isRemoved || is_special(t, g)) {
+ continue;
+ }
+
+ /* note: links to accepts are also tracked in succs */
+ if (info.isAccept && currReports != g[t].reports) {
+ continue;
+ }
+
+ /* we do not need to check for out edge tops - as only specials
+ * (start) can have tops and they are already disqualified. */
+ if (hasInEdgeTops(g, t)) {
+ continue; // Conservatively skip anything with nonzero tops.
+ }
+
+ if (som) {
+ if (is_virtual_start(v, g) != is_virtual_start(t, g)) {
+ continue; // can only merge like with like.
+ }
+ }
+
+ // If in-degree of v == in-degree of target
+ // and out-degree of v == out-degree of target
+ // (because pred and succ are supersets)
+ // then combine charreach of v into target and remove v
+ if (info.inDegree() == info2.inDegree()
+ && info.outDegree() == info2.outDegree()) {
+ // add character reachability of v into target
+ CharReach &otherReach = g[t].char_reach;
+ otherReach |= currReach;
+ // v can be removed
DEBUG_PRINTF("removing redundant vertex %zu and merging "
"reachability with vertex %zu\n",
- g[v].index, g[t].index);
- markForRemoval(v, infoMap, removable);
- changed = true;
- break;
- }
- }
- }
-
- return changed;
-}
-
-namespace {
-
-struct ReachMismatch {};
-
-class ReachSubsetVisitor : public boost::default_dfs_visitor {
-public:
- explicit ReachSubsetVisitor(const CharReach &r) : cr(r) {}
-
- template <class Graph, class Vertex>
- void discover_vertex(const Vertex &v, const Graph &g) const {
- if (is_any_start(v, g)) {
- return; // start vertices are OK
- } else if (is_special(v, g)) {
- assert(0);
- throw ReachMismatch(); // other special nodes??
- }
-
- const CharReach &vcr = g[v].char_reach;
- DEBUG_PRINTF("checking if vcr (%zu) is subset of (%zu)\n", vcr.count(),
- cr.count());
- if (vcr != (vcr & cr)) {
- throw ReachMismatch();
- }
- }
-
-private:
- const CharReach &cr;
-};
-
-/** Terminator function for DFS used in pathReachSubset. */
-template <class Graph, class Vertex> class VertexIs {
-public:
- explicit VertexIs(const Vertex &v) : vertex(v) {}
- bool operator()(const Vertex &v, const Graph &) const {
- return v == vertex;
- }
-
-private:
- Vertex vertex;
-};
-
-} // namespace
-
-/** Returns true if every vertex on paths leading to edge \p e has reachability
- * which is a subset of the reachability of \p dom */
-static
-bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom,
- const NGHolder &g) {
- const CharReach &domReach = g[dom].char_reach;
- if (domReach.all()) {
- return true;
- }
-
- NFAVertex start = source(e, g);
+ g[v].index, g[t].index);
+ markForRemoval(v, infoMap, removable);
+ changed = true;
+ break;
+ }
+ }
+ }
+
+ return changed;
+}
+
+namespace {
+
+struct ReachMismatch {};
+
+class ReachSubsetVisitor : public boost::default_dfs_visitor {
+public:
+ explicit ReachSubsetVisitor(const CharReach &r) : cr(r) {}
+
+ template <class Graph, class Vertex>
+ void discover_vertex(const Vertex &v, const Graph &g) const {
+ if (is_any_start(v, g)) {
+ return; // start vertices are OK
+ } else if (is_special(v, g)) {
+ assert(0);
+ throw ReachMismatch(); // other special nodes??
+ }
+
+ const CharReach &vcr = g[v].char_reach;
+ DEBUG_PRINTF("checking if vcr (%zu) is subset of (%zu)\n", vcr.count(),
+ cr.count());
+ if (vcr != (vcr & cr)) {
+ throw ReachMismatch();
+ }
+ }
+
+private:
+ const CharReach &cr;
+};
+
+/** Terminator function for DFS used in pathReachSubset. */
+template <class Graph, class Vertex> class VertexIs {
+public:
+ explicit VertexIs(const Vertex &v) : vertex(v) {}
+ bool operator()(const Vertex &v, const Graph &) const {
+ return v == vertex;
+ }
+
+private:
+ Vertex vertex;
+};
+
+} // namespace
+
+/** Returns true if every vertex on paths leading to edge \p e has reachability
+ * which is a subset of the reachability of \p dom */
+static
+bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom,
+ const NGHolder &g) {
+ const CharReach &domReach = g[dom].char_reach;
+ if (domReach.all()) {
+ return true;
+ }
+
+ NFAVertex start = source(e, g);
using RevGraph = boost::reverse_graph<NGHolder, const NGHolder &>;
- map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor;
-
- // Walk the graph backwards from v, examining each node. We fail (return
- // false) if we encounter a node with reach NOT a subset of domReach, and
- // we stop searching at dom.
- try {
+ map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor;
+
+ // Walk the graph backwards from v, examining each node. We fail (return
+ // false) if we encounter a node with reach NOT a subset of domReach, and
+ // we stop searching at dom.
+ try {
depth_first_visit(RevGraph(g), start,
- ReachSubsetVisitor(domReach),
- make_assoc_property_map(vertexColor),
- VertexIs<RevGraph, RevGraph::vertex_descriptor>(dom));
- } catch(ReachMismatch&) {
- return false;
- }
-
- return true;
-}
-
-/** Returns true if every vertex on paths leading from edge \p e has
- * reachability which is a subset of the reachability of \p dom */
-static
-bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom,
- const NGHolder &g) {
- const CharReach &domReach = g[dom].char_reach;
- if (domReach.all()) {
- return true;
- }
-
- NFAVertex start = target(e, g);
+ ReachSubsetVisitor(domReach),
+ make_assoc_property_map(vertexColor),
+ VertexIs<RevGraph, RevGraph::vertex_descriptor>(dom));
+ } catch(ReachMismatch&) {
+ return false;
+ }
+
+ return true;
+}
+
+/** Returns true if every vertex on paths leading from edge \p e has
+ * reachability which is a subset of the reachability of \p dom */
+static
+bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom,
+ const NGHolder &g) {
+ const CharReach &domReach = g[dom].char_reach;
+ if (domReach.all()) {
+ return true;
+ }
+
+ NFAVertex start = target(e, g);
map<NFAVertex, boost::default_color_type> vertexColor;
-
- // Walk the graph forward from v, examining each node. We fail (return
- // false) if we encounter a node with reach NOT a subset of domReach, and
- // we stop searching at dom.
- try {
+
+ // Walk the graph forward from v, examining each node. We fail (return
+ // false) if we encounter a node with reach NOT a subset of domReach, and
+ // we stop searching at dom.
+ try {
depth_first_visit(g, start, ReachSubsetVisitor(domReach),
- make_assoc_property_map(vertexColor),
+ make_assoc_property_map(vertexColor),
VertexIs<NGHolder, NFAVertex>(dom));
- } catch(ReachMismatch&) {
- return false;
- }
-
- return true;
-}
-
-static
-bool allOutsSpecial(NFAVertex v, const NGHolder &g) {
- for (auto w : adjacent_vertices_range(v, g)) {
- if (!is_special(w, g)) {
- return false;
- }
- }
- return true;
-}
-
-static
-bool allInsSpecial(NFAVertex v, const NGHolder &g) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (!is_special(u, g)) {
- return false;
- }
- }
- return true;
-}
-
-/** Cheaply check whether this graph can't be reduced at all, because it is
- * just a chain of vertices with no other edges. */
-static
-bool isIrreducible(const NGHolder &g) {
- for (auto v : vertices_range(g)) {
- // skip specials
- if (is_special(v, g)) {
- continue;
- }
-
- if (in_degree(v, g) != 1 && !allInsSpecial(v, g)) {
- return false;
- }
- if (out_degree(v, g) != 1 && !allOutsSpecial(v, g)) {
- return false;
- }
- }
-
- /* if calcComponents got sleepy and went home, the above checks don't hold
- * as it assumes there is only one connected component. */
- if (isAlternationOfClasses(g)) {
- return false;
- }
-
- return true;
-}
-
-static
-u32 findCyclic(const NGHolder &g, vector<bool> &cyclic) {
- u32 count = 0;
-
- cyclic.resize(num_vertices(g));
-
- for (auto v : vertices_range(g)) {
- assert(g[v].index < cyclic.size());
+ } catch(ReachMismatch&) {
+ return false;
+ }
+
+ return true;
+}
+
+static
+bool allOutsSpecial(NFAVertex v, const NGHolder &g) {
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (!is_special(w, g)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+static
+bool allInsSpecial(NFAVertex v, const NGHolder &g) {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (!is_special(u, g)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/** Cheaply check whether this graph can't be reduced at all, because it is
+ * just a chain of vertices with no other edges. */
+static
+bool isIrreducible(const NGHolder &g) {
+ for (auto v : vertices_range(g)) {
+ // skip specials
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ if (in_degree(v, g) != 1 && !allInsSpecial(v, g)) {
+ return false;
+ }
+ if (out_degree(v, g) != 1 && !allOutsSpecial(v, g)) {
+ return false;
+ }
+ }
+
+ /* if calcComponents got sleepy and went home, the above checks don't hold
+ * as it assumes there is only one connected component. */
+ if (isAlternationOfClasses(g)) {
+ return false;
+ }
+
+ return true;
+}
+
+static
+u32 findCyclic(const NGHolder &g, vector<bool> &cyclic) {
+ u32 count = 0;
+
+ cyclic.resize(num_vertices(g));
+
+ for (auto v : vertices_range(g)) {
+ assert(g[v].index < cyclic.size());
if (hasSelfLoop(v, g)) {
- count++;
+ count++;
cyclic[g[v].index] = true;
- }
- }
-
- return count;
-}
-
-static
-void findCyclicDom(NGHolder &g, vector<bool> &cyclic,
- set<NFAEdge> &dead, som_type som) {
+ }
+ }
+
+ return count;
+}
+
+static
+void findCyclicDom(NGHolder &g, vector<bool> &cyclic,
+ set<NFAEdge> &dead, som_type som) {
auto dominators = findDominators(g);
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
-
- // Path in through a dominator (e.g. '.+a?foobar')
- NFAVertex dom = dominators[v];
- if (dom && cyclic[g[dom].index]
- && edge(dom, v, g).second) {
-
- if (som && dom == g.startDs) {
- continue;
- }
-
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ // Path in through a dominator (e.g. '.+a?foobar')
+ NFAVertex dom = dominators[v];
+ if (dom && cyclic[g[dom].index]
+ && edge(dom, v, g).second) {
+
+ if (som && dom == g.startDs) {
+ continue;
+ }
+
DEBUG_PRINTF("vertex %zu is dominated by directly-connected cyclic "
"vertex %zu\n", g[v].index, g[dom].index);
-
- // iff all paths through in-edge e of v involve vertices whose
- // reachability is a subset of reach(dom), we can delete edge e.
- for (const auto &e : in_edges_range(v, g)) {
- if (source(e, g) == dom) {
- continue;
- }
-
- if (reversePathReachSubset(e, dom, g)) {
+
+ // iff all paths through in-edge e of v involve vertices whose
+ // reachability is a subset of reach(dom), we can delete edge e.
+ for (const auto &e : in_edges_range(v, g)) {
+ if (source(e, g) == dom) {
+ continue;
+ }
+
+ if (reversePathReachSubset(e, dom, g)) {
DEBUG_PRINTF("edge (%zu, %zu) can be removed: leading "
"paths share dom reach\n",
- g[source(e, g)].index, g[target(e, g)].index);
- dead.insert(e);
- if (source(e, g) == v) {
- cyclic[g[v].index] = false;
- }
- continue;
- }
- }
- }
- }
-}
-
-static
-void findCyclicPostDom(NGHolder &g, vector<bool> &cyclic,
- set<NFAEdge> &dead) {
+ g[source(e, g)].index, g[target(e, g)].index);
+ dead.insert(e);
+ if (source(e, g) == v) {
+ cyclic[g[v].index] = false;
+ }
+ continue;
+ }
+ }
+ }
+ }
+}
+
+static
+void findCyclicPostDom(NGHolder &g, vector<bool> &cyclic,
+ set<NFAEdge> &dead) {
auto postdominators = findPostDominators(g);
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
-
- // Path out through a post-dominator (e.g. a?.+foobar')
- NFAVertex postdom = postdominators[v];
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ // Path out through a post-dominator (e.g. a?.+foobar')
+ NFAVertex postdom = postdominators[v];
if (postdom && cyclic[g[postdom].index] && edge(v, postdom, g).second) {
DEBUG_PRINTF("vertex %zu is postdominated by directly-connected "
"cyclic vertex %zu\n", g[v].index, g[postdom].index);
-
- // iff all paths through in-edge e of v involve vertices whose
- // reachability is a subset of reach(dom), we can delete edge e.
- for (const auto &e : out_edges_range(v, g)) {
- if (target(e, g) == postdom) {
- continue;
- }
-
- if (forwardPathReachSubset(e, postdom, g)) {
+
+ // iff all paths through in-edge e of v involve vertices whose
+ // reachability is a subset of reach(dom), we can delete edge e.
+ for (const auto &e : out_edges_range(v, g)) {
+ if (target(e, g) == postdom) {
+ continue;
+ }
+
+ if (forwardPathReachSubset(e, postdom, g)) {
DEBUG_PRINTF("edge (%zu, %zu) can be removed: trailing "
"paths share postdom reach\n",
- g[source(e, g)].index, g[target(e, g)].index);
- if (target(e, g) == v) {
- cyclic[g[v].index] = false;
- }
- dead.insert(e);
- continue;
- }
- }
- }
- }
-}
-
-bool removeRedundancy(NGHolder &g, som_type som) {
- DEBUG_PRINTF("rr som = %d\n", (int)som);
+ g[source(e, g)].index, g[target(e, g)].index);
+ if (target(e, g) == v) {
+ cyclic[g[v].index] = false;
+ }
+ dead.insert(e);
+ continue;
+ }
+ }
+ }
+ }
+}
+
+bool removeRedundancy(NGHolder &g, som_type som) {
+ DEBUG_PRINTF("rr som = %d\n", (int)som);
renumber_vertices(g);
-
- // Cheap check: if all the non-special vertices have in-degree one and
- // out-degree one, there's no redundancy in this here graph and we can
- // vamoose.
- if (isIrreducible(g)) {
- return false;
- }
-
- VertexInfoMap infoMap(g);
-
- // Populate maps of successors and predecessors, and accept status
- populateContainers(g, infoMap);
-
- /* Run multiple passes: terminate when a full pass doesn't remove
- * any vertices */
- bool doUseless = true;
- bool doDiamond = true;
- set<NFAVertex> removable;
- while (doUseless || doDiamond) {
- if (doUseless
- && doUselessMergePass(g, som, infoMap, removable)) {
- doDiamond = true;
- }
- doUseless = false;
-
- if (doDiamond
- && doDiamondMergePass(g, som, infoMap, removable)) {
- doUseless = true;
- }
- doDiamond = false;
- }
- DEBUG_PRINTF("found %zu removable vertices overall.\n", removable.size());
- remove_vertices(removable, g);
-
- return !removable.empty();
-}
-
-/** UE-524: remove edges into nodes that are dominated by cyclic nodes with
- * reachability that is a superset of all paths feeding into that edge. */
-bool removeCyclicDominated(NGHolder &g, som_type som) {
- set<NFAEdge> dead;
- vector<bool> cyclic;
- bool changed = false;
-
- findCyclic(g, cyclic);
-
- findCyclicDom(g, cyclic, dead, som);
- if (!dead.empty()) {
- remove_edges(dead, g);
- pruneUseless(g);
- dead.clear();
- cyclic.clear(); // need to recalculate cyclic as ids have changed
- findCyclic(g, cyclic);
- changed = true;
- }
-
- findCyclicPostDom(g, cyclic, dead);
- if (!dead.empty()) {
- remove_edges(dead, g);
- pruneUseless(g);
- dead.clear();
- changed = true;
- }
-
- return changed;
-}
-
-} // namespace ue2
+
+ // Cheap check: if all the non-special vertices have in-degree one and
+ // out-degree one, there's no redundancy in this here graph and we can
+ // vamoose.
+ if (isIrreducible(g)) {
+ return false;
+ }
+
+ VertexInfoMap infoMap(g);
+
+ // Populate maps of successors and predecessors, and accept status
+ populateContainers(g, infoMap);
+
+ /* Run multiple passes: terminate when a full pass doesn't remove
+ * any vertices */
+ bool doUseless = true;
+ bool doDiamond = true;
+ set<NFAVertex> removable;
+ while (doUseless || doDiamond) {
+ if (doUseless
+ && doUselessMergePass(g, som, infoMap, removable)) {
+ doDiamond = true;
+ }
+ doUseless = false;
+
+ if (doDiamond
+ && doDiamondMergePass(g, som, infoMap, removable)) {
+ doUseless = true;
+ }
+ doDiamond = false;
+ }
+ DEBUG_PRINTF("found %zu removable vertices overall.\n", removable.size());
+ remove_vertices(removable, g);
+
+ return !removable.empty();
+}
+
+/** UE-524: remove edges into nodes that are dominated by cyclic nodes with
+ * reachability that is a superset of all paths feeding into that edge. */
+bool removeCyclicDominated(NGHolder &g, som_type som) {
+ set<NFAEdge> dead;
+ vector<bool> cyclic;
+ bool changed = false;
+
+ findCyclic(g, cyclic);
+
+ findCyclicDom(g, cyclic, dead, som);
+ if (!dead.empty()) {
+ remove_edges(dead, g);
+ pruneUseless(g);
+ dead.clear();
+ cyclic.clear(); // need to recalculate cyclic as ids have changed
+ findCyclic(g, cyclic);
+ changed = true;
+ }
+
+ findCyclicPostDom(g, cyclic, dead);
+ if (!dead.empty()) {
+ remove_edges(dead, g);
+ pruneUseless(g);
+ dead.clear();
+ changed = true;
+ }
+
+ return changed;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.h b/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.h
index 617aed6b37..941844d061 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_redundancy.h
@@ -1,54 +1,54 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief NFA graph reductions.
- */
-
-#ifndef NG_REDUNDANCY_H
-#define NG_REDUNDANCY_H
-
-#include "som/som.h"
-
-namespace ue2 {
-
-class NGHolder;
-struct CompileContext;
-
-/** Attempt to make the NFA graph \p g smaller by performing a number of local
- * transformations. */
-bool removeRedundancy(NGHolder &g, som_type som);
-
-/** UE-524: remove edges into nodes that are dominated by cyclic nodes with
- * reachability that is a superset of all paths feeding into that edge. Returns
- * true if any edges/vertices were removed. */
-bool removeCyclicDominated(NGHolder &g, som_type som);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief NFA graph reductions.
+ */
+
+#ifndef NG_REDUNDANCY_H
+#define NG_REDUNDANCY_H
+
+#include "som/som.h"
+
+namespace ue2 {
+
+class NGHolder;
+struct CompileContext;
+
+/** Attempt to make the NFA graph \p g smaller by performing a number of local
+ * transformations. */
+bool removeRedundancy(NGHolder &g, som_type som);
+
+/** UE-524: remove edges into nodes that are dominated by cyclic nodes with
+ * reachability that is a superset of all paths feeding into that edge. Returns
+ * true if any edges/vertices were removed. */
+bool removeCyclicDominated(NGHolder &g, som_type som);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_region.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_region.cpp
index a879e34695..2675be643f 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_region.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_region.cpp
@@ -1,476 +1,476 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Region analysis.
- *
- * Definition: a \a region is a subset of vertices in a graph such that:
- * - the edges entering the region are a cutset of the graph
- * - for every in-edge (u, v) to the region there exist edges (u, w) for all
- * w in {w : w in region and w has an in-edge}
- * - the regions in a graph partition the graph
- *
- * Note:
- * - we partition a graph into the maximal number of regions
- * - similar properties for exit edges should hold as a consequence
- * - graph == sequence of regions
- * - a region is considered to have an epsilon vertex to allow jumps
- * - vertices which only lead to back edges need to be floated up in the topo
- * order
- *
- * Algorithm overview:
- * -# topo-order over the DAG skeleton;
- * -# incrementally add vertices to the current region until the boundary edges
- * form a valid cut-set;
- * -# for each back-edge, if the source and target are in different regions,
- * merge the regions (and all intervening regions) into a common region.
- */
-#include "ng_region.h"
-
-#include "ng_holder.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/container.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Region analysis.
+ *
+ * Definition: a \a region is a subset of vertices in a graph such that:
+ * - the edges entering the region are a cutset of the graph
+ * - for every in-edge (u, v) to the region there exist edges (u, w) for all
+ * w in {w : w in region and w has an in-edge}
+ * - the regions in a graph partition the graph
+ *
+ * Note:
+ * - we partition a graph into the maximal number of regions
+ * - similar properties for exit edges should hold as a consequence
+ * - graph == sequence of regions
+ * - a region is considered to have an epsilon vertex to allow jumps
+ * - vertices which only lead to back edges need to be floated up in the topo
+ * order
+ *
+ * Algorithm overview:
+ * -# topo-order over the DAG skeleton;
+ * -# incrementally add vertices to the current region until the boundary edges
+ * form a valid cut-set;
+ * -# for each back-edge, if the source and target are in different regions,
+ * merge the regions (and all intervening regions) into a common region.
+ */
+#include "ng_region.h"
+
+#include "ng_holder.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/container.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
-
-#include <set>
-#include <utility>
-#include <vector>
-
-#include <boost/graph/filtered_graph.hpp>
-#include <boost/graph/topological_sort.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
+
+#include <set>
+#include <utility>
+#include <vector>
+
+#include <boost/graph/filtered_graph.hpp>
+#include <boost/graph/topological_sort.hpp>
+
+using namespace std;
+
+namespace ue2 {
+
using BackEdgeSet = unordered_set<NFAEdge>;
using AcyclicGraph =
boost::filtered_graph<NGHolder, bad_edge_filter<BackEdgeSet>>;
-
-namespace {
-struct exit_info {
- explicit exit_info(NFAVertex v) : exit(v) {}
-
- NFAVertex exit;
+
+namespace {
+struct exit_info {
+ explicit exit_info(NFAVertex v) : exit(v) {}
+
+ NFAVertex exit;
flat_set<NFAVertex> open;
-};
-}
-
-static
-void checkAndAddExitCandidate(const AcyclicGraph &g,
+};
+}
+
+static
+void checkAndAddExitCandidate(const AcyclicGraph &g,
const unordered_set<NFAVertex> &r, NFAVertex v,
vector<exit_info> &exits) {
exit_info v_exit(v);
auto &open = v_exit.open;
-
- /* find the set of vertices reachable from v which are not in r */
- for (auto w : adjacent_vertices_range(v, g)) {
- if (!contains(r, w)) {
+
+ /* find the set of vertices reachable from v which are not in r */
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (!contains(r, w)) {
open.insert(w);
- }
- }
-
+ }
+ }
+
if (!open.empty()) {
DEBUG_PRINTF("exit %zu\n", g[v].index);
exits.push_back(move(v_exit));
- }
-}
-
-static
+ }
+}
+
+static
void findExits(const AcyclicGraph &g, const unordered_set<NFAVertex> &r,
vector<exit_info> &exits) {
exits.clear();
- for (auto v : r) {
- checkAndAddExitCandidate(g, r, v, exits);
- }
-}
-
-static
+ for (auto v : r) {
+ checkAndAddExitCandidate(g, r, v, exits);
+ }
+}
+
+static
void refineExits(const AcyclicGraph &g, const unordered_set<NFAVertex> &r,
NFAVertex new_v, vector<exit_info> &exits) {
/* new_v is no long an open edge */
for (auto &exit : exits) {
exit.open.erase(new_v);
- }
-
+ }
+
/* no open edges: no longer an exit */
exits.erase(remove_if(exits.begin(), exits.end(),
[&](const exit_info &exit) { return exit.open.empty(); }),
exits.end());
- checkAndAddExitCandidate(g, r, new_v, exits);
-}
-
-/** the set of exits from a candidate region are valid if: FIXME: document
- */
-static
-bool exitValid(UNUSED const AcyclicGraph &g, const vector<exit_info> &exits,
+ checkAndAddExitCandidate(g, r, new_v, exits);
+}
+
+/** the set of exits from a candidate region are valid if: FIXME: document
+ */
+static
+bool exitValid(UNUSED const AcyclicGraph &g, const vector<exit_info> &exits,
const flat_set<NFAVertex> &open_jumps) {
- if (exits.empty() || (exits.size() < 2 && open_jumps.empty())) {
- return true;
- }
- if (exits.size() == 1 && open_jumps.size() == 1) {
+ if (exits.empty() || (exits.size() < 2 && open_jumps.empty())) {
+ return true;
+ }
+ if (exits.size() == 1 && open_jumps.size() == 1) {
DEBUG_PRINTF("oj %zu, e %zu\n", g[*open_jumps.begin()].index,
- g[exits[0].exit].index);
- if (*open_jumps.begin() == exits[0].exit) {
- return true;
- }
- }
-
- assert(!exits.empty());
- const auto &enters = exits.front().open;
-
- if (!open_jumps.empty() && enters != open_jumps) {
- return false;
- }
-
- for (auto it = begin(exits) + 1; it != end(exits); ++it) {
- if (it->open != enters) {
- return false;
- }
- }
-
- return true;
-}
-
-static
+ g[exits[0].exit].index);
+ if (*open_jumps.begin() == exits[0].exit) {
+ return true;
+ }
+ }
+
+ assert(!exits.empty());
+ const auto &enters = exits.front().open;
+
+ if (!open_jumps.empty() && enters != open_jumps) {
+ return false;
+ }
+
+ for (auto it = begin(exits) + 1; it != end(exits); ++it) {
+ if (it->open != enters) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static
void setRegion(const unordered_set<NFAVertex> &r, u32 rid,
unordered_map<NFAVertex, u32> &regions) {
- for (auto v : r) {
- regions[v] = rid;
- }
-}
-
-static
-void buildInitialCandidate(const AcyclicGraph &g,
- vector<NFAVertex>::const_reverse_iterator &it,
- const vector<NFAVertex>::const_reverse_iterator &ite,
+ for (auto v : r) {
+ regions[v] = rid;
+ }
+}
+
+static
+void buildInitialCandidate(const AcyclicGraph &g,
+ vector<NFAVertex>::const_reverse_iterator &it,
+ const vector<NFAVertex>::const_reverse_iterator &ite,
unordered_set<NFAVertex> &candidate,
- /* in exits of prev region;
- * out exits from candidate */
+ /* in exits of prev region;
+ * out exits from candidate */
vector<exit_info> &exits,
flat_set<NFAVertex> &open_jumps) {
- if (it == ite) {
+ if (it == ite) {
candidate.clear();
exits.clear();
- return;
- }
-
+ return;
+ }
+
if (exits.empty()) {
- DEBUG_PRINTF("odd\n");
+ DEBUG_PRINTF("odd\n");
candidate.clear();
DEBUG_PRINTF("adding %zu to initial\n", g[*it].index);
candidate.insert(*it);
open_jumps.erase(*it);
checkAndAddExitCandidate(g, candidate, *it, exits);
- ++it;
- return;
- }
-
+ ++it;
+ return;
+ }
+
// Note: findExits() will clear exits, so it's safe to mutate/move its
// elements here.
auto &enters = exits.front().open;
candidate.clear();
-
- for (; it != ite; ++it) {
+
+ for (; it != ite; ++it) {
DEBUG_PRINTF("adding %zu to initial\n", g[*it].index);
candidate.insert(*it);
- if (contains(enters, *it)) {
- break;
- }
- }
-
- if (it != ite) {
- enters.erase(*it);
+ if (contains(enters, *it)) {
+ break;
+ }
+ }
+
+ if (it != ite) {
+ enters.erase(*it);
open_jumps = move(enters);
DEBUG_PRINTF("oj size = %zu\n", open_jumps.size());
- ++it;
- } else {
+ ++it;
+ } else {
open_jumps.clear();
- }
-
+ }
+
findExits(g, candidate, exits);
-}
-
-static
-void findDagLeaders(const NGHolder &h, const AcyclicGraph &g,
- const vector<NFAVertex> &topo,
+}
+
+static
+void findDagLeaders(const NGHolder &h, const AcyclicGraph &g,
+ const vector<NFAVertex> &topo,
unordered_map<NFAVertex, u32> &regions) {
- assert(!topo.empty());
- u32 curr_id = 0;
+ assert(!topo.empty());
+ u32 curr_id = 0;
auto t_it = topo.rbegin();
unordered_set<NFAVertex> candidate;
flat_set<NFAVertex> open_jumps;
DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index);
- assert(t_it != topo.rend());
- candidate.insert(*t_it++);
+ assert(t_it != topo.rend());
+ candidate.insert(*t_it++);
DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index);
- assert(t_it != topo.rend());
- candidate.insert(*t_it++);
-
+ assert(t_it != topo.rend());
+ candidate.insert(*t_it++);
+
vector<exit_info> exits;
findExits(g, candidate, exits);
- while (t_it != topo.rend()) {
- assert(!candidate.empty());
-
- if (exitValid(g, exits, open_jumps)) {
- if (contains(candidate, h.accept) && !open_jumps.empty()) {
- /* we have tried to make an optional region containing accept as
- * we have an open jump to eod. This candidate region needs to
- * be put in with the previous region. */
- curr_id--;
- DEBUG_PRINTF("merging in with region %u\n", curr_id);
- } else {
- DEBUG_PRINTF("setting region %u\n", curr_id);
- }
- setRegion(candidate, curr_id++, regions);
+ while (t_it != topo.rend()) {
+ assert(!candidate.empty());
+
+ if (exitValid(g, exits, open_jumps)) {
+ if (contains(candidate, h.accept) && !open_jumps.empty()) {
+ /* we have tried to make an optional region containing accept as
+ * we have an open jump to eod. This candidate region needs to
+ * be put in with the previous region. */
+ curr_id--;
+ DEBUG_PRINTF("merging in with region %u\n", curr_id);
+ } else {
+ DEBUG_PRINTF("setting region %u\n", curr_id);
+ }
+ setRegion(candidate, curr_id++, regions);
buildInitialCandidate(g, t_it, topo.rend(), candidate, exits,
open_jumps);
- } else {
- NFAVertex curr = *t_it;
+ } else {
+ NFAVertex curr = *t_it;
DEBUG_PRINTF("adding %zu to current\n", g[curr].index);
- candidate.insert(curr);
- open_jumps.erase(curr);
+ candidate.insert(curr);
+ open_jumps.erase(curr);
refineExits(g, candidate, *t_it, exits);
- DEBUG_PRINTF(" open jumps %zu exits %zu\n", open_jumps.size(),
- exits.size());
- ++t_it;
- }
- }
- /* assert exits valid */
- setRegion(candidate, curr_id, regions);
-}
-
-static
-void mergeUnderBackEdges(const NGHolder &g, const vector<NFAVertex> &topo,
- const BackEdgeSet &backEdges,
+ DEBUG_PRINTF(" open jumps %zu exits %zu\n", open_jumps.size(),
+ exits.size());
+ ++t_it;
+ }
+ }
+ /* assert exits valid */
+ setRegion(candidate, curr_id, regions);
+}
+
+static
+void mergeUnderBackEdges(const NGHolder &g, const vector<NFAVertex> &topo,
+ const BackEdgeSet &backEdges,
unordered_map<NFAVertex, u32> &regions) {
- for (const auto &e : backEdges) {
- NFAVertex u = source(e, g);
- NFAVertex v = target(e, g);
-
- u32 ru = regions[u];
- u32 rv = regions[v];
- if (ru == rv) {
- continue;
- }
-
+ for (const auto &e : backEdges) {
+ NFAVertex u = source(e, g);
+ NFAVertex v = target(e, g);
+
+ u32 ru = regions[u];
+ u32 rv = regions[v];
+ if (ru == rv) {
+ continue;
+ }
+
DEBUG_PRINTF("merging v = %zu(%u), u = %zu(%u)\n", g[v].index, rv,
- g[u].index, ru);
- assert(rv < ru);
-
- for (auto t : topo) {
- u32 r = regions[t];
- if (r <= ru && r > rv) {
- regions[t] = rv;
- } else if (r > ru) {
- regions[t] = rv + r - ru;
- }
- }
- }
-}
-
-static
-void reorderSpecials(const NGHolder &w, const AcyclicGraph &acyclic_g,
- vector<NFAVertex> &topoOrder) {
- // Start is last element of reverse topo ordering.
- auto it = find(topoOrder.begin(), topoOrder.end(), w.start);
- if (it != topoOrder.end() - 1) {
- DEBUG_PRINTF("repositioning start\n");
- assert(it != topoOrder.end());
- topoOrder.erase(it);
- topoOrder.insert(topoOrder.end(), w.start);
- }
-
- // StartDs is second-to-last element of reverse topo ordering.
- it = find(topoOrder.begin(), topoOrder.end(), w.startDs);
- if (it != topoOrder.end() - 2) {
- DEBUG_PRINTF("repositioning start ds\n");
- assert(it != topoOrder.end());
- topoOrder.erase(it);
- topoOrder.insert(topoOrder.end() - 1, w.startDs);
- }
-
- // AcceptEOD is first element of reverse topo ordering.
- it = find(topoOrder.begin(), topoOrder.end(), w.acceptEod);
- if (it != topoOrder.begin()) {
- DEBUG_PRINTF("repositioning accept\n");
- assert(it != topoOrder.end());
- topoOrder.erase(it);
- topoOrder.insert(topoOrder.begin(), w.acceptEod);
- }
-
- // Accept is second element of reverse topo ordering, if it's connected.
- it = find(topoOrder.begin(), topoOrder.end(), w.accept);
- if (it != topoOrder.begin() + 1) {
- DEBUG_PRINTF("repositioning accept\n");
- assert(it != topoOrder.end());
- topoOrder.erase(it);
- if (in_degree(w.accept, acyclic_g) != 0) {
- topoOrder.insert(topoOrder.begin() + 1, w.accept);
- }
- }
-}
-
-static
-void liftSinks(const AcyclicGraph &acyclic_g, vector<NFAVertex> &topoOrder) {
+ g[u].index, ru);
+ assert(rv < ru);
+
+ for (auto t : topo) {
+ u32 r = regions[t];
+ if (r <= ru && r > rv) {
+ regions[t] = rv;
+ } else if (r > ru) {
+ regions[t] = rv + r - ru;
+ }
+ }
+ }
+}
+
+static
+void reorderSpecials(const NGHolder &w, const AcyclicGraph &acyclic_g,
+ vector<NFAVertex> &topoOrder) {
+ // Start is last element of reverse topo ordering.
+ auto it = find(topoOrder.begin(), topoOrder.end(), w.start);
+ if (it != topoOrder.end() - 1) {
+ DEBUG_PRINTF("repositioning start\n");
+ assert(it != topoOrder.end());
+ topoOrder.erase(it);
+ topoOrder.insert(topoOrder.end(), w.start);
+ }
+
+ // StartDs is second-to-last element of reverse topo ordering.
+ it = find(topoOrder.begin(), topoOrder.end(), w.startDs);
+ if (it != topoOrder.end() - 2) {
+ DEBUG_PRINTF("repositioning start ds\n");
+ assert(it != topoOrder.end());
+ topoOrder.erase(it);
+ topoOrder.insert(topoOrder.end() - 1, w.startDs);
+ }
+
+ // AcceptEOD is first element of reverse topo ordering.
+ it = find(topoOrder.begin(), topoOrder.end(), w.acceptEod);
+ if (it != topoOrder.begin()) {
+ DEBUG_PRINTF("repositioning accept\n");
+ assert(it != topoOrder.end());
+ topoOrder.erase(it);
+ topoOrder.insert(topoOrder.begin(), w.acceptEod);
+ }
+
+ // Accept is second element of reverse topo ordering, if it's connected.
+ it = find(topoOrder.begin(), topoOrder.end(), w.accept);
+ if (it != topoOrder.begin() + 1) {
+ DEBUG_PRINTF("repositioning accept\n");
+ assert(it != topoOrder.end());
+ topoOrder.erase(it);
+ if (in_degree(w.accept, acyclic_g) != 0) {
+ topoOrder.insert(topoOrder.begin() + 1, w.accept);
+ }
+ }
+}
+
+static
+void liftSinks(const AcyclicGraph &acyclic_g, vector<NFAVertex> &topoOrder) {
unordered_set<NFAVertex> sinks;
- for (auto v : vertices_range(acyclic_g)) {
- if (is_special(v, acyclic_g)) {
- continue;
- }
-
- if (isLeafNode(v, acyclic_g)) {
+ for (auto v : vertices_range(acyclic_g)) {
+ if (is_special(v, acyclic_g)) {
+ continue;
+ }
+
+ if (isLeafNode(v, acyclic_g)) {
DEBUG_PRINTF("sink found %zu\n", acyclic_g[v].index);
sinks.insert(NFAVertex(v));
- }
- }
-
- if (sinks.empty()) {
- DEBUG_PRINTF("no sinks found\n");
- return;
- }
-
- bool changed;
- do {
- DEBUG_PRINTF("look\n");
- changed = false;
- for (auto v : vertices_range(acyclic_g)) {
+ }
+ }
+
+ if (sinks.empty()) {
+ DEBUG_PRINTF("no sinks found\n");
+ return;
+ }
+
+ bool changed;
+ do {
+ DEBUG_PRINTF("look\n");
+ changed = false;
+ for (auto v : vertices_range(acyclic_g)) {
if (is_special(v, acyclic_g) || contains(sinks, NFAVertex(v))) {
- continue;
- }
-
- for (auto w : adjacent_vertices_range(v, acyclic_g)) {
+ continue;
+ }
+
+ for (auto w : adjacent_vertices_range(v, acyclic_g)) {
if (!contains(sinks, NFAVertex(w))) {
- goto next;
- }
- }
-
+ goto next;
+ }
+ }
+
DEBUG_PRINTF("sink found %zu\n", acyclic_g[v].index);
sinks.insert(NFAVertex(v));
- changed = true;
- next:;
- }
- } while (changed);
-
- for (auto ri = topoOrder.rbegin() + 1; ri != topoOrder.rend(); ++ri) {
- if (!contains(sinks, *ri)) {
- continue;
- }
- NFAVertex s = *ri;
+ changed = true;
+ next:;
+ }
+ } while (changed);
+
+ for (auto ri = topoOrder.rbegin() + 1; ri != topoOrder.rend(); ++ri) {
+ if (!contains(sinks, *ri)) {
+ continue;
+ }
+ NFAVertex s = *ri;
DEBUG_PRINTF("handling sink %zu\n", acyclic_g[s].index);
unordered_set<NFAVertex> parents;
- for (const auto &e : in_edges_range(s, acyclic_g)) {
+ for (const auto &e : in_edges_range(s, acyclic_g)) {
parents.insert(NFAVertex(source(e, acyclic_g)));
- }
-
- /* vertex has no children not reachable on a back edge, bubble the
- * vertex up the topo order to be near its parents */
- vector<NFAVertex>::reverse_iterator rj = ri;
- --rj;
- while (rj != topoOrder.rbegin() && !contains(parents, *rj)) {
- /* sink is in rj + 1 */
- assert(*(rj + 1) == s);
- DEBUG_PRINTF("lifting\n");
- using std::swap;
- swap(*rj, *(rj + 1));
- --rj;
- }
- }
-}
-
+ }
+
+ /* vertex has no children not reachable on a back edge, bubble the
+ * vertex up the topo order to be near its parents */
+ vector<NFAVertex>::reverse_iterator rj = ri;
+ --rj;
+ while (rj != topoOrder.rbegin() && !contains(parents, *rj)) {
+ /* sink is in rj + 1 */
+ assert(*(rj + 1) == s);
+ DEBUG_PRINTF("lifting\n");
+ using std::swap;
+ swap(*rj, *(rj + 1));
+ --rj;
+ }
+ }
+}
+
using ColorMap = decltype(make_small_color_map(NGHolder()));
-/** Build a reverse topo ordering (with only the specials that are in use). We
- * also want to ensure vertices which only lead to back edges are placed near
- * their parents. */
-static
-vector<NFAVertex> buildTopoOrder(const NGHolder &w,
- const AcyclicGraph &acyclic_g,
+/** Build a reverse topo ordering (with only the specials that are in use). We
+ * also want to ensure vertices which only lead to back edges are placed near
+ * their parents. */
+static
+vector<NFAVertex> buildTopoOrder(const NGHolder &w,
+ const AcyclicGraph &acyclic_g,
ColorMap &colours) {
- vector<NFAVertex> topoOrder;
+ vector<NFAVertex> topoOrder;
topoOrder.reserve(num_vertices(w));
-
+
topological_sort(acyclic_g, back_inserter(topoOrder),
color_map(colours));
-
- reorderSpecials(w, acyclic_g, topoOrder);
-
- if (topoOrder.empty()) {
- return topoOrder;
- }
-
- liftSinks(acyclic_g, topoOrder);
-
- DEBUG_PRINTF("TOPO ORDER\n");
- for (auto ri = topoOrder.rbegin(); ri != topoOrder.rend(); ++ri) {
+
+ reorderSpecials(w, acyclic_g, topoOrder);
+
+ if (topoOrder.empty()) {
+ return topoOrder;
+ }
+
+ liftSinks(acyclic_g, topoOrder);
+
+ DEBUG_PRINTF("TOPO ORDER\n");
+ for (auto ri = topoOrder.rbegin(); ri != topoOrder.rend(); ++ri) {
DEBUG_PRINTF("[%zu]\n", acyclic_g[*ri].index);
- }
- DEBUG_PRINTF("----------\n");
-
- return topoOrder;
-}
-
+ }
+ DEBUG_PRINTF("----------\n");
+
+ return topoOrder;
+}
+
unordered_map<NFAVertex, u32> assignRegions(const NGHolder &g) {
- assert(hasCorrectlyNumberedVertices(g));
- const u32 numVertices = num_vertices(g);
- DEBUG_PRINTF("assigning regions for %u vertices in holder\n", numVertices);
-
+ assert(hasCorrectlyNumberedVertices(g));
+ const u32 numVertices = num_vertices(g);
+ DEBUG_PRINTF("assigning regions for %u vertices in holder\n", numVertices);
+
auto colours = make_small_color_map(g);
-
- // Build an acyclic graph for this NGHolder.
- BackEdgeSet deadEdges;
+
+ // Build an acyclic graph for this NGHolder.
+ BackEdgeSet deadEdges;
depth_first_search(g,
visitor(BackEdges<BackEdgeSet>(deadEdges))
.root_vertex(g.start)
.color_map(colours));
-
+
auto af = make_bad_edge_filter(&deadEdges);
AcyclicGraph acyclic_g(g, af);
-
- // Build a (reverse) topological ordering.
- vector<NFAVertex> topoOrder = buildTopoOrder(g, acyclic_g, colours);
-
- // Everybody starts in region 0.
+
+ // Build a (reverse) topological ordering.
+ vector<NFAVertex> topoOrder = buildTopoOrder(g, acyclic_g, colours);
+
+ // Everybody starts in region 0.
unordered_map<NFAVertex, u32> regions;
- regions.reserve(numVertices);
- for (auto v : vertices_range(g)) {
- regions.emplace(v, 0);
- }
-
- findDagLeaders(g, acyclic_g, topoOrder, regions);
- mergeUnderBackEdges(g, topoOrder, deadEdges, regions);
-
- return regions;
-}
-
-} // namespace ue2
+ regions.reserve(numVertices);
+ for (auto v : vertices_range(g)) {
+ regions.emplace(v, 0);
+ }
+
+ findDagLeaders(g, acyclic_g, topoOrder, regions);
+ mergeUnderBackEdges(g, topoOrder, deadEdges, regions);
+
+ return regions;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_region.h b/contrib/libs/hyperscan/src/nfagraph/ng_region.h
index dec8ea7a04..a4708a582e 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_region.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_region.h
@@ -1,219 +1,219 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Region analysis and utility functions.
- */
-
-#ifndef NG_REGION_H
-#define NG_REGION_H
-
-#include "ng_holder.h"
-#include "util/container.h"
-#include "util/graph_range.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Region analysis and utility functions.
+ */
+
+#ifndef NG_REGION_H
+#define NG_REGION_H
+
+#include "ng_holder.h"
+#include "util/container.h"
+#include "util/graph_range.h"
+
#include <unordered_map>
-#include <vector>
-
-namespace ue2 {
-
-/** \brief Assign a region ID to every vertex in the graph. */
+#include <vector>
+
+namespace ue2 {
+
+/** \brief Assign a region ID to every vertex in the graph. */
std::unordered_map<NFAVertex, u32> assignRegions(const NGHolder &g);
-
-/** \brief True if vertices \p a and \p b are in the same region. */
-template <class Graph>
-bool inSameRegion(const Graph &g, NFAVertex a, NFAVertex b,
+
+/** \brief True if vertices \p a and \p b are in the same region. */
+template <class Graph>
+bool inSameRegion(const Graph &g, NFAVertex a, NFAVertex b,
const std::unordered_map<NFAVertex, u32> &region_map) {
- assert(contains(region_map, a) && contains(region_map, b));
-
- return region_map.at(a) == region_map.at(b) &&
- is_special(a, g) == is_special(b, g);
-}
-
-/** \brief True if vertex \p b is in a later region than vertex \p a. */
-template <class Graph>
-bool inLaterRegion(const Graph &g, NFAVertex a, NFAVertex b,
+ assert(contains(region_map, a) && contains(region_map, b));
+
+ return region_map.at(a) == region_map.at(b) &&
+ is_special(a, g) == is_special(b, g);
+}
+
+/** \brief True if vertex \p b is in a later region than vertex \p a. */
+template <class Graph>
+bool inLaterRegion(const Graph &g, NFAVertex a, NFAVertex b,
const std::unordered_map<NFAVertex, u32> &region_map) {
- assert(contains(region_map, a) && contains(region_map, b));
-
- u32 aa = g[a].index;
- u32 bb = g[b].index;
-
- if (bb == NODE_START || bb == NODE_START_DOTSTAR) {
- return false;
- }
-
- if (aa == NODE_START || aa == NODE_START_DOTSTAR) {
- return true;
- }
-
- if (bb == NODE_ACCEPT || bb == NODE_ACCEPT_EOD) {
- return true;
- }
- if (aa == NODE_ACCEPT || aa == NODE_ACCEPT_EOD) {
- return false;
- }
-
- return region_map.at(a) < region_map.at(b);
-}
-
-/** \brief True if vertex \p b is in an earlier region than vertex \p a. */
-template <class Graph>
-bool inEarlierRegion(const Graph &g, NFAVertex a, NFAVertex b,
+ assert(contains(region_map, a) && contains(region_map, b));
+
+ u32 aa = g[a].index;
+ u32 bb = g[b].index;
+
+ if (bb == NODE_START || bb == NODE_START_DOTSTAR) {
+ return false;
+ }
+
+ if (aa == NODE_START || aa == NODE_START_DOTSTAR) {
+ return true;
+ }
+
+ if (bb == NODE_ACCEPT || bb == NODE_ACCEPT_EOD) {
+ return true;
+ }
+ if (aa == NODE_ACCEPT || aa == NODE_ACCEPT_EOD) {
+ return false;
+ }
+
+ return region_map.at(a) < region_map.at(b);
+}
+
+/** \brief True if vertex \p b is in an earlier region than vertex \p a. */
+template <class Graph>
+bool inEarlierRegion(const Graph &g, NFAVertex a, NFAVertex b,
const std::unordered_map<NFAVertex, u32> &region_map) {
- assert(contains(region_map, a) && contains(region_map, b));
-
- u32 aa = g[a].index;
- u32 bb = g[b].index;
-
- if (bb == NODE_START || bb == NODE_START_DOTSTAR) {
- return true;
- }
-
- if (aa == NODE_START || aa == NODE_START_DOTSTAR) {
- return false;
- }
-
- if (bb == NODE_ACCEPT || bb == NODE_ACCEPT_EOD) {
- return false;
- }
- if (aa == NODE_ACCEPT || aa == NODE_ACCEPT_EOD) {
- return true;
- }
-
- return region_map.at(b) < region_map.at(a);
-}
-
-/** \brief True if vertex \p v is an entry vertex for its region. */
-template <class Graph>
-bool isRegionEntry(const Graph &g, NFAVertex v,
+ assert(contains(region_map, a) && contains(region_map, b));
+
+ u32 aa = g[a].index;
+ u32 bb = g[b].index;
+
+ if (bb == NODE_START || bb == NODE_START_DOTSTAR) {
+ return true;
+ }
+
+ if (aa == NODE_START || aa == NODE_START_DOTSTAR) {
+ return false;
+ }
+
+ if (bb == NODE_ACCEPT || bb == NODE_ACCEPT_EOD) {
+ return false;
+ }
+ if (aa == NODE_ACCEPT || aa == NODE_ACCEPT_EOD) {
+ return true;
+ }
+
+ return region_map.at(b) < region_map.at(a);
+}
+
+/** \brief True if vertex \p v is an entry vertex for its region. */
+template <class Graph>
+bool isRegionEntry(const Graph &g, NFAVertex v,
const std::unordered_map<NFAVertex, u32> &region_map) {
- // Note that some graph types do not have inv_adjacent_vertices, so we must
- // use in_edges here.
- for (const auto &e : in_edges_range(v, g)) {
- if (!inSameRegion(g, v, source(e, g), region_map)) {
- return true;
- }
- }
-
- return false;
-}
-
-/** \brief True if vertex \p v is an exit vertex for its region. */
-template <class Graph>
-bool isRegionExit(const Graph &g, NFAVertex v,
+ // Note that some graph types do not have inv_adjacent_vertices, so we must
+ // use in_edges here.
+ for (const auto &e : in_edges_range(v, g)) {
+ if (!inSameRegion(g, v, source(e, g), region_map)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/** \brief True if vertex \p v is an exit vertex for its region. */
+template <class Graph>
+bool isRegionExit(const Graph &g, NFAVertex v,
const std::unordered_map<NFAVertex, u32> &region_map) {
- for (auto w : adjacent_vertices_range(v, g)) {
- if (!inSameRegion(g, v, w, region_map)) {
- return true;
- }
- }
-
- return false;
-}
-
-/** \brief True if vertex \p v is in a region all on its own. */
-template <class Graph>
-bool isSingletonRegion(const Graph &g, NFAVertex v,
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (!inSameRegion(g, v, w, region_map)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/** \brief True if vertex \p v is in a region all on its own. */
+template <class Graph>
+bool isSingletonRegion(const Graph &g, NFAVertex v,
const std::unordered_map<NFAVertex, u32> &region_map) {
- for (const auto &e : in_edges_range(v, g)) {
- auto u = source(e, g);
- if (u != v && inSameRegion(g, v, u, region_map)) {
- return false;
- }
-
- for (auto w : ue2::adjacent_vertices_range(u, g)) {
- if (w != v && inSameRegion(g, v, w, region_map)) {
- return false;
- }
- }
- }
-
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w != v && inSameRegion(g, v, w, region_map)) {
- return false;
- }
-
- for (const auto &e : in_edges_range(w, g)) {
- auto u = source(e, g);
- if (u != v && inSameRegion(g, v, u, region_map)) {
- return false;
- }
- }
-
- return true;
- }
-
- return true;
-}
-
-/**
- * \brief True if the region containing vertex \p v is optional. The vertex \p v
- * should be a region leader.
- */
-template <class Graph>
-bool isOptionalRegion(const Graph &g, NFAVertex v,
+ for (const auto &e : in_edges_range(v, g)) {
+ auto u = source(e, g);
+ if (u != v && inSameRegion(g, v, u, region_map)) {
+ return false;
+ }
+
+ for (auto w : ue2::adjacent_vertices_range(u, g)) {
+ if (w != v && inSameRegion(g, v, w, region_map)) {
+ return false;
+ }
+ }
+ }
+
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (w != v && inSameRegion(g, v, w, region_map)) {
+ return false;
+ }
+
+ for (const auto &e : in_edges_range(w, g)) {
+ auto u = source(e, g);
+ if (u != v && inSameRegion(g, v, u, region_map)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ return true;
+}
+
+/**
+ * \brief True if the region containing vertex \p v is optional. The vertex \p v
+ * should be a region leader.
+ */
+template <class Graph>
+bool isOptionalRegion(const Graph &g, NFAVertex v,
const std::unordered_map<NFAVertex, u32> &region_map) {
- assert(isRegionEntry(g, v, region_map));
-
+ assert(isRegionEntry(g, v, region_map));
+
DEBUG_PRINTF("check if r%u is optional (inspecting v%zu)\n",
- region_map.at(v), g[v].index);
-
- // Region zero is never optional.
- assert(contains(region_map, v));
- if (region_map.at(v) == 0) {
- return false;
- }
-
- // Optional if v has a predecessor in an earlier region that has a
- // successor in a later one.
-
- for (const auto &e : in_edges_range(v, g)) {
- auto u = source(e, g);
- if (inSameRegion(g, v, u, region_map)) {
- continue;
- }
+ region_map.at(v), g[v].index);
+
+ // Region zero is never optional.
+ assert(contains(region_map, v));
+ if (region_map.at(v) == 0) {
+ return false;
+ }
+
+ // Optional if v has a predecessor in an earlier region that has a
+ // successor in a later one.
+
+ for (const auto &e : in_edges_range(v, g)) {
+ auto u = source(e, g);
+ if (inSameRegion(g, v, u, region_map)) {
+ continue;
+ }
DEBUG_PRINTF(" searching from u=%zu\n", g[u].index);
-
- assert(inEarlierRegion(g, v, u, region_map));
-
- for (auto w : adjacent_vertices_range(u, g)) {
+
+ assert(inEarlierRegion(g, v, u, region_map));
+
+ for (auto w : adjacent_vertices_range(u, g)) {
DEBUG_PRINTF(" searching to w=%zu\n", g[w].index);
- if (inLaterRegion(g, v, w, region_map)) {
- return true;
- }
- }
- return false;
- }
-
- return false;
-}
-
-} // namespace ue2
-
-#endif
+ if (inLaterRegion(g, v, w, region_map)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ return false;
+}
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.cpp
index 4eecb1f917..1126d4d6c9 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.cpp
@@ -1,270 +1,270 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Region Redundancy optimisation pass.
- *
- * Identifies and removes entire regions that are adjacent to a cyclic state
- * with a superset of their character reachability.
- */
-#include "ng_region_redundancy.h"
-
-#include "ng_holder.h"
-#include "ng_region.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/container.h"
-#include "util/graph_range.h"
-
-#include <set>
-
-using namespace std;
-
-namespace ue2 {
-
-namespace {
-
-/** Precalculated information about a region. */
-struct RegionInfo {
- NFAVertex entry; //!< arbitrary entry vertex
- CharReach cr; //!< union of the reach of all vertices in region
-};
-
-} // namespace
-
-static
-bool regionHasUnexpectedAccept(const NGHolder &g, const u32 region,
- const flat_set<ReportID> &expected_reports,
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Region Redundancy optimisation pass.
+ *
+ * Identifies and removes entire regions that are adjacent to a cyclic state
+ * with a superset of their character reachability.
+ */
+#include "ng_region_redundancy.h"
+
+#include "ng_holder.h"
+#include "ng_region.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/container.h"
+#include "util/graph_range.h"
+
+#include <set>
+
+using namespace std;
+
+namespace ue2 {
+
+namespace {
+
+/** Precalculated information about a region. */
+struct RegionInfo {
+ NFAVertex entry; //!< arbitrary entry vertex
+ CharReach cr; //!< union of the reach of all vertices in region
+};
+
+} // namespace
+
+static
+bool regionHasUnexpectedAccept(const NGHolder &g, const u32 region,
+ const flat_set<ReportID> &expected_reports,
const unordered_map<NFAVertex, u32> &region_map) {
- /* TODO: only check vertices connected to accept/acceptEOD */
- for (auto v : vertices_range(g)) {
- if (region != region_map.at(v)) {
- continue;
- }
-
- if (is_any_accept(v, g)) {
- return true; /* encountering an actual special in the region is
- * possible but definitely unexpected */
- }
-
- for (auto w : adjacent_vertices_range(v, g)) {
- if (is_any_accept(w, g) && g[v].reports != expected_reports) {
- return true;
- }
- }
- }
- return false;
-}
-
-static
-void processCyclicStateForward(NGHolder &h, NFAVertex cyc,
- const map<u32, RegionInfo> &info,
+ /* TODO: only check vertices connected to accept/acceptEOD */
+ for (auto v : vertices_range(g)) {
+ if (region != region_map.at(v)) {
+ continue;
+ }
+
+ if (is_any_accept(v, g)) {
+ return true; /* encountering an actual special in the region is
+ * possible but definitely unexpected */
+ }
+
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (is_any_accept(w, g) && g[v].reports != expected_reports) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+static
+void processCyclicStateForward(NGHolder &h, NFAVertex cyc,
+ const map<u32, RegionInfo> &info,
const unordered_map<NFAVertex, u32> &region_map,
- set<u32> &deadRegions) {
- u32 region = region_map.at(cyc);
- CharReach cr = h[cyc].char_reach;
- auto reports = h[cyc].reports;
-
+ set<u32> &deadRegions) {
+ u32 region = region_map.at(cyc);
+ CharReach cr = h[cyc].char_reach;
+ auto reports = h[cyc].reports;
+
DEBUG_PRINTF("going forward from %zu/%u\n", h[cyc].index,
- region);
-
- map<u32, RegionInfo>::const_iterator it;
- while ((it = info.find(++region)) != info.end()) {
- NFAVertex v = it->second.entry;
- const CharReach &region_cr = it->second.cr;
- assert(isRegionEntry(h, v, region_map) && !is_special(v, h));
+ region);
+
+ map<u32, RegionInfo>::const_iterator it;
+ while ((it = info.find(++region)) != info.end()) {
+ NFAVertex v = it->second.entry;
+ const CharReach &region_cr = it->second.cr;
+ assert(isRegionEntry(h, v, region_map) && !is_special(v, h));
DEBUG_PRINTF("checking %zu\n", h[v].index);
-
- if (!region_cr.isSubsetOf(cr)) {
- DEBUG_PRINTF("doesn't cover the reach of region %u\n", region);
- break;
- }
-
- if (isOptionalRegion(h, v, region_map)
- && !regionHasUnexpectedAccept(h, region, reports, region_map)) {
+
+ if (!region_cr.isSubsetOf(cr)) {
+ DEBUG_PRINTF("doesn't cover the reach of region %u\n", region);
+ break;
+ }
+
+ if (isOptionalRegion(h, v, region_map)
+ && !regionHasUnexpectedAccept(h, region, reports, region_map)) {
DEBUG_PRINTF("cyclic state %zu leads to optional region leader"
" %zu\n", h[cyc].index, h[v].index);
- deadRegions.insert(region);
- } else if (isSingletonRegion(h, v, region_map)) {
- /* we can use this region as straw and suck in optional regions on
- * the other side. This allows us to transform /a{n,m}/ to /a{n}/ */
- cr = h[v].char_reach;
- reports = h[v].reports;
- DEBUG_PRINTF("%u is straw\n", region);
- assert(cr.isSubsetOf(h[cyc].char_reach));
- if (hasSelfLoop(v, h)) {
- DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region);
- remove_edge(v, v, h);
- }
- } else {
- break;
- }
- }
-}
-
-static
-void processCyclicStateReverse(NGHolder &h, NFAVertex cyc,
- const map<u32, RegionInfo> &info,
+ deadRegions.insert(region);
+ } else if (isSingletonRegion(h, v, region_map)) {
+ /* we can use this region as straw and suck in optional regions on
+ * the other side. This allows us to transform /a{n,m}/ to /a{n}/ */
+ cr = h[v].char_reach;
+ reports = h[v].reports;
+ DEBUG_PRINTF("%u is straw\n", region);
+ assert(cr.isSubsetOf(h[cyc].char_reach));
+ if (hasSelfLoop(v, h)) {
+ DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region);
+ remove_edge(v, v, h);
+ }
+ } else {
+ break;
+ }
+ }
+}
+
+static
+void processCyclicStateReverse(NGHolder &h, NFAVertex cyc,
+ const map<u32, RegionInfo> &info,
const unordered_map<NFAVertex, u32> &region_map,
- set<u32> &deadRegions) {
- u32 region = region_map.at(cyc);
- CharReach cr = h[cyc].char_reach;
- auto reports = h[cyc].reports;
-
+ set<u32> &deadRegions) {
+ u32 region = region_map.at(cyc);
+ CharReach cr = h[cyc].char_reach;
+ auto reports = h[cyc].reports;
+
DEBUG_PRINTF("going back from %zu/%u\n", h[cyc].index, region);
-
- map<u32, RegionInfo>::const_iterator it;
- while ((it = info.find(--region)) != info.end()) {
- NFAVertex v = it->second.entry;
- const CharReach &region_cr = it->second.cr;
- assert(isRegionEntry(h, v, region_map) && !is_special(v, h));
+
+ map<u32, RegionInfo>::const_iterator it;
+ while ((it = info.find(--region)) != info.end()) {
+ NFAVertex v = it->second.entry;
+ const CharReach &region_cr = it->second.cr;
+ assert(isRegionEntry(h, v, region_map) && !is_special(v, h));
DEBUG_PRINTF("checking %zu\n", h[v].index);
-
- if (!region_cr.isSubsetOf(cr)) {
- DEBUG_PRINTF("doesn't cover the reach of region %u\n", region);
- break;
- }
-
- if (isOptionalRegion(h, v, region_map)
- && !regionHasUnexpectedAccept(h, region, reports, region_map)) {
+
+ if (!region_cr.isSubsetOf(cr)) {
+ DEBUG_PRINTF("doesn't cover the reach of region %u\n", region);
+ break;
+ }
+
+ if (isOptionalRegion(h, v, region_map)
+ && !regionHasUnexpectedAccept(h, region, reports, region_map)) {
DEBUG_PRINTF("cyclic state %zu trails optional region leader %zu\n",
- h[cyc].index, h[v].index);
- deadRegions.insert(region);
- } else if (isSingletonRegion(h, v, region_map)) {
- /* we can use this region as a reverse straw and suck in optional
- * regions on the other side. This allows us to transform
- * /^a?a{n}.*b/ to /^a{n}.*b/ */
- cr = h[v].char_reach;
- reports = h[v].reports;
- DEBUG_PRINTF("%u is straw\n", region);
- assert(cr.isSubsetOf(h[cyc].char_reach));
- if (hasSelfLoop(v, h)) {
- DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region);
- remove_edge(v, v, h);
- }
- } else {
- break;
- }
-
- if (!region) { // No wrapping
- break;
- }
- }
-}
-
-static
-map<u32, RegionInfo> buildRegionInfoMap(const NGHolder &g,
+ h[cyc].index, h[v].index);
+ deadRegions.insert(region);
+ } else if (isSingletonRegion(h, v, region_map)) {
+ /* we can use this region as a reverse straw and suck in optional
+ * regions on the other side. This allows us to transform
+ * /^a?a{n}.*b/ to /^a{n}.*b/ */
+ cr = h[v].char_reach;
+ reports = h[v].reports;
+ DEBUG_PRINTF("%u is straw\n", region);
+ assert(cr.isSubsetOf(h[cyc].char_reach));
+ if (hasSelfLoop(v, h)) {
+ DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region);
+ remove_edge(v, v, h);
+ }
+ } else {
+ break;
+ }
+
+ if (!region) { // No wrapping
+ break;
+ }
+ }
+}
+
+static
+map<u32, RegionInfo> buildRegionInfoMap(const NGHolder &g,
const unordered_map<NFAVertex, u32> &region_map) {
- map<u32, RegionInfo> info;
-
- for (auto v : vertices_range(g)) {
- u32 region = region_map.at(v);
- if (is_special(v, g) || region == 0) {
- continue;
- }
-
- RegionInfo &ri = info[region];
- ri.cr |= g[v].char_reach;
- if (isRegionEntry(g, v, region_map)) {
- ri.entry = v;
- }
- }
-
- return info;
-}
-
-static
-bool hasNoStartAnchoring(const NGHolder &h) {
- for (auto v : adjacent_vertices_range(h.start, h)) {
- if (!edge(h.startDs, v, h).second) {
- return false;
- }
- }
- return true;
-}
-
-void removeRegionRedundancy(NGHolder &g, som_type som) {
- auto region_map = assignRegions(g);
-
- map<u32, RegionInfo> info = buildRegionInfoMap(g, region_map);
-
- set<u32> deadRegions;
-
- /* if we are not tracking som, we can treat sds as a cyclic region if there
- * is no anchoring */
- if (!som && hasNoStartAnchoring(g)) {
- processCyclicStateForward(g, g.startDs, info, region_map, deadRegions);
- }
-
- // Walk the region mapping, looking for regions that consist of a single
- // cyclic node.
-
- for (const auto &m : info) {
- // Must not have already been removed
- if (contains(deadRegions, m.first)) {
- continue;
- }
-
- NFAVertex v = m.second.entry;
- /* require a singleton cyclic region */
- if (!hasSelfLoop(v, g) || !isSingletonRegion(g, v, region_map)) {
- continue;
- }
-
- if (som && is_virtual_start(v, g)) {
- continue;
- }
-
- processCyclicStateForward(g, v, info, region_map, deadRegions);
- processCyclicStateReverse(g, v, info, region_map, deadRegions);
- }
-
- if (deadRegions.empty()) {
- return;
- }
-
- vector<NFAVertex> dead;
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
- u32 region = region_map.at(v);
- if (contains(deadRegions, region)) {
- dead.push_back(v);
- }
- }
-
- if (!dead.empty()) {
- DEBUG_PRINTF("removing %zu vertices from %zu dead regions\n",
- dead.size(), deadRegions.size());
- remove_vertices(dead, g);
- }
-}
-
-} // namespace ue2
+ map<u32, RegionInfo> info;
+
+ for (auto v : vertices_range(g)) {
+ u32 region = region_map.at(v);
+ if (is_special(v, g) || region == 0) {
+ continue;
+ }
+
+ RegionInfo &ri = info[region];
+ ri.cr |= g[v].char_reach;
+ if (isRegionEntry(g, v, region_map)) {
+ ri.entry = v;
+ }
+ }
+
+ return info;
+}
+
+static
+bool hasNoStartAnchoring(const NGHolder &h) {
+ for (auto v : adjacent_vertices_range(h.start, h)) {
+ if (!edge(h.startDs, v, h).second) {
+ return false;
+ }
+ }
+ return true;
+}
+
+void removeRegionRedundancy(NGHolder &g, som_type som) {
+ auto region_map = assignRegions(g);
+
+ map<u32, RegionInfo> info = buildRegionInfoMap(g, region_map);
+
+ set<u32> deadRegions;
+
+ /* if we are not tracking som, we can treat sds as a cyclic region if there
+ * is no anchoring */
+ if (!som && hasNoStartAnchoring(g)) {
+ processCyclicStateForward(g, g.startDs, info, region_map, deadRegions);
+ }
+
+ // Walk the region mapping, looking for regions that consist of a single
+ // cyclic node.
+
+ for (const auto &m : info) {
+ // Must not have already been removed
+ if (contains(deadRegions, m.first)) {
+ continue;
+ }
+
+ NFAVertex v = m.second.entry;
+ /* require a singleton cyclic region */
+ if (!hasSelfLoop(v, g) || !isSingletonRegion(g, v, region_map)) {
+ continue;
+ }
+
+ if (som && is_virtual_start(v, g)) {
+ continue;
+ }
+
+ processCyclicStateForward(g, v, info, region_map, deadRegions);
+ processCyclicStateReverse(g, v, info, region_map, deadRegions);
+ }
+
+ if (deadRegions.empty()) {
+ return;
+ }
+
+ vector<NFAVertex> dead;
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ u32 region = region_map.at(v);
+ if (contains(deadRegions, region)) {
+ dead.push_back(v);
+ }
+ }
+
+ if (!dead.empty()) {
+ DEBUG_PRINTF("removing %zu vertices from %zu dead regions\n",
+ dead.size(), deadRegions.size());
+ remove_vertices(dead, g);
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.h b/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.h
index c4b4fc958c..f0b396ca43 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_region_redundancy.h
@@ -1,49 +1,49 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Region Redundancy optimisation pass.
- *
- * Identifies and removes entire regions that are adjacent to a cyclic state
- * with a superset of their character reachability.
- */
-
-#ifndef NG_REGION_REDUNDANCY_H
-#define NG_REGION_REDUNDANCY_H
-
-#include "som/som.h"
-
-namespace ue2 {
-
-class NGHolder;
-
-void removeRegionRedundancy(NGHolder &g, som_type som);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Region Redundancy optimisation pass.
+ *
+ * Identifies and removes entire regions that are adjacent to a cyclic state
+ * with a superset of their character reachability.
+ */
+
+#ifndef NG_REGION_REDUNDANCY_H
+#define NG_REGION_REDUNDANCY_H
+
+#include "som/som.h"
+
+namespace ue2 {
+
+class NGHolder;
+
+void removeRegionRedundancy(NGHolder &g, som_type som);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_repeat.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_repeat.cpp
index 72c7eee3f3..1f63ad3c6f 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_repeat.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_repeat.cpp
@@ -1,329 +1,329 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Bounded repeat analysis.
- */
-#include "ng_repeat.h"
-
-#include "grey.h"
-#include "ng_depth.h"
-#include "ng_holder.h"
-#include "ng_limex_accel.h"
-#include "ng_prune.h"
-#include "ng_reports.h"
-#include "ng_som_util.h"
-#include "ng_util.h"
-#include "nfa/accel.h"
-#include "nfa/limex_limits.h"
-#include "nfa/repeat_internal.h"
-#include "nfa/repeatcompile.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Bounded repeat analysis.
+ */
+#include "ng_repeat.h"
+
+#include "grey.h"
+#include "ng_depth.h"
+#include "ng_holder.h"
+#include "ng_limex_accel.h"
+#include "ng_prune.h"
+#include "ng_reports.h"
+#include "ng_som_util.h"
+#include "ng_util.h"
+#include "nfa/accel.h"
+#include "nfa/limex_limits.h"
+#include "nfa/repeat_internal.h"
+#include "nfa/repeatcompile.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
#include "util/graph_undirected.h"
-#include "util/report_manager.h"
+#include "util/report_manager.h"
#include "util/unordered.h"
-
-#include <algorithm>
-#include <map>
-#include <queue>
+
+#include <algorithm>
+#include <map>
+#include <queue>
#include <unordered_map>
#include <unordered_set>
-
-#include <boost/graph/connected_components.hpp>
-#include <boost/graph/depth_first_search.hpp>
-#include <boost/graph/filtered_graph.hpp>
-#include <boost/graph/reverse_graph.hpp>
-#include <boost/graph/topological_sort.hpp>
-#include <boost/icl/interval_set.hpp>
-
-using namespace std;
+
+#include <boost/graph/connected_components.hpp>
+#include <boost/graph/depth_first_search.hpp>
+#include <boost/graph/filtered_graph.hpp>
+#include <boost/graph/reverse_graph.hpp>
+#include <boost/graph/topological_sort.hpp>
+#include <boost/icl/interval_set.hpp>
+
+using namespace std;
using boost::depth_first_search;
using boost::depth_first_visit;
using boost::make_assoc_property_map;
-
-namespace ue2 {
-
-namespace {
-
+
+namespace ue2 {
+
+namespace {
+
/**
* \brief Filter that retains only edges between vertices with the same
* reachability. Special vertices are dropped.
*/
-template<class Graph>
-struct ReachFilter {
+template<class Graph>
+struct ReachFilter {
ReachFilter() = default;
- explicit ReachFilter(const Graph *g_in) : g(g_in) {}
-
- // Convenience typedefs.
+ explicit ReachFilter(const Graph *g_in) : g(g_in) {}
+
+ // Convenience typedefs.
using Traits = typename boost::graph_traits<Graph>;
using VertexDescriptor = typename Traits::vertex_descriptor;
using EdgeDescriptor = typename Traits::edge_descriptor;
-
+
bool operator()(const VertexDescriptor &v) const {
- assert(g);
- // Disallow special vertices, as otherwise we will try to remove them
- // later.
+ assert(g);
+ // Disallow special vertices, as otherwise we will try to remove them
+ // later.
return !is_special(v, *g);
}
-
+
bool operator()(const EdgeDescriptor &e) const {
assert(g);
- // Vertices must have the same reach.
+ // Vertices must have the same reach.
auto u = source(e, *g), v = target(e, *g);
- const CharReach &cr_u = (*g)[u].char_reach;
- const CharReach &cr_v = (*g)[v].char_reach;
- return cr_u == cr_v;
- }
-
- const Graph *g = nullptr;
-};
-
+ const CharReach &cr_u = (*g)[u].char_reach;
+ const CharReach &cr_v = (*g)[v].char_reach;
+ return cr_u == cr_v;
+ }
+
+ const Graph *g = nullptr;
+};
+
using RepeatGraph = boost::filtered_graph<NGHolder, ReachFilter<NGHolder>,
ReachFilter<NGHolder>>;
-
-struct ReachSubgraph {
- vector<NFAVertex> vertices;
+
+struct ReachSubgraph {
+ vector<NFAVertex> vertices;
depth repeatMin{0};
depth repeatMax{0};
- u32 minPeriod = 1;
- bool is_reset = false;
- enum RepeatType historyType = REPEAT_RING;
- bool bad = false; // if true, ignore this case
-};
-
-} // namespace
-
-static
-void findInitDepths(const NGHolder &g,
+ u32 minPeriod = 1;
+ bool is_reset = false;
+ enum RepeatType historyType = REPEAT_RING;
+ bool bad = false; // if true, ignore this case
+};
+
+} // namespace
+
+static
+void findInitDepths(const NGHolder &g,
unordered_map<NFAVertex, NFAVertexDepth> &depths) {
auto d = calcDepths(g);
-
- for (auto v : vertices_range(g)) {
+
+ for (auto v : vertices_range(g)) {
size_t idx = g[v].index;
- assert(idx < d.size());
+ assert(idx < d.size());
depths.emplace(v, d[idx]);
- }
-}
-
-static
+ }
+}
+
+static
vector<NFAVertex> buildTopoOrder(const RepeatGraph &g) {
/* Note: RepeatGraph is a filtered version of NGHolder and still has
* NFAVertex as its vertex descriptor */
typedef unordered_set<NFAEdge> EdgeSet;
- EdgeSet deadEdges;
-
- // We don't have indices spanning [0,N] on our filtered graph, so we
- // provide a colour map.
+ EdgeSet deadEdges;
+
+ // We don't have indices spanning [0,N] on our filtered graph, so we
+ // provide a colour map.
unordered_map<NFAVertex, boost::default_color_type> colours;
-
- depth_first_search(g, visitor(BackEdges<EdgeSet>(deadEdges)).
- color_map(make_assoc_property_map(colours)));
+
+ depth_first_search(g, visitor(BackEdges<EdgeSet>(deadEdges)).
+ color_map(make_assoc_property_map(colours)));
auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&deadEdges));
-
+
vector<NFAVertex> topoOrder;
- topological_sort(acyclic_g, back_inserter(topoOrder),
- color_map(make_assoc_property_map(colours)));
-
- reverse(topoOrder.begin(), topoOrder.end());
+ topological_sort(acyclic_g, back_inserter(topoOrder),
+ color_map(make_assoc_property_map(colours)));
+
+ reverse(topoOrder.begin(), topoOrder.end());
return topoOrder;
-}
-
-static
-void proper_pred(const NGHolder &g, NFAVertex v,
+}
+
+static
+void proper_pred(const NGHolder &g, NFAVertex v,
unordered_set<NFAVertex> &p) {
- pred(g, v, &p);
- p.erase(v); // self-loops
-}
-
-static
-void proper_succ(const NGHolder &g, NFAVertex v,
+ pred(g, v, &p);
+ p.erase(v); // self-loops
+}
+
+static
+void proper_succ(const NGHolder &g, NFAVertex v,
unordered_set<NFAVertex> &s) {
- succ(g, v, &s);
- s.erase(v); // self-loops
-}
-
-static
-bool roguePredecessor(const NGHolder &g, NFAVertex v,
+ succ(g, v, &s);
+ s.erase(v); // self-loops
+}
+
+static
+bool roguePredecessor(const NGHolder &g, NFAVertex v,
const unordered_set<NFAVertex> &involved,
const unordered_set<NFAVertex> &pred) {
- u32 seen = 0;
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (contains(involved, u)) {
- continue;
- }
- if (!contains(pred, u)) {
+ u32 seen = 0;
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (contains(involved, u)) {
+ continue;
+ }
+ if (!contains(pred, u)) {
DEBUG_PRINTF("%zu is a rogue pred\n", g[u].index);
- return true;
- }
-
- seen++;
- }
-
- // We must have edges from either (a) none of our external predecessors, or
- // (b) all of our external predecessors.
- if (!seen) {
- return false;
- }
- return pred.size() != seen;
-}
-
-static
-bool rogueSuccessor(const NGHolder &g, NFAVertex v,
+ return true;
+ }
+
+ seen++;
+ }
+
+ // We must have edges from either (a) none of our external predecessors, or
+ // (b) all of our external predecessors.
+ if (!seen) {
+ return false;
+ }
+ return pred.size() != seen;
+}
+
+static
+bool rogueSuccessor(const NGHolder &g, NFAVertex v,
const unordered_set<NFAVertex> &involved,
const unordered_set<NFAVertex> &succ) {
- u32 seen = 0;
- for (auto w : adjacent_vertices_range(v, g)) {
- if (contains(involved, w)) {
- continue;
- }
-
- if (!contains(succ, w)) {
+ u32 seen = 0;
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (contains(involved, w)) {
+ continue;
+ }
+
+ if (!contains(succ, w)) {
DEBUG_PRINTF("%zu is a rogue succ\n", g[w].index);
- return true;
- }
-
- seen++;
- }
-
- // We must have edges to either (a) none of our external successors, or
- // (b) all of our external successors.
- if (!seen) {
- return false;
- }
- return succ.size() != seen;
-}
-
-static
-bool hasDifferentTops(const NGHolder &g, const vector<NFAVertex> &verts) {
+ return true;
+ }
+
+ seen++;
+ }
+
+ // We must have edges to either (a) none of our external successors, or
+ // (b) all of our external successors.
+ if (!seen) {
+ return false;
+ }
+ return succ.size() != seen;
+}
+
+static
+bool hasDifferentTops(const NGHolder &g, const vector<NFAVertex> &verts) {
/* TODO: check that we need this now that we allow multiple tops */
const flat_set<u32> *tops = nullptr;
-
- for (auto v : verts) {
- for (const auto &e : in_edges_range(v, g)) {
- NFAVertex u = source(e, g);
- if (u != g.start && u != g.startDs) {
- continue; // Only edges from starts have valid top properties.
- }
+
+ for (auto v : verts) {
+ for (const auto &e : in_edges_range(v, g)) {
+ NFAVertex u = source(e, g);
+ if (u != g.start && u != g.startDs) {
+ continue; // Only edges from starts have valid top properties.
+ }
DEBUG_PRINTF("edge (%zu,%zu) with %zu tops\n", g[u].index,
g[v].index, g[e].tops.size());
if (!tops) {
tops = &g[e].tops;
} else if (g[e].tops != *tops) {
return true; // More than one set of tops.
- }
- }
- }
-
- return false;
-}
-
-static
-bool vertexIsBad(const NGHolder &g, NFAVertex v,
+ }
+ }
+ }
+
+ return false;
+}
+
+static
+bool vertexIsBad(const NGHolder &g, NFAVertex v,
const unordered_set<NFAVertex> &involved,
const unordered_set<NFAVertex> &tail,
const unordered_set<NFAVertex> &pred,
const unordered_set<NFAVertex> &succ,
- const flat_set<ReportID> &reports) {
+ const flat_set<ReportID> &reports) {
DEBUG_PRINTF("check vertex %zu\n", g[v].index);
-
- // We must drop any vertex that is the target of a back-edge within
- // our subgraph. The tail set contains all vertices that are after v in a
- // topo ordering.
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (contains(tail, u)) {
+
+ // We must drop any vertex that is the target of a back-edge within
+ // our subgraph. The tail set contains all vertices that are after v in a
+ // topo ordering.
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (contains(tail, u)) {
DEBUG_PRINTF("back-edge (%zu,%zu) in subgraph found\n",
- g[u].index, g[v].index);
- return true;
- }
- }
-
- // If this vertex has an entry from outside our subgraph, it must have
- // edges from *all* the vertices in pred and no other external entries.
- // Similarly for exits.
- if (roguePredecessor(g, v, involved, pred)) {
+ g[u].index, g[v].index);
+ return true;
+ }
+ }
+
+ // If this vertex has an entry from outside our subgraph, it must have
+ // edges from *all* the vertices in pred and no other external entries.
+ // Similarly for exits.
+ if (roguePredecessor(g, v, involved, pred)) {
DEBUG_PRINTF("preds for %zu not well-formed\n", g[v].index);
- return true;
- }
-
- if (rogueSuccessor(g, v, involved, succ)) {
+ return true;
+ }
+
+ if (rogueSuccessor(g, v, involved, succ)) {
DEBUG_PRINTF("succs for %zu not well-formed\n", g[v].index);
- return true;
- }
-
- // All reporting vertices should have the same reports.
- if (is_match_vertex(v, g) && reports != g[v].reports) {
+ return true;
+ }
+
+ // All reporting vertices should have the same reports.
+ if (is_match_vertex(v, g) && reports != g[v].reports) {
DEBUG_PRINTF("report mismatch to %zu\n", g[v].index);
- return true;
- }
-
- return false;
-}
-
-static
-void splitSubgraph(const NGHolder &g, const deque<NFAVertex> &verts,
- const u32 minNumVertices, queue<ReachSubgraph> &q) {
- DEBUG_PRINTF("entry\n");
-
- // We construct a copy of the graph using just the vertices we want, rather
- // than using a filtered_graph -- this way is faster.
- NGHolder verts_g;
+ return true;
+ }
+
+ return false;
+}
+
+static
+void splitSubgraph(const NGHolder &g, const deque<NFAVertex> &verts,
+ const u32 minNumVertices, queue<ReachSubgraph> &q) {
+ DEBUG_PRINTF("entry\n");
+
+ // We construct a copy of the graph using just the vertices we want, rather
+ // than using a filtered_graph -- this way is faster.
+ NGHolder verts_g;
unordered_map<NFAVertex, NFAVertex> verts_map; // in g -> in verts_g
- fillHolder(&verts_g, g, verts, &verts_map);
-
+ fillHolder(&verts_g, g, verts, &verts_map);
+
const auto ug = make_undirected_graph(verts_g);
-
+
unordered_map<NFAVertex, u32> repeatMap;
-
- size_t num = connected_components(ug, make_assoc_property_map(repeatMap));
- DEBUG_PRINTF("found %zu connected repeat components\n", num);
- assert(num > 0);
-
- vector<ReachSubgraph> rs(num);
-
- for (auto v : verts) {
+
+ size_t num = connected_components(ug, make_assoc_property_map(repeatMap));
+ DEBUG_PRINTF("found %zu connected repeat components\n", num);
+ assert(num > 0);
+
+ vector<ReachSubgraph> rs(num);
+
+ for (auto v : verts) {
assert(!is_special(v, g));
auto vu = verts_map.at(v);
- auto rit = repeatMap.find(vu);
- if (rit == repeatMap.end()) {
- continue; /* not part of a repeat */
- }
- u32 comp_id = rit->second;
- assert(comp_id < num);
- rs[comp_id].vertices.push_back(v);
- }
-
- for (const auto &rsi : rs) {
+ auto rit = repeatMap.find(vu);
+ if (rit == repeatMap.end()) {
+ continue; /* not part of a repeat */
+ }
+ u32 comp_id = rit->second;
+ assert(comp_id < num);
+ rs[comp_id].vertices.push_back(v);
+ }
+
+ for (const auto &rsi : rs) {
if (rsi.vertices.empty()) {
// Empty elements can happen when connected_components finds a
// subgraph consisting entirely of specials (which aren't added to
@@ -331,448 +331,448 @@ void splitSubgraph(const NGHolder &g, const deque<NFAVertex> &verts,
// these, so we skip them.
continue;
}
- DEBUG_PRINTF("repeat with %zu vertices\n", rsi.vertices.size());
- if (rsi.vertices.size() >= minNumVertices) {
- DEBUG_PRINTF("enqueuing\n");
- q.push(rsi);
- }
- }
-}
-
-static
-void findFirstReports(const NGHolder &g, const ReachSubgraph &rsi,
- flat_set<ReportID> &reports) {
- for (auto v : rsi.vertices) {
- if (is_match_vertex(v, g)) {
- reports = g[v].reports;
- return;
- }
- }
-}
-
-static
-void checkReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs,
- const u32 minNumVertices) {
- if (rs.empty()) {
- return;
- }
-
- DEBUG_PRINTF("%zu subgraphs\n", rs.size());
-
- vector<ReachSubgraph> rs_out;
-
- queue<ReachSubgraph> q;
- for (const auto &rsi : rs) {
- if (rsi.vertices.size() < minNumVertices) {
- continue;
- }
- q.push(rsi);
- }
-
- while (!q.empty()) {
- const ReachSubgraph &rsi = q.front();
-
- if (rsi.vertices.size() < minNumVertices) {
- q.pop(); // Too small for consideration as a repeat.
- continue;
- }
-
- DEBUG_PRINTF("subgraph with %zu vertices\n", rsi.vertices.size());
-
- // Check that all the edges from outside have the same tops. TODO: we
- // don't have to throw the whole subgraph out, we could do this check
- // on a per vertex basis.
- if (hasDifferentTops(g, rsi.vertices)) {
- DEBUG_PRINTF("different tops!\n");
- q.pop();
- continue;
- }
-
+ DEBUG_PRINTF("repeat with %zu vertices\n", rsi.vertices.size());
+ if (rsi.vertices.size() >= minNumVertices) {
+ DEBUG_PRINTF("enqueuing\n");
+ q.push(rsi);
+ }
+ }
+}
+
+static
+void findFirstReports(const NGHolder &g, const ReachSubgraph &rsi,
+ flat_set<ReportID> &reports) {
+ for (auto v : rsi.vertices) {
+ if (is_match_vertex(v, g)) {
+ reports = g[v].reports;
+ return;
+ }
+ }
+}
+
+static
+void checkReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs,
+ const u32 minNumVertices) {
+ if (rs.empty()) {
+ return;
+ }
+
+ DEBUG_PRINTF("%zu subgraphs\n", rs.size());
+
+ vector<ReachSubgraph> rs_out;
+
+ queue<ReachSubgraph> q;
+ for (const auto &rsi : rs) {
+ if (rsi.vertices.size() < minNumVertices) {
+ continue;
+ }
+ q.push(rsi);
+ }
+
+ while (!q.empty()) {
+ const ReachSubgraph &rsi = q.front();
+
+ if (rsi.vertices.size() < minNumVertices) {
+ q.pop(); // Too small for consideration as a repeat.
+ continue;
+ }
+
+ DEBUG_PRINTF("subgraph with %zu vertices\n", rsi.vertices.size());
+
+ // Check that all the edges from outside have the same tops. TODO: we
+ // don't have to throw the whole subgraph out, we could do this check
+ // on a per vertex basis.
+ if (hasDifferentTops(g, rsi.vertices)) {
+ DEBUG_PRINTF("different tops!\n");
+ q.pop();
+ continue;
+ }
+
unordered_set<NFAVertex> involved(rsi.vertices.begin(),
rsi.vertices.end());
unordered_set<NFAVertex> tail(involved); // to look for back-edges.
unordered_set<NFAVertex> pred, succ;
- proper_pred(g, rsi.vertices.front(), pred);
- proper_succ(g, rsi.vertices.back(), succ);
-
- flat_set<ReportID> reports;
- findFirstReports(g, rsi, reports);
-
- bool recalc = false;
- deque<NFAVertex> verts;
-
- for (auto v : rsi.vertices) {
- tail.erase(v); // now contains all vertices _after_ this one.
-
- if (vertexIsBad(g, v, involved, tail, pred, succ, reports)) {
- recalc = true;
- continue;
- }
-
- verts.push_back(v);
- }
-
- if (recalc) {
- if (verts.size() < minNumVertices) {
- DEBUG_PRINTF("subgraph got too small\n");
- q.pop();
- continue;
- }
- splitSubgraph(g, verts, minNumVertices, q);
- } else {
- DEBUG_PRINTF("subgraph is ok\n");
- rs_out.push_back(rsi);
- }
- q.pop();
- }
-
- rs.swap(rs_out);
-}
-
-namespace {
-class DistanceSet {
-private:
- // We use boost::icl to do the heavy lifting.
- typedef boost::icl::closed_interval<u32> ClosedInterval;
- typedef boost::icl::interval_set<u32, std::less, ClosedInterval>
- IntervalSet;
- IntervalSet distances;
-public:
- // Add a distance.
- void insert(u32 d) {
- distances.insert(d);
- }
-
- void add(const DistanceSet &a) {
- distances += a.distances; // union operation
- }
-
- // Increment all the distances by one and add.
- void add_incremented(const DistanceSet &a) {
- for (const auto &d : a.distances) {
- u32 lo = lower(d) + 1;
- u32 hi = upper(d) + 1;
- distances.insert(boost::icl::construct<ClosedInterval>(lo, hi));
- }
- }
-
-#ifdef DEBUG
- void dump() const {
- if (distances.empty()) {
- printf("<empty>");
- return;
- }
-
- for (const auto &d : distances) {
- printf("[%u,%u] ", lower(d), upper(d));
- }
- }
-#endif
-
- // True if this distance set is a single contiguous interval.
- bool is_contiguous() const {
- IntervalSet::const_iterator it = distances.begin();
- if (it == distances.end()) {
- return false;
- }
- ++it;
- return (it == distances.end());
- }
-
- pair<u32, u32> get_range() const {
- assert(is_contiguous());
- return make_pair(lower(distances), upper(distances));
- }
-};
-}
-
-/**
- * Returns false if the given bounds are too large to be implemented with our
- * runtime engines that handle bounded repeats.
- */
-static
-bool tooLargeToImplement(const depth &repeatMin, const depth &repeatMax) {
- if (!repeatMin.is_finite()) {
- DEBUG_PRINTF("non-finite min bound %s\n", repeatMin.str().c_str());
- assert(0); // this is a surprise!
- return true;
- }
-
- if ((u32)repeatMin >= REPEAT_INF) {
- DEBUG_PRINTF("min bound %s too large\n", repeatMin.str().c_str());
- return true;
- }
-
- if (repeatMax.is_finite() && (u32)repeatMax >= REPEAT_INF) {
- DEBUG_PRINTF("finite max bound %s too large\n", repeatMax.str().c_str());
- return true;
- }
-
- return false;
-}
-
-/** Returns false if the graph is not a supported bounded repeat. */
-static
-bool processSubgraph(const NGHolder &g, ReachSubgraph &rsi,
- u32 minNumVertices) {
- DEBUG_PRINTF("reach subgraph has %zu vertices\n", rsi.vertices.size());
-
- if (rsi.vertices.size() < minNumVertices) {
- DEBUG_PRINTF("too small, min is %u\n", minNumVertices);
- return false;
- }
-
- NFAVertex first = rsi.vertices.front();
- NFAVertex last = rsi.vertices.back();
-
+ proper_pred(g, rsi.vertices.front(), pred);
+ proper_succ(g, rsi.vertices.back(), succ);
+
+ flat_set<ReportID> reports;
+ findFirstReports(g, rsi, reports);
+
+ bool recalc = false;
+ deque<NFAVertex> verts;
+
+ for (auto v : rsi.vertices) {
+ tail.erase(v); // now contains all vertices _after_ this one.
+
+ if (vertexIsBad(g, v, involved, tail, pred, succ, reports)) {
+ recalc = true;
+ continue;
+ }
+
+ verts.push_back(v);
+ }
+
+ if (recalc) {
+ if (verts.size() < minNumVertices) {
+ DEBUG_PRINTF("subgraph got too small\n");
+ q.pop();
+ continue;
+ }
+ splitSubgraph(g, verts, minNumVertices, q);
+ } else {
+ DEBUG_PRINTF("subgraph is ok\n");
+ rs_out.push_back(rsi);
+ }
+ q.pop();
+ }
+
+ rs.swap(rs_out);
+}
+
+namespace {
+class DistanceSet {
+private:
+ // We use boost::icl to do the heavy lifting.
+ typedef boost::icl::closed_interval<u32> ClosedInterval;
+ typedef boost::icl::interval_set<u32, std::less, ClosedInterval>
+ IntervalSet;
+ IntervalSet distances;
+public:
+ // Add a distance.
+ void insert(u32 d) {
+ distances.insert(d);
+ }
+
+ void add(const DistanceSet &a) {
+ distances += a.distances; // union operation
+ }
+
+ // Increment all the distances by one and add.
+ void add_incremented(const DistanceSet &a) {
+ for (const auto &d : a.distances) {
+ u32 lo = lower(d) + 1;
+ u32 hi = upper(d) + 1;
+ distances.insert(boost::icl::construct<ClosedInterval>(lo, hi));
+ }
+ }
+
+#ifdef DEBUG
+ void dump() const {
+ if (distances.empty()) {
+ printf("<empty>");
+ return;
+ }
+
+ for (const auto &d : distances) {
+ printf("[%u,%u] ", lower(d), upper(d));
+ }
+ }
+#endif
+
+ // True if this distance set is a single contiguous interval.
+ bool is_contiguous() const {
+ IntervalSet::const_iterator it = distances.begin();
+ if (it == distances.end()) {
+ return false;
+ }
+ ++it;
+ return (it == distances.end());
+ }
+
+ pair<u32, u32> get_range() const {
+ assert(is_contiguous());
+ return make_pair(lower(distances), upper(distances));
+ }
+};
+}
+
+/**
+ * Returns false if the given bounds are too large to be implemented with our
+ * runtime engines that handle bounded repeats.
+ */
+static
+bool tooLargeToImplement(const depth &repeatMin, const depth &repeatMax) {
+ if (!repeatMin.is_finite()) {
+ DEBUG_PRINTF("non-finite min bound %s\n", repeatMin.str().c_str());
+ assert(0); // this is a surprise!
+ return true;
+ }
+
+ if ((u32)repeatMin >= REPEAT_INF) {
+ DEBUG_PRINTF("min bound %s too large\n", repeatMin.str().c_str());
+ return true;
+ }
+
+ if (repeatMax.is_finite() && (u32)repeatMax >= REPEAT_INF) {
+ DEBUG_PRINTF("finite max bound %s too large\n", repeatMax.str().c_str());
+ return true;
+ }
+
+ return false;
+}
+
+/** Returns false if the graph is not a supported bounded repeat. */
+static
+bool processSubgraph(const NGHolder &g, ReachSubgraph &rsi,
+ u32 minNumVertices) {
+ DEBUG_PRINTF("reach subgraph has %zu vertices\n", rsi.vertices.size());
+
+ if (rsi.vertices.size() < minNumVertices) {
+ DEBUG_PRINTF("too small, min is %u\n", minNumVertices);
+ return false;
+ }
+
+ NFAVertex first = rsi.vertices.front();
+ NFAVertex last = rsi.vertices.back();
+
typedef unordered_map<NFAVertex, DistanceSet> DistanceMap;
- DistanceMap dist;
-
- // Initial distance sets.
- for (auto u : inv_adjacent_vertices_range(first, g)) {
- if (u == first) {
- continue; // no self-loops
- }
+ DistanceMap dist;
+
+ // Initial distance sets.
+ for (auto u : inv_adjacent_vertices_range(first, g)) {
+ if (u == first) {
+ continue; // no self-loops
+ }
DEBUG_PRINTF("pred vertex %zu\n", g[u].index);
- dist[u].insert(0);
- }
-
- for (auto v : rsi.vertices) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == v) {
- continue; // no self-loops
- }
-
- auto di = dist.find(u);
- if (di == dist.end()) {
- assert(0);
- return false;
- }
-
- dist[v].add_incremented(di->second);
- }
- }
-
- // Remove pred distances from our map.
- for (auto u : inv_adjacent_vertices_range(first, g)) {
- if (u == first) {
- continue; // no self-loops
- }
- dist.erase(u);
- }
-
- // Calculate final union of distances.
- DistanceSet final_d;
- for (auto v : adjacent_vertices_range(last, g)) {
- if (v == last) {
- continue; // no self-loops
- }
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == v) {
- continue; // no self-loops
- }
- auto di = dist.find(u);
- if (di == dist.end()) {
- continue;
- }
- final_d.add(di->second);
- }
- }
-
-#ifdef DEBUG
- DEBUG_PRINTF("final_d dists: ");
- final_d.dump();
- printf("\n");
-#endif
-
- if (!final_d.is_contiguous()) {
- // not handled right now
- DEBUG_PRINTF("not contiguous!\n");
- return false;
- }
-
- pair<u32, u32> range = final_d.get_range();
- if (range.first > depth::max_value() || range.second > depth::max_value()) {
- DEBUG_PRINTF("repeat (%u,%u) not representable with depths\n",
- range.first, range.second);
- return false;
- }
+ dist[u].insert(0);
+ }
+
+ for (auto v : rsi.vertices) {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == v) {
+ continue; // no self-loops
+ }
+
+ auto di = dist.find(u);
+ if (di == dist.end()) {
+ assert(0);
+ return false;
+ }
+
+ dist[v].add_incremented(di->second);
+ }
+ }
+
+ // Remove pred distances from our map.
+ for (auto u : inv_adjacent_vertices_range(first, g)) {
+ if (u == first) {
+ continue; // no self-loops
+ }
+ dist.erase(u);
+ }
+
+ // Calculate final union of distances.
+ DistanceSet final_d;
+ for (auto v : adjacent_vertices_range(last, g)) {
+ if (v == last) {
+ continue; // no self-loops
+ }
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == v) {
+ continue; // no self-loops
+ }
+ auto di = dist.find(u);
+ if (di == dist.end()) {
+ continue;
+ }
+ final_d.add(di->second);
+ }
+ }
+
+#ifdef DEBUG
+ DEBUG_PRINTF("final_d dists: ");
+ final_d.dump();
+ printf("\n");
+#endif
+
+ if (!final_d.is_contiguous()) {
+ // not handled right now
+ DEBUG_PRINTF("not contiguous!\n");
+ return false;
+ }
+
+ pair<u32, u32> range = final_d.get_range();
+ if (range.first > depth::max_value() || range.second > depth::max_value()) {
+ DEBUG_PRINTF("repeat (%u,%u) not representable with depths\n",
+ range.first, range.second);
+ return false;
+ }
rsi.repeatMin = depth(range.first);
rsi.repeatMax = depth(range.second);
-
- // If we've got a self-loop anywhere, we've got inf max.
- if (anySelfLoop(g, rsi.vertices.begin(), rsi.vertices.end())) {
- DEBUG_PRINTF("repeat contains self-loop, setting max to INF\n");
- rsi.repeatMax = depth::infinity();
- }
-
- // If our pattern contains a bounded repeat that we wouldn't be able to
- // implement as runtime, then we have no strategy that leads to
- // implementation -- it's not like falling back to a DFA or other
- // non-repeat engine is going to succeed.
- if (tooLargeToImplement(rsi.repeatMin, rsi.repeatMax)) {
- throw CompileError("Pattern too large.");
- }
-
- return true;
-}
-
-static
-bool allPredsInSubgraph(NFAVertex v, const NGHolder &g,
+
+ // If we've got a self-loop anywhere, we've got inf max.
+ if (anySelfLoop(g, rsi.vertices.begin(), rsi.vertices.end())) {
+ DEBUG_PRINTF("repeat contains self-loop, setting max to INF\n");
+ rsi.repeatMax = depth::infinity();
+ }
+
+ // If our pattern contains a bounded repeat that we wouldn't be able to
+ // implement as runtime, then we have no strategy that leads to
+ // implementation -- it's not like falling back to a DFA or other
+ // non-repeat engine is going to succeed.
+ if (tooLargeToImplement(rsi.repeatMin, rsi.repeatMax)) {
+ throw CompileError("Pattern too large.");
+ }
+
+ return true;
+}
+
+static
+bool allPredsInSubgraph(NFAVertex v, const NGHolder &g,
const unordered_set<NFAVertex> &involved) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (!contains(involved, u)) {
- return false;
- }
- }
- return true;
-}
-
-static
-void buildTugTrigger(NGHolder &g, NFAVertex cyclic, NFAVertex v,
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (!contains(involved, u)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+static
+void buildTugTrigger(NGHolder &g, NFAVertex cyclic, NFAVertex v,
const unordered_set<NFAVertex> &involved,
unordered_map<NFAVertex, NFAVertexDepth> &depths,
- vector<NFAVertex> &tugs) {
- if (allPredsInSubgraph(v, g, involved)) {
- // We can transform this vertex into a tug trigger in-place.
+ vector<NFAVertex> &tugs) {
+ if (allPredsInSubgraph(v, g, involved)) {
+ // We can transform this vertex into a tug trigger in-place.
DEBUG_PRINTF("all preds in subgraph, vertex %zu becomes tug\n",
- g[v].index);
- add_edge(cyclic, v, g);
- tugs.push_back(v);
- return;
- }
-
- // Some predecessors of v are not in the subgraph, so we need to clone v
- // and split up its in-edges.
- NFAVertex t = clone_vertex(g, v);
- depths[t] = depths[v];
-
+ g[v].index);
+ add_edge(cyclic, v, g);
+ tugs.push_back(v);
+ return;
+ }
+
+ // Some predecessors of v are not in the subgraph, so we need to clone v
+ // and split up its in-edges.
+ NFAVertex t = clone_vertex(g, v);
+ depths[t] = depths[v];
+
DEBUG_PRINTF("there are other paths, cloned tug %zu from vertex %zu\n",
- g[t].index, g[v].index);
-
- tugs.push_back(t);
- add_edge(cyclic, t, g);
-
- // New vertex gets all of v's successors, including v itself if it's
- // cyclic.
- clone_out_edges(g, v, t);
-}
-
-static
-NFAVertex createCyclic(NGHolder &g, ReachSubgraph &rsi) {
- NFAVertex last = rsi.vertices.back();
- NFAVertex cyclic = clone_vertex(g, last);
- add_edge(cyclic, cyclic, g);
-
+ g[t].index, g[v].index);
+
+ tugs.push_back(t);
+ add_edge(cyclic, t, g);
+
+ // New vertex gets all of v's successors, including v itself if it's
+ // cyclic.
+ clone_out_edges(g, v, t);
+}
+
+static
+NFAVertex createCyclic(NGHolder &g, ReachSubgraph &rsi) {
+ NFAVertex last = rsi.vertices.back();
+ NFAVertex cyclic = clone_vertex(g, last);
+ add_edge(cyclic, cyclic, g);
+
DEBUG_PRINTF("created cyclic vertex %zu\n", g[cyclic].index);
- return cyclic;
-}
-
-static
-NFAVertex createPos(NGHolder &g, ReachSubgraph &rsi) {
- NFAVertex pos = add_vertex(g);
- NFAVertex first = rsi.vertices.front();
-
- g[pos].char_reach = g[first].char_reach;
-
+ return cyclic;
+}
+
+static
+NFAVertex createPos(NGHolder &g, ReachSubgraph &rsi) {
+ NFAVertex pos = add_vertex(g);
+ NFAVertex first = rsi.vertices.front();
+
+ g[pos].char_reach = g[first].char_reach;
+
DEBUG_PRINTF("created pos vertex %zu\n", g[pos].index);
- return pos;
-}
-
-// 2 if v is directly connected to an accept, or 1 if one hop away,
-// or 0 otherwise.
-static
-u32 isCloseToAccept(const NGHolder &g, NFAVertex v) {
- if (is_any_accept(v, g)) {
- return 2;
- }
-
- for (auto w : adjacent_vertices_range(v, g)) {
- if (is_any_accept(w, g)) {
- return 1;
- }
- }
-
- return 0;
-}
-
-static
-u32 unpeelAmount(const NGHolder &g, const ReachSubgraph &rsi) {
- const NFAVertex last = rsi.vertices.back();
- u32 rv = 0;
-
- for (auto v : adjacent_vertices_range(last, g)) {
- rv = max(rv, isCloseToAccept(g, v));
- }
-
- return rv;
-}
-
-static
-void unpeelNearEnd(NGHolder &g, ReachSubgraph &rsi,
+ return pos;
+}
+
+// 2 if v is directly connected to an accept, or 1 if one hop away,
+// or 0 otherwise.
+static
+u32 isCloseToAccept(const NGHolder &g, NFAVertex v) {
+ if (is_any_accept(v, g)) {
+ return 2;
+ }
+
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (is_any_accept(w, g)) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static
+u32 unpeelAmount(const NGHolder &g, const ReachSubgraph &rsi) {
+ const NFAVertex last = rsi.vertices.back();
+ u32 rv = 0;
+
+ for (auto v : adjacent_vertices_range(last, g)) {
+ rv = max(rv, isCloseToAccept(g, v));
+ }
+
+ return rv;
+}
+
+static
+void unpeelNearEnd(NGHolder &g, ReachSubgraph &rsi,
unordered_map<NFAVertex, NFAVertexDepth> &depths,
- vector<NFAVertex> *succs) {
- u32 unpeel = unpeelAmount(g, rsi);
- DEBUG_PRINTF("unpeeling %u vertices\n", unpeel);
-
- while (unpeel) {
- NFAVertex last = rsi.vertices.back();
- NFAVertex first = rsi.vertices.front();
-
- NFAVertex d = clone_vertex(g, last);
- depths[d] = depths[last];
+ vector<NFAVertex> *succs) {
+ u32 unpeel = unpeelAmount(g, rsi);
+ DEBUG_PRINTF("unpeeling %u vertices\n", unpeel);
+
+ while (unpeel) {
+ NFAVertex last = rsi.vertices.back();
+ NFAVertex first = rsi.vertices.front();
+
+ NFAVertex d = clone_vertex(g, last);
+ depths[d] = depths[last];
DEBUG_PRINTF("created vertex %zu\n", g[d].index);
-
- for (auto v : *succs) {
- add_edge(d, v, g);
- }
-
- if (rsi.repeatMin > depth(1)) {
- rsi.repeatMin -= 1;
- } else {
- /* Skip edge for the cyclic state; note that we must clone their
- * edge properties as they may include tops. */
- for (const auto &e : in_edges_range(first, g)) {
- add_edge(source(e, g), d, g[e], g);
- }
- }
-
- succs->clear();
- succs->push_back(d);
-
- rsi.repeatMax -= 1;
-
- assert(rsi.repeatMin > depth(0));
- assert(rsi.repeatMax > depth(0));
-
- unpeel--;
- }
-}
-
-/** Fetch the set of successor vertices of this subgraph. */
-static
-void getSuccessors(const NGHolder &g, const ReachSubgraph &rsi,
- vector<NFAVertex> *succs) {
- assert(!rsi.vertices.empty());
- // Successors come from successors of last vertex.
- NFAVertex last = rsi.vertices.back();
-
- for (auto v : adjacent_vertices_range(last, g)) {
- if (v == last) { /* ignore self loop */
- continue;
- }
- succs->push_back(v);
- }
-}
-
-/** Disconnect the given subgraph from its predecessors and successors in the
- * NFA graph and replace it with a cyclic state. */
-static
-void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi,
+
+ for (auto v : *succs) {
+ add_edge(d, v, g);
+ }
+
+ if (rsi.repeatMin > depth(1)) {
+ rsi.repeatMin -= 1;
+ } else {
+ /* Skip edge for the cyclic state; note that we must clone their
+ * edge properties as they may include tops. */
+ for (const auto &e : in_edges_range(first, g)) {
+ add_edge(source(e, g), d, g[e], g);
+ }
+ }
+
+ succs->clear();
+ succs->push_back(d);
+
+ rsi.repeatMax -= 1;
+
+ assert(rsi.repeatMin > depth(0));
+ assert(rsi.repeatMax > depth(0));
+
+ unpeel--;
+ }
+}
+
+/** Fetch the set of successor vertices of this subgraph. */
+static
+void getSuccessors(const NGHolder &g, const ReachSubgraph &rsi,
+ vector<NFAVertex> *succs) {
+ assert(!rsi.vertices.empty());
+ // Successors come from successors of last vertex.
+ NFAVertex last = rsi.vertices.back();
+
+ for (auto v : adjacent_vertices_range(last, g)) {
+ if (v == last) { /* ignore self loop */
+ continue;
+ }
+ succs->push_back(v);
+ }
+}
+
+/** Disconnect the given subgraph from its predecessors and successors in the
+ * NFA graph and replace it with a cyclic state. */
+static
+void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi,
vector<BoundedRepeatData> *repeats,
unordered_map<NFAVertex, NFAVertexDepth> &depths,
unordered_set<NFAVertex> &created) {
- assert(!rsi.bad);
+ assert(!rsi.bad);
/* As we may need to unpeel 2 vertices, we need the width to be more than 2.
* This should only happen if the graph did not have redundancy pass
* performed on as vertex count checks would be prevent us reaching here.
@@ -780,396 +780,396 @@ void replaceSubgraphWithSpecial(NGHolder &g, ReachSubgraph &rsi,
if (rsi.repeatMax <= depth(2)) {
return;
}
- assert(rsi.repeatMin > depth(0));
- assert(rsi.repeatMax >= rsi.repeatMin);
+ assert(rsi.repeatMin > depth(0));
+ assert(rsi.repeatMax >= rsi.repeatMin);
assert(rsi.repeatMax > depth(2));
-
- DEBUG_PRINTF("entry\n");
-
+
+ DEBUG_PRINTF("entry\n");
+
const unordered_set<NFAVertex> involved(rsi.vertices.begin(),
- rsi.vertices.end());
- vector<NFAVertex> succs;
- getSuccessors(g, rsi, &succs);
-
- unpeelNearEnd(g, rsi, depths, &succs);
-
- // Create our replacement cyclic state with the same reachability and
- // report info as the last vertex in our topo-ordered list.
- NFAVertex cyclic = createCyclic(g, rsi);
- created.insert(cyclic);
-
- // One more special vertex is necessary: the positive trigger (same
- // reach as cyclic).
- NFAVertex pos_trigger = createPos(g, rsi);
- created.insert(pos_trigger);
- add_edge(pos_trigger, cyclic, g);
-
- // Update depths for our new vertices.
- NFAVertex first = rsi.vertices.front(), last = rsi.vertices.back();
- depths[pos_trigger] = depths[first];
- depths[cyclic].fromStart =
- unionDepthMinMax(depths[first].fromStart, depths[last].fromStart);
- depths[cyclic].fromStartDotStar = unionDepthMinMax(
- depths[first].fromStartDotStar, depths[last].fromStartDotStar);
-
- // Wire predecessors to positive trigger.
- for (const auto &e : in_edges_range(first, g)) {
- add_edge(source(e, g), pos_trigger, g[e], g);
- }
-
- // Wire cyclic state to tug trigger states built from successors.
- vector<NFAVertex> tugs;
- for (auto v : succs) {
- buildTugTrigger(g, cyclic, v, involved, depths, tugs);
- }
- created.insert(tugs.begin(), tugs.end());
- assert(!tugs.empty());
-
- // Wire pos trigger to tugs if min repeat is one -- this deals with cases
- // where we can get a pos and tug trigger on the same byte.
- if (rsi.repeatMin == depth(1)) {
- for (auto v : tugs) {
- add_edge(pos_trigger, v, g);
- }
- }
-
- // Remove the vertices/edges in the subgraph.
- remove_vertices(rsi.vertices, g, false);
- erase_all(&depths, rsi.vertices);
-
- repeats->push_back(BoundedRepeatData(rsi.historyType, rsi.repeatMin,
- rsi.repeatMax, rsi.minPeriod, cyclic,
- pos_trigger, tugs));
-}
-
-/** Variant for Rose-specific graphs that terminate in a sole accept, so we can
- * use a "lazy tug". See UE-1636. */
-static
-void replaceSubgraphWithLazySpecial(NGHolder &g, ReachSubgraph &rsi,
- vector<BoundedRepeatData> *repeats,
+ rsi.vertices.end());
+ vector<NFAVertex> succs;
+ getSuccessors(g, rsi, &succs);
+
+ unpeelNearEnd(g, rsi, depths, &succs);
+
+ // Create our replacement cyclic state with the same reachability and
+ // report info as the last vertex in our topo-ordered list.
+ NFAVertex cyclic = createCyclic(g, rsi);
+ created.insert(cyclic);
+
+ // One more special vertex is necessary: the positive trigger (same
+ // reach as cyclic).
+ NFAVertex pos_trigger = createPos(g, rsi);
+ created.insert(pos_trigger);
+ add_edge(pos_trigger, cyclic, g);
+
+ // Update depths for our new vertices.
+ NFAVertex first = rsi.vertices.front(), last = rsi.vertices.back();
+ depths[pos_trigger] = depths[first];
+ depths[cyclic].fromStart =
+ unionDepthMinMax(depths[first].fromStart, depths[last].fromStart);
+ depths[cyclic].fromStartDotStar = unionDepthMinMax(
+ depths[first].fromStartDotStar, depths[last].fromStartDotStar);
+
+ // Wire predecessors to positive trigger.
+ for (const auto &e : in_edges_range(first, g)) {
+ add_edge(source(e, g), pos_trigger, g[e], g);
+ }
+
+ // Wire cyclic state to tug trigger states built from successors.
+ vector<NFAVertex> tugs;
+ for (auto v : succs) {
+ buildTugTrigger(g, cyclic, v, involved, depths, tugs);
+ }
+ created.insert(tugs.begin(), tugs.end());
+ assert(!tugs.empty());
+
+ // Wire pos trigger to tugs if min repeat is one -- this deals with cases
+ // where we can get a pos and tug trigger on the same byte.
+ if (rsi.repeatMin == depth(1)) {
+ for (auto v : tugs) {
+ add_edge(pos_trigger, v, g);
+ }
+ }
+
+ // Remove the vertices/edges in the subgraph.
+ remove_vertices(rsi.vertices, g, false);
+ erase_all(&depths, rsi.vertices);
+
+ repeats->push_back(BoundedRepeatData(rsi.historyType, rsi.repeatMin,
+ rsi.repeatMax, rsi.minPeriod, cyclic,
+ pos_trigger, tugs));
+}
+
+/** Variant for Rose-specific graphs that terminate in a sole accept, so we can
+ * use a "lazy tug". See UE-1636. */
+static
+void replaceSubgraphWithLazySpecial(NGHolder &g, ReachSubgraph &rsi,
+ vector<BoundedRepeatData> *repeats,
unordered_map<NFAVertex, NFAVertexDepth> &depths,
unordered_set<NFAVertex> &created) {
- assert(!rsi.bad);
- assert(rsi.repeatMin);
- assert(rsi.repeatMax >= rsi.repeatMin);
-
- DEBUG_PRINTF("entry\n");
-
+ assert(!rsi.bad);
+ assert(rsi.repeatMin);
+ assert(rsi.repeatMax >= rsi.repeatMin);
+
+ DEBUG_PRINTF("entry\n");
+
const unordered_set<NFAVertex> involved(rsi.vertices.begin(),
rsi.vertices.end());
- vector<NFAVertex> succs;
- getSuccessors(g, rsi, &succs);
-
- // Create our replacement cyclic state with the same reachability and
- // report info as the last vertex in our topo-ordered list.
- NFAVertex cyclic = createCyclic(g, rsi);
- created.insert(cyclic);
-
- // One more special vertex is necessary: the positive trigger (same
- // reach as cyclic).
- NFAVertex pos_trigger = createPos(g, rsi);
- created.insert(pos_trigger);
- add_edge(pos_trigger, cyclic, g);
-
- // Update depths for our new vertices.
- NFAVertex first = rsi.vertices.front(), last = rsi.vertices.back();
- depths[pos_trigger] = depths[first];
- depths[cyclic].fromStart =
- unionDepthMinMax(depths[first].fromStart, depths[last].fromStart);
- depths[cyclic].fromStartDotStar = unionDepthMinMax(
- depths[first].fromStartDotStar, depths[last].fromStartDotStar);
-
- // Wire predecessors to positive trigger.
- for (const auto &e : in_edges_range(first, g)) {
- add_edge(source(e, g), pos_trigger, g[e], g);
- }
-
- // In the rose case, our tug is our cyclic, and it's wired to our
- // successors (which should be just the accept).
- vector<NFAVertex> tugs;
- assert(succs.size() == 1);
- for (auto v : succs) {
- add_edge(cyclic, v, g);
- }
-
- // Wire pos trigger to accept if min repeat is one -- this deals with cases
- // where we can get a pos and tug trigger on the same byte.
- if (rsi.repeatMin == depth(1)) {
- for (auto v : succs) {
- add_edge(pos_trigger, v, g);
- g[pos_trigger].reports = g[cyclic].reports;
- }
- }
-
- // Remove the vertices/edges in the subgraph.
- remove_vertices(rsi.vertices, g, false);
- erase_all(&depths, rsi.vertices);
-
- repeats->push_back(BoundedRepeatData(rsi.historyType, rsi.repeatMin,
- rsi.repeatMax, rsi.minPeriod, cyclic,
- pos_trigger, tugs));
-}
-
-static
-bool isCompBigEnough(const RepeatGraph &rg, const u32 minRepeat) {
- // filtered_graph doesn't filter the num_vertices call.
- size_t n = 0;
- RepeatGraph::vertex_iterator vi, ve;
- for (tie(vi, ve) = vertices(rg); vi != ve; ++vi) {
- if (++n >= minRepeat) {
- return true;
- }
- }
- return false;
-}
-
-// Marks the subgraph as bad if it can't be handled.
-static
-void reprocessSubgraph(const NGHolder &h, const Grey &grey,
- ReachSubgraph &rsi) {
- vector<ReachSubgraph> rs(1, rsi);
- checkReachSubgraphs(h, rs, grey.minExtBoundedRepeatSize);
- if (rs.size() != 1) {
- DEBUG_PRINTF("subgraph split into %zu\n", rs.size());
- rsi.bad = true;
- return;
- }
-
- rsi = rs.back(); // Potentially modified.
-
- if (processSubgraph(h, rsi, grey.minExtBoundedRepeatSize)) {
- DEBUG_PRINTF("reprocessed subgraph is {%s,%s} repeat\n",
- rsi.repeatMin.str().c_str(), rsi.repeatMax.str().c_str());
- } else {
- DEBUG_PRINTF("reprocessed subgraph is bad\n");
- rsi.bad = true;
- }
-}
-
-/** Remove vertices from the beginning and end of the vertex set that are
- * involved in other repeats as a result of earlier repeat transformations. */
-static
-bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi,
+ vector<NFAVertex> succs;
+ getSuccessors(g, rsi, &succs);
+
+ // Create our replacement cyclic state with the same reachability and
+ // report info as the last vertex in our topo-ordered list.
+ NFAVertex cyclic = createCyclic(g, rsi);
+ created.insert(cyclic);
+
+ // One more special vertex is necessary: the positive trigger (same
+ // reach as cyclic).
+ NFAVertex pos_trigger = createPos(g, rsi);
+ created.insert(pos_trigger);
+ add_edge(pos_trigger, cyclic, g);
+
+ // Update depths for our new vertices.
+ NFAVertex first = rsi.vertices.front(), last = rsi.vertices.back();
+ depths[pos_trigger] = depths[first];
+ depths[cyclic].fromStart =
+ unionDepthMinMax(depths[first].fromStart, depths[last].fromStart);
+ depths[cyclic].fromStartDotStar = unionDepthMinMax(
+ depths[first].fromStartDotStar, depths[last].fromStartDotStar);
+
+ // Wire predecessors to positive trigger.
+ for (const auto &e : in_edges_range(first, g)) {
+ add_edge(source(e, g), pos_trigger, g[e], g);
+ }
+
+ // In the rose case, our tug is our cyclic, and it's wired to our
+ // successors (which should be just the accept).
+ vector<NFAVertex> tugs;
+ assert(succs.size() == 1);
+ for (auto v : succs) {
+ add_edge(cyclic, v, g);
+ }
+
+ // Wire pos trigger to accept if min repeat is one -- this deals with cases
+ // where we can get a pos and tug trigger on the same byte.
+ if (rsi.repeatMin == depth(1)) {
+ for (auto v : succs) {
+ add_edge(pos_trigger, v, g);
+ g[pos_trigger].reports = g[cyclic].reports;
+ }
+ }
+
+ // Remove the vertices/edges in the subgraph.
+ remove_vertices(rsi.vertices, g, false);
+ erase_all(&depths, rsi.vertices);
+
+ repeats->push_back(BoundedRepeatData(rsi.historyType, rsi.repeatMin,
+ rsi.repeatMax, rsi.minPeriod, cyclic,
+ pos_trigger, tugs));
+}
+
+static
+bool isCompBigEnough(const RepeatGraph &rg, const u32 minRepeat) {
+ // filtered_graph doesn't filter the num_vertices call.
+ size_t n = 0;
+ RepeatGraph::vertex_iterator vi, ve;
+ for (tie(vi, ve) = vertices(rg); vi != ve; ++vi) {
+ if (++n >= minRepeat) {
+ return true;
+ }
+ }
+ return false;
+}
+
+// Marks the subgraph as bad if it can't be handled.
+static
+void reprocessSubgraph(const NGHolder &h, const Grey &grey,
+ ReachSubgraph &rsi) {
+ vector<ReachSubgraph> rs(1, rsi);
+ checkReachSubgraphs(h, rs, grey.minExtBoundedRepeatSize);
+ if (rs.size() != 1) {
+ DEBUG_PRINTF("subgraph split into %zu\n", rs.size());
+ rsi.bad = true;
+ return;
+ }
+
+ rsi = rs.back(); // Potentially modified.
+
+ if (processSubgraph(h, rsi, grey.minExtBoundedRepeatSize)) {
+ DEBUG_PRINTF("reprocessed subgraph is {%s,%s} repeat\n",
+ rsi.repeatMin.str().c_str(), rsi.repeatMax.str().c_str());
+ } else {
+ DEBUG_PRINTF("reprocessed subgraph is bad\n");
+ rsi.bad = true;
+ }
+}
+
+/** Remove vertices from the beginning and end of the vertex set that are
+ * involved in other repeats as a result of earlier repeat transformations. */
+static
+bool peelSubgraph(const NGHolder &g, const Grey &grey, ReachSubgraph &rsi,
const unordered_set<NFAVertex> &created) {
- assert(!rsi.bad);
-
- if (created.empty()) {
- return true;
- }
-
- if (rsi.vertices.empty()) {
- return false;
- }
-
- // Peel involved vertices from the front.
- vector<NFAVertex>::iterator zap = rsi.vertices.end();
- for (auto it = rsi.vertices.begin(), ite = rsi.vertices.end(); it != ite;
- ++it) {
- if (!contains(created, *it)) {
- zap = it;
- break;
- } else {
+ assert(!rsi.bad);
+
+ if (created.empty()) {
+ return true;
+ }
+
+ if (rsi.vertices.empty()) {
+ return false;
+ }
+
+ // Peel involved vertices from the front.
+ vector<NFAVertex>::iterator zap = rsi.vertices.end();
+ for (auto it = rsi.vertices.begin(), ite = rsi.vertices.end(); it != ite;
+ ++it) {
+ if (!contains(created, *it)) {
+ zap = it;
+ break;
+ } else {
DEBUG_PRINTF("%zu is involved in another repeat\n", g[*it].index);
- }
- }
- DEBUG_PRINTF("peeling %zu vertices from front\n",
- distance(rsi.vertices.begin(), zap));
- rsi.vertices.erase(rsi.vertices.begin(), zap);
-
- // Peel involved vertices and vertices with edges to involved vertices from
- // the back; otherwise we may try to transform a POS into a TUG.
- zap = rsi.vertices.begin();
- for (auto it = rsi.vertices.rbegin(), ite = rsi.vertices.rend(); it != ite;
- ++it) {
- if (!contains(created, *it) &&
- !contains_any_of(created, adjacent_vertices(*it, g))) {
- zap = it.base(); // Note: erases everything after it.
- break;
- } else {
+ }
+ }
+ DEBUG_PRINTF("peeling %zu vertices from front\n",
+ distance(rsi.vertices.begin(), zap));
+ rsi.vertices.erase(rsi.vertices.begin(), zap);
+
+ // Peel involved vertices and vertices with edges to involved vertices from
+ // the back; otherwise we may try to transform a POS into a TUG.
+ zap = rsi.vertices.begin();
+ for (auto it = rsi.vertices.rbegin(), ite = rsi.vertices.rend(); it != ite;
+ ++it) {
+ if (!contains(created, *it) &&
+ !contains_any_of(created, adjacent_vertices(*it, g))) {
+ zap = it.base(); // Note: erases everything after it.
+ break;
+ } else {
DEBUG_PRINTF("%zu is involved in another repeat\n", g[*it].index);
- }
- }
- DEBUG_PRINTF("peeling %zu vertices from back\n",
- distance(zap, rsi.vertices.end()));
- rsi.vertices.erase(zap, rsi.vertices.end());
-
- // If vertices in the middle are involved in other repeats, it's a definite
- // no-no.
- for (auto v : rsi.vertices) {
- if (contains(created, v)) {
+ }
+ }
+ DEBUG_PRINTF("peeling %zu vertices from back\n",
+ distance(zap, rsi.vertices.end()));
+ rsi.vertices.erase(zap, rsi.vertices.end());
+
+ // If vertices in the middle are involved in other repeats, it's a definite
+ // no-no.
+ for (auto v : rsi.vertices) {
+ if (contains(created, v)) {
DEBUG_PRINTF("vertex %zu is in another repeat\n", g[v].index);
- return false;
- }
- }
-
- reprocessSubgraph(g, grey, rsi);
- return !rsi.bad;
-}
-
-/** For performance reasons, it's nice not to have an exceptional state right
- * next to a startDs state: that way we can do double-byte accel, whereas
- * otherwise the NEG trigger would limit us to single. This might be a good
- * idea to extend to cyclic states, too. */
-static
-void peelStartDotStar(const NGHolder &g,
+ return false;
+ }
+ }
+
+ reprocessSubgraph(g, grey, rsi);
+ return !rsi.bad;
+}
+
+/** For performance reasons, it's nice not to have an exceptional state right
+ * next to a startDs state: that way we can do double-byte accel, whereas
+ * otherwise the NEG trigger would limit us to single. This might be a good
+ * idea to extend to cyclic states, too. */
+static
+void peelStartDotStar(const NGHolder &g,
const unordered_map<NFAVertex, NFAVertexDepth> &depths,
const Grey &grey, ReachSubgraph &rsi) {
- if (rsi.vertices.size() < 1) {
- return;
- }
-
- NFAVertex first = rsi.vertices.front();
- if (depths.at(first).fromStartDotStar.min == depth(1)) {
+ if (rsi.vertices.size() < 1) {
+ return;
+ }
+
+ NFAVertex first = rsi.vertices.front();
+ if (depths.at(first).fromStartDotStar.min == depth(1)) {
DEBUG_PRINTF("peeling start front vertex %zu\n", g[first].index);
- rsi.vertices.erase(rsi.vertices.begin());
- reprocessSubgraph(g, grey, rsi);
- }
-}
-
-static
-void buildReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs,
- const u32 minNumVertices) {
+ rsi.vertices.erase(rsi.vertices.begin());
+ reprocessSubgraph(g, grey, rsi);
+ }
+}
+
+static
+void buildReachSubgraphs(const NGHolder &g, vector<ReachSubgraph> &rs,
+ const u32 minNumVertices) {
const ReachFilter<NGHolder> fil(&g);
const RepeatGraph rg(g, fil, fil);
-
- if (!isCompBigEnough(rg, minNumVertices)) {
- DEBUG_PRINTF("component not big enough, bailing\n");
- return;
- }
-
+
+ if (!isCompBigEnough(rg, minNumVertices)) {
+ DEBUG_PRINTF("component not big enough, bailing\n");
+ return;
+ }
+
const auto ug = make_undirected_graph(rg);
-
+
unordered_map<NFAVertex, u32> repeatMap;
-
- unsigned int num;
- num = connected_components(ug, make_assoc_property_map(repeatMap));
- DEBUG_PRINTF("found %u connected repeat components\n", num);
-
- // Now, we build a set of topo-ordered ReachSubgraphs.
+
+ unsigned int num;
+ num = connected_components(ug, make_assoc_property_map(repeatMap));
+ DEBUG_PRINTF("found %u connected repeat components\n", num);
+
+ // Now, we build a set of topo-ordered ReachSubgraphs.
vector<NFAVertex> topoOrder = buildTopoOrder(rg);
-
- rs.resize(num);
-
- for (auto v : topoOrder) {
+
+ rs.resize(num);
+
+ for (auto v : topoOrder) {
auto rit = repeatMap.find(v);
- if (rit == repeatMap.end()) {
- continue; /* not part of a repeat */
- }
- u32 comp_id = rit->second;
- assert(comp_id < num);
- rs[comp_id].vertices.push_back(v);
- }
-
-#ifdef DEBUG
- for (size_t i = 0; i < rs.size(); i++) {
- DEBUG_PRINTF("rs %zu has %zu vertices.\n", i, rs[i].vertices.size());
- }
-#endif
-}
-
-static
-bool hasSkipEdges(const NGHolder &g, const ReachSubgraph &rsi) {
- assert(!rsi.vertices.empty());
-
- const NFAVertex first = rsi.vertices.front();
- const NFAVertex last = rsi.vertices.back();
-
- // All of the preds of first must have edges to all the successors of last.
- for (auto u : inv_adjacent_vertices_range(first, g)) {
- for (auto v : adjacent_vertices_range(last, g)) {
- if (!edge(u, v, g).second) {
- return false;
- }
- }
- }
-
- return true;
-}
-
-/* depth info is valid as calculated at entry */
-static
-bool entered_at_fixed_offset(NFAVertex v, const NGHolder &g,
+ if (rit == repeatMap.end()) {
+ continue; /* not part of a repeat */
+ }
+ u32 comp_id = rit->second;
+ assert(comp_id < num);
+ rs[comp_id].vertices.push_back(v);
+ }
+
+#ifdef DEBUG
+ for (size_t i = 0; i < rs.size(); i++) {
+ DEBUG_PRINTF("rs %zu has %zu vertices.\n", i, rs[i].vertices.size());
+ }
+#endif
+}
+
+static
+bool hasSkipEdges(const NGHolder &g, const ReachSubgraph &rsi) {
+ assert(!rsi.vertices.empty());
+
+ const NFAVertex first = rsi.vertices.front();
+ const NFAVertex last = rsi.vertices.back();
+
+ // All of the preds of first must have edges to all the successors of last.
+ for (auto u : inv_adjacent_vertices_range(first, g)) {
+ for (auto v : adjacent_vertices_range(last, g)) {
+ if (!edge(u, v, g).second) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+/* depth info is valid as calculated at entry */
+static
+bool entered_at_fixed_offset(NFAVertex v, const NGHolder &g,
const unordered_map<NFAVertex, NFAVertexDepth> &depths,
const unordered_set<NFAVertex> &reached_by_fixed_tops) {
- DEBUG_PRINTF("|reached_by_fixed_tops| %zu\n",
- reached_by_fixed_tops.size());
- if (is_triggered(g) && !contains(reached_by_fixed_tops, v)) {
- /* can't do this for infix/suffixes unless we know trigger literals
- * can only occur at one offset */
+ DEBUG_PRINTF("|reached_by_fixed_tops| %zu\n",
+ reached_by_fixed_tops.size());
+ if (is_triggered(g) && !contains(reached_by_fixed_tops, v)) {
+ /* can't do this for infix/suffixes unless we know trigger literals
+ * can only occur at one offset */
DEBUG_PRINTF("bad top(s) for %zu\n", g[v].index);
- return false;
- }
-
- if (depths.at(v).fromStartDotStar.min.is_reachable()) {
- DEBUG_PRINTF("reachable from startDs\n");
- return false;
- }
-
- /* look at preds as v may be cyclic */
- const depth &first = depths.at(v).fromStart.min;
- assert(first.is_reachable());
- if (!first.is_finite()) {
- DEBUG_PRINTF("first not finite\n");
- return false;
- }
- DEBUG_PRINTF("first is at least %s from start\n", first.str().c_str());
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- const depth &u_max_depth = depths.at(u).fromStart.max;
+ return false;
+ }
+
+ if (depths.at(v).fromStartDotStar.min.is_reachable()) {
+ DEBUG_PRINTF("reachable from startDs\n");
+ return false;
+ }
+
+ /* look at preds as v may be cyclic */
+ const depth &first = depths.at(v).fromStart.min;
+ assert(first.is_reachable());
+ if (!first.is_finite()) {
+ DEBUG_PRINTF("first not finite\n");
+ return false;
+ }
+ DEBUG_PRINTF("first is at least %s from start\n", first.str().c_str());
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ const depth &u_max_depth = depths.at(u).fromStart.max;
DEBUG_PRINTF("pred %zu max depth %s from start\n", g[u].index,
u_max_depth.str().c_str());
- if (u_max_depth != first - depth(1)) {
- return false;
- }
- }
-
- return true;
-}
-
-static
-NFAVertex buildTriggerStates(NGHolder &g, const vector<CharReach> &trigger,
- u32 top) {
- NFAVertex u = g.start;
- for (const auto &cr : trigger) {
- NFAVertex v = add_vertex(g);
- g[v].char_reach = cr;
- add_edge(u, v, g);
- if (u == g.start) {
+ if (u_max_depth != first - depth(1)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static
+NFAVertex buildTriggerStates(NGHolder &g, const vector<CharReach> &trigger,
+ u32 top) {
+ NFAVertex u = g.start;
+ for (const auto &cr : trigger) {
+ NFAVertex v = add_vertex(g);
+ g[v].char_reach = cr;
+ add_edge(u, v, g);
+ if (u == g.start) {
g[edge(u, v, g)].tops.insert(top);
- }
- u = v;
- }
-
+ }
+ u = v;
+ }
+
DEBUG_PRINTF("trigger len=%zu has sink %zu\n", trigger.size(), g[u].index);
- return u;
-}
-
-/**
- * For triggered graphs, replace the "top" edges from start with the triggers
- * they represent, for the purposes of determining sole entry.
- */
-static
-void addTriggers(NGHolder &g,
- const map<u32, vector<vector<CharReach>>> &triggers) {
- if (!is_triggered(g)) {
- assert(triggers.empty());
- return;
- }
-
- vector<NFAEdge> dead;
- map<u32, vector<NFAVertex>> starts_by_top;
-
- for (const auto &e : out_edges_range(g.start, g)) {
- const NFAVertex &v = target(e, g);
- if (v == g.startDs) {
- continue;
- }
-
+ return u;
+}
+
+/**
+ * For triggered graphs, replace the "top" edges from start with the triggers
+ * they represent, for the purposes of determining sole entry.
+ */
+static
+void addTriggers(NGHolder &g,
+ const map<u32, vector<vector<CharReach>>> &triggers) {
+ if (!is_triggered(g)) {
+ assert(triggers.empty());
+ return;
+ }
+
+ vector<NFAEdge> dead;
+ map<u32, vector<NFAVertex>> starts_by_top;
+
+ for (const auto &e : out_edges_range(g.start, g)) {
+ const NFAVertex &v = target(e, g);
+ if (v == g.startDs) {
+ continue;
+ }
+
const auto &tops = g[e].tops;
-
- // The caller may not have given us complete trigger information. If we
- // don't have any triggers for a particular top, we should just leave
- // it alone.
+
+ // The caller may not have given us complete trigger information. If we
+ // don't have any triggers for a particular top, we should just leave
+ // it alone.
for (u32 top : tops) {
if (!contains(triggers, top)) {
DEBUG_PRINTF("no triggers for top %u\n", top);
@@ -1177,946 +1177,946 @@ void addTriggers(NGHolder &g,
}
starts_by_top[top].push_back(v);
- }
- dead.push_back(e);
+ }
+ dead.push_back(e);
next_edge:;
- }
-
- remove_edges(dead, g);
-
- for (const auto &m : starts_by_top) {
- const auto &top = m.first;
- const auto &starts = m.second;
-
- assert(contains(triggers, top));
- const auto &top_triggers = triggers.at(top);
-
- for (const auto &trigger : top_triggers) {
- NFAVertex u = buildTriggerStates(g, trigger, top);
- for (const auto &v : starts) {
- add_edge_if_not_present(u, v, g);
- }
- }
- }
-}
-
-static
-CharReach predReach(const NGHolder &g, NFAVertex v) {
- CharReach cr;
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- cr |= g[u].char_reach;
- }
- return cr;
-}
-
-/**
- * Filter the given vertex map (which maps from vertices in another graph to
- * vertices in subg) so that it only contains vertices that actually exist in
- * subg.
- */
-static
-void filterMap(const NGHolder &subg,
+ }
+
+ remove_edges(dead, g);
+
+ for (const auto &m : starts_by_top) {
+ const auto &top = m.first;
+ const auto &starts = m.second;
+
+ assert(contains(triggers, top));
+ const auto &top_triggers = triggers.at(top);
+
+ for (const auto &trigger : top_triggers) {
+ NFAVertex u = buildTriggerStates(g, trigger, top);
+ for (const auto &v : starts) {
+ add_edge_if_not_present(u, v, g);
+ }
+ }
+ }
+}
+
+static
+CharReach predReach(const NGHolder &g, NFAVertex v) {
+ CharReach cr;
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ cr |= g[u].char_reach;
+ }
+ return cr;
+}
+
+/**
+ * Filter the given vertex map (which maps from vertices in another graph to
+ * vertices in subg) so that it only contains vertices that actually exist in
+ * subg.
+ */
+static
+void filterMap(const NGHolder &subg,
unordered_map<NFAVertex, NFAVertex> &vmap) {
NGHolder::vertex_iterator vi, ve;
- tie(vi, ve) = vertices(subg);
+ tie(vi, ve) = vertices(subg);
const unordered_set<NFAVertex> remaining_verts(vi, ve);
-
+
unordered_map<NFAVertex, NFAVertex> fmap; // filtered map
-
- for (const auto &m : vmap) {
- if (contains(remaining_verts, m.second)) {
- fmap.insert(m);
- }
- }
-
- vmap.swap(fmap);
-}
-
-/** Construct a graph for sole entry analysis that only considers paths through
- * the bounded repeat. */
-static
-void buildRepeatGraph(NGHolder &rg,
+
+ for (const auto &m : vmap) {
+ if (contains(remaining_verts, m.second)) {
+ fmap.insert(m);
+ }
+ }
+
+ vmap.swap(fmap);
+}
+
+/** Construct a graph for sole entry analysis that only considers paths through
+ * the bounded repeat. */
+static
+void buildRepeatGraph(NGHolder &rg,
unordered_map<NFAVertex, NFAVertex> &rg_map,
- const NGHolder &g, const ReachSubgraph &rsi,
- const map<u32, vector<vector<CharReach>>> &triggers) {
- cloneHolder(rg, g, &rg_map);
- assert(rg.kind == g.kind);
-
- clear_in_edges(rg.accept, rg);
- clear_in_edges(rg.acceptEod, rg);
- add_edge(rg.accept, rg.acceptEod, rg);
-
- // Find the set of vertices in rg involved in the repeat.
+ const NGHolder &g, const ReachSubgraph &rsi,
+ const map<u32, vector<vector<CharReach>>> &triggers) {
+ cloneHolder(rg, g, &rg_map);
+ assert(rg.kind == g.kind);
+
+ clear_in_edges(rg.accept, rg);
+ clear_in_edges(rg.acceptEod, rg);
+ add_edge(rg.accept, rg.acceptEod, rg);
+
+ // Find the set of vertices in rg involved in the repeat.
unordered_set<NFAVertex> rg_involved;
- for (const auto &v : rsi.vertices) {
- assert(contains(rg_map, v));
- rg_involved.insert(rg_map.at(v));
- }
-
- // Remove all out-edges from repeat vertices that aren't to other repeat
- // vertices, then connect terminal repeat vertices to accept.
- for (const auto &v : rsi.vertices) {
- NFAVertex rv = rg_map.at(v);
- remove_out_edge_if(rv, [&](const NFAEdge &e) {
- return !contains(rg_involved, target(e, rg));
- }, rg);
- if (!has_successor(rv, rg)) { // no interior out-edges
- add_edge(rv, rg.accept, rg);
- }
- }
-
- pruneUseless(rg);
-
- if (is_triggered(rg)) {
- // Add vertices for all our triggers
- addTriggers(rg, triggers);
+ for (const auto &v : rsi.vertices) {
+ assert(contains(rg_map, v));
+ rg_involved.insert(rg_map.at(v));
+ }
+
+ // Remove all out-edges from repeat vertices that aren't to other repeat
+ // vertices, then connect terminal repeat vertices to accept.
+ for (const auto &v : rsi.vertices) {
+ NFAVertex rv = rg_map.at(v);
+ remove_out_edge_if(rv, [&](const NFAEdge &e) {
+ return !contains(rg_involved, target(e, rg));
+ }, rg);
+ if (!has_successor(rv, rg)) { // no interior out-edges
+ add_edge(rv, rg.accept, rg);
+ }
+ }
+
+ pruneUseless(rg);
+
+ if (is_triggered(rg)) {
+ // Add vertices for all our triggers
+ addTriggers(rg, triggers);
renumber_vertices(rg);
-
- // We don't know anything about how often this graph is triggered, so we
- // make the start vertex cyclic for the purposes of this analysis ONLY.
- add_edge(rg.start, rg.start, rg);
- }
-
- filterMap(rg, rg_map);
-
- // All of our repeat vertices should have vertices in rg.
- assert(all_of(begin(rsi.vertices), end(rsi.vertices),
- [&](const NFAVertex &v) { return contains(rg_map, v); }));
-}
-
-/**
- * Construct an input DAG which accepts on all entries to the repeat.
- */
-static
-void buildInputGraph(NGHolder &lhs,
+
+ // We don't know anything about how often this graph is triggered, so we
+ // make the start vertex cyclic for the purposes of this analysis ONLY.
+ add_edge(rg.start, rg.start, rg);
+ }
+
+ filterMap(rg, rg_map);
+
+ // All of our repeat vertices should have vertices in rg.
+ assert(all_of(begin(rsi.vertices), end(rsi.vertices),
+ [&](const NFAVertex &v) { return contains(rg_map, v); }));
+}
+
+/**
+ * Construct an input DAG which accepts on all entries to the repeat.
+ */
+static
+void buildInputGraph(NGHolder &lhs,
unordered_map<NFAVertex, NFAVertex> &lhs_map,
- const NGHolder &g, const NFAVertex first,
- const map<u32, vector<vector<CharReach>>> &triggers) {
+ const NGHolder &g, const NFAVertex first,
+ const map<u32, vector<vector<CharReach>>> &triggers) {
DEBUG_PRINTF("building lhs with first=%zu\n", g[first].index);
- cloneHolder(lhs, g, &lhs_map);
- assert(g.kind == lhs.kind);
- addTriggers(lhs, triggers);
+ cloneHolder(lhs, g, &lhs_map);
+ assert(g.kind == lhs.kind);
+ addTriggers(lhs, triggers);
renumber_vertices(lhs);
-
- // Replace each back-edge (u,v) with an edge (startDs,v), which will
- // generate entries at at least the rate of the loop created by that
- // back-edge.
- set<NFAEdge> dead;
- BackEdges<set<NFAEdge> > backEdgeVisitor(dead);
+
+ // Replace each back-edge (u,v) with an edge (startDs,v), which will
+ // generate entries at no less than the rate of the loop created by that
+ // back-edge.
+ set<NFAEdge> dead;
+ BackEdges<set<NFAEdge> > backEdgeVisitor(dead);
depth_first_search(lhs, visitor(backEdgeVisitor).root_vertex(lhs.start));
- for (const auto &e : dead) {
- const NFAVertex u = source(e, lhs), v = target(e, lhs);
- if (u == v) {
- continue; // Self-loops are OK.
- }
-
+ for (const auto &e : dead) {
+ const NFAVertex u = source(e, lhs), v = target(e, lhs);
+ if (u == v) {
+ continue; // Self-loops are OK.
+ }
+
DEBUG_PRINTF("replacing back-edge (%zu,%zu) with edge (startDs,%zu)\n",
lhs[u].index, lhs[v].index, lhs[v].index);
-
- add_edge_if_not_present(lhs.startDs, v, lhs);
- remove_edge(e, lhs);
- }
-
- clear_in_edges(lhs.accept, lhs);
- clear_in_edges(lhs.acceptEod, lhs);
- add_edge(lhs.accept, lhs.acceptEod, lhs);
-
- // Wire the predecessors of the first repeat vertex to accept, then prune.
- NFAVertex lhs_first = lhs_map.at(first);
- for (auto u : inv_adjacent_vertices_range(lhs_first, lhs)) {
- add_edge_if_not_present(u, lhs.accept, lhs);
- }
-
- pruneUseless(lhs);
- filterMap(lhs, lhs_map);
-}
-
-/**
- * Maximum number of vertices in the input DAG to actually allow sole entry
- * calculation (as very large cases make sentClearsTail take a long, long time
- * to complete.)
- */
-static const size_t MAX_SOLE_ENTRY_VERTICES = 10000;
-
-/** True if (1) fixed offset or (2) reentries to this subgraph must involve a
- * character which escapes the repeat, meaning that we only need to store a
- * single offset at runtime. See UE-1361. */
-static
-bool hasSoleEntry(const NGHolder &g, const ReachSubgraph &rsi,
+
+ add_edge_if_not_present(lhs.startDs, v, lhs);
+ remove_edge(e, lhs);
+ }
+
+ clear_in_edges(lhs.accept, lhs);
+ clear_in_edges(lhs.acceptEod, lhs);
+ add_edge(lhs.accept, lhs.acceptEod, lhs);
+
+ // Wire the predecessors of the first repeat vertex to accept, then prune.
+ NFAVertex lhs_first = lhs_map.at(first);
+ for (auto u : inv_adjacent_vertices_range(lhs_first, lhs)) {
+ add_edge_if_not_present(u, lhs.accept, lhs);
+ }
+
+ pruneUseless(lhs);
+ filterMap(lhs, lhs_map);
+}
+
+/**
+ * Maximum number of vertices in the input DAG to actually allow sole entry
+ * calculation (as very large cases make sentClearsTail take a long, long time
+ * to complete.)
+ */
+static const size_t MAX_SOLE_ENTRY_VERTICES = 10000;
+
+/** True if (1) fixed offset or (2) reentries to this subgraph must involve a
+ * character which escapes the repeat, meaning that we only need to store a
+ * single offset at runtime. See UE-1361. */
+static
+bool hasSoleEntry(const NGHolder &g, const ReachSubgraph &rsi,
const unordered_map<NFAVertex, NFAVertexDepth> &depths,
const unordered_set<NFAVertex> &reached_by_fixed_tops,
- const map<u32, vector<vector<CharReach>>> &triggers) {
- DEBUG_PRINTF("checking repeat {%s,%s}\n", rsi.repeatMin.str().c_str(),
- rsi.repeatMax.str().c_str());
- NFAVertex first = rsi.vertices.front();
- const CharReach &repeatReach = g[first].char_reach;
-
- /* trivial case first is at a fixed depth */
- if (entered_at_fixed_offset(first, g, depths, reached_by_fixed_tops)) {
- DEBUG_PRINTF("fixed depth\n");
- return true;
- }
-
- DEBUG_PRINTF("repeat reach is %s\n", describeClass(repeatReach).c_str());
-
- // Nothing can escape a dot repeat.
- if (repeatReach.all()) {
- DEBUG_PRINTF("dot repeat cannot be escaped\n");
- return false;
- }
-
- // Another easy case: if the union of the reach of all entries to the
- // repeat will always escape the repeat, we have sole entry.
- if (predReach(g, first).isSubsetOf(~repeatReach)) {
- DEBUG_PRINTF("pred reach %s, which is subset of repeat escape\n",
- describeClass(predReach(g, first)).c_str());
- return true;
- }
-
- NGHolder rg;
+ const map<u32, vector<vector<CharReach>>> &triggers) {
+ DEBUG_PRINTF("checking repeat {%s,%s}\n", rsi.repeatMin.str().c_str(),
+ rsi.repeatMax.str().c_str());
+ NFAVertex first = rsi.vertices.front();
+ const CharReach &repeatReach = g[first].char_reach;
+
+ /* trivial case: first is at a fixed depth */
+ if (entered_at_fixed_offset(first, g, depths, reached_by_fixed_tops)) {
+ DEBUG_PRINTF("fixed depth\n");
+ return true;
+ }
+
+ DEBUG_PRINTF("repeat reach is %s\n", describeClass(repeatReach).c_str());
+
+ // Nothing can escape a dot repeat.
+ if (repeatReach.all()) {
+ DEBUG_PRINTF("dot repeat cannot be escaped\n");
+ return false;
+ }
+
+ // Another easy case: if the union of the reach of all entries to the
+ // repeat will always escape the repeat, we have sole entry.
+ if (predReach(g, first).isSubsetOf(~repeatReach)) {
+ DEBUG_PRINTF("pred reach %s, which is subset of repeat escape\n",
+ describeClass(predReach(g, first)).c_str());
+ return true;
+ }
+
+ NGHolder rg;
unordered_map<NFAVertex, NFAVertex> rg_map;
- buildRepeatGraph(rg, rg_map, g, rsi, triggers);
- assert(rg.kind == g.kind);
-
- NGHolder lhs;
+ buildRepeatGraph(rg, rg_map, g, rsi, triggers);
+ assert(rg.kind == g.kind);
+
+ NGHolder lhs;
unordered_map<NFAVertex, NFAVertex> lhs_map;
- buildInputGraph(lhs, lhs_map, g, first, triggers);
- assert(lhs.kind == g.kind);
-
- if (num_vertices(lhs) > MAX_SOLE_ENTRY_VERTICES) {
- DEBUG_PRINTF("too many vertices (%zu) for sole entry test.\n",
- num_vertices(lhs));
- return false;
- }
-
- // Split the repeat graph into two regions: vertices in the LHS input DAG
- // are in one region, vertices in the bounded repeat are in another.
- const u32 lhs_region = 1;
- const u32 repeat_region = 2;
+ buildInputGraph(lhs, lhs_map, g, first, triggers);
+ assert(lhs.kind == g.kind);
+
+ if (num_vertices(lhs) > MAX_SOLE_ENTRY_VERTICES) {
+ DEBUG_PRINTF("too many vertices (%zu) for sole entry test.\n",
+ num_vertices(lhs));
+ return false;
+ }
+
+ // Split the repeat graph into two regions: vertices in the LHS input DAG
+ // are in one region, vertices in the bounded repeat are in another.
+ const u32 lhs_region = 1;
+ const u32 repeat_region = 2;
unordered_map<NFAVertex, u32> region_map;
-
- for (const auto &v : rsi.vertices) {
- assert(!is_special(v, g)); // no specials in repeats
- assert(contains(rg_map, v));
+
+ for (const auto &v : rsi.vertices) {
+ assert(!is_special(v, g)); // no specials in repeats
+ assert(contains(rg_map, v));
DEBUG_PRINTF("rg vertex %zu in repeat\n", rg[rg_map.at(v)].index);
- region_map.emplace(rg_map.at(v), repeat_region);
- }
-
- for (const auto &v : vertices_range(rg)) {
- if (!contains(region_map, v)) {
+ region_map.emplace(rg_map.at(v), repeat_region);
+ }
+
+ for (const auto &v : vertices_range(rg)) {
+ if (!contains(region_map, v)) {
DEBUG_PRINTF("rg vertex %zu in lhs (trigger)\n", rg[v].index);
- region_map.emplace(v, lhs_region);
- }
- }
-
- u32 bad_region = 0;
- if (sentClearsTail(rg, region_map, lhs, lhs_region, &bad_region)) {
- DEBUG_PRINTF("input dag clears repeat: sole entry\n");
- return true;
- }
-
- DEBUG_PRINTF("not sole entry\n");
- return false;
-}
-
-namespace {
-
-template<class Graph>
-struct StrawWalker {
- StrawWalker(const NGHolder &h_in, const Graph &g_in,
- const vector<BoundedRepeatData> &all_repeats)
- : h(h_in), g(g_in), repeats(all_repeats) {}
-
- /** True if v is a cyclic that belongs to a bounded repeat (one without an
- * inf max bound). */
- bool isBoundedRepeatCyclic(NFAVertex v) const {
- for (const auto &r : repeats) {
- if (r.repeatMax.is_finite() && r.cyclic == v) {
- return true;
- }
- }
- return false;
- }
-
- NFAVertex step(NFAVertex v) const {
- typename Graph::adjacency_iterator ai, ae;
- tie(ai, ae) = adjacent_vertices(v, g);
- assert(ai != ae);
- NFAVertex next = *ai;
- if (next == v) { // Ignore self loop.
- ++ai;
- if (ai == ae) {
+ region_map.emplace(v, lhs_region);
+ }
+ }
+
+ u32 bad_region = 0;
+ if (sentClearsTail(rg, region_map, lhs, lhs_region, &bad_region)) {
+ DEBUG_PRINTF("input dag clears repeat: sole entry\n");
+ return true;
+ }
+
+ DEBUG_PRINTF("not sole entry\n");
+ return false;
+}
+
+namespace {
+
+template<class Graph>
+struct StrawWalker {
+ StrawWalker(const NGHolder &h_in, const Graph &g_in,
+ const vector<BoundedRepeatData> &all_repeats)
+ : h(h_in), g(g_in), repeats(all_repeats) {}
+
+ /** True if v is a cyclic that belongs to a bounded repeat (one without an
+ * inf max bound). */
+ bool isBoundedRepeatCyclic(NFAVertex v) const {
+ for (const auto &r : repeats) {
+ if (r.repeatMax.is_finite() && r.cyclic == v) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ NFAVertex step(NFAVertex v) const {
+ typename Graph::adjacency_iterator ai, ae;
+ tie(ai, ae) = adjacent_vertices(v, g);
+ assert(ai != ae);
+ NFAVertex next = *ai;
+ if (next == v) { // Ignore self loop.
+ ++ai;
+ if (ai == ae) {
return NGHolder::null_vertex();
- }
- next = *ai;
- }
- ++ai;
- if (ai != ae && *ai == v) { // Ignore self loop
- ++ai;
- }
- if (ai != ae) {
- DEBUG_PRINTF("more than one succ\n");
- set<NFAVertex> succs;
- insert(&succs, adjacent_vertices(v, g));
- succs.erase(v);
- for (tie(ai, ae) = adjacent_vertices(v, g); ai != ae; ++ai) {
- next = *ai;
+ }
+ next = *ai;
+ }
+ ++ai;
+ if (ai != ae && *ai == v) { // Ignore self loop
+ ++ai;
+ }
+ if (ai != ae) {
+ DEBUG_PRINTF("more than one succ\n");
+ set<NFAVertex> succs;
+ insert(&succs, adjacent_vertices(v, g));
+ succs.erase(v);
+ for (tie(ai, ae) = adjacent_vertices(v, g); ai != ae; ++ai) {
+ next = *ai;
DEBUG_PRINTF("checking %zu\n", g[next].index);
- if (next == v) {
- continue;
- }
- set<NFAVertex> lsuccs;
- insert(&lsuccs, adjacent_vertices(next, g));
-
- if (lsuccs != succs) {
- continue;
- }
-
- // Ensure that if v is in connected to accept, the reports
- // on `next` much match.
- if (is_match_vertex(v, h) && g[v].reports != g[next].reports) {
- DEBUG_PRINTF("report mismatch\n");
- continue;
- }
-
- return next;
- }
- DEBUG_PRINTF("bailing\n");
+ if (next == v) {
+ continue;
+ }
+ set<NFAVertex> lsuccs;
+ insert(&lsuccs, adjacent_vertices(next, g));
+
+ if (lsuccs != succs) {
+ continue;
+ }
+
+ // Ensure that if v is connected to accept, the reports
+ // on `next` must match.
+ if (is_match_vertex(v, h) && g[v].reports != g[next].reports) {
+ DEBUG_PRINTF("report mismatch\n");
+ continue;
+ }
+
+ return next;
+ }
+ DEBUG_PRINTF("bailing\n");
return NGHolder::null_vertex();
- }
- return next;
- }
-
- NFAVertex walk(NFAVertex v, vector<NFAVertex> &straw) const {
+ }
+ return next;
+ }
+
+ NFAVertex walk(NFAVertex v, vector<NFAVertex> &straw) const {
DEBUG_PRINTF("walk from %zu\n", g[v].index);
unordered_set<NFAVertex> visited;
- straw.clear();
-
- while (!is_special(v, g)) {
+ straw.clear();
+
+ while (!is_special(v, g)) {
DEBUG_PRINTF("checking %zu\n", g[v].index);
- NFAVertex next = step(v);
+ NFAVertex next = step(v);
if (next == NGHolder::null_vertex()) {
- break;
- }
- if (!visited.insert(next).second) {
+ break;
+ }
+ if (!visited.insert(next).second) {
DEBUG_PRINTF("already visited %zu, bailing\n", g[next].index);
- break; /* don't want to get stuck in any complicated loops */
- }
-
- const CharReach &reach_v = g[v].char_reach;
- const CharReach &reach_next = g[next].char_reach;
- if (!reach_v.isSubsetOf(reach_next)) {
+ break; /* don't want to get stuck in any complicated loops */
+ }
+
+ const CharReach &reach_v = g[v].char_reach;
+ const CharReach &reach_next = g[next].char_reach;
+ if (!reach_v.isSubsetOf(reach_next)) {
DEBUG_PRINTF("%zu's reach is not a superset of %zu's\n",
- g[next].index, g[v].index);
- break;
- }
-
- // If this is cyclic with the right reach, we're done. Note that
- // startDs fulfils this requirement.
- if (hasSelfLoop(next, g) && !isBoundedRepeatCyclic(next)) {
+ g[next].index, g[v].index);
+ break;
+ }
+
+ // If this is cyclic with the right reach, we're done. Note that
+ // startDs fulfils this requirement.
+ if (hasSelfLoop(next, g) && !isBoundedRepeatCyclic(next)) {
DEBUG_PRINTF("found cyclic %zu\n", g[next].index);
- return next;
- }
-
- v = next;
- straw.push_back(v);
- }
-
- straw.clear();
+ return next;
+ }
+
+ v = next;
+ straw.push_back(v);
+ }
+
+ straw.clear();
return NGHolder::null_vertex();
- }
-
-private:
- const NGHolder &h; // underlying graph
- const Graph &g;
- const vector<BoundedRepeatData> &repeats;
-};
-
-} // namespace
-
-static
-NFAVertex walkStrawToCyclicRev(const NGHolder &g, NFAVertex v,
- const vector<BoundedRepeatData> &all_repeats,
- vector<NFAVertex> &straw) {
+ }
+
+private:
+ const NGHolder &h; // underlying graph
+ const Graph &g;
+ const vector<BoundedRepeatData> &repeats;
+};
+
+} // namespace
+
+static
+NFAVertex walkStrawToCyclicRev(const NGHolder &g, NFAVertex v,
+ const vector<BoundedRepeatData> &all_repeats,
+ vector<NFAVertex> &straw) {
typedef boost::reverse_graph<NGHolder, const NGHolder &> RevGraph;
const RevGraph revg(g);
-
- auto cyclic = StrawWalker<RevGraph>(g, revg, all_repeats).walk(v, straw);
- reverse(begin(straw), end(straw)); // path comes from cyclic
- return cyclic;
-}
-
-static
-NFAVertex walkStrawToCyclicFwd(const NGHolder &g, NFAVertex v,
- const vector<BoundedRepeatData> &all_repeats,
- vector<NFAVertex> &straw) {
+
+ auto cyclic = StrawWalker<RevGraph>(g, revg, all_repeats).walk(v, straw);
+ reverse(begin(straw), end(straw)); // path comes from cyclic
+ return cyclic;
+}
+
+static
+NFAVertex walkStrawToCyclicFwd(const NGHolder &g, NFAVertex v,
+ const vector<BoundedRepeatData> &all_repeats,
+ vector<NFAVertex> &straw) {
return StrawWalker<NGHolder>(g, g, all_repeats).walk(v, straw);
-}
-
-/** True if entries to this subgraph must pass through a cyclic state with
- * reachability that is a superset of the reach of the repeat, and
- * reachabilities along this path "nest" into the reaches of their
- * predecessors.
- *
- * This is what is called a 'straw' in the region code. */
-static
-bool hasCyclicSupersetEntryPath(const NGHolder &g, const ReachSubgraph &rsi,
- const vector<BoundedRepeatData> &all_repeats) {
- // Cope with peeling by following a chain of single vertices backwards
- // until we encounter our cyclic, all of which must have superset reach.
- vector<NFAVertex> straw;
- return walkStrawToCyclicRev(g, rsi.vertices.front(), all_repeats, straw) !=
+}
+
+/** True if entries to this subgraph must pass through a cyclic state with
+ * reachability that is a superset of the reach of the repeat, and
+ * reachabilities along this path "nest" into the reaches of their
+ * predecessors.
+ *
+ * This is what is called a 'straw' in the region code. */
+static
+bool hasCyclicSupersetEntryPath(const NGHolder &g, const ReachSubgraph &rsi,
+ const vector<BoundedRepeatData> &all_repeats) {
+ // Cope with peeling by following a chain of single vertices backwards
+ // until we encounter our cyclic, all of which must have superset reach.
+ vector<NFAVertex> straw;
+ return walkStrawToCyclicRev(g, rsi.vertices.front(), all_repeats, straw) !=
NGHolder::null_vertex();
-}
-
-static
-bool hasCyclicSupersetExitPath(const NGHolder &g, const ReachSubgraph &rsi,
- const vector<BoundedRepeatData> &all_repeats) {
- vector<NFAVertex> straw;
- return walkStrawToCyclicFwd(g, rsi.vertices.back(), all_repeats, straw) !=
+}
+
+static
+bool hasCyclicSupersetExitPath(const NGHolder &g, const ReachSubgraph &rsi,
+ const vector<BoundedRepeatData> &all_repeats) {
+ vector<NFAVertex> straw;
+ return walkStrawToCyclicFwd(g, rsi.vertices.back(), all_repeats, straw) !=
NGHolder::null_vertex();
-}
-
-static
-bool leadsOnlyToAccept(const NGHolder &g, const ReachSubgraph &rsi) {
- const NFAVertex u = rsi.vertices.back();
- for (auto v : adjacent_vertices_range(u, g)) {
- if (v != g.accept) {
- return false;
- }
- }
- assert(out_degree(u, g));
- return true;
-}
-
-static
-bool allSimpleHighlander(const ReportManager &rm,
- const flat_set<ReportID> &reports) {
- assert(!reports.empty());
- for (auto report : reports) {
- if (!isSimpleExhaustible(rm.getReport(report))) {
- return false;
- }
- }
-
- return true;
-}
-
-// Finds a single, fairly unrefined trigger for the repeat by walking backwards
-// and collecting the unioned reach at each step.
-static
-vector<CharReach> getUnionedTrigger(const NGHolder &g, const NFAVertex v) {
- const size_t MAX_TRIGGER_STEPS = 32;
-
- vector<CharReach> trigger;
-
+}
+
+static
+bool leadsOnlyToAccept(const NGHolder &g, const ReachSubgraph &rsi) {
+ const NFAVertex u = rsi.vertices.back();
+ for (auto v : adjacent_vertices_range(u, g)) {
+ if (v != g.accept) {
+ return false;
+ }
+ }
+ assert(out_degree(u, g));
+ return true;
+}
+
+static
+bool allSimpleHighlander(const ReportManager &rm,
+ const flat_set<ReportID> &reports) {
+ assert(!reports.empty());
+ for (auto report : reports) {
+ if (!isSimpleExhaustible(rm.getReport(report))) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Finds a single, fairly unrefined trigger for the repeat by walking backwards
+// and collecting the unioned reach at each step.
+static
+vector<CharReach> getUnionedTrigger(const NGHolder &g, const NFAVertex v) {
+ const size_t MAX_TRIGGER_STEPS = 32;
+
+ vector<CharReach> trigger;
+
flat_set<NFAVertex> curr, next;
- insert(&curr, inv_adjacent_vertices(v, g));
-
- if (contains(curr, g.start)) {
- DEBUG_PRINTF("start in repeat's immediate preds\n");
- trigger.push_back(CharReach::dot()); // Trigger could be anything!
- return trigger;
- }
-
- for (size_t num_steps = 0; num_steps < MAX_TRIGGER_STEPS; num_steps++) {
- next.clear();
- trigger.push_back(CharReach());
- CharReach &cr = trigger.back();
-
- for (auto v_c : curr) {
- cr |= g[v_c].char_reach;
- insert(&next, inv_adjacent_vertices(v_c, g));
- }
-
- DEBUG_PRINTF("cr[%zu]=%s\n", num_steps, describeClass(cr).c_str());
-
- if (next.empty() || contains(next, g.start)) {
- break;
- }
-
- curr.swap(next);
- }
-
- reverse(trigger.begin(), trigger.end());
- return trigger;
-}
-
-static
-vector<vector<CharReach>> getRepeatTriggers(const NGHolder &g,
- const NFAVertex sink) {
- const size_t MAX_TRIGGER_STEPS = 32;
- const size_t UNIONED_FALLBACK_THRESHOLD = 100;
-
- using Path = deque<NFAVertex>;
-
- vector<vector<CharReach>> triggers;
-
- deque<Path> q; // work queue
- deque<Path> done; // finished paths
-
- size_t max_len = MAX_TRIGGER_STEPS;
-
- // Find a set of paths leading to vertex v by depth first search.
-
- for (auto u : inv_adjacent_vertices_range(sink, g)) {
- if (is_any_start(u, g)) {
- triggers.push_back({}); // empty
- return triggers;
- }
- q.push_back(Path(1, u));
- }
-
- while (!q.empty()) {
- Path &path = q.front();
- NFAVertex v = path.back();
-
- if (path.size() >= max_len) {
- max_len = min(max_len, path.size());
- done.push_back(path);
- goto next_path;
- }
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (is_any_start(u, g)) {
- // Found an accept. There's no point expanding this path any
- // further, we're done.
- max_len = min(max_len, path.size());
- done.push_back(path);
- goto next_path;
- }
-
- if (path.size() + 1 >= max_len) {
- done.push_back(path);
- done.back().push_back(u);
- } else {
- q.push_back(path); // copy
- q.back().push_back(u);
- }
- }
-
- next_path:
- q.pop_front();
-
- // If our queue or our finished trigger list gets too large, fall back
- // to generating a single trigger with union reach.
- if (q.size() + done.size() > UNIONED_FALLBACK_THRESHOLD) {
- DEBUG_PRINTF("search too large, fall back to union trigger\n");
- triggers.clear();
- triggers.push_back(getUnionedTrigger(g, sink));
- return triggers;
- }
- }
-
- assert(!done.empty());
-
- // Convert our path list into a set of unique triggers.
+ insert(&curr, inv_adjacent_vertices(v, g));
+
+ if (contains(curr, g.start)) {
+ DEBUG_PRINTF("start in repeat's immediate preds\n");
+ trigger.push_back(CharReach::dot()); // Trigger could be anything!
+ return trigger;
+ }
+
+ for (size_t num_steps = 0; num_steps < MAX_TRIGGER_STEPS; num_steps++) {
+ next.clear();
+ trigger.push_back(CharReach());
+ CharReach &cr = trigger.back();
+
+ for (auto v_c : curr) {
+ cr |= g[v_c].char_reach;
+ insert(&next, inv_adjacent_vertices(v_c, g));
+ }
+
+ DEBUG_PRINTF("cr[%zu]=%s\n", num_steps, describeClass(cr).c_str());
+
+ if (next.empty() || contains(next, g.start)) {
+ break;
+ }
+
+ curr.swap(next);
+ }
+
+ reverse(trigger.begin(), trigger.end());
+ return trigger;
+}
+
+static
+vector<vector<CharReach>> getRepeatTriggers(const NGHolder &g,
+ const NFAVertex sink) {
+ const size_t MAX_TRIGGER_STEPS = 32;
+ const size_t UNIONED_FALLBACK_THRESHOLD = 100;
+
+ using Path = deque<NFAVertex>;
+
+ vector<vector<CharReach>> triggers;
+
+ deque<Path> q; // work queue
+ deque<Path> done; // finished paths
+
+ size_t max_len = MAX_TRIGGER_STEPS;
+
+ // Find a set of paths leading to vertex v by depth first search.
+
+ for (auto u : inv_adjacent_vertices_range(sink, g)) {
+ if (is_any_start(u, g)) {
+ triggers.push_back({}); // empty
+ return triggers;
+ }
+ q.push_back(Path(1, u));
+ }
+
+ while (!q.empty()) {
+ Path &path = q.front();
+ NFAVertex v = path.back();
+
+ if (path.size() >= max_len) {
+ max_len = min(max_len, path.size());
+ done.push_back(path);
+ goto next_path;
+ }
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (is_any_start(u, g)) {
+ // Found an accept. There's no point expanding this path any
+ // further, we're done.
+ max_len = min(max_len, path.size());
+ done.push_back(path);
+ goto next_path;
+ }
+
+ if (path.size() + 1 >= max_len) {
+ done.push_back(path);
+ done.back().push_back(u);
+ } else {
+ q.push_back(path); // copy
+ q.back().push_back(u);
+ }
+ }
+
+ next_path:
+ q.pop_front();
+
+ // If our queue or our finished trigger list gets too large, fall back
+ // to generating a single trigger with union reach.
+ if (q.size() + done.size() > UNIONED_FALLBACK_THRESHOLD) {
+ DEBUG_PRINTF("search too large, fall back to union trigger\n");
+ triggers.clear();
+ triggers.push_back(getUnionedTrigger(g, sink));
+ return triggers;
+ }
+ }
+
+ assert(!done.empty());
+
+ // Convert our path list into a set of unique triggers.
ue2_unordered_set<vector<CharReach>> unique_triggers;
- for (const auto &path : done) {
- vector<CharReach> reach_path;
- for (auto jt = path.rbegin(), jte = path.rend(); jt != jte; ++jt) {
- reach_path.push_back(g[*jt].char_reach);
- }
- unique_triggers.insert(reach_path);
- }
-
- insert(&triggers, triggers.end(), unique_triggers);
- sort(triggers.begin(), triggers.end());
- DEBUG_PRINTF("built %zu unique triggers, max_len=%zu\n", triggers.size(),
- max_len);
- return triggers;
-}
-
-static
-void findMinPeriod(const NGHolder &g,
- const map<u32, vector<vector<CharReach>>> &triggers,
- ReachSubgraph &rsi) {
- const auto v = rsi.vertices.front();
- const CharReach &cr = g[v].char_reach;
-
- vector<vector<CharReach>> repeat_triggers;
-
- if (is_triggered(g)) {
- // Construct a temporary copy of the graph that also contains its
- // triggers, potentially lengthening the repeat's triggers.
- NGHolder tg;
- unordered_map<NFAVertex, NFAVertex> tg_map;
- cloneHolder(tg, g, &tg_map);
- addTriggers(tg, triggers);
- assert(contains(tg_map, v));
- repeat_triggers = getRepeatTriggers(tg, tg_map.at(v));
- } else {
- // Not triggered, no need to mutate the graph.
- repeat_triggers = getRepeatTriggers(g, v);
- }
-
- rsi.minPeriod = minPeriod(repeat_triggers, cr, &rsi.is_reset);
- DEBUG_PRINTF("%zu triggers, minPeriod=%u, is_reset=%d\n",
- repeat_triggers.size(), rsi.minPeriod, (int)rsi.is_reset);
-}
-
-static
-void
-selectHistoryScheme(const NGHolder &g, const ReportManager *rm,
- ReachSubgraph &rsi,
+ for (const auto &path : done) {
+ vector<CharReach> reach_path;
+ for (auto jt = path.rbegin(), jte = path.rend(); jt != jte; ++jt) {
+ reach_path.push_back(g[*jt].char_reach);
+ }
+ unique_triggers.insert(reach_path);
+ }
+
+ insert(&triggers, triggers.end(), unique_triggers);
+ sort(triggers.begin(), triggers.end());
+ DEBUG_PRINTF("built %zu unique triggers, max_len=%zu\n", triggers.size(),
+ max_len);
+ return triggers;
+}
+
+static
+void findMinPeriod(const NGHolder &g,
+ const map<u32, vector<vector<CharReach>>> &triggers,
+ ReachSubgraph &rsi) {
+ const auto v = rsi.vertices.front();
+ const CharReach &cr = g[v].char_reach;
+
+ vector<vector<CharReach>> repeat_triggers;
+
+ if (is_triggered(g)) {
+ // Construct a temporary copy of the graph that also contains its
+ // triggers, potentially lengthening the repeat's triggers.
+ NGHolder tg;
+ unordered_map<NFAVertex, NFAVertex> tg_map;
+ cloneHolder(tg, g, &tg_map);
+ addTriggers(tg, triggers);
+ assert(contains(tg_map, v));
+ repeat_triggers = getRepeatTriggers(tg, tg_map.at(v));
+ } else {
+ // Not triggered, no need to mutate the graph.
+ repeat_triggers = getRepeatTriggers(g, v);
+ }
+
+ rsi.minPeriod = minPeriod(repeat_triggers, cr, &rsi.is_reset);
+ DEBUG_PRINTF("%zu triggers, minPeriod=%u, is_reset=%d\n",
+ repeat_triggers.size(), rsi.minPeriod, (int)rsi.is_reset);
+}
+
+static
+void
+selectHistoryScheme(const NGHolder &g, const ReportManager *rm,
+ ReachSubgraph &rsi,
const unordered_map<NFAVertex, NFAVertexDepth> &depths,
const unordered_set<NFAVertex> &reached_by_fixed_tops,
- const map<u32, vector<vector<CharReach>>> &triggers,
- const vector<BoundedRepeatData> &all_repeats,
- const bool simple_model_selection) {
- // {N,} cases use the FIRST history mechanism.
- if (rsi.repeatMax.is_infinite()) {
- DEBUG_PRINTF("selected FIRST history\n");
- rsi.historyType = REPEAT_FIRST;
- return;
- }
-
- /* If we have a repeat which only raises a highlander, only the first match
- * matters */
- if (rm && leadsOnlyToAccept(g, rsi)
- && allSimpleHighlander(*rm, g[rsi.vertices.back()].reports)) {
- DEBUG_PRINTF("selected FIRST history (as highlander)\n");
- rsi.historyType = REPEAT_FIRST;
- rsi.repeatMax = depth::infinity(); /* for consistency */
- return;
- }
-
- // {N,M} cases can use the FIRST mechanism if they follow a cyclic which
- // includes their reachability via a "straw" path. (see UE-1589)
- if (hasCyclicSupersetEntryPath(g, rsi, all_repeats)) {
- DEBUG_PRINTF("selected FIRST history due to cyclic pred with "
- "superset of reach\n");
- rsi.historyType = REPEAT_FIRST;
- rsi.repeatMax = depth::infinity(); /* will continue to pump out matches */
- return;
- }
-
- // Similarly, {N,M} cases can use the FIRST mechanism if they precede a
- // cyclic which includes their reachability via a "straw" path.
- if (hasCyclicSupersetExitPath(g, rsi, all_repeats)) {
- DEBUG_PRINTF("selected FIRST history due to cyclic succ with "
- "superset of reach\n");
- rsi.historyType = REPEAT_FIRST;
- rsi.repeatMax = depth::infinity(); /* will continue to pump out matches */
- return;
- }
-
- // Could have skip edges and therefore be a {0,N} repeat.
- if (rsi.repeatMin == depth(1) && hasSkipEdges(g, rsi)) {
- DEBUG_PRINTF("selected LAST history\n");
- rsi.historyType = REPEAT_LAST;
- return;
- }
-
- // Fill minPeriod, is_reset flags
- findMinPeriod(g, triggers, rsi);
-
- // If we can't re-enter this cyclic state, we have a reset case.
- // This check can be very expensive, so we don't do it if we've been asked
- // for simple model selection.
- if (!simple_model_selection && !rsi.is_reset &&
- hasSoleEntry(g, rsi, depths, reached_by_fixed_tops, triggers)) {
- DEBUG_PRINTF("repeat is sole entry -> reset\n");
- rsi.is_reset = true;
- }
-
- // We can lean on the common selection code for the remainder of our repeat
- // models.
- rsi.historyType = chooseRepeatType(rsi.repeatMin, rsi.repeatMax,
- rsi.minPeriod, rsi.is_reset);
-}
-
-static
-void buildFeeder(NGHolder &g, const BoundedRepeatData &rd,
+ const map<u32, vector<vector<CharReach>>> &triggers,
+ const vector<BoundedRepeatData> &all_repeats,
+ const bool simple_model_selection) {
+ // {N,} cases use the FIRST history mechanism.
+ if (rsi.repeatMax.is_infinite()) {
+ DEBUG_PRINTF("selected FIRST history\n");
+ rsi.historyType = REPEAT_FIRST;
+ return;
+ }
+
+ /* If we have a repeat which only raises a highlander, only the first match
+ * matters */
+ if (rm && leadsOnlyToAccept(g, rsi)
+ && allSimpleHighlander(*rm, g[rsi.vertices.back()].reports)) {
+ DEBUG_PRINTF("selected FIRST history (as highlander)\n");
+ rsi.historyType = REPEAT_FIRST;
+ rsi.repeatMax = depth::infinity(); /* for consistency */
+ return;
+ }
+
+ // {N,M} cases can use the FIRST mechanism if they follow a cyclic which
+ // includes their reachability via a "straw" path. (see UE-1589)
+ if (hasCyclicSupersetEntryPath(g, rsi, all_repeats)) {
+ DEBUG_PRINTF("selected FIRST history due to cyclic pred with "
+ "superset of reach\n");
+ rsi.historyType = REPEAT_FIRST;
+ rsi.repeatMax = depth::infinity(); /* will continue to pump out matches */
+ return;
+ }
+
+ // Similarly, {N,M} cases can use the FIRST mechanism if they precede a
+ // cyclic which includes their reachability via a "straw" path.
+ if (hasCyclicSupersetExitPath(g, rsi, all_repeats)) {
+ DEBUG_PRINTF("selected FIRST history due to cyclic succ with "
+ "superset of reach\n");
+ rsi.historyType = REPEAT_FIRST;
+ rsi.repeatMax = depth::infinity(); /* will continue to pump out matches */
+ return;
+ }
+
+ // Could have skip edges and therefore be a {0,N} repeat.
+ if (rsi.repeatMin == depth(1) && hasSkipEdges(g, rsi)) {
+ DEBUG_PRINTF("selected LAST history\n");
+ rsi.historyType = REPEAT_LAST;
+ return;
+ }
+
+ // Fill minPeriod, is_reset flags
+ findMinPeriod(g, triggers, rsi);
+
+ // If we can't re-enter this cyclic state, we have a reset case.
+ // This check can be very expensive, so we don't do it if we've been asked
+ // for simple model selection.
+ if (!simple_model_selection && !rsi.is_reset &&
+ hasSoleEntry(g, rsi, depths, reached_by_fixed_tops, triggers)) {
+ DEBUG_PRINTF("repeat is sole entry -> reset\n");
+ rsi.is_reset = true;
+ }
+
+ // We can lean on the common selection code for the remainder of our repeat
+ // models.
+ rsi.historyType = chooseRepeatType(rsi.repeatMin, rsi.repeatMax,
+ rsi.minPeriod, rsi.is_reset);
+}
+
+static
+void buildFeeder(NGHolder &g, const BoundedRepeatData &rd,
unordered_set<NFAVertex> &created,
- const vector<NFAVertex> &straw) {
- if (!g[rd.cyclic].char_reach.all()) {
- // Create another cyclic feeder state with flipped reach. It has an
- // edge from the repeat's cyclic state and pos_trigger, an edge to the
- // straw, and edges from every vertex along the straw.
- NFAVertex feeder = clone_vertex(g, rd.cyclic);
- created.insert(feeder);
- g[feeder].char_reach.flip();
- add_edge(feeder, feeder, g);
- add_edge(rd.pos_trigger, feeder, g);
- add_edge(rd.cyclic, feeder, g);
- add_edge(feeder, straw.front(), g);
-
- // An edge from every vertex in the straw.
- for (auto v : straw) {
- add_edge(v, feeder, g);
- }
-
- // An edge to the feeder from the first vertex in the straw and all of
- // its predecessors (other than the feeder itself, we've already
- // created that edge!)
- for (auto u : inv_adjacent_vertices_range(straw.front(), g)) {
- if (u == feeder) {
- continue;
- }
- add_edge(u, feeder, g);
- }
-
+ const vector<NFAVertex> &straw) {
+ if (!g[rd.cyclic].char_reach.all()) {
+ // Create another cyclic feeder state with flipped reach. It has an
+ // edge from the repeat's cyclic state and pos_trigger, an edge to the
+ // straw, and edges from every vertex along the straw.
+ NFAVertex feeder = clone_vertex(g, rd.cyclic);
+ created.insert(feeder);
+ g[feeder].char_reach.flip();
+ add_edge(feeder, feeder, g);
+ add_edge(rd.pos_trigger, feeder, g);
+ add_edge(rd.cyclic, feeder, g);
+ add_edge(feeder, straw.front(), g);
+
+ // An edge from every vertex in the straw.
+ for (auto v : straw) {
+ add_edge(v, feeder, g);
+ }
+
+ // An edge to the feeder from the first vertex in the straw and all of
+ // its predecessors (other than the feeder itself, we've already
+ // created that edge!)
+ for (auto u : inv_adjacent_vertices_range(straw.front(), g)) {
+ if (u == feeder) {
+ continue;
+ }
+ add_edge(u, feeder, g);
+ }
+
DEBUG_PRINTF("added feeder %zu\n", g[feeder].index);
- } else {
- // No neg trigger means feeder is empty, and unnecessary.
- assert(g[rd.pos_trigger].char_reach.all());
- }
-}
-
-/**
- * If we have a leading first repeat, we can split startDs so that it is not
- * cyclic so that the repeat is only triggered once, rather than every byte. If we
- * perform this transform we must create another cyclic state to retrigger the
- * repeat after we see an escape for the repeat.
- *
- * We do not use the anchored start state to allow us to restart the NFA at a deep
- * offset.
- */
-static
-bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd,
+ } else {
+ // No neg trigger means feeder is empty, and unnecessary.
+ assert(g[rd.pos_trigger].char_reach.all());
+ }
+}
+
+/**
+ * If we have a leading first repeat, we can split startDs so that it is not
+ * cyclic so that the repeat is only triggered once, rather than every byte. If we
+ * perform this transform we must create another cyclic state to retrigger the
+ * repeat after we see an escape for the repeat.
+ *
+ * We do not use the anchored start state to allow us to restart the NFA at a deep
+ * offset.
+ */
+static
+bool improveLeadingRepeat(NGHolder &g, BoundedRepeatData &rd,
unordered_set<NFAVertex> &created,
- const vector<BoundedRepeatData> &all_repeats) {
- assert(edge(g.startDs, g.startDs, g).second);
-
- // UE-1617: can rewire FIRST history cases that are preceded by
- // startDs.
- if (rd.type != REPEAT_FIRST) {
- return false;
- }
-
- const CharReach &cyc_cr = g[rd.cyclic].char_reach;
-
- // This transformation is only worth doing if this would allow us to
- // accelerate the cyclic state (UE-2055).
- if ((~cyc_cr).count() > ACCEL_MAX_STOP_CHAR) {
- DEBUG_PRINTF("we wouldn't be able to accel this case\n");
- return false;
- }
-
- vector<NFAVertex> straw;
- NFAVertex pred =
- walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw);
- if (pred != g.startDs) {
- DEBUG_PRINTF("straw walk doesn't lead to startDs\n");
- return false;
- }
-
- // This transformation is only safe if the straw path from startDs that
- // we've discovered can *only* lead to this repeat, since we're going to
- // remove the self-loop on startDs.
+ const vector<BoundedRepeatData> &all_repeats) {
+ assert(edge(g.startDs, g.startDs, g).second);
+
+ // UE-1617: can rewire FIRST history cases that are preceded by
+ // startDs.
+ if (rd.type != REPEAT_FIRST) {
+ return false;
+ }
+
+ const CharReach &cyc_cr = g[rd.cyclic].char_reach;
+
+ // This transformation is only worth doing if this would allow us to
+ // accelerate the cyclic state (UE-2055).
+ if ((~cyc_cr).count() > ACCEL_MAX_STOP_CHAR) {
+ DEBUG_PRINTF("we wouldn't be able to accel this case\n");
+ return false;
+ }
+
+ vector<NFAVertex> straw;
+ NFAVertex pred =
+ walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw);
+ if (pred != g.startDs) {
+ DEBUG_PRINTF("straw walk doesn't lead to startDs\n");
+ return false;
+ }
+
+ // This transformation is only safe if the straw path from startDs that
+ // we've discovered can *only* lead to this repeat, since we're going to
+ // remove the self-loop on startDs.
if (proper_out_degree(g.startDs, g) > 1) {
- DEBUG_PRINTF("startDs has other successors\n");
- return false;
- }
- for (const auto &v : straw) {
- if (proper_out_degree(v, g) != 1) {
+ DEBUG_PRINTF("startDs has other successors\n");
+ return false;
+ }
+ for (const auto &v : straw) {
+ if (proper_out_degree(v, g) != 1) {
DEBUG_PRINTF("branch between startDs and repeat, from vertex %zu\n",
- g[v].index);
- return false;
- }
- }
-
- if (g[rd.pos_trigger].char_reach.count() < ACCEL_MAX_STOP_CHAR) {
- DEBUG_PRINTF("entry is narrow, could be accelerable\n");
- return false;
- }
-
- assert(!straw.empty());
-
- /* If there is overlap between the feeder and the first vertex in the straw
- * fun things happen. TODO: handle fun things happening (requires more
- * edges and more vertices). */
- if (!g[straw.front()].char_reach.isSubsetOf(cyc_cr)) {
- DEBUG_PRINTF("straw has `interesting' reach\n");
- return false;
- }
-
- DEBUG_PRINTF("repeat can be improved by removing startDs loop!\n");
-
- // Remove the self-loop on startDs! What a blast!
- remove_edge(g.startDs, g.startDs, g);
-
- // Wire up feeder state to straw.
- buildFeeder(g, rd, created, straw);
-
- return true;
-}
-
-static
-vector<NFAVertex> makeOwnStraw(NGHolder &g, BoundedRepeatData &rd,
- const vector<NFAVertex> &straw) {
- // Straw runs from startDs to our pos trigger.
- assert(!straw.empty());
- assert(edge(g.startDs, straw.front(), g).second);
- assert(edge(straw.back(), rd.pos_trigger, g).second);
-
- vector<NFAVertex> own_straw;
- for (const auto &v : straw) {
- NFAVertex v2 = clone_vertex(g, v);
- if (hasSelfLoop(v, g)) {
- add_edge(v2, v2, g);
- }
- if (!own_straw.empty()) {
- add_edge(own_straw.back(), v2, g);
- }
- own_straw.push_back(v2);
- }
-
- // Wire our straw to start, not startDs.
- add_edge(g.start, own_straw.front(), g);
-
- // Swap over to using our own straw to get to the POS trigger.
- remove_edge(straw.back(), rd.pos_trigger, g);
- add_edge(own_straw.back(), rd.pos_trigger, g);
-
- return own_straw;
-}
-
-/**
- * Specialized version of improveLeadingRepeat for outfixes, in which we can
- * rewire the straw to start instead of removing the startDs self-loop.
- */
-static
-bool improveLeadingRepeatOutfix(NGHolder &g, BoundedRepeatData &rd,
+ g[v].index);
+ return false;
+ }
+ }
+
+ if (g[rd.pos_trigger].char_reach.count() < ACCEL_MAX_STOP_CHAR) {
+ DEBUG_PRINTF("entry is narrow, could be accelerable\n");
+ return false;
+ }
+
+ assert(!straw.empty());
+
+ /* If there is overlap between the feeder and the first vertex in the straw
+ * fun things happen. TODO: handle fun things happening (requires more
+ * edges and more vertices). */
+ if (!g[straw.front()].char_reach.isSubsetOf(cyc_cr)) {
+ DEBUG_PRINTF("straw has `interesting' reach\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("repeat can be improved by removing startDs loop!\n");
+
+ // Remove the self-loop on startDs! What a blast!
+ remove_edge(g.startDs, g.startDs, g);
+
+ // Wire up feeder state to straw.
+ buildFeeder(g, rd, created, straw);
+
+ return true;
+}
+
+static
+vector<NFAVertex> makeOwnStraw(NGHolder &g, BoundedRepeatData &rd,
+ const vector<NFAVertex> &straw) {
+ // Straw runs from startDs to our pos trigger.
+ assert(!straw.empty());
+ assert(edge(g.startDs, straw.front(), g).second);
+ assert(edge(straw.back(), rd.pos_trigger, g).second);
+
+ vector<NFAVertex> own_straw;
+ for (const auto &v : straw) {
+ NFAVertex v2 = clone_vertex(g, v);
+ if (hasSelfLoop(v, g)) {
+ add_edge(v2, v2, g);
+ }
+ if (!own_straw.empty()) {
+ add_edge(own_straw.back(), v2, g);
+ }
+ own_straw.push_back(v2);
+ }
+
+ // Wire our straw to start, not startDs.
+ add_edge(g.start, own_straw.front(), g);
+
+ // Swap over to using our own straw to get to the POS trigger.
+ remove_edge(straw.back(), rd.pos_trigger, g);
+ add_edge(own_straw.back(), rd.pos_trigger, g);
+
+ return own_straw;
+}
+
+/**
+ * Specialized version of improveLeadingRepeat for outfixes, in which we can
+ * rewire the straw to start instead of removing the startDs self-loop.
+ */
+static
+bool improveLeadingRepeatOutfix(NGHolder &g, BoundedRepeatData &rd,
unordered_set<NFAVertex> &created,
- const vector<BoundedRepeatData> &all_repeats) {
- assert(g.kind == NFA_OUTFIX);
-
- // UE-1617: can rewire FIRST history cases that are preceded by
- // startDs.
- if (rd.type != REPEAT_FIRST) {
- return false;
- }
-
- const CharReach &cyc_cr = g[rd.cyclic].char_reach;
-
- // This transformation is only worth doing if this would allow us to
- // accelerate the cyclic state (UE-2055).
- if ((~cyc_cr).count() > ACCEL_MAX_STOP_CHAR) {
- DEBUG_PRINTF("we wouldn't be able to accel this case\n");
- return false;
- }
-
- vector<NFAVertex> straw;
- NFAVertex pred =
- walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw);
- if (pred != g.startDs) {
- DEBUG_PRINTF("straw walk doesn't lead to startDs\n");
- return false;
- }
-
- if (g[rd.pos_trigger].char_reach.count() < ACCEL_MAX_STOP_CHAR) {
- DEBUG_PRINTF("entry is narrow, could be accelerable\n");
- return false;
- }
-
- assert(!straw.empty());
-
- /* If there is overlap between the feeder and the first vertex in the straw
- * fun things happen. TODO: handle fun things happening (requires more
- * edges and more vertices). */
- if (!g[straw.front()].char_reach.isSubsetOf(cyc_cr)) {
- DEBUG_PRINTF("straw has `interesting' reach\n");
- return false;
- }
-
- DEBUG_PRINTF("repeat can be improved by rebuilding its entry\n");
-
- const auto own_straw = makeOwnStraw(g, rd, straw);
- insert(&created, own_straw);
-
- // Wire up feeder state to our new straw.
- buildFeeder(g, rd, created, own_straw);
-
- // We may no longer need the original straw.
- pruneUseless(g);
-
- return true;
-}
-
-/** Returns true if doing the bounded repeat transformation on this case
- * results in a smaller NFA model. */
-static
-bool givesBetterModel(const NGHolder &g, const vector<ReachSubgraph> &rs) {
- static const u32 MAX_FAST_STATES = 128; // bigger NFAs are fat and slow.
-
- // We use vertex count as an upper bound for the number of states.
- u32 curr_states = num_vertices(g) - 2; // accepts don't have states
-
- if (curr_states <= MAX_FAST_STATES) {
- return false;
- }
- if (curr_states > NFA_MAX_STATES) {
- return true;
- }
-
- u32 expected_states = curr_states;
- for (const auto &rsi : rs) {
- /* may be off as unpeeling not done yet */
- expected_states += 2; /* cyclic and pos */
- expected_states -= rsi.vertices.size();
- }
-
- return ROUNDUP_N(curr_states, 128) != ROUNDUP_N(expected_states, 128);
-}
-
-/** True if this repeat terminates with a vertex that leads only to accept. */
-static
-bool endsInAccept(const NGHolder &g, const ReachSubgraph &rsi) {
- NFAVertex last = rsi.vertices.back();
- return getSoleDestVertex(g, last) == g.accept;
-}
-
-static
-bool endsInAcceptEod(const NGHolder &g, const ReachSubgraph &rsi) {
- NFAVertex last = rsi.vertices.back();
- return getSoleDestVertex(g, last) == g.acceptEod;
-}
-
-namespace {
-class pfti_visitor : public boost::default_dfs_visitor {
-public:
+ const vector<BoundedRepeatData> &all_repeats) {
+ assert(g.kind == NFA_OUTFIX);
+
+ // UE-1617: can rewire FIRST history cases that are preceded by
+ // startDs.
+ if (rd.type != REPEAT_FIRST) {
+ return false;
+ }
+
+ const CharReach &cyc_cr = g[rd.cyclic].char_reach;
+
+ // This transformation is only worth doing if this would allow us to
+ // accelerate the cyclic state (UE-2055).
+ if ((~cyc_cr).count() > ACCEL_MAX_STOP_CHAR) {
+ DEBUG_PRINTF("we wouldn't be able to accel this case\n");
+ return false;
+ }
+
+ vector<NFAVertex> straw;
+ NFAVertex pred =
+ walkStrawToCyclicRev(g, rd.pos_trigger, all_repeats, straw);
+ if (pred != g.startDs) {
+ DEBUG_PRINTF("straw walk doesn't lead to startDs\n");
+ return false;
+ }
+
+ if (g[rd.pos_trigger].char_reach.count() < ACCEL_MAX_STOP_CHAR) {
+ DEBUG_PRINTF("entry is narrow, could be accelerable\n");
+ return false;
+ }
+
+ assert(!straw.empty());
+
+ /* If there is overlap between the feeder and the first vertex in the straw
+ * fun things happen. TODO: handle fun things happening (requires more
+ * edges and more vertices). */
+ if (!g[straw.front()].char_reach.isSubsetOf(cyc_cr)) {
+ DEBUG_PRINTF("straw has `interesting' reach\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("repeat can be improved by rebuilding its entry\n");
+
+ const auto own_straw = makeOwnStraw(g, rd, straw);
+ insert(&created, own_straw);
+
+ // Wire up feeder state to our new straw.
+ buildFeeder(g, rd, created, own_straw);
+
+ // We may no longer need the original straw.
+ pruneUseless(g);
+
+ return true;
+}
+
+/** Returns true if doing the bounded repeat transformation on this case
+ * results in a smaller NFA model. */
+static
+bool givesBetterModel(const NGHolder &g, const vector<ReachSubgraph> &rs) {
+ static const u32 MAX_FAST_STATES = 128; // bigger NFAs are fat and slow.
+
+ // We use vertex count as an upper bound for the number of states.
+ u32 curr_states = num_vertices(g) - 2; // accepts don't have states
+
+ if (curr_states <= MAX_FAST_STATES) {
+ return false;
+ }
+ if (curr_states > NFA_MAX_STATES) {
+ return true;
+ }
+
+ u32 expected_states = curr_states;
+ for (const auto &rsi : rs) {
+ /* may be off as unpeeling not done yet */
+ expected_states += 2; /* cyclic and pos */
+ expected_states -= rsi.vertices.size();
+ }
+
+ return ROUNDUP_N(curr_states, 128) != ROUNDUP_N(expected_states, 128);
+}
+
+/** True if this repeat terminates with a vertex that leads only to accept. */
+static
+bool endsInAccept(const NGHolder &g, const ReachSubgraph &rsi) {
+ NFAVertex last = rsi.vertices.back();
+ return getSoleDestVertex(g, last) == g.accept;
+}
+
+static
+bool endsInAcceptEod(const NGHolder &g, const ReachSubgraph &rsi) {
+ NFAVertex last = rsi.vertices.back();
+ return getSoleDestVertex(g, last) == g.acceptEod;
+}
+
+namespace {
+class pfti_visitor : public boost::default_dfs_visitor {
+public:
pfti_visitor(unordered_map<NFAVertex, depth> &top_depths_in,
- const depth &our_depth_in)
- : top_depths(top_depths_in), our_depth(our_depth_in) {}
-
+ const depth &our_depth_in)
+ : top_depths(top_depths_in), our_depth(our_depth_in) {}
+
void discover_vertex(NFAVertex v, UNUSED const NGHolder &g) {
DEBUG_PRINTF("discovered %zu (depth %s)\n", g[v].index,
- our_depth.str().c_str());
-
- auto it = top_depths.find(v);
- if (it != top_depths.end() && it->second != our_depth) {
- // already seen at a different depth, remove from consideration.
- it->second = depth::infinity();
- } else {
- top_depths[v] = our_depth;
- }
- }
+ our_depth.str().c_str());
+
+ auto it = top_depths.find(v);
+ if (it != top_depths.end() && it->second != our_depth) {
+ // already seen at a different depth, remove from consideration.
+ it->second = depth::infinity();
+ } else {
+ top_depths[v] = our_depth;
+ }
+ }
unordered_map<NFAVertex, depth> &top_depths;
- const depth &our_depth;
-};
-} // namespace
-
-static
-void populateFixedTopInfo(const map<u32, u32> &fixed_depth_tops,
- const NGHolder &g,
+ const depth &our_depth;
+};
+} // namespace
+
+static
+void populateFixedTopInfo(const map<u32, u32> &fixed_depth_tops,
+ const NGHolder &g,
unordered_set<NFAVertex> *reached_by_fixed_tops) {
- if (fixed_depth_tops.empty()) {
- return; /* we will never find anything */
- }
-
- assert(!proper_out_degree(g.startDs, g));
+ if (fixed_depth_tops.empty()) {
+ return; /* we will never find anything */
+ }
+
+ assert(!proper_out_degree(g.startDs, g));
unordered_map<NFAVertex, depth> top_depths;
auto colours = make_small_color_map(g);
-
- for (const auto &e : out_edges_range(g.start, g)) {
- NFAVertex v = target(e, g);
- if (v == g.startDs) {
- continue;
- }
-
- depth td = depth::infinity();
+
+ for (const auto &e : out_edges_range(g.start, g)) {
+ NFAVertex v = target(e, g);
+ if (v == g.startDs) {
+ continue;
+ }
+
+ depth td = depth::infinity();
for (u32 top : g[e].tops) {
if (!contains(fixed_depth_tops, top)) {
td = depth::infinity();
@@ -2131,417 +2131,417 @@ void populateFixedTopInfo(const map<u32, u32> &fixed_depth_tops,
td = depth::infinity();
break;
}
- }
-
+ }
+
DEBUG_PRINTF("scanning from %zu depth=%s\n", g[v].index,
td.str().c_str());
- /* for each vertex reachable from v update its map to reflect that it is
- * reachable from a top of depth td. */
-
+ /* for each vertex reachable from v update its map to reflect that it is
+ * reachable from a top of depth td. */
+
depth_first_visit(g, v, pfti_visitor(top_depths, td), colours);
- }
-
- for (const auto &v_depth : top_depths) {
- const NFAVertex v = v_depth.first;
- const depth &d = v_depth.second;
- if (d.is_finite()) {
+ }
+
+ for (const auto &v_depth : top_depths) {
+ const NFAVertex v = v_depth.first;
+ const depth &d = v_depth.second;
+ if (d.is_finite()) {
DEBUG_PRINTF("%zu reached by fixed tops at depth %s\n",
- g[v].index, d.str().c_str());
- reached_by_fixed_tops->insert(v);
- }
- }
-}
-
-#ifndef NDEBUG
-/** Assertion use only. Returns true if the given bounded repeats share any
- * vertices, which we don't allow. */
-static
-bool hasOverlappingRepeats(UNUSED const NGHolder &g,
- const vector<BoundedRepeatData> &repeats) {
+ g[v].index, d.str().c_str());
+ reached_by_fixed_tops->insert(v);
+ }
+ }
+}
+
+#ifndef NDEBUG
+/** Assertion use only. Returns true if the given bounded repeats share any
+ * vertices, which we don't allow. */
+static
+bool hasOverlappingRepeats(UNUSED const NGHolder &g,
+ const vector<BoundedRepeatData> &repeats) {
unordered_set<NFAVertex> involved;
-
- for (const auto &br : repeats) {
- if (contains(involved, br.cyclic)) {
+
+ for (const auto &br : repeats) {
+ if (contains(involved, br.cyclic)) {
DEBUG_PRINTF("already seen cyclic %zu\n", g[br.cyclic].index);
- return true;
- }
- if (contains(involved, br.pos_trigger)) {
+ return true;
+ }
+ if (contains(involved, br.pos_trigger)) {
DEBUG_PRINTF("already seen pos %zu\n", g[br.pos_trigger].index);
- return true;
- }
- for (auto v : br.tug_triggers) {
- if (contains(involved, v)) {
+ return true;
+ }
+ for (auto v : br.tug_triggers) {
+ if (contains(involved, v)) {
DEBUG_PRINTF("already seen tug %zu\n", g[v].index);
- return true;
- }
- }
-
- involved.insert(br.cyclic);
- involved.insert(br.pos_trigger);
- involved.insert(br.tug_triggers.begin(), br.tug_triggers.end());
- }
-
- return false;
-}
-
-#endif // NDEBUG
-
-/**
- * Identifies so-called "nasty" repeats, in which the reachability of both the
- * repeat itself and its tugs are wide, which means that executing the NFA will
- * likely be bogged down in exception processing.
- */
-static
-bool repeatIsNasty(const NGHolder &g, const ReachSubgraph &rsi,
+ return true;
+ }
+ }
+
+ involved.insert(br.cyclic);
+ involved.insert(br.pos_trigger);
+ involved.insert(br.tug_triggers.begin(), br.tug_triggers.end());
+ }
+
+ return false;
+}
+
+#endif // NDEBUG
+
+/**
+ * Identifies so-called "nasty" repeats, in which the reachability of both the
+ * repeat itself and its tugs are wide, which means that executing the NFA will
+ * likely be bogged down in exception processing.
+ */
+static
+bool repeatIsNasty(const NGHolder &g, const ReachSubgraph &rsi,
const unordered_map<NFAVertex, NFAVertexDepth> &depths) {
- if (num_vertices(g) > NFA_MAX_STATES) {
- // We may have no choice but to implement this repeat to get the graph
- // down to a tractable number of vertices.
- return false;
- }
-
- if (!generates_callbacks(g) && endsInAccept(g, rsi)) {
- DEBUG_PRINTF("would generate a lazy tug, repeat is OK\n");
- return false;
- }
-
- const NFAVertex first = rsi.vertices.front();
- DEBUG_PRINTF("min depth from startds = %s\n",
- depths.at(first).fromStartDotStar.min.str().c_str());
- if (depths.at(first).fromStartDotStar.min > depth(2)) {
- return false;
- }
-
- NFAVertex last = rsi.vertices.back();
- const CharReach &cyclicreach = g[last].char_reach;
- CharReach tugreach;
- for (auto v : adjacent_vertices_range(last, g)) {
- if (v == last || is_special(v, g)) {
- continue;
- }
- tugreach |= g[v].char_reach;
- }
- // Deal with unpeeled cases.
- if (tugreach.none()) {
- tugreach = cyclicreach;
- }
- DEBUG_PRINTF("tugreach.count=%zu, cyclicreach.count=%zu\n",
- tugreach.count(), cyclicreach.count());
- return (tugreach.count() > 200) && (cyclicreach.count() > 200);
-}
-
-void analyseRepeats(NGHolder &g, const ReportManager *rm,
- const map<u32, u32> &fixed_depth_tops,
- const map<u32, vector<vector<CharReach>>> &triggers,
- vector<BoundedRepeatData> *repeats, bool streaming,
- bool simple_model_selection, const Grey &grey,
- bool *reformed_start_ds) {
- if (!grey.allowExtendedNFA || !grey.allowLimExNFA) {
- return;
- }
-
- // Quick sanity test.
- assert(allMatchStatesHaveReports(g));
-
-#ifndef NDEBUG
- // So we can assert that the number of tops hasn't changed at the end of
- // this analysis.
+ if (num_vertices(g) > NFA_MAX_STATES) {
+ // We may have no choice but to implement this repeat to get the graph
+ // down to a tractable number of vertices.
+ return false;
+ }
+
+ if (!generates_callbacks(g) && endsInAccept(g, rsi)) {
+ DEBUG_PRINTF("would generate a lazy tug, repeat is OK\n");
+ return false;
+ }
+
+ const NFAVertex first = rsi.vertices.front();
+ DEBUG_PRINTF("min depth from startds = %s\n",
+ depths.at(first).fromStartDotStar.min.str().c_str());
+ if (depths.at(first).fromStartDotStar.min > depth(2)) {
+ return false;
+ }
+
+ NFAVertex last = rsi.vertices.back();
+ const CharReach &cyclicreach = g[last].char_reach;
+ CharReach tugreach;
+ for (auto v : adjacent_vertices_range(last, g)) {
+ if (v == last || is_special(v, g)) {
+ continue;
+ }
+ tugreach |= g[v].char_reach;
+ }
+ // Deal with unpeeled cases.
+ if (tugreach.none()) {
+ tugreach = cyclicreach;
+ }
+ DEBUG_PRINTF("tugreach.count=%zu, cyclicreach.count=%zu\n",
+ tugreach.count(), cyclicreach.count());
+ return (tugreach.count() > 200) && (cyclicreach.count() > 200);
+}
+
+void analyseRepeats(NGHolder &g, const ReportManager *rm,
+ const map<u32, u32> &fixed_depth_tops,
+ const map<u32, vector<vector<CharReach>>> &triggers,
+ vector<BoundedRepeatData> *repeats, bool streaming,
+ bool simple_model_selection, const Grey &grey,
+ bool *reformed_start_ds) {
+ if (!grey.allowExtendedNFA || !grey.allowLimExNFA) {
+ return;
+ }
+
+ // Quick sanity test.
+ assert(allMatchStatesHaveReports(g));
+
+#ifndef NDEBUG
+ // So we can assert that the number of tops hasn't changed at the end of
+ // this analysis.
const flat_set<u32> allTops = getTops(g);
-#endif
-
- // Later on, we're (a little bit) dependent on depth information for
- // unpeeling and so forth. Note that these depths MUST be maintained when
- // new vertices are added.
+#endif
+
+ // Later on, we're (a little bit) dependent on depth information for
+ // unpeeling and so forth. Note that these depths MUST be maintained when
+ // new vertices are added.
unordered_map<NFAVertex, NFAVertexDepth> depths;
- findInitDepths(g, depths);
-
- // Construct our list of subgraphs with the same reach using BGL magic.
- vector<ReachSubgraph> rs;
- buildReachSubgraphs(g, rs, grey.minExtBoundedRepeatSize);
-
- // Validate and split subgraphs.
- checkReachSubgraphs(g, rs, grey.minExtBoundedRepeatSize);
-
- // Identify which subgraphs represent bounded repeats in forms ("cliches")
- // that we accept, and mark the others as bad.
- for (auto &rsi: rs) {
- if (!processSubgraph(g, rsi, grey.minExtBoundedRepeatSize)) {
- rsi.bad = true;
- continue;
- }
-
- DEBUG_PRINTF("rsi min %s=max=%s\n", rsi.repeatMin.str().c_str(),
- rsi.repeatMax.str().c_str());
-
- // Identify repeats with wide cyclic and tug reach which will produce
- // low-performance implementations and avoid doing them.
- if (repeatIsNasty(g, rsi, depths)) {
- DEBUG_PRINTF("marking nasty repeat as bad\n");
- rsi.bad = true;
- }
- }
-
- // Remove bad cases, then sort remaining subgraphs in descending size
- // order.
- rs.erase(remove_if(rs.begin(), rs.end(),
- [](const ReachSubgraph &r) { return r.bad; }),
- rs.end());
- stable_sort(rs.begin(), rs.end(),
- [](const ReachSubgraph &a, const ReachSubgraph &b) {
- return a.vertices.size() > b.vertices.size();
- });
-
- if (!streaming && !givesBetterModel(g, rs)) {
- /* in block mode, there is no state space so we are only looking for
- * performance wins */
- DEBUG_PRINTF("repeat would not reduce NFA model size, skipping\n");
- return;
- }
-
- if (rs.empty()) {
- /* no good repeats */
- return;
- }
-
- // Store a copy of the original, unmodified graph in case we need to revert
- // back: in particular, due to tug cloning it is possible to build a graph
- // that was bigger than the original. See UE-2370. FIXME: smarter analysis
- // could make this unnecessary?
- const unique_ptr<const NGHolder> orig_g(cloneHolder(g));
-
+ findInitDepths(g, depths);
+
+ // Construct our list of subgraphs with the same reach using BGL magic.
+ vector<ReachSubgraph> rs;
+ buildReachSubgraphs(g, rs, grey.minExtBoundedRepeatSize);
+
+ // Validate and split subgraphs.
+ checkReachSubgraphs(g, rs, grey.minExtBoundedRepeatSize);
+
+ // Identify which subgraphs represent bounded repeats in forms ("cliches")
+ // that we accept, and mark the others as bad.
+ for (auto &rsi: rs) {
+ if (!processSubgraph(g, rsi, grey.minExtBoundedRepeatSize)) {
+ rsi.bad = true;
+ continue;
+ }
+
+ DEBUG_PRINTF("rsi min %s=max=%s\n", rsi.repeatMin.str().c_str(),
+ rsi.repeatMax.str().c_str());
+
+ // Identify repeats with wide cyclic and tug reach which will produce
+ // low-performance implementations and avoid doing them.
+ if (repeatIsNasty(g, rsi, depths)) {
+ DEBUG_PRINTF("marking nasty repeat as bad\n");
+ rsi.bad = true;
+ }
+ }
+
+ // Remove bad cases, then sort remaining subgraphs in descending size
+ // order.
+ rs.erase(remove_if(rs.begin(), rs.end(),
+ [](const ReachSubgraph &r) { return r.bad; }),
+ rs.end());
+ stable_sort(rs.begin(), rs.end(),
+ [](const ReachSubgraph &a, const ReachSubgraph &b) {
+ return a.vertices.size() > b.vertices.size();
+ });
+
+ if (!streaming && !givesBetterModel(g, rs)) {
+ /* in block mode, there is no state space so we are only looking for
+ * performance wins */
+ DEBUG_PRINTF("repeat would not reduce NFA model size, skipping\n");
+ return;
+ }
+
+ if (rs.empty()) {
+ /* no good repeats */
+ return;
+ }
+
+ // Store a copy of the original, unmodified graph in case we need to revert
+ // back: in particular, due to tug cloning it is possible to build a graph
+ // that was bigger than the original. See UE-2370. FIXME: smarter analysis
+ // could make this unnecessary?
+ const unique_ptr<const NGHolder> orig_g(cloneHolder(g));
+
unordered_set<NFAVertex> reached_by_fixed_tops;
- if (is_triggered(g)) {
- populateFixedTopInfo(fixed_depth_tops, g, &reached_by_fixed_tops);
- }
-
- // Go to town on the remaining acceptable subgraphs.
+ if (is_triggered(g)) {
+ populateFixedTopInfo(fixed_depth_tops, g, &reached_by_fixed_tops);
+ }
+
+ // Go to town on the remaining acceptable subgraphs.
unordered_set<NFAVertex> created;
- for (auto &rsi : rs) {
+ for (auto &rsi : rs) {
DEBUG_PRINTF("subgraph (beginning vertex %zu) is a {%s,%s} repeat\n",
- g[rsi.vertices.front()].index,
- rsi.repeatMin.str().c_str(), rsi.repeatMax.str().c_str());
-
- if (!peelSubgraph(g, grey, rsi, created)) {
- DEBUG_PRINTF("peel failed, skipping\n");
- continue;
- }
-
- // Attempt to peel a vertex if we're up against startDs, for
- // performance reasons.
- peelStartDotStar(g, depths, grey, rsi);
-
- // Our peeling passes may have killed off this repeat.
- if (rsi.bad) {
- continue;
- }
-
- selectHistoryScheme(g, rm, rsi, depths, reached_by_fixed_tops, triggers,
- *repeats, simple_model_selection);
-
- if (!generates_callbacks(g) && endsInAccept(g, rsi)) {
- DEBUG_PRINTF("accepty-rosy graph\n");
- replaceSubgraphWithLazySpecial(g, rsi, repeats, depths, created);
- } else if (endsInAcceptEod(g, rsi)) {
- DEBUG_PRINTF("accepty-rosy graph\n");
- replaceSubgraphWithLazySpecial(g, rsi, repeats, depths, created);
- } else {
- replaceSubgraphWithSpecial(g, rsi, repeats, depths, created);
- }
-
- // Some of our analyses require correctly numbered vertices, so we
- // renumber after changes.
+ g[rsi.vertices.front()].index,
+ rsi.repeatMin.str().c_str(), rsi.repeatMax.str().c_str());
+
+ if (!peelSubgraph(g, grey, rsi, created)) {
+ DEBUG_PRINTF("peel failed, skipping\n");
+ continue;
+ }
+
+ // Attempt to peel a vertex if we're up against startDs, for
+ // performance reasons.
+ peelStartDotStar(g, depths, grey, rsi);
+
+ // Our peeling passes may have killed off this repeat.
+ if (rsi.bad) {
+ continue;
+ }
+
+ selectHistoryScheme(g, rm, rsi, depths, reached_by_fixed_tops, triggers,
+ *repeats, simple_model_selection);
+
+ if (!generates_callbacks(g) && endsInAccept(g, rsi)) {
+ DEBUG_PRINTF("accepty-rosy graph\n");
+ replaceSubgraphWithLazySpecial(g, rsi, repeats, depths, created);
+ } else if (endsInAcceptEod(g, rsi)) {
+ DEBUG_PRINTF("accepty-rosy graph\n");
+ replaceSubgraphWithLazySpecial(g, rsi, repeats, depths, created);
+ } else {
+ replaceSubgraphWithSpecial(g, rsi, repeats, depths, created);
+ }
+
+ // Some of our analyses require correctly numbered vertices, so we
+ // renumber after changes.
renumber_vertices(g);
- }
-
- bool modified_start_ds = false;
-
- // We may be able to make improvements to the graph for performance
- // reasons. Note that this may do 'orrible things like remove the startDs
- // cycle, this should only happen quite late in the graph lifecycle.
- if (repeats->size() == 1) {
- if (g.kind == NFA_OUTFIX) {
- improveLeadingRepeatOutfix(g, repeats->back(), created, *repeats);
- // (Does not modify startDs, so we don't need to set
- // reformed_start_ds for this case.)
- } else {
- modified_start_ds =
- improveLeadingRepeat(g, repeats->back(), created, *repeats);
- }
- }
-
- if (reformed_start_ds) {
- *reformed_start_ds = modified_start_ds;
- }
-
- if (!repeats->empty()) {
- if (num_vertices(g) > NFA_MAX_STATES) {
- // We've managed to build an unimplementable NFA. Swap back to the
- // original.
- DEBUG_PRINTF("NFA has %zu vertices; swapping back to the "
- "original graph\n", num_vertices(g));
- clear_graph(g);
- assert(orig_g);
- cloneHolder(g, *orig_g);
- repeats->clear();
- }
-
- // Sanity test: we don't want any repeats that share special vertices
- // as our construction code later can't cope with it.
- assert(!hasOverlappingRepeats(g, *repeats));
-
- // We have modified the graph, so we need to ensure that our edges
- // and vertices are correctly numbered.
+ }
+
+ bool modified_start_ds = false;
+
+ // We may be able to make improvements to the graph for performance
+ // reasons. Note that this may do 'orrible things like remove the startDs
+ // cycle, this should only happen quite late in the graph lifecycle.
+ if (repeats->size() == 1) {
+ if (g.kind == NFA_OUTFIX) {
+ improveLeadingRepeatOutfix(g, repeats->back(), created, *repeats);
+ // (Does not modify startDs, so we don't need to set
+ // reformed_start_ds for this case.)
+ } else {
+ modified_start_ds =
+ improveLeadingRepeat(g, repeats->back(), created, *repeats);
+ }
+ }
+
+ if (reformed_start_ds) {
+ *reformed_start_ds = modified_start_ds;
+ }
+
+ if (!repeats->empty()) {
+ if (num_vertices(g) > NFA_MAX_STATES) {
+ // We've managed to build an unimplementable NFA. Swap back to the
+ // original.
+ DEBUG_PRINTF("NFA has %zu vertices; swapping back to the "
+ "original graph\n", num_vertices(g));
+ clear_graph(g);
+ assert(orig_g);
+ cloneHolder(g, *orig_g);
+ repeats->clear();
+ }
+
+ // Sanity test: we don't want any repeats that share special vertices
+ // as our construction code later can't cope with it.
+ assert(!hasOverlappingRepeats(g, *repeats));
+
+ // We have modified the graph, so we need to ensure that our edges
+ // and vertices are correctly numbered.
renumber_vertices(g);
renumber_edges(g);
- // Remove stray report IDs.
- clearReports(g);
- }
-
- // Quick sanity tests.
- assert(allMatchStatesHaveReports(g));
- assert(!is_triggered(g) || getTops(g) == allTops);
-}
-
-/**
- * \brief True if the non-special vertices in the given graph all have the same
- * character reachability.
- */
-static
-bool allOneReach(const NGHolder &g) {
- const CharReach *cr = nullptr;
- for (const auto &v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
- if (!cr) {
- cr = &g[v].char_reach;
- } else {
- if (*cr != g[v].char_reach) {
- return false;
- }
- }
- }
- return true;
-}
-
-bool isPureRepeat(const NGHolder &g, PureRepeat &repeat) {
- assert(allMatchStatesHaveReports(g));
-
- DEBUG_PRINTF("entry\n");
-
- // Must be start anchored.
- assert(edge(g.startDs, g.startDs, g).second);
+ // Remove stray report IDs.
+ clearReports(g);
+ }
+
+ // Quick sanity tests.
+ assert(allMatchStatesHaveReports(g));
+ assert(!is_triggered(g) || getTops(g) == allTops);
+}
+
+/**
+ * \brief True if the non-special vertices in the given graph all have the same
+ * character reachability.
+ */
+static
+bool allOneReach(const NGHolder &g) {
+ const CharReach *cr = nullptr;
+ for (const auto &v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ if (!cr) {
+ cr = &g[v].char_reach;
+ } else {
+ if (*cr != g[v].char_reach) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+bool isPureRepeat(const NGHolder &g, PureRepeat &repeat) {
+ assert(allMatchStatesHaveReports(g));
+
+ DEBUG_PRINTF("entry\n");
+
+ // Must be start anchored.
+ assert(edge(g.startDs, g.startDs, g).second);
if (out_degree(g.startDs, g) > 1) {
- DEBUG_PRINTF("Unanchored\n");
- return false;
- }
-
- // Must not be EOD-anchored.
- assert(edge(g.accept, g.acceptEod, g).second);
+ DEBUG_PRINTF("Unanchored\n");
+ return false;
+ }
+
+ // Must not be EOD-anchored.
+ assert(edge(g.accept, g.acceptEod, g).second);
if (in_degree(g.acceptEod, g) > 1) {
- DEBUG_PRINTF("EOD anchored\n");
- return false;
- }
-
- // Must have precisely one top.
+ DEBUG_PRINTF("EOD anchored\n");
+ return false;
+ }
+
+ // Must have precisely one top.
if (is_triggered(g) && !onlyOneTop(g)) {
- DEBUG_PRINTF("Too many tops\n");
- return false;
- }
-
- if (!allOneReach(g)) {
- DEBUG_PRINTF("vertices with different reach\n");
- return false;
- }
-
- // We allow this code to report true for any repeat, even for '.*' or '.+'
- // cases.
- const u32 minNumVertices = 1;
-
- vector<ReachSubgraph> rs;
- buildReachSubgraphs(g, rs, minNumVertices);
- checkReachSubgraphs(g, rs, minNumVertices);
- if (rs.size() != 1) {
- DEBUG_PRINTF("too many subgraphs\n");
- return false;
- }
-
- ReachSubgraph &rsi = *rs.begin();
- if (!processSubgraph(g, rsi, minNumVertices)) {
- DEBUG_PRINTF("not a supported repeat\n");
- return false;
- }
-
- if (rsi.vertices.size() + N_SPECIALS != num_vertices(g)) {
- DEBUG_PRINTF("repeat doesn't span graph\n");
- return false;
- }
-
- assert(!rsi.bad);
- assert(rsi.vertices.size() >= minNumVertices);
-
- const NFAVertex v = rsi.vertices.back();
-
- repeat.reach = g[v].char_reach;
- repeat.bounds.min = rsi.repeatMin;
- repeat.bounds.max = rsi.repeatMax;
- insert(&repeat.reports, g[v].reports);
-
- if (isVacuous(g)) {
- // This graph might be a {0,N} or {0,} repeat. For this to be true, we
- // must have found a {1,N} or {1,} repeat and the start vertex must
- // have the same report set as the vertices in the repeat.
- if (repeat.bounds.min == depth(1) &&
- g[g.start].reports == g[v].reports) {
+ DEBUG_PRINTF("Too many tops\n");
+ return false;
+ }
+
+ if (!allOneReach(g)) {
+ DEBUG_PRINTF("vertices with different reach\n");
+ return false;
+ }
+
+ // We allow this code to report true for any repeat, even for '.*' or '.+'
+ // cases.
+ const u32 minNumVertices = 1;
+
+ vector<ReachSubgraph> rs;
+ buildReachSubgraphs(g, rs, minNumVertices);
+ checkReachSubgraphs(g, rs, minNumVertices);
+ if (rs.size() != 1) {
+ DEBUG_PRINTF("too many subgraphs\n");
+ return false;
+ }
+
+ ReachSubgraph &rsi = *rs.begin();
+ if (!processSubgraph(g, rsi, minNumVertices)) {
+ DEBUG_PRINTF("not a supported repeat\n");
+ return false;
+ }
+
+ if (rsi.vertices.size() + N_SPECIALS != num_vertices(g)) {
+ DEBUG_PRINTF("repeat doesn't span graph\n");
+ return false;
+ }
+
+ assert(!rsi.bad);
+ assert(rsi.vertices.size() >= minNumVertices);
+
+ const NFAVertex v = rsi.vertices.back();
+
+ repeat.reach = g[v].char_reach;
+ repeat.bounds.min = rsi.repeatMin;
+ repeat.bounds.max = rsi.repeatMax;
+ insert(&repeat.reports, g[v].reports);
+
+ if (isVacuous(g)) {
+ // This graph might be a {0,N} or {0,} repeat. For this to be true, we
+ // must have found a {1,N} or {1,} repeat and the start vertex must
+ // have the same report set as the vertices in the repeat.
+ if (repeat.bounds.min == depth(1) &&
+ g[g.start].reports == g[v].reports) {
repeat.bounds.min = depth(0);
- DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
- } else {
- DEBUG_PRINTF("not a supported repeat\n");
- return false;
- }
- }
-
- assert(all_reports(g) == set<ReportID>(begin(g[v].reports),
- end(g[v].reports)));
- return true;
-}
-
-void findRepeats(const NGHolder &h, u32 minRepeatVertices,
- vector<GraphRepeatInfo> *repeats_out) {
- // Construct our list of subgraphs with the same reach using BGL magic.
- vector<ReachSubgraph> rs;
- buildReachSubgraphs(h, rs, minRepeatVertices);
- checkReachSubgraphs(h, rs, minRepeatVertices);
-
- for (auto &rsi : rs) {
- if (!processSubgraph(h, rsi, minRepeatVertices)) {
- continue;
- }
-
- DEBUG_PRINTF("rsi min=%s max=%s\n", rsi.repeatMin.str().c_str(),
- rsi.repeatMax.str().c_str());
-
- depth repeatMax = rsi.repeatMax;
-
- vector<BoundedRepeatData> all_repeats; /* we don't mutate the graph in
- * this path */
- if (hasCyclicSupersetEntryPath(h, rsi, all_repeats)) {
- DEBUG_PRINTF("selected FIRST history due to cyclic pred with "
- "superset of reach\n");
- repeatMax = depth::infinity(); /* will continue to pump out matches */
- }
- if (hasCyclicSupersetExitPath(h, rsi, all_repeats)) {
- DEBUG_PRINTF("selected FIRST history due to cyclic succ with "
- "superset of reach\n");
- repeatMax = depth::infinity(); /* will continue to pump out matches */
- }
-
- repeats_out->push_back(GraphRepeatInfo());
- GraphRepeatInfo &ri = repeats_out->back();
- ri.vertices.swap(rsi.vertices);
- ri.repeatMin = rsi.repeatMin;
- ri.repeatMax = repeatMax;
- }
-}
-
-} // namespace ue2
+ DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
+ } else {
+ DEBUG_PRINTF("not a supported repeat\n");
+ return false;
+ }
+ }
+
+ assert(all_reports(g) == set<ReportID>(begin(g[v].reports),
+ end(g[v].reports)));
+ return true;
+}
+
+void findRepeats(const NGHolder &h, u32 minRepeatVertices,
+ vector<GraphRepeatInfo> *repeats_out) {
+ // Construct our list of subgraphs with the same reach using BGL magic.
+ vector<ReachSubgraph> rs;
+ buildReachSubgraphs(h, rs, minRepeatVertices);
+ checkReachSubgraphs(h, rs, minRepeatVertices);
+
+ for (auto &rsi : rs) {
+ if (!processSubgraph(h, rsi, minRepeatVertices)) {
+ continue;
+ }
+
+ DEBUG_PRINTF("rsi min=%s max=%s\n", rsi.repeatMin.str().c_str(),
+ rsi.repeatMax.str().c_str());
+
+ depth repeatMax = rsi.repeatMax;
+
+ vector<BoundedRepeatData> all_repeats; /* we don't mutate the graph in
+ * this path */
+ if (hasCyclicSupersetEntryPath(h, rsi, all_repeats)) {
+ DEBUG_PRINTF("selected FIRST history due to cyclic pred with "
+ "superset of reach\n");
+ repeatMax = depth::infinity(); /* will continue to pump out matches */
+ }
+ if (hasCyclicSupersetExitPath(h, rsi, all_repeats)) {
+ DEBUG_PRINTF("selected FIRST history due to cyclic succ with "
+ "superset of reach\n");
+ repeatMax = depth::infinity(); /* will continue to pump out matches */
+ }
+
+ repeats_out->push_back(GraphRepeatInfo());
+ GraphRepeatInfo &ri = repeats_out->back();
+ ri.vertices.swap(rsi.vertices);
+ ri.repeatMin = rsi.repeatMin;
+ ri.repeatMax = repeatMax;
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_repeat.h b/contrib/libs/hyperscan/src/nfagraph/ng_repeat.h
index 330e33c340..cfd804b7ef 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_repeat.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_repeat.h
@@ -1,160 +1,160 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Bounded repeat analysis.
- */
-
-#ifndef NG_REPEAT_H
-#define NG_REPEAT_H
-
-#include "ng_holder.h"
-#include "ue2common.h"
-#include "nfa/repeat_internal.h"
-#include "util/depth.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Bounded repeat analysis.
+ */
+
+#ifndef NG_REPEAT_H
+#define NG_REPEAT_H
+
+#include "ng_holder.h"
+#include "ue2common.h"
+#include "nfa/repeat_internal.h"
+#include "util/depth.h"
#include "util/flat_containers.h"
-
-#include <map>
-#include <vector>
-
-namespace ue2 {
-
-class NGHolder;
-class ReportManager;
-struct Grey;
-
-/**
- * \brief Everything you need to know about a bounded repeat that we have
- * transformed.
- */
-struct BoundedRepeatData {
- BoundedRepeatData(enum RepeatType type_in, const depth &a, const depth &z,
- u32 minPeriod_in, NFAVertex cyc, NFAVertex pos,
- const std::vector<NFAVertex> &tug_in)
- : type(type_in), repeatMin(a), repeatMax(z), minPeriod(minPeriod_in),
- cyclic(cyc), pos_trigger(pos), tug_triggers(tug_in) {}
-
- BoundedRepeatData() = delete; // no default construction allowed.
-
- enum RepeatType type; //!< selected type based on bounds and structure
- depth repeatMin; //!< minimum repeat bound
- depth repeatMax; //!< maximum repeat bound
- u32 minPeriod; //!< min trigger period
- NFAVertex cyclic; //!< cyclic vertex representing repeat in graph
- NFAVertex pos_trigger; //!< positive trigger vertex
- std::vector<NFAVertex> tug_triggers; //!< list of tug trigger vertices
-};
-
-/**
- * \brief Run the bounded repeat analysis and transform the graph where
- * bounded repeats are found.
- *
- * \param h
- * Graph to operate on.
- * \param rm
- * ReportManager, or nullptr if the graph's reports are internal (e.g. for
- * Rose use).
- * \param fixed_depth_tops
- * Map of top to possible trigger depth.
- * \param triggers
- * Map of top to the vector of triggers (i.e. preceding literals/masks)
- * \param repeats
- * Repeat info is filled in for caller here.
- * \param streaming
- * True if we're in streaming mode.
- * \param simple_model_selection
- * Don't perform complex (and slow) model selection analysis, e.g.
- * determining whether the repeat is sole entry.
- * \param grey
- * Grey box object.
- * \param reformed_start_ds
- * If supplied, this will be set to true if the graph was optimised for a
- * leading first repeat, resulting in the output graph having no self-loop
- * on startDs.
- */
-void analyseRepeats(NGHolder &h, const ReportManager *rm,
- const std::map<u32, u32> &fixed_depth_tops,
- const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
- std::vector<BoundedRepeatData> *repeats, bool streaming,
- bool simple_model_selection, const Grey &grey,
- bool *reformed_start_ds = nullptr);
-
-/**
- * \brief Information on repeats in a holder, returned from \ref findRepeats.
- */
-struct GraphRepeatInfo {
- depth repeatMin; /**< minimum bound */
- depth repeatMax; /**< effective max bound */
- std::vector<NFAVertex> vertices; /**< vertices involved in repeat */
-};
-
-/**
- * \brief Provides information on repeats in the graph.
- */
-void findRepeats(const NGHolder &h, u32 minRepeatVertices,
- std::vector<GraphRepeatInfo> *repeats_out);
-
-struct PureRepeat {
- CharReach reach;
- DepthMinMax bounds;
+
+#include <map>
+#include <vector>
+
+namespace ue2 {
+
+class NGHolder;
+class ReportManager;
+struct Grey;
+
+/**
+ * \brief Everything you need to know about a bounded repeat that we have
+ * transformed.
+ */
+struct BoundedRepeatData {
+ BoundedRepeatData(enum RepeatType type_in, const depth &a, const depth &z,
+ u32 minPeriod_in, NFAVertex cyc, NFAVertex pos,
+ const std::vector<NFAVertex> &tug_in)
+ : type(type_in), repeatMin(a), repeatMax(z), minPeriod(minPeriod_in),
+ cyclic(cyc), pos_trigger(pos), tug_triggers(tug_in) {}
+
+ BoundedRepeatData() = delete; // no default construction allowed.
+
+ enum RepeatType type; //!< selected type based on bounds and structure
+ depth repeatMin; //!< minimum repeat bound
+ depth repeatMax; //!< maximum repeat bound
+ u32 minPeriod; //!< min trigger period
+ NFAVertex cyclic; //!< cyclic vertex representing repeat in graph
+ NFAVertex pos_trigger; //!< positive trigger vertex
+ std::vector<NFAVertex> tug_triggers; //!< list of tug trigger vertices
+};
+
+/**
+ * \brief Run the bounded repeat analysis and transform the graph where
+ * bounded repeats are found.
+ *
+ * \param h
+ * Graph to operate on.
+ * \param rm
+ * ReportManager, or nullptr if the graph's reports are internal (e.g. for
+ * Rose use).
+ * \param fixed_depth_tops
+ * Map of top to possible trigger depth.
+ * \param triggers
+ * Map of top to the vector of triggers (i.e. preceding literals/masks)
+ * \param repeats
+ * Repeat info is filled in for caller here.
+ * \param streaming
+ * True if we're in streaming mode.
+ * \param simple_model_selection
+ * Don't perform complex (and slow) model selection analysis, e.g.
+ * determining whether the repeat is sole entry.
+ * \param grey
+ * Grey box object.
+ * \param reformed_start_ds
+ * If supplied, this will be set to true if the graph was optimised for a
+ * leading first repeat, resulting in the output graph having no self-loop
+ * on startDs.
+ */
+void analyseRepeats(NGHolder &h, const ReportManager *rm,
+ const std::map<u32, u32> &fixed_depth_tops,
+ const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
+ std::vector<BoundedRepeatData> *repeats, bool streaming,
+ bool simple_model_selection, const Grey &grey,
+ bool *reformed_start_ds = nullptr);
+
+/**
+ * \brief Information on repeats in a holder, returned from \ref findRepeats.
+ */
+struct GraphRepeatInfo {
+ depth repeatMin; /**< minimum bound */
+ depth repeatMax; /**< effective max bound */
+ std::vector<NFAVertex> vertices; /**< vertices involved in repeat */
+};
+
+/**
+ * \brief Provides information on repeats in the graph.
+ */
+void findRepeats(const NGHolder &h, u32 minRepeatVertices,
+ std::vector<GraphRepeatInfo> *repeats_out);
+
+struct PureRepeat {
+ CharReach reach;
+ DepthMinMax bounds;
flat_set<ReportID> reports;
-
- bool operator==(const PureRepeat &a) const {
- return reach == a.reach && bounds == a.bounds && reports == a.reports;
- }
-
- bool operator!=(const PureRepeat &a) const { return !(*this == a); }
-
- bool operator<(const PureRepeat &a) const {
- if (reach != a.reach) {
- return reach < a.reach;
- }
- if (bounds != a.bounds) {
- return bounds < a.bounds;
- }
- return reports < a.reports;
- }
-};
-
-/**
- * \brief Returns true and fills the given PureRepeat structure if the graph is
- * wholly a repeat over a single character class.
- *
- * For example, something like:
- *
- * /^[a-z]{10,20}/
- *
- * - Note: graph must not use SDS or EOD.
- * - Note: \p PureRepeat::bounds::max is set to infinity if there is no upper
- * bound on the repeat.
- */
-bool isPureRepeat(const NGHolder &h, PureRepeat &r);
-
-} // namespace ue2
-
-#endif // NG_REPEAT_H
+
+ bool operator==(const PureRepeat &a) const {
+ return reach == a.reach && bounds == a.bounds && reports == a.reports;
+ }
+
+ bool operator!=(const PureRepeat &a) const { return !(*this == a); }
+
+ bool operator<(const PureRepeat &a) const {
+ if (reach != a.reach) {
+ return reach < a.reach;
+ }
+ if (bounds != a.bounds) {
+ return bounds < a.bounds;
+ }
+ return reports < a.reports;
+ }
+};
+
+/**
+ * \brief Returns true and fills the given PureRepeat structure if the graph is
+ * wholly a repeat over a single character class.
+ *
+ * For example, something like:
+ *
+ * /^[a-z]{10,20}/
+ *
+ * - Note: graph must not use SDS or EOD.
+ * - Note: \p PureRepeat::bounds::max is set to infinity if there is no upper
+ * bound on the repeat.
+ */
+bool isPureRepeat(const NGHolder &h, PureRepeat &r);
+
+} // namespace ue2
+
+#endif // NG_REPEAT_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_reports.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_reports.cpp
index ed85863b08..4e9b498df0 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_reports.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_reports.cpp
@@ -1,70 +1,70 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Utility functions for working with Report ID sets.
- */
-#include "ng_reports.h"
-
-#include "ng_holder.h"
-#include "util/container.h"
-#include "util/compile_context.h"
-#include "util/graph_range.h"
-#include "util/report_manager.h"
-
-using namespace std;
-
-namespace ue2 {
-
-/** Returns the set of all reports in the graph. */
-set<ReportID> all_reports(const NGHolder &g) {
- set<ReportID> rv;
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- insert(&rv, g[v].reports);
- }
- for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
- insert(&rv, g[v].reports);
- }
-
- return rv;
-}
-
-/** True if *all* reports in the graph are exhaustible. */
-bool can_exhaust(const NGHolder &g, const ReportManager &rm) {
- for (ReportID report_id : all_reports(g)) {
- if (rm.getReport(report_id).ekey == INVALID_EKEY) {
- return false;
- }
- }
-
- return true;
-}
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Utility functions for working with Report ID sets.
+ */
+#include "ng_reports.h"
+
+#include "ng_holder.h"
+#include "util/container.h"
+#include "util/compile_context.h"
+#include "util/graph_range.h"
+#include "util/report_manager.h"
+
+using namespace std;
+
+namespace ue2 {
+
+/** Returns the set of all reports in the graph. */
+set<ReportID> all_reports(const NGHolder &g) {
+ set<ReportID> rv;
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ insert(&rv, g[v].reports);
+ }
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ insert(&rv, g[v].reports);
+ }
+
+ return rv;
+}
+
+/** True if *all* reports in the graph are exhaustible. */
+bool can_exhaust(const NGHolder &g, const ReportManager &rm) {
+ for (ReportID report_id : all_reports(g)) {
+ if (rm.getReport(report_id).ekey == INVALID_EKEY) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
void set_report(NGHolder &g, ReportID internal_report) {
// First, wipe the report IDs on all vertices.
for (auto v : vertices_range(g)) {
@@ -85,22 +85,22 @@ void set_report(NGHolder &g, ReportID internal_report) {
}
}
-/** Derive a maximum offset for the graph from the max_offset values of its
- * reports. Returns MAX_OFFSET for inf. */
-u64a findMaxOffset(const NGHolder &g, const ReportManager &rm) {
- u64a maxOffset = 0;
- set<ReportID> reports = all_reports(g);
- assert(!reports.empty());
-
- for (ReportID report_id : all_reports(g)) {
- const Report &ir = rm.getReport(report_id);
- if (ir.hasBounds()) {
- maxOffset = max(maxOffset, ir.maxOffset);
- } else {
- return MAX_OFFSET;
- }
- }
- return maxOffset;
-}
-
-} // namespace ue2
+/** Derive a maximum offset for the graph from the max_offset values of its
+ * reports. Returns MAX_OFFSET for inf. */
+u64a findMaxOffset(const NGHolder &g, const ReportManager &rm) {
+ u64a maxOffset = 0;
+ set<ReportID> reports = all_reports(g);
+ assert(!reports.empty());
+
+ for (ReportID report_id : all_reports(g)) {
+ const Report &ir = rm.getReport(report_id);
+ if (ir.hasBounds()) {
+ maxOffset = max(maxOffset, ir.maxOffset);
+ } else {
+ return MAX_OFFSET;
+ }
+ }
+ return maxOffset;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_reports.h b/contrib/libs/hyperscan/src/nfagraph/ng_reports.h
index 0f1b43c482..31c9530880 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_reports.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_reports.h
@@ -1,61 +1,61 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Utility functions for working with Report ID sets.
- */
-
-#ifndef NG_REPORTS_H
-#define NG_REPORTS_H
-
-#include "ue2common.h"
-
-#include <set>
-
-namespace ue2 {
-
-class NGHolder;
-class ReportManager;
-
-/** Returns the set of all reports in the graph. */
-std::set<ReportID> all_reports(const NGHolder &g);
-
-/** True if *all* reports in the graph are exhaustible. */
-bool can_exhaust(const NGHolder &g, const ReportManager &rm);
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Utility functions for working with Report ID sets.
+ */
+
+#ifndef NG_REPORTS_H
+#define NG_REPORTS_H
+
+#include "ue2common.h"
+
+#include <set>
+
+namespace ue2 {
+
+class NGHolder;
+class ReportManager;
+
+/** Returns the set of all reports in the graph. */
+std::set<ReportID> all_reports(const NGHolder &g);
+
+/** True if *all* reports in the graph are exhaustible. */
+bool can_exhaust(const NGHolder &g, const ReportManager &rm);
+
/** Replaces all existing reports on the holder with the provided internal
* report id. */
void set_report(NGHolder &g, ReportID internal_report);
-/** Derive a maximum offset for the graph from the max_offset values of its
- * reports. Returns MAX_OFFSET for inf. */
-u64a findMaxOffset(const NGHolder &g, const ReportManager &rm);
-
-} // namespace ue2
-
-#endif // NG_REPORTS_H
+/** Derive a maximum offset for the graph from the max_offset values of its
+ * reports. Returns MAX_OFFSET for inf. */
+u64a findMaxOffset(const NGHolder &g, const ReportManager &rm);
+
+} // namespace ue2
+
+#endif // NG_REPORTS_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.cpp
index c746877678..704697e57f 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.cpp
@@ -1,70 +1,70 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief State numbering and late graph restructuring code.
- */
-#include "ng_restructuring.h"
-
-#include "grey.h"
-#include "ng_holder.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/graph_range.h"
-
-#include <algorithm>
-#include <cassert>
-
-#include <boost/graph/transpose_graph.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
-/** Connect the start vertex to each of the vertices in \p tops. This is useful
- * temporarily for when we need to run a graph algorithm that expects a single
- * source vertex. */
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief State numbering and late graph restructuring code.
+ */
+#include "ng_restructuring.h"
+
+#include "grey.h"
+#include "ng_holder.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/graph_range.h"
+
+#include <algorithm>
+#include <cassert>
+
+#include <boost/graph/transpose_graph.hpp>
+
+using namespace std;
+
+namespace ue2 {
+
+/** Connect the start vertex to each of the vertices in \p tops. This is useful
+ * temporarily for when we need to run a graph algorithm that expects a single
+ * source vertex. */
static
void wireStartToTops(NGHolder &g, const flat_set<NFAVertex> &tops,
vector<NFAEdge> &tempEdges) {
for (NFAVertex v : tops) {
- assert(!isLeafNode(v, g));
-
+ assert(!isLeafNode(v, g));
+
const NFAEdge &e = add_edge(g.start, v, g);
tempEdges.push_back(e);
- }
-}
-
+ }
+}
+
/**
* Returns true if start's successors (aside from startDs) are subset of
* startDs's proper successors or if start has no successors other than startDs.
*/
-static
+static
bool startIsRedundant(const NGHolder &g) {
/* We ignore startDs as the self-loop may have been stripped as an
* optimisation for repeats (improveLeadingRepeats()). */
@@ -92,130 +92,130 @@ bool startIsRedundant(const NGHolder &g) {
static
void getStateOrdering(NGHolder &g, const flat_set<NFAVertex> &tops,
- vector<NFAVertex> &ordering) {
- // First, wire up our "tops" to start so that we have a single source,
- // which will give a nicer topo order.
+ vector<NFAVertex> &ordering) {
+ // First, wire up our "tops" to start so that we have a single source,
+ // which will give a nicer topo order.
vector<NFAEdge> tempEdges;
wireStartToTops(g, tops, tempEdges);
-
+
renumber_vertices(g);
-
- vector<NFAVertex> temp = getTopoOrdering(g);
-
+
+ vector<NFAVertex> temp = getTopoOrdering(g);
+
remove_edges(tempEdges, g);
-
- // Move {start, startDs} to the end, so they'll be first when we reverse
+
+ // Move {start, startDs} to the end, so they'll be first when we reverse
// the ordering (if they are required).
- temp.erase(remove(temp.begin(), temp.end(), g.startDs));
- temp.erase(remove(temp.begin(), temp.end(), g.start));
+ temp.erase(remove(temp.begin(), temp.end(), g.startDs));
+ temp.erase(remove(temp.begin(), temp.end(), g.start));
if (proper_out_degree(g.startDs, g)) {
temp.push_back(g.startDs);
}
if (!startIsRedundant(g)) {
temp.push_back(g.start);
}
-
- // Walk ordering, remove vertices that shouldn't be participating in state
- // numbering, such as accepts.
- for (auto v : temp) {
- if (is_any_accept(v, g)) {
- continue; // accepts don't need states
- }
-
- ordering.push_back(v);
- }
-
- // Output of topo order was in reverse.
- reverse(ordering.begin(), ordering.end());
-}
-
-// Returns the number of states.
-static
+
+ // Walk ordering, remove vertices that shouldn't be participating in state
+ // numbering, such as accepts.
+ for (auto v : temp) {
+ if (is_any_accept(v, g)) {
+ continue; // accepts don't need states
+ }
+
+ ordering.push_back(v);
+ }
+
+ // Output of topo order was in reverse.
+ reverse(ordering.begin(), ordering.end());
+}
+
+// Returns the number of states.
+static
unordered_map<NFAVertex, u32>
-getStateIndices(const NGHolder &h, const vector<NFAVertex> &ordering) {
+getStateIndices(const NGHolder &h, const vector<NFAVertex> &ordering) {
unordered_map<NFAVertex, u32> states;
- for (const auto &v : vertices_range(h)) {
- states[v] = NO_STATE;
- }
-
- u32 stateNum = 0;
- for (auto v : ordering) {
+ for (const auto &v : vertices_range(h)) {
+ states[v] = NO_STATE;
+ }
+
+ u32 stateNum = 0;
+ for (auto v : ordering) {
DEBUG_PRINTF("assigning state num %u to vertex %zu\n", stateNum,
- h[v].index);
- states[v] = stateNum++;
- }
- return states;
-}
-
-/** UE-1648: A state with a single successor that happens to be a predecessor
- * can be given any ol' state ID by the topological ordering, so we sink it
- * next to its pred. This enables better merging. */
-static
-void optimiseTightLoops(const NGHolder &g, vector<NFAVertex> &ordering) {
- deque<pair<NFAVertex, NFAVertex>> candidates;
-
- auto start = ordering.begin();
- for (auto it = ordering.begin(), ite = ordering.end(); it != ite; ++it) {
- NFAVertex v = *it;
- if (is_special(v, g)) {
- continue;
- }
-
- if (out_degree(v, g) == 1) {
- NFAVertex t = *(adjacent_vertices(v, g).first);
- if (v == t) {
- continue;
- }
- if (edge(t, v, g).second && find(start, it, t) != ite) {
- candidates.push_back(make_pair(v, t));
- }
- }
- }
-
- for (const auto &cand : candidates) {
- NFAVertex v = cand.first, u = cand.second;
- auto u_it = find(ordering.begin(), ordering.end(), u);
- auto v_it = find(ordering.begin(), ordering.end(), v);
-
- // Only move candidates backwards in the ordering, and only move them
- // when necessary.
- if (u_it >= v_it || distance(u_it, v_it) == 1) {
- continue;
- }
-
+ h[v].index);
+ states[v] = stateNum++;
+ }
+ return states;
+}
+
+/** UE-1648: A state with a single successor that happens to be a predecessor
+ * can be given any ol' state ID by the topological ordering, so we sink it
+ * next to its pred. This enables better merging. */
+static
+void optimiseTightLoops(const NGHolder &g, vector<NFAVertex> &ordering) {
+ deque<pair<NFAVertex, NFAVertex>> candidates;
+
+ auto start = ordering.begin();
+ for (auto it = ordering.begin(), ite = ordering.end(); it != ite; ++it) {
+ NFAVertex v = *it;
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ if (out_degree(v, g) == 1) {
+ NFAVertex t = *(adjacent_vertices(v, g).first);
+ if (v == t) {
+ continue;
+ }
+ if (edge(t, v, g).second && find(start, it, t) != ite) {
+ candidates.push_back(make_pair(v, t));
+ }
+ }
+ }
+
+ for (const auto &cand : candidates) {
+ NFAVertex v = cand.first, u = cand.second;
+ auto u_it = find(ordering.begin(), ordering.end(), u);
+ auto v_it = find(ordering.begin(), ordering.end(), v);
+
+ // Only move candidates backwards in the ordering, and only move them
+ // when necessary.
+ if (u_it >= v_it || distance(u_it, v_it) == 1) {
+ continue;
+ }
+
DEBUG_PRINTF("moving vertex %zu next to %zu\n", g[v].index, g[u].index);
-
- ordering.erase(v_it);
- ordering.insert(++u_it, v);
- }
-}
-
+
+ ordering.erase(v_it);
+ ordering.insert(++u_it, v);
+ }
+}
+
unordered_map<NFAVertex, u32>
numberStates(NGHolder &h, const flat_set<NFAVertex> &tops) {
- DEBUG_PRINTF("numbering states for holder %p\n", &h);
-
- vector<NFAVertex> ordering;
- getStateOrdering(h, tops, ordering);
-
- optimiseTightLoops(h, ordering);
-
+ DEBUG_PRINTF("numbering states for holder %p\n", &h);
+
+ vector<NFAVertex> ordering;
+ getStateOrdering(h, tops, ordering);
+
+ optimiseTightLoops(h, ordering);
+
return getStateIndices(h, ordering);
-}
-
+}
+
u32 countStates(const unordered_map<NFAVertex, u32> &state_ids) {
- if (state_ids.empty()) {
- return 0;
- }
-
- u32 max_state = 0;
- for (const auto &m : state_ids) {
- if (m.second != NO_STATE) {
- max_state = max(m.second, max_state);
- }
- }
- u32 num_states = max_state + 1;
-
- return num_states;
-}
-
-} // namespace ue2
+ if (state_ids.empty()) {
+ return 0;
+ }
+
+ u32 max_state = 0;
+ for (const auto &m : state_ids) {
+ if (m.second != NO_STATE) {
+ max_state = max(m.second, max_state);
+ }
+ }
+ u32 num_states = max_state + 1;
+
+ return num_states;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.h b/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.h
index 7c381748fc..75d19c6294 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_restructuring.h
@@ -1,64 +1,64 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief State numbering and late graph restructuring code.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
*/
-
-#ifndef NG_RESTRUCTURING_H
-#define NG_RESTRUCTURING_H
-
-#include "ng_holder.h"
-#include "ue2common.h"
+
+/** \file
+ * \brief State numbering and late graph restructuring code.
+ */
+
+#ifndef NG_RESTRUCTURING_H
+#define NG_RESTRUCTURING_H
+
+#include "ng_holder.h"
+#include "ue2common.h"
#include "util/flat_containers.h"
-
+
#include <unordered_map>
-
-namespace ue2 {
-
-/**
- * \brief Special state index value meaning that the vertex will not
- * participate in an (NFA/DFA/etc) implementation.
- */
-static constexpr u32 NO_STATE = ~0;
-
-/**
- * \brief Gives each participating vertex in the graph a unique state index.
- */
+
+namespace ue2 {
+
+/**
+ * \brief Special state index value meaning that the vertex will not
+ * participate in an (NFA/DFA/etc) implementation.
+ */
+static constexpr u32 NO_STATE = ~0;
+
+/**
+ * \brief Gives each participating vertex in the graph a unique state index.
+ */
std::unordered_map<NFAVertex, u32>
numberStates(NGHolder &h, const flat_set<NFAVertex> &tops);
-
-/**
- * \brief Counts the number of states (vertices with state indices) in the
- * graph.
- */
+
+/**
+ * \brief Counts the number of states (vertices with state indices) in the
+ * graph.
+ */
u32 countStates(const std::unordered_map<NFAVertex, u32> &state_ids);
-
-} // namespace ue2
-
-#endif
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_revacc.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_revacc.cpp
index bc21d3a13b..0f932668c9 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_revacc.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_revacc.cpp
@@ -1,299 +1,299 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Reverse acceleration analysis.
- */
-#include "ng_revacc.h"
-
-#include "grey.h"
-#include "ng_holder.h"
-#include "ue2common.h"
-#include "nfa/accel.h"
-#include "nfa/nfa_internal.h"
-#include "util/bitutils.h"
-#include "util/charreach.h"
-#include "util/graph_range.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Reverse acceleration analysis.
+ */
+#include "ng_revacc.h"
+
+#include "grey.h"
+#include "ng_holder.h"
+#include "ue2common.h"
+#include "nfa/accel.h"
+#include "nfa/nfa_internal.h"
+#include "util/bitutils.h"
+#include "util/charreach.h"
+#include "util/graph_range.h"
+
#include <set>
-using namespace std;
-
-namespace ue2 {
-
-static
-bool isPseudoNoCaseChar(const CharReach &cr) {
- return cr.count() == 2 && !(cr.find_first() & 32)
- && cr.test(cr.find_first() | 32);
-}
-
-static
-bool lookForEodSchemes(const RevAccInfo &rev_info, const u32 minWidth,
- NFA *nfa) {
- DEBUG_PRINTF("pure eod triggered pattern\n");
-
- /* 2 char */
- for (u8 nocase = 0; nocase < 2; nocase++) {
- for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) {
- const CharReach &cr = rev_info.acceptEodReach[i];
- const CharReach &cr2 = rev_info.acceptEodReach[i - 1];
-
- if (!nocase && cr.count() == 1 && cr2.count() == 1) {
- assert(i < minWidth);
- if (i >= minWidth) {
- goto single;
- }
- nfa->rAccelType = ACCEL_RDEOD;
- nfa->rAccelData.array[0] = (u8)cr.find_first();
- nfa->rAccelData.array[1] = (u8)cr2.find_first();
- nfa->rAccelOffset = i + 1;
- DEBUG_PRINTF("raccel eod x2 %u %04hx\n",
- nfa->rAccelOffset, nfa->rAccelData.dc);
- return true;
- } else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr))
- && (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) {
- assert(i < minWidth);
- if (i >= minWidth) {
- goto single;
- }
- nfa->rAccelType = ACCEL_RDEOD_NOCASE;
- nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR; /* uppercase */
- nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR;
- nfa->rAccelOffset = i + 1;
- DEBUG_PRINTF("raccel nc eod x2 %u %04hx\n",
- nfa->rAccelOffset, nfa->rAccelData.dc);
- return true;
- }
- }
- }
-
- single:
- /* 1 char */
- for (u8 nocase = 0; nocase < 2; nocase++) {
- for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
- const CharReach &cr = rev_info.acceptEodReach[i];
- if (!nocase && cr.count() == 1) {
- assert(i < minWidth);
- if (i >= minWidth) {
- return false;
- }
- nfa->rAccelType = ACCEL_REOD;
- nfa->rAccelData.c = (u8) cr.find_first();
- nfa->rAccelOffset = i + 1;
- DEBUG_PRINTF("raccel eod %u %02hhx\n",
- nfa->rAccelOffset, nfa->rAccelData.c);
- return true;
- } else if (nocase && isPseudoNoCaseChar(cr)) {
- assert(i < minWidth);
- if (i >= minWidth) {
- return false;
- }
- nfa->rAccelType = ACCEL_REOD_NOCASE;
- nfa->rAccelData.c = (u8)cr.find_first(); /* uppercase */
- nfa->rAccelOffset = i + 1;
- DEBUG_PRINTF("raccel nc eod %u %02hhx\n",
- nfa->rAccelOffset, nfa->rAccelData.c);
- return true;
- }
- }
- }
-
- return false;
-}
-
-static
-bool lookForFloatingSchemes(const RevAccInfo &rev_info,
- const u32 minWidth, NFA *nfa) {
- /* 2 char */
- for (u8 nocase = 0; nocase < 2; nocase++) {
- for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) {
- CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i];
- CharReach cr2 = rev_info.acceptEodReach[i - 1]
- | rev_info.acceptReach[i - 1];
- if (!nocase && cr.count() == 1 && cr2.count() == 1) {
- assert((u8)(i - 1) < minWidth);
- if (i > minWidth) {
- goto single;
- }
- nfa->rAccelType = ACCEL_RDVERM;
- nfa->rAccelData.array[0] = (u8)cr.find_first();
- nfa->rAccelData.array[1] = (u8)cr2.find_first();
- nfa->rAccelOffset = i;
- DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx\n",
- nfa->rAccelOffset, nfa->rAccelData.array[0],
- nfa->rAccelData.array[1]);
- return true;
- } else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr))
- && (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) {
- assert((u8)(i - 1) < minWidth);
- if (i > minWidth) {
- goto single;
- }
- nfa->rAccelType = ACCEL_RDVERM_NOCASE;
- nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR;
- nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR;
- nfa->rAccelOffset = i;
- DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx nc\n",
- nfa->rAccelOffset, nfa->rAccelData.array[0],
- nfa->rAccelData.array[1]);
- return true;
- }
- }
- }
-
- single:
- /* 1 char */
- for (u8 nocase = 0; nocase < 2; nocase++) {
- for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
- CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i];
- if (!nocase && cr.count() == 1) {
- assert(i < minWidth);
- if (i >= minWidth) {
- return false;
- }
- nfa->rAccelType = ACCEL_RVERM;
- nfa->rAccelData.c = (u8)cr.find_first();
- nfa->rAccelOffset = i + 1;
- DEBUG_PRINTF("raccel verm %u %02hhx\n", nfa->rAccelOffset,
- nfa->rAccelData.c);
- return true;
- } else if (nocase && isPseudoNoCaseChar(cr)) {
- assert(i < minWidth);
- if (i >= minWidth) {
- return false;
- }
- nfa->rAccelType = ACCEL_RVERM_NOCASE;
- nfa->rAccelData.c = (u8)cr.find_first(); /* 'uppercase' char */
- nfa->rAccelOffset = i + 1;
- DEBUG_PRINTF("raccel nc verm %u %02hhx\n", nfa->rAccelOffset,
- nfa->rAccelData.c);
- return true;
- }
- }
- }
-
- return false;
-}
-
-void buildReverseAcceleration(NFA *nfa, const RevAccInfo &rev_info,
- u32 min_width, bool eod_only) {
- assert(nfa);
-
- if (!rev_info.valid) {
- return;
- }
-
- nfa->rAccelOffset = 1;
-
- assert(rev_info.acceptReach[0].any() || rev_info.acceptEodReach[0].any());
- if (rev_info.acceptReach[0].none() && rev_info.acceptEodReach[0].none()) {
- DEBUG_PRINTF("expected path to accept\n");
- return;
- }
-
- if (rev_info.acceptReach[0].none()) {
- /* eod only */
-
- if (lookForEodSchemes(rev_info, min_width, nfa)) {
- assert(nfa->rAccelOffset <= min_width);
- return;
- }
- }
-
- if (eod_only) {
- return;
- }
-
- if (!lookForFloatingSchemes(rev_info, min_width, nfa)) {
- DEBUG_PRINTF("failed to accelerate\n");
- }
-}
-
-static
-void populateRevAccelInfo(const NGHolder &g, NFAVertex terminal,
- vector<CharReach> *reach) {
- set<NFAVertex> vset;
-
- for (auto v : inv_adjacent_vertices_range(terminal, g)) {
- if (!is_special(v, g)) {
- vset.insert(v);
- }
- }
-
- for (u8 offset = 0; offset < MAX_RACCEL_OFFSET; offset++) {
- set<NFAVertex> next;
-
- for (auto v : vset) {
- const CharReach &cr = g[v].char_reach;
- (*reach)[offset] |= cr;
-
- DEBUG_PRINTF("off %u adding %zu to %zu\n", offset, cr.count(),
- (*reach)[offset].count());
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == g.start || u == g.startDs) {
- /* kill all subsequent offsets by setting to dot, setting
- * to dot is in someways not accurate as there may be no
- * data at all but neither case can be accelerated */
- for (u8 i = offset + 1; i < MAX_RACCEL_OFFSET; i++) {
- (*reach)[i].setall();
- }
- break;
- } else if (!is_special(u, g)) {
- next.insert(u);
- }
- }
- }
-
- swap(vset, next);
- }
-}
-
-void populateReverseAccelerationInfo(RevAccInfo &rai, const NGHolder &g) {
- DEBUG_PRINTF("pop rev info\n");
- populateRevAccelInfo(g, g.accept, &rai.acceptReach);
- populateRevAccelInfo(g, g.acceptEod, &rai.acceptEodReach);
- rai.valid = true;
-}
-
-void mergeReverseAccelerationInfo(RevAccInfo &dest, const RevAccInfo &vic) {
- DEBUG_PRINTF("merging ra\n");
-
- dest.valid &= vic.valid;
-
- for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
- dest.acceptReach[i] |= vic.acceptReach[i];
- dest.acceptEodReach[i] |= vic.acceptEodReach[i];
- }
-}
-
-RevAccInfo::RevAccInfo(void)
- : valid(false), acceptReach(MAX_RACCEL_OFFSET),
- acceptEodReach(MAX_RACCEL_OFFSET) {}
-
-} // namespace ue2
+using namespace std;
+
+namespace ue2 {
+
+static
+bool isPseudoNoCaseChar(const CharReach &cr) {
+ return cr.count() == 2 && !(cr.find_first() & 32)
+ && cr.test(cr.find_first() | 32);
+}
+
+static
+bool lookForEodSchemes(const RevAccInfo &rev_info, const u32 minWidth,
+ NFA *nfa) {
+ DEBUG_PRINTF("pure eod triggered pattern\n");
+
+ /* 2 char */
+ for (u8 nocase = 0; nocase < 2; nocase++) {
+ for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) {
+ const CharReach &cr = rev_info.acceptEodReach[i];
+ const CharReach &cr2 = rev_info.acceptEodReach[i - 1];
+
+ if (!nocase && cr.count() == 1 && cr2.count() == 1) {
+ assert(i < minWidth);
+ if (i >= minWidth) {
+ goto single;
+ }
+ nfa->rAccelType = ACCEL_RDEOD;
+ nfa->rAccelData.array[0] = (u8)cr.find_first();
+ nfa->rAccelData.array[1] = (u8)cr2.find_first();
+ nfa->rAccelOffset = i + 1;
+ DEBUG_PRINTF("raccel eod x2 %u %04hx\n",
+ nfa->rAccelOffset, nfa->rAccelData.dc);
+ return true;
+ } else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr))
+ && (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) {
+ assert(i < minWidth);
+ if (i >= minWidth) {
+ goto single;
+ }
+ nfa->rAccelType = ACCEL_RDEOD_NOCASE;
+ nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR; /* uppercase */
+ nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR;
+ nfa->rAccelOffset = i + 1;
+ DEBUG_PRINTF("raccel nc eod x2 %u %04hx\n",
+ nfa->rAccelOffset, nfa->rAccelData.dc);
+ return true;
+ }
+ }
+ }
+
+ single:
+ /* 1 char */
+ for (u8 nocase = 0; nocase < 2; nocase++) {
+ for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
+ const CharReach &cr = rev_info.acceptEodReach[i];
+ if (!nocase && cr.count() == 1) {
+ assert(i < minWidth);
+ if (i >= minWidth) {
+ return false;
+ }
+ nfa->rAccelType = ACCEL_REOD;
+ nfa->rAccelData.c = (u8) cr.find_first();
+ nfa->rAccelOffset = i + 1;
+ DEBUG_PRINTF("raccel eod %u %02hhx\n",
+ nfa->rAccelOffset, nfa->rAccelData.c);
+ return true;
+ } else if (nocase && isPseudoNoCaseChar(cr)) {
+ assert(i < minWidth);
+ if (i >= minWidth) {
+ return false;
+ }
+ nfa->rAccelType = ACCEL_REOD_NOCASE;
+ nfa->rAccelData.c = (u8)cr.find_first(); /* uppercase */
+ nfa->rAccelOffset = i + 1;
+ DEBUG_PRINTF("raccel nc eod %u %02hhx\n",
+ nfa->rAccelOffset, nfa->rAccelData.c);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static
+bool lookForFloatingSchemes(const RevAccInfo &rev_info,
+ const u32 minWidth, NFA *nfa) {
+ /* 2 char */
+ for (u8 nocase = 0; nocase < 2; nocase++) {
+ for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) {
+ CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i];
+ CharReach cr2 = rev_info.acceptEodReach[i - 1]
+ | rev_info.acceptReach[i - 1];
+ if (!nocase && cr.count() == 1 && cr2.count() == 1) {
+ assert((u8)(i - 1) < minWidth);
+ if (i > minWidth) {
+ goto single;
+ }
+ nfa->rAccelType = ACCEL_RDVERM;
+ nfa->rAccelData.array[0] = (u8)cr.find_first();
+ nfa->rAccelData.array[1] = (u8)cr2.find_first();
+ nfa->rAccelOffset = i;
+ DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx\n",
+ nfa->rAccelOffset, nfa->rAccelData.array[0],
+ nfa->rAccelData.array[1]);
+ return true;
+ } else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr))
+ && (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) {
+ assert((u8)(i - 1) < minWidth);
+ if (i > minWidth) {
+ goto single;
+ }
+ nfa->rAccelType = ACCEL_RDVERM_NOCASE;
+ nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR;
+ nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR;
+ nfa->rAccelOffset = i;
+ DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx nc\n",
+ nfa->rAccelOffset, nfa->rAccelData.array[0],
+ nfa->rAccelData.array[1]);
+ return true;
+ }
+ }
+ }
+
+ single:
+ /* 1 char */
+ for (u8 nocase = 0; nocase < 2; nocase++) {
+ for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
+ CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i];
+ if (!nocase && cr.count() == 1) {
+ assert(i < minWidth);
+ if (i >= minWidth) {
+ return false;
+ }
+ nfa->rAccelType = ACCEL_RVERM;
+ nfa->rAccelData.c = (u8)cr.find_first();
+ nfa->rAccelOffset = i + 1;
+ DEBUG_PRINTF("raccel verm %u %02hhx\n", nfa->rAccelOffset,
+ nfa->rAccelData.c);
+ return true;
+ } else if (nocase && isPseudoNoCaseChar(cr)) {
+ assert(i < minWidth);
+ if (i >= minWidth) {
+ return false;
+ }
+ nfa->rAccelType = ACCEL_RVERM_NOCASE;
+ nfa->rAccelData.c = (u8)cr.find_first(); /* 'uppercase' char */
+ nfa->rAccelOffset = i + 1;
+ DEBUG_PRINTF("raccel nc verm %u %02hhx\n", nfa->rAccelOffset,
+ nfa->rAccelData.c);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+void buildReverseAcceleration(NFA *nfa, const RevAccInfo &rev_info,
+ u32 min_width, bool eod_only) {
+ assert(nfa);
+
+ if (!rev_info.valid) {
+ return;
+ }
+
+ nfa->rAccelOffset = 1;
+
+ assert(rev_info.acceptReach[0].any() || rev_info.acceptEodReach[0].any());
+ if (rev_info.acceptReach[0].none() && rev_info.acceptEodReach[0].none()) {
+ DEBUG_PRINTF("expected path to accept\n");
+ return;
+ }
+
+ if (rev_info.acceptReach[0].none()) {
+ /* eod only */
+
+ if (lookForEodSchemes(rev_info, min_width, nfa)) {
+ assert(nfa->rAccelOffset <= min_width);
+ return;
+ }
+ }
+
+ if (eod_only) {
+ return;
+ }
+
+ if (!lookForFloatingSchemes(rev_info, min_width, nfa)) {
+ DEBUG_PRINTF("failed to accelerate\n");
+ }
+}
+
+static
+void populateRevAccelInfo(const NGHolder &g, NFAVertex terminal,
+ vector<CharReach> *reach) {
+ set<NFAVertex> vset;
+
+ for (auto v : inv_adjacent_vertices_range(terminal, g)) {
+ if (!is_special(v, g)) {
+ vset.insert(v);
+ }
+ }
+
+ for (u8 offset = 0; offset < MAX_RACCEL_OFFSET; offset++) {
+ set<NFAVertex> next;
+
+ for (auto v : vset) {
+ const CharReach &cr = g[v].char_reach;
+ (*reach)[offset] |= cr;
+
+ DEBUG_PRINTF("off %u adding %zu to %zu\n", offset, cr.count(),
+ (*reach)[offset].count());
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == g.start || u == g.startDs) {
+ /* kill all subsequent offsets by setting to dot, setting
+ * to dot is in someways not accurate as there may be no
+ * data at all but neither case can be accelerated */
+ for (u8 i = offset + 1; i < MAX_RACCEL_OFFSET; i++) {
+ (*reach)[i].setall();
+ }
+ break;
+ } else if (!is_special(u, g)) {
+ next.insert(u);
+ }
+ }
+ }
+
+ swap(vset, next);
+ }
+}
+
+void populateReverseAccelerationInfo(RevAccInfo &rai, const NGHolder &g) {
+ DEBUG_PRINTF("pop rev info\n");
+ populateRevAccelInfo(g, g.accept, &rai.acceptReach);
+ populateRevAccelInfo(g, g.acceptEod, &rai.acceptEodReach);
+ rai.valid = true;
+}
+
+void mergeReverseAccelerationInfo(RevAccInfo &dest, const RevAccInfo &vic) {
+ DEBUG_PRINTF("merging ra\n");
+
+ dest.valid &= vic.valid;
+
+ for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
+ dest.acceptReach[i] |= vic.acceptReach[i];
+ dest.acceptEodReach[i] |= vic.acceptEodReach[i];
+ }
+}
+
+RevAccInfo::RevAccInfo(void)
+ : valid(false), acceptReach(MAX_RACCEL_OFFSET),
+ acceptEodReach(MAX_RACCEL_OFFSET) {}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_revacc.h b/contrib/libs/hyperscan/src/nfagraph/ng_revacc.h
index 0ab6a338c2..bde54574cb 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_revacc.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_revacc.h
@@ -1,65 +1,65 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Reverse acceleration analysis.
- */
-
-#ifndef NG_REVACC_H
-#define NG_REVACC_H
-
-#include "util/charreach.h"
-
-#include <vector>
-
-struct NFA;
-
-namespace ue2 {
-
-class NGHolder;
-
-#define MAX_RACCEL_OFFSET 16
-
-struct RevAccInfo {
- RevAccInfo(void);
- bool valid;
- std::vector<CharReach> acceptReach; /**< bytes which can appear n
- * bytes before a match */
- std::vector<CharReach> acceptEodReach; /**< bytes which can appear n
- * bytes before eod match */
-};
-
-void buildReverseAcceleration(struct NFA *nfa, const RevAccInfo &rev_info,
- u32 min_width, bool eod_only = false);
-
-void populateReverseAccelerationInfo(RevAccInfo &rai, const NGHolder &g);
-void mergeReverseAccelerationInfo(RevAccInfo &dest, const RevAccInfo &vic);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Reverse acceleration analysis.
+ */
+
+#ifndef NG_REVACC_H
+#define NG_REVACC_H
+
+#include "util/charreach.h"
+
+#include <vector>
+
+struct NFA;
+
+namespace ue2 {
+
+class NGHolder;
+
+#define MAX_RACCEL_OFFSET 16
+
+struct RevAccInfo {
+ RevAccInfo(void);
+ bool valid;
+ std::vector<CharReach> acceptReach; /**< bytes which can appear n
+ * bytes before a match */
+ std::vector<CharReach> acceptEodReach; /**< bytes which can appear n
+ * bytes before eod match */
+};
+
+void buildReverseAcceleration(struct NFA *nfa, const RevAccInfo &rev_info,
+ u32 min_width, bool eod_only = false);
+
+void populateReverseAccelerationInfo(RevAccInfo &rai, const NGHolder &g);
+void mergeReverseAccelerationInfo(RevAccInfo &dest, const RevAccInfo &vic);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_sep.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_sep.cpp
index 82ee226cec..86528b4a00 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_sep.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_sep.cpp
@@ -1,93 +1,93 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Short Exhaustible Passthroughs.
- *
- * Analysis code for determining whether a graph should be treated specially
- * because it is short and contains exhaustible reports; typically we turn
- * these into outfixes rather than risk them becoming Rose literals.
- *
- * For example, the pattern:
- *
- * /[a-f]/H
- *
- * ... is far better suited to becoming a small outfix that generates one match
- * and goes dead than being split into six one-byte Rose literals that end up
- * in the literal matcher.
- */
-#include "ng_sep.h"
-
-#include "grey.h"
-#include "ng_holder.h"
-#include "ng_reports.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/graph_range.h"
-
-using namespace std;
-
-namespace ue2 {
-
-static
-bool checkFromVertex(const NGHolder &g, NFAVertex start) {
- for (auto v : adjacent_vertices_range(start, g)) {
- if (v == g.startDs) {
- continue;
- }
-
- assert(!is_special(v, g)); /* should not be vacuous */
-
- if (!edge(g.startDs, v, g).second) { /* only floating starts */
- return false;
- } else if (out_degree(v, g) == 1
- && edge(v, g.accept, g).second) { /* only floating end */
- ; /* possible sep */
- } else {
- return false;
- }
- }
- return true;
-}
-
-bool isSEP(const NGHolder &g, const ReportManager &rm, const Grey &grey) {
- if (!grey.mergeSEP || !can_exhaust(g, rm)) {
- return false;
- }
-
- if (!checkFromVertex(g, g.start) || !checkFromVertex(g, g.startDs)) {
- return false;
- }
-
- assert(out_degree(g.start, g) || proper_out_degree(g.startDs, g));
-
- DEBUG_PRINTF("graph is an SEP\n");
- return true;
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Short Exhaustible Passthroughs.
+ *
+ * Analysis code for determining whether a graph should be treated specially
+ * because it is short and contains exhaustible reports; typically we turn
+ * these into outfixes rather than risk them becoming Rose literals.
+ *
+ * For example, the pattern:
+ *
+ * /[a-f]/H
+ *
+ * ... is far better suited to becoming a small outfix that generates one match
+ * and goes dead than being split into six one-byte Rose literals that end up
+ * in the literal matcher.
+ */
+#include "ng_sep.h"
+
+#include "grey.h"
+#include "ng_holder.h"
+#include "ng_reports.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/graph_range.h"
+
+using namespace std;
+
+namespace ue2 {
+
+static
+bool checkFromVertex(const NGHolder &g, NFAVertex start) {
+ for (auto v : adjacent_vertices_range(start, g)) {
+ if (v == g.startDs) {
+ continue;
+ }
+
+ assert(!is_special(v, g)); /* should not be vacuous */
+
+ if (!edge(g.startDs, v, g).second) { /* only floating starts */
+ return false;
+ } else if (out_degree(v, g) == 1
+ && edge(v, g.accept, g).second) { /* only floating end */
+ ; /* possible sep */
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool isSEP(const NGHolder &g, const ReportManager &rm, const Grey &grey) {
+ if (!grey.mergeSEP || !can_exhaust(g, rm)) {
+ return false;
+ }
+
+ if (!checkFromVertex(g, g.start) || !checkFromVertex(g, g.startDs)) {
+ return false;
+ }
+
+ assert(out_degree(g.start, g) || proper_out_degree(g.startDs, g));
+
+ DEBUG_PRINTF("graph is an SEP\n");
+ return true;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_sep.h b/contrib/libs/hyperscan/src/nfagraph/ng_sep.h
index d4195c5ef4..4a2bef34f7 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_sep.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_sep.h
@@ -1,46 +1,46 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Short Exhaustible Passthroughs.
- */
-
-#ifndef NG_SEP_H
-#define NG_SEP_H
-
-namespace ue2 {
-
-struct Grey;
-class NGHolder;
-class ReportManager;
-
-bool isSEP(const NGHolder &g, const ReportManager &rm, const Grey &grey);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Short Exhaustible Passthroughs.
+ */
+
+#ifndef NG_SEP_H
+#define NG_SEP_H
+
+namespace ue2 {
+
+struct Grey;
+class NGHolder;
+class ReportManager;
+
+bool isSEP(const NGHolder &g, const ReportManager &rm, const Grey &grey);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.cpp
index 9c07f2087c..9c2d9ba38d 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.cpp
@@ -1,268 +1,268 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose construction from NGHolder for cases representing small literal
- * sets.
- */
-#include "ng_small_literal_set.h"
-
-#include "grey.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose construction from NGHolder for cases representing small literal
+ * sets.
+ */
+#include "ng_small_literal_set.h"
+
+#include "grey.h"
#include "ng_holder.h"
-#include "ng_util.h"
-#include "rose/rose_build.h"
-#include "util/compare.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/graph_range.h"
-#include "util/order_check.h"
-#include "util/ue2string.h"
-#include "ue2common.h"
-
-#include <map>
-#include <set>
-#include <vector>
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_keys;
-
-namespace ue2 {
-
-/** \brief The maximum number of literals to accept per pattern. */
-static const size_t MAX_LITERAL_SET_SIZE = 30;
-
-/**
- * \brief The maximum number of literals to accept per pattern where at least
- * one is weak (has period < MIN_STRONG_PERIOD).
- */
-static const size_t MAX_WEAK_LITERAL_SET_SIZE = 20;
-
-/**
- * \brief The minimum string period to consider a literal "strong" (and not
- * apply the weak size limit).
- */
-static const size_t MIN_STRONG_PERIOD = 3;
-
-namespace {
-
-struct sls_literal {
- bool anchored;
- bool eod;
- ue2_literal s;
-
- explicit sls_literal(bool a) : anchored(a), eod(false) {}
-
- sls_literal append(char c, bool nocase) const {
- sls_literal rv(anchored);
- rv.s = s;
- rv.s.push_back(ue2_literal::elem(c, nocase));
-
- return rv;
- }
-};
-
-static
-bool operator<(const sls_literal &a, const sls_literal &b) {
- ORDER_CHECK(anchored);
- ORDER_CHECK(eod);
- ORDER_CHECK(s);
-
- return false;
-}
-
-} // namespace
-
-static
-bool checkLongMixedSensitivityLiterals(
+#include "ng_util.h"
+#include "rose/rose_build.h"
+#include "util/compare.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/graph_range.h"
+#include "util/order_check.h"
+#include "util/ue2string.h"
+#include "ue2common.h"
+
+#include <map>
+#include <set>
+#include <vector>
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_keys;
+
+namespace ue2 {
+
+/** \brief The maximum number of literals to accept per pattern. */
+static const size_t MAX_LITERAL_SET_SIZE = 30;
+
+/**
+ * \brief The maximum number of literals to accept per pattern where at least
+ * one is weak (has period < MIN_STRONG_PERIOD).
+ */
+static const size_t MAX_WEAK_LITERAL_SET_SIZE = 20;
+
+/**
+ * \brief The minimum string period to consider a literal "strong" (and not
+ * apply the weak size limit).
+ */
+static const size_t MIN_STRONG_PERIOD = 3;
+
+namespace {
+
+struct sls_literal {
+ bool anchored;
+ bool eod;
+ ue2_literal s;
+
+ explicit sls_literal(bool a) : anchored(a), eod(false) {}
+
+ sls_literal append(char c, bool nocase) const {
+ sls_literal rv(anchored);
+ rv.s = s;
+ rv.s.push_back(ue2_literal::elem(c, nocase));
+
+ return rv;
+ }
+};
+
+static
+bool operator<(const sls_literal &a, const sls_literal &b) {
+ ORDER_CHECK(anchored);
+ ORDER_CHECK(eod);
+ ORDER_CHECK(s);
+
+ return false;
+}
+
+} // namespace
+
+static
+bool checkLongMixedSensitivityLiterals(
const map<sls_literal, flat_set<ReportID>> &literals) {
- const size_t len = MAX_MASK2_WIDTH;
-
- for (const sls_literal &lit : literals | map_keys) {
- if (mixed_sensitivity(lit.s) && lit.s.length() > len) {
- return false;
- }
- }
-
- return true;
-}
-
-static
-bool findLiterals(const NGHolder &g,
+ const size_t len = MAX_MASK2_WIDTH;
+
+ for (const sls_literal &lit : literals | map_keys) {
+ if (mixed_sensitivity(lit.s) && lit.s.length() > len) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static
+bool findLiterals(const NGHolder &g,
map<sls_literal, flat_set<ReportID>> *literals) {
- vector<NFAVertex> order = getTopoOrdering(g);
-
- vector<set<sls_literal>> built(num_vertices(g));
- vector<size_t> read_count(num_vertices(g));
-
- for (auto it = order.rbegin(); it != order.rend(); ++it) {
- NFAVertex v = *it;
- set<sls_literal> &out = built[g[v].index];
- read_count[g[v].index] = out_degree(v, g);
-
+ vector<NFAVertex> order = getTopoOrdering(g);
+
+ vector<set<sls_literal>> built(num_vertices(g));
+ vector<size_t> read_count(num_vertices(g));
+
+ for (auto it = order.rbegin(); it != order.rend(); ++it) {
+ NFAVertex v = *it;
+ set<sls_literal> &out = built[g[v].index];
+ read_count[g[v].index] = out_degree(v, g);
+
DEBUG_PRINTF("setting read_count to %zu for %zu\n",
- read_count[g[v].index], g[v].index);
-
- assert(out.empty());
- if (v == g.start) {
- out.insert(sls_literal(true));
- continue;
- } else if (v == g.startDs) {
- out.insert(sls_literal(false));
- continue;
- }
-
- bool eod = v == g.acceptEod;
- bool accept = v == g.accept || v == g.acceptEod;
- const CharReach &cr = g[v].char_reach;
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == g.accept) {
- continue;
- }
-
- if (u == g.start && edge(g.startDs, v, g).second) {
- /* floating start states may have connections to start and
- * startDs - don't create duplicate anchored literals */
- DEBUG_PRINTF("skipping as floating\n");
- continue;
- }
-
- set<sls_literal> &in = built[g[u].index];
+ read_count[g[v].index], g[v].index);
+
+ assert(out.empty());
+ if (v == g.start) {
+ out.insert(sls_literal(true));
+ continue;
+ } else if (v == g.startDs) {
+ out.insert(sls_literal(false));
+ continue;
+ }
+
+ bool eod = v == g.acceptEod;
+ bool accept = v == g.accept || v == g.acceptEod;
+ const CharReach &cr = g[v].char_reach;
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == g.accept) {
+ continue;
+ }
+
+ if (u == g.start && edge(g.startDs, v, g).second) {
+ /* floating start states may have connections to start and
+ * startDs - don't create duplicate anchored literals */
+ DEBUG_PRINTF("skipping as floating\n");
+ continue;
+ }
+
+ set<sls_literal> &in = built[g[u].index];
DEBUG_PRINTF("getting from %zu (%zu reads to go)\n",
- g[u].index, read_count[g[u].index]);
- assert(!in.empty());
- assert(read_count[g[u].index]);
-
- for (const sls_literal &lit : in) {
- if (accept) {
- sls_literal accept_lit = lit; // copy
- accept_lit.eod = eod;
- insert(&(*literals)[accept_lit], g[u].reports);
- continue;
- }
-
- for (size_t c = cr.find_first(); c != cr.npos;
- c = cr.find_next(c)) {
- bool nocase = ourisalpha(c) && cr.test(mytoupper(c))
- && cr.test(mytolower(c));
-
- if (nocase && (char)c == mytolower(c)) {
- continue; /* uppercase already handled us */
- }
-
- out.insert(lit.append((u8)c, nocase));
-
- if (out.size() + literals->size() > MAX_LITERAL_SET_SIZE) {
- DEBUG_PRINTF("too big %zu + %zu\n", out.size(),
- literals->size());
- return false;
- }
- }
- }
-
- read_count[g[u].index]--;
- if (!read_count[g[u].index]) {
+ g[u].index, read_count[g[u].index]);
+ assert(!in.empty());
+ assert(read_count[g[u].index]);
+
+ for (const sls_literal &lit : in) {
+ if (accept) {
+ sls_literal accept_lit = lit; // copy
+ accept_lit.eod = eod;
+ insert(&(*literals)[accept_lit], g[u].reports);
+ continue;
+ }
+
+ for (size_t c = cr.find_first(); c != cr.npos;
+ c = cr.find_next(c)) {
+ bool nocase = ourisalpha(c) && cr.test(mytoupper(c))
+ && cr.test(mytolower(c));
+
+ if (nocase && (char)c == mytolower(c)) {
+ continue; /* uppercase already handled us */
+ }
+
+ out.insert(lit.append((u8)c, nocase));
+
+ if (out.size() + literals->size() > MAX_LITERAL_SET_SIZE) {
+ DEBUG_PRINTF("too big %zu + %zu\n", out.size(),
+ literals->size());
+ return false;
+ }
+ }
+ }
+
+ read_count[g[u].index]--;
+ if (!read_count[g[u].index]) {
DEBUG_PRINTF("clearing %zu as finished reading\n", g[u].index);
- in.clear();
- }
- }
- }
-
- return true;
-}
-
-static
+ in.clear();
+ }
+ }
+ }
+
+ return true;
+}
+
+static
size_t min_period(const map<sls_literal, flat_set<ReportID>> &literals) {
- size_t rv = SIZE_MAX;
-
- for (const sls_literal &lit : literals | map_keys) {
- rv = min(rv, minStringPeriod(lit.s));
- }
- DEBUG_PRINTF("min period %zu\n", rv);
- return rv;
-}
-
-// If this component is just a small set of literals and can be handled by
-// Rose, feed it directly into rose.
-bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &g,
- const CompileContext &cc) {
- if (!cc.grey.allowSmallLiteralSet) {
- return false;
- }
-
- if (!isAcyclic(g)) {
- /* literal sets would typically be acyclic... */
- DEBUG_PRINTF("not acyclic\n");
- return false;
- }
-
+ size_t rv = SIZE_MAX;
+
+ for (const sls_literal &lit : literals | map_keys) {
+ rv = min(rv, minStringPeriod(lit.s));
+ }
+ DEBUG_PRINTF("min period %zu\n", rv);
+ return rv;
+}
+
+// If this component is just a small set of literals and can be handled by
+// Rose, feed it directly into rose.
+bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &g,
+ const CompileContext &cc) {
+ if (!cc.grey.allowSmallLiteralSet) {
+ return false;
+ }
+
+ if (!isAcyclic(g)) {
+ /* literal sets would typically be acyclic... */
+ DEBUG_PRINTF("not acyclic\n");
+ return false;
+ }
+
if (!hasNarrowReachVertex(g, MAX_LITERAL_SET_SIZE * 2 + 1)) {
DEBUG_PRINTF("vertex with wide reach found\n");
return false;
}
- DEBUG_PRINTF("looking for literals\n");
-
+ DEBUG_PRINTF("looking for literals\n");
+
map<sls_literal, flat_set<ReportID>> literals;
- if (!findLiterals(g, &literals)) {
- DEBUG_PRINTF(":(\n");
- return false;
- }
-
- assert(!literals.empty());
-
- if (literals.size() > MAX_LITERAL_SET_SIZE) {
- /* try a mask instead */
- DEBUG_PRINTF("too many literals\n");
- return false;
- }
-
- size_t period = min_period(literals);
- if (period < MIN_STRONG_PERIOD &&
- literals.size() > MAX_WEAK_LITERAL_SET_SIZE) {
- DEBUG_PRINTF("too many literals with weak period\n");
- return false;
- }
-
- if (!checkLongMixedSensitivityLiterals(literals)) {
- DEBUG_PRINTF("long mixed\n");
- return false;
- }
-
- DEBUG_PRINTF("adding %zu literals\n", literals.size());
- for (const auto &m : literals) {
- const sls_literal &lit = m.first;
- const auto &reports = m.second;
- rose.add(lit.anchored, lit.eod, lit.s, reports);
- }
-
- return true;
-}
-
-} // namespace ue2
+ if (!findLiterals(g, &literals)) {
+ DEBUG_PRINTF(":(\n");
+ return false;
+ }
+
+ assert(!literals.empty());
+
+ if (literals.size() > MAX_LITERAL_SET_SIZE) {
+ /* try a mask instead */
+ DEBUG_PRINTF("too many literals\n");
+ return false;
+ }
+
+ size_t period = min_period(literals);
+ if (period < MIN_STRONG_PERIOD &&
+ literals.size() > MAX_WEAK_LITERAL_SET_SIZE) {
+ DEBUG_PRINTF("too many literals with weak period\n");
+ return false;
+ }
+
+ if (!checkLongMixedSensitivityLiterals(literals)) {
+ DEBUG_PRINTF("long mixed\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("adding %zu literals\n", literals.size());
+ for (const auto &m : literals) {
+ const sls_literal &lit = m.first;
+ const auto &reports = m.second;
+ rose.add(lit.anchored, lit.eod, lit.s, reports);
+ }
+
+ return true;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.h b/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.h
index 0beca09a96..e626627071 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_small_literal_set.h
@@ -1,50 +1,50 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose construction from NGHolder for cases representing small literal
- * sets.
- */
-
-#ifndef NG_SMALL_LITERAL_SET_H
-#define NG_SMALL_LITERAL_SET_H
-
-namespace ue2 {
-
-class RoseBuild;
-class NGHolder;
-struct CompileContext;
-
-/** \brief If the graph represents a small set of literals, feed them directly
- * to rose. Returns true if successful. */
-bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &h,
- const CompileContext &cc);
-
-} // namespace ue2
-
-#endif // NG_SMALL_LITERAL_SET_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose construction from NGHolder for cases representing small literal
+ * sets.
+ */
+
+#ifndef NG_SMALL_LITERAL_SET_H
+#define NG_SMALL_LITERAL_SET_H
+
+namespace ue2 {
+
+class RoseBuild;
+class NGHolder;
+struct CompileContext;
+
+/** \brief If the graph represents a small set of literals, feed them directly
+ * to rose. Returns true if successful. */
+bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &h,
+ const CompileContext &cc);
+
+} // namespace ue2
+
+#endif // NG_SMALL_LITERAL_SET_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_som.cpp
index 7383817ad8..d23ac408b0 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_som.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_som.cpp
@@ -1,747 +1,747 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief SOM ("Start of Match") analysis.
- */
+ * \brief SOM ("Start of Match") analysis.
+ */
#include "ng_som.h"
-#include "ng.h"
-#include "ng_dump.h"
-#include "ng_equivalence.h"
-#include "ng_execute.h"
-#include "ng_haig.h"
-#include "ng_limex.h"
-#include "ng_literal_analysis.h"
-#include "ng_prune.h"
-#include "ng_redundancy.h"
-#include "ng_region.h"
-#include "ng_reports.h"
-#include "ng_som_add_redundancy.h"
-#include "ng_som_util.h"
-#include "ng_split.h"
-#include "ng_util.h"
+#include "ng.h"
+#include "ng_dump.h"
+#include "ng_equivalence.h"
+#include "ng_execute.h"
+#include "ng_haig.h"
+#include "ng_limex.h"
+#include "ng_literal_analysis.h"
+#include "ng_prune.h"
+#include "ng_redundancy.h"
+#include "ng_region.h"
+#include "ng_reports.h"
+#include "ng_som_add_redundancy.h"
+#include "ng_som_util.h"
+#include "ng_split.h"
+#include "ng_util.h"
#include "ng_violet.h"
-#include "ng_width.h"
-#include "grey.h"
-#include "ue2common.h"
+#include "ng_width.h"
+#include "grey.h"
+#include "ue2common.h"
#include "compiler/compiler.h"
-#include "nfa/goughcompile.h"
-#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
-#include "parser/position.h"
-#include "som/som.h"
-#include "rose/rose_build.h"
-#include "rose/rose_in_util.h"
-#include "util/alloc.h"
-#include "util/compare.h"
-#include "util/compile_error.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph_range.h"
-#include "util/make_unique.h"
-
-#include <algorithm>
-#include <map>
+#include "nfa/goughcompile.h"
+#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
+#include "parser/position.h"
+#include "som/som.h"
+#include "rose/rose_build.h"
+#include "rose/rose_in_util.h"
+#include "util/alloc.h"
+#include "util/compare.h"
+#include "util/compile_error.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
+#include "util/make_unique.h"
+
+#include <algorithm>
+#include <map>
#include <unordered_map>
#include <unordered_set>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-static const size_t MAX_SOM_PLANS = 10;
-static const size_t MAX_SOMBE_CHAIN_VERTICES = 4000;
-
-#define MAX_REV_NFA_PREFIX 80
-
-namespace {
-struct som_plan {
- som_plan(const shared_ptr<NGHolder> &p, const CharReach &e, bool i,
- u32 parent_in) : prefix(p), escapes(e), is_reset(i),
- no_implement(false), parent(parent_in) { }
- shared_ptr<NGHolder> prefix;
- CharReach escapes;
- bool is_reset;
- bool no_implement;
- u32 parent; // index of parent plan in the vector.
-
- // Reporters: a list of vertices in the graph that must be have their
- // reports updated at implementation time to report this plan's
- // som_loc_out.
- vector<NFAVertex> reporters;
-
- // Similar, but these report the som_loc_in.
- vector<NFAVertex> reporters_in;
-};
-}
-
-static
-bool regionCanEstablishSom(const NGHolder &g,
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+static const size_t MAX_SOM_PLANS = 10;
+static const size_t MAX_SOMBE_CHAIN_VERTICES = 4000;
+
+#define MAX_REV_NFA_PREFIX 80
+
+namespace {
+struct som_plan {
+ som_plan(const shared_ptr<NGHolder> &p, const CharReach &e, bool i,
+ u32 parent_in) : prefix(p), escapes(e), is_reset(i),
+ no_implement(false), parent(parent_in) { }
+ shared_ptr<NGHolder> prefix;
+ CharReach escapes;
+ bool is_reset;
+ bool no_implement;
+ u32 parent; // index of parent plan in the vector.
+
+ // Reporters: a list of vertices in the graph that must be have their
+ // reports updated at implementation time to report this plan's
+ // som_loc_out.
+ vector<NFAVertex> reporters;
+
+ // Similar, but these report the som_loc_in.
+ vector<NFAVertex> reporters_in;
+};
+}
+
+static
+bool regionCanEstablishSom(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const u32 region, const vector<NFAVertex> &r_exits,
- const vector<DepthMinMax> &depths) {
- if (region == regions.at(g.accept) ||
- region == regions.at(g.acceptEod)) {
- DEBUG_PRINTF("accept in region\n");
- return false;
- }
-
- DEBUG_PRINTF("region %u\n", region);
- for (UNUSED auto v : r_exits) {
+ const u32 region, const vector<NFAVertex> &r_exits,
+ const vector<DepthMinMax> &depths) {
+ if (region == regions.at(g.accept) ||
+ region == regions.at(g.acceptEod)) {
+ DEBUG_PRINTF("accept in region\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("region %u\n", region);
+ for (UNUSED auto v : r_exits) {
DEBUG_PRINTF(" exit %zu\n", g[v].index);
- }
-
- /* simple if each region exit is at fixed distance from SOM. Note SOM does
- not include virtual starts */
- for (auto v : r_exits) {
- assert(regions.at(v) == region);
- const DepthMinMax &d = depths.at(g[v].index);
- if (d.min != d.max) {
+ }
+
+ /* simple if each region exit is at fixed distance from SOM. Note SOM does
+ not include virtual starts */
+ for (auto v : r_exits) {
+ assert(regions.at(v) == region);
+ const DepthMinMax &d = depths.at(g[v].index);
+ if (d.min != d.max) {
DEBUG_PRINTF("failing %zu as %s != %s\n", g[v].index,
- d.min.str().c_str(), d.max.str().c_str());
- return false;
- }
- }
+ d.min.str().c_str(), d.max.str().c_str());
+ return false;
+ }
+ }
DEBUG_PRINTF("region %u/%zu is good\n", regions.at(r_exits[0]),
- g[r_exits[0]].index);
-
- return true;
-}
-
-namespace {
-
-struct region_info {
- region_info() : optional(false), dag(false) {}
- vector<NFAVertex> enters;
- vector<NFAVertex> exits;
- vector<NFAVertex> full;
- bool optional; /* skip edges around region */
- bool dag; /* completely acyclic */
-};
-
-}
-
-static
-void buildRegionMapping(const NGHolder &g,
+ g[r_exits[0]].index);
+
+ return true;
+}
+
+namespace {
+
+struct region_info {
+ region_info() : optional(false), dag(false) {}
+ vector<NFAVertex> enters;
+ vector<NFAVertex> exits;
+ vector<NFAVertex> full;
+ bool optional; /* skip edges around region */
+ bool dag; /* completely acyclic */
+};
+
+}
+
+static
+void buildRegionMapping(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- map<u32, region_info> &info,
- bool include_region_0 = false) {
- for (auto v : vertices_range(g)) {
- u32 region = regions.at(v);
- if (!include_region_0 && (is_any_start(v, g) || region == 0)) {
- continue;
- }
- assert(!region || !is_any_start(v, g));
-
- if (is_any_accept(v, g)) {
- continue;
- }
-
- if (isRegionEntry(g, v, regions)) {
- info[region].enters.push_back(v);
- }
- if (isRegionExit(g, v, regions)) {
- info[region].exits.push_back(v);
- }
- info[region].full.push_back(v);
- }
-
- for (auto &m : info) {
- if (!m.second.enters.empty()
- && isOptionalRegion(g, m.second.enters.front(), regions)) {
- m.second.optional = true;
- }
- m.second.dag = true; /* will be cleared for cyclic regions later */
- }
-
- set<NFAEdge> be;
- BackEdges<set<NFAEdge> > backEdgeVisitor(be);
+ map<u32, region_info> &info,
+ bool include_region_0 = false) {
+ for (auto v : vertices_range(g)) {
+ u32 region = regions.at(v);
+ if (!include_region_0 && (is_any_start(v, g) || region == 0)) {
+ continue;
+ }
+ assert(!region || !is_any_start(v, g));
+
+ if (is_any_accept(v, g)) {
+ continue;
+ }
+
+ if (isRegionEntry(g, v, regions)) {
+ info[region].enters.push_back(v);
+ }
+ if (isRegionExit(g, v, regions)) {
+ info[region].exits.push_back(v);
+ }
+ info[region].full.push_back(v);
+ }
+
+ for (auto &m : info) {
+ if (!m.second.enters.empty()
+ && isOptionalRegion(g, m.second.enters.front(), regions)) {
+ m.second.optional = true;
+ }
+ m.second.dag = true; /* will be cleared for cyclic regions later */
+ }
+
+ set<NFAEdge> be;
+ BackEdges<set<NFAEdge> > backEdgeVisitor(be);
boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start));
-
- for (const auto &e : be) {
- NFAVertex u = source(e, g);
- NFAVertex v = target(e, g);
- if (is_special(u, g) || is_special(v, g)) {
- assert(is_special(u, g) && is_special(v, g));
- continue;
- }
- u32 r = regions.at(v);
- assert(regions.at(u) == r);
- info[r].dag = false;
- }
-
- if (include_region_0) {
- info[0].dag = false;
- }
-
- #ifdef DEBUG
- for (const auto &m : info) {
- u32 r = m.first;
- const region_info &r_i = m.second;
- DEBUG_PRINTF("region %u:%s%s\n", r,
- r_i.dag ? " (dag)" : "",
- r_i.optional ? " (optional)" : "");
- DEBUG_PRINTF(" enters:");
- for (u32 i = 0; i < r_i.enters.size(); i++) {
+
+ for (const auto &e : be) {
+ NFAVertex u = source(e, g);
+ NFAVertex v = target(e, g);
+ if (is_special(u, g) || is_special(v, g)) {
+ assert(is_special(u, g) && is_special(v, g));
+ continue;
+ }
+ u32 r = regions.at(v);
+ assert(regions.at(u) == r);
+ info[r].dag = false;
+ }
+
+ if (include_region_0) {
+ info[0].dag = false;
+ }
+
+ #ifdef DEBUG
+ for (const auto &m : info) {
+ u32 r = m.first;
+ const region_info &r_i = m.second;
+ DEBUG_PRINTF("region %u:%s%s\n", r,
+ r_i.dag ? " (dag)" : "",
+ r_i.optional ? " (optional)" : "");
+ DEBUG_PRINTF(" enters:");
+ for (u32 i = 0; i < r_i.enters.size(); i++) {
printf(" %zu", g[r_i.enters[i]].index);
- }
- printf("\n");
- DEBUG_PRINTF(" exits:");
- for (u32 i = 0; i < r_i.exits.size(); i++) {
+ }
+ printf("\n");
+ DEBUG_PRINTF(" exits:");
+ for (u32 i = 0; i < r_i.exits.size(); i++) {
printf(" %zu", g[r_i.exits[i]].index);
- }
- printf("\n");
- DEBUG_PRINTF(" all:");
- for (u32 i = 0; i < r_i.full.size(); i++) {
+ }
+ printf("\n");
+ DEBUG_PRINTF(" all:");
+ for (u32 i = 0; i < r_i.full.size(); i++) {
printf(" %zu", g[r_i.full[i]].index);
- }
- printf("\n");
- }
- #endif
-}
-
-static
-bool validateXSL(const NGHolder &g,
+ }
+ printf("\n");
+ }
+ #endif
+}
+
+static
+bool validateXSL(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const u32 region, const CharReach &escapes, u32 *bad_region) {
- /* need to check that the escapes escape all of the graph past region */
- u32 first_bad_region = ~0U;
- for (auto v : vertices_range(g)) {
- u32 v_region = regions.at(v);
- if (!is_special(v, g) && v_region > region &&
- (escapes & g[v].char_reach).any()) {
+ const u32 region, const CharReach &escapes, u32 *bad_region) {
+ /* need to check that the escapes escape all of the graph past region */
+ u32 first_bad_region = ~0U;
+ for (auto v : vertices_range(g)) {
+ u32 v_region = regions.at(v);
+ if (!is_special(v, g) && v_region > region &&
+ (escapes & g[v].char_reach).any()) {
DEBUG_PRINTF("problem with escapes for %zu\n", g[v].index);
- first_bad_region = MIN(first_bad_region, v_region);
- }
- }
-
- if (first_bad_region != ~0U) {
- *bad_region = first_bad_region;
- return false;
- }
-
- return true;
-}
-
-static
-bool validateEXSL(const NGHolder &g,
+ first_bad_region = MIN(first_bad_region, v_region);
+ }
+ }
+
+ if (first_bad_region != ~0U) {
+ *bad_region = first_bad_region;
+ return false;
+ }
+
+ return true;
+}
+
+static
+bool validateEXSL(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const u32 region, const CharReach &escapes,
- const NGHolder &prefix, u32 *bad_region) {
- /* EXSL: To be a valid EXSL with escapes e, we require that all states
- * go dead after /[e][^e]*{subsequent prefix match}/.
- */
-
- /* TODO: this is overly conservative as it allow partial matches from the
- * prefix to be considered even when the tail has processed some [^e] */
-
- u32 first_bad_region = ~0U;
- const vector<CharReach> escapes_vec(1, escapes);
- const vector<CharReach> notescapes_vec(1, ~escapes);
-
+ const u32 region, const CharReach &escapes,
+ const NGHolder &prefix, u32 *bad_region) {
+ /* EXSL: To be a valid EXSL with escapes e, we require that all states
+ * go dead after /[e][^e]*{subsequent prefix match}/.
+ */
+
+ /* TODO: this is overly conservative as it allow partial matches from the
+ * prefix to be considered even when the tail has processed some [^e] */
+
+ u32 first_bad_region = ~0U;
+ const vector<CharReach> escapes_vec(1, escapes);
+ const vector<CharReach> notescapes_vec(1, ~escapes);
+
flat_set<NFAVertex> states;
- /* turn on all states past the prefix */
- DEBUG_PRINTF("region %u is cutover\n", region);
- for (auto v : vertices_range(g)) {
- if (!is_special(v, g) && regions.at(v) > region) {
- states.insert(v);
- }
- }
-
- /* process the escapes */
- states = execute_graph(g, escapes_vec, states);
-
- /* flood with any number of not escapes */
+ /* turn on all states past the prefix */
+ DEBUG_PRINTF("region %u is cutover\n", region);
+ for (auto v : vertices_range(g)) {
+ if (!is_special(v, g) && regions.at(v) > region) {
+ states.insert(v);
+ }
+ }
+
+ /* process the escapes */
+ states = execute_graph(g, escapes_vec, states);
+
+ /* flood with any number of not escapes */
flat_set<NFAVertex> prev_states;
- while (prev_states != states) {
- prev_states = states;
- states = execute_graph(g, notescapes_vec, states);
- insert(&states, prev_states);
- }
-
- /* find input starts to use for when we are running the prefix through as
- * when the escape character arrives we may be in matching the prefix
- * already */
+ while (prev_states != states) {
+ prev_states = states;
+ states = execute_graph(g, notescapes_vec, states);
+ insert(&states, prev_states);
+ }
+
+ /* find input starts to use for when we are running the prefix through as
+ * when the escape character arrives we may be in matching the prefix
+ * already */
flat_set<NFAVertex> prefix_start_states;
- for (auto v : vertices_range(prefix)) {
- if (v != prefix.accept && v != prefix.acceptEod
- /* and as we have already made it past the prefix once */
- && v != prefix.start) {
- prefix_start_states.insert(v);
- }
- }
-
- prefix_start_states =
- execute_graph(prefix, escapes_vec, prefix_start_states);
-
- assert(contains(prefix_start_states, prefix.startDs));
- /* see what happens after we feed it the prefix */
- states = execute_graph(g, prefix, prefix_start_states, states);
-
- for (auto v : states) {
- assert(v != g.accept && v != g.acceptEod); /* no cr -> should never be
- * on */
- DEBUG_PRINTF("state still active\n");
- first_bad_region = MIN(first_bad_region, regions.at(v));
- }
-
- if (first_bad_region != ~0U) {
- *bad_region = first_bad_region;
- return false;
- }
-
- return true;
-}
-
-static
-bool isPossibleLock(const NGHolder &g,
- map<u32, region_info>::const_iterator region,
- const map<u32, region_info> &info,
- CharReach *escapes_out) {
- /* TODO: we could also check for self-loops on curr region */
-
- /* TODO: some straw-walking logic. lowish priority has we know there can
- * only be optional regions between us and the cyclic */
-
- assert(region != info.end());
- map<u32, region_info>::const_iterator next_region = region;
- ++next_region;
- if (next_region == info.end()) {
- assert(0); /* odd */
- return false;
- }
-
- const region_info &next_info = next_region->second;
- if (next_info.enters.empty()) {
- assert(0); /* odd */
- return false;
- }
-
- if (next_info.full.size() == 1 && !next_info.dag) {
- *escapes_out = ~g[next_info.full.front()].char_reach;
- return true;
- }
-
- return false;
-}
-
-static
-unique_ptr<NGHolder>
+ for (auto v : vertices_range(prefix)) {
+ if (v != prefix.accept && v != prefix.acceptEod
+ /* and as we have already made it past the prefix once */
+ && v != prefix.start) {
+ prefix_start_states.insert(v);
+ }
+ }
+
+ prefix_start_states =
+ execute_graph(prefix, escapes_vec, prefix_start_states);
+
+ assert(contains(prefix_start_states, prefix.startDs));
+ /* see what happens after we feed it the prefix */
+ states = execute_graph(g, prefix, prefix_start_states, states);
+
+ for (auto v : states) {
+ assert(v != g.accept && v != g.acceptEod); /* no cr -> should never be
+ * on */
+ DEBUG_PRINTF("state still active\n");
+ first_bad_region = MIN(first_bad_region, regions.at(v));
+ }
+
+ if (first_bad_region != ~0U) {
+ *bad_region = first_bad_region;
+ return false;
+ }
+
+ return true;
+}
+
+static
+bool isPossibleLock(const NGHolder &g,
+ map<u32, region_info>::const_iterator region,
+ const map<u32, region_info> &info,
+ CharReach *escapes_out) {
+ /* TODO: we could also check for self-loops on curr region */
+
+ /* TODO: some straw-walking logic. lowish priority has we know there can
+ * only be optional regions between us and the cyclic */
+
+ assert(region != info.end());
+ map<u32, region_info>::const_iterator next_region = region;
+ ++next_region;
+ if (next_region == info.end()) {
+ assert(0); /* odd */
+ return false;
+ }
+
+ const region_info &next_info = next_region->second;
+ if (next_info.enters.empty()) {
+ assert(0); /* odd */
+ return false;
+ }
+
+ if (next_info.full.size() == 1 && !next_info.dag) {
+ *escapes_out = ~g[next_info.full.front()].char_reach;
+ return true;
+ }
+
+ return false;
+}
+
+static
+unique_ptr<NGHolder>
makePrefix(const NGHolder &g, const unordered_map<NFAVertex, u32> &regions,
- const region_info &curr, const region_info &next,
- bool renumber = true) {
- const vector<NFAVertex> &curr_exits = curr.exits;
- const vector<NFAVertex> &next_enters = next.enters;
-
- assert(!next_enters.empty());
- assert(!curr_exits.empty());
-
- unique_ptr<NGHolder> prefix_ptr = ue2::make_unique<NGHolder>();
- NGHolder &prefix = *prefix_ptr;
-
- deque<NFAVertex> lhs_verts;
- insert(&lhs_verts, lhs_verts.end(), vertices(g));
-
+ const region_info &curr, const region_info &next,
+ bool renumber = true) {
+ const vector<NFAVertex> &curr_exits = curr.exits;
+ const vector<NFAVertex> &next_enters = next.enters;
+
+ assert(!next_enters.empty());
+ assert(!curr_exits.empty());
+
+ unique_ptr<NGHolder> prefix_ptr = ue2::make_unique<NGHolder>();
+ NGHolder &prefix = *prefix_ptr;
+
+ deque<NFAVertex> lhs_verts;
+ insert(&lhs_verts, lhs_verts.end(), vertices(g));
+
unordered_map<NFAVertex, NFAVertex> lhs_map; // g -> prefix
- fillHolder(&prefix, g, lhs_verts, &lhs_map);
- prefix.kind = NFA_OUTFIX;
-
- // We need a reverse mapping to track regions.
+ fillHolder(&prefix, g, lhs_verts, &lhs_map);
+ prefix.kind = NFA_OUTFIX;
+
+ // We need a reverse mapping to track regions.
unordered_map<NFAVertex, NFAVertex> rev_map; // prefix -> g
- for (const auto &e : lhs_map) {
- rev_map.emplace(e.second, e.first);
- }
-
- clear_in_edges(prefix.accept, prefix);
- clear_in_edges(prefix.acceptEod, prefix);
- add_edge(prefix.accept, prefix.acceptEod, prefix);
-
- assert(!next_enters.empty());
+ for (const auto &e : lhs_map) {
+ rev_map.emplace(e.second, e.first);
+ }
+
+ clear_in_edges(prefix.accept, prefix);
+ clear_in_edges(prefix.acceptEod, prefix);
+ add_edge(prefix.accept, prefix.acceptEod, prefix);
+
+ assert(!next_enters.empty());
assert(next_enters.front() != NGHolder::null_vertex());
- u32 dead_region = regions.at(next_enters.front());
- DEBUG_PRINTF("curr_region %u, dead_region %u\n",
- regions.at(curr_exits.front()), dead_region);
- for (auto v : inv_adjacent_vertices_range(next_enters.front(), g)) {
- if (regions.at(v) >= dead_region) {
- continue;
- }
- /* add edge to new accepts */
- NFAVertex p_v = lhs_map[v];
- add_edge(p_v, prefix.accept, prefix);
- }
-
- assert(in_degree(prefix.accept, prefix) != 0);
-
- /* prune everything past the picked region */
- vector<NFAVertex> to_clear;
- assert(contains(lhs_map, curr_exits.front()));
- NFAVertex p_u = lhs_map[curr_exits.front()];
+ u32 dead_region = regions.at(next_enters.front());
+ DEBUG_PRINTF("curr_region %u, dead_region %u\n",
+ regions.at(curr_exits.front()), dead_region);
+ for (auto v : inv_adjacent_vertices_range(next_enters.front(), g)) {
+ if (regions.at(v) >= dead_region) {
+ continue;
+ }
+ /* add edge to new accepts */
+ NFAVertex p_v = lhs_map[v];
+ add_edge(p_v, prefix.accept, prefix);
+ }
+
+ assert(in_degree(prefix.accept, prefix) != 0);
+
+ /* prune everything past the picked region */
+ vector<NFAVertex> to_clear;
+ assert(contains(lhs_map, curr_exits.front()));
+ NFAVertex p_u = lhs_map[curr_exits.front()];
DEBUG_PRINTF("p_u: %zu\n", prefix[p_u].index);
- for (auto p_v : adjacent_vertices_range(p_u, prefix)) {
- auto v = rev_map.at(p_v);
- if (p_v == prefix.accept || regions.at(v) < dead_region) {
- continue;
- }
- to_clear.push_back(p_v);
- }
-
- for (auto v : to_clear) {
+ for (auto p_v : adjacent_vertices_range(p_u, prefix)) {
+ auto v = rev_map.at(p_v);
+ if (p_v == prefix.accept || regions.at(v) < dead_region) {
+ continue;
+ }
+ to_clear.push_back(p_v);
+ }
+
+ for (auto v : to_clear) {
DEBUG_PRINTF("clearing in_edges on %zu\n", prefix[v].index);
- clear_in_edges(v, prefix);
- }
-
- pruneUseless(prefix, renumber /* sometimes we want no renumber to keep
- depth map valid */);
-
- assert(num_vertices(prefix) > N_SPECIALS);
- return prefix_ptr;
-}
-
-static
-void replaceTempSomSlot(ReportManager &rm, NGHolder &g, u32 real_slot) {
- const u32 temp_slot = UINT32_MAX;
- /* update the som slot on the prefix report */
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- auto &reports = g[v].reports;
- assert(reports.size() == 1);
- Report ir = rm.getReport(*reports.begin());
- if (ir.onmatch != temp_slot) {
- continue;
- }
- ir.onmatch = real_slot;
- ReportID rep = rm.getInternalId(ir);
-
- assert(reports.size() == 1);
- reports.clear();
- reports.insert(rep);
- }
-}
-
-static
+ clear_in_edges(v, prefix);
+ }
+
+ pruneUseless(prefix, renumber /* sometimes we want no renumber to keep
+ depth map valid */);
+
+ assert(num_vertices(prefix) > N_SPECIALS);
+ return prefix_ptr;
+}
+
+static
+void replaceTempSomSlot(ReportManager &rm, NGHolder &g, u32 real_slot) {
+ const u32 temp_slot = UINT32_MAX;
+ /* update the som slot on the prefix report */
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ auto &reports = g[v].reports;
+ assert(reports.size() == 1);
+ Report ir = rm.getReport(*reports.begin());
+ if (ir.onmatch != temp_slot) {
+ continue;
+ }
+ ir.onmatch = real_slot;
+ ReportID rep = rm.getInternalId(ir);
+
+ assert(reports.size() == 1);
+ reports.clear();
+ reports.insert(rep);
+ }
+}
+
+static
void setPrefixReports(ReportManager &rm, NGHolder &g, ReportType ir_type,
u32 som_loc, const vector<DepthMinMax> &depths,
bool prefix_by_rev) {
- Report ir = makeCallback(0U, 0);
- ir.type = ir_type;
- ir.onmatch = som_loc;
-
- /* add report for storing in som location on new accepts */
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- if (prefix_by_rev) {
- ir.somDistance = MO_INVALID_IDX; /* will be populated properly
- * later */
- } else {
- const DepthMinMax &d = depths.at(g[v].index);
- assert(d.min == d.max);
- ir.somDistance = d.max;
- }
- ReportID rep = rm.getInternalId(ir);
-
- auto &reports = g[v].reports;
- reports.clear();
- reports.insert(rep);
- }
-}
-
-static
+ Report ir = makeCallback(0U, 0);
+ ir.type = ir_type;
+ ir.onmatch = som_loc;
+
+ /* add report for storing in som location on new accepts */
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ if (prefix_by_rev) {
+ ir.somDistance = MO_INVALID_IDX; /* will be populated properly
+ * later */
+ } else {
+ const DepthMinMax &d = depths.at(g[v].index);
+ assert(d.min == d.max);
+ ir.somDistance = d.max;
+ }
+ ReportID rep = rm.getInternalId(ir);
+
+ auto &reports = g[v].reports;
+ reports.clear();
+ reports.insert(rep);
+ }
+}
+
+static
void updatePrefixReports(ReportManager &rm, NGHolder &g, ReportType ir_type) {
- /* update the som action on the prefix report */
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- auto &reports = g[v].reports;
- assert(reports.size() == 1);
- Report ir = rm.getReport(*reports.begin());
- ir.type = ir_type;
- ReportID rep = rm.getInternalId(ir);
-
- assert(reports.size() == 1);
- reports.clear();
- reports.insert(rep);
- }
-}
-
-static
-void updatePrefixReportsRevNFA(ReportManager &rm, NGHolder &g,
- u32 rev_comp_id) {
- /* update the action on the prefix report, to refer to a reverse nfa,
- * report type is also adjusted. */
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- auto &reports = g[v].reports;
- assert(reports.size() == 1);
- Report ir = rm.getReport(*reports.begin());
- switch (ir.type) {
- case INTERNAL_SOM_LOC_SET:
- ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA;
- break;
- case INTERNAL_SOM_LOC_SET_IF_UNSET:
- ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET;
- break;
- case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
- ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE;
- break;
- default:
- assert(0);
- break;
- }
-
- ir.revNfaIndex = rev_comp_id;
- ReportID rep = rm.getInternalId(ir);
-
- assert(reports.size() == 1);
- reports.clear();
- reports.insert(rep);
- }
-}
-
-static
-void setMidfixReports(ReportManager &rm, const som_plan &item,
- const u32 som_slot_in, const u32 som_slot_out) {
- assert(item.prefix);
- NGHolder &g = *item.prefix;
-
- Report ir = makeCallback(0U, 0);
- ir.type = item.is_reset ? INTERNAL_SOM_LOC_COPY
- : INTERNAL_SOM_LOC_COPY_IF_WRITABLE;
- ir.onmatch = som_slot_out;
- ir.somDistance = som_slot_in;
- ReportID rep = rm.getInternalId(ir);
-
- /* add report for storing in som location on new accepts */
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- auto &reports = g[v].reports;
- reports.clear();
- reports.insert(rep);
- }
-}
-
-static
-bool finalRegion(const NGHolder &g,
+ /* update the som action on the prefix report */
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ auto &reports = g[v].reports;
+ assert(reports.size() == 1);
+ Report ir = rm.getReport(*reports.begin());
+ ir.type = ir_type;
+ ReportID rep = rm.getInternalId(ir);
+
+ assert(reports.size() == 1);
+ reports.clear();
+ reports.insert(rep);
+ }
+}
+
+static
+void updatePrefixReportsRevNFA(ReportManager &rm, NGHolder &g,
+ u32 rev_comp_id) {
+ /* update the action on the prefix report, to refer to a reverse nfa,
+ * report type is also adjusted. */
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ auto &reports = g[v].reports;
+ assert(reports.size() == 1);
+ Report ir = rm.getReport(*reports.begin());
+ switch (ir.type) {
+ case INTERNAL_SOM_LOC_SET:
+ ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA;
+ break;
+ case INTERNAL_SOM_LOC_SET_IF_UNSET:
+ ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET;
+ break;
+ case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
+ ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ ir.revNfaIndex = rev_comp_id;
+ ReportID rep = rm.getInternalId(ir);
+
+ assert(reports.size() == 1);
+ reports.clear();
+ reports.insert(rep);
+ }
+}
+
+static
+void setMidfixReports(ReportManager &rm, const som_plan &item,
+ const u32 som_slot_in, const u32 som_slot_out) {
+ assert(item.prefix);
+ NGHolder &g = *item.prefix;
+
+ Report ir = makeCallback(0U, 0);
+ ir.type = item.is_reset ? INTERNAL_SOM_LOC_COPY
+ : INTERNAL_SOM_LOC_COPY_IF_WRITABLE;
+ ir.onmatch = som_slot_out;
+ ir.somDistance = som_slot_in;
+ ReportID rep = rm.getInternalId(ir);
+
+ /* add report for storing in som location on new accepts */
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ auto &reports = g[v].reports;
+ reports.clear();
+ reports.insert(rep);
+ }
+}
+
+static
+bool finalRegion(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- NFAVertex v) {
- u32 region = regions.at(v);
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w != g.accept && w != g.acceptEod && regions.at(w) != region) {
- return false;
- }
- }
-
- return true;
-}
-
-static
-void replaceExternalReportsWithSomRep(ReportManager &rm, NGHolder &g,
+ NFAVertex v) {
+ u32 region = regions.at(v);
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (w != g.accept && w != g.acceptEod && regions.at(w) != region) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static
+void replaceExternalReportsWithSomRep(ReportManager &rm, NGHolder &g,
NFAVertex v, ReportType ir_type,
u64a param) {
- assert(!g[v].reports.empty());
-
- flat_set<ReportID> r_new;
-
- for (const ReportID &report_id : g[v].reports) {
- Report ir = rm.getReport(report_id);
-
- if (ir.type != EXTERNAL_CALLBACK) {
- /* we must have already done whatever magic we needed to do to this
- * report */
- r_new.insert(report_id);
- continue;
- }
-
- ir.type = ir_type;
- ir.somDistance = param;
- ReportID rep = rm.getInternalId(ir);
-
+ assert(!g[v].reports.empty());
+
+ flat_set<ReportID> r_new;
+
+ for (const ReportID &report_id : g[v].reports) {
+ Report ir = rm.getReport(report_id);
+
+ if (ir.type != EXTERNAL_CALLBACK) {
+ /* we must have already done whatever magic we needed to do to this
+ * report */
+ r_new.insert(report_id);
+ continue;
+ }
+
+ ir.type = ir_type;
+ ir.somDistance = param;
+ ReportID rep = rm.getInternalId(ir);
+
DEBUG_PRINTF("vertex %zu, replacing report %u with %u (type %u)\n",
- g[v].index, report_id, rep, ir_type);
- r_new.insert(rep);
- }
- g[v].reports = r_new;
-}
-
-/* updates the reports on all vertices leading to the sink */
-static
-void makeSomRelReports(ReportManager &rm, NGHolder &g, NFAVertex sink,
- const vector<DepthMinMax> &depths) {
- for (auto v : inv_adjacent_vertices_range(sink, g)) {
- if (v == g.accept) {
- continue;
- }
-
- const DepthMinMax &d = depths.at(g[v].index);
- assert(d.min == d.max);
- replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_REL,
- d.min);
- }
-}
-
-/* updates the reports on all the provided vertices */
-static
-void makeSomRelReports(ReportManager &rm, NGHolder &g,
- const vector<NFAVertex> &to_update,
- const vector<DepthMinMax> &depths) {
- for (auto v : to_update) {
- const DepthMinMax &d = depths.at(g[v].index);
- assert(d.min == d.max);
- replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_REL,
- d.min);
- }
-}
-
-static
-void makeSomAbsReports(ReportManager &rm, NGHolder &g, NFAVertex sink) {
- for (auto v : inv_adjacent_vertices_range(sink, g)) {
- if (v == g.accept) {
- continue;
- }
- replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_ABS,
- 0);
- }
-}
-
-static
-void updateReportToUseRecordedSom(ReportManager &rm, NGHolder &g, u32 som_loc) {
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED,
- som_loc);
- }
- for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
- if (v == g.accept) {
- continue;
- }
- replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED,
- som_loc);
- }
-}
-
-static
-void updateReportToUseRecordedSom(ReportManager &rm, NGHolder &g,
- const vector<NFAVertex> &to_update,
- u32 som_loc) {
- for (auto v : to_update) {
- replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED,
- som_loc);
- }
-}
-
-static
-bool createEscaper(NG &ng, const NGHolder &prefix, const CharReach &escapes,
- u32 som_loc) {
- ReportManager &rm = ng.rm;
-
- /* escaper = /prefix[^escapes]*[escapes]/ */
- DEBUG_PRINTF("creating escaper for %u\n", som_loc);
- NGHolder h;
- cloneHolder(h, prefix);
- assert(h.kind == NFA_OUTFIX);
-
- NFAVertex u = add_vertex(h);
- h[u].char_reach = ~escapes;
-
- NFAVertex v = add_vertex(h);
- h[v].char_reach = escapes;
-
- for (auto w : inv_adjacent_vertices_range(h.accept, h)) {
- add_edge(w, u, h);
- add_edge(w, v, h);
- h[w].reports.clear();
- }
-
- clear_in_edges(h.accept, h);
-
- add_edge(u, v, h);
- add_edge(u, u, h);
- add_edge(v, h.accept, h);
-
- Report ir = makeCallback(0U, 0);
- ir.type = INTERNAL_SOM_LOC_MAKE_WRITABLE;
- ir.onmatch = som_loc;
- h[v].reports.insert(rm.getInternalId(ir));
- return ng.addHolder(h);
-}
-
-static
-void fillHolderForLockCheck(NGHolder *out, const NGHolder &g,
- const map<u32, region_info> &info,
- map<u32, region_info>::const_iterator picked) {
- /* NOTE: This is appropriate for firstMatchIsFirst */
- DEBUG_PRINTF("prepping for lock check\n");
-
- NGHolder &midfix = *out;
-
- map<NFAVertex, NFAVertex> v_map;
- v_map[g.start] = midfix.start;
- v_map[g.startDs] = midfix.startDs;
-
- /* include the lock region */
+ g[v].index, report_id, rep, ir_type);
+ r_new.insert(rep);
+ }
+ g[v].reports = r_new;
+}
+
+/* updates the reports on all vertices leading to the sink */
+static
+void makeSomRelReports(ReportManager &rm, NGHolder &g, NFAVertex sink,
+ const vector<DepthMinMax> &depths) {
+ for (auto v : inv_adjacent_vertices_range(sink, g)) {
+ if (v == g.accept) {
+ continue;
+ }
+
+ const DepthMinMax &d = depths.at(g[v].index);
+ assert(d.min == d.max);
+ replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_REL,
+ d.min);
+ }
+}
+
+/* updates the reports on all the provided vertices */
+static
+void makeSomRelReports(ReportManager &rm, NGHolder &g,
+ const vector<NFAVertex> &to_update,
+ const vector<DepthMinMax> &depths) {
+ for (auto v : to_update) {
+ const DepthMinMax &d = depths.at(g[v].index);
+ assert(d.min == d.max);
+ replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_REL,
+ d.min);
+ }
+}
+
+static
+void makeSomAbsReports(ReportManager &rm, NGHolder &g, NFAVertex sink) {
+ for (auto v : inv_adjacent_vertices_range(sink, g)) {
+ if (v == g.accept) {
+ continue;
+ }
+ replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_ABS,
+ 0);
+ }
+}
+
+static
+void updateReportToUseRecordedSom(ReportManager &rm, NGHolder &g, u32 som_loc) {
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED,
+ som_loc);
+ }
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ if (v == g.accept) {
+ continue;
+ }
+ replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED,
+ som_loc);
+ }
+}
+
+static
+void updateReportToUseRecordedSom(ReportManager &rm, NGHolder &g,
+ const vector<NFAVertex> &to_update,
+ u32 som_loc) {
+ for (auto v : to_update) {
+ replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED,
+ som_loc);
+ }
+}
+
+static
+bool createEscaper(NG &ng, const NGHolder &prefix, const CharReach &escapes,
+ u32 som_loc) {
+ ReportManager &rm = ng.rm;
+
+ /* escaper = /prefix[^escapes]*[escapes]/ */
+ DEBUG_PRINTF("creating escaper for %u\n", som_loc);
+ NGHolder h;
+ cloneHolder(h, prefix);
+ assert(h.kind == NFA_OUTFIX);
+
+ NFAVertex u = add_vertex(h);
+ h[u].char_reach = ~escapes;
+
+ NFAVertex v = add_vertex(h);
+ h[v].char_reach = escapes;
+
+ for (auto w : inv_adjacent_vertices_range(h.accept, h)) {
+ add_edge(w, u, h);
+ add_edge(w, v, h);
+ h[w].reports.clear();
+ }
+
+ clear_in_edges(h.accept, h);
+
+ add_edge(u, v, h);
+ add_edge(u, u, h);
+ add_edge(v, h.accept, h);
+
+ Report ir = makeCallback(0U, 0);
+ ir.type = INTERNAL_SOM_LOC_MAKE_WRITABLE;
+ ir.onmatch = som_loc;
+ h[v].reports.insert(rm.getInternalId(ir));
+ return ng.addHolder(h);
+}
+
+static
+void fillHolderForLockCheck(NGHolder *out, const NGHolder &g,
+ const map<u32, region_info> &info,
+ map<u32, region_info>::const_iterator picked) {
+ /* NOTE: This is appropriate for firstMatchIsFirst */
+ DEBUG_PRINTF("prepping for lock check\n");
+
+ NGHolder &midfix = *out;
+
+ map<NFAVertex, NFAVertex> v_map;
+ v_map[g.start] = midfix.start;
+ v_map[g.startDs] = midfix.startDs;
+
+ /* include the lock region */
assert(picked != info.end());
auto graph_last = next(picked);
-
+
assert(!graph_last->second.dag);
assert(graph_last->second.full.size() == 1);
for (auto jt = graph_last; ; --jt) {
- DEBUG_PRINTF("adding r %u to midfix\n", jt->first);
-
- /* add all vertices in region, create mapping */
- for (auto v : jt->second.full) {
+ DEBUG_PRINTF("adding r %u to midfix\n", jt->first);
+
+ /* add all vertices in region, create mapping */
+ for (auto v : jt->second.full) {
DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index);
- if (contains(v_map, v)) {
- continue;
- }
-
- /* treat all virtual starts as happening anywhere, so that the
- * virtual start is not counted as part of the SoM */
- if (is_virtual_start(v, g)) {
- v_map[v] = midfix.startDs;
- continue;
- }
-
- NFAVertex vnew = add_vertex(g[v], midfix);
- v_map[v] = vnew;
- }
-
- /* add edges leaving region verts based on mapping */
- for (auto v : jt->second.full) {
- NFAVertex u = v_map[v];
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w == g.accept || w == g.acceptEod) {
- add_edge_if_not_present(u, midfix.accept, midfix);
- continue;
- }
- if (!contains(v_map, w)) {
- add_edge_if_not_present(u, midfix.accept, midfix);
- } else {
- add_edge_if_not_present(u, v_map[w], midfix);
- }
- }
- }
-
+ if (contains(v_map, v)) {
+ continue;
+ }
+
+ /* treat all virtual starts as happening anywhere, so that the
+ * virtual start is not counted as part of the SoM */
+ if (is_virtual_start(v, g)) {
+ v_map[v] = midfix.startDs;
+ continue;
+ }
+
+ NFAVertex vnew = add_vertex(g[v], midfix);
+ v_map[v] = vnew;
+ }
+
+ /* add edges leaving region verts based on mapping */
+ for (auto v : jt->second.full) {
+ NFAVertex u = v_map[v];
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (w == g.accept || w == g.acceptEod) {
+ add_edge_if_not_present(u, midfix.accept, midfix);
+ continue;
+ }
+ if (!contains(v_map, w)) {
+ add_edge_if_not_present(u, midfix.accept, midfix);
+ } else {
+ add_edge_if_not_present(u, v_map[w], midfix);
+ }
+ }
+ }
+
if (jt == info.begin()) {
break;
}
@@ -750,1671 +750,1671 @@ void fillHolderForLockCheck(NGHolder *out, const NGHolder &g,
/* add edges from startds to the enters of all the initial optional
* regions and the first mandatory region. */
for (auto jt = info.begin(); ; ++jt) {
- for (auto enter : jt->second.enters) {
- assert(contains(v_map, enter));
- NFAVertex v = v_map[enter];
- add_edge_if_not_present(midfix.startDs, v, midfix);
- }
-
+ for (auto enter : jt->second.enters) {
+ assert(contains(v_map, enter));
+ NFAVertex v = v_map[enter];
+ add_edge_if_not_present(midfix.startDs, v, midfix);
+ }
+
if (!jt->second.optional) {
- break;
- }
+ break;
+ }
if (jt == graph_last) {
/* all regions are optional - add a direct edge to accept */
add_edge_if_not_present(midfix.startDs, midfix.accept, midfix);
break;
}
- }
-
- assert(in_degree(midfix.accept, midfix));
+ }
+
+ assert(in_degree(midfix.accept, midfix));
renumber_vertices(midfix);
-}
-
-static
-void fillRoughMidfix(NGHolder *out, const NGHolder &g,
+}
+
+static
+void fillRoughMidfix(NGHolder *out, const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const map<u32, region_info> &info,
- map<u32, region_info>::const_iterator picked) {
- /* as we are not the first prefix, we are probably not acyclic. We need to
- * generate an acyclic holder to acts a fake prefix to sentClearsTail.
- * This will result in a more conservative estimate. */
- /* NOTE: This is not appropriate for firstMatchIsFirst */
- NGHolder &midfix = *out;
- add_edge(midfix.startDs, midfix.accept, midfix);
-
- map<NFAVertex, NFAVertex> v_map;
-
- map<u32, region_info>::const_iterator jt = picked;
- for (; jt->second.dag; --jt) {
- DEBUG_PRINTF("adding r %u to midfix\n", jt->first);
- if (!jt->second.optional) {
- clear_out_edges(midfix.startDs, midfix);
- add_edge(midfix.startDs, midfix.startDs, midfix);
- }
-
- /* add all vertices in region, create mapping */
- for (auto v : jt->second.full) {
+ const map<u32, region_info> &info,
+ map<u32, region_info>::const_iterator picked) {
+ /* as we are not the first prefix, we are probably not acyclic. We need to
+ * generate an acyclic holder to acts a fake prefix to sentClearsTail.
+ * This will result in a more conservative estimate. */
+ /* NOTE: This is not appropriate for firstMatchIsFirst */
+ NGHolder &midfix = *out;
+ add_edge(midfix.startDs, midfix.accept, midfix);
+
+ map<NFAVertex, NFAVertex> v_map;
+
+ map<u32, region_info>::const_iterator jt = picked;
+ for (; jt->second.dag; --jt) {
+ DEBUG_PRINTF("adding r %u to midfix\n", jt->first);
+ if (!jt->second.optional) {
+ clear_out_edges(midfix.startDs, midfix);
+ add_edge(midfix.startDs, midfix.startDs, midfix);
+ }
+
+ /* add all vertices in region, create mapping */
+ for (auto v : jt->second.full) {
DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index);
- NFAVertex vnew = add_vertex(g[v], midfix);
- v_map[v] = vnew;
- }
-
- /* add edges leaving region verts based on mapping */
- for (auto v : jt->second.full) {
- NFAVertex u = v_map[v];
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w == g.accept || w == g.acceptEod) {
- continue;
- }
- if (!contains(v_map, w)) {
- add_edge_if_not_present(u, midfix.accept, midfix);
- } else {
- add_edge_if_not_present(u, v_map[w], midfix);
- }
- }
- }
-
- /* add edges from startds to enters */
- for (auto enter : jt->second.enters) {
- assert(contains(v_map, enter));
- NFAVertex v = v_map[enter];
- add_edge(midfix.startDs, v, midfix);
- }
-
- if (jt == info.begin()) {
- break;
- }
- }
-
- /* we can include the exits of the regions leading in */
- if (!jt->second.dag) {
- u32 first_early_region = jt->first;
- clear_out_edges(midfix.startDs, midfix);
- add_edge(midfix.startDs, midfix.startDs, midfix);
-
- do {
- for (auto v : jt->second.exits) {
+ NFAVertex vnew = add_vertex(g[v], midfix);
+ v_map[v] = vnew;
+ }
+
+ /* add edges leaving region verts based on mapping */
+ for (auto v : jt->second.full) {
+ NFAVertex u = v_map[v];
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (w == g.accept || w == g.acceptEod) {
+ continue;
+ }
+ if (!contains(v_map, w)) {
+ add_edge_if_not_present(u, midfix.accept, midfix);
+ } else {
+ add_edge_if_not_present(u, v_map[w], midfix);
+ }
+ }
+ }
+
+ /* add edges from startds to enters */
+ for (auto enter : jt->second.enters) {
+ assert(contains(v_map, enter));
+ NFAVertex v = v_map[enter];
+ add_edge(midfix.startDs, v, midfix);
+ }
+
+ if (jt == info.begin()) {
+ break;
+ }
+ }
+
+ /* we can include the exits of the regions leading in */
+ if (!jt->second.dag) {
+ u32 first_early_region = jt->first;
+ clear_out_edges(midfix.startDs, midfix);
+ add_edge(midfix.startDs, midfix.startDs, midfix);
+
+ do {
+ for (auto v : jt->second.exits) {
DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index);
- NFAVertex vnew = add_vertex(g[v], midfix);
- v_map[v] = vnew;
-
- /* add edges from startds to new vertices */
- add_edge(midfix.startDs, vnew, midfix);
- }
-
- /* add edges leaving region verts based on mapping */
- for (auto v : jt->second.exits) {
- NFAVertex u = v_map[v];
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w == g.accept || w == g.acceptEod
- || regions.at(w) <= first_early_region) {
- continue;
- }
- if (!contains(v_map, w)) {
- add_edge_if_not_present(u, midfix.accept, midfix);
- } else {
- add_edge_if_not_present(u, v_map[w], midfix);
- }
- }
- }
- } while (jt->second.optional && jt != info.begin() && (jt--)->first);
-
- if (jt->second.optional) {
- assert(!jt->second.exits.empty());
- NFAVertex v = v_map[jt->second.exits.front()];
- for (auto w : adjacent_vertices_range(v, midfix)) {
- add_edge(midfix.startDs, w, midfix);
- }
- }
- }
-}
-
-static
-bool beginsWithDotStar(const NGHolder &g) {
- bool hasDot = false;
-
- // We can ignore the successors of start, as matches that begin there will
- // necessarily have a SOM of 0.
-
- set<NFAVertex> succ;
- insert(&succ, adjacent_vertices(g.startDs, g));
- succ.erase(g.startDs);
-
- for (auto v : succ) {
- // We want 'dot' states that aren't virtual starts.
- if (g[v].char_reach.all() &&
- !g[v].assert_flags) {
- hasDot = true;
- set<NFAVertex> dotsucc;
- insert(&dotsucc, adjacent_vertices(v, g));
- if (dotsucc != succ) {
- DEBUG_PRINTF("failed dot-star succ check\n");
- return false;
- }
- }
- }
-
- if (hasDot) {
- DEBUG_PRINTF("begins with dot-star\n");
- }
- return hasDot;
-}
-
-static
-bool buildMidfix(NG &ng, const som_plan &item, const u32 som_slot_in,
- const u32 som_slot_out) {
- assert(item.prefix);
- assert(hasCorrectlyNumberedVertices(*item.prefix));
-
- /* setup escaper for second som_location if required */
- if (item.escapes.any()) {
- if (!createEscaper(ng, *item.prefix, item.escapes, som_slot_out)) {
- return false;
- }
- }
-
- /* ensure we copy som from prev loc */
- setMidfixReports(ng.rm, item, som_slot_in, som_slot_out);
-
- /* add second prefix/1st midfix */
- if (!ng.addHolder(*item.prefix)) {
- DEBUG_PRINTF("---addHolder failed---\n");
- return false;
- }
-
- return true;
-}
-
-static
-bool isMandRegionBetween(map<u32, region_info>::const_iterator a,
- map<u32, region_info>::const_iterator b) {
- while (b != a) {
- if (!b->second.optional) {
- return true;
- }
- --b;
- }
-
- return false;
-}
-
-// Attempts to advance the current plan. Returns true if we advance to the end
-// (woot!); updates picked, plan and bad_region.
-static
-bool advancePlan(const NGHolder &g,
+ NFAVertex vnew = add_vertex(g[v], midfix);
+ v_map[v] = vnew;
+
+ /* add edges from startds to new vertices */
+ add_edge(midfix.startDs, vnew, midfix);
+ }
+
+ /* add edges leaving region verts based on mapping */
+ for (auto v : jt->second.exits) {
+ NFAVertex u = v_map[v];
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (w == g.accept || w == g.acceptEod
+ || regions.at(w) <= first_early_region) {
+ continue;
+ }
+ if (!contains(v_map, w)) {
+ add_edge_if_not_present(u, midfix.accept, midfix);
+ } else {
+ add_edge_if_not_present(u, v_map[w], midfix);
+ }
+ }
+ }
+ } while (jt->second.optional && jt != info.begin() && (jt--)->first);
+
+ if (jt->second.optional) {
+ assert(!jt->second.exits.empty());
+ NFAVertex v = v_map[jt->second.exits.front()];
+ for (auto w : adjacent_vertices_range(v, midfix)) {
+ add_edge(midfix.startDs, w, midfix);
+ }
+ }
+ }
+}
+
+static
+bool beginsWithDotStar(const NGHolder &g) {
+ bool hasDot = false;
+
+ // We can ignore the successors of start, as matches that begin there will
+ // necessarily have a SOM of 0.
+
+ set<NFAVertex> succ;
+ insert(&succ, adjacent_vertices(g.startDs, g));
+ succ.erase(g.startDs);
+
+ for (auto v : succ) {
+ // We want 'dot' states that aren't virtual starts.
+ if (g[v].char_reach.all() &&
+ !g[v].assert_flags) {
+ hasDot = true;
+ set<NFAVertex> dotsucc;
+ insert(&dotsucc, adjacent_vertices(v, g));
+ if (dotsucc != succ) {
+ DEBUG_PRINTF("failed dot-star succ check\n");
+ return false;
+ }
+ }
+ }
+
+ if (hasDot) {
+ DEBUG_PRINTF("begins with dot-star\n");
+ }
+ return hasDot;
+}
+
+static
+bool buildMidfix(NG &ng, const som_plan &item, const u32 som_slot_in,
+ const u32 som_slot_out) {
+ assert(item.prefix);
+ assert(hasCorrectlyNumberedVertices(*item.prefix));
+
+ /* setup escaper for second som_location if required */
+ if (item.escapes.any()) {
+ if (!createEscaper(ng, *item.prefix, item.escapes, som_slot_out)) {
+ return false;
+ }
+ }
+
+ /* ensure we copy som from prev loc */
+ setMidfixReports(ng.rm, item, som_slot_in, som_slot_out);
+
+ /* add second prefix/1st midfix */
+ if (!ng.addHolder(*item.prefix)) {
+ DEBUG_PRINTF("---addHolder failed---\n");
+ return false;
+ }
+
+ return true;
+}
+
+static
+bool isMandRegionBetween(map<u32, region_info>::const_iterator a,
+ map<u32, region_info>::const_iterator b) {
+ while (b != a) {
+ if (!b->second.optional) {
+ return true;
+ }
+ --b;
+ }
+
+ return false;
+}
+
+// Attempts to advance the current plan. Returns true if we advance to the end
+// (woot!); updates picked, plan and bad_region.
+static
+bool advancePlan(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const NGHolder &prefix, bool stuck,
- map<u32, region_info>::const_iterator &picked,
- const map<u32, region_info>::const_iterator furthest,
- const map<u32, region_info>::const_iterator furthest_lock,
- const CharReach &next_escapes, som_plan &plan,
- u32 *bad_region) {
- u32 bad_region_r = 0;
- u32 bad_region_x = 0;
- u32 bad_region_e = 0;
- DEBUG_PRINTF("curr %u\n", picked->first);
-
- if (sentClearsTail(g, regions, prefix, furthest->first, &bad_region_r)) {
- plan.is_reset = true;
- picked = furthest;
- DEBUG_PRINTF("Prefix clears tail, woot!\n");
- return true;
- } else {
- DEBUG_PRINTF("Reset failed, first bad region %u\n", bad_region_r);
- }
-
- if (stuck) {
- u32 to_region = furthest_lock->first;
- if (validateXSL(g, regions, to_region, next_escapes, &bad_region_x)) {
- DEBUG_PRINTF("XSL\n");
- picked = furthest_lock;
- plan.escapes = next_escapes;
- return true;
- } else {
- DEBUG_PRINTF("XSL failed, first bad region %u\n", bad_region_x);
- }
-
- if (validateEXSL(g, regions, to_region, next_escapes, prefix,
- &bad_region_e)) {
- DEBUG_PRINTF("EXSL\n");
- picked = furthest_lock;
- plan.escapes = next_escapes;
- return true;
- } else {
- DEBUG_PRINTF("EXSL failed, first bad region %u\n", bad_region_e);
- }
- } else {
- DEBUG_PRINTF("!stuck, skipped XSL and EXSL\n");
- }
-
- assert(!plan.is_reset);
-
- *bad_region = max(bad_region_x, bad_region_e);
- if (bad_region_r >= *bad_region) {
- *bad_region = bad_region_r;
- plan.is_reset = true;
- plan.escapes.clear();
- picked = furthest;
- } else {
- picked = furthest_lock;
- plan.escapes = next_escapes;
- }
-
- DEBUG_PRINTF("first bad region now %u\n", *bad_region);
- return false;
-}
-
-static
-bool addPlan(vector<som_plan> &plan, u32 parent) {
- DEBUG_PRINTF("adding plan %zu with parent %u\n", plan.size(),
- parent);
-
- if (plan.size() >= MAX_SOM_PLANS) {
- DEBUG_PRINTF("too many plans!\n");
- return false;
- }
-
- plan.emplace_back(nullptr, CharReach(), false, parent);
- return true;
-}
-
-// Fetches all preds of {accept, acceptEod} for this graph.
-static
-void addReporterVertices(const NGHolder &g, vector<NFAVertex> &reporters) {
+ const NGHolder &prefix, bool stuck,
+ map<u32, region_info>::const_iterator &picked,
+ const map<u32, region_info>::const_iterator furthest,
+ const map<u32, region_info>::const_iterator furthest_lock,
+ const CharReach &next_escapes, som_plan &plan,
+ u32 *bad_region) {
+ u32 bad_region_r = 0;
+ u32 bad_region_x = 0;
+ u32 bad_region_e = 0;
+ DEBUG_PRINTF("curr %u\n", picked->first);
+
+ if (sentClearsTail(g, regions, prefix, furthest->first, &bad_region_r)) {
+ plan.is_reset = true;
+ picked = furthest;
+ DEBUG_PRINTF("Prefix clears tail, woot!\n");
+ return true;
+ } else {
+ DEBUG_PRINTF("Reset failed, first bad region %u\n", bad_region_r);
+ }
+
+ if (stuck) {
+ u32 to_region = furthest_lock->first;
+ if (validateXSL(g, regions, to_region, next_escapes, &bad_region_x)) {
+ DEBUG_PRINTF("XSL\n");
+ picked = furthest_lock;
+ plan.escapes = next_escapes;
+ return true;
+ } else {
+ DEBUG_PRINTF("XSL failed, first bad region %u\n", bad_region_x);
+ }
+
+ if (validateEXSL(g, regions, to_region, next_escapes, prefix,
+ &bad_region_e)) {
+ DEBUG_PRINTF("EXSL\n");
+ picked = furthest_lock;
+ plan.escapes = next_escapes;
+ return true;
+ } else {
+ DEBUG_PRINTF("EXSL failed, first bad region %u\n", bad_region_e);
+ }
+ } else {
+ DEBUG_PRINTF("!stuck, skipped XSL and EXSL\n");
+ }
+
+ assert(!plan.is_reset);
+
+ *bad_region = max(bad_region_x, bad_region_e);
+ if (bad_region_r >= *bad_region) {
+ *bad_region = bad_region_r;
+ plan.is_reset = true;
+ plan.escapes.clear();
+ picked = furthest;
+ } else {
+ picked = furthest_lock;
+ plan.escapes = next_escapes;
+ }
+
+ DEBUG_PRINTF("first bad region now %u\n", *bad_region);
+ return false;
+}
+
+static
+bool addPlan(vector<som_plan> &plan, u32 parent) {
+ DEBUG_PRINTF("adding plan %zu with parent %u\n", plan.size(),
+ parent);
+
+ if (plan.size() >= MAX_SOM_PLANS) {
+ DEBUG_PRINTF("too many plans!\n");
+ return false;
+ }
+
+ plan.emplace_back(nullptr, CharReach(), false, parent);
+ return true;
+}
+
+// Fetches all preds of {accept, acceptEod} for this graph.
+static
+void addReporterVertices(const NGHolder &g, vector<NFAVertex> &reporters) {
set<NFAVertex> tmp;
- insert(&tmp, inv_adjacent_vertices(g.accept, g));
- insert(&tmp, inv_adjacent_vertices(g.acceptEod, g));
- tmp.erase(g.accept);
-
-#ifdef DEBUG
- DEBUG_PRINTF("add reporters:");
- for (UNUSED auto v : tmp) {
+ insert(&tmp, inv_adjacent_vertices(g.accept, g));
+ insert(&tmp, inv_adjacent_vertices(g.acceptEod, g));
+ tmp.erase(g.accept);
+
+#ifdef DEBUG
+ DEBUG_PRINTF("add reporters:");
+ for (UNUSED auto v : tmp) {
printf(" %zu", g[v].index);
- }
- printf("\n");
-#endif
-
- reporters.insert(reporters.end(), tmp.begin(), tmp.end());
-}
-
-// Fetches all preds of {accept, acceptEod} in this region.
-static
-void addReporterVertices(const region_info &r, const NGHolder &g,
- vector<NFAVertex> &reporters) {
- for (auto v : r.exits) {
- if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
+ }
+ printf("\n");
+#endif
+
+ reporters.insert(reporters.end(), tmp.begin(), tmp.end());
+}
+
+// Fetches all preds of {accept, acceptEod} in this region.
+static
+void addReporterVertices(const region_info &r, const NGHolder &g,
+ vector<NFAVertex> &reporters) {
+ for (auto v : r.exits) {
+ if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
DEBUG_PRINTF("add reporter %zu\n", g[v].index);
- reporters.push_back(v);
- }
- }
-}
-
-// Fetches the mappings of all preds of {accept, acceptEod} in this region.
-static
-void addMappedReporterVertices(const region_info &r, const NGHolder &g,
+ reporters.push_back(v);
+ }
+ }
+}
+
+// Fetches the mappings of all preds of {accept, acceptEod} in this region.
+static
+void addMappedReporterVertices(const region_info &r, const NGHolder &g,
const unordered_map<NFAVertex, NFAVertex> &mapping,
- vector<NFAVertex> &reporters) {
- for (auto v : r.exits) {
- if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
+ vector<NFAVertex> &reporters) {
+ for (auto v : r.exits) {
+ if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
DEBUG_PRINTF("adding v=%zu\n", g[v].index);
auto it = mapping.find(v);
- assert(it != mapping.end());
- reporters.push_back(it->second);
- }
- }
-}
-
-// Clone a version of the graph, but only including the in-edges of `enter'
-// from earlier regions.
-static
-void cloneGraphWithOneEntry(NGHolder &out, const NGHolder &g,
+ assert(it != mapping.end());
+ reporters.push_back(it->second);
+ }
+ }
+}
+
+// Clone a version of the graph, but only including the in-edges of `enter'
+// from earlier regions.
+static
+void cloneGraphWithOneEntry(NGHolder &out, const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- NFAVertex entry, const vector<NFAVertex> &enters,
+ NFAVertex entry, const vector<NFAVertex> &enters,
unordered_map<NFAVertex, NFAVertex> &orig_to_copy) {
- orig_to_copy.clear();
- cloneHolder(out, g, &orig_to_copy);
-
- assert(contains(orig_to_copy, entry));
- const u32 region = regions.at(entry);
-
- for (auto v : enters) {
- if (v == entry) {
- continue;
- }
- assert(contains(orig_to_copy, v));
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (regions.at(u) < region) {
- assert(edge(orig_to_copy[u], orig_to_copy[v], out).second);
- remove_edge(orig_to_copy[u], orig_to_copy[v], out);
- }
- }
- }
-
- pruneUseless(out);
-}
-
-static
+ orig_to_copy.clear();
+ cloneHolder(out, g, &orig_to_copy);
+
+ assert(contains(orig_to_copy, entry));
+ const u32 region = regions.at(entry);
+
+ for (auto v : enters) {
+ if (v == entry) {
+ continue;
+ }
+ assert(contains(orig_to_copy, v));
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (regions.at(u) < region) {
+ assert(edge(orig_to_copy[u], orig_to_copy[v], out).second);
+ remove_edge(orig_to_copy[u], orig_to_copy[v], out);
+ }
+ }
+ }
+
+ pruneUseless(out);
+}
+
+static
void expandGraph(NGHolder &g, unordered_map<NFAVertex, u32> &regions,
- vector<NFAVertex> &enters) {
- assert(!enters.empty());
- const u32 split_region = regions.at(enters.front());
-
- vector<NFAVertex> new_enters;
-
- // Gather the list of vertices in the split region and subsequent regions.
- vector<NFAVertex> tail_vertices;
- for (auto v : vertices_range(g)) {
- if (is_special(v, g) || regions.at(v) < split_region) {
- continue;
- }
- tail_vertices.push_back(v);
- }
-
- for (auto enter : enters) {
+ vector<NFAVertex> &enters) {
+ assert(!enters.empty());
+ const u32 split_region = regions.at(enters.front());
+
+ vector<NFAVertex> new_enters;
+
+ // Gather the list of vertices in the split region and subsequent regions.
+ vector<NFAVertex> tail_vertices;
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g) || regions.at(v) < split_region) {
+ continue;
+ }
+ tail_vertices.push_back(v);
+ }
+
+ for (auto enter : enters) {
DEBUG_PRINTF("processing enter %zu\n", g[enter].index);
- map<NFAVertex, NFAVertex> orig_to_copy;
-
- // Make a copy of all of the tail vertices, storing region info along
- // the way.
- for (auto v : tail_vertices) {
- auto v2 = clone_vertex(g, v);
- orig_to_copy[v] = v2;
- regions[v2] = regions.at(v);
- }
-
- // Wire up the edges: edges from previous regions come from the
- // original vertices, while edges internal to and beyond the split
- // region go to the copies.
-
- for (const auto &m : orig_to_copy) {
- NFAVertex v = m.first, v2 = m.second;
-
- for (const auto &e : out_edges_range(v, g)) {
- NFAVertex t = target(e, g);
- u32 t_region = regions.at(t);
- if (t_region >= split_region && !is_special(t, g)) {
- assert(contains(orig_to_copy, t));
- t = orig_to_copy[t];
- }
- add_edge_if_not_present(v2, t, g[e], g);
- }
-
- for (const auto &e : in_edges_range(v, g)) {
- NFAVertex u = source(e, g);
- if (regions.at(u) >= split_region && !is_special(u, g)) {
- assert(contains(orig_to_copy, u));
- u = orig_to_copy[u];
- }
- add_edge_if_not_present(u, v2, g[e], g);
- }
-
- }
-
- // Clear the in-edges from earlier regions of the OTHER enters for this
- // copy of the split region.
- for (auto v : enters) {
- if (v == enter) {
- continue;
- }
-
- remove_in_edge_if(orig_to_copy[v],
- [&](const NFAEdge &e) {
- NFAVertex u = source(e, g);
- return regions.at(u) < split_region;
+ map<NFAVertex, NFAVertex> orig_to_copy;
+
+ // Make a copy of all of the tail vertices, storing region info along
+ // the way.
+ for (auto v : tail_vertices) {
+ auto v2 = clone_vertex(g, v);
+ orig_to_copy[v] = v2;
+ regions[v2] = regions.at(v);
+ }
+
+ // Wire up the edges: edges from previous regions come from the
+ // original vertices, while edges internal to and beyond the split
+ // region go to the copies.
+
+ for (const auto &m : orig_to_copy) {
+ NFAVertex v = m.first, v2 = m.second;
+
+ for (const auto &e : out_edges_range(v, g)) {
+ NFAVertex t = target(e, g);
+ u32 t_region = regions.at(t);
+ if (t_region >= split_region && !is_special(t, g)) {
+ assert(contains(orig_to_copy, t));
+ t = orig_to_copy[t];
+ }
+ add_edge_if_not_present(v2, t, g[e], g);
+ }
+
+ for (const auto &e : in_edges_range(v, g)) {
+ NFAVertex u = source(e, g);
+ if (regions.at(u) >= split_region && !is_special(u, g)) {
+ assert(contains(orig_to_copy, u));
+ u = orig_to_copy[u];
+ }
+ add_edge_if_not_present(u, v2, g[e], g);
+ }
+
+ }
+
+ // Clear the in-edges from earlier regions of the OTHER enters for this
+ // copy of the split region.
+ for (auto v : enters) {
+ if (v == enter) {
+ continue;
+ }
+
+ remove_in_edge_if(orig_to_copy[v],
+ [&](const NFAEdge &e) {
+ NFAVertex u = source(e, g);
+ return regions.at(u) < split_region;
}, g);
- }
-
- new_enters.push_back(orig_to_copy[enter]);
- }
-
- // Remove the original set of tail vertices.
- remove_vertices(tail_vertices, g);
- pruneUseless(g);
- regions = assignRegions(g);
-
- enters.swap(new_enters);
-}
-
-static
-bool doTreePlanningIntl(NGHolder &g,
+ }
+
+ new_enters.push_back(orig_to_copy[enter]);
+ }
+
+ // Remove the original set of tail vertices.
+ remove_vertices(tail_vertices, g);
+ pruneUseless(g);
+ regions = assignRegions(g);
+
+ enters.swap(new_enters);
+}
+
+static
+bool doTreePlanningIntl(NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const map<u32, region_info> &info,
- map<u32, region_info>::const_iterator picked, u32 bad_region,
- u32 parent_plan,
+ const map<u32, region_info> &info,
+ map<u32, region_info>::const_iterator picked, u32 bad_region,
+ u32 parent_plan,
const unordered_map<NFAVertex, NFAVertex> &copy_to_orig,
- vector<som_plan> &plan, const Grey &grey) {
- assert(picked != info.end());
-
- DEBUG_PRINTF("picked=%u\n", picked->first);
- DEBUG_PRINTF("parent is %u\n", parent_plan);
-
- map<u32, region_info>::const_iterator furthest;
-
- bool to_end = false;
- while (!to_end) {
- DEBUG_PRINTF("picked is %u\n", picked->first);
- DEBUG_PRINTF("first bad region now %u\n", bad_region);
-
- furthest = info.find(bad_region); /* first bad */
- if (furthest == info.end()) {
- DEBUG_PRINTF("no partition\n");
- return false;
- }
- --furthest; /* last region we can establish som for */
-
- if (furthest->first <= picked->first) {
- DEBUG_PRINTF("failed to make any progress\n");
- return false;
- }
-
- map<u32, region_info>::const_iterator furthest_lock = furthest;
- CharReach next_escapes;
- bool lock_found;
- /* The last possible lock in the range that we examine should be the
- * best. If the previous plan is a lock, this follow as any early lock
- * must have a reach that is a subset of the last plan's lock. If the
- * last plan is a resetting plan ..., ?is this true? */
- do {
- lock_found = isPossibleLock(g, furthest_lock, info,
- &next_escapes);
- } while (!lock_found && (--furthest_lock)->first > picked->first);
- DEBUG_PRINTF("lock possible? %d\n", (int)lock_found);
-
- if (lock_found && !isMandRegionBetween(picked, furthest_lock)) {
- lock_found = false;
- }
-
- if (!isMandRegionBetween(picked, furthest)) {
- return false;
- }
-
- /* There is no certainty that the som at a reset location will always
- * go forward */
- if (plan[parent_plan].is_reset && lock_found) {
- NGHolder midfix;
- DEBUG_PRINTF("checking if midfix is suitable for lock\n");
- fillHolderForLockCheck(&midfix, g, info, furthest_lock);
-
- if (!firstMatchIsFirst(midfix)) {
- DEBUG_PRINTF("not stuck\n");
- lock_found = false;
- }
- }
-
- if (!addPlan(plan, parent_plan)) {
- return false;
- }
-
- to_end = false;
-
- if (lock_found && next_escapes.none()) {
- picked = furthest_lock;
- to_end = true;
- }
-
- if (!to_end) {
- NGHolder conservative_midfix; /* for use in reset, exsl analysis */
- fillRoughMidfix(&conservative_midfix, g, regions, info, furthest);
- dumpHolder(conservative_midfix, 15, "som_pathmidfix", grey);
-
- u32 old_bad_region = bad_region;
- to_end = advancePlan(g, regions, conservative_midfix, lock_found,
- picked, furthest, furthest_lock, next_escapes,
- plan.back(), &bad_region);
- if (!to_end
- && bad_region <= old_bad_region) { /* we failed to progress */
- DEBUG_PRINTF("failed to make any progress\n");
- return false;
- }
- }
-
- /* handle direct edge to accepts from region */
- if (edge(furthest->second.exits.front(), g.accept, g).second
- || edge(furthest->second.exits.front(), g.acceptEod, g).second) {
- map<u32, region_info>::const_iterator it = furthest;
- do {
- addMappedReporterVertices(it->second, g, copy_to_orig,
- plan.back().reporters_in);
- } while (it != info.begin() && it->second.optional && (it--)->first);
- }
-
- /* create second prefix */
- plan.back().prefix = makePrefix(g, regions, furthest->second,
- next(furthest)->second);
- parent_plan = plan.size() - 1;
- }
-
- // The last region contributes reporters. If it's optional, the regions
- // before it do as well.
- map<u32, region_info>::const_reverse_iterator it = info.rbegin();
- do {
- DEBUG_PRINTF("add mapped reporters for region %u\n", it->first);
- addMappedReporterVertices(it->second, g, copy_to_orig,
- plan.back().reporters);
- } while (it->second.optional && it != info.rend() &&
- (++it)->first > furthest->first);
-
- return true;
-}
-
-static
-bool doTreePlanning(NGHolder &g,
- map<u32, region_info>::const_iterator presplit,
- map<u32, region_info>::const_iterator picked,
- vector<som_plan> &plan, const Grey &grey) {
- DEBUG_PRINTF("picked is %u\n", picked->first);
- DEBUG_PRINTF("presplit is %u\n", presplit->first);
-
- map<u32, region_info>::const_iterator splitter = next(presplit);
- vector<NFAVertex> enters = splitter->second.enters; // mutable copy
- DEBUG_PRINTF("problem region has %zu entry vertices\n", enters.size());
-
- if (enters.size() <= 1) {
- // TODO: Splitting a region with one entry won't get us anywhere, but
- // it shouldn't create buggy analyses either. See UE-1892.
- DEBUG_PRINTF("nothing to split\n");
- return false;
- }
-
- if (plan.size() + enters.size() > MAX_SOM_PLANS) {
- DEBUG_PRINTF("splitting this tree would hit the plan limit.\n");
- return false;
- }
-
- assert(!plan.empty());
- const u32 parent_plan = plan.size() - 1;
-
- // Make a copy of the graph, with the subgraph under each enter vertex
- // duplicated without the edges into the other enter vertices.
- // NOTE WELL: this will invalidate 'info' from the split point, but it's
- // OK... we don't use it after this.
- auto g_regions = assignRegions(g);
- expandGraph(g, g_regions, enters);
- dumpHolder(g, g_regions, 14, "som_expandedtree", grey);
-
- for (auto v : enters) {
+ vector<som_plan> &plan, const Grey &grey) {
+ assert(picked != info.end());
+
+ DEBUG_PRINTF("picked=%u\n", picked->first);
+ DEBUG_PRINTF("parent is %u\n", parent_plan);
+
+ map<u32, region_info>::const_iterator furthest;
+
+ bool to_end = false;
+ while (!to_end) {
+ DEBUG_PRINTF("picked is %u\n", picked->first);
+ DEBUG_PRINTF("first bad region now %u\n", bad_region);
+
+ furthest = info.find(bad_region); /* first bad */
+ if (furthest == info.end()) {
+ DEBUG_PRINTF("no partition\n");
+ return false;
+ }
+ --furthest; /* last region we can establish som for */
+
+ if (furthest->first <= picked->first) {
+ DEBUG_PRINTF("failed to make any progress\n");
+ return false;
+ }
+
+ map<u32, region_info>::const_iterator furthest_lock = furthest;
+ CharReach next_escapes;
+ bool lock_found;
+ /* The last possible lock in the range that we examine should be the
+ * best. If the previous plan is a lock, this follow as any early lock
+ * must have a reach that is a subset of the last plan's lock. If the
+ * last plan is a resetting plan ..., ?is this true? */
+ do {
+ lock_found = isPossibleLock(g, furthest_lock, info,
+ &next_escapes);
+ } while (!lock_found && (--furthest_lock)->first > picked->first);
+ DEBUG_PRINTF("lock possible? %d\n", (int)lock_found);
+
+ if (lock_found && !isMandRegionBetween(picked, furthest_lock)) {
+ lock_found = false;
+ }
+
+ if (!isMandRegionBetween(picked, furthest)) {
+ return false;
+ }
+
+ /* There is no certainty that the som at a reset location will always
+ * go forward */
+ if (plan[parent_plan].is_reset && lock_found) {
+ NGHolder midfix;
+ DEBUG_PRINTF("checking if midfix is suitable for lock\n");
+ fillHolderForLockCheck(&midfix, g, info, furthest_lock);
+
+ if (!firstMatchIsFirst(midfix)) {
+ DEBUG_PRINTF("not stuck\n");
+ lock_found = false;
+ }
+ }
+
+ if (!addPlan(plan, parent_plan)) {
+ return false;
+ }
+
+ to_end = false;
+
+ if (lock_found && next_escapes.none()) {
+ picked = furthest_lock;
+ to_end = true;
+ }
+
+ if (!to_end) {
+ NGHolder conservative_midfix; /* for use in reset, exsl analysis */
+ fillRoughMidfix(&conservative_midfix, g, regions, info, furthest);
+ dumpHolder(conservative_midfix, 15, "som_pathmidfix", grey);
+
+ u32 old_bad_region = bad_region;
+ to_end = advancePlan(g, regions, conservative_midfix, lock_found,
+ picked, furthest, furthest_lock, next_escapes,
+ plan.back(), &bad_region);
+ if (!to_end
+ && bad_region <= old_bad_region) { /* we failed to progress */
+ DEBUG_PRINTF("failed to make any progress\n");
+ return false;
+ }
+ }
+
+ /* handle direct edge to accepts from region */
+ if (edge(furthest->second.exits.front(), g.accept, g).second
+ || edge(furthest->second.exits.front(), g.acceptEod, g).second) {
+ map<u32, region_info>::const_iterator it = furthest;
+ do {
+ addMappedReporterVertices(it->second, g, copy_to_orig,
+ plan.back().reporters_in);
+ } while (it != info.begin() && it->second.optional && (it--)->first);
+ }
+
+ /* create second prefix */
+ plan.back().prefix = makePrefix(g, regions, furthest->second,
+ next(furthest)->second);
+ parent_plan = plan.size() - 1;
+ }
+
+ // The last region contributes reporters. If it's optional, the regions
+ // before it do as well.
+ map<u32, region_info>::const_reverse_iterator it = info.rbegin();
+ do {
+ DEBUG_PRINTF("add mapped reporters for region %u\n", it->first);
+ addMappedReporterVertices(it->second, g, copy_to_orig,
+ plan.back().reporters);
+ } while (it->second.optional && it != info.rend() &&
+ (++it)->first > furthest->first);
+
+ return true;
+}
+
+static
+bool doTreePlanning(NGHolder &g,
+ map<u32, region_info>::const_iterator presplit,
+ map<u32, region_info>::const_iterator picked,
+ vector<som_plan> &plan, const Grey &grey) {
+ DEBUG_PRINTF("picked is %u\n", picked->first);
+ DEBUG_PRINTF("presplit is %u\n", presplit->first);
+
+ map<u32, region_info>::const_iterator splitter = next(presplit);
+ vector<NFAVertex> enters = splitter->second.enters; // mutable copy
+ DEBUG_PRINTF("problem region has %zu entry vertices\n", enters.size());
+
+ if (enters.size() <= 1) {
+ // TODO: Splitting a region with one entry won't get us anywhere, but
+ // it shouldn't create buggy analyses either. See UE-1892.
+ DEBUG_PRINTF("nothing to split\n");
+ return false;
+ }
+
+ if (plan.size() + enters.size() > MAX_SOM_PLANS) {
+ DEBUG_PRINTF("splitting this tree would hit the plan limit.\n");
+ return false;
+ }
+
+ assert(!plan.empty());
+ const u32 parent_plan = plan.size() - 1;
+
+ // Make a copy of the graph, with the subgraph under each enter vertex
+ // duplicated without the edges into the other enter vertices.
+ // NOTE WELL: this will invalidate 'info' from the split point, but it's
+ // OK... we don't use it after this.
+ auto g_regions = assignRegions(g);
+ expandGraph(g, g_regions, enters);
+ dumpHolder(g, g_regions, 14, "som_expandedtree", grey);
+
+ for (auto v : enters) {
DEBUG_PRINTF("enter %zu\n", g[v].index);
-
- // For this entry vertex, construct a version of the graph without the
- // other entries in this region (g_path), and calculate its depths and
- // regions.
-
- NGHolder g_path;
+
+ // For this entry vertex, construct a version of the graph without the
+ // other entries in this region (g_path), and calculate its depths and
+ // regions.
+
+ NGHolder g_path;
unordered_map<NFAVertex, NFAVertex> orig_to_copy;
- cloneGraphWithOneEntry(g_path, g, g_regions, v, enters, orig_to_copy);
- auto regions = assignRegions(g_path);
- dumpHolder(g_path, regions, 14, "som_treepath", grey);
-
- map<u32, region_info> path_info;
- buildRegionMapping(g_path, regions, path_info);
-
- // Translate 'picked' to the corresponding region iterator over the
- // g_path graph. we can't trust the numbering, so we use a vertex
- // instead.
- NFAVertex picked_v = picked->second.enters.front();
- assert(contains(orig_to_copy, picked_v));
- u32 picked_region = regions.at(orig_to_copy[picked_v]);
- map<u32, region_info>::const_iterator path_pick =
- path_info.find(picked_region);
- if (path_pick == path_info.end()) {
- assert(0); // odd
- return false;
- }
-
- // Similarly, find our bad_region.
- assert(contains(orig_to_copy, v));
- u32 bad_region = regions.at(orig_to_copy[v]);
-
- // It's possible that the region may have grown to include its
- // successors, in which case we (currently) run screaming. Just
- // checking the size should be sufficient here.
- if (picked->second.full.size() != path_pick->second.full.size()) {
- DEBUG_PRINTF("picked region has grown, bailing\n");
- return false;
- }
-
- // Construct reverse mapping from vertices in g_path to g.
+ cloneGraphWithOneEntry(g_path, g, g_regions, v, enters, orig_to_copy);
+ auto regions = assignRegions(g_path);
+ dumpHolder(g_path, regions, 14, "som_treepath", grey);
+
+ map<u32, region_info> path_info;
+ buildRegionMapping(g_path, regions, path_info);
+
+ // Translate 'picked' to the corresponding region iterator over the
+ // g_path graph. we can't trust the numbering, so we use a vertex
+ // instead.
+ NFAVertex picked_v = picked->second.enters.front();
+ assert(contains(orig_to_copy, picked_v));
+ u32 picked_region = regions.at(orig_to_copy[picked_v]);
+ map<u32, region_info>::const_iterator path_pick =
+ path_info.find(picked_region);
+ if (path_pick == path_info.end()) {
+ assert(0); // odd
+ return false;
+ }
+
+ // Similarly, find our bad_region.
+ assert(contains(orig_to_copy, v));
+ u32 bad_region = regions.at(orig_to_copy[v]);
+
+ // It's possible that the region may have grown to include its
+ // successors, in which case we (currently) run screaming. Just
+ // checking the size should be sufficient here.
+ if (picked->second.full.size() != path_pick->second.full.size()) {
+ DEBUG_PRINTF("picked region has grown, bailing\n");
+ return false;
+ }
+
+ // Construct reverse mapping from vertices in g_path to g.
unordered_map<NFAVertex, NFAVertex> copy_to_orig;
- for (const auto &m : orig_to_copy) {
- copy_to_orig.insert(make_pair(m.second, m.first));
- }
-
- bool to_end = doTreePlanningIntl(g_path, regions, path_info, path_pick,
- bad_region, parent_plan,
- copy_to_orig, plan, grey);
- if (!to_end) {
- return false;
- }
- }
-
- return true;
-}
-
-enum dsp_behaviour {
- ALLOW_MODIFY_HOLDER,
- DISALLOW_MODIFY_HOLDER /* say no to tree planning */
-};
-
-static
-bool doSomPlanning(NGHolder &g, bool stuck_in,
+ for (const auto &m : orig_to_copy) {
+ copy_to_orig.insert(make_pair(m.second, m.first));
+ }
+
+ bool to_end = doTreePlanningIntl(g_path, regions, path_info, path_pick,
+ bad_region, parent_plan,
+ copy_to_orig, plan, grey);
+ if (!to_end) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+enum dsp_behaviour {
+ ALLOW_MODIFY_HOLDER,
+ DISALLOW_MODIFY_HOLDER /* say no to tree planning */
+};
+
+static
+bool doSomPlanning(NGHolder &g, bool stuck_in,
const unordered_map<NFAVertex, u32> &regions,
- const map<u32, region_info> &info,
- map<u32, region_info>::const_iterator picked,
- vector<som_plan> &plan,
- const Grey &grey,
- dsp_behaviour behaviour = ALLOW_MODIFY_HOLDER) {
- DEBUG_PRINTF("in picked is %u\n", picked->first);
-
- /* Need to verify how far the lock covers */
- u32 bad_region;
- NGHolder *ap_pref = plan.back().prefix.get();
- NGHolder ap_temp;
- if (hasBigCycles(*ap_pref)) {
- fillRoughMidfix(&ap_temp, g, regions, info, picked);
- ap_pref = &ap_temp;
- }
-
- bool to_end = advancePlan(g, regions, *ap_pref, stuck_in, picked,
- picked, picked, plan.back().escapes,
- plan.back(), &bad_region);
-
- if (to_end) {
- DEBUG_PRINTF("advanced through the whole graph in one go!\n");
- addReporterVertices(g, plan.back().reporters);
- return true;
- }
-
- map<u32, region_info>::const_iterator prev_furthest = picked;
- map<u32, region_info>::const_iterator furthest;
-
- furthest = info.find(bad_region); /* first bad */
- if (furthest == info.begin() || furthest == info.end()) {
- DEBUG_PRINTF("no partition\n");
- return false;
- }
- --furthest; /* last region we can establish som for */
-
- if (furthest->first <= picked->first) {
- do_tree:
- /* unable to establish SoM past the last picked region */
- if (behaviour == DISALLOW_MODIFY_HOLDER) {
- /* tree planning mutates the graph */
- return false;
- }
-
- DEBUG_PRINTF("failed to make any progress\n");
- assert(!plan.empty());
- if (plan.size() == 1) {
- DEBUG_PRINTF("not handling initial alternations yet\n");
- return false;
- }
- plan.pop_back();
- return doTreePlanning(g, furthest, prev_furthest, plan, grey);
- }
-
- furthest = picked;
- while (!to_end) {
- prev_furthest = furthest;
-
- DEBUG_PRINTF("prev further is %u\n", prev_furthest->first);
- DEBUG_PRINTF("first bad region now %u\n", bad_region);
-
- furthest = info.find(bad_region); /* first bad */
- if (furthest == info.begin() || furthest == info.end()) {
- DEBUG_PRINTF("no partition\n");
- return false;
- }
- --furthest; /* last region we can establish som for */
-
- map<u32, region_info>::const_iterator furthest_lock = furthest;
- CharReach next_escapes;
- bool stuck;
- do {
- stuck = isPossibleLock(g, furthest_lock, info, &next_escapes);
- } while (!stuck && (--furthest_lock)->first > prev_furthest->first);
- DEBUG_PRINTF("lock possible? %d\n", (int)stuck);
- DEBUG_PRINTF("furthest_lock=%u\n", furthest_lock->first);
-
- if (stuck && !isMandRegionBetween(prev_furthest, furthest_lock)) {
- stuck = false;
- }
-
- if (!isMandRegionBetween(prev_furthest, furthest)) {
- DEBUG_PRINTF("no mand region between %u and %u\n",
- prev_furthest->first, furthest->first);
- return false;
- }
-
- /* There is no certainty that the som at a reset location will always
- * go forward */
- if (plan.back().is_reset && stuck) {
- NGHolder midfix;
- fillHolderForLockCheck(&midfix, g, info, furthest_lock);
-
- DEBUG_PRINTF("checking if midfix is suitable for lock\n");
- if (!firstMatchIsFirst(midfix)) {
- DEBUG_PRINTF("not stuck\n");
- stuck = false;
- }
- }
-
- assert(!plan.empty());
- if (!addPlan(plan, plan.size() - 1)) {
- return false;
- }
-
- to_end = false;
-
- if (stuck && next_escapes.none()) {
- picked = furthest_lock;
- to_end = true;
- }
-
- if (!to_end) {
- NGHolder conservative_midfix; /* for use in reset, exsl analysis */
- fillRoughMidfix(&conservative_midfix, g, regions, info, furthest);
-
- u32 old_bad_region = bad_region;
- to_end = advancePlan(g, regions, conservative_midfix, stuck, picked,
- furthest, furthest_lock, next_escapes,
- plan.back(), &bad_region);
-
- if (!to_end
- && bad_region <= old_bad_region) { /* we failed to progress */
- goto do_tree;
- }
- }
-
- /* handle direct edge to accepts from region */
- if (edge(furthest->second.exits.front(), g.accept, g).second
- || edge(furthest->second.exits.front(), g.acceptEod, g).second) {
- map<u32, region_info>::const_iterator it = furthest;
- do {
- DEBUG_PRINTF("direct edge to accept from region %u\n",
- it->first);
- addReporterVertices(it->second, g, plan.back().reporters_in);
- } while (it != info.begin() && it->second.optional
- && (it--)->first);
- }
-
- /* create second prefix */
- plan.back().prefix = makePrefix(g, regions, furthest->second,
- next(furthest)->second);
- }
- DEBUG_PRINTF("(final) picked is %u\n", picked->first);
-
- // The last region contributes reporters. If it's optional, the regions
- // before it do as well.
- map<u32, region_info>::const_reverse_iterator it = info.rbegin();
- do {
- DEBUG_PRINTF("region %u contributes reporters to last plan\n",
- it->first);
- addReporterVertices(it->second, g, plan.back().reporters);
- } while (it->second.optional && it != info.rend() &&
- (++it)->first > furthest->first);
-
- DEBUG_PRINTF("done!\n");
- return true;
-}
-
-static
-void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p,
- UNUSED size_t num) {
-#if defined(DEBUG) || defined(DUMP_PLANS)
- DEBUG_PRINTF("plan %zu: prefix=%p, escapes=%s, is_reset=%d, "
- "parent=%u\n",
- num, p.prefix.get(),
- describeClass(p.escapes, 20, CC_OUT_TEXT).c_str(),
- p.is_reset, p.parent);
- printf(" reporters:");
- for (auto v : p.reporters) {
+ const map<u32, region_info> &info,
+ map<u32, region_info>::const_iterator picked,
+ vector<som_plan> &plan,
+ const Grey &grey,
+ dsp_behaviour behaviour = ALLOW_MODIFY_HOLDER) {
+ DEBUG_PRINTF("in picked is %u\n", picked->first);
+
+ /* Need to verify how far the lock covers */
+ u32 bad_region;
+ NGHolder *ap_pref = plan.back().prefix.get();
+ NGHolder ap_temp;
+ if (hasBigCycles(*ap_pref)) {
+ fillRoughMidfix(&ap_temp, g, regions, info, picked);
+ ap_pref = &ap_temp;
+ }
+
+ bool to_end = advancePlan(g, regions, *ap_pref, stuck_in, picked,
+ picked, picked, plan.back().escapes,
+ plan.back(), &bad_region);
+
+ if (to_end) {
+ DEBUG_PRINTF("advanced through the whole graph in one go!\n");
+ addReporterVertices(g, plan.back().reporters);
+ return true;
+ }
+
+ map<u32, region_info>::const_iterator prev_furthest = picked;
+ map<u32, region_info>::const_iterator furthest;
+
+ furthest = info.find(bad_region); /* first bad */
+ if (furthest == info.begin() || furthest == info.end()) {
+ DEBUG_PRINTF("no partition\n");
+ return false;
+ }
+ --furthest; /* last region we can establish som for */
+
+ if (furthest->first <= picked->first) {
+ do_tree:
+ /* unable to establish SoM past the last picked region */
+ if (behaviour == DISALLOW_MODIFY_HOLDER) {
+ /* tree planning mutates the graph */
+ return false;
+ }
+
+ DEBUG_PRINTF("failed to make any progress\n");
+ assert(!plan.empty());
+ if (plan.size() == 1) {
+ DEBUG_PRINTF("not handling initial alternations yet\n");
+ return false;
+ }
+ plan.pop_back();
+ return doTreePlanning(g, furthest, prev_furthest, plan, grey);
+ }
+
+ furthest = picked;
+ while (!to_end) {
+ prev_furthest = furthest;
+
+ DEBUG_PRINTF("prev further is %u\n", prev_furthest->first);
+ DEBUG_PRINTF("first bad region now %u\n", bad_region);
+
+ furthest = info.find(bad_region); /* first bad */
+ if (furthest == info.begin() || furthest == info.end()) {
+ DEBUG_PRINTF("no partition\n");
+ return false;
+ }
+ --furthest; /* last region we can establish som for */
+
+ map<u32, region_info>::const_iterator furthest_lock = furthest;
+ CharReach next_escapes;
+ bool stuck;
+ do {
+ stuck = isPossibleLock(g, furthest_lock, info, &next_escapes);
+ } while (!stuck && (--furthest_lock)->first > prev_furthest->first);
+ DEBUG_PRINTF("lock possible? %d\n", (int)stuck);
+ DEBUG_PRINTF("furthest_lock=%u\n", furthest_lock->first);
+
+ if (stuck && !isMandRegionBetween(prev_furthest, furthest_lock)) {
+ stuck = false;
+ }
+
+ if (!isMandRegionBetween(prev_furthest, furthest)) {
+ DEBUG_PRINTF("no mand region between %u and %u\n",
+ prev_furthest->first, furthest->first);
+ return false;
+ }
+
+ /* There is no certainty that the som at a reset location will always
+ * go forward */
+ if (plan.back().is_reset && stuck) {
+ NGHolder midfix;
+ fillHolderForLockCheck(&midfix, g, info, furthest_lock);
+
+ DEBUG_PRINTF("checking if midfix is suitable for lock\n");
+ if (!firstMatchIsFirst(midfix)) {
+ DEBUG_PRINTF("not stuck\n");
+ stuck = false;
+ }
+ }
+
+ assert(!plan.empty());
+ if (!addPlan(plan, plan.size() - 1)) {
+ return false;
+ }
+
+ to_end = false;
+
+ if (stuck && next_escapes.none()) {
+ picked = furthest_lock;
+ to_end = true;
+ }
+
+ if (!to_end) {
+ NGHolder conservative_midfix; /* for use in reset, exsl analysis */
+ fillRoughMidfix(&conservative_midfix, g, regions, info, furthest);
+
+ u32 old_bad_region = bad_region;
+ to_end = advancePlan(g, regions, conservative_midfix, stuck, picked,
+ furthest, furthest_lock, next_escapes,
+ plan.back(), &bad_region);
+
+ if (!to_end
+ && bad_region <= old_bad_region) { /* we failed to progress */
+ goto do_tree;
+ }
+ }
+
+ /* handle direct edge to accepts from region */
+ if (edge(furthest->second.exits.front(), g.accept, g).second
+ || edge(furthest->second.exits.front(), g.acceptEod, g).second) {
+ map<u32, region_info>::const_iterator it = furthest;
+ do {
+ DEBUG_PRINTF("direct edge to accept from region %u\n",
+ it->first);
+ addReporterVertices(it->second, g, plan.back().reporters_in);
+ } while (it != info.begin() && it->second.optional
+ && (it--)->first);
+ }
+
+ /* create second prefix */
+ plan.back().prefix = makePrefix(g, regions, furthest->second,
+ next(furthest)->second);
+ }
+ DEBUG_PRINTF("(final) picked is %u\n", picked->first);
+
+ // The last region contributes reporters. If it's optional, the regions
+ // before it do as well.
+ map<u32, region_info>::const_reverse_iterator it = info.rbegin();
+ do {
+ DEBUG_PRINTF("region %u contributes reporters to last plan\n",
+ it->first);
+ addReporterVertices(it->second, g, plan.back().reporters);
+ } while (it->second.optional && it != info.rend() &&
+ (++it)->first > furthest->first);
+
+ DEBUG_PRINTF("done!\n");
+ return true;
+}
+
+static
+void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p,
+ UNUSED size_t num) {
+#if defined(DEBUG) || defined(DUMP_PLANS)
+ DEBUG_PRINTF("plan %zu: prefix=%p, escapes=%s, is_reset=%d, "
+ "parent=%u\n",
+ num, p.prefix.get(),
+ describeClass(p.escapes, 20, CC_OUT_TEXT).c_str(),
+ p.is_reset, p.parent);
+ printf(" reporters:");
+ for (auto v : p.reporters) {
printf(" %zu", g[v].index);
- }
- printf("\n");
- printf(" reporters_in:");
- for (auto v : p.reporters_in) {
+ }
+ printf("\n");
+ printf(" reporters_in:");
+ for (auto v : p.reporters_in) {
printf(" %zu", g[v].index);
- }
- printf("\n");
-#endif
-}
-
-/**
- * Note: if we fail to build a midfix/ng.addHolder, we throw a pattern too
- * large exception as (1) if previous ng modification have been applied (other
- * midfixes have been applied), ng will be an undefined state on return and (2)
- * if the head of a pattern cannot be implemented we are generally unable to
- * implement the full pattern.
- */
-static
+ }
+ printf("\n");
+#endif
+}
+
+/**
+ * Note: if we fail to build a midfix/ng.addHolder, we throw a pattern too
+ * large exception as (1) if previous ng modification have been applied (other
+ * midfixes have been applied), ng will be an undefined state on return and (2)
+ * if the head of a pattern cannot be implemented we are generally unable to
+ * implement the full pattern.
+ */
+static
void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id,
NGHolder &g, vector<som_plan> &plan,
const u32 first_som_slot) {
- ReportManager &rm = ng.rm;
- SomSlotManager &ssm = ng.ssm;
-
- DEBUG_PRINTF("%zu plans\n", plan.size());
- assert(plan.size() <= MAX_SOM_PLANS);
- assert(!plan.empty());
-
- vector<u32> som_slots(plan.size());
- som_slots[0] = first_som_slot;
-
- // Root plan, which already has a SOM slot assigned (first_som_slot).
- dumpSomPlan(g, plan.front(), 0);
+ ReportManager &rm = ng.rm;
+ SomSlotManager &ssm = ng.ssm;
+
+ DEBUG_PRINTF("%zu plans\n", plan.size());
+ assert(plan.size() <= MAX_SOM_PLANS);
+ assert(!plan.empty());
+
+ vector<u32> som_slots(plan.size());
+ som_slots[0] = first_som_slot;
+
+ // Root plan, which already has a SOM slot assigned (first_som_slot).
+ dumpSomPlan(g, plan.front(), 0);
dumpSomSubComponent(*plan.front().prefix, "04_som", expr.index, comp_id, 0,
ng.cc.grey);
- assert(plan.front().prefix);
- if (plan.front().escapes.any() && !plan.front().is_reset) {
- /* setup escaper for first som location */
- if (!createEscaper(ng, *plan.front().prefix, plan.front().escapes,
- first_som_slot)) {
+ assert(plan.front().prefix);
+ if (plan.front().escapes.any() && !plan.front().is_reset) {
+ /* setup escaper for first som location */
+ if (!createEscaper(ng, *plan.front().prefix, plan.front().escapes,
+ first_som_slot)) {
throw CompileError(expr.index, "Pattern is too large.");
- }
- }
-
- assert(plan.front().reporters_in.empty());
- updateReportToUseRecordedSom(rm, g, plan.front().reporters, first_som_slot);
-
- // Tree of plans, encoded in a vector.
- vector<som_plan>::const_iterator it = plan.begin();
- for (++it; it != plan.end(); ++it) {
- const u32 plan_num = it - plan.begin();
- dumpSomPlan(g, *it, plan_num);
+ }
+ }
+
+ assert(plan.front().reporters_in.empty());
+ updateReportToUseRecordedSom(rm, g, plan.front().reporters, first_som_slot);
+
+ // Tree of plans, encoded in a vector.
+ vector<som_plan>::const_iterator it = plan.begin();
+ for (++it; it != plan.end(); ++it) {
+ const u32 plan_num = it - plan.begin();
+ dumpSomPlan(g, *it, plan_num);
dumpSomSubComponent(*it->prefix, "04_som", expr.index, comp_id,
- plan_num, ng.cc.grey);
-
- assert(it->parent < plan_num);
- u32 som_slot_in = som_slots[it->parent];
- u32 som_slot_out = ssm.getSomSlot(*it->prefix, it->escapes,
- it->is_reset, som_slot_in);
- som_slots[plan_num] = som_slot_out;
-
- assert(!it->no_implement);
- if (!buildMidfix(ng, *it, som_slot_in, som_slot_out)) {
+ plan_num, ng.cc.grey);
+
+ assert(it->parent < plan_num);
+ u32 som_slot_in = som_slots[it->parent];
+ u32 som_slot_out = ssm.getSomSlot(*it->prefix, it->escapes,
+ it->is_reset, som_slot_in);
+ som_slots[plan_num] = som_slot_out;
+
+ assert(!it->no_implement);
+ if (!buildMidfix(ng, *it, som_slot_in, som_slot_out)) {
throw CompileError(expr.index, "Pattern is too large.");
- }
- updateReportToUseRecordedSom(rm, g, it->reporters_in, som_slot_in);
- updateReportToUseRecordedSom(rm, g, it->reporters, som_slot_out);
- }
-
- /* create prefix to set the som_loc */
- if (!plan.front().no_implement) {
+ }
+ updateReportToUseRecordedSom(rm, g, it->reporters_in, som_slot_in);
+ updateReportToUseRecordedSom(rm, g, it->reporters, som_slot_out);
+ }
+
+ /* create prefix to set the som_loc */
+ if (!plan.front().no_implement) {
renumber_vertices(*plan.front().prefix);
- assert(plan.front().prefix->kind == NFA_OUTFIX);
- if (!ng.addHolder(*plan.front().prefix)) {
+ assert(plan.front().prefix->kind == NFA_OUTFIX);
+ if (!ng.addHolder(*plan.front().prefix)) {
throw CompileError(expr.index, "Pattern is too large.");
- }
- }
-}
-
-static
-void anchorStarts(NGHolder &g) {
- vector<NFAEdge> dead;
- for (const auto &e : out_edges_range(g.startDs, g)) {
- NFAVertex v = target(e, g);
- if (v == g.startDs) {
- continue;
- }
- add_edge_if_not_present(g.start, v, g[e], g);
- dead.push_back(e);
- }
- remove_edges(dead, g);
-}
-
-static
-void setZeroReports(NGHolder &g) {
- set<NFAVertex> acceptors;
- insert(&acceptors, inv_adjacent_vertices(g.accept, g));
- insert(&acceptors, inv_adjacent_vertices(g.acceptEod, g));
- acceptors.erase(g.accept);
-
- for (auto v : vertices_range(g)) {
- auto &reports = g[v].reports;
- reports.clear();
-
- if (!contains(acceptors, v)) {
- continue;
- }
-
- // We use the report ID to store the offset adjustment used for virtual
- // starts.
-
- if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
- reports.insert(1);
- } else {
- reports.insert(0);
- }
- }
-}
-
-/* updates the reports on all vertices leading to the sink */
-static
-void makeSomRevNfaReports(ReportManager &rm, NGHolder &g, NFAVertex sink,
- const ReportID report, const u32 comp_id) {
- // Construct replacement report.
- Report ir = rm.getReport(report);
- ir.type = EXTERNAL_CALLBACK_SOM_REV_NFA;
- ir.revNfaIndex = comp_id;
- ReportID new_report = rm.getInternalId(ir);
-
- for (auto v : inv_adjacent_vertices_range(sink, g)) {
- if (v == g.accept) {
- continue;
- }
-
- auto &r = g[v].reports;
- if (contains(r, report)) {
- r.erase(report);
- r.insert(new_report);
- }
- }
-}
-
-static
-void clearProperInEdges(NGHolder &g, const NFAVertex sink) {
- vector<NFAEdge> dead;
- for (const auto &e : in_edges_range(sink, g)) {
- if (source(e, g) == g.accept) {
- continue;
- }
- dead.push_back(e);
- }
-
- if (dead.empty()) {
- return;
- }
-
- remove_edges(dead, g);
- pruneUseless(g, false);
-}
-
-namespace {
-struct SomRevNfa {
+ }
+ }
+}
+
+static
+void anchorStarts(NGHolder &g) {
+ vector<NFAEdge> dead;
+ for (const auto &e : out_edges_range(g.startDs, g)) {
+ NFAVertex v = target(e, g);
+ if (v == g.startDs) {
+ continue;
+ }
+ add_edge_if_not_present(g.start, v, g[e], g);
+ dead.push_back(e);
+ }
+ remove_edges(dead, g);
+}
+
+static
+void setZeroReports(NGHolder &g) {
+ set<NFAVertex> acceptors;
+ insert(&acceptors, inv_adjacent_vertices(g.accept, g));
+ insert(&acceptors, inv_adjacent_vertices(g.acceptEod, g));
+ acceptors.erase(g.accept);
+
+ for (auto v : vertices_range(g)) {
+ auto &reports = g[v].reports;
+ reports.clear();
+
+ if (!contains(acceptors, v)) {
+ continue;
+ }
+
+ // We use the report ID to store the offset adjustment used for virtual
+ // starts.
+
+ if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
+ reports.insert(1);
+ } else {
+ reports.insert(0);
+ }
+ }
+}
+
+/* updates the reports on all vertices leading to the sink */
+static
+void makeSomRevNfaReports(ReportManager &rm, NGHolder &g, NFAVertex sink,
+ const ReportID report, const u32 comp_id) {
+ // Construct replacement report.
+ Report ir = rm.getReport(report);
+ ir.type = EXTERNAL_CALLBACK_SOM_REV_NFA;
+ ir.revNfaIndex = comp_id;
+ ReportID new_report = rm.getInternalId(ir);
+
+ for (auto v : inv_adjacent_vertices_range(sink, g)) {
+ if (v == g.accept) {
+ continue;
+ }
+
+ auto &r = g[v].reports;
+ if (contains(r, report)) {
+ r.erase(report);
+ r.insert(new_report);
+ }
+ }
+}
+
+static
+void clearProperInEdges(NGHolder &g, const NFAVertex sink) {
+ vector<NFAEdge> dead;
+ for (const auto &e : in_edges_range(sink, g)) {
+ if (source(e, g) == g.accept) {
+ continue;
+ }
+ dead.push_back(e);
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
+ remove_edges(dead, g);
+ pruneUseless(g, false);
+}
+
+namespace {
+struct SomRevNfa {
SomRevNfa(NFAVertex s, ReportID r, bytecode_ptr<NFA> n)
- : sink(s), report(r), nfa(move(n)) {}
- NFAVertex sink;
- ReportID report;
+ : sink(s), report(r), nfa(move(n)) {}
+ NFAVertex sink;
+ ReportID report;
bytecode_ptr<NFA> nfa;
-};
-}
-
-static
+};
+}
+
+static
bytecode_ptr<NFA> makeBareSomRevNfa(const NGHolder &g,
const CompileContext &cc) {
- // Create a reversed anchored version of this NFA which fires a zero report
- // ID on accept.
- NGHolder g_rev;
- reverseHolder(g, g_rev);
- anchorStarts(g_rev);
- setZeroReports(g_rev);
-
- // Prep for actual construction.
+ // Create a reversed anchored version of this NFA which fires a zero report
+ // ID on accept.
+ NGHolder g_rev;
+ reverseHolder(g, g_rev);
+ anchorStarts(g_rev);
+ setZeroReports(g_rev);
+
+ // Prep for actual construction.
renumber_vertices(g_rev);
- g_rev.kind = NFA_REV_PREFIX;
- reduceGraphEquivalences(g_rev, cc);
- removeRedundancy(g_rev, SOM_NONE);
-
- DEBUG_PRINTF("building a rev NFA with %zu vertices\n", num_vertices(g_rev));
-
+ g_rev.kind = NFA_REV_PREFIX;
+ reduceGraphEquivalences(g_rev, cc);
+ removeRedundancy(g_rev, SOM_NONE);
+
+ DEBUG_PRINTF("building a rev NFA with %zu vertices\n", num_vertices(g_rev));
+
auto nfa = constructReversedNFA(g_rev, cc);
- if (!nfa) {
- return nfa;
- }
-
- // Set some useful properties.
- depth maxWidth = findMaxWidth(g);
- if (maxWidth.is_finite()) {
- nfa->maxWidth = (u32)maxWidth;
- } else {
- nfa->maxWidth = 0;
- }
- depth minWidth = findMinWidth(g);
- nfa->minWidth = (u32)minWidth;
-
- return nfa;
-}
-
-static
-bool makeSomRevNfa(vector<SomRevNfa> &som_nfas, const NGHolder &g,
- const ReportID report, const NFAVertex sink,
- const CompileContext &cc) {
- // Clone the graph with ONLY the given report vertices on the given sink.
- NGHolder g2;
- cloneHolder(g2, g);
- clearProperInEdges(g2, sink == g.accept ? g2.acceptEod : g2.accept);
- pruneAllOtherReports(g2, report);
-
- if (in_degree(g2.accept, g2) == 0 && in_degree(g2.acceptEod, g2) == 1) {
- DEBUG_PRINTF("no work to do for this sink\n");
- return true;
- }
-
+ if (!nfa) {
+ return nfa;
+ }
+
+ // Set some useful properties.
+ depth maxWidth = findMaxWidth(g);
+ if (maxWidth.is_finite()) {
+ nfa->maxWidth = (u32)maxWidth;
+ } else {
+ nfa->maxWidth = 0;
+ }
+ depth minWidth = findMinWidth(g);
+ nfa->minWidth = (u32)minWidth;
+
+ return nfa;
+}
+
+static
+bool makeSomRevNfa(vector<SomRevNfa> &som_nfas, const NGHolder &g,
+ const ReportID report, const NFAVertex sink,
+ const CompileContext &cc) {
+ // Clone the graph with ONLY the given report vertices on the given sink.
+ NGHolder g2;
+ cloneHolder(g2, g);
+ clearProperInEdges(g2, sink == g.accept ? g2.acceptEod : g2.accept);
+ pruneAllOtherReports(g2, report);
+
+ if (in_degree(g2.accept, g2) == 0 && in_degree(g2.acceptEod, g2) == 1) {
+ DEBUG_PRINTF("no work to do for this sink\n");
+ return true;
+ }
+
renumber_vertices(g2); // for findMinWidth, findMaxWidth.
-
+
auto nfa = makeBareSomRevNfa(g2, cc);
- if (!nfa) {
- DEBUG_PRINTF("couldn't build rev nfa\n");
- return false;
- }
-
- som_nfas.emplace_back(sink, report, move(nfa));
- return true;
-}
-
-static
-bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) {
- ReportManager &rm = ng.rm;
-
- // FIXME might want to work on a graph without extra redundancy?
- depth maxWidth = findMaxWidth(g);
- DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str());
-
- if (maxWidth > depth(ng.maxSomRevHistoryAvailable)) {
- DEBUG_PRINTF("too wide\n");
- return false;
- }
-
- set<ReportID> reports = all_reports(g);
- DEBUG_PRINTF("%zu reports\n", reports.size());
-
- // We distinguish between reports and accept/acceptEod sinks in order to
- // correctly handle cases which do different things on eod/normal accepts.
- // Later, it might be more elegant to do this with a single NFA and
- // multi-tops.
-
- vector<SomRevNfa> som_nfas;
-
- for (auto report : reports) {
- if (!makeSomRevNfa(som_nfas, g, report, g.accept, cc)) {
- return false;
- }
- if (!makeSomRevNfa(som_nfas, g, report, g.acceptEod, cc)) {
- return false;
- }
- }
-
- for (auto &som_nfa : som_nfas) {
- assert(som_nfa.nfa);
-
- // Transfer ownership of the NFA to the SOM slot manager.
- u32 comp_id = ng.ssm.addRevNfa(move(som_nfa.nfa), maxWidth);
-
- // Replace this report on 'g' with a SOM_REV_NFA report pointing at our
- // new component.
- makeSomRevNfaReports(rm, g, som_nfa.sink, som_nfa.report, comp_id);
- }
-
- if (ng.cc.streaming) {
- assert(ng.ssm.somHistoryRequired() <=
- max(cc.grey.maxHistoryAvailable, ng.maxSomRevHistoryAvailable));
- }
-
- return true;
-}
-
-static
+ if (!nfa) {
+ DEBUG_PRINTF("couldn't build rev nfa\n");
+ return false;
+ }
+
+ som_nfas.emplace_back(sink, report, move(nfa));
+ return true;
+}
+
+static
+bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) {
+ ReportManager &rm = ng.rm;
+
+ // FIXME might want to work on a graph without extra redundancy?
+ depth maxWidth = findMaxWidth(g);
+ DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str());
+
+ if (maxWidth > depth(ng.maxSomRevHistoryAvailable)) {
+ DEBUG_PRINTF("too wide\n");
+ return false;
+ }
+
+ set<ReportID> reports = all_reports(g);
+ DEBUG_PRINTF("%zu reports\n", reports.size());
+
+ // We distinguish between reports and accept/acceptEod sinks in order to
+ // correctly handle cases which do different things on eod/normal accepts.
+ // Later, it might be more elegant to do this with a single NFA and
+ // multi-tops.
+
+ vector<SomRevNfa> som_nfas;
+
+ for (auto report : reports) {
+ if (!makeSomRevNfa(som_nfas, g, report, g.accept, cc)) {
+ return false;
+ }
+ if (!makeSomRevNfa(som_nfas, g, report, g.acceptEod, cc)) {
+ return false;
+ }
+ }
+
+ for (auto &som_nfa : som_nfas) {
+ assert(som_nfa.nfa);
+
+ // Transfer ownership of the NFA to the SOM slot manager.
+ u32 comp_id = ng.ssm.addRevNfa(move(som_nfa.nfa), maxWidth);
+
+ // Replace this report on 'g' with a SOM_REV_NFA report pointing at our
+ // new component.
+ makeSomRevNfaReports(rm, g, som_nfa.sink, som_nfa.report, comp_id);
+ }
+
+ if (ng.cc.streaming) {
+ assert(ng.ssm.somHistoryRequired() <=
+ max(cc.grey.maxHistoryAvailable, ng.maxSomRevHistoryAvailable));
+ }
+
+ return true;
+}
+
+static
u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, NGHolder &g,
- const CompileContext &cc) {
- depth maxWidth = findMaxWidth(g);
-
- assert(maxWidth <= depth(ng.maxSomRevHistoryAvailable));
- assert(all_reports(g).size() == 1);
-
- auto nfa = makeBareSomRevNfa(g, cc);
- if (!nfa) {
+ const CompileContext &cc) {
+ depth maxWidth = findMaxWidth(g);
+
+ assert(maxWidth <= depth(ng.maxSomRevHistoryAvailable));
+ assert(all_reports(g).size() == 1);
+
+ auto nfa = makeBareSomRevNfa(g, cc);
+ if (!nfa) {
throw CompileError(expr.index, "Pattern is too large.");
- }
-
- if (ng.cc.streaming) {
- assert(ng.ssm.somHistoryRequired() <=
- max(cc.grey.maxHistoryAvailable, ng.maxSomRevHistoryAvailable));
- }
-
- return ng.ssm.addRevNfa(move(nfa), maxWidth);
-}
-
-static
-bool is_literable(const NGHolder &g, NFAVertex v) {
- const CharReach &cr = g[v].char_reach;
- return cr.count() == 1 || cr.isCaselessChar();
-}
-
-static
-void append(ue2_literal &s, const CharReach &cr) {
- assert(cr.count() == 1 || cr.isCaselessChar());
- s.push_back(cr.find_first(), cr.isCaselessChar());
-}
-
-static
-map<u32, region_info>::const_iterator findLaterLiteral(const NGHolder &g,
- const map<u32, region_info> &info,
- map<u32, region_info>::const_iterator lower_bound,
- ue2_literal &s_out, const Grey &grey) {
-#define MIN_LITERAL_LENGTH 3
- s_out.clear();
- bool past_lower = false;
- ue2_literal s;
- map<u32, region_info>::const_iterator it;
- for (it = info.begin(); it != info.end(); ++it) {
- if (it == lower_bound) {
- past_lower = true;
- }
- if (!it->second.optional && it->second.dag
- && it->second.full.size() == 1
- && is_literable(g, it->second.full.front())) {
- append(s, g[it->second.full.front()].char_reach);
-
- if (s.length() >= grey.maxHistoryAvailable && past_lower) {
- goto exit;
- }
- } else {
- if (past_lower && it != lower_bound
- && s.length() >= MIN_LITERAL_LENGTH) {
- --it;
- goto exit;
- }
- s.clear();
- }
- }
-
- if (past_lower && it != lower_bound && s.length() >= MIN_LITERAL_LENGTH) {
- --it;
- s_out = s;
- return it;
- }
- exit:
- if (s.length() > grey.maxHistoryAvailable) {
- ue2_literal::const_iterator jt = s.end() - grey.maxHistoryAvailable;
- for (; jt != s.end(); ++jt) {
- s_out.push_back(*jt);
- }
- } else {
- s_out = s;
- }
- return it;
-}
-
-static
-bool attemptToBuildChainAfterSombe(SomSlotManager &ssm, NGHolder &g,
+ }
+
+ if (ng.cc.streaming) {
+ assert(ng.ssm.somHistoryRequired() <=
+ max(cc.grey.maxHistoryAvailable, ng.maxSomRevHistoryAvailable));
+ }
+
+ return ng.ssm.addRevNfa(move(nfa), maxWidth);
+}
+
+static
+bool is_literable(const NGHolder &g, NFAVertex v) {
+ const CharReach &cr = g[v].char_reach;
+ return cr.count() == 1 || cr.isCaselessChar();
+}
+
+static
+void append(ue2_literal &s, const CharReach &cr) {
+ assert(cr.count() == 1 || cr.isCaselessChar());
+ s.push_back(cr.find_first(), cr.isCaselessChar());
+}
+
+static
+map<u32, region_info>::const_iterator findLaterLiteral(const NGHolder &g,
+ const map<u32, region_info> &info,
+ map<u32, region_info>::const_iterator lower_bound,
+ ue2_literal &s_out, const Grey &grey) {
+#define MIN_LITERAL_LENGTH 3
+ s_out.clear();
+ bool past_lower = false;
+ ue2_literal s;
+ map<u32, region_info>::const_iterator it;
+ for (it = info.begin(); it != info.end(); ++it) {
+ if (it == lower_bound) {
+ past_lower = true;
+ }
+ if (!it->second.optional && it->second.dag
+ && it->second.full.size() == 1
+ && is_literable(g, it->second.full.front())) {
+ append(s, g[it->second.full.front()].char_reach);
+
+ if (s.length() >= grey.maxHistoryAvailable && past_lower) {
+ goto exit;
+ }
+ } else {
+ if (past_lower && it != lower_bound
+ && s.length() >= MIN_LITERAL_LENGTH) {
+ --it;
+ goto exit;
+ }
+ s.clear();
+ }
+ }
+
+ if (past_lower && it != lower_bound && s.length() >= MIN_LITERAL_LENGTH) {
+ --it;
+ s_out = s;
+ return it;
+ }
+ exit:
+ if (s.length() > grey.maxHistoryAvailable) {
+ ue2_literal::const_iterator jt = s.end() - grey.maxHistoryAvailable;
+ for (; jt != s.end(); ++jt) {
+ s_out.push_back(*jt);
+ }
+ } else {
+ s_out = s;
+ }
+ return it;
+}
+
+static
+bool attemptToBuildChainAfterSombe(SomSlotManager &ssm, NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const map<u32, region_info> &info,
- map<u32, region_info>::const_iterator picked,
- const Grey &grey,
- vector<som_plan> *plan) {
- DEBUG_PRINTF("trying to chain from %u\n", picked->first);
- const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */
-
- shared_ptr<NGHolder> prefix = makePrefix(g, regions, picked->second,
- next(picked)->second);
-
- // Quick check to stop us from trying this on huge graphs, which causes us
- // to spend forever in ng_execute looking at cases that will most like
- // fail. See UE-2078.
- size_t prefix_size = num_vertices(*prefix);
- size_t total_size = num_vertices(g);
- assert(total_size >= prefix_size);
- if (total_size - prefix_size > MAX_SOMBE_CHAIN_VERTICES) {
- DEBUG_PRINTF("suffix has %zu vertices, fail\n",
- total_size - prefix_size);
- return false;
- }
-
- clearReports(*prefix);
- for (auto u : inv_adjacent_vertices_range(prefix->accept, *prefix)) {
- (*prefix)[u].reports.insert(0);
- }
-
- dumpHolder(*prefix, 0, "full_haiglit_prefix", grey);
-
- CharReach escapes;
- bool stuck = isPossibleLock(g, picked, info, &escapes);
- if (stuck) {
- NGHolder gg;
- fillHolderForLockCheck(&gg, g, info, picked);
-
- stuck = firstMatchIsFirst(gg);
- }
-
- DEBUG_PRINTF("stuck = %d\n", (int)stuck);
-
- // Note: no-one should ever pay attention to the root plan's som_loc_in.
- plan->emplace_back(prefix, escapes, false, 0);
- plan->back().no_implement = true;
-
- dumpHolder(*plan->back().prefix, 22, "som_prefix", grey);
-
- /* don't allow tree planning to mutate the graph */
- if (!doSomPlanning(g, stuck, regions, info, picked, *plan, grey,
- DISALLOW_MODIFY_HOLDER)) {
- // Rollback SOM locations.
- ssm.rollbackSomTo(numSomLocsBefore);
-
- DEBUG_PRINTF("fail to chain\n");
- return false;
- }
-
- return true;
-}
-
-static
-void setReportOnHaigPrefix(RoseBuild &rose, NGHolder &h) {
- ReportID haig_report_id = rose.getNewNfaReport();
- DEBUG_PRINTF("setting report id of %u\n", haig_report_id);
-
- clearReports(h);
- for (auto u : inv_adjacent_vertices_range(h.accept, h)) {
- h[u].reports.clear();
- h[u].reports.insert(haig_report_id);
- }
-}
-
-static
-bool tryHaig(RoseBuild &rose, NGHolder &g,
+ const map<u32, region_info> &info,
+ map<u32, region_info>::const_iterator picked,
+ const Grey &grey,
+ vector<som_plan> *plan) {
+ DEBUG_PRINTF("trying to chain from %u\n", picked->first);
+ const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */
+
+ shared_ptr<NGHolder> prefix = makePrefix(g, regions, picked->second,
+ next(picked)->second);
+
+ // Quick check to stop us from trying this on huge graphs, which causes us
+ // to spend forever in ng_execute looking at cases that will most like
+ // fail. See UE-2078.
+ size_t prefix_size = num_vertices(*prefix);
+ size_t total_size = num_vertices(g);
+ assert(total_size >= prefix_size);
+ if (total_size - prefix_size > MAX_SOMBE_CHAIN_VERTICES) {
+ DEBUG_PRINTF("suffix has %zu vertices, fail\n",
+ total_size - prefix_size);
+ return false;
+ }
+
+ clearReports(*prefix);
+ for (auto u : inv_adjacent_vertices_range(prefix->accept, *prefix)) {
+ (*prefix)[u].reports.insert(0);
+ }
+
+ dumpHolder(*prefix, 0, "full_haiglit_prefix", grey);
+
+ CharReach escapes;
+ bool stuck = isPossibleLock(g, picked, info, &escapes);
+ if (stuck) {
+ NGHolder gg;
+ fillHolderForLockCheck(&gg, g, info, picked);
+
+ stuck = firstMatchIsFirst(gg);
+ }
+
+ DEBUG_PRINTF("stuck = %d\n", (int)stuck);
+
+ // Note: no-one should ever pay attention to the root plan's som_loc_in.
+ plan->emplace_back(prefix, escapes, false, 0);
+ plan->back().no_implement = true;
+
+ dumpHolder(*plan->back().prefix, 22, "som_prefix", grey);
+
+ /* don't allow tree planning to mutate the graph */
+ if (!doSomPlanning(g, stuck, regions, info, picked, *plan, grey,
+ DISALLOW_MODIFY_HOLDER)) {
+ // Rollback SOM locations.
+ ssm.rollbackSomTo(numSomLocsBefore);
+
+ DEBUG_PRINTF("fail to chain\n");
+ return false;
+ }
+
+ return true;
+}
+
+static
+void setReportOnHaigPrefix(RoseBuild &rose, NGHolder &h) {
+ ReportID haig_report_id = rose.getNewNfaReport();
+ DEBUG_PRINTF("setting report id of %u\n", haig_report_id);
+
+ clearReports(h);
+ for (auto u : inv_adjacent_vertices_range(h.accept, h)) {
+ h[u].reports.clear();
+ h[u].reports.insert(haig_report_id);
+ }
+}
+
+static
+bool tryHaig(RoseBuild &rose, NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- som_type som, u32 somPrecision,
- map<u32, region_info>::const_iterator picked,
- shared_ptr<raw_som_dfa> *haig, shared_ptr<NGHolder> *haig_prefix,
- const Grey &grey) {
- DEBUG_PRINTF("trying to build a haig\n");
- shared_ptr<NGHolder> prefix = makePrefix(g, regions, picked->second,
- next(picked)->second);
- prefix->kind = NFA_PREFIX;
- setReportOnHaigPrefix(rose, *prefix);
- dumpHolder(*prefix, 0, "haig_prefix", grey);
- vector<vector<CharReach> > triggers; /* empty for prefix */
- *haig = attemptToBuildHaig(*prefix, som, somPrecision, triggers, grey);
- if (!*haig) {
- DEBUG_PRINTF("failed to haig\n");
- return false;
- }
- *haig_prefix = prefix;
- return true;
-}
-
-static
-void roseAddHaigLiteral(RoseBuild &tb, const shared_ptr<NGHolder> &prefix,
- const shared_ptr<raw_som_dfa> &haig,
- const ue2_literal &lit, const set<ReportID> &reports) {
- assert(prefix && haig);
-
- DEBUG_PRINTF("trying to build a sombe from %s\n", dumpString(lit).c_str());
-
- RoseInGraph ig;
- RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig);
- RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
-
- add_edge(s, v, RoseInEdgeProps(prefix, haig, lit.length()), ig);
-
- assert(!reports.empty());
- RoseInVertex a = add_vertex(RoseInVertexProps::makeAccept(reports), ig);
- add_edge(v, a, RoseInEdgeProps(0U, 0U), ig);
-
- calcVertexOffsets(ig);
-
- UNUSED bool rv = tb.addSombeRose(ig);
- assert(rv); // TODO: recover from addRose failure
-}
-
-static
+ som_type som, u32 somPrecision,
+ map<u32, region_info>::const_iterator picked,
+ shared_ptr<raw_som_dfa> *haig, shared_ptr<NGHolder> *haig_prefix,
+ const Grey &grey) {
+ DEBUG_PRINTF("trying to build a haig\n");
+ shared_ptr<NGHolder> prefix = makePrefix(g, regions, picked->second,
+ next(picked)->second);
+ prefix->kind = NFA_PREFIX;
+ setReportOnHaigPrefix(rose, *prefix);
+ dumpHolder(*prefix, 0, "haig_prefix", grey);
+ vector<vector<CharReach> > triggers; /* empty for prefix */
+ *haig = attemptToBuildHaig(*prefix, som, somPrecision, triggers, grey);
+ if (!*haig) {
+ DEBUG_PRINTF("failed to haig\n");
+ return false;
+ }
+ *haig_prefix = prefix;
+ return true;
+}
+
+static
+void roseAddHaigLiteral(RoseBuild &tb, const shared_ptr<NGHolder> &prefix,
+ const shared_ptr<raw_som_dfa> &haig,
+ const ue2_literal &lit, const set<ReportID> &reports) {
+ assert(prefix && haig);
+
+ DEBUG_PRINTF("trying to build a sombe from %s\n", dumpString(lit).c_str());
+
+ RoseInGraph ig;
+ RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig);
+ RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
+
+ add_edge(s, v, RoseInEdgeProps(prefix, haig, lit.length()), ig);
+
+ assert(!reports.empty());
+ RoseInVertex a = add_vertex(RoseInVertexProps::makeAccept(reports), ig);
+ add_edge(v, a, RoseInEdgeProps(0U, 0U), ig);
+
+ calcVertexOffsets(ig);
+
+ UNUSED bool rv = tb.addSombeRose(ig);
+ assert(rv); // TODO: recover from addRose failure
+}
+
+static
sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
u32 comp_id, som_type som,
const unordered_map<NFAVertex, u32> &regions,
- const map<u32, region_info> &info,
- map<u32, region_info>::const_iterator lower_bound) {
- DEBUG_PRINTF("entry\n");
- assert(g.kind == NFA_OUTFIX);
- const CompileContext &cc = ng.cc;
- ReportManager &rm = ng.rm;
- SomSlotManager &ssm = ng.ssm;
-
+ const map<u32, region_info> &info,
+ map<u32, region_info>::const_iterator lower_bound) {
+ DEBUG_PRINTF("entry\n");
+ assert(g.kind == NFA_OUTFIX);
+ const CompileContext &cc = ng.cc;
+ ReportManager &rm = ng.rm;
+ SomSlotManager &ssm = ng.ssm;
+
if (!cc.grey.allowHaigLit) {
- return SOMBE_FAIL;
- }
-
- const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */
- u32 som_loc = ssm.getPrivateSomSlot();
-
+ return SOMBE_FAIL;
+ }
+
+ const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */
+ u32 som_loc = ssm.getPrivateSomSlot();
+
if (!checkViolet(rm, g, false, cc) && !isImplementableNFA(g, &rm, cc)) {
- // This is an optimisation: if we can't build a Haig from a portion of
- // the graph, then we won't be able to manage it as an outfix either
- // when we fall back.
+ // This is an optimisation: if we can't build a Haig from a portion of
+ // the graph, then we won't be able to manage it as an outfix either
+ // when we fall back.
throw CompileError(expr.index, "Pattern is too large.");
- }
-
- while (1) {
- DEBUG_PRINTF("lower bound is %u\n", lower_bound->first);
- ue2_literal s;
- map<u32, region_info>::const_iterator lit
- = findLaterLiteral(g, info, lower_bound, s, cc.grey);
- if (lit == info.end()) {
- DEBUG_PRINTF("failed to find literal\n");
- ssm.rollbackSomTo(numSomLocsBefore);
- return SOMBE_FAIL;
- }
- DEBUG_PRINTF("test literal: %s [r=%u]\n", dumpString(s).c_str(),
- lit->first);
-
- if (s.length() > MAX_MASK2_WIDTH && mixed_sensitivity(s)) {
- DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this\n");
- lower_bound = lit;
- ++lower_bound;
- continue;
- }
-
- shared_ptr<raw_som_dfa> haig;
- shared_ptr<NGHolder> haig_prefix;
- map<u32, region_info>::const_iterator haig_reg = lit;
-
- if (edge(lit->second.exits.front(), g.acceptEod, g).second) {
- /* TODO: handle */
- ssm.rollbackSomTo(numSomLocsBefore);
- return SOMBE_FAIL;
- }
-
- advance(haig_reg, -(s32)s.length());
-
- if (!haig_reg->first && haig_reg->second.full.size() == 2) {
- /* just starts */
-
- /* TODO: make below assertion true, reset checks could be stronger
- * (12356)
- */
- /* assert(!attemptToBuildChainAfterSombe(ng, g, info, lit, cc.grey,
- &plan)); */
-
- lower_bound = lit;
- ++lower_bound;
- continue; /* somebody else should have been able to chain */
- }
-
- bool ok = true;
- set<ReportID> rep;
- if (next(lit) != info.end()) {
- /* non terminal literal */
-
- /* TODO: handle edges to accept ? */
- vector<som_plan> plan;
- if (edge(lit->second.exits.front(), g.accept, g).second) {
- insert(&rep, g[lit->second.exits.front()].reports);
- remove_edge(lit->second.exits.front(), g.accept, g);
- g[lit->second.exits.front()].reports.clear();
-
- /* Note: we can mess with the graph as this is the last literal
- * we will find and on failure the graph will be thrown away */
- }
-
- ok = attemptToBuildChainAfterSombe(ssm, g, regions, info, lit,
- cc.grey, &plan);
- ok = ok && tryHaig(*ng.rose, g, regions, som, ssm.somPrecision(),
- haig_reg, &haig, &haig_prefix, cc.grey);
-
- if (!ok) {
- DEBUG_PRINTF(":( going to next attempt\n");
- goto next_try;
- }
-
+ }
+
+ while (1) {
+ DEBUG_PRINTF("lower bound is %u\n", lower_bound->first);
+ ue2_literal s;
+ map<u32, region_info>::const_iterator lit
+ = findLaterLiteral(g, info, lower_bound, s, cc.grey);
+ if (lit == info.end()) {
+ DEBUG_PRINTF("failed to find literal\n");
+ ssm.rollbackSomTo(numSomLocsBefore);
+ return SOMBE_FAIL;
+ }
+ DEBUG_PRINTF("test literal: %s [r=%u]\n", dumpString(s).c_str(),
+ lit->first);
+
+ if (s.length() > MAX_MASK2_WIDTH && mixed_sensitivity(s)) {
+ DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this\n");
+ lower_bound = lit;
+ ++lower_bound;
+ continue;
+ }
+
+ shared_ptr<raw_som_dfa> haig;
+ shared_ptr<NGHolder> haig_prefix;
+ map<u32, region_info>::const_iterator haig_reg = lit;
+
+ if (edge(lit->second.exits.front(), g.acceptEod, g).second) {
+ /* TODO: handle */
+ ssm.rollbackSomTo(numSomLocsBefore);
+ return SOMBE_FAIL;
+ }
+
+ advance(haig_reg, -(s32)s.length());
+
+ if (!haig_reg->first && haig_reg->second.full.size() == 2) {
+ /* just starts */
+
+ /* TODO: make below assertion true, reset checks could be stronger
+ * (12356)
+ */
+ /* assert(!attemptToBuildChainAfterSombe(ng, g, info, lit, cc.grey,
+ &plan)); */
+
+ lower_bound = lit;
+ ++lower_bound;
+ continue; /* somebody else should have been able to chain */
+ }
+
+ bool ok = true;
+ set<ReportID> rep;
+ if (next(lit) != info.end()) {
+ /* non terminal literal */
+
+ /* TODO: handle edges to accept ? */
+ vector<som_plan> plan;
+ if (edge(lit->second.exits.front(), g.accept, g).second) {
+ insert(&rep, g[lit->second.exits.front()].reports);
+ remove_edge(lit->second.exits.front(), g.accept, g);
+ g[lit->second.exits.front()].reports.clear();
+
+ /* Note: we can mess with the graph as this is the last literal
+ * we will find and on failure the graph will be thrown away */
+ }
+
+ ok = attemptToBuildChainAfterSombe(ssm, g, regions, info, lit,
+ cc.grey, &plan);
+ ok = ok && tryHaig(*ng.rose, g, regions, som, ssm.somPrecision(),
+ haig_reg, &haig, &haig_prefix, cc.grey);
+
+ if (!ok) {
+ DEBUG_PRINTF(":( going to next attempt\n");
+ goto next_try;
+ }
+
implementSomPlan(ng, expr, comp_id, g, plan, som_loc);
-
- Report ir = makeCallback(0U, 0);
- assert(!plan.empty());
- if (plan.front().is_reset) {
- ir.type = INTERNAL_SOM_LOC_SET_FROM;
- } else {
- ir.type = INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE;
- }
- ir.onmatch = som_loc;
- rep.insert(rm.getInternalId(ir));
- } else {
- /* terminal literal */
- ok = tryHaig(*ng.rose, g, regions, som, ssm.somPrecision(), haig_reg,
- &haig, &haig_prefix, cc.grey);
-
- /* find report */
- insert(&rep, g[lit->second.exits.front()].reports);
-
- /* TODO: som_loc is unused */
- }
-
- if (ok) {
- roseAddHaigLiteral(*ng.rose, haig_prefix, haig, s, rep);
- if (next(lit) != info.end()) {
- return SOMBE_HANDLED_INTERNAL;
- } else {
- ssm.rollbackSomTo(numSomLocsBefore);
- return SOMBE_HANDLED_ALL;
- }
- }
-next_try:
- lower_bound = lit;
- ++lower_bound;
- }
- assert(0);
- return SOMBE_FAIL;
-}
-
-static
-bool leadingLiterals(const NGHolder &g, set<ue2_literal> *lits,
- set<NFAVertex> *terminals) {
- /* TODO: smarter (topo) */
-#define MAX_LEADING_LITERALS 20
- set<NFAVertex> s_succ;
- insert(&s_succ, adjacent_vertices(g.start, g));
-
- set<NFAVertex> sds_succ;
- insert(&sds_succ, adjacent_vertices(g.startDs, g));
-
- if (!is_subset_of(s_succ, sds_succ)) {
- DEBUG_PRINTF("not floating\n");
- return false;
- }
-
- sds_succ.erase(g.startDs);
-
- map<NFAVertex, vector<ue2_literal> > curr;
- curr[g.startDs].push_back(ue2_literal());
-
- map<NFAVertex, set<NFAVertex> > seen;
- map<NFAVertex, vector<ue2_literal> > next;
-
- bool did_expansion = true;
- while (did_expansion) {
- did_expansion = false;
- u32 count = 0;
- assert(!curr.empty());
- for (const auto &m : curr) {
- const NFAVertex u = m.first;
- const vector<ue2_literal> &base = m.second;
+
+ Report ir = makeCallback(0U, 0);
+ assert(!plan.empty());
+ if (plan.front().is_reset) {
+ ir.type = INTERNAL_SOM_LOC_SET_FROM;
+ } else {
+ ir.type = INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE;
+ }
+ ir.onmatch = som_loc;
+ rep.insert(rm.getInternalId(ir));
+ } else {
+ /* terminal literal */
+ ok = tryHaig(*ng.rose, g, regions, som, ssm.somPrecision(), haig_reg,
+ &haig, &haig_prefix, cc.grey);
+
+ /* find report */
+ insert(&rep, g[lit->second.exits.front()].reports);
+
+ /* TODO: som_loc is unused */
+ }
+
+ if (ok) {
+ roseAddHaigLiteral(*ng.rose, haig_prefix, haig, s, rep);
+ if (next(lit) != info.end()) {
+ return SOMBE_HANDLED_INTERNAL;
+ } else {
+ ssm.rollbackSomTo(numSomLocsBefore);
+ return SOMBE_HANDLED_ALL;
+ }
+ }
+next_try:
+ lower_bound = lit;
+ ++lower_bound;
+ }
+ assert(0);
+ return SOMBE_FAIL;
+}
+
+static
+bool leadingLiterals(const NGHolder &g, set<ue2_literal> *lits,
+ set<NFAVertex> *terminals) {
+ /* TODO: smarter (topo) */
+#define MAX_LEADING_LITERALS 20
+ set<NFAVertex> s_succ;
+ insert(&s_succ, adjacent_vertices(g.start, g));
+
+ set<NFAVertex> sds_succ;
+ insert(&sds_succ, adjacent_vertices(g.startDs, g));
+
+ if (!is_subset_of(s_succ, sds_succ)) {
+ DEBUG_PRINTF("not floating\n");
+ return false;
+ }
+
+ sds_succ.erase(g.startDs);
+
+ map<NFAVertex, vector<ue2_literal> > curr;
+ curr[g.startDs].push_back(ue2_literal());
+
+ map<NFAVertex, set<NFAVertex> > seen;
+ map<NFAVertex, vector<ue2_literal> > next;
+
+ bool did_expansion = true;
+ while (did_expansion) {
+ did_expansion = false;
+ u32 count = 0;
+ assert(!curr.empty());
+ for (const auto &m : curr) {
+ const NFAVertex u = m.first;
+ const vector<ue2_literal> &base = m.second;
DEBUG_PRINTF("expanding from %zu\n", g[u].index);
- for (auto v : adjacent_vertices_range(u, g)) {
- if (v == g.startDs) {
- continue;
- }
- if (contains(seen[u], v)) {
- DEBUG_PRINTF("loop\n");
- goto skip_to_next_terminal;
- }
- if (is_any_accept(v, g) || is_match_vertex(v, g)) {
- DEBUG_PRINTF("match\n");
- goto skip_to_next_terminal;
- }
+ for (auto v : adjacent_vertices_range(u, g)) {
+ if (v == g.startDs) {
+ continue;
+ }
+ if (contains(seen[u], v)) {
+ DEBUG_PRINTF("loop\n");
+ goto skip_to_next_terminal;
+ }
+ if (is_any_accept(v, g) || is_match_vertex(v, g)) {
+ DEBUG_PRINTF("match\n");
+ goto skip_to_next_terminal;
+ }
if (g[v].char_reach.count() > 2 * MAX_LEADING_LITERALS) {
- DEBUG_PRINTF("wide\n");
- goto skip_to_next_terminal;
- }
- }
-
- for (auto v : adjacent_vertices_range(u, g)) {
- assert(!contains(seen[u], v));
- if (v == g.startDs) {
- continue;
- }
- insert(&seen[v], seen[u]);
- seen[v].insert(v);
- CharReach cr = g[v].char_reach;
- vector<ue2_literal> &out = next[v];
-
+ DEBUG_PRINTF("wide\n");
+ goto skip_to_next_terminal;
+ }
+ }
+
+ for (auto v : adjacent_vertices_range(u, g)) {
+ assert(!contains(seen[u], v));
+ if (v == g.startDs) {
+ continue;
+ }
+ insert(&seen[v], seen[u]);
+ seen[v].insert(v);
+ CharReach cr = g[v].char_reach;
+ vector<ue2_literal> &out = next[v];
+
DEBUG_PRINTF("expanding to %zu (|| = %zu)\n", g[v].index,
cr.count());
- for (size_t c = cr.find_first(); c != CharReach::npos;
- c = cr.find_next(c)) {
- bool nocase = ourisalpha(c) && cr.test(mytoupper(c))
- && cr.test(mytolower(c));
-
- if (nocase && (char)c == mytolower(c)) {
- continue; /* uppercase already handled us */
- }
-
- for (const auto &lit : base) {
- if (count >= MAX_LEADING_LITERALS) {
- DEBUG_PRINTF("count %u\n", count);
- goto exit;
- }
- did_expansion = true;
- out.push_back(lit);
- out.back().push_back(c, nocase);
- count++;
- if (out.back().length() > MAX_MASK2_WIDTH
- && mixed_sensitivity(out.back())) {
- goto exit;
- }
-
- }
- }
- }
- if (0) {
- skip_to_next_terminal:
- insert(&next[u], next[u].end(), base);
- count += base.size();
- if (count > MAX_LEADING_LITERALS) {
- DEBUG_PRINTF("count %u\n", count);
- goto exit;
- }
- }
- }
-
- curr.swap(next);
- next.clear();
- };
- exit:;
- for (const auto &m : curr) {
- NFAVertex t = m.first;
- if (t == g.startDs) {
- assert(curr.size() == 1);
- return false;
- }
- assert(!is_special(t, g));
- terminals->insert(t);
- insert(lits, m.second);
- }
- assert(lits->size() <= MAX_LEADING_LITERALS);
- return !lits->empty();
-}
-
-static
-bool splitOffLeadingLiterals(const NGHolder &g, set<ue2_literal> *lit_out,
- NGHolder *rhs) {
- DEBUG_PRINTF("looking for a leading literals\n");
-
- set<NFAVertex> terms;
- if (!leadingLiterals(g, lit_out, &terms)) {
- return false;
- }
-
- for (UNUSED const auto &lit : *lit_out) {
- DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(lit).c_str(),
- lit.length());
- }
-
- /* need to validate that it is a clean split */
- assert(!terms.empty());
- set<NFAVertex> adj_term1;
- insert(&adj_term1, adjacent_vertices(*terms.begin(), g));
- for (auto v : terms) {
+ for (size_t c = cr.find_first(); c != CharReach::npos;
+ c = cr.find_next(c)) {
+ bool nocase = ourisalpha(c) && cr.test(mytoupper(c))
+ && cr.test(mytolower(c));
+
+ if (nocase && (char)c == mytolower(c)) {
+ continue; /* uppercase already handled us */
+ }
+
+ for (const auto &lit : base) {
+ if (count >= MAX_LEADING_LITERALS) {
+ DEBUG_PRINTF("count %u\n", count);
+ goto exit;
+ }
+ did_expansion = true;
+ out.push_back(lit);
+ out.back().push_back(c, nocase);
+ count++;
+ if (out.back().length() > MAX_MASK2_WIDTH
+ && mixed_sensitivity(out.back())) {
+ goto exit;
+ }
+
+ }
+ }
+ }
+ if (0) {
+ skip_to_next_terminal:
+ insert(&next[u], next[u].end(), base);
+ count += base.size();
+ if (count > MAX_LEADING_LITERALS) {
+ DEBUG_PRINTF("count %u\n", count);
+ goto exit;
+ }
+ }
+ }
+
+ curr.swap(next);
+ next.clear();
+ };
+ exit:;
+ for (const auto &m : curr) {
+ NFAVertex t = m.first;
+ if (t == g.startDs) {
+ assert(curr.size() == 1);
+ return false;
+ }
+ assert(!is_special(t, g));
+ terminals->insert(t);
+ insert(lits, m.second);
+ }
+ assert(lits->size() <= MAX_LEADING_LITERALS);
+ return !lits->empty();
+}
+
+static
+bool splitOffLeadingLiterals(const NGHolder &g, set<ue2_literal> *lit_out,
+ NGHolder *rhs) {
+ DEBUG_PRINTF("looking for a leading literals\n");
+
+ set<NFAVertex> terms;
+ if (!leadingLiterals(g, lit_out, &terms)) {
+ return false;
+ }
+
+ for (UNUSED const auto &lit : *lit_out) {
+ DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(lit).c_str(),
+ lit.length());
+ }
+
+ /* need to validate that it is a clean split */
+ assert(!terms.empty());
+ set<NFAVertex> adj_term1;
+ insert(&adj_term1, adjacent_vertices(*terms.begin(), g));
+ for (auto v : terms) {
DEBUG_PRINTF("term %zu\n", g[v].index);
- set<NFAVertex> temp;
- insert(&temp, adjacent_vertices(v, g));
- if (temp != adj_term1) {
- DEBUG_PRINTF("bad split\n");
- return false;
- }
- }
-
+ set<NFAVertex> temp;
+ insert(&temp, adjacent_vertices(v, g));
+ if (temp != adj_term1) {
+ DEBUG_PRINTF("bad split\n");
+ return false;
+ }
+ }
+
unordered_map<NFAVertex, NFAVertex> rhs_map;
- vector<NFAVertex> pivots;
- insert(&pivots, pivots.end(), adj_term1);
- splitRHS(g, pivots, rhs, &rhs_map);
-
- assert(is_triggered(*rhs));
- return true;
-}
-
-static
-void findBestLiteral(const NGHolder &g,
+ vector<NFAVertex> pivots;
+ insert(&pivots, pivots.end(), adj_term1);
+ splitRHS(g, pivots, rhs, &rhs_map);
+
+ assert(is_triggered(*rhs));
+ return true;
+}
+
+static
+void findBestLiteral(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- ue2_literal *lit_out, NFAVertex *v,
- const CompileContext &cc) {
- map<u32, region_info> info;
- buildRegionMapping(g, regions, info, false);
-
- ue2_literal best;
+ ue2_literal *lit_out, NFAVertex *v,
+ const CompileContext &cc) {
+ map<u32, region_info> info;
+ buildRegionMapping(g, regions, info, false);
+
+ ue2_literal best;
NFAVertex best_v = NGHolder::null_vertex();
-
- map<u32, region_info>::const_iterator lit = info.begin();
- while (1) {
- ue2_literal s;
- lit = findLaterLiteral(g, info, lit, s, cc.grey);
- if (lit == info.end()) {
- break;
- }
- DEBUG_PRINTF("test literal: %s [r=%u]\n", dumpString(s).c_str(),
- lit->first);
-
- if (s.length() > MAX_MASK2_WIDTH && mixed_sensitivity(s)) {
- DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this\n");
- ++lit;
- continue;
- }
-
- if (s.length() > best.length()) {
- best = s;
- assert(!lit->second.exits.empty());
- best_v = lit->second.exits[0];
- }
-
- ++lit;
- }
-
- lit_out->swap(best);
- *v = best_v;
-}
-
-static
-bool splitOffBestLiteral(const NGHolder &g,
+
+ map<u32, region_info>::const_iterator lit = info.begin();
+ while (1) {
+ ue2_literal s;
+ lit = findLaterLiteral(g, info, lit, s, cc.grey);
+ if (lit == info.end()) {
+ break;
+ }
+ DEBUG_PRINTF("test literal: %s [r=%u]\n", dumpString(s).c_str(),
+ lit->first);
+
+ if (s.length() > MAX_MASK2_WIDTH && mixed_sensitivity(s)) {
+ DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this\n");
+ ++lit;
+ continue;
+ }
+
+ if (s.length() > best.length()) {
+ best = s;
+ assert(!lit->second.exits.empty());
+ best_v = lit->second.exits[0];
+ }
+
+ ++lit;
+ }
+
+ lit_out->swap(best);
+ *v = best_v;
+}
+
+static
+bool splitOffBestLiteral(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- ue2_literal *lit_out, NGHolder *lhs, NGHolder *rhs,
- const CompileContext &cc) {
+ ue2_literal *lit_out, NGHolder *lhs, NGHolder *rhs,
+ const CompileContext &cc) {
NFAVertex v = NGHolder::null_vertex();
-
- findBestLiteral(g, regions, lit_out, &v, cc);
- if (lit_out->empty()) {
- return false;
- }
-
- DEBUG_PRINTF("literal is '%s'\n", dumpString(*lit_out).c_str());
-
+
+ findBestLiteral(g, regions, lit_out, &v, cc);
+ if (lit_out->empty()) {
+ return false;
+ }
+
+ DEBUG_PRINTF("literal is '%s'\n", dumpString(*lit_out).c_str());
+
unordered_map<NFAVertex, NFAVertex> lhs_map;
unordered_map<NFAVertex, NFAVertex> rhs_map;
-
- splitGraph(g, v, lhs, &lhs_map, rhs, &rhs_map);
-
+
+ splitGraph(g, v, lhs, &lhs_map, rhs, &rhs_map);
+
DEBUG_PRINTF("v = %zu\n", g[v].index);
-
- return true;
-}
-
+
+ return true;
+}
+
/**
* Replace the given graph's EXTERNAL_CALLBACK reports with
* EXTERNAL_CALLBACK_SOM_PASS reports.
@@ -2442,706 +2442,706 @@ void makeReportsSomPass(ReportManager &rm, NGHolder &g) {
}
}
-static
-bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) {
- ue2_literal lit;
- shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
- if (!ng.cc.grey.allowLitHaig) {
- return false;
- }
-
- dumpHolder(g, 90, "lithaig_full", ng.cc.grey);
-
- if (!splitOffLeadingLiteral(g, &lit, &*rhs)) {
- DEBUG_PRINTF("no literal\n");
- return false;
- }
-
- if (lit.length() < ng.cc.grey.minRoseLiteralLength) {
- DEBUG_PRINTF("lit too short\n");
- return false;
- }
-
- assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit));
-
+static
+bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) {
+ ue2_literal lit;
+ shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
+ if (!ng.cc.grey.allowLitHaig) {
+ return false;
+ }
+
+ dumpHolder(g, 90, "lithaig_full", ng.cc.grey);
+
+ if (!splitOffLeadingLiteral(g, &lit, &*rhs)) {
+ DEBUG_PRINTF("no literal\n");
+ return false;
+ }
+
+ if (lit.length() < ng.cc.grey.minRoseLiteralLength) {
+ DEBUG_PRINTF("lit too short\n");
+ return false;
+ }
+
+ assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit));
+
makeReportsSomPass(ng.rm, *rhs);
- dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey);
-
- vector<vector<CharReach> > triggers;
- triggers.push_back(as_cr_seq(lit));
-
- assert(rhs->kind == NFA_SUFFIX);
- shared_ptr<raw_som_dfa> haig
- = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), triggers,
- ng.cc.grey, false /* lit implies adv som */);
- if (!haig) {
- DEBUG_PRINTF("failed to haig\n");
- return false;
- }
- DEBUG_PRINTF("haig %p\n", haig.get());
-
- RoseInGraph ig;
- RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig);
- RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
- add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);
-
- RoseInVertex a
- = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig);
- add_edge(v, a, RoseInEdgeProps(haig), ig);
-
- calcVertexOffsets(ig);
-
- return ng.rose->addSombeRose(ig);
-}
-
-static
-bool doHaigLitHaigSom(NG &ng, NGHolder &g,
+ dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey);
+
+ vector<vector<CharReach> > triggers;
+ triggers.push_back(as_cr_seq(lit));
+
+ assert(rhs->kind == NFA_SUFFIX);
+ shared_ptr<raw_som_dfa> haig
+ = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), triggers,
+ ng.cc.grey, false /* lit implies adv som */);
+ if (!haig) {
+ DEBUG_PRINTF("failed to haig\n");
+ return false;
+ }
+ DEBUG_PRINTF("haig %p\n", haig.get());
+
+ RoseInGraph ig;
+ RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig);
+ RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
+ add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);
+
+ RoseInVertex a
+ = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig);
+ add_edge(v, a, RoseInEdgeProps(haig), ig);
+
+ calcVertexOffsets(ig);
+
+ return ng.rose->addSombeRose(ig);
+}
+
+static
+bool doHaigLitHaigSom(NG &ng, NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- som_type som) {
- if (!ng.cc.grey.allowLitHaig) {
- return false;
- }
-
- // In streaming mode, we can only delay up to our max available history.
- const u32 max_delay =
- ng.cc.streaming ? ng.cc.grey.maxHistoryAvailable : MO_INVALID_IDX;
-
- ue2_literal lit;
- shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
- shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
- if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) {
- return false;
- }
-
- DEBUG_PRINTF("split off best lit '%s' (len=%zu)\n", dumpString(lit).c_str(),
- lit.length());
-
- if (lit.length() < ng.cc.grey.minRoseLiteralLength) {
- DEBUG_PRINTF("lit too short\n");
- return false;
- }
-
- assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit));
-
- if (edge(rhs->start, rhs->acceptEod, *rhs).second) {
- return false; /* TODO: handle */
- }
-
+ som_type som) {
+ if (!ng.cc.grey.allowLitHaig) {
+ return false;
+ }
+
+ // In streaming mode, we can only delay up to our max available history.
+ const u32 max_delay =
+ ng.cc.streaming ? ng.cc.grey.maxHistoryAvailable : MO_INVALID_IDX;
+
+ ue2_literal lit;
+ shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
+ shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
+ if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) {
+ return false;
+ }
+
+ DEBUG_PRINTF("split off best lit '%s' (len=%zu)\n", dumpString(lit).c_str(),
+ lit.length());
+
+ if (lit.length() < ng.cc.grey.minRoseLiteralLength) {
+ DEBUG_PRINTF("lit too short\n");
+ return false;
+ }
+
+ assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit));
+
+ if (edge(rhs->start, rhs->acceptEod, *rhs).second) {
+ return false; /* TODO: handle */
+ }
+
makeReportsSomPass(ng.rm, *rhs);
- dumpHolder(*lhs, 92, "haiglithaig_lhs", ng.cc.grey);
- dumpHolder(*rhs, 93, "haiglithaig_rhs", ng.cc.grey);
-
- u32 delay = removeTrailingLiteralStates(*lhs, lit, max_delay);
-
- RoseInGraph ig;
- RoseInVertex s
- = add_vertex(RoseInVertexProps::makeStart(false), ig);
- RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
-
- bool lhs_all_vac = true;
+ dumpHolder(*lhs, 92, "haiglithaig_lhs", ng.cc.grey);
+ dumpHolder(*rhs, 93, "haiglithaig_rhs", ng.cc.grey);
+
+ u32 delay = removeTrailingLiteralStates(*lhs, lit, max_delay);
+
+ RoseInGraph ig;
+ RoseInVertex s
+ = add_vertex(RoseInVertexProps::makeStart(false), ig);
+ RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
+
+ bool lhs_all_vac = true;
NGHolder::adjacency_iterator ai, ae;
- for (tie(ai, ae) = adjacent_vertices(lhs->startDs, *lhs);
- ai != ae && lhs_all_vac; ++ai) {
- if (!is_special(*ai, *lhs)) {
- lhs_all_vac = false;
- }
- }
- for (tie(ai, ae) = adjacent_vertices(lhs->start, *lhs);
- ai != ae && lhs_all_vac; ++ai) {
- if (!is_special(*ai, *lhs)) {
- lhs_all_vac = false;
- }
- }
-
- if (lhs_all_vac) {
- /* lhs is completely vacuous --> no prefix needed */
- add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);
- } else {
- assert(delay == lit.length());
- setReportOnHaigPrefix(*ng.rose, *lhs);
- vector<vector<CharReach> > prefix_triggers; /* empty for prefix */
- assert(lhs->kind == NFA_PREFIX);
- shared_ptr<raw_som_dfa> l_haig
- = attemptToBuildHaig(*lhs, som, ng.ssm.somPrecision(),
- prefix_triggers, ng.cc.grey);
- if (!l_haig) {
- DEBUG_PRINTF("failed to haig\n");
- return false;
- }
- DEBUG_PRINTF("lhs haig %p\n", l_haig.get());
-
- add_edge(s, v, RoseInEdgeProps(lhs, l_haig, delay), ig);
- }
-
- if (!edge(rhs->start, rhs->accept, *rhs).second) {
- assert(rhs->kind == NFA_SUFFIX);
-
- vector<vector<CharReach> > triggers;
- triggers.push_back(as_cr_seq(lit));
-
- ue2_literal lit2;
- if (getTrailingLiteral(g, &lit2)
- && lit2.length() >= ng.cc.grey.minRoseLiteralLength
- && minStringPeriod(lit2) >= 2) {
-
- /* TODO: handle delay */
- size_t overlap = maxOverlap(lit, lit2, 0);
- u32 delay2 = min((size_t)max_delay, lit2.length() - overlap);
- delay2 = removeTrailingLiteralStates(*rhs, lit2, delay2);
- rhs->kind = NFA_INFIX;
- assert(delay2 <= lit2.length());
- setReportOnHaigPrefix(*ng.rose, *rhs);
-
- shared_ptr<raw_som_dfa> m_haig
- = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(),
- triggers, ng.cc.grey, true);
- DEBUG_PRINTF("mhs haig %p\n", m_haig.get());
- if (!m_haig) {
- DEBUG_PRINTF("failed to haig\n");
- return false;
- }
-
- RoseInVertex w
- = add_vertex(RoseInVertexProps::makeLiteral(lit2), ig);
- add_edge(v, w, RoseInEdgeProps(rhs, m_haig, delay2), ig);
-
- NFAVertex reporter = getSoleSourceVertex(g, g.accept);
- assert(reporter);
- const auto &reports = g[reporter].reports;
- RoseInVertex a =
- add_vertex(RoseInVertexProps::makeAccept(reports), ig);
- add_edge(w, a, RoseInEdgeProps(0U, 0U), ig);
- } else {
- /* TODO: analysis to see if som is in fact always increasing */
- shared_ptr<raw_som_dfa> r_haig
- = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(),
- triggers, ng.cc.grey, true);
- DEBUG_PRINTF("rhs haig %p\n", r_haig.get());
- if (!r_haig) {
- DEBUG_PRINTF("failed to haig\n");
- return false;
- }
- RoseInVertex a
- = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()),
- ig);
- add_edge(v, a, RoseInEdgeProps(r_haig), ig);
- }
- } else {
- DEBUG_PRINTF("has start->accept edge\n");
+ for (tie(ai, ae) = adjacent_vertices(lhs->startDs, *lhs);
+ ai != ae && lhs_all_vac; ++ai) {
+ if (!is_special(*ai, *lhs)) {
+ lhs_all_vac = false;
+ }
+ }
+ for (tie(ai, ae) = adjacent_vertices(lhs->start, *lhs);
+ ai != ae && lhs_all_vac; ++ai) {
+ if (!is_special(*ai, *lhs)) {
+ lhs_all_vac = false;
+ }
+ }
+
+ if (lhs_all_vac) {
+ /* lhs is completely vacuous --> no prefix needed */
+ add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);
+ } else {
+ assert(delay == lit.length());
+ setReportOnHaigPrefix(*ng.rose, *lhs);
+ vector<vector<CharReach> > prefix_triggers; /* empty for prefix */
+ assert(lhs->kind == NFA_PREFIX);
+ shared_ptr<raw_som_dfa> l_haig
+ = attemptToBuildHaig(*lhs, som, ng.ssm.somPrecision(),
+ prefix_triggers, ng.cc.grey);
+ if (!l_haig) {
+ DEBUG_PRINTF("failed to haig\n");
+ return false;
+ }
+ DEBUG_PRINTF("lhs haig %p\n", l_haig.get());
+
+ add_edge(s, v, RoseInEdgeProps(lhs, l_haig, delay), ig);
+ }
+
+ if (!edge(rhs->start, rhs->accept, *rhs).second) {
+ assert(rhs->kind == NFA_SUFFIX);
+
+ vector<vector<CharReach> > triggers;
+ triggers.push_back(as_cr_seq(lit));
+
+ ue2_literal lit2;
+ if (getTrailingLiteral(g, &lit2)
+ && lit2.length() >= ng.cc.grey.minRoseLiteralLength
+ && minStringPeriod(lit2) >= 2) {
+
+ /* TODO: handle delay */
+ size_t overlap = maxOverlap(lit, lit2, 0);
+ u32 delay2 = min((size_t)max_delay, lit2.length() - overlap);
+ delay2 = removeTrailingLiteralStates(*rhs, lit2, delay2);
+ rhs->kind = NFA_INFIX;
+ assert(delay2 <= lit2.length());
+ setReportOnHaigPrefix(*ng.rose, *rhs);
+
+ shared_ptr<raw_som_dfa> m_haig
+ = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(),
+ triggers, ng.cc.grey, true);
+ DEBUG_PRINTF("mhs haig %p\n", m_haig.get());
+ if (!m_haig) {
+ DEBUG_PRINTF("failed to haig\n");
+ return false;
+ }
+
+ RoseInVertex w
+ = add_vertex(RoseInVertexProps::makeLiteral(lit2), ig);
+ add_edge(v, w, RoseInEdgeProps(rhs, m_haig, delay2), ig);
+
+ NFAVertex reporter = getSoleSourceVertex(g, g.accept);
+ assert(reporter);
+ const auto &reports = g[reporter].reports;
+ RoseInVertex a =
+ add_vertex(RoseInVertexProps::makeAccept(reports), ig);
+ add_edge(w, a, RoseInEdgeProps(0U, 0U), ig);
+ } else {
+ /* TODO: analysis to see if som is in fact always increasing */
+ shared_ptr<raw_som_dfa> r_haig
+ = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(),
+ triggers, ng.cc.grey, true);
+ DEBUG_PRINTF("rhs haig %p\n", r_haig.get());
+ if (!r_haig) {
+ DEBUG_PRINTF("failed to haig\n");
+ return false;
+ }
+ RoseInVertex a
+ = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()),
+ ig);
+ add_edge(v, a, RoseInEdgeProps(r_haig), ig);
+ }
+ } else {
+ DEBUG_PRINTF("has start->accept edge\n");
if (in_degree(g.acceptEod, g) > 1) {
- DEBUG_PRINTF("also has a path to EOD\n");
- return false;
- }
- NFAVertex reporter = getSoleSourceVertex(g, g.accept);
- if (!reporter) {
- return false; /* TODO: later */
- }
- const auto &reports = g[reporter].reports;
- assert(!reports.empty());
- RoseInVertex a =
- add_vertex(RoseInVertexProps::makeAccept(reports), ig);
- add_edge(v, a, RoseInEdgeProps(0U, 0U), ig);
- }
-
- calcVertexOffsets(ig);
-
- return ng.rose->addSombeRose(ig);
-}
-
-static
-bool doMultiLitHaigSom(NG &ng, NGHolder &g, som_type som) {
- set<ue2_literal> lits;
- shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
- if (!ng.cc.grey.allowLitHaig) {
- return false;
- }
-
- dumpHolder(g, 90, "lithaig_full", ng.cc.grey);
-
- if (!splitOffLeadingLiterals(g, &lits, &*rhs)) {
- DEBUG_PRINTF("no literal\n");
- return false;
- }
-
+ DEBUG_PRINTF("also has a path to EOD\n");
+ return false;
+ }
+ NFAVertex reporter = getSoleSourceVertex(g, g.accept);
+ if (!reporter) {
+ return false; /* TODO: later */
+ }
+ const auto &reports = g[reporter].reports;
+ assert(!reports.empty());
+ RoseInVertex a =
+ add_vertex(RoseInVertexProps::makeAccept(reports), ig);
+ add_edge(v, a, RoseInEdgeProps(0U, 0U), ig);
+ }
+
+ calcVertexOffsets(ig);
+
+ return ng.rose->addSombeRose(ig);
+}
+
+static
+bool doMultiLitHaigSom(NG &ng, NGHolder &g, som_type som) {
+ set<ue2_literal> lits;
+ shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
+ if (!ng.cc.grey.allowLitHaig) {
+ return false;
+ }
+
+ dumpHolder(g, 90, "lithaig_full", ng.cc.grey);
+
+ if (!splitOffLeadingLiterals(g, &lits, &*rhs)) {
+ DEBUG_PRINTF("no literal\n");
+ return false;
+ }
+
makeReportsSomPass(ng.rm, *rhs);
- dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey);
-
- vector<vector<CharReach>> triggers;
- for (const auto &lit : lits) {
- if (lit.length() < ng.cc.grey.minRoseLiteralLength) {
- DEBUG_PRINTF("lit too short\n");
- return false;
- }
-
- assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit));
- triggers.push_back(as_cr_seq(lit));
- }
-
- bool unordered_som_triggers = true; /* TODO: check overlaps to ensure that
- * we can promise ordering */
-
- assert(rhs->kind == NFA_SUFFIX);
- shared_ptr<raw_som_dfa> haig
- = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), triggers,
- ng.cc.grey, unordered_som_triggers);
- if (!haig) {
- DEBUG_PRINTF("failed to haig\n");
- return false;
- }
- DEBUG_PRINTF("haig %p\n", haig.get());
-
- RoseInGraph ig;
- RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig);
-
- RoseInVertex a
- = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig);
-
- for (const auto &lit : lits) {
- RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
- add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);
- add_edge(v, a, RoseInEdgeProps(haig), ig);
- }
-
- calcVertexOffsets(ig);
-
- return ng.rose->addSombeRose(ig);
-}
-
-static
-bool trySombe(NG &ng, NGHolder &g, som_type som) {
- if (doLitHaigSom(ng, g, som)) {
- return true;
- }
-
- auto regions = assignRegions(g);
-
- if (doHaigLitHaigSom(ng, g, regions, som)) {
- return true;
- }
-
- if (doMultiLitHaigSom(ng, g, som)) {
- return true;
- }
-
- return false;
-}
-
-static
-map<u32, region_info>::const_iterator pickInitialSomCut(const NGHolder &g,
+ dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey);
+
+ vector<vector<CharReach>> triggers;
+ for (const auto &lit : lits) {
+ if (lit.length() < ng.cc.grey.minRoseLiteralLength) {
+ DEBUG_PRINTF("lit too short\n");
+ return false;
+ }
+
+ assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit));
+ triggers.push_back(as_cr_seq(lit));
+ }
+
+ bool unordered_som_triggers = true; /* TODO: check overlaps to ensure that
+ * we can promise ordering */
+
+ assert(rhs->kind == NFA_SUFFIX);
+ shared_ptr<raw_som_dfa> haig
+ = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), triggers,
+ ng.cc.grey, unordered_som_triggers);
+ if (!haig) {
+ DEBUG_PRINTF("failed to haig\n");
+ return false;
+ }
+ DEBUG_PRINTF("haig %p\n", haig.get());
+
+ RoseInGraph ig;
+ RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig);
+
+ RoseInVertex a
+ = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig);
+
+ for (const auto &lit : lits) {
+ RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
+ add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);
+ add_edge(v, a, RoseInEdgeProps(haig), ig);
+ }
+
+ calcVertexOffsets(ig);
+
+ return ng.rose->addSombeRose(ig);
+}
+
+static
+bool trySombe(NG &ng, NGHolder &g, som_type som) {
+ if (doLitHaigSom(ng, g, som)) {
+ return true;
+ }
+
+ auto regions = assignRegions(g);
+
+ if (doHaigLitHaigSom(ng, g, regions, som)) {
+ return true;
+ }
+
+ if (doMultiLitHaigSom(ng, g, som)) {
+ return true;
+ }
+
+ return false;
+}
+
+static
+map<u32, region_info>::const_iterator pickInitialSomCut(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const map<u32, region_info> &info,
- const vector<DepthMinMax> &depths) {
- map<u32, region_info>::const_iterator picked = info.end();
- for (map<u32, region_info>::const_iterator it = info.begin();
- it != info.end(); ++it) {
- if (it->second.exits.empty()) {
- assert(it == info.begin());
- continue;
- }
-
- if (!regionCanEstablishSom(g, regions, it->first, it->second.exits,
- depths)) {
- /* last region is as far as we can go */
- DEBUG_PRINTF("region %u is beyond the fixed region\n", it->first);
- break;
- }
- picked = it;
- }
-
- return picked;
-}
-
-static
-map<u32, region_info>::const_iterator tryForLaterRevNfaCut(const NGHolder &g,
+ const map<u32, region_info> &info,
+ const vector<DepthMinMax> &depths) {
+ map<u32, region_info>::const_iterator picked = info.end();
+ for (map<u32, region_info>::const_iterator it = info.begin();
+ it != info.end(); ++it) {
+ if (it->second.exits.empty()) {
+ assert(it == info.begin());
+ continue;
+ }
+
+ if (!regionCanEstablishSom(g, regions, it->first, it->second.exits,
+ depths)) {
+ /* last region is as far as we can go */
+ DEBUG_PRINTF("region %u is beyond the fixed region\n", it->first);
+ break;
+ }
+ picked = it;
+ }
+
+ return picked;
+}
+
+static
+map<u32, region_info>::const_iterator tryForLaterRevNfaCut(const NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const map<u32, region_info> &info,
- const vector<DepthMinMax> &depths,
- const map<u32, region_info>::const_iterator &orig,
- const CompileContext &cc) {
- DEBUG_PRINTF("trying for later rev nfa cut\n");
- assert(orig != info.end());
-
- vector<map<u32, region_info>::const_iterator> cands;
-
- map<u32, region_info>::const_iterator it = orig;
- ++it;
- for (; it != info.end(); ++it) {
- /* for simplicity */
- if (it->second.exits.size() != 1 || it->second.optional) {
- continue;
- }
- NFAVertex v = *it->second.exits.begin();
-
- if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
- continue; /* for simplicity would require external som nfa reports
- * as well. */
- }
-
- const depth &max_depth = depths[g[v].index].max;
- if (max_depth >
- depth(cc.grey.somMaxRevNfaLength - 1)) { /* virtual starts */
- continue;
- }
-
- if (max_depth > depth(MAX_REV_NFA_PREFIX)) {
- /* probably not a good idea, anyway */
- continue;
- }
-
- cands.push_back(it);
- }
-
- while (!cands.empty()) {
- map<u32, region_info>::const_iterator rv = cands.back();
- cands.pop_back();
-
- NFAVertex v = *rv->second.exits.begin();
-
- set<ue2_literal> lits = getLiteralSet(g, v);
- compressAndScore(lits);
- if (lits.empty()) {
- next_region:
- continue;
- }
- for (const auto &lit : lits) {
- if (lit.length() <= 3 || minStringPeriod(lit) < 2) {
- goto next_region;
- }
- }
-
- if (rv->second.enters.empty()
- || find(rv->second.full.begin(), rv->second.full.end(), g.startDs)
- != rv->second.full.end()) {
- continue;
- }
-
- if (!isMandRegionBetween(info.begin(), rv)
- && info.begin()->second.optional) {
- continue;
- }
-
- /* check to see if it is a reasonable size */
- auto prefix =
- makePrefix(g, regions, rv->second, next(rv)->second, false);
-
- NGHolder g_rev;
- reverseHolder(*prefix, g_rev);
- anchorStarts(g_rev);
-
+ const map<u32, region_info> &info,
+ const vector<DepthMinMax> &depths,
+ const map<u32, region_info>::const_iterator &orig,
+ const CompileContext &cc) {
+ DEBUG_PRINTF("trying for later rev nfa cut\n");
+ assert(orig != info.end());
+
+ vector<map<u32, region_info>::const_iterator> cands;
+
+ map<u32, region_info>::const_iterator it = orig;
+ ++it;
+ for (; it != info.end(); ++it) {
+ /* for simplicity */
+ if (it->second.exits.size() != 1 || it->second.optional) {
+ continue;
+ }
+ NFAVertex v = *it->second.exits.begin();
+
+ if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
+ continue; /* for simplicity would require external som nfa reports
+ * as well. */
+ }
+
+ const depth &max_depth = depths[g[v].index].max;
+ if (max_depth >
+ depth(cc.grey.somMaxRevNfaLength - 1)) { /* virtual starts */
+ continue;
+ }
+
+ if (max_depth > depth(MAX_REV_NFA_PREFIX)) {
+ /* probably not a good idea, anyway */
+ continue;
+ }
+
+ cands.push_back(it);
+ }
+
+ while (!cands.empty()) {
+ map<u32, region_info>::const_iterator rv = cands.back();
+ cands.pop_back();
+
+ NFAVertex v = *rv->second.exits.begin();
+
+ set<ue2_literal> lits = getLiteralSet(g, v);
+ compressAndScore(lits);
+ if (lits.empty()) {
+ next_region:
+ continue;
+ }
+ for (const auto &lit : lits) {
+ if (lit.length() <= 3 || minStringPeriod(lit) < 2) {
+ goto next_region;
+ }
+ }
+
+ if (rv->second.enters.empty()
+ || find(rv->second.full.begin(), rv->second.full.end(), g.startDs)
+ != rv->second.full.end()) {
+ continue;
+ }
+
+ if (!isMandRegionBetween(info.begin(), rv)
+ && info.begin()->second.optional) {
+ continue;
+ }
+
+ /* check to see if it is a reasonable size */
+ auto prefix =
+ makePrefix(g, regions, rv->second, next(rv)->second, false);
+
+ NGHolder g_rev;
+ reverseHolder(*prefix, g_rev);
+ anchorStarts(g_rev);
+
renumber_vertices(g_rev);
- g_rev.kind = NFA_REV_PREFIX;
- reduceGraphEquivalences(g_rev, cc);
- removeRedundancy(g_rev, SOM_NONE);
-
- if (num_vertices(g_rev) > 128) { /* too big */
- continue;
- }
-
- return rv;
- }
-
- return info.end();
-}
-
-static
-unique_ptr<NGHolder> makePrefixForChain(NGHolder &g,
+ g_rev.kind = NFA_REV_PREFIX;
+ reduceGraphEquivalences(g_rev, cc);
+ removeRedundancy(g_rev, SOM_NONE);
+
+ if (num_vertices(g_rev) > 128) { /* too big */
+ continue;
+ }
+
+ return rv;
+ }
+
+ return info.end();
+}
+
+static
+unique_ptr<NGHolder> makePrefixForChain(NGHolder &g,
const unordered_map<NFAVertex, u32> &regions,
- const map<u32, region_info> &info,
- const map<u32, region_info>::const_iterator &picked,
- vector<DepthMinMax> *depths, bool prefix_by_rev,
- ReportManager &rm) {
- DEBUG_PRINTF("making prefix for chain attempt\n");
- auto prefix =
- makePrefix(g, regions, picked->second, next(picked)->second, false);
-
- /* For the root SOM plan, we use a temporary SOM slot to start with so that
- * we don't have to do any complicated rollback operations if the call to
- * doSomPlanning() below fails. The temporary SOM slot is replaced with a
- * real one afterwards. */
- const u32 temp_som_loc = UINT32_MAX;
- setPrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_WRITABLE,
- temp_som_loc, *depths, prefix_by_rev);
-
- /* handle direct edge to accepts from region */
- if (edge(picked->second.exits.front(), g.accept, g).second
- || edge(picked->second.exits.front(), g.acceptEod, g).second) {
- map<u32, region_info>::const_iterator it = picked;
- do {
- makeSomRelReports(rm, g, it->second.exits, *depths);
- } while (it != info.begin() && it->second.optional && (it--)->first);
- }
-
- depths->clear(); /* renumbering invalidates depths */
+ const map<u32, region_info> &info,
+ const map<u32, region_info>::const_iterator &picked,
+ vector<DepthMinMax> *depths, bool prefix_by_rev,
+ ReportManager &rm) {
+ DEBUG_PRINTF("making prefix for chain attempt\n");
+ auto prefix =
+ makePrefix(g, regions, picked->second, next(picked)->second, false);
+
+ /* For the root SOM plan, we use a temporary SOM slot to start with so that
+ * we don't have to do any complicated rollback operations if the call to
+ * doSomPlanning() below fails. The temporary SOM slot is replaced with a
+ * real one afterwards. */
+ const u32 temp_som_loc = UINT32_MAX;
+ setPrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_WRITABLE,
+ temp_som_loc, *depths, prefix_by_rev);
+
+ /* handle direct edge to accepts from region */
+ if (edge(picked->second.exits.front(), g.accept, g).second
+ || edge(picked->second.exits.front(), g.acceptEod, g).second) {
+ map<u32, region_info>::const_iterator it = picked;
+ do {
+ makeSomRelReports(rm, g, it->second.exits, *depths);
+ } while (it != info.begin() && it->second.optional && (it--)->first);
+ }
+
+ depths->clear(); /* renumbering invalidates depths */
renumber_vertices(*prefix);
-
- DEBUG_PRINTF("done\n");
- return prefix;
-}
-
+
+ DEBUG_PRINTF("done\n");
+ return prefix;
+}
+
sombe_rv doSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id,
- som_type som) {
- assert(som);
- DEBUG_PRINTF("som hello\n");
- ReportManager &rm = ng.rm;
- SomSlotManager &ssm = ng.ssm;
- const CompileContext &cc = ng.cc;
-
- // Special case: if g is completely anchored or begins with a dot-star, we
- // know that we have an absolute SOM of zero all the time.
+ som_type som) {
+ assert(som);
+ DEBUG_PRINTF("som hello\n");
+ ReportManager &rm = ng.rm;
+ SomSlotManager &ssm = ng.ssm;
+ const CompileContext &cc = ng.cc;
+
+ // Special case: if g is completely anchored or begins with a dot-star, we
+ // know that we have an absolute SOM of zero all the time.
if (!proper_out_degree(g.startDs, g) || beginsWithDotStar(g)) {
- makeSomAbsReports(rm, g, g.accept);
- makeSomAbsReports(rm, g, g.acceptEod);
- return SOMBE_HANDLED_INTERNAL;
- }
-
- if (!cc.grey.allowSomChain) {
- return SOMBE_FAIL;
- }
-
- // A pristine copy of the input graph, which must be restored to in paths
- // that return false. Also used as the forward graph for som rev nfa
- // construction.
- NGHolder g_pristine;
- cloneHolder(g_pristine, g);
-
- vector<DepthMinMax> depths = getDistancesFromSOM(g);
-
- // try a redundancy pass.
- if (addSomRedundancy(g, depths)) {
- depths = getDistancesFromSOM(g); // recalc
- }
-
- auto regions = assignRegions(g);
-
- dumpHolder(g, regions, 11, "som_explode", cc.grey);
-
- map<u32, region_info> info;
- buildRegionMapping(g, regions, info);
-
- map<u32, region_info>::const_iterator picked
- = pickInitialSomCut(g, regions, info, depths);
- DEBUG_PRINTF("picked %u\n", picked->first);
- if (picked == info.end() || picked->second.exits.empty()) {
- DEBUG_PRINTF("no regions/no progress possible\n");
- clear_graph(g);
- cloneHolder(g, g_pristine);
- if (doSomRevNfa(ng, g, cc)) {
- return SOMBE_HANDLED_INTERNAL;
- } else {
- return SOMBE_FAIL;
- }
- }
-
- if (finalRegion(g, regions, picked->second.exits[0])) {
- makeSomRelReports(rm, g, g.accept, depths);
- makeSomRelReports(rm, g, g.acceptEod, depths);
- return SOMBE_HANDLED_INTERNAL;
- }
-
- if (doSomRevNfa(ng, g_pristine, cc)) {
- clear_graph(g);
- cloneHolder(g, g_pristine);
- return SOMBE_HANDLED_INTERNAL;
- }
-
- bool prefix_by_rev = false;
- map<u32, region_info>::const_iterator picked_old = picked;
- map<u32, region_info>::const_iterator rev_pick
- = tryForLaterRevNfaCut(g, regions, info, depths, picked, cc);
- if (rev_pick != info.end()) {
- DEBUG_PRINTF("found later rev prefix cut point\n");
- assert(rev_pick != picked);
- picked = rev_pick;
- prefix_by_rev = true;
- } else {
- /* sanity checks for picked region, these checks have already been done
- * if we are using a prefix reverse nfa. */
- if (picked->second.enters.empty()
- || find(picked->second.full.begin(), picked->second.full.end(),
- g.startDs) != picked->second.full.end()) {
- clear_graph(g);
- cloneHolder(g, g_pristine);
- return SOMBE_FAIL;
- }
-
- if (!isMandRegionBetween(info.begin(), picked)
- && info.begin()->second.optional) {
- clear_graph(g);
- cloneHolder(g, g_pristine);
- return SOMBE_FAIL;
- }
- }
-
- DEBUG_PRINTF("region %u is the final\n", picked->first);
-
- shared_ptr<NGHolder> prefix = makePrefixForChain(
- g, regions, info, picked, &depths, prefix_by_rev, rm);
- /* note depths cleared as we have renumbered */
-
- CharReach escapes;
- bool stuck = isPossibleLock(g, picked, info, &escapes);
- if (stuck) {
- DEBUG_PRINTF("investigating potential lock\n");
-
- NGHolder gg;
- fillHolderForLockCheck(&gg, g, info, picked);
-
- stuck = firstMatchIsFirst(gg);
- }
-
- if (stuck && escapes.none()) {
- /* leads directly to .* --> woot */
- DEBUG_PRINTF("initial slot is full lock\n");
- u32 som_loc = ssm.getSomSlot(*prefix, escapes, false,
- SomSlotManager::NO_PARENT);
- replaceTempSomSlot(rm, *prefix, som_loc);
-
- /* update all reports on g to report the som_loc's som */
- updateReportToUseRecordedSom(rm, g, som_loc);
-
- /* create prefix to set the som_loc */
- updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_UNSET);
- if (prefix_by_rev) {
+ makeSomAbsReports(rm, g, g.accept);
+ makeSomAbsReports(rm, g, g.acceptEod);
+ return SOMBE_HANDLED_INTERNAL;
+ }
+
+ if (!cc.grey.allowSomChain) {
+ return SOMBE_FAIL;
+ }
+
+ // A pristine copy of the input graph, which must be restored to in paths
+ // that return false. Also used as the forward graph for som rev nfa
+ // construction.
+ NGHolder g_pristine;
+ cloneHolder(g_pristine, g);
+
+ vector<DepthMinMax> depths = getDistancesFromSOM(g);
+
+ // try a redundancy pass.
+ if (addSomRedundancy(g, depths)) {
+ depths = getDistancesFromSOM(g); // recalc
+ }
+
+ auto regions = assignRegions(g);
+
+ dumpHolder(g, regions, 11, "som_explode", cc.grey);
+
+ map<u32, region_info> info;
+ buildRegionMapping(g, regions, info);
+
+ map<u32, region_info>::const_iterator picked
+ = pickInitialSomCut(g, regions, info, depths);
+ DEBUG_PRINTF("picked %u\n", picked->first);
+ if (picked == info.end() || picked->second.exits.empty()) {
+ DEBUG_PRINTF("no regions/no progress possible\n");
+ clear_graph(g);
+ cloneHolder(g, g_pristine);
+ if (doSomRevNfa(ng, g, cc)) {
+ return SOMBE_HANDLED_INTERNAL;
+ } else {
+ return SOMBE_FAIL;
+ }
+ }
+
+ if (finalRegion(g, regions, picked->second.exits[0])) {
+ makeSomRelReports(rm, g, g.accept, depths);
+ makeSomRelReports(rm, g, g.acceptEod, depths);
+ return SOMBE_HANDLED_INTERNAL;
+ }
+
+ if (doSomRevNfa(ng, g_pristine, cc)) {
+ clear_graph(g);
+ cloneHolder(g, g_pristine);
+ return SOMBE_HANDLED_INTERNAL;
+ }
+
+ bool prefix_by_rev = false;
+ map<u32, region_info>::const_iterator picked_old = picked;
+ map<u32, region_info>::const_iterator rev_pick
+ = tryForLaterRevNfaCut(g, regions, info, depths, picked, cc);
+ if (rev_pick != info.end()) {
+ DEBUG_PRINTF("found later rev prefix cut point\n");
+ assert(rev_pick != picked);
+ picked = rev_pick;
+ prefix_by_rev = true;
+ } else {
+ /* sanity checks for picked region, these checks have already been done
+ * if we are using a prefix reverse nfa. */
+ if (picked->second.enters.empty()
+ || find(picked->second.full.begin(), picked->second.full.end(),
+ g.startDs) != picked->second.full.end()) {
+ clear_graph(g);
+ cloneHolder(g, g_pristine);
+ return SOMBE_FAIL;
+ }
+
+ if (!isMandRegionBetween(info.begin(), picked)
+ && info.begin()->second.optional) {
+ clear_graph(g);
+ cloneHolder(g, g_pristine);
+ return SOMBE_FAIL;
+ }
+ }
+
+ DEBUG_PRINTF("region %u is the final\n", picked->first);
+
+ shared_ptr<NGHolder> prefix = makePrefixForChain(
+ g, regions, info, picked, &depths, prefix_by_rev, rm);
+ /* note depths cleared as we have renumbered */
+
+ CharReach escapes;
+ bool stuck = isPossibleLock(g, picked, info, &escapes);
+ if (stuck) {
+ DEBUG_PRINTF("investigating potential lock\n");
+
+ NGHolder gg;
+ fillHolderForLockCheck(&gg, g, info, picked);
+
+ stuck = firstMatchIsFirst(gg);
+ }
+
+ if (stuck && escapes.none()) {
+ /* leads directly to .* --> woot */
+ DEBUG_PRINTF("initial slot is full lock\n");
+ u32 som_loc = ssm.getSomSlot(*prefix, escapes, false,
+ SomSlotManager::NO_PARENT);
+ replaceTempSomSlot(rm, *prefix, som_loc);
+
+ /* update all reports on g to report the som_loc's som */
+ updateReportToUseRecordedSom(rm, g, som_loc);
+
+ /* create prefix to set the som_loc */
+ updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_UNSET);
+ if (prefix_by_rev) {
u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc);
- updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
- }
+ updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
+ }
renumber_vertices(*prefix);
- if (!ng.addHolder(*prefix)) {
- DEBUG_PRINTF("failed to add holder\n");
- clear_graph(g);
- cloneHolder(g, g_pristine);
- return SOMBE_FAIL;
- }
-
- DEBUG_PRINTF("ok found initial lock\n");
- return SOMBE_HANDLED_INTERNAL;
- }
-
- vector<som_plan> plan;
- retry:
- // Note: no-one should ever pay attention to the root plan's parent.
- plan.push_back(som_plan(prefix, escapes, false, 0));
- dumpHolder(*plan.back().prefix, 12, "som_prefix", cc.grey);
- if (!prefix_by_rev) {
- if (!doSomPlanning(g, stuck, regions, info, picked, plan, cc.grey)) {
- DEBUG_PRINTF("failed\n");
- clear_graph(g);
- cloneHolder(g, g_pristine);
- return SOMBE_FAIL;
- }
- } else {
- DEBUG_PRINTF("trying for som plan\n");
- if (!doSomPlanning(g, stuck, regions, info, picked, plan, cc.grey,
- DISALLOW_MODIFY_HOLDER)) {
- /* Note: the larger prefixes generated by reverse nfas may not
- * advance as fair as the original prefix - so we should retry
- * with a smaller prefix. */
-
- prefix_by_rev = false;
- stuck = false; /* if we reached a lock, then prefix_by_rev would not
- * have advanced. */
- picked = picked_old;
- plan.clear();
- depths = getDistancesFromSOM(g); /* due to renumbering, need to
- * regenerate */
- prefix = makePrefixForChain(g, regions, info, picked, &depths,
- prefix_by_rev, rm);
- escapes.clear();
- DEBUG_PRINTF("retrying\n");
- goto retry;
- }
- }
- DEBUG_PRINTF("som planning ok\n");
-
- /* if the initial prefix is weak is if sombe approaches are better */
- if (findMinWidth(*prefix) <= depth(2)) {
- DEBUG_PRINTF("weak prefix... seeing if sombe can help out\n");
- NGHolder g2;
- cloneHolder(g2, g_pristine);
- if (trySombe(ng, g2, som)) {
- return SOMBE_HANDLED_ALL;
- }
- }
-
- /* From this point we know that we are going to succeed or die horribly with
- * a pattern too large. Anything done past this point can be considered
- * committed to the compile. */
-
- regions = assignRegions(g); // Update as g may have changed.
-
- DEBUG_PRINTF("-- get slot for initial plan\n");
- u32 som_loc;
- if (plan[0].is_reset) {
- som_loc = ssm.getInitialResetSomSlot(*prefix, g, regions,
- picked->first, &plan[0].no_implement);
- } else {
- som_loc = ssm.getSomSlot(*prefix, escapes, false,
- SomSlotManager::NO_PARENT);
- }
-
- replaceTempSomSlot(rm, *prefix, som_loc);
-
- if (plan.front().is_reset) {
- updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET);
- }
- if (prefix_by_rev && !plan.front().no_implement) {
+ if (!ng.addHolder(*prefix)) {
+ DEBUG_PRINTF("failed to add holder\n");
+ clear_graph(g);
+ cloneHolder(g, g_pristine);
+ return SOMBE_FAIL;
+ }
+
+ DEBUG_PRINTF("ok found initial lock\n");
+ return SOMBE_HANDLED_INTERNAL;
+ }
+
+ vector<som_plan> plan;
+ retry:
+ // Note: no-one should ever pay attention to the root plan's parent.
+ plan.push_back(som_plan(prefix, escapes, false, 0));
+ dumpHolder(*plan.back().prefix, 12, "som_prefix", cc.grey);
+ if (!prefix_by_rev) {
+ if (!doSomPlanning(g, stuck, regions, info, picked, plan, cc.grey)) {
+ DEBUG_PRINTF("failed\n");
+ clear_graph(g);
+ cloneHolder(g, g_pristine);
+ return SOMBE_FAIL;
+ }
+ } else {
+ DEBUG_PRINTF("trying for som plan\n");
+ if (!doSomPlanning(g, stuck, regions, info, picked, plan, cc.grey,
+ DISALLOW_MODIFY_HOLDER)) {
+ /* Note: the larger prefixes generated by reverse nfas may not
+ * advance as fair as the original prefix - so we should retry
+ * with a smaller prefix. */
+
+ prefix_by_rev = false;
+ stuck = false; /* if we reached a lock, then prefix_by_rev would not
+ * have advanced. */
+ picked = picked_old;
+ plan.clear();
+ depths = getDistancesFromSOM(g); /* due to renumbering, need to
+ * regenerate */
+ prefix = makePrefixForChain(g, regions, info, picked, &depths,
+ prefix_by_rev, rm);
+ escapes.clear();
+ DEBUG_PRINTF("retrying\n");
+ goto retry;
+ }
+ }
+ DEBUG_PRINTF("som planning ok\n");
+
+ /* if the initial prefix is weak is if sombe approaches are better */
+ if (findMinWidth(*prefix) <= depth(2)) {
+ DEBUG_PRINTF("weak prefix... seeing if sombe can help out\n");
+ NGHolder g2;
+ cloneHolder(g2, g_pristine);
+ if (trySombe(ng, g2, som)) {
+ return SOMBE_HANDLED_ALL;
+ }
+ }
+
+ /* From this point we know that we are going to succeed or die horribly with
+ * a pattern too large. Anything done past this point can be considered
+ * committed to the compile. */
+
+ regions = assignRegions(g); // Update as g may have changed.
+
+ DEBUG_PRINTF("-- get slot for initial plan\n");
+ u32 som_loc;
+ if (plan[0].is_reset) {
+ som_loc = ssm.getInitialResetSomSlot(*prefix, g, regions,
+ picked->first, &plan[0].no_implement);
+ } else {
+ som_loc = ssm.getSomSlot(*prefix, escapes, false,
+ SomSlotManager::NO_PARENT);
+ }
+
+ replaceTempSomSlot(rm, *prefix, som_loc);
+
+ if (plan.front().is_reset) {
+ updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET);
+ }
+ if (prefix_by_rev && !plan.front().no_implement) {
u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc);
- updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
- }
-
+ updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
+ }
+
implementSomPlan(ng, expr, comp_id, g, plan, som_loc);
-
- DEBUG_PRINTF("success\n");
- return SOMBE_HANDLED_INTERNAL;
-}
-
+
+ DEBUG_PRINTF("success\n");
+ return SOMBE_HANDLED_INTERNAL;
+}
+
sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr,
u32 comp_id, som_type som) {
- assert(som);
-
- DEBUG_PRINTF("som+haig hello\n");
-
- // A pristine copy of the input graph, which must be restored to in paths
- // that return false. Also used as the forward graph for som rev nfa
- // construction.
- NGHolder g_pristine;
- cloneHolder(g_pristine, g);
-
- if (trySombe(ng, g, som)) {
- return SOMBE_HANDLED_ALL;
- }
-
- if (!ng.cc.grey.allowHaigLit || !ng.cc.grey.allowSomChain) {
- return SOMBE_FAIL;
- }
-
- // know that we have an absolute SOM of zero all the time.
- assert(edge(g.startDs, g.startDs, g).second);
-
- vector<DepthMinMax> depths = getDistancesFromSOM(g);
-
- // try a redundancy pass.
- if (addSomRedundancy(g, depths)) {
- depths = getDistancesFromSOM(g);
- }
-
- auto regions = assignRegions(g);
-
- dumpHolder(g, regions, 21, "som_explode", ng.cc.grey);
-
- map<u32, region_info> info;
- buildRegionMapping(g, regions, info, true);
-
- sombe_rv rv =
+ assert(som);
+
+ DEBUG_PRINTF("som+haig hello\n");
+
+ // A pristine copy of the input graph, which must be restored to in paths
+ // that return false. Also used as the forward graph for som rev nfa
+ // construction.
+ NGHolder g_pristine;
+ cloneHolder(g_pristine, g);
+
+ if (trySombe(ng, g, som)) {
+ return SOMBE_HANDLED_ALL;
+ }
+
+ if (!ng.cc.grey.allowHaigLit || !ng.cc.grey.allowSomChain) {
+ return SOMBE_FAIL;
+ }
+
+ // know that we have an absolute SOM of zero all the time.
+ assert(edge(g.startDs, g.startDs, g).second);
+
+ vector<DepthMinMax> depths = getDistancesFromSOM(g);
+
+ // try a redundancy pass.
+ if (addSomRedundancy(g, depths)) {
+ depths = getDistancesFromSOM(g);
+ }
+
+ auto regions = assignRegions(g);
+
+ dumpHolder(g, regions, 21, "som_explode", ng.cc.grey);
+
+ map<u32, region_info> info;
+ buildRegionMapping(g, regions, info, true);
+
+ sombe_rv rv =
doHaigLitSom(ng, g, expr, comp_id, som, regions, info, info.begin());
- if (rv == SOMBE_FAIL) {
- clear_graph(g);
- cloneHolder(g, g_pristine);
- }
- return rv;
-}
-
-} // namespace ue2
+ if (rv == SOMBE_FAIL) {
+ clear_graph(g);
+ cloneHolder(g, g_pristine);
+ }
+ return rv;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som.h b/contrib/libs/hyperscan/src/nfagraph/ng_som.h
index b39c239ba2..ecae4c67fb 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_som.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_som.h
@@ -1,81 +1,81 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief SOM ("Start of Match") analysis.
- */
-
-#ifndef NG_SOM_H
-#define NG_SOM_H
-
-#include "som/som.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief SOM ("Start of Match") analysis.
+ */
+
+#ifndef NG_SOM_H
+#define NG_SOM_H
+
+#include "som/som.h"
#include "ue2common.h"
-
-namespace ue2 {
-
+
+namespace ue2 {
+
class ExpressionInfo;
-class NG;
-class NGHolder;
+class NG;
+class NGHolder;
class ReportManager;
-struct Grey;
-
-enum sombe_rv {
- SOMBE_FAIL,
- SOMBE_HANDLED_INTERNAL,
- SOMBE_HANDLED_ALL
-};
-
-/** \brief Perform SOM analysis on the given graph.
- *
- * This function will replace report IDs and mutate the graph, then return
- * SOMBE_HANDLED_INTERNAL if SOM can be established and the full graph still
- * needs to be handled (rose, etc).
- *
- * Returns SOMBE_HANDLED_ALL if everything has been done and the pattern has
- * been handled in all its glory.
- *
- * Returns SOMBE_FAIL and does not mutate the graph if SOM cannot be
- * established.
- *
- * May throw a "Pattern too large" exception if prefixes of the
- * pattern are too large to compile.
- */
+struct Grey;
+
+enum sombe_rv {
+ SOMBE_FAIL,
+ SOMBE_HANDLED_INTERNAL,
+ SOMBE_HANDLED_ALL
+};
+
+/** \brief Perform SOM analysis on the given graph.
+ *
+ * This function will replace report IDs and mutate the graph, then return
+ * SOMBE_HANDLED_INTERNAL if SOM can be established and the full graph still
+ * needs to be handled (rose, etc).
+ *
+ * Returns SOMBE_HANDLED_ALL if everything has been done and the pattern has
+ * been handled in all its glory.
+ *
+ * Returns SOMBE_FAIL and does not mutate the graph if SOM cannot be
+ * established.
+ *
+ * May throw a "Pattern too large" exception if prefixes of the
+ * pattern are too large to compile.
+ */
sombe_rv doSom(NG &ng, NGHolder &h, const ExpressionInfo &expr, u32 comp_id,
- som_type som);
-
-/** Returns SOMBE_FAIL (and the original graph) if SOM cannot be established.
- * May also throw pattern too large if prefixes of the pattern are too large to
- * compile. */
+ som_type som);
+
+/** Returns SOMBE_FAIL (and the original graph) if SOM cannot be established.
+ * May also throw pattern too large if prefixes of the pattern are too large to
+ * compile. */
sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const ExpressionInfo &expr,
u32 comp_id, som_type som);
-
+
void makeReportsSomPass(ReportManager &rm, NGHolder &g);
-} // namespace ue2
-
-#endif // NG_SOM_H
+} // namespace ue2
+
+#endif // NG_SOM_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.cpp
index 776d54f4f1..33544ec173 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.cpp
@@ -1,198 +1,198 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Add redundancy to graph to assist in SOM analysis.
- *
- * Currently patterns of the form:
- *
- * /(GET|POST).*foo/
- *
- * baffle our SOM analysis as the T's get merged into one by our graph
- * reductions and they lose the fixed depth property. One way to solve this is
- * to tell the T vertex to go fork itself before we do the main SOM pass.
- *
- * Overall plan:
- *
- * 1. build a topo ordering
- * 2. walk vertices in topo order
- * 3. fix up vertices where possible
- * 4. go home
- *
- * Vertex fix up plan:
- *
- * 1. consider depth of vertex
- * - if vertex is at fixed depth continue to next vertex
- * - if vertex can be at an unbounded depth continue to next vertex
- * - if vertex has a pred which is not a fixed depth continue to next vertex
- * 2. group preds by their depth
- * 3. for each group:
- * - create a clone of the vertex (vertex props and out edges)
- * - create edges from each vertex in the group to the clone
- * - work out the depth for the clone
- * 4. blow away original vertex
- *
- * Originally in UE-1862.
- */
-#include "ng_som_add_redundancy.h"
-
-#include "ng_dump.h"
-#include "ng_holder.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/container.h"
-#include "util/depth.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-
-using namespace std;
-
-namespace ue2 {
-
-/** \brief Hard limit on the maximum number of new vertices to create. */
-static const size_t MAX_NEW_VERTICES = 32;
-
-static
-const DepthMinMax &getDepth(NFAVertex v, const NGHolder &g,
- const vector<DepthMinMax> &depths) {
- return depths.at(g[v].index);
-}
-
-static
-bool hasFloatingPred(NFAVertex v, const NGHolder &g,
- const vector<DepthMinMax> &depths) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- const DepthMinMax &d = getDepth(u, g, depths);
- if (d.min != d.max) {
- return true;
- }
- }
- return false;
-}
-
-static
-bool forkVertex(NFAVertex v, NGHolder &g, vector<DepthMinMax> &depths,
- set<NFAVertex> &dead, size_t *numNewVertices) {
- map<depth, vector<NFAEdge>> predGroups;
- for (const auto &e : in_edges_range(v, g)) {
- const DepthMinMax &d = getDepth(source(e, g), g, depths);
- assert(d.min == d.max);
- predGroups[d.min].push_back(e);
- }
-
- DEBUG_PRINTF("forking vertex with %zu pred groups\n", predGroups.size());
-
- if (*numNewVertices + predGroups.size() > MAX_NEW_VERTICES) {
- return false;
- }
- *numNewVertices += predGroups.size();
-
- for (auto &group : predGroups) {
- const depth &predDepth = group.first;
- const vector<NFAEdge> &preds = group.second;
-
- // Clone v for this depth with all its associated out-edges.
- u32 clone_idx = depths.size(); // next index to be used
- NFAVertex clone = add_vertex(g[v], g);
- depth clone_depth = predDepth + 1;
- g[clone].index = clone_idx;
- depths.push_back(DepthMinMax(clone_depth, clone_depth));
- DEBUG_PRINTF("cloned vertex %u with depth %s\n", clone_idx,
- clone_depth.str().c_str());
-
- // Add copies of the out-edges from v.
- for (const auto &e : out_edges_range(v, g)) {
- add_edge(clone, target(e, g), g[e], g);
- }
-
- // Add in-edges from preds in this group.
- for (const auto &e : preds) {
- add_edge(source(e, g), clone, g[e], g);
- }
- }
-
- clear_vertex(v, g);
- dead.insert(v);
- return true;
-}
-
-bool addSomRedundancy(NGHolder &g, vector<DepthMinMax> &depths) {
- DEBUG_PRINTF("entry\n");
-
- const vector<NFAVertex> ordering = getTopoOrdering(g);
-
- set<NFAVertex> dead;
- size_t numNewVertices = 0;
-
- for (auto it = ordering.rbegin(), ite = ordering.rend(); it != ite; ++it) {
- NFAVertex v = *it;
-
- if (is_special(v, g)) {
- continue;
- }
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Add redundancy to graph to assist in SOM analysis.
+ *
+ * Currently patterns of the form:
+ *
+ * /(GET|POST).*foo/
+ *
+ * baffle our SOM analysis as the T's get merged into one by our graph
+ * reductions and they lose the fixed depth property. One way to solve this is
+ * to tell the T vertex to go fork itself before we do the main SOM pass.
+ *
+ * Overall plan:
+ *
+ * 1. build a topo ordering
+ * 2. walk vertices in topo order
+ * 3. fix up vertices where possible
+ * 4. go home
+ *
+ * Vertex fix up plan:
+ *
+ * 1. consider depth of vertex
+ * - if vertex is at fixed depth continue to next vertex
+ * - if vertex can be at an unbounded depth continue to next vertex
+ * - if vertex has a pred which is not a fixed depth continue to next vertex
+ * 2. group preds by their depth
+ * 3. for each group:
+ * - create a clone of the vertex (vertex props and out edges)
+ * - create edges from each vertex in the group to the clone
+ * - work out the depth for the clone
+ * 4. blow away original vertex
+ *
+ * Originally in UE-1862.
+ */
+#include "ng_som_add_redundancy.h"
+
+#include "ng_dump.h"
+#include "ng_holder.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/container.h"
+#include "util/depth.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+
+using namespace std;
+
+namespace ue2 {
+
+/** \brief Hard limit on the maximum number of new vertices to create. */
+static const size_t MAX_NEW_VERTICES = 32;
+
+static
+const DepthMinMax &getDepth(NFAVertex v, const NGHolder &g,
+ const vector<DepthMinMax> &depths) {
+ return depths.at(g[v].index);
+}
+
+static
+bool hasFloatingPred(NFAVertex v, const NGHolder &g,
+ const vector<DepthMinMax> &depths) {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ const DepthMinMax &d = getDepth(u, g, depths);
+ if (d.min != d.max) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static
+bool forkVertex(NFAVertex v, NGHolder &g, vector<DepthMinMax> &depths,
+ set<NFAVertex> &dead, size_t *numNewVertices) {
+ map<depth, vector<NFAEdge>> predGroups;
+ for (const auto &e : in_edges_range(v, g)) {
+ const DepthMinMax &d = getDepth(source(e, g), g, depths);
+ assert(d.min == d.max);
+ predGroups[d.min].push_back(e);
+ }
+
+ DEBUG_PRINTF("forking vertex with %zu pred groups\n", predGroups.size());
+
+ if (*numNewVertices + predGroups.size() > MAX_NEW_VERTICES) {
+ return false;
+ }
+ *numNewVertices += predGroups.size();
+
+ for (auto &group : predGroups) {
+ const depth &predDepth = group.first;
+ const vector<NFAEdge> &preds = group.second;
+
+ // Clone v for this depth with all its associated out-edges.
+ u32 clone_idx = depths.size(); // next index to be used
+ NFAVertex clone = add_vertex(g[v], g);
+ depth clone_depth = predDepth + 1;
+ g[clone].index = clone_idx;
+ depths.push_back(DepthMinMax(clone_depth, clone_depth));
+ DEBUG_PRINTF("cloned vertex %u with depth %s\n", clone_idx,
+ clone_depth.str().c_str());
+
+ // Add copies of the out-edges from v.
+ for (const auto &e : out_edges_range(v, g)) {
+ add_edge(clone, target(e, g), g[e], g);
+ }
+
+ // Add in-edges from preds in this group.
+ for (const auto &e : preds) {
+ add_edge(source(e, g), clone, g[e], g);
+ }
+ }
+
+ clear_vertex(v, g);
+ dead.insert(v);
+ return true;
+}
+
+bool addSomRedundancy(NGHolder &g, vector<DepthMinMax> &depths) {
+ DEBUG_PRINTF("entry\n");
+
+ const vector<NFAVertex> ordering = getTopoOrdering(g);
+
+ set<NFAVertex> dead;
+ size_t numNewVertices = 0;
+
+ for (auto it = ordering.rbegin(), ite = ordering.rend(); it != ite; ++it) {
+ NFAVertex v = *it;
+
+ if (is_special(v, g)) {
+ continue;
+ }
if (!in_degree(v, g)) {
- continue; // unreachable, probably killed
- }
-
- const DepthMinMax &d = getDepth(v, g, depths);
-
+ continue; // unreachable, probably killed
+ }
+
+ const DepthMinMax &d = getDepth(v, g, depths);
+
DEBUG_PRINTF("vertex %zu has depths %s\n", g[v].index,
- d.str().c_str());
-
- if (d.min == d.max) {
- DEBUG_PRINTF("fixed depth\n");
- continue;
- }
-
- if (d.max.is_unreachable()) {
- DEBUG_PRINTF("unbounded depth\n");
- continue;
- }
-
- if (hasFloatingPred(v, g, depths)) {
- DEBUG_PRINTF("has floating pred\n");
- continue;
- }
-
- if (!forkVertex(v, g, depths, dead, &numNewVertices)) {
- DEBUG_PRINTF("new vertex limit reached\n");
- break;
- }
- }
-
- assert(numNewVertices <= MAX_NEW_VERTICES);
-
- if (dead.empty()) {
- return false; // no changes made to the graph
- }
-
- remove_vertices(dead, g);
- return true;
-}
-
-} // namespace ue2
+ d.str().c_str());
+
+ if (d.min == d.max) {
+ DEBUG_PRINTF("fixed depth\n");
+ continue;
+ }
+
+ if (d.max.is_unreachable()) {
+ DEBUG_PRINTF("unbounded depth\n");
+ continue;
+ }
+
+ if (hasFloatingPred(v, g, depths)) {
+ DEBUG_PRINTF("has floating pred\n");
+ continue;
+ }
+
+ if (!forkVertex(v, g, depths, dead, &numNewVertices)) {
+ DEBUG_PRINTF("new vertex limit reached\n");
+ break;
+ }
+ }
+
+ assert(numNewVertices <= MAX_NEW_VERTICES);
+
+ if (dead.empty()) {
+ return false; // no changes made to the graph
+ }
+
+ remove_vertices(dead, g);
+ return true;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.h b/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.h
index bec63ccd18..890dc9c942 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_som_add_redundancy.h
@@ -1,47 +1,47 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Add redundancy to graph to assist in SOM analysis.
- */
-
-#ifndef NG_SOM_ADD_REDUNDANCY_H
-#define NG_SOM_ADD_REDUNDANCY_H
-
-#include "util/depth.h"
-#include <vector>
-
-namespace ue2 {
-
-class NGHolder;
-
-bool addSomRedundancy(NGHolder &g, std::vector<DepthMinMax> &depths);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Add redundancy to graph to assist in SOM analysis.
+ */
+
+#ifndef NG_SOM_ADD_REDUNDANCY_H
+#define NG_SOM_ADD_REDUNDANCY_H
+
+#include "util/depth.h"
+#include <vector>
+
+namespace ue2 {
+
+class NGHolder;
+
+bool addSomRedundancy(NGHolder &g, std::vector<DepthMinMax> &depths);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som_util.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_som_util.cpp
index 3d49bd15db..1e7a41bb0c 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_som_util.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_som_util.cpp
@@ -1,357 +1,357 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Utility functions related to SOM ("Start of Match").
- */
-#include "ng_som_util.h"
-
-#include "ng_depth.h"
-#include "ng_execute.h"
-#include "ng_holder.h"
-#include "ng_prune.h"
-#include "ng_util.h"
-#include "util/container.h"
-#include "util/graph_range.h"
-
-using namespace std;
-
-namespace ue2 {
-
-static
-void wireSuccessorsToStart(NGHolder &g, NFAVertex u) {
- for (auto v : adjacent_vertices_range(u, g)) {
- add_edge_if_not_present(g.start, v, g);
- }
-}
-
-vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g_orig) {
- // We operate on a temporary copy of the original graph here, so we don't
- // have to mutate the original.
- NGHolder g;
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Utility functions related to SOM ("Start of Match").
+ */
+#include "ng_som_util.h"
+
+#include "ng_depth.h"
+#include "ng_execute.h"
+#include "ng_holder.h"
+#include "ng_prune.h"
+#include "ng_util.h"
+#include "util/container.h"
+#include "util/graph_range.h"
+
+using namespace std;
+
+namespace ue2 {
+
+static
+void wireSuccessorsToStart(NGHolder &g, NFAVertex u) {
+ for (auto v : adjacent_vertices_range(u, g)) {
+ add_edge_if_not_present(g.start, v, g);
+ }
+}
+
+vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g_orig) {
+ // We operate on a temporary copy of the original graph here, so we don't
+ // have to mutate the original.
+ NGHolder g;
unordered_map<NFAVertex, NFAVertex> vmap; // vertex in g_orig to vertex in g
- cloneHolder(g, g_orig, &vmap);
-
- vector<NFAVertex> vstarts;
- for (auto v : vertices_range(g)) {
- if (is_virtual_start(v, g)) {
- vstarts.push_back(v);
- }
- }
- vstarts.push_back(g.startDs);
-
- // wire the successors of every virtual start or startDs to g.start.
- for (auto v : vstarts) {
- wireSuccessorsToStart(g, v);
- }
-
- // drop the in-edges of every virtual start so that they don't participate
- // in the depth calculation.
- for (auto v : vstarts) {
- clear_in_edges(v, g);
- }
-
+ cloneHolder(g, g_orig, &vmap);
+
+ vector<NFAVertex> vstarts;
+ for (auto v : vertices_range(g)) {
+ if (is_virtual_start(v, g)) {
+ vstarts.push_back(v);
+ }
+ }
+ vstarts.push_back(g.startDs);
+
+ // wire the successors of every virtual start or startDs to g.start.
+ for (auto v : vstarts) {
+ wireSuccessorsToStart(g, v);
+ }
+
+ // drop the in-edges of every virtual start so that they don't participate
+ // in the depth calculation.
+ for (auto v : vstarts) {
+ clear_in_edges(v, g);
+ }
+
//dumpGraph("som_depth.dot", g);
-
+
// Find depths, indexed by vertex index in g
auto temp_depths = calcDepthsFrom(g, g.start);
-
- // Transfer depths, indexed by vertex index in g_orig.
- vector<DepthMinMax> depths(num_vertices(g_orig));
-
- for (auto v_orig : vertices_range(g_orig)) {
- assert(contains(vmap, v_orig));
- NFAVertex v_new = vmap[v_orig];
-
- u32 orig_idx = g_orig[v_orig].index;
-
- DepthMinMax &d = depths.at(orig_idx);
-
- if (v_orig == g_orig.startDs || is_virtual_start(v_orig, g_orig)) {
- // StartDs and virtual starts always have zero depth.
+
+ // Transfer depths, indexed by vertex index in g_orig.
+ vector<DepthMinMax> depths(num_vertices(g_orig));
+
+ for (auto v_orig : vertices_range(g_orig)) {
+ assert(contains(vmap, v_orig));
+ NFAVertex v_new = vmap[v_orig];
+
+ u32 orig_idx = g_orig[v_orig].index;
+
+ DepthMinMax &d = depths.at(orig_idx);
+
+ if (v_orig == g_orig.startDs || is_virtual_start(v_orig, g_orig)) {
+ // StartDs and virtual starts always have zero depth.
d = DepthMinMax(depth(0), depth(0));
- } else {
- u32 new_idx = g[v_new].index;
- d = temp_depths.at(new_idx);
- }
- }
-
- return depths;
-}
-
-bool firstMatchIsFirst(const NGHolder &p) {
- /* If the first match (by end offset) is not the first match (by start
- * offset) then we can't create a lock after it.
- *
- * Consider: 4009:/(foobar|ob).*bugger/s
- *
- * We don't care about races on the last byte as they can be resolved easily
- * at runtime /(foobar|obar).*hi/
- *
- * It should be obvious we don't care about one match being a prefix
- * of another as they share the same start offset.
- *
- * Therefore, the case were we cannot establish that the som does not
- * regress is when there exists s1 and s2 in the language of p and s2 is a
- * proper infix of s1.
- *
- * It is tempting to add the further restriction that there does not exist a
- * prefix of s1 that is in the language of p (as in which case we would
- * presume, the lock has already been set). However, we have no way of
- * knowing if the lock can be cleared by some characters, and if so, if it
- * is still set. TODO: if we knew the lock's escapes where we could verify
- * that the rest of s1 does not clear the lock. (1)
- */
-
- DEBUG_PRINTF("entry\n");
-
- /* If there are any big cycles throw up our hands in despair */
- if (hasBigCycles(p)) {
- DEBUG_PRINTF("fail, big cycles\n");
- return false;
- }
-
+ } else {
+ u32 new_idx = g[v_new].index;
+ d = temp_depths.at(new_idx);
+ }
+ }
+
+ return depths;
+}
+
+bool firstMatchIsFirst(const NGHolder &p) {
+ /* If the first match (by end offset) is not the first match (by start
+ * offset) then we can't create a lock after it.
+ *
+ * Consider: 4009:/(foobar|ob).*bugger/s
+ *
+ * We don't care about races on the last byte as they can be resolved easily
+ * at runtime /(foobar|obar).*hi/
+ *
+ * It should be obvious we don't care about one match being a prefix
+ * of another as they share the same start offset.
+ *
+ * Therefore, the case were we cannot establish that the som does not
+ * regress is when there exists s1 and s2 in the language of p and s2 is a
+ * proper infix of s1.
+ *
+ * It is tempting to add the further restriction that there does not exist a
+ * prefix of s1 that is in the language of p (as in which case we would
+ * presume, the lock has already been set). However, we have no way of
+ * knowing if the lock can be cleared by some characters, and if so, if it
+ * is still set. TODO: if we knew the lock's escapes where we could verify
+ * that the rest of s1 does not clear the lock. (1)
+ */
+
+ DEBUG_PRINTF("entry\n");
+
+ /* If there are any big cycles throw up our hands in despair */
+ if (hasBigCycles(p)) {
+ DEBUG_PRINTF("fail, big cycles\n");
+ return false;
+ }
+
flat_set<NFAVertex> states;
- /* turn on all states (except starts - avoid suffix matches) */
- /* If we were doing (1) we would also except states leading to accepts -
- avoid prefix matches */
- for (auto v : vertices_range(p)) {
- assert(!is_virtual_start(v, p));
- if (!is_special(v, p)) {
+ /* turn on all states (except starts - avoid suffix matches) */
+ /* If we were doing (1) we would also except states leading to accepts -
+ avoid prefix matches */
+ for (auto v : vertices_range(p)) {
+ assert(!is_virtual_start(v, p));
+ if (!is_special(v, p)) {
DEBUG_PRINTF("turning on %zu\n", p[v].index);
- states.insert(v);
- }
- }
-
- /* run the prefix the main graph */
- states = execute_graph(p, p, states);
-
- for (auto v : states) {
- /* need to check if this vertex may represent an infix match - ie
- * it does not have an edge to accept. */
+ states.insert(v);
+ }
+ }
+
+ /* run the prefix the main graph */
+ states = execute_graph(p, p, states);
+
+ for (auto v : states) {
+ /* need to check if this vertex may represent an infix match - ie
+ * it does not have an edge to accept. */
DEBUG_PRINTF("check %zu\n", p[v].index);
- if (!edge(v, p.accept, p).second) {
+ if (!edge(v, p.accept, p).second) {
DEBUG_PRINTF("fail %zu\n", p[v].index);
- return false;
- }
- }
-
- DEBUG_PRINTF("done first is first check\n");
- return true;
-}
-
-bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
+ return false;
+ }
+ }
+
+ DEBUG_PRINTF("done first is first check\n");
+ return true;
+}
+
+bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
const unordered_map<NFAVertex, u32> &region_map,
- smgb_cache &cache) {
- /* Need to ensure all matches of the graph g up to u contain no infixes
- * which are also matches of the graph to u.
- *
- * This is basically the same as firstMatchIsFirst except we g is not
- * always a dag. As we haven't gotten around to writing an execute_graph
- * that operates on general graphs, we take some (hopefully) conservative
- * short cuts.
- *
- * Note: if the u can be jumped we will take jump edges
- * into account as a possibility of som going backwards
- *
- * TODO: write a generalised ng_execute_graph/make this less hacky
- */
- assert(&g == &cache.g);
- if (contains(cache.smgb, u)) {
- return cache.smgb[u];
- }
-
+ smgb_cache &cache) {
+ /* Need to ensure all matches of the graph g up to u contain no infixes
+ * which are also matches of the graph to u.
+ *
+ * This is basically the same as firstMatchIsFirst except we g is not
+ * always a dag. As we haven't gotten around to writing an execute_graph
+ * that operates on general graphs, we take some (hopefully) conservative
+ * short cuts.
+ *
+ * Note: if the u can be jumped we will take jump edges
+ * into account as a possibility of som going backwards
+ *
+ * TODO: write a generalised ng_execute_graph/make this less hacky
+ */
+ assert(&g == &cache.g);
+ if (contains(cache.smgb, u)) {
+ return cache.smgb[u];
+ }
+
DEBUG_PRINTF("checking if som can go backwards on %zu\n", g[u].index);
-
- set<NFAEdge> be;
- BackEdges<set<NFAEdge>> backEdgeVisitor(be);
+
+ set<NFAEdge> be;
+ BackEdges<set<NFAEdge>> backEdgeVisitor(be);
boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start));
-
- bool rv;
- if (0) {
- exit:
- DEBUG_PRINTF("using cached result\n");
- cache.smgb[u] = rv;
- return rv;
- }
-
- assert(contains(region_map, u));
- const u32 u_region = region_map.at(u);
-
- for (const auto &e : be) {
- NFAVertex s = source(e, g);
- NFAVertex t = target(e, g);
- /* only need to worry about big cycles including/before u */
+
+ bool rv;
+ if (0) {
+ exit:
+ DEBUG_PRINTF("using cached result\n");
+ cache.smgb[u] = rv;
+ return rv;
+ }
+
+ assert(contains(region_map, u));
+ const u32 u_region = region_map.at(u);
+
+ for (const auto &e : be) {
+ NFAVertex s = source(e, g);
+ NFAVertex t = target(e, g);
+ /* only need to worry about big cycles including/before u */
DEBUG_PRINTF("back edge %zu %zu\n", g[s].index, g[t].index);
- if (s != t && region_map.at(s) <= u_region) {
- DEBUG_PRINTF("eek big cycle\n");
- rv = true; /* big cycle -> eek */
- goto exit;
- }
- }
-
+ if (s != t && region_map.at(s) <= u_region) {
+ DEBUG_PRINTF("eek big cycle\n");
+ rv = true; /* big cycle -> eek */
+ goto exit;
+ }
+ }
+
unordered_map<NFAVertex, NFAVertex> orig_to_copy;
- NGHolder c_g;
- cloneHolder(c_g, g, &orig_to_copy);
-
+ NGHolder c_g;
+ cloneHolder(c_g, g, &orig_to_copy);
+
/* treat virtual starts as unconditional - wire to startDs instead */
- for (NFAVertex v : vertices_range(g)) {
- if (!is_virtual_start(v, g)) {
- continue;
- }
- NFAVertex c_v = orig_to_copy[v];
- orig_to_copy[v] = c_g.startDs;
- for (NFAVertex c_w : adjacent_vertices_range(c_v, c_g)) {
- add_edge_if_not_present(c_g.startDs, c_w, c_g);
- }
- clear_vertex(c_v, c_g);
- }
-
+ for (NFAVertex v : vertices_range(g)) {
+ if (!is_virtual_start(v, g)) {
+ continue;
+ }
+ NFAVertex c_v = orig_to_copy[v];
+ orig_to_copy[v] = c_g.startDs;
+ for (NFAVertex c_w : adjacent_vertices_range(c_v, c_g)) {
+ add_edge_if_not_present(c_g.startDs, c_w, c_g);
+ }
+ clear_vertex(c_v, c_g);
+ }
+
/* treat u as the only accept state */
- NFAVertex c_u = orig_to_copy[u];
- clear_in_edges(c_g.acceptEod, c_g);
- add_edge(c_g.accept, c_g.acceptEod, c_g);
- clear_in_edges(c_g.accept, c_g);
- clear_out_edges(c_u, c_g);
- if (hasSelfLoop(u, g)) {
- add_edge(c_u, c_u, c_g);
- }
- add_edge(c_u, c_g.accept, c_g);
-
- set<NFAVertex> u_succ;
- insert(&u_succ, adjacent_vertices(u, g));
- u_succ.erase(u);
-
- for (auto t : inv_adjacent_vertices_range(u, g)) {
- if (t == u) {
- continue;
- }
- for (auto v : adjacent_vertices_range(t, g)) {
- if (contains(u_succ, v)) {
+ NFAVertex c_u = orig_to_copy[u];
+ clear_in_edges(c_g.acceptEod, c_g);
+ add_edge(c_g.accept, c_g.acceptEod, c_g);
+ clear_in_edges(c_g.accept, c_g);
+ clear_out_edges(c_u, c_g);
+ if (hasSelfLoop(u, g)) {
+ add_edge(c_u, c_u, c_g);
+ }
+ add_edge(c_u, c_g.accept, c_g);
+
+ set<NFAVertex> u_succ;
+ insert(&u_succ, adjacent_vertices(u, g));
+ u_succ.erase(u);
+
+ for (auto t : inv_adjacent_vertices_range(u, g)) {
+ if (t == u) {
+ continue;
+ }
+ for (auto v : adjacent_vertices_range(t, g)) {
+ if (contains(u_succ, v)) {
/* due to virtual starts being aliased with normal starts in the
* copy of the graph, we may have already added the edges. */
add_edge_if_not_present(orig_to_copy[t], c_g.accept, c_g);
- break;
- }
- }
- }
-
- pruneUseless(c_g);
-
- be.clear();
+ break;
+ }
+ }
+ }
+
+ pruneUseless(c_g);
+
+ be.clear();
boost::depth_first_search(c_g, visitor(backEdgeVisitor)
.root_vertex(c_g.start));
-
- for (const auto &e : be) {
- NFAVertex s = source(e, c_g);
- NFAVertex t = target(e, c_g);
+
+ for (const auto &e : be) {
+ NFAVertex s = source(e, c_g);
+ NFAVertex t = target(e, c_g);
DEBUG_PRINTF("back edge %zu %zu\n", c_g[s].index, c_g[t].index);
- if (s != t) {
- assert(0);
- DEBUG_PRINTF("eek big cycle\n");
- rv = true; /* big cycle -> eek */
- goto exit;
- }
- }
-
- DEBUG_PRINTF("checking acyclic+selfloop graph\n");
-
- rv = !firstMatchIsFirst(c_g);
- DEBUG_PRINTF("som may regress? %d\n", (int)rv);
- goto exit;
-}
-
-bool sentClearsTail(const NGHolder &g,
+ if (s != t) {
+ assert(0);
+ DEBUG_PRINTF("eek big cycle\n");
+ rv = true; /* big cycle -> eek */
+ goto exit;
+ }
+ }
+
+ DEBUG_PRINTF("checking acyclic+selfloop graph\n");
+
+ rv = !firstMatchIsFirst(c_g);
+ DEBUG_PRINTF("som may regress? %d\n", (int)rv);
+ goto exit;
+}
+
+bool sentClearsTail(const NGHolder &g,
const unordered_map<NFAVertex, u32> &region_map,
- const NGHolder &sent, u32 last_head_region,
- u32 *bad_region) {
- /* if a subsequent match from the prefix clears the rest of the pattern
- * we can just keep track of the last match of the prefix.
- * To see if this property holds, we could:
- *
- * 1A: turn on all states in the tail and run all strings that may
- * match the prefix past the tail, if we are still in any states then
- * this property does not hold.
- *
- * 1B: we turn on the initial states of the tail and run any strings which
- * may finish any partial matches in the prefix and see if we end up with
- * anything which would also imply that this property does not hold.
- *
- * OR
- *
- * 2: we just turn everything and run the prefix inputs past it and see what
- * we are left with. I think that is equivalent to scheme 1 and is easier to
- * implement. TODO: ponder
- *
- * Anyway, we are going with scheme 2 until further notice.
- */
-
- u32 first_bad_region = ~0U;
+ const NGHolder &sent, u32 last_head_region,
+ u32 *bad_region) {
+ /* if a subsequent match from the prefix clears the rest of the pattern
+ * we can just keep track of the last match of the prefix.
+ * To see if this property holds, we could:
+ *
+ * 1A: turn on all states in the tail and run all strings that may
+ * match the prefix past the tail, if we are still in any states then
+ * this property does not hold.
+ *
+ * 1B: we turn on the initial states of the tail and run any strings which
+ * may finish any partial matches in the prefix and see if we end up with
+ * anything which would also imply that this property does not hold.
+ *
+ * OR
+ *
+ * 2: we just turn everything and run the prefix inputs past it and see what
+ * we are left with. I think that is equivalent to scheme 1 and is easier to
+ * implement. TODO: ponder
+ *
+ * Anyway, we are going with scheme 2 until further notice.
+ */
+
+ u32 first_bad_region = ~0U;
flat_set<NFAVertex> states;
- /* turn on all states */
- DEBUG_PRINTF("region %u is cutover\n", last_head_region);
- for (auto v : vertices_range(g)) {
- if (v != g.accept && v != g.acceptEod) {
- states.insert(v);
- }
- }
-
- for (UNUSED auto v : states) {
+ /* turn on all states */
+ DEBUG_PRINTF("region %u is cutover\n", last_head_region);
+ for (auto v : vertices_range(g)) {
+ if (v != g.accept && v != g.acceptEod) {
+ states.insert(v);
+ }
+ }
+
+ for (UNUSED auto v : states) {
DEBUG_PRINTF("start state: %zu\n", g[v].index);
- }
-
- /* run the prefix the main graph */
- states = execute_graph(g, sent, states);
-
- /* .. and check if we are left with anything in the tail region */
- for (auto v : states) {
- if (v == g.start || v == g.startDs) {
- continue; /* not in tail */
- }
-
+ }
+
+ /* run the prefix the main graph */
+ states = execute_graph(g, sent, states);
+
+ /* .. and check if we are left with anything in the tail region */
+ for (auto v : states) {
+ if (v == g.start || v == g.startDs) {
+ continue; /* not in tail */
+ }
+
DEBUG_PRINTF("v %zu is still on\n", g[v].index);
- assert(v != g.accept && v != g.acceptEod); /* no cr */
-
- assert(contains(region_map, v));
- const u32 v_region = region_map.at(v);
- if (v_region > last_head_region) {
- DEBUG_PRINTF("bailing, %u > %u\n", v_region, last_head_region);
- first_bad_region = min(first_bad_region, v_region);
- }
- }
-
- if (first_bad_region != ~0U) {
- DEBUG_PRINTF("first bad region is %u\n", first_bad_region);
- *bad_region = first_bad_region;
- return false;
- }
-
- return true;
-}
-
-} // namespace ue2
+ assert(v != g.accept && v != g.acceptEod); /* no cr */
+
+ assert(contains(region_map, v));
+ const u32 v_region = region_map.at(v);
+ if (v_region > last_head_region) {
+ DEBUG_PRINTF("bailing, %u > %u\n", v_region, last_head_region);
+ first_bad_region = min(first_bad_region, v_region);
+ }
+ }
+
+ if (first_bad_region != ~0U) {
+ DEBUG_PRINTF("first bad region is %u\n", first_bad_region);
+ *bad_region = first_bad_region;
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_som_util.h b/contrib/libs/hyperscan/src/nfagraph/ng_som_util.h
index 3f4fcb5b3a..e2d38642c4 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_som_util.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_som_util.h
@@ -1,84 +1,84 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Utility functions related to SOM ("Start of Match").
- */
-
-#ifndef NG_SOM_UTIL_H
-#define NG_SOM_UTIL_H
-
-#include "ng_util.h"
-#include "util/depth.h"
-
-#include <map>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Utility functions related to SOM ("Start of Match").
+ */
+
+#ifndef NG_SOM_UTIL_H
+#define NG_SOM_UTIL_H
+
+#include "ng_util.h"
+#include "util/depth.h"
+
+#include <map>
#include <unordered_map>
-#include <vector>
-
-namespace ue2 {
-
-class NGHolder;
-
-/**
- * Returns min/max distance from start of match, index by vertex_id.
- */
-std::vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g);
-
-/**
- * Returns true if the first match by end-offset must always be the first match
- * by start-offset.
- */
-bool firstMatchIsFirst(const NGHolder &p);
-
-struct smgb_cache : public mbsb_cache {
- explicit smgb_cache(const NGHolder &gg) : mbsb_cache(gg) {}
- std::map<NFAVertex, bool> smgb;
-};
-
-bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
+#include <vector>
+
+namespace ue2 {
+
+class NGHolder;
+
+/**
+ * Returns min/max distance from start of match, index by vertex_id.
+ */
+std::vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g);
+
+/**
+ * Returns true if the first match by end-offset must always be the first match
+ * by start-offset.
+ */
+bool firstMatchIsFirst(const NGHolder &p);
+
+struct smgb_cache : public mbsb_cache {
+ explicit smgb_cache(const NGHolder &gg) : mbsb_cache(gg) {}
+ std::map<NFAVertex, bool> smgb;
+};
+
+bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
const std::unordered_map<NFAVertex, u32> &region_map,
- smgb_cache &cache);
-
-/**
- * Returns true if matching 'sent' causes all tail states in the main graph \a
- * g to go dead. A tail state is any state with a region greater than
- * \a last_head_region.
- *
- * - The graph \a sent must be a "kinda-DAG", where the only back-edges present
- * are self-loops.
- * - If the result is false, \a bad_region will be updated with the smallest
- * region ID associated with a tail state that is still on.
- */
-bool sentClearsTail(const NGHolder &g,
+ smgb_cache &cache);
+
+/**
+ * Returns true if matching 'sent' causes all tail states in the main graph \a
+ * g to go dead. A tail state is any state with a region greater than
+ * \a last_head_region.
+ *
+ * - The graph \a sent must be a "kinda-DAG", where the only back-edges present
+ * are self-loops.
+ * - If the result is false, \a bad_region will be updated with the smallest
+ * region ID associated with a tail state that is still on.
+ */
+bool sentClearsTail(const NGHolder &g,
const std::unordered_map<NFAVertex, u32> &region_map,
- const NGHolder &sent, u32 last_head_region,
- u32 *bad_region);
-
-} // namespace ue2
-
-#endif // NG_SOM_UTIL_H
+ const NGHolder &sent, u32 last_head_region,
+ u32 *bad_region);
+
+} // namespace ue2
+
+#endif // NG_SOM_UTIL_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_split.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_split.cpp
index 73170a9104..91a099fc38 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_split.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_split.cpp
@@ -1,244 +1,244 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Functions for splitting NFAGraphs into LHS and RHS.
- */
-#include "ng_split.h"
-
-#include "ng_holder.h"
-#include "ng_prune.h"
-#include "ng_util.h"
-#include "util/container.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-
-#include <map>
-#include <set>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-static
-void clearAccepts(NGHolder &g) {
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- g[v].reports.clear();
- }
-
- for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
- g[v].reports.clear();
- }
-
- clear_in_edges(g.accept, g);
- clear_in_edges(g.acceptEod, g);
- add_edge(g.accept, g.acceptEod, g);
-}
-
-static
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Functions for splitting NFAGraphs into LHS and RHS.
+ */
+#include "ng_split.h"
+
+#include "ng_holder.h"
+#include "ng_prune.h"
+#include "ng_util.h"
+#include "util/container.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+static
+void clearAccepts(NGHolder &g) {
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ g[v].reports.clear();
+ }
+
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ g[v].reports.clear();
+ }
+
+ clear_in_edges(g.accept, g);
+ clear_in_edges(g.acceptEod, g);
+ add_edge(g.accept, g.acceptEod, g);
+}
+
+static
void filterSplitMap(const NGHolder &g,
unordered_map<NFAVertex, NFAVertex> *out_map) {
unordered_set<NFAVertex> verts;
- insert(&verts, vertices(g));
+ insert(&verts, vertices(g));
auto it = out_map->begin();
- while (it != out_map->end()) {
+ while (it != out_map->end()) {
auto jt = it;
- ++it;
- if (!contains(verts, jt->second)) {
- out_map->erase(jt);
- }
- }
-}
-
-static
-void splitLHS(const NGHolder &base, const vector<NFAVertex> &pivots,
+ ++it;
+ if (!contains(verts, jt->second)) {
+ out_map->erase(jt);
+ }
+ }
+}
+
+static
+void splitLHS(const NGHolder &base, const vector<NFAVertex> &pivots,
const vector<NFAVertex> &rhs_pivots, NGHolder *lhs,
unordered_map<NFAVertex, NFAVertex> *lhs_map) {
- assert(lhs && lhs_map);
-
- cloneHolder(*lhs, base, lhs_map);
-
- clearAccepts(*lhs);
-
- for (auto pivot : pivots) {
+ assert(lhs && lhs_map);
+
+ cloneHolder(*lhs, base, lhs_map);
+
+ clearAccepts(*lhs);
+
+ for (auto pivot : pivots) {
DEBUG_PRINTF("pivot is %zu lv %zu lm %zu\n", base[pivot].index,
- num_vertices(*lhs), lhs_map->size());
- assert(contains(*lhs_map, pivot));
-
- for (auto v : rhs_pivots) {
- assert(contains(*lhs_map, v));
- remove_edge((*lhs_map)[pivot], (*lhs_map)[v], *lhs);
- }
-
- (*lhs)[(*lhs_map)[pivot]].reports.insert(0);
- add_edge((*lhs_map)[pivot], lhs->accept, *lhs);
- }
-
+ num_vertices(*lhs), lhs_map->size());
+ assert(contains(*lhs_map, pivot));
+
+ for (auto v : rhs_pivots) {
+ assert(contains(*lhs_map, v));
+ remove_edge((*lhs_map)[pivot], (*lhs_map)[v], *lhs);
+ }
+
+ (*lhs)[(*lhs_map)[pivot]].reports.insert(0);
+ add_edge((*lhs_map)[pivot], lhs->accept, *lhs);
+ }
+
/* should do the renumbering unconditionally as we know edges are already
* misnumbered */
pruneUseless(*lhs, false);
renumber_edges(*lhs);
renumber_vertices(*lhs);
- filterSplitMap(*lhs, lhs_map);
-
- switch (base.kind) {
- case NFA_PREFIX:
- case NFA_OUTFIX:
- lhs->kind = NFA_PREFIX;
- break;
- case NFA_INFIX:
- case NFA_SUFFIX:
- lhs->kind = NFA_INFIX;
- break;
+ filterSplitMap(*lhs, lhs_map);
+
+ switch (base.kind) {
+ case NFA_PREFIX:
+ case NFA_OUTFIX:
+ lhs->kind = NFA_PREFIX;
+ break;
+ case NFA_INFIX:
+ case NFA_SUFFIX:
+ lhs->kind = NFA_INFIX;
+ break;
case NFA_EAGER_PREFIX:
/* Current code should not be assigning eager until well after all the
* splitting is done. */
assert(0);
lhs->kind = NFA_EAGER_PREFIX;
break;
- case NFA_REV_PREFIX:
+ case NFA_REV_PREFIX:
case NFA_OUTFIX_RAW:
- assert(0);
- break;
- }
-}
-
-void splitLHS(const NGHolder &base, NFAVertex pivot,
+ assert(0);
+ break;
+ }
+}
+
+void splitLHS(const NGHolder &base, NFAVertex pivot,
NGHolder *lhs, unordered_map<NFAVertex, NFAVertex> *lhs_map) {
- vector<NFAVertex> pivots(1, pivot);
- vector<NFAVertex> rhs_pivots;
- insert(&rhs_pivots, rhs_pivots.end(), adjacent_vertices(pivot, base));
- splitLHS(base, pivots, rhs_pivots, lhs, lhs_map);
-}
-
-void splitRHS(const NGHolder &base, const vector<NFAVertex> &pivots,
+ vector<NFAVertex> pivots(1, pivot);
+ vector<NFAVertex> rhs_pivots;
+ insert(&rhs_pivots, rhs_pivots.end(), adjacent_vertices(pivot, base));
+ splitLHS(base, pivots, rhs_pivots, lhs, lhs_map);
+}
+
+void splitRHS(const NGHolder &base, const vector<NFAVertex> &pivots,
NGHolder *rhs, unordered_map<NFAVertex, NFAVertex> *rhs_map) {
- assert(rhs && rhs_map);
-
- cloneHolder(*rhs, base, rhs_map);
-
- clear_out_edges(rhs->start, *rhs);
- clear_out_edges(rhs->startDs, *rhs);
- add_edge(rhs->start, rhs->startDs, *rhs);
- add_edge(rhs->startDs, rhs->startDs, *rhs);
-
- for (auto pivot : pivots) {
- assert(contains(*rhs_map, pivot));
+ assert(rhs && rhs_map);
+
+ cloneHolder(*rhs, base, rhs_map);
+
+ clear_out_edges(rhs->start, *rhs);
+ clear_out_edges(rhs->startDs, *rhs);
+ add_edge(rhs->start, rhs->startDs, *rhs);
+ add_edge(rhs->startDs, rhs->startDs, *rhs);
+
+ for (auto pivot : pivots) {
+ assert(contains(*rhs_map, pivot));
NFAEdge e = add_edge(rhs->start, (*rhs_map)[pivot], *rhs);
(*rhs)[e].tops.insert(DEFAULT_TOP);
- }
+ }
/* should do the renumbering unconditionally as we know edges are already
* misnumbered */
pruneUseless(*rhs, false);
renumber_edges(*rhs);
renumber_vertices(*rhs);
- filterSplitMap(*rhs, rhs_map);
-
- switch (base.kind) {
- case NFA_PREFIX:
- case NFA_INFIX:
- rhs->kind = NFA_INFIX;
- break;
- case NFA_SUFFIX:
- case NFA_OUTFIX:
- rhs->kind = NFA_SUFFIX;
- break;
+ filterSplitMap(*rhs, rhs_map);
+
+ switch (base.kind) {
+ case NFA_PREFIX:
+ case NFA_INFIX:
+ rhs->kind = NFA_INFIX;
+ break;
+ case NFA_SUFFIX:
+ case NFA_OUTFIX:
+ rhs->kind = NFA_SUFFIX;
+ break;
case NFA_EAGER_PREFIX:
/* Current code should not be assigning eager until well after all the
* splitting is done. */
assert(0);
rhs->kind = NFA_INFIX;
break;
- case NFA_REV_PREFIX:
+ case NFA_REV_PREFIX:
case NFA_OUTFIX_RAW:
- assert(0);
- break;
- }
-}
-
-/** \brief Fills \a succ with the common successors of the vertices in \a
- * pivots. */
-static
-void findCommonSuccessors(const NGHolder &g, const vector<NFAVertex> &pivots,
- vector<NFAVertex> &succ) {
- assert(!pivots.empty());
-
+ assert(0);
+ break;
+ }
+}
+
+/** \brief Fills \a succ with the common successors of the vertices in \a
+ * pivots. */
+static
+void findCommonSuccessors(const NGHolder &g, const vector<NFAVertex> &pivots,
+ vector<NFAVertex> &succ) {
+ assert(!pivots.empty());
+
set<NFAVertex> adj;
set<NFAVertex> adj_temp;
-
- insert(&adj, adjacent_vertices(pivots.at(0), g));
-
- for (auto it = pivots.begin() + 1, ite = pivots.end(); it != ite; ++it) {
- NFAVertex pivot = *it;
- adj_temp.clear();
- for (auto v : adjacent_vertices_range(pivot, g)) {
- if (contains(adj, v)) {
- adj_temp.insert(v);
- }
- }
- adj.swap(adj_temp);
- }
-
- succ.insert(succ.end(), adj.begin(), adj.end());
-}
-
-void splitGraph(const NGHolder &base, const vector<NFAVertex> &pivots,
+
+ insert(&adj, adjacent_vertices(pivots.at(0), g));
+
+ for (auto it = pivots.begin() + 1, ite = pivots.end(); it != ite; ++it) {
+ NFAVertex pivot = *it;
+ adj_temp.clear();
+ for (auto v : adjacent_vertices_range(pivot, g)) {
+ if (contains(adj, v)) {
+ adj_temp.insert(v);
+ }
+ }
+ adj.swap(adj_temp);
+ }
+
+ succ.insert(succ.end(), adj.begin(), adj.end());
+}
+
+void splitGraph(const NGHolder &base, const vector<NFAVertex> &pivots,
NGHolder *lhs, unordered_map<NFAVertex, NFAVertex> *lhs_map,
NGHolder *rhs, unordered_map<NFAVertex, NFAVertex> *rhs_map) {
- DEBUG_PRINTF("splitting graph at %zu vertices\n", pivots.size());
-
- assert(!has_parallel_edge(base));
+ DEBUG_PRINTF("splitting graph at %zu vertices\n", pivots.size());
+
+ assert(!has_parallel_edge(base));
assert(isCorrectlyTopped(base));
-
- /* RHS pivots are built from the common set of successors of pivots. */
- vector<NFAVertex> rhs_pivots;
- findCommonSuccessors(base, pivots, rhs_pivots);
-
- /* generate lhs */
- splitLHS(base, pivots, rhs_pivots, lhs, lhs_map);
-
- /* generate the rhs */
- splitRHS(base, rhs_pivots, rhs, rhs_map);
-
- assert(!has_parallel_edge(*lhs));
- assert(!has_parallel_edge(*rhs));
+
+ /* RHS pivots are built from the common set of successors of pivots. */
+ vector<NFAVertex> rhs_pivots;
+ findCommonSuccessors(base, pivots, rhs_pivots);
+
+ /* generate lhs */
+ splitLHS(base, pivots, rhs_pivots, lhs, lhs_map);
+
+ /* generate the rhs */
+ splitRHS(base, rhs_pivots, rhs, rhs_map);
+
+ assert(!has_parallel_edge(*lhs));
+ assert(!has_parallel_edge(*rhs));
assert(isCorrectlyTopped(*lhs));
assert(isCorrectlyTopped(*rhs));
-}
-
-void splitGraph(const NGHolder &base, NFAVertex pivot,
+}
+
+void splitGraph(const NGHolder &base, NFAVertex pivot,
NGHolder *lhs, unordered_map<NFAVertex, NFAVertex> *lhs_map,
NGHolder *rhs, unordered_map<NFAVertex, NFAVertex> *rhs_map) {
- vector<NFAVertex> pivots(1, pivot);
- splitGraph(base, pivots, lhs, lhs_map, rhs, rhs_map);
-}
-
-} // namespace ue2
+ vector<NFAVertex> pivots(1, pivot);
+ splitGraph(base, pivots, lhs, lhs_map, rhs, rhs_map);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_split.h b/contrib/libs/hyperscan/src/nfagraph/ng_split.h
index 3867cb76f6..9ddc033257 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_split.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_split.h
@@ -1,76 +1,76 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Functions for splitting NFAGraphs into LHS and RHS.
- */
-
-#ifndef NG_SPLIT_H
-#define NG_SPLIT_H
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Functions for splitting NFAGraphs into LHS and RHS.
+ */
+
+#ifndef NG_SPLIT_H
+#define NG_SPLIT_H
+
#include "ng_holder.h"
#include <unordered_map>
-#include <vector>
-
-namespace ue2 {
-
-class NGHolder;
-
-/** Note: pivot should be a vertex that dominates acceptEod. Treating 'in'
- * allocated to rhs if they are reachable from the pivot. Conversely, a vertex
- * is in the lhs if it is reachable from start without going through the
- * pivot. The pivot ends up in the LHS and any adjacent vertices in the RHS.
- *
+#include <vector>
+
+namespace ue2 {
+
+class NGHolder;
+
+/** Note: pivot should be a vertex that dominates acceptEod. Treating 'in'
+ * allocated to rhs if they are reachable from the pivot. Conversely, a vertex
+ * is in the lhs if it is reachable from start without going through the
+ * pivot. The pivot ends up in the LHS and any adjacent vertices in the RHS.
+ *
* Note: The RHS is setup to be triggered by TOP 0
*
- * When multiple split vertices are provided:
- * - RHS contains all vertices reachable from every pivot
- * - LHS contains all vertices which are reachable from start ignoring any
- * vertices which have an edge to every pivot
- */
-void splitGraph(const NGHolder &base, NFAVertex pivot, NGHolder *lhs,
+ * When multiple split vertices are provided:
+ * - RHS contains all vertices reachable from every pivot
+ * - LHS contains all vertices which are reachable from start ignoring any
+ * vertices which have an edge to every pivot
+ */
+void splitGraph(const NGHolder &base, NFAVertex pivot, NGHolder *lhs,
std::unordered_map<NFAVertex, NFAVertex> *lhs_map,
- NGHolder *rhs,
+ NGHolder *rhs,
std::unordered_map<NFAVertex, NFAVertex> *rhs_map);
-
-void splitGraph(const NGHolder &base, const std::vector<NFAVertex> &pivots,
- NGHolder *lhs,
+
+void splitGraph(const NGHolder &base, const std::vector<NFAVertex> &pivots,
+ NGHolder *lhs,
std::unordered_map<NFAVertex, NFAVertex> *lhs_map,
- NGHolder *rhs,
+ NGHolder *rhs,
std::unordered_map<NFAVertex, NFAVertex> *rhs_map);
-
-void splitLHS(const NGHolder &base, NFAVertex pivot, NGHolder *lhs,
+
+void splitLHS(const NGHolder &base, NFAVertex pivot, NGHolder *lhs,
std::unordered_map<NFAVertex, NFAVertex> *lhs_map);
-
-void splitRHS(const NGHolder &base, const std::vector<NFAVertex> &pivots,
+
+void splitRHS(const NGHolder &base, const std::vector<NFAVertex> &pivots,
NGHolder *rhs, std::unordered_map<NFAVertex, NFAVertex> *rhs_map);
-
-} // namespace ue2
-
-#endif // NG_SPLIT_H
+
+} // namespace ue2
+
+#endif // NG_SPLIT_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_squash.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_squash.cpp
index ac788157b0..03495d1441 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_squash.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_squash.cpp
@@ -1,324 +1,324 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief NFA graph state squashing analysis.
- *
- * The basic idea behind the state squashing is that when we are in a cyclic
- * state v there are certain other states which are completely irrelevant. This
- * is used primarily by the determinisation process to produce smaller DFAs by
- * not tracking irrelevant states. It's also used by the LimEx NFA model.
- *
- * Working out which states we can ignore mainly uses the post-dominator
- * analysis.
- *
- * ### Dot Squash Masks:
- *
- * The following vertices are added to the squash mask:
- * - (1) Any vertex post-dominated by the cyclic dot state
- * - (2) Any other vertex post-dominated by the cyclic dot state's successors
- * - (3) Any vertex post-dominated by a predecessor of the cyclic dot state -
- * provided the predecessor's successors are a subset of the cyclic state's
- * successors [For (3), the term successor also includes report information]
- *
- * (2) and (3) allow us to get squash masks from .* as well as .+
- *
- * The squash masks are not optimal especially in the case where there
- * alternations on both sides - for example in:
- *
- * /foo(bar|baz).*(abc|xyz)/s
- *
- * 'foo' is irrelevant once the dot star is hit, but it has no post-dominators
- * so isn't picked up ('bar' and 'baz' are picked up by (2)). We may be able to
- * do a more complete analysis based on cutting the graph and seeing which
- * vertices are unreachable but the current approach is quick and probably
- * adequate.
- *
- *
- * ### Non-Dot Squash Masks:
- *
- * As for dot states. However, if anything in a pdom tree falls outside the
- * character range of the cyclic state the whole pdom tree is ignored. Also when
- * considering the predecessor's pdom tree it is necessary to verify that the
- * predecessor's character reachability falls within that of the cyclic state.
- *
- * We could do better in this case by not throwing away the whole pdom tree -
- * however the bits which we can keep are not clear from the pdom tree of the
- * cyclic state - it probably can be based on the dom or pdom tree of the bad
- * vertex.
- *
- * An example of us doing badly is:
- *
- * /HTTP.*Referer[^\n]*google/s
- *
- * as '[\\n]*' doesn't get a squash mask at all due to .* but we should be able
- * to squash 'Referer'.
- *
- * ### Extension:
- *
- * If a state leads solely to a squashable state (or its immediate successors)
- * with the same reachability we can make this state a squash state of any of
- * the original states squashees which we postdominate. Could probably tighten
- * this up but it would require thought. May not need to keep the original
- * squasher around but that would also require thought.
- *
- * ### SOM Notes:
- *
- * If (left) start of match is required, it is illegal to squash any state which
- * may result in an early start of match reaching the squashing state.
- */
-
-#include "config.h"
-
-#include "ng_squash.h"
-
-#include "ng_dominators.h"
-#include "ng_dump.h"
-#include "ng_holder.h"
-#include "ng_prune.h"
-#include "ng_region.h"
-#include "ng_som_util.h"
-#include "ng_util.h"
-#include "util/container.h"
-#include "util/graph_range.h"
-#include "util/report_manager.h"
-#include "ue2common.h"
-
-#include <deque>
-#include <map>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief NFA graph state squashing analysis.
+ *
+ * The basic idea behind the state squashing is that when we are in a cyclic
+ * state v there are certain other states which are completely irrelevant. This
+ * is used primarily by the determinisation process to produce smaller DFAs by
+ * not tracking irrelevant states. It's also used by the LimEx NFA model.
+ *
+ * Working out which states we can ignore mainly uses the post-dominator
+ * analysis.
+ *
+ * ### Dot Squash Masks:
+ *
+ * The following vertices are added to the squash mask:
+ * - (1) Any vertex post-dominated by the cyclic dot state
+ * - (2) Any other vertex post-dominated by the cyclic dot state's successors
+ * - (3) Any vertex post-dominated by a predecessor of the cyclic dot state -
+ * provided the predecessor's successors are a subset of the cyclic state's
+ * successors [For (3), the term successor also includes report information]
+ *
+ * (2) and (3) allow us to get squash masks from .* as well as .+
+ *
+ * The squash masks are not optimal especially in the case where there
+ * are alternations on both sides - for example in:
+ *
+ * /foo(bar|baz).*(abc|xyz)/s
+ *
+ * 'foo' is irrelevant once the dot star is hit, but it has no post-dominators
+ * so isn't picked up ('bar' and 'baz' are picked up by (2)). We may be able to
+ * do a more complete analysis based on cutting the graph and seeing which
+ * vertices are unreachable but the current approach is quick and probably
+ * adequate.
+ *
+ *
+ * ### Non-Dot Squash Masks:
+ *
+ * As for dot states. However, if anything in a pdom tree falls outside the
+ * character range of the cyclic state the whole pdom tree is ignored. Also when
+ * considering the predecessor's pdom tree it is necessary to verify that the
+ * predecessor's character reachability falls within that of the cyclic state.
+ *
+ * We could do better in this case by not throwing away the whole pdom tree -
+ * however the bits which we can keep are not clear from the pdom tree of the
+ * cyclic state - it probably can be based on the dom or pdom tree of the bad
+ * vertex.
+ *
+ * An example of us doing badly is:
+ *
+ * /HTTP.*Referer[^\n]*google/s
+ *
+ * as '[^\\n]*' doesn't get a squash mask at all due to .* but we should be able
+ * to squash 'Referer'.
+ *
+ * ### Extension:
+ *
+ * If a state leads solely to a squashable state (or its immediate successors)
+ * with the same reachability we can make this state a squash state of any of
+ * the original state's squashees which we postdominate. Could probably tighten
+ * this up but it would require thought. May not need to keep the original
+ * squasher around but that would also require thought.
+ *
+ * ### SOM Notes:
+ *
+ * If (left) start of match is required, it is illegal to squash any state which
+ * may result in an early start of match reaching the squashing state.
+ */
+
+#include "config.h"
+
+#include "ng_squash.h"
+
+#include "ng_dominators.h"
+#include "ng_dump.h"
+#include "ng_holder.h"
+#include "ng_prune.h"
+#include "ng_region.h"
+#include "ng_som_util.h"
+#include "ng_util.h"
+#include "util/container.h"
+#include "util/graph_range.h"
+#include "util/report_manager.h"
+#include "ue2common.h"
+
+#include <deque>
+#include <map>
#include <unordered_map>
#include <unordered_set>
-
-#include <boost/graph/depth_first_search.hpp>
-#include <boost/graph/reverse_graph.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
+
+#include <boost/graph/depth_first_search.hpp>
+#include <boost/graph/reverse_graph.hpp>
+
+using namespace std;
+
+namespace ue2 {
+
using PostDomTree = unordered_map<NFAVertex, unordered_set<NFAVertex>>;
-
-static
+
+static
PostDomTree buildPDomTree(const NGHolder &g) {
PostDomTree tree;
tree.reserve(num_vertices(g));
-
+
auto postdominators = findPostDominators(g);
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
- NFAVertex pdom = postdominators[v];
- if (pdom) {
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ NFAVertex pdom = postdominators[v];
+ if (pdom) {
DEBUG_PRINTF("vertex %zu -> %zu\n", g[pdom].index, g[v].index);
- tree[pdom].insert(v);
- }
- }
+ tree[pdom].insert(v);
+ }
+ }
return tree;
-}
-
-/**
- * Builds a squash mask based on the pdom tree of v and the given char reach.
- * The built squash mask is a bit conservative for non-dot cases and could
- * be improved with a bit of thought.
- */
-static
-void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v,
- const CharReach &cr, const NFAStateSet &init,
- const vector<NFAVertex> &vByIndex, const PostDomTree &tree,
- som_type som, const vector<DepthMinMax> &som_depths,
+}
+
+/**
+ * Builds a squash mask based on the pdom tree of v and the given char reach.
+ * The built squash mask is a bit conservative for non-dot cases and could
+ * be improved with a bit of thought.
+ */
+static
+void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v,
+ const CharReach &cr, const NFAStateSet &init,
+ const vector<NFAVertex> &vByIndex, const PostDomTree &tree,
+ som_type som, const vector<DepthMinMax> &som_depths,
const unordered_map<NFAVertex, u32> &region_map,
- smgb_cache &cache) {
+ smgb_cache &cache) {
DEBUG_PRINTF("build base squash mask for vertex %zu)\n", g[v].index);
-
- vector<NFAVertex> q;
-
+
+ vector<NFAVertex> q;
+
auto it = tree.find(v);
- if (it != tree.end()) {
- q.insert(q.end(), it->second.begin(), it->second.end());
- }
-
- const u32 v_index = g[v].index;
-
- while (!q.empty()) {
- NFAVertex u = q.back();
- q.pop_back();
- const CharReach &cru = g[u].char_reach;
-
- if ((cru & ~cr).any()) {
- /* bail: bad cr on vertex u */
- /* TODO: this could be better
- *
- * we still need to ensure that we record any paths leading to u.
- * Hence all vertices R which can reach u must be excluded from the
- * squash mask. Note: R != pdom(u) and there may exist an x in (R -
- * pdom(u)) which is in pdom(y) where y is in q. Clear ?
- */
- mask.set();
- return;
- }
-
- const u32 u_index = g[u].index;
-
- if (som) {
- /* We cannot add a state u to the squash mask of v if it may have an
- * earlier start of match offset. ie for us to add a state u to v
- * maxSomDist(u) <= minSomDist(v)
- */
- const depth &max_som_dist_u = som_depths[u_index].max;
- const depth &min_som_dist_v = som_depths[v_index].min;
-
- if (max_som_dist_u.is_infinite()) {
- /* it is hard to tell due to the INF if u can actually store an
- * earlier SOM than w (state we are building the squash mask
- * for) - need to think more deeply
- */
-
- if (mustBeSetBefore(u, v, g, cache)
- && !somMayGoBackwards(u, g, region_map, cache)) {
- DEBUG_PRINTF("u %u v %u\n", u_index, v_index);
- goto squash_ok;
- }
- }
-
- if (max_som_dist_u > min_som_dist_v) {
- /* u can't be squashed as it may be storing an earlier SOM */
- goto add_children_to_queue;
- }
-
- }
-
- squash_ok:
- mask.set(u_index);
- DEBUG_PRINTF("pdom'ed %u\n", u_index);
- add_children_to_queue:
- it = tree.find(u);
- if (it != tree.end()) {
- q.insert(q.end(), it->second.begin(), it->second.end());
- }
- }
-
- if (cr.all()) {
- /* the init states aren't in the pdom tree. If all their succ states
- * are set (or v), we can consider them post dominated */
-
- /* Note: init states will always result in a later som */
- for (size_t i = init.find_first(); i != init.npos;
- i = init.find_next(i)) {
- /* Yes vacuous patterns do exist */
- NFAVertex iv = vByIndex[i];
- for (auto w : adjacent_vertices_range(iv, g)) {
- if (w == g.accept || w == g.acceptEod) {
- DEBUG_PRINTF("skipping %zu due to vacuous accept\n", i);
- goto next_init_state;
- }
-
- u32 vert_id = g[w].index;
- if (w != iv && w != v && !mask.test(vert_id)) {
- DEBUG_PRINTF("skipping %zu due to %u\n", i, vert_id);
- goto next_init_state;
- }
- }
- DEBUG_PRINTF("pdom'ed %zu\n", i);
- mask.set(i);
- next_init_state:;
- }
- }
-
- mask.flip();
-}
-
-static
-void buildSucc(NFAStateSet &succ, const NGHolder &g, NFAVertex v) {
- for (auto w : adjacent_vertices_range(v, g)) {
- if (!is_special(w, g)) {
- succ.set(g[w].index);
- }
- }
-}
-
-static
-void buildPred(NFAStateSet &pred, const NGHolder &g, NFAVertex v) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (!is_special(u, g)) {
- pred.set(g[u].index);
- }
- }
-}
-
-static
-void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex,
- const PostDomTree &pdom_tree, const NFAStateSet &init,
+ if (it != tree.end()) {
+ q.insert(q.end(), it->second.begin(), it->second.end());
+ }
+
+ const u32 v_index = g[v].index;
+
+ while (!q.empty()) {
+ NFAVertex u = q.back();
+ q.pop_back();
+ const CharReach &cru = g[u].char_reach;
+
+ if ((cru & ~cr).any()) {
+ /* bail: bad cr on vertex u */
+ /* TODO: this could be better
+ *
+ * we still need to ensure that we record any paths leading to u.
+ * Hence all vertices R which can reach u must be excluded from the
+ * squash mask. Note: R != pdom(u) and there may exist an x in (R -
+ * pdom(u)) which is in pdom(y) where y is in q. Clear ?
+ */
+ mask.set();
+ return;
+ }
+
+ const u32 u_index = g[u].index;
+
+ if (som) {
+ /* We cannot add a state u to the squash mask of v if it may have an
+ * earlier start of match offset. ie for us to add a state u to v
+ * maxSomDist(u) <= minSomDist(v)
+ */
+ const depth &max_som_dist_u = som_depths[u_index].max;
+ const depth &min_som_dist_v = som_depths[v_index].min;
+
+ if (max_som_dist_u.is_infinite()) {
+ /* it is hard to tell due to the INF if u can actually store an
+ * earlier SOM than v (state we are building the squash mask
+ * for) - need to think more deeply
+ */
+
+ if (mustBeSetBefore(u, v, g, cache)
+ && !somMayGoBackwards(u, g, region_map, cache)) {
+ DEBUG_PRINTF("u %u v %u\n", u_index, v_index);
+ goto squash_ok;
+ }
+ }
+
+ if (max_som_dist_u > min_som_dist_v) {
+ /* u can't be squashed as it may be storing an earlier SOM */
+ goto add_children_to_queue;
+ }
+
+ }
+
+ squash_ok:
+ mask.set(u_index);
+ DEBUG_PRINTF("pdom'ed %u\n", u_index);
+ add_children_to_queue:
+ it = tree.find(u);
+ if (it != tree.end()) {
+ q.insert(q.end(), it->second.begin(), it->second.end());
+ }
+ }
+
+ if (cr.all()) {
+ /* the init states aren't in the pdom tree. If all their succ states
+ * are set (or v), we can consider them post dominated */
+
+ /* Note: init states will always result in a later som */
+ for (size_t i = init.find_first(); i != init.npos;
+ i = init.find_next(i)) {
+ /* Yes vacuous patterns do exist */
+ NFAVertex iv = vByIndex[i];
+ for (auto w : adjacent_vertices_range(iv, g)) {
+ if (w == g.accept || w == g.acceptEod) {
+ DEBUG_PRINTF("skipping %zu due to vacuous accept\n", i);
+ goto next_init_state;
+ }
+
+ u32 vert_id = g[w].index;
+ if (w != iv && w != v && !mask.test(vert_id)) {
+ DEBUG_PRINTF("skipping %zu due to %u\n", i, vert_id);
+ goto next_init_state;
+ }
+ }
+ DEBUG_PRINTF("pdom'ed %zu\n", i);
+ mask.set(i);
+ next_init_state:;
+ }
+ }
+
+ mask.flip();
+}
+
+static
+void buildSucc(NFAStateSet &succ, const NGHolder &g, NFAVertex v) {
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (!is_special(w, g)) {
+ succ.set(g[w].index);
+ }
+ }
+}
+
+static
+void buildPred(NFAStateSet &pred, const NGHolder &g, NFAVertex v) {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (!is_special(u, g)) {
+ pred.set(g[u].index);
+ }
+ }
+}
+
+static
+void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex,
+ const PostDomTree &pdom_tree, const NFAStateSet &init,
unordered_map<NFAVertex, NFAStateSet> *squash,
som_type som, const vector<DepthMinMax> &som_depths,
const unordered_map<NFAVertex, u32> &region_map,
- smgb_cache &cache) {
- deque<NFAVertex> remaining;
- for (const auto &m : *squash) {
- remaining.push_back(m.first);
- }
-
- while (!remaining.empty()) {
- NFAVertex v = remaining.back();
- remaining.pop_back();
-
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (is_special(u, g)) {
- continue;
- }
-
- if (g[v].char_reach != g[u].char_reach) {
- continue;
- }
-
- if (out_degree(u, g) != 1) {
- continue;
- }
-
- NFAStateSet u_squash(init.size());
+ smgb_cache &cache) {
+ deque<NFAVertex> remaining;
+ for (const auto &m : *squash) {
+ remaining.push_back(m.first);
+ }
+
+ while (!remaining.empty()) {
+ NFAVertex v = remaining.back();
+ remaining.pop_back();
+
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (is_special(u, g)) {
+ continue;
+ }
+
+ if (g[v].char_reach != g[u].char_reach) {
+ continue;
+ }
+
+ if (out_degree(u, g) != 1) {
+ continue;
+ }
+
+ NFAStateSet u_squash(init.size());
size_t u_index = g[u].index;
-
- buildSquashMask(u_squash, g, u, g[u].char_reach, init, vByIndex,
- pdom_tree, som, som_depths, region_map, cache);
-
- u_squash.set(u_index); /* never clear ourselves */
-
- if ((~u_squash).any()) { // i.e. some bits unset in mask
+
+ buildSquashMask(u_squash, g, u, g[u].char_reach, init, vByIndex,
+ pdom_tree, som, som_depths, region_map, cache);
+
+ u_squash.set(u_index); /* never clear ourselves */
+
+ if ((~u_squash).any()) { // i.e. some bits unset in mask
DEBUG_PRINTF("%zu is an upstream squasher of %zu\n", u_index,
- g[v].index);
- (*squash)[u] = u_squash;
- remaining.push_back(u);
- }
- }
- }
-}
-
+ g[v].index);
+ (*squash)[u] = u_squash;
+ remaining.push_back(u);
+ }
+ }
+ }
+}
+
/* If there are redundant states in the graph, it may be possible for two
* sibling .* states to try to squash each other -- which should be prevented.
*
@@ -330,7 +330,7 @@ void clearMutualSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex,
for (auto it = squash.begin(); it != squash.end();) {
NFAVertex a = it->first;
u32 a_index = g[a].index;
-
+
NFAStateSet a_squash = ~it->second; /* default is mask of survivors */
for (auto b_index = a_squash.find_first(); b_index != a_squash.npos;
b_index = a_squash.find_next(b_index)) {
@@ -365,336 +365,336 @@ unordered_map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g,
som_type som) {
unordered_map<NFAVertex, NFAStateSet> squash;
- // Number of bits to use for all our masks. If we're a triggered graph,
- // tops have already been assigned, so we don't have to account for them.
- const u32 numStates = num_vertices(g);
-
- // Build post-dominator tree.
+ // Number of bits to use for all our masks. If we're a triggered graph,
+ // tops have already been assigned, so we don't have to account for them.
+ const u32 numStates = num_vertices(g);
+
+ // Build post-dominator tree.
auto pdom_tree = buildPDomTree(g);
-
- // Build list of vertices by state ID and a set of init states.
+
+ // Build list of vertices by state ID and a set of init states.
vector<NFAVertex> vByIndex(numStates, NGHolder::null_vertex());
- NFAStateSet initStates(numStates);
- smgb_cache cache(g);
-
- // Mappings used for SOM mode calculations, otherwise left empty.
- unordered_map<NFAVertex, u32> region_map;
- vector<DepthMinMax> som_depths;
- if (som) {
- region_map = assignRegions(g);
- som_depths = getDistancesFromSOM(g);
- }
-
- for (auto v : vertices_range(g)) {
- const u32 vert_id = g[v].index;
- DEBUG_PRINTF("vertex %u/%u\n", vert_id, numStates);
- assert(vert_id < numStates);
- vByIndex[vert_id] = v;
-
- if (is_any_start(v, g) || !in_degree(v, g)) {
- initStates.set(vert_id);
- }
- }
-
- for (u32 i = 0; i < numStates; i++) {
- NFAVertex v = vByIndex[i];
+ NFAStateSet initStates(numStates);
+ smgb_cache cache(g);
+
+ // Mappings used for SOM mode calculations, otherwise left empty.
+ unordered_map<NFAVertex, u32> region_map;
+ vector<DepthMinMax> som_depths;
+ if (som) {
+ region_map = assignRegions(g);
+ som_depths = getDistancesFromSOM(g);
+ }
+
+ for (auto v : vertices_range(g)) {
+ const u32 vert_id = g[v].index;
+ DEBUG_PRINTF("vertex %u/%u\n", vert_id, numStates);
+ assert(vert_id < numStates);
+ vByIndex[vert_id] = v;
+
+ if (is_any_start(v, g) || !in_degree(v, g)) {
+ initStates.set(vert_id);
+ }
+ }
+
+ for (u32 i = 0; i < numStates; i++) {
+ NFAVertex v = vByIndex[i];
assert(v != NGHolder::null_vertex());
- const CharReach &cr = g[v].char_reach;
-
- /* only non-init cyclics can be squashers */
- if (!hasSelfLoop(v, g) || initStates.test(i)) {
- continue;
- }
-
- DEBUG_PRINTF("state %u is cyclic\n", i);
-
- NFAStateSet mask(numStates), succ(numStates), pred(numStates);
- buildSquashMask(mask, g, v, cr, initStates, vByIndex, pdom_tree, som,
- som_depths, region_map, cache);
- buildSucc(succ, g, v);
- buildPred(pred, g, v);
- const auto &reports = g[v].reports;
-
- for (size_t j = succ.find_first(); j != succ.npos;
- j = succ.find_next(j)) {
- NFAVertex vj = vByIndex[j];
- NFAStateSet pred2(numStates);
- buildPred(pred2, g, vj);
- if (pred2 == pred) {
- DEBUG_PRINTF("adding the sm from %zu to %u's sm\n", j, i);
- NFAStateSet tmp(numStates);
- buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree,
- som, som_depths, region_map, cache);
- mask &= tmp;
- }
- }
-
- for (size_t j = pred.find_first(); j != pred.npos;
- j = pred.find_next(j)) {
- NFAVertex vj = vByIndex[j];
- NFAStateSet succ2(numStates);
- buildSucc(succ2, g, vj);
- /* we can use j as a basis for squashing if its succs are a subset
- * of ours */
- if ((succ2 & ~succ).any()) {
- continue;
- }
-
- if (som) {
- /* We cannot use j to add to the squash mask of v if it may
- * have an earlier start of match offset. ie for us j as a
- * basis for the squash mask of v we require:
- * maxSomDist(j) <= minSomDist(v)
- */
-
- /* ** TODO ** */
-
- const depth &max_som_dist_j =
- som_depths[g[vj].index].max;
- const depth &min_som_dist_v =
- som_depths[g[v].index].min;
- if (max_som_dist_j > min_som_dist_v ||
- max_som_dist_j.is_infinite()) {
- /* j can't be used as it may be storing an earlier SOM */
- continue;
- }
- }
-
- const CharReach &crv = g[vj].char_reach;
-
- /* we also require that j's report information be a subset of ours
- */
- bool seen_special = false;
- for (auto w : adjacent_vertices_range(vj, g)) {
- if (is_special(w, g)) {
- if (!edge(v, w, g).second) {
- goto next_j;
- }
- seen_special = true;
- }
- }
-
- // FIXME: should be subset check?
- if (seen_special && g[vj].reports != reports) {
- continue;
- }
-
- /* ok we can use j */
- if ((crv & ~cr).none()) {
- NFAStateSet tmp(numStates);
- buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree,
- som, som_depths, region_map, cache);
- mask &= tmp;
- mask.reset(j);
- }
-
- next_j:;
- }
-
- mask.set(i); /* never clear ourselves */
-
- if ((~mask).any()) { // i.e. some bits unset in mask
- DEBUG_PRINTF("%u squashes %zu other states\n", i, (~mask).count());
- squash.emplace(v, mask);
- }
- }
-
- findDerivedSquashers(g, vByIndex, pdom_tree, initStates, &squash, som,
- som_depths, region_map, cache);
-
+ const CharReach &cr = g[v].char_reach;
+
+ /* only non-init cyclics can be squashers */
+ if (!hasSelfLoop(v, g) || initStates.test(i)) {
+ continue;
+ }
+
+ DEBUG_PRINTF("state %u is cyclic\n", i);
+
+ NFAStateSet mask(numStates), succ(numStates), pred(numStates);
+ buildSquashMask(mask, g, v, cr, initStates, vByIndex, pdom_tree, som,
+ som_depths, region_map, cache);
+ buildSucc(succ, g, v);
+ buildPred(pred, g, v);
+ const auto &reports = g[v].reports;
+
+ for (size_t j = succ.find_first(); j != succ.npos;
+ j = succ.find_next(j)) {
+ NFAVertex vj = vByIndex[j];
+ NFAStateSet pred2(numStates);
+ buildPred(pred2, g, vj);
+ if (pred2 == pred) {
+ DEBUG_PRINTF("adding the sm from %zu to %u's sm\n", j, i);
+ NFAStateSet tmp(numStates);
+ buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree,
+ som, som_depths, region_map, cache);
+ mask &= tmp;
+ }
+ }
+
+ for (size_t j = pred.find_first(); j != pred.npos;
+ j = pred.find_next(j)) {
+ NFAVertex vj = vByIndex[j];
+ NFAStateSet succ2(numStates);
+ buildSucc(succ2, g, vj);
+ /* we can use j as a basis for squashing if its succs are a subset
+ * of ours */
+ if ((succ2 & ~succ).any()) {
+ continue;
+ }
+
+ if (som) {
+ /* We cannot use j to add to the squash mask of v if it may
+ * have an earlier start of match offset. ie for us to use j as a
+ * basis for the squash mask of v we require:
+ * maxSomDist(j) <= minSomDist(v)
+ */
+
+ /* ** TODO ** */
+
+ const depth &max_som_dist_j =
+ som_depths[g[vj].index].max;
+ const depth &min_som_dist_v =
+ som_depths[g[v].index].min;
+ if (max_som_dist_j > min_som_dist_v ||
+ max_som_dist_j.is_infinite()) {
+ /* j can't be used as it may be storing an earlier SOM */
+ continue;
+ }
+ }
+
+ const CharReach &crv = g[vj].char_reach;
+
+ /* we also require that j's report information be a subset of ours
+ */
+ bool seen_special = false;
+ for (auto w : adjacent_vertices_range(vj, g)) {
+ if (is_special(w, g)) {
+ if (!edge(v, w, g).second) {
+ goto next_j;
+ }
+ seen_special = true;
+ }
+ }
+
+ // FIXME: should be subset check?
+ if (seen_special && g[vj].reports != reports) {
+ continue;
+ }
+
+ /* ok we can use j */
+ if ((crv & ~cr).none()) {
+ NFAStateSet tmp(numStates);
+ buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree,
+ som, som_depths, region_map, cache);
+ mask &= tmp;
+ mask.reset(j);
+ }
+
+ next_j:;
+ }
+
+ mask.set(i); /* never clear ourselves */
+
+ if ((~mask).any()) { // i.e. some bits unset in mask
+ DEBUG_PRINTF("%u squashes %zu other states\n", i, (~mask).count());
+ squash.emplace(v, mask);
+ }
+ }
+
+ findDerivedSquashers(g, vByIndex, pdom_tree, initStates, &squash, som,
+ som_depths, region_map, cache);
+
clearMutualSquashers(g, vByIndex, squash);
- return squash;
-}
-
-#define MIN_PURE_ACYCLIC_SQUASH 10 /** magic number */
-
-/** Some squash states are clearly not advantageous in the NFA, as they do
- * incur the cost of an exception:
- * -# acyclic states
- * -# squash only a few acyclic states
- */
-void filterSquashers(const NGHolder &g,
+ return squash;
+}
+
+#define MIN_PURE_ACYCLIC_SQUASH 10 /** magic number */
+
+/** Some squash states are clearly not advantageous in the NFA, as they do
+ * incur the cost of an exception:
+ * -# acyclic states
+ * -# squash only a few acyclic states
+ */
+void filterSquashers(const NGHolder &g,
unordered_map<NFAVertex, NFAStateSet> &squash) {
assert(hasCorrectlyNumberedVertices(g));
- DEBUG_PRINTF("filtering\n");
+ DEBUG_PRINTF("filtering\n");
vector<NFAVertex> rev(num_vertices(g)); /* vertex_index -> vertex */
- for (auto v : vertices_range(g)) {
- rev[g[v].index] = v;
- }
-
- for (auto v : vertices_range(g)) {
- if (!contains(squash, v)) {
- continue;
- }
+ for (auto v : vertices_range(g)) {
+ rev[g[v].index] = v;
+ }
+
+ for (auto v : vertices_range(g)) {
+ if (!contains(squash, v)) {
+ continue;
+ }
DEBUG_PRINTF("looking at squash set for vertex %zu\n", g[v].index);
-
- if (!hasSelfLoop(v, g)) {
- DEBUG_PRINTF("acyclic\n");
- squash.erase(v);
- continue;
- }
-
- NFAStateSet squashed = squash[v];
- squashed.flip(); /* default sense for mask of survivors */
+
+ if (!hasSelfLoop(v, g)) {
+ DEBUG_PRINTF("acyclic\n");
+ squash.erase(v);
+ continue;
+ }
+
+ NFAStateSet squashed = squash[v];
+ squashed.flip(); /* default sense for mask of survivors */
for (auto sq = squashed.find_first(); sq != squashed.npos;
sq = squashed.find_next(sq)) {
- NFAVertex u = rev[sq];
- if (hasSelfLoop(u, g)) {
- DEBUG_PRINTF("squashing a cyclic (%zu) is always good\n", sq);
- goto next_vertex;
- }
- }
-
- if (squashed.count() < MIN_PURE_ACYCLIC_SQUASH) {
- DEBUG_PRINTF("squash set too small\n");
- squash.erase(v);
- continue;
- }
-
- next_vertex:;
- DEBUG_PRINTF("squash set ok\n");
- }
-}
-
-static
-void getHighlanderReporters(const NGHolder &g, const NFAVertex accept,
- const ReportManager &rm,
- set<NFAVertex> &verts) {
- for (auto v : inv_adjacent_vertices_range(accept, g)) {
- if (v == g.accept) {
- continue;
- }
-
- const auto &reports = g[v].reports;
- if (reports.empty()) {
- assert(0);
- continue;
- }
-
- // Must be _all_ highlander callback reports.
- for (auto report : reports) {
- const Report &ir = rm.getReport(report);
- if (ir.ekey == INVALID_EKEY || ir.type != EXTERNAL_CALLBACK) {
- goto next_vertex;
- }
-
- // If there's any bounds, these are handled outside the NFA and
- // probably shouldn't be pre-empted.
- if (ir.hasBounds()) {
- goto next_vertex;
- }
- }
-
- verts.insert(v);
- next_vertex:
- continue;
- }
-}
-
-static
-void removeEdgesToAccept(NGHolder &g, NFAVertex v) {
- const auto &reports = g[v].reports;
- assert(!reports.empty());
-
- // We remove any accept edge with a non-empty subset of the reports of v.
-
- set<NFAEdge> dead;
-
- for (const auto &e : in_edges_range(g.accept, g)) {
- NFAVertex u = source(e, g);
- const auto &r = g[u].reports;
- if (!r.empty() && is_subset_of(r, reports)) {
+ NFAVertex u = rev[sq];
+ if (hasSelfLoop(u, g)) {
+ DEBUG_PRINTF("squashing a cyclic (%zu) is always good\n", sq);
+ goto next_vertex;
+ }
+ }
+
+ if (squashed.count() < MIN_PURE_ACYCLIC_SQUASH) {
+ DEBUG_PRINTF("squash set too small\n");
+ squash.erase(v);
+ continue;
+ }
+
+ next_vertex:;
+ DEBUG_PRINTF("squash set ok\n");
+ }
+}
+
+static
+void getHighlanderReporters(const NGHolder &g, const NFAVertex accept,
+ const ReportManager &rm,
+ set<NFAVertex> &verts) {
+ for (auto v : inv_adjacent_vertices_range(accept, g)) {
+ if (v == g.accept) {
+ continue;
+ }
+
+ const auto &reports = g[v].reports;
+ if (reports.empty()) {
+ assert(0);
+ continue;
+ }
+
+ // Must be _all_ highlander callback reports.
+ for (auto report : reports) {
+ const Report &ir = rm.getReport(report);
+ if (ir.ekey == INVALID_EKEY || ir.type != EXTERNAL_CALLBACK) {
+ goto next_vertex;
+ }
+
+ // If there are any bounds, these are handled outside the NFA and
+ // probably shouldn't be pre-empted.
+ if (ir.hasBounds()) {
+ goto next_vertex;
+ }
+ }
+
+ verts.insert(v);
+ next_vertex:
+ continue;
+ }
+}
+
+static
+void removeEdgesToAccept(NGHolder &g, NFAVertex v) {
+ const auto &reports = g[v].reports;
+ assert(!reports.empty());
+
+ // We remove any accept edge with a non-empty subset of the reports of v.
+
+ set<NFAEdge> dead;
+
+ for (const auto &e : in_edges_range(g.accept, g)) {
+ NFAVertex u = source(e, g);
+ const auto &r = g[u].reports;
+ if (!r.empty() && is_subset_of(r, reports)) {
DEBUG_PRINTF("vertex %zu\n", g[u].index);
- dead.insert(e);
- }
- }
-
- for (const auto &e : in_edges_range(g.acceptEod, g)) {
- NFAVertex u = source(e, g);
- const auto &r = g[u].reports;
- if (!r.empty() && is_subset_of(r, reports)) {
+ dead.insert(e);
+ }
+ }
+
+ for (const auto &e : in_edges_range(g.acceptEod, g)) {
+ NFAVertex u = source(e, g);
+ const auto &r = g[u].reports;
+ if (!r.empty() && is_subset_of(r, reports)) {
DEBUG_PRINTF("vertex %zu\n", g[u].index);
- dead.insert(e);
- }
- }
-
- assert(!dead.empty());
- remove_edges(dead, g);
-}
-
-static
-vector<NFAVertex> findUnreachable(const NGHolder &g) {
+ dead.insert(e);
+ }
+ }
+
+ assert(!dead.empty());
+ remove_edges(dead, g);
+}
+
+static
+vector<NFAVertex> findUnreachable(const NGHolder &g) {
const boost::reverse_graph<NGHolder, const NGHolder &> revg(g);
-
+
unordered_map<NFAVertex, boost::default_color_type> colours;
- colours.reserve(num_vertices(g));
-
- depth_first_visit(revg, g.acceptEod,
- make_dfs_visitor(boost::null_visitor()),
- make_assoc_property_map(colours));
-
- // Unreachable vertices are not in the colour map.
- vector<NFAVertex> unreach;
- for (auto v : vertices_range(revg)) {
- if (!contains(colours, v)) {
+ colours.reserve(num_vertices(g));
+
+ depth_first_visit(revg, g.acceptEod,
+ make_dfs_visitor(boost::null_visitor()),
+ make_assoc_property_map(colours));
+
+ // Unreachable vertices are not in the colour map.
+ vector<NFAVertex> unreach;
+ for (auto v : vertices_range(revg)) {
+ if (!contains(colours, v)) {
unreach.push_back(NFAVertex(v));
- }
- }
- return unreach;
-}
-
-/** Populates squash masks for states that can be switched off by highlander
- * (single match) reporters. */
+ }
+ }
+ return unreach;
+}
+
+/** Populates squash masks for states that can be switched off by highlander
+ * (single match) reporters. */
unordered_map<NFAVertex, NFAStateSet>
-findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) {
+findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) {
unordered_map<NFAVertex, NFAStateSet> squash;
-
- set<NFAVertex> verts;
- getHighlanderReporters(g, g.accept, rm, verts);
- getHighlanderReporters(g, g.acceptEod, rm, verts);
- if (verts.empty()) {
- DEBUG_PRINTF("no highlander reports\n");
- return squash;
- }
-
- const u32 numStates = num_vertices(g);
-
- for (auto v : verts) {
+
+ set<NFAVertex> verts;
+ getHighlanderReporters(g, g.accept, rm, verts);
+ getHighlanderReporters(g, g.acceptEod, rm, verts);
+ if (verts.empty()) {
+ DEBUG_PRINTF("no highlander reports\n");
+ return squash;
+ }
+
+ const u32 numStates = num_vertices(g);
+
+ for (auto v : verts) {
DEBUG_PRINTF("vertex %zu with %zu reports\n", g[v].index,
- g[v].reports.size());
-
- // Find the set of vertices that lead to v or any other reporter with a
- // subset of v's reports. We do this by creating a copy of the graph,
- // cutting the appropriate out-edges to accept and seeing which
- // vertices become unreachable.
-
+ g[v].reports.size());
+
+ // Find the set of vertices that lead to v or any other reporter with a
+ // subset of v's reports. We do this by creating a copy of the graph,
+ // cutting the appropriate out-edges to accept and seeing which
+ // vertices become unreachable.
+
unordered_map<NFAVertex, NFAVertex> orig_to_copy;
- NGHolder h;
- cloneHolder(h, g, &orig_to_copy);
- removeEdgesToAccept(h, orig_to_copy[v]);
-
- vector<NFAVertex> unreach = findUnreachable(h);
- DEBUG_PRINTF("can squash %zu vertices\n", unreach.size());
- if (unreach.empty()) {
- continue;
- }
-
- if (!contains(squash, v)) {
- squash[v] = NFAStateSet(numStates);
- squash[v].set();
- }
-
- NFAStateSet &mask = squash[v];
-
- for (auto uv : unreach) {
+ NGHolder h;
+ cloneHolder(h, g, &orig_to_copy);
+ removeEdgesToAccept(h, orig_to_copy[v]);
+
+ vector<NFAVertex> unreach = findUnreachable(h);
+ DEBUG_PRINTF("can squash %zu vertices\n", unreach.size());
+ if (unreach.empty()) {
+ continue;
+ }
+
+ if (!contains(squash, v)) {
+ squash[v] = NFAStateSet(numStates);
+ squash[v].set();
+ }
+
+ NFAStateSet &mask = squash[v];
+
+ for (auto uv : unreach) {
DEBUG_PRINTF("squashes index %zu\n", h[uv].index);
- mask.reset(h[uv].index);
- }
- }
-
- return squash;
-}
-
-} // namespace ue2
+ mask.reset(h[uv].index);
+ }
+ }
+
+ return squash;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_squash.h b/contrib/libs/hyperscan/src/nfagraph/ng_squash.h
index 16510ddd3a..489f541e84 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_squash.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_squash.h
@@ -1,72 +1,72 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief NFA graph state squashing analysis.
- */
-#ifndef NG_SQUASH_H
-#define NG_SQUASH_H
-
-#include "ng_holder.h"
-#include "som/som.h"
-#include "ue2common.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief NFA graph state squashing analysis.
+ */
+#ifndef NG_SQUASH_H
+#define NG_SQUASH_H
+
+#include "ng_holder.h"
+#include "som/som.h"
+#include "ue2common.h"
+
#include <unordered_map>
-#include <boost/dynamic_bitset.hpp>
-
-namespace ue2 {
-
-class NGHolder;
-class ReportManager;
-
+#include <boost/dynamic_bitset.hpp>
+
+namespace ue2 {
+
+class NGHolder;
+class ReportManager;
+
/**
* Dynamically-sized bitset, as an NFA can have an arbitrary number of states.
*/
using NFAStateSet = boost::dynamic_bitset<>;
-
-/**
- * Populates the squash mask for each vertex (i.e. the set of states to be left
- * on during squashing).
- *
- * The NFAStateSet in the output map is indexed by vertex_index.
- */
+
+/**
+ * Populates the squash mask for each vertex (i.e. the set of states to be left
+ * on during squashing).
+ *
+ * The NFAStateSet in the output map is indexed by vertex_index.
+ */
std::unordered_map<NFAVertex, NFAStateSet>
findSquashers(const NGHolder &g, som_type som = SOM_NONE);
-
-/** Filters out squash states intended only for use in DFA construction. */
-void filterSquashers(const NGHolder &g,
+
+/** Filters out squash states intended only for use in DFA construction. */
+void filterSquashers(const NGHolder &g,
std::unordered_map<NFAVertex, NFAStateSet> &squash);
-
-/** Populates squash masks for states that can be switched off by highlander
- * (single match) reporters. */
+
+/** Populates squash masks for states that can be switched off by highlander
+ * (single match) reporters. */
std::unordered_map<NFAVertex, NFAStateSet>
-findHighlanderSquashers(const NGHolder &g, const ReportManager &rm);
-
-} // namespace ue2
-
-#endif // NG_SQUASH_H
+findHighlanderSquashers(const NGHolder &g, const ReportManager &rm);
+
+} // namespace ue2
+
+#endif // NG_SQUASH_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_stop.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_stop.cpp
index 446c2ba317..5e627bb593 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_stop.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_stop.cpp
@@ -1,193 +1,193 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Stop Alphabet calculation.
- */
-#include "ng_stop.h"
-
-#include "ng_depth.h"
-#include "ng_holder.h"
-#include "ng_misc_opt.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "nfa/castlecompile.h"
-#include "som/som.h"
-#include "util/charreach.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-#include "util/verify_types.h"
-
-#include <map>
-#include <set>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-/** Stop alphabet depth threshold. */
-static const u32 MAX_STOP_DEPTH = 8;
-
-namespace {
-
-/** Depths from start, startDs for this graph. */
-struct InitDepths {
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Stop Alphabet calculation.
+ */
+#include "ng_stop.h"
+
+#include "ng_depth.h"
+#include "ng_holder.h"
+#include "ng_misc_opt.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "nfa/castlecompile.h"
+#include "som/som.h"
+#include "util/charreach.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+#include "util/verify_types.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+/** Stop alphabet depth threshold. */
+static const u32 MAX_STOP_DEPTH = 8;
+
+namespace {
+
+/** Depths from start, startDs for this graph. */
+struct InitDepths {
explicit InitDepths(const NGHolder &g)
: start(calcDepthsFrom(g, g.start)),
startDs(calcDepthsFrom(g, g.startDs)) {}
-
- depth maxDist(const NGHolder &g, NFAVertex v) const {
- u32 idx = g[v].index;
- assert(idx < start.size() && idx < startDs.size());
- const depth &d_start = start.at(idx).max;
- const depth &d_startDs = startDs.at(idx).max;
- if (d_start.is_unreachable()) {
- return d_startDs;
- } else if (d_startDs.is_unreachable()) {
- return d_start;
- }
- return max(d_start, d_startDs);
- }
-
-private:
- vector<DepthMinMax> start;
- vector<DepthMinMax> startDs;
-};
-
-} // namespace
-
-/** Find the set of characters that are not present in the reachability of
- * graph \p g after a certain depth (currently 8). If a character in this set
- * is encountered, it means that the NFA is either dead or has not progressed
+
+ depth maxDist(const NGHolder &g, NFAVertex v) const {
+ u32 idx = g[v].index;
+ assert(idx < start.size() && idx < startDs.size());
+ const depth &d_start = start.at(idx).max;
+ const depth &d_startDs = startDs.at(idx).max;
+ if (d_start.is_unreachable()) {
+ return d_startDs;
+ } else if (d_startDs.is_unreachable()) {
+ return d_start;
+ }
+ return max(d_start, d_startDs);
+ }
+
+private:
+ vector<DepthMinMax> start;
+ vector<DepthMinMax> startDs;
+};
+
+} // namespace
+
+/** Find the set of characters that are not present in the reachability of
+ * graph \p g after a certain depth (currently 8). If a character in this set
+ * is encountered, it means that the NFA is either dead or has not progressed
* more than 8 characters from its start states.
*
* This is only used to guide merging heuristics, use
* findLeftOffsetStopAlphabet for real uses.
*/
-CharReach findStopAlphabet(const NGHolder &g, som_type som) {
- const depth max_depth(MAX_STOP_DEPTH);
- const InitDepths depths(g);
- const map<NFAVertex, BoundedRepeatSummary> no_vertices;
-
- CharReach stopcr;
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
-
- if (depths.maxDist(g, v) >= max_depth) {
- if (som == SOM_NONE) {
- stopcr |= reduced_cr(v, g, no_vertices);
- } else {
- stopcr |= g[v].char_reach;
- }
- }
- }
-
- // Turn alphabet into stops.
- stopcr.flip();
-
- return stopcr;
-}
-
-/** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then
- * build an eight-bit mask per character C, with each bit representing the
- * depth before the location of character C (if encountered) that the NFA would
- * be in a predictable start state. */
-vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som) {
- const depth max_depth(MAX_STOP_DEPTH);
- const InitDepths depths(g);
- const map<NFAVertex, BoundedRepeatSummary> no_vertices;
-
- vector<CharReach> reach(MAX_STOP_DEPTH);
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
- CharReach v_cr;
- if (som == SOM_NONE) {
- v_cr = reduced_cr(v, g, no_vertices);
- } else {
- v_cr = g[v].char_reach;
- }
-
- u32 d = min(max_depth, depths.maxDist(g, v));
- for (u32 i = 0; i < d; i++) {
- reach[i] |= v_cr;
- }
- }
-
-#ifdef DEBUG
- for (u32 i = 0; i < MAX_STOP_DEPTH; i++) {
- DEBUG_PRINTF("depth %u, stop chars: ", i);
- describeClass(stdout, ~reach[i], 20, CC_OUT_TEXT);
- printf("\n");
- }
-#endif
-
- vector<u8> stop(N_CHARS, 0);
-
- for (u32 i = 0; i < MAX_STOP_DEPTH; i++) {
- CharReach cr = ~reach[i]; // invert reach for stop chars.
- const u8 mask = 1U << i;
- for (size_t c = cr.find_first(); c != cr.npos; c = cr.find_next(c)) {
- stop[c] |= mask;
- }
- }
-
- return stop;
-}
-
-vector<u8> findLeftOffsetStopAlphabet(const CastleProto &castle,
- UNUSED som_type som) {
- const depth max_width = findMaxWidth(castle);
- DEBUG_PRINTF("castle has reach %s and max width %s\n",
- describeClass(castle.reach()).c_str(),
- max_width.str().c_str());
-
- const CharReach escape = ~castle.reach(); // invert reach for stop chars.
-
- u32 d = min(max_width, depth(MAX_STOP_DEPTH));
- const u8 mask = verify_u8((1U << d) - 1);
-
- vector<u8> stop(N_CHARS, 0);
-
- for (size_t c = escape.find_first(); c != escape.npos;
- c = escape.find_next(c)) {
- stop[c] |= mask;
- }
-
- return stop;
-}
-
-} // namespace ue2
+CharReach findStopAlphabet(const NGHolder &g, som_type som) {
+ const depth max_depth(MAX_STOP_DEPTH);
+ const InitDepths depths(g);
+ const map<NFAVertex, BoundedRepeatSummary> no_vertices;
+
+ CharReach stopcr;
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+
+ if (depths.maxDist(g, v) >= max_depth) {
+ if (som == SOM_NONE) {
+ stopcr |= reduced_cr(v, g, no_vertices);
+ } else {
+ stopcr |= g[v].char_reach;
+ }
+ }
+ }
+
+ // Turn alphabet into stops.
+ stopcr.flip();
+
+ return stopcr;
+}
+
+/** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then
+ * build an eight-bit mask per character C, with each bit representing the
+ * depth before the location of character C (if encountered) that the NFA would
+ * be in a predictable start state. */
+vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som) {
+ const depth max_depth(MAX_STOP_DEPTH);
+ const InitDepths depths(g);
+ const map<NFAVertex, BoundedRepeatSummary> no_vertices;
+
+ vector<CharReach> reach(MAX_STOP_DEPTH);
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ CharReach v_cr;
+ if (som == SOM_NONE) {
+ v_cr = reduced_cr(v, g, no_vertices);
+ } else {
+ v_cr = g[v].char_reach;
+ }
+
+ u32 d = min(max_depth, depths.maxDist(g, v));
+ for (u32 i = 0; i < d; i++) {
+ reach[i] |= v_cr;
+ }
+ }
+
+#ifdef DEBUG
+ for (u32 i = 0; i < MAX_STOP_DEPTH; i++) {
+ DEBUG_PRINTF("depth %u, stop chars: ", i);
+ describeClass(stdout, ~reach[i], 20, CC_OUT_TEXT);
+ printf("\n");
+ }
+#endif
+
+ vector<u8> stop(N_CHARS, 0);
+
+ for (u32 i = 0; i < MAX_STOP_DEPTH; i++) {
+ CharReach cr = ~reach[i]; // invert reach for stop chars.
+ const u8 mask = 1U << i;
+ for (size_t c = cr.find_first(); c != cr.npos; c = cr.find_next(c)) {
+ stop[c] |= mask;
+ }
+ }
+
+ return stop;
+}
+
+vector<u8> findLeftOffsetStopAlphabet(const CastleProto &castle,
+ UNUSED som_type som) {
+ const depth max_width = findMaxWidth(castle);
+ DEBUG_PRINTF("castle has reach %s and max width %s\n",
+ describeClass(castle.reach()).c_str(),
+ max_width.str().c_str());
+
+ const CharReach escape = ~castle.reach(); // invert reach for stop chars.
+
+ u32 d = min(max_width, depth(MAX_STOP_DEPTH));
+ const u8 mask = verify_u8((1U << d) - 1);
+
+ vector<u8> stop(N_CHARS, 0);
+
+ for (size_t c = escape.find_first(); c != escape.npos;
+ c = escape.find_next(c)) {
+ stop[c] |= mask;
+ }
+
+ return stop;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_stop.h b/contrib/libs/hyperscan/src/nfagraph/ng_stop.h
index 8399047f7b..4a889dca09 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_stop.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_stop.h
@@ -1,66 +1,66 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Stop Alphabet calculation.
- */
-
-#ifndef NG_STOP_H
-#define NG_STOP_H
-
-#include "ue2common.h"
-#include "som/som.h"
-
-#include <vector>
-
-namespace ue2 {
-
-struct CastleProto;
-class CharReach;
-class NGHolder;
-
-/** Find the set of characters that are not present in the reachability of
- * graph \p g after a certain depth (currently 8). If a character in this set
- * is encountered, it means that the NFA is either dead or has not progressed
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Stop Alphabet calculation.
+ */
+
+#ifndef NG_STOP_H
+#define NG_STOP_H
+
+#include "ue2common.h"
+#include "som/som.h"
+
+#include <vector>
+
+namespace ue2 {
+
+struct CastleProto;
+class CharReach;
+class NGHolder;
+
+/** Find the set of characters that are not present in the reachability of
+ * graph \p g after a certain depth (currently 8). If a character in this set
+ * is encountered, it means that the NFA is either dead or has not progressed
* more than 8 characters from its start states.
*
* This is only used to guide merging heuristics, use
* findLeftOffsetStopAlphabet for real uses.
*/
-CharReach findStopAlphabet(const NGHolder &g, som_type som);
-
-/** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then
- * build an eight-bit mask per character C, with each bit representing the
- * depth before the location of character C (if encountered) that the NFA would
- * be in a predictable start state. */
-std::vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som);
-std::vector<u8> findLeftOffsetStopAlphabet(const CastleProto &c, som_type som);
-
-} // namespace ue2
-
-#endif
+CharReach findStopAlphabet(const NGHolder &g, som_type som);
+
+/** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then
+ * build an eight-bit mask per character C, with each bit representing the
+ * depth before the location of character C (if encountered) that the NFA would
+ * be in a predictable start state. */
+std::vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som);
+std::vector<u8> findLeftOffsetStopAlphabet(const CastleProto &c, som_type som);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.cpp
index 6c7259f717..4ad5ff7875 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.cpp
@@ -1,73 +1,73 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief NFA graph merging ("uncalc")
- *
- * The file contains our collection of NFA graph merging strategies.
- *
- * NFAGraph merging is generally guided by the length of the common prefix
- * between NFAGraph pairs.
- */
-#include "grey.h"
-#include "ng_holder.h"
-#include "ng_limex.h"
-#include "ng_redundancy.h"
-#include "ng_region.h"
-#include "ng_uncalc_components.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/graph_range.h"
-#include "util/ue2string.h"
-
-#include <algorithm>
-#include <deque>
-#include <map>
-#include <queue>
-#include <set>
-#include <vector>
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief NFA graph merging ("uncalc")
+ *
+ * The file contains our collection of NFA graph merging strategies.
+ *
+ * NFAGraph merging is generally guided by the length of the common prefix
+ * between NFAGraph pairs.
+ */
+#include "grey.h"
+#include "ng_holder.h"
+#include "ng_limex.h"
+#include "ng_redundancy.h"
+#include "ng_region.h"
+#include "ng_uncalc_components.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/graph_range.h"
+#include "util/ue2string.h"
+
+#include <algorithm>
+#include <deque>
+#include <map>
+#include <queue>
+#include <set>
+#include <vector>
+
#include <boost/range/adaptor/map.hpp>
-using namespace std;
+using namespace std;
using boost::adaptors::map_values;
-
-namespace ue2 {
-
-static const u32 FAST_STATE_LIMIT = 256; /**< largest possible desirable NFA */
-
-/** Sentinel value meaning no component has yet been selected. */
+
+namespace ue2 {
+
+static const u32 FAST_STATE_LIMIT = 256; /**< largest possible desirable NFA */
+
+/** Sentinel value meaning no component has yet been selected. */
static const u32 NO_COMPONENT = ~0U;
-
+
static const u32 UNUSED_STATE = ~0U;
-
+
namespace {
struct ranking_info {
explicit ranking_info(const NGHolder &h) : to_vertex(getTopoOrdering(h)) {
@@ -77,15 +77,15 @@ struct ranking_info {
for (NFAVertex v : to_vertex) {
to_rank[v] = rank++;
- }
+ }
for (NFAVertex v : vertices_range(h)) {
if (!contains(to_rank, v)) {
to_rank[v] = UNUSED_STATE;
}
}
- }
-
+ }
+
NFAVertex at(u32 ranking) const { return to_vertex.at(ranking); }
u32 get(NFAVertex v) const { return to_rank.at(v); }
u32 size() const { return (u32)to_vertex.size(); }
@@ -94,279 +94,279 @@ struct ranking_info {
to_rank[v] = rank;
to_vertex.push_back(v);
return rank;
- }
-
+ }
+
private:
vector<NFAVertex> to_vertex;
unordered_map<NFAVertex, u32> to_rank;
};
-}
-
-static never_inline
-bool cplVerticesMatch(const NGHolder &ga, NFAVertex va,
- const NGHolder &gb, NFAVertex vb) {
- // Must have the same reachability.
- if (ga[va].char_reach != gb[vb].char_reach) {
- return false;
- }
-
- // If they're start vertices, they must be the same one.
- if (is_any_start(va, ga) || is_any_start(vb, gb)) {
- if (ga[va].index != gb[vb].index) {
- return false;
- }
- }
-
- bool va_accept = edge(va, ga.accept, ga).second;
- bool vb_accept = edge(vb, gb.accept, gb).second;
- bool va_acceptEod = edge(va, ga.acceptEod, ga).second;
- bool vb_acceptEod = edge(vb, gb.acceptEod, gb).second;
-
- // Must have the same accept/acceptEod edges.
- if (va_accept != vb_accept || va_acceptEod != vb_acceptEod) {
- return false;
- }
-
- return true;
-}
-
-static never_inline
+}
+
+static never_inline
+bool cplVerticesMatch(const NGHolder &ga, NFAVertex va,
+ const NGHolder &gb, NFAVertex vb) {
+ // Must have the same reachability.
+ if (ga[va].char_reach != gb[vb].char_reach) {
+ return false;
+ }
+
+ // If they're start vertices, they must be the same one.
+ if (is_any_start(va, ga) || is_any_start(vb, gb)) {
+ if (ga[va].index != gb[vb].index) {
+ return false;
+ }
+ }
+
+ bool va_accept = edge(va, ga.accept, ga).second;
+ bool vb_accept = edge(vb, gb.accept, gb).second;
+ bool va_acceptEod = edge(va, ga.acceptEod, ga).second;
+ bool vb_acceptEod = edge(vb, gb.acceptEod, gb).second;
+
+ // Must have the same accept/acceptEod edges.
+ if (va_accept != vb_accept || va_acceptEod != vb_acceptEod) {
+ return false;
+ }
+
+ return true;
+}
+
+static never_inline
u32 cplCommonReachAndSimple(const NGHolder &ga, const ranking_info &a_ranking,
const NGHolder &gb, const ranking_info &b_ranking) {
u32 ml = min(a_ranking.size(), b_ranking.size());
- if (ml > 65535) {
- ml = 65535;
- }
-
- // Count the number of common vertices which share reachability, report and
- // "startedness" properties.
- u32 max = 0;
- for (; max < ml; max++) {
+ if (ml > 65535) {
+ ml = 65535;
+ }
+
+ // Count the number of common vertices which share reachability, report and
+ // "startedness" properties.
+ u32 max = 0;
+ for (; max < ml; max++) {
if (!cplVerticesMatch(ga, a_ranking.at(max), gb, b_ranking.at(max))) {
- break;
- }
- }
-
- return max;
-}
-
+ break;
+ }
+ }
+
+ return max;
+}
+
static
u32 commonPrefixLength(const NGHolder &ga, const ranking_info &a_ranking,
const NGHolder &gb, const ranking_info &b_ranking) {
- /* upper bound on the common region based on local properties */
+ /* upper bound on the common region based on local properties */
u32 max = cplCommonReachAndSimple(ga, a_ranking, gb, b_ranking);
- DEBUG_PRINTF("cpl upper bound %u\n", max);
-
- while (max > 0) {
- /* shrink max region based on in-edges from outside the region */
- for (size_t j = max; j > 0; j--) {
+ DEBUG_PRINTF("cpl upper bound %u\n", max);
+
+ while (max > 0) {
+ /* shrink max region based on in-edges from outside the region */
+ for (size_t j = max; j > 0; j--) {
NFAVertex a_v = a_ranking.at(j - 1);
NFAVertex b_v = b_ranking.at(j - 1);
for (auto u : inv_adjacent_vertices_range(a_v, ga)) {
u32 state_id = a_ranking.get(u);
if (state_id != UNUSED_STATE && state_id >= max) {
- max = j - 1;
- DEBUG_PRINTF("lowering max to %u\n", max);
- goto next_vertex;
- }
- }
-
+ max = j - 1;
+ DEBUG_PRINTF("lowering max to %u\n", max);
+ goto next_vertex;
+ }
+ }
+
for (auto u : inv_adjacent_vertices_range(b_v, gb)) {
u32 state_id = b_ranking.get(u);
if (state_id != UNUSED_STATE && state_id >= max) {
- max = j - 1;
- DEBUG_PRINTF("lowering max to %u\n", max);
- goto next_vertex;
- }
- }
-
- next_vertex:;
- }
-
- /* Ensure that every pair of vertices has same out-edges to vertices in
- the region. */
+ max = j - 1;
+ DEBUG_PRINTF("lowering max to %u\n", max);
+ goto next_vertex;
+ }
+ }
+
+ next_vertex:;
+ }
+
+ /* Ensure that every pair of vertices has same out-edges to vertices in
+ the region. */
for (size_t i = 0; i < max; i++) {
- size_t a_count = 0;
- size_t b_count = 0;
-
+ size_t a_count = 0;
+ size_t b_count = 0;
+
for (NFAEdge a_edge : out_edges_range(a_ranking.at(i), ga)) {
u32 sid = a_ranking.get(target(a_edge, ga));
if (sid == UNUSED_STATE || sid >= max) {
- continue;
- }
-
- a_count++;
-
+ continue;
+ }
+
+ a_count++;
+
NFAEdge b_edge = edge(b_ranking.at(i), b_ranking.at(sid), gb);
-
+
if (!b_edge) {
- max = i;
- DEBUG_PRINTF("lowering max to %u due to edge %zu->%u\n",
- max, i, sid);
+ max = i;
+ DEBUG_PRINTF("lowering max to %u due to edge %zu->%u\n",
+ max, i, sid);
goto try_smaller;
- }
-
+ }
+
if (ga[a_edge].tops != gb[b_edge].tops) {
- max = i;
+ max = i;
DEBUG_PRINTF("tops don't match on edge %zu->%u\n", i, sid);
goto try_smaller;
- }
- }
-
+ }
+ }
+
for (NFAVertex b_v : adjacent_vertices_range(b_ranking.at(i), gb)) {
u32 sid = b_ranking.get(b_v);
if (sid == UNUSED_STATE || sid >= max) {
- continue;
- }
-
- b_count++;
- }
-
- if (a_count != b_count) {
- max = i;
+ continue;
+ }
+
+ b_count++;
+ }
+
+ if (a_count != b_count) {
+ max = i;
DEBUG_PRINTF("lowering max to %u due to a,b count (a_count=%zu,"
" b_count=%zu)\n", max, a_count, b_count);
goto try_smaller;
- }
- }
-
+ }
+ }
+
DEBUG_PRINTF("survived checks, returning cpl %u\n", max);
return max;
try_smaller:;
- }
-
- DEBUG_PRINTF("failed to find any common region\n");
- return 0;
-}
-
+ }
+
+ DEBUG_PRINTF("failed to find any common region\n");
+ return 0;
+}
+
u32 commonPrefixLength(const NGHolder &ga, const NGHolder &gb) {
return commonPrefixLength(ga, ranking_info(ga), gb, ranking_info(gb));
}
-static never_inline
+static never_inline
void mergeNfaComponent(NGHolder &dest, const NGHolder &vic, size_t common_len) {
assert(&dest != &vic);
auto dest_info = ranking_info(dest);
auto vic_info = ranking_info(vic);
- map<NFAVertex, NFAVertex> vmap; // vic -> dest
-
- vmap[vic.start] = dest.start;
- vmap[vic.startDs] = dest.startDs;
- vmap[vic.accept] = dest.accept;
- vmap[vic.acceptEod] = dest.acceptEod;
+ map<NFAVertex, NFAVertex> vmap; // vic -> dest
+
+ vmap[vic.start] = dest.start;
+ vmap[vic.startDs] = dest.startDs;
+ vmap[vic.accept] = dest.accept;
+ vmap[vic.acceptEod] = dest.acceptEod;
vmap[NGHolder::null_vertex()] = NGHolder::null_vertex();
-
- // For vertices in the common len, add to vmap and merge in the reports, if
- // any.
- for (u32 i = 0; i < common_len; i++) {
+
+ // For vertices in the common len, add to vmap and merge in the reports, if
+ // any.
+ for (u32 i = 0; i < common_len; i++) {
NFAVertex v_old = vic_info.at(i);
NFAVertex v = dest_info.at(i);
- vmap[v_old] = v;
-
- const auto &reports = vic[v_old].reports;
- dest[v].reports.insert(reports.begin(), reports.end());
- }
-
+ vmap[v_old] = v;
+
+ const auto &reports = vic[v_old].reports;
+ dest[v].reports.insert(reports.begin(), reports.end());
+ }
+
// Add in vertices beyond the common len
for (u32 i = common_len; i < vic_info.size(); i++) {
NFAVertex v_old = vic_info.at(i);
-
- if (is_special(v_old, vic)) {
- // Dest already has start vertices, just merge the reports.
- u32 idx = vic[v_old].index;
- NFAVertex v = dest.getSpecialVertex(idx);
- const auto &reports = vic[v_old].reports;
- dest[v].reports.insert(reports.begin(), reports.end());
- continue;
- }
-
- NFAVertex v = add_vertex(vic[v_old], dest);
+
+ if (is_special(v_old, vic)) {
+ // Dest already has start vertices, just merge the reports.
+ u32 idx = vic[v_old].index;
+ NFAVertex v = dest.getSpecialVertex(idx);
+ const auto &reports = vic[v_old].reports;
+ dest[v].reports.insert(reports.begin(), reports.end());
+ continue;
+ }
+
+ NFAVertex v = add_vertex(vic[v_old], dest);
dest_info.add_to_tail(v);
- vmap[v_old] = v;
- }
-
- /* add edges */
- DEBUG_PRINTF("common_len=%zu\n", common_len);
- for (const auto &e : edges_range(vic)) {
+ vmap[v_old] = v;
+ }
+
+ /* add edges */
+ DEBUG_PRINTF("common_len=%zu\n", common_len);
+ for (const auto &e : edges_range(vic)) {
NFAVertex u_old = source(e, vic);
NFAVertex v_old = target(e, vic);
NFAVertex u = vmap[u_old];
NFAVertex v = vmap[v_old];
- bool uspecial = is_special(u, dest);
- bool vspecial = is_special(v, dest);
-
- // Skip stylised edges that are already present.
- if (uspecial && vspecial && edge(u, v, dest).second) {
- continue;
- }
-
- // We're in the common region if v's state ID is low enough, unless v
- // is a special (an accept), in which case we use u's state ID.
+ bool uspecial = is_special(u, dest);
+ bool vspecial = is_special(v, dest);
+
+ // Skip stylised edges that are already present.
+ if (uspecial && vspecial && edge(u, v, dest).second) {
+ continue;
+ }
+
+ // We're in the common region if v's state ID is low enough, unless v
+ // is a special (an accept), in which case we use u's state ID.
bool in_common_region = dest_info.get(v) < common_len;
if (vspecial && dest_info.get(u) < common_len) {
- in_common_region = true;
- }
-
+ in_common_region = true;
+ }
+
DEBUG_PRINTF("adding idx=%zu (state %u) -> idx=%zu (state %u)%s\n",
dest[u].index, dest_info.get(u),
dest[v].index, dest_info.get(v),
- in_common_region ? " [common]" : "");
-
- if (in_common_region) {
- if (!is_special(v, dest)) {
- DEBUG_PRINTF("skipping common edge\n");
- assert(edge(u, v, dest).second);
- // Should never merge edges with different top values.
+ in_common_region ? " [common]" : "");
+
+ if (in_common_region) {
+ if (!is_special(v, dest)) {
+ DEBUG_PRINTF("skipping common edge\n");
+ assert(edge(u, v, dest).second);
+ // Should never merge edges with different top values.
assert(vic[e].tops == dest[edge(u, v, dest)].tops);
- continue;
- } else {
- assert(is_any_accept(v, dest));
- // If the edge exists in both graphs, skip it.
- if (edge(u, v, dest).second) {
- DEBUG_PRINTF("skipping common edge to accept\n");
- continue;
- }
- }
- }
-
- assert(!edge(u, v, dest).second);
- add_edge(u, v, vic[e], dest);
- }
-
+ continue;
+ } else {
+ assert(is_any_accept(v, dest));
+ // If the edge exists in both graphs, skip it.
+ if (edge(u, v, dest).second) {
+ DEBUG_PRINTF("skipping common edge to accept\n");
+ continue;
+ }
+ }
+ }
+
+ assert(!edge(u, v, dest).second);
+ add_edge(u, v, vic[e], dest);
+ }
+
renumber_edges(dest);
renumber_vertices(dest);
-}
-
-namespace {
-struct NfaMergeCandidateH {
- NfaMergeCandidateH(size_t cpl_in, NGHolder *first_in, NGHolder *second_in,
- u32 tb_in)
- : cpl(cpl_in), first(first_in), second(second_in), tie_breaker(tb_in) {}
-
- size_t cpl; //!< common prefix length
- NGHolder *first; //!< first component to merge
- NGHolder *second; //!< second component to merge
- u32 tie_breaker; //!< for determinism
-
- bool operator<(const NfaMergeCandidateH &other) const {
- if (cpl != other.cpl) {
- return cpl < other.cpl;
- } else {
- return tie_breaker < other.tie_breaker;
- }
- }
-};
-
-} // end namespace
-
-/** Returns true if graphs \p h1 and \p h2 can (and should) be merged. */
-static
+}
+
+namespace {
+struct NfaMergeCandidateH {
+ NfaMergeCandidateH(size_t cpl_in, NGHolder *first_in, NGHolder *second_in,
+ u32 tb_in)
+ : cpl(cpl_in), first(first_in), second(second_in), tie_breaker(tb_in) {}
+
+ size_t cpl; //!< common prefix length
+ NGHolder *first; //!< first component to merge
+ NGHolder *second; //!< second component to merge
+ u32 tie_breaker; //!< for determinism
+
+ bool operator<(const NfaMergeCandidateH &other) const {
+ if (cpl != other.cpl) {
+ return cpl < other.cpl;
+ } else {
+ return tie_breaker < other.tie_breaker;
+ }
+ }
+};
+
+} // end namespace
+
+/** Returns true if graphs \p h1 and \p h2 can (and should) be merged. */
+static
bool shouldMerge(const NGHolder &ha, const NGHolder &hb, size_t cpl,
const ReportManager *rm, const CompileContext &cc) {
size_t combinedStateCount = num_vertices(ha) + num_vertices(hb) - cpl;
-
+
combinedStateCount -= 2 * 2; /* discount accepts from both */
if (is_triggered(ha)) {
@@ -377,130 +377,130 @@ bool shouldMerge(const NGHolder &ha, const NGHolder &hb, size_t cpl,
combinedStateCount += tops.size();
}
- if (combinedStateCount > FAST_STATE_LIMIT) {
- // More complex implementability check.
- NGHolder h_temp;
- cloneHolder(h_temp, ha);
- assert(h_temp.kind == hb.kind);
- mergeNfaComponent(h_temp, hb, cpl);
- reduceImplementableGraph(h_temp, SOM_NONE, rm, cc);
- u32 numStates = isImplementableNFA(h_temp, rm, cc);
- DEBUG_PRINTF("isImplementableNFA returned %u states\n", numStates);
- if (!numStates) {
- DEBUG_PRINTF("not implementable\n");
- return false;
- } else if (numStates > FAST_STATE_LIMIT) {
- DEBUG_PRINTF("too many states to merge\n");
- return false;
- }
- }
-
- return true;
-}
-
-/** Returns true if the graph has start vertices that are compatible for
- * merging. Rose may generate all sorts of wacky vacuous cases, and the merge
- * code isn't currently up to handling them. */
-static
-bool compatibleStarts(const NGHolder &ga, const NGHolder &gb) {
- // Start and startDs must have the same self-loops.
- return (edge(ga.startDs, ga.startDs, ga).second ==
- edge(gb.startDs, gb.startDs, gb).second) &&
- (edge(ga.start, ga.start, ga).second ==
- edge(gb.start, gb.start, gb).second);
-}
-
-static never_inline
-void buildNfaMergeQueue(const vector<NGHolder *> &cluster,
- priority_queue<NfaMergeCandidateH> *pq) {
- const size_t cs = cluster.size();
- assert(cs < NO_COMPONENT);
-
- // First, make sure all holders have numbered states and collect their
- // counts.
+ if (combinedStateCount > FAST_STATE_LIMIT) {
+ // More complex implementability check.
+ NGHolder h_temp;
+ cloneHolder(h_temp, ha);
+ assert(h_temp.kind == hb.kind);
+ mergeNfaComponent(h_temp, hb, cpl);
+ reduceImplementableGraph(h_temp, SOM_NONE, rm, cc);
+ u32 numStates = isImplementableNFA(h_temp, rm, cc);
+ DEBUG_PRINTF("isImplementableNFA returned %u states\n", numStates);
+ if (!numStates) {
+ DEBUG_PRINTF("not implementable\n");
+ return false;
+ } else if (numStates > FAST_STATE_LIMIT) {
+ DEBUG_PRINTF("too many states to merge\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/** Returns true if the graph has start vertices that are compatible for
+ * merging. Rose may generate all sorts of wacky vacuous cases, and the merge
+ * code isn't currently up to handling them. */
+static
+bool compatibleStarts(const NGHolder &ga, const NGHolder &gb) {
+ // Start and startDs must have the same self-loops.
+ return (edge(ga.startDs, ga.startDs, ga).second ==
+ edge(gb.startDs, gb.startDs, gb).second) &&
+ (edge(ga.start, ga.start, ga).second ==
+ edge(gb.start, gb.start, gb).second);
+}
+
+static never_inline
+void buildNfaMergeQueue(const vector<NGHolder *> &cluster,
+ priority_queue<NfaMergeCandidateH> *pq) {
+ const size_t cs = cluster.size();
+ assert(cs < NO_COMPONENT);
+
+ // First, make sure all holders have numbered states and collect their
+ // counts.
vector<ranking_info> states_map;
states_map.reserve(cs);
- for (size_t i = 0; i < cs; i++) {
- assert(cluster[i]);
+ for (size_t i = 0; i < cs; i++) {
+ assert(cluster[i]);
assert(states_map.size() == i);
const NGHolder &g = *(cluster[i]);
states_map.emplace_back(g);
- }
-
- vector<u16> seen_cpl(cs * cs, 0);
- vector<u32> best_comp(cs, NO_COMPONENT);
-
- /* TODO: understand, explain */
- for (u32 ci = 0; ci < cs; ci++) {
- for (u32 cj = ci + 1; cj < cs; cj++) {
- u16 cpl = 0;
- bool calc = false;
-
- if (best_comp[ci] != NO_COMPONENT) {
- u32 bc = best_comp[ci];
- if (seen_cpl[bc + cs * cj] < seen_cpl[bc + cs * ci]) {
- cpl = seen_cpl[bc + cs * cj];
- DEBUG_PRINTF("using cached cpl from %u %u\n", bc, cpl);
- calc = true;
- }
- }
-
- if (!calc && best_comp[cj] != NO_COMPONENT) {
- u32 bc = best_comp[cj];
- if (seen_cpl[bc + cs * ci] < seen_cpl[bc + cs * cj]) {
- cpl = seen_cpl[bc + cs * ci];
- DEBUG_PRINTF("using cached cpl from %u %u\n", bc, cpl);
- calc = true;
- }
- }
-
- NGHolder &g_i = *(cluster[ci]);
- NGHolder &g_j = *(cluster[cj]);
-
- if (!compatibleStarts(g_i, g_j)) {
- continue;
- }
-
- if (!calc) {
- cpl = commonPrefixLength(g_i, states_map[ci],
- g_j, states_map[cj]);
- }
-
- seen_cpl[ci + cs * cj] = cpl;
- seen_cpl[cj + cs * ci] = cpl;
-
- if (best_comp[cj] == NO_COMPONENT
- || seen_cpl[best_comp[cj] + cs * cj] < cpl) {
- best_comp[cj] = ci;
- }
-
- DEBUG_PRINTF("cpl %u %u = %u\n", ci, cj, cpl);
-
- pq->push(NfaMergeCandidateH(cpl, cluster[ci], cluster[cj],
- ci * cs + cj));
- }
- }
-}
-
+ }
+
+ vector<u16> seen_cpl(cs * cs, 0);
+ vector<u32> best_comp(cs, NO_COMPONENT);
+
+ /* TODO: understand, explain */
+ for (u32 ci = 0; ci < cs; ci++) {
+ for (u32 cj = ci + 1; cj < cs; cj++) {
+ u16 cpl = 0;
+ bool calc = false;
+
+ if (best_comp[ci] != NO_COMPONENT) {
+ u32 bc = best_comp[ci];
+ if (seen_cpl[bc + cs * cj] < seen_cpl[bc + cs * ci]) {
+ cpl = seen_cpl[bc + cs * cj];
+ DEBUG_PRINTF("using cached cpl from %u %u\n", bc, cpl);
+ calc = true;
+ }
+ }
+
+ if (!calc && best_comp[cj] != NO_COMPONENT) {
+ u32 bc = best_comp[cj];
+ if (seen_cpl[bc + cs * ci] < seen_cpl[bc + cs * cj]) {
+ cpl = seen_cpl[bc + cs * ci];
+ DEBUG_PRINTF("using cached cpl from %u %u\n", bc, cpl);
+ calc = true;
+ }
+ }
+
+ NGHolder &g_i = *(cluster[ci]);
+ NGHolder &g_j = *(cluster[cj]);
+
+ if (!compatibleStarts(g_i, g_j)) {
+ continue;
+ }
+
+ if (!calc) {
+ cpl = commonPrefixLength(g_i, states_map[ci],
+ g_j, states_map[cj]);
+ }
+
+ seen_cpl[ci + cs * cj] = cpl;
+ seen_cpl[cj + cs * ci] = cpl;
+
+ if (best_comp[cj] == NO_COMPONENT
+ || seen_cpl[best_comp[cj] + cs * cj] < cpl) {
+ best_comp[cj] = ci;
+ }
+
+ DEBUG_PRINTF("cpl %u %u = %u\n", ci, cj, cpl);
+
+ pq->push(NfaMergeCandidateH(cpl, cluster[ci], cluster[cj],
+ ci * cs + cj));
+ }
+ }
+}
+
/**
* True if the graphs have mergeable starts.
*
* Nowadays, this means that any vacuous edges must have the same tops. In
* addition, mixed-accept cases need to have matching reports.
*/
-static
-bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) {
+static
+bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) {
if (!isVacuous(h1) || !isVacuous(h2)) {
return true;
}
-
+
// Vacuous edges from startDs should not occur: we have better ways to
// implement true dot-star relationships. Just in case they do, ban them
// from being merged unless they have identical reports.
if (is_match_vertex(h1.startDs, h1) || is_match_vertex(h2.startDs, h2)) {
assert(0);
return false;
- }
+ }
/* TODO: relax top checks if reports match */
@@ -509,88 +509,88 @@ bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) {
NFAEdge e2_accept = edge(h2.start, h2.accept, h2);
if (e1_accept && e2_accept && h1[e1_accept].tops != h2[e2_accept].tops) {
return false;
- }
-
+ }
+
// If both graphs have edge (start, acceptEod), the tops must match.
NFAEdge e1_eod = edge(h1.start, h1.acceptEod, h1);
NFAEdge e2_eod = edge(h2.start, h2.acceptEod, h2);
if (e1_eod && e2_eod && h1[e1_eod].tops != h2[e2_eod].tops) {
return false;
}
-
+
// If one graph has an edge to accept and the other has an edge to
// acceptEod, the reports must match for the merge to be safe.
if ((e1_accept && e2_eod) || (e2_accept && e1_eod)) {
if (h1[h1.start].reports != h2[h2.start].reports) {
- return false;
- }
- }
-
- return true;
-}
-
-/** Merge graph \p ga into graph \p gb. Returns false on failure. */
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/** Merge graph \p ga into graph \p gb. Returns false on failure. */
bool mergeNfaPair(const NGHolder &ga, NGHolder &gb, const ReportManager *rm,
- const CompileContext &cc) {
- assert(ga.kind == gb.kind);
-
+ const CompileContext &cc) {
+ assert(ga.kind == gb.kind);
+
// Vacuous NFAs require special checks on their starts to ensure that tops
// match, and that reports match for mixed-accept cases.
- if (!mergeableStarts(ga, gb)) {
- DEBUG_PRINTF("starts aren't mergeable\n");
- return false;
- }
-
+ if (!mergeableStarts(ga, gb)) {
+ DEBUG_PRINTF("starts aren't mergeable\n");
+ return false;
+ }
+
u32 cpl = commonPrefixLength(ga, gb);
if (!shouldMerge(gb, ga, cpl, rm, cc)) {
- return false;
- }
-
- mergeNfaComponent(gb, ga, cpl);
- reduceImplementableGraph(gb, SOM_NONE, rm, cc);
- return true;
-}
-
+ return false;
+ }
+
+ mergeNfaComponent(gb, ga, cpl);
+ reduceImplementableGraph(gb, SOM_NONE, rm, cc);
+ return true;
+}
+
map<NGHolder *, NGHolder *> mergeNfaCluster(const vector<NGHolder *> &cluster,
const ReportManager *rm,
const CompileContext &cc) {
map<NGHolder *, NGHolder *> merged;
- if (cluster.size() < 2) {
+ if (cluster.size() < 2) {
return merged;
- }
-
- DEBUG_PRINTF("new cluster, size %zu\n", cluster.size());
-
- priority_queue<NfaMergeCandidateH> pq;
- buildNfaMergeQueue(cluster, &pq);
-
- while (!pq.empty()) {
- NGHolder &pholder = *pq.top().first;
- NGHolder &vholder = *pq.top().second;
- pq.pop();
-
- if (contains(merged, &pholder) || contains(merged, &vholder)) {
- DEBUG_PRINTF("dead\n");
- continue;
- }
-
- if (!mergeNfaPair(vholder, pholder, rm, cc)) {
- DEBUG_PRINTF("merge failed\n");
- continue;
- }
-
- merged.emplace(&vholder, &pholder);
-
- // Seek closure.
- for (auto &m : merged) {
- if (m.second == &vholder) {
- m.second = &pholder;
- }
- }
- }
+ }
+
+ DEBUG_PRINTF("new cluster, size %zu\n", cluster.size());
+
+ priority_queue<NfaMergeCandidateH> pq;
+ buildNfaMergeQueue(cluster, &pq);
+
+ while (!pq.empty()) {
+ NGHolder &pholder = *pq.top().first;
+ NGHolder &vholder = *pq.top().second;
+ pq.pop();
+
+ if (contains(merged, &pholder) || contains(merged, &vholder)) {
+ DEBUG_PRINTF("dead\n");
+ continue;
+ }
+
+ if (!mergeNfaPair(vholder, pholder, rm, cc)) {
+ DEBUG_PRINTF("merge failed\n");
+ continue;
+ }
+
+ merged.emplace(&vholder, &pholder);
+
+ // Seek closure.
+ for (auto &m : merged) {
+ if (m.second == &vholder) {
+ m.second = &pholder;
+ }
+ }
+ }
return merged;
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.h b/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.h
index 57bb242289..b0f42670a3 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_uncalc_components.h
@@ -1,74 +1,74 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief NFA graph merging ("uncalc")
- */
-
-#ifndef NG_UNCALC_COMPONENTS_H
-#define NG_UNCALC_COMPONENTS_H
-
-#include <map>
-#include <vector>
-
-namespace ue2 {
-
-struct CompileContext;
-struct Grey;
-class NGHolder;
-class ReportManager;
-
-/**
- * \brief Returns the common prefix length for a pair of graphs.
- *
- * The CPL is calculated based the topological ordering given by the state
- * indices for each graph.
- */
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief NFA graph merging ("uncalc")
+ */
+
+#ifndef NG_UNCALC_COMPONENTS_H
+#define NG_UNCALC_COMPONENTS_H
+
+#include <map>
+#include <vector>
+
+namespace ue2 {
+
+struct CompileContext;
+struct Grey;
+class NGHolder;
+class ReportManager;
+
+/**
+ * \brief Returns the common prefix length for a pair of graphs.
+ *
+ * The CPL is calculated based the topological ordering given by the state
+ * indices for each graph.
+ */
u32 commonPrefixLength(const NGHolder &ga, const NGHolder &gb);
-
-/**
- * \brief Merge the group of graphs in \p cluster where possible.
- *
+
+/**
+ * \brief Merge the group of graphs in \p cluster where possible.
+ *
* The (from, to) mapping of merged graphs is returned.
- */
+ */
std::map<NGHolder *, NGHolder *>
mergeNfaCluster(const std::vector<NGHolder *> &cluster, const ReportManager *rm,
const CompileContext &cc);
-
-/**
- * \brief Merge graph \p ga into graph \p gb.
- *
- * Returns false on failure. On success, \p gb is reduced via \ref
- * reduceImplementableGraph and renumbered.
- */
+
+/**
+ * \brief Merge graph \p ga into graph \p gb.
+ *
+ * Returns false on failure. On success, \p gb is reduced via \ref
+ * reduceImplementableGraph and renumbered.
+ */
bool mergeNfaPair(const NGHolder &ga, NGHolder &gb, const ReportManager *rm,
- const CompileContext &cc);
-
-} // namespace ue2
-
-#endif
+ const CompileContext &cc);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_utf8.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_utf8.cpp
index a9afaa304d..89500fe39e 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_utf8.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_utf8.cpp
@@ -1,303 +1,303 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief UTF-8 transforms and operations.
- */
-#include "ng_utf8.h"
-
-#include "ng.h"
-#include "ng_prune.h"
-#include "ng_util.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief UTF-8 transforms and operations.
+ */
+#include "ng_utf8.h"
+
+#include "ng.h"
+#include "ng_prune.h"
+#include "ng_util.h"
#include "compiler/compiler.h"
-#include "util/graph_range.h"
-#include "util/unicode_def.h"
-
-#include <set>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-static
+#include "util/graph_range.h"
+#include "util/unicode_def.h"
+
+#include <set>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+static
void allowIllegal(NGHolder &g, NFAVertex v, u8 pred_char) {
if (in_degree(v, g) != 1) {
- DEBUG_PRINTF("unexpected pred\n");
- assert(0); /* should be true due to the early stage of this analysis */
- return;
- }
-
+ DEBUG_PRINTF("unexpected pred\n");
+ assert(0); /* should be true due to the early stage of this analysis */
+ return;
+ }
+
CharReach &cr = g[v].char_reach;
- if (pred_char == 0xe0) {
- assert(cr.isSubsetOf(CharReach(0xa0, 0xbf)));
- if (cr == CharReach(0xa0, 0xbf)) {
- cr |= CharReach(0x80, 0x9f);
- }
- } else if (pred_char == 0xf0) {
- assert(cr.isSubsetOf(CharReach(0x90, 0xbf)));
- if (cr == CharReach(0x90, 0xbf)) {
- cr |= CharReach(0x80, 0x8f);
- }
- } else if (pred_char == 0xf4) {
- assert(cr.isSubsetOf(CharReach(0x80, 0x8f)));
- if (cr == CharReach(0x80, 0x8f)) {
- cr |= CharReach(0x90, 0xbf);
- }
- } else {
- assert(0); /* unexpected pred */
- }
-}
-
-/** \brief Relax forbidden UTF-8 sequences.
- *
- * Some byte sequences can not appear in valid UTF-8 as they encode code points
- * above \\x{10ffff} or they represent overlong encodings. As we require valid
- * UTF-8 input, we have no defined behaviour in these cases, as a result we can
- * accept them if it simplifies the graph. */
+ if (pred_char == 0xe0) {
+ assert(cr.isSubsetOf(CharReach(0xa0, 0xbf)));
+ if (cr == CharReach(0xa0, 0xbf)) {
+ cr |= CharReach(0x80, 0x9f);
+ }
+ } else if (pred_char == 0xf0) {
+ assert(cr.isSubsetOf(CharReach(0x90, 0xbf)));
+ if (cr == CharReach(0x90, 0xbf)) {
+ cr |= CharReach(0x80, 0x8f);
+ }
+ } else if (pred_char == 0xf4) {
+ assert(cr.isSubsetOf(CharReach(0x80, 0x8f)));
+ if (cr == CharReach(0x80, 0x8f)) {
+ cr |= CharReach(0x90, 0xbf);
+ }
+ } else {
+ assert(0); /* unexpected pred */
+ }
+}
+
+/** \brief Relax forbidden UTF-8 sequences.
+ *
+ * Some byte sequences can not appear in valid UTF-8 as they encode code points
+ * above \\x{10ffff} or they represent overlong encodings. As we require valid
+ * UTF-8 input, we have no defined behaviour in these cases, as a result we can
+ * accept them if it simplifies the graph. */
void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr) {
if (!expr.utf8) {
- return;
- }
-
- const CharReach e0(0xe0);
- const CharReach f0(0xf0);
- const CharReach f4(0xf4);
-
+ return;
+ }
+
+ const CharReach e0(0xe0);
+ const CharReach f0(0xf0);
+ const CharReach f4(0xf4);
+
for (auto v : vertices_range(g)) {
const CharReach &cr = g[v].char_reach;
- if (cr == e0 || cr == f0 || cr == f4) {
- u8 pred_char = cr.find_first();
+ if (cr == e0 || cr == f0 || cr == f4) {
+ u8 pred_char = cr.find_first();
for (auto t : adjacent_vertices_range(v, g)) {
allowIllegal(g, t, pred_char);
- }
- }
- }
-}
-
-static
-bool hasPredInSet(const NGHolder &g, NFAVertex v, const set<NFAVertex> &s) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (contains(s, u)) {
- return true;
- }
- }
- return false;
-}
-
-static
-bool hasSuccInSet(const NGHolder &g, NFAVertex v, const set<NFAVertex> &s) {
- for (auto w : adjacent_vertices_range(v, g)) {
- if (contains(s, w)) {
- return true;
- }
- }
- return false;
-}
-
-static
-void findSeeds(const NGHolder &h, const bool som, vector<NFAVertex> *seeds) {
- set<NFAVertex> bad; /* from zero-width asserts near accepts, etc */
- for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
- const CharReach &cr = h[v].char_reach;
- if (!isutf8ascii(cr) && !isutf8start(cr)) {
- bad.insert(v);
- }
- }
-
- for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) {
- const CharReach &cr = h[v].char_reach;
- if (!isutf8ascii(cr) && !isutf8start(cr)) {
- bad.insert(v);
- }
- }
-
- // we want to be careful with asserts connected to starts
- // as well as they may not finish a code point
- for (auto v : vertices_range(h)) {
- if (is_virtual_start(v, h)) {
- bad.insert(v);
- insert(&bad, adjacent_vertices(v, h));
- }
- }
-
- /* we cannot handle vertices connected to accept as would report matches in
- * the middle of codepoints. acceptEod is not a problem as the input must
- * end at a codepoint boundary */
- bad.insert(h.accept);
-
- // If we're in SOM mode, we don't want to mess with vertices that have a
- // direct edge from startDs.
- if (som) {
- insert(&bad, adjacent_vertices(h.startDs, h));
- }
-
- set<NFAVertex> already_seeds; /* already marked as seeds */
- for (auto v : vertices_range(h)) {
- const CharReach &cr = h[v].char_reach;
-
- if (!isutf8ascii(cr) || !hasSelfLoop(v, h)) {
- continue;
- }
-
- if (hasSuccInSet(h, v, bad)) {
- continue;
- }
-
- // Skip vertices that are directly connected to other vertices already
- // in the seeds list: we can't collapse two of these directly next to
- // each other.
- if (hasPredInSet(h, v, already_seeds) ||
- hasSuccInSet(h, v, already_seeds)) {
- continue;
- }
-
+ }
+ }
+ }
+}
+
+static
+bool hasPredInSet(const NGHolder &g, NFAVertex v, const set<NFAVertex> &s) {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (contains(s, u)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static
+bool hasSuccInSet(const NGHolder &g, NFAVertex v, const set<NFAVertex> &s) {
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (contains(s, w)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static
+void findSeeds(const NGHolder &h, const bool som, vector<NFAVertex> *seeds) {
+ set<NFAVertex> bad; /* from zero-width asserts near accepts, etc */
+ for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
+ const CharReach &cr = h[v].char_reach;
+ if (!isutf8ascii(cr) && !isutf8start(cr)) {
+ bad.insert(v);
+ }
+ }
+
+ for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) {
+ const CharReach &cr = h[v].char_reach;
+ if (!isutf8ascii(cr) && !isutf8start(cr)) {
+ bad.insert(v);
+ }
+ }
+
+ // we want to be careful with asserts connected to starts
+ // as well as they may not finish a code point
+ for (auto v : vertices_range(h)) {
+ if (is_virtual_start(v, h)) {
+ bad.insert(v);
+ insert(&bad, adjacent_vertices(v, h));
+ }
+ }
+
+ /* we cannot handle vertices connected to accept as would report matches in
+ * the middle of codepoints. acceptEod is not a problem as the input must
+ * end at a codepoint boundary */
+ bad.insert(h.accept);
+
+ // If we're in SOM mode, we don't want to mess with vertices that have a
+ // direct edge from startDs.
+ if (som) {
+ insert(&bad, adjacent_vertices(h.startDs, h));
+ }
+
+ set<NFAVertex> already_seeds; /* already marked as seeds */
+ for (auto v : vertices_range(h)) {
+ const CharReach &cr = h[v].char_reach;
+
+ if (!isutf8ascii(cr) || !hasSelfLoop(v, h)) {
+ continue;
+ }
+
+ if (hasSuccInSet(h, v, bad)) {
+ continue;
+ }
+
+ // Skip vertices that are directly connected to other vertices already
+ // in the seeds list: we can't collapse two of these directly next to
+ // each other.
+ if (hasPredInSet(h, v, already_seeds) ||
+ hasSuccInSet(h, v, already_seeds)) {
+ continue;
+ }
+
DEBUG_PRINTF("%zu is a seed\n", h[v].index);
- seeds->push_back(v);
- already_seeds.insert(v);
- }
-}
-
-static
-bool expandCyclic(NGHolder &h, NFAVertex v) {
+ seeds->push_back(v);
+ already_seeds.insert(v);
+ }
+}
+
+static
+bool expandCyclic(NGHolder &h, NFAVertex v) {
DEBUG_PRINTF("inspecting %zu\n", h[v].index);
- bool changes = false;
-
+ bool changes = false;
+
auto v_preds = preds(v, h);
auto v_succs = succs(v, h);
- set<NFAVertex> start_siblings;
- set<NFAVertex> end_siblings;
-
- CharReach &v_cr = h[v].char_reach;
-
- /* We need to find start vertices which have all of our preds.
- * As we have a self loop, it must be one of our succs. */
- for (auto a : adjacent_vertices_range(v, h)) {
+ set<NFAVertex> start_siblings;
+ set<NFAVertex> end_siblings;
+
+ CharReach &v_cr = h[v].char_reach;
+
+ /* We need to find start vertices which have all of our preds.
+ * As we have a self loop, it must be one of our succs. */
+ for (auto a : adjacent_vertices_range(v, h)) {
auto a_preds = preds(a, h);
-
- if (a_preds == v_preds && isutf8start(h[a].char_reach)) {
+
+ if (a_preds == v_preds && isutf8start(h[a].char_reach)) {
DEBUG_PRINTF("%zu is a start v\n", h[a].index);
- start_siblings.insert(a);
- }
- }
-
- /* We also need to find full cont vertices which have all our own succs;
- * As we have a self loop, it must be one of our preds. */
- for (auto a : inv_adjacent_vertices_range(v, h)) {
+ start_siblings.insert(a);
+ }
+ }
+
+ /* We also need to find full cont vertices which have all our own succs;
+ * As we have a self loop, it must be one of our preds. */
+ for (auto a : inv_adjacent_vertices_range(v, h)) {
auto a_succs = succs(a, h);
-
- if (a_succs == v_succs && h[a].char_reach == UTF_CONT_CR) {
+
+ if (a_succs == v_succs && h[a].char_reach == UTF_CONT_CR) {
DEBUG_PRINTF("%zu is a full tail cont\n", h[a].index);
- end_siblings.insert(a);
- }
- }
-
- for (auto s : start_siblings) {
- if (out_degree(s, h) != 1) {
- continue;
- }
-
- const CharReach &cr = h[s].char_reach;
- if (cr.isSubsetOf(UTF_TWO_START_CR)) {
- if (end_siblings.find(*adjacent_vertices(s, h).first)
- == end_siblings.end()) {
+ end_siblings.insert(a);
+ }
+ }
+
+ for (auto s : start_siblings) {
+ if (out_degree(s, h) != 1) {
+ continue;
+ }
+
+ const CharReach &cr = h[s].char_reach;
+ if (cr.isSubsetOf(UTF_TWO_START_CR)) {
+ if (end_siblings.find(*adjacent_vertices(s, h).first)
+ == end_siblings.end()) {
DEBUG_PRINTF("%zu is odd\n", h[s].index);
- continue;
- }
- } else if (cr.isSubsetOf(UTF_THREE_START_CR)) {
- NFAVertex m = *adjacent_vertices(s, h).first;
-
- if (h[m].char_reach != UTF_CONT_CR
- || out_degree(m, h) != 1) {
- continue;
- }
- if (end_siblings.find(*adjacent_vertices(m, h).first)
- == end_siblings.end()) {
+ continue;
+ }
+ } else if (cr.isSubsetOf(UTF_THREE_START_CR)) {
+ NFAVertex m = *adjacent_vertices(s, h).first;
+
+ if (h[m].char_reach != UTF_CONT_CR
+ || out_degree(m, h) != 1) {
+ continue;
+ }
+ if (end_siblings.find(*adjacent_vertices(m, h).first)
+ == end_siblings.end()) {
DEBUG_PRINTF("%zu is odd\n", h[s].index);
- continue;
- }
- } else if (cr.isSubsetOf(UTF_FOUR_START_CR)) {
- NFAVertex m1 = *adjacent_vertices(s, h).first;
-
- if (h[m1].char_reach != UTF_CONT_CR
- || out_degree(m1, h) != 1) {
- continue;
- }
-
- NFAVertex m2 = *adjacent_vertices(m1, h).first;
-
- if (h[m2].char_reach != UTF_CONT_CR
- || out_degree(m2, h) != 1) {
- continue;
- }
-
- if (end_siblings.find(*adjacent_vertices(m2, h).first)
- == end_siblings.end()) {
+ continue;
+ }
+ } else if (cr.isSubsetOf(UTF_FOUR_START_CR)) {
+ NFAVertex m1 = *adjacent_vertices(s, h).first;
+
+ if (h[m1].char_reach != UTF_CONT_CR
+ || out_degree(m1, h) != 1) {
+ continue;
+ }
+
+ NFAVertex m2 = *adjacent_vertices(m1, h).first;
+
+ if (h[m2].char_reach != UTF_CONT_CR
+ || out_degree(m2, h) != 1) {
+ continue;
+ }
+
+ if (end_siblings.find(*adjacent_vertices(m2, h).first)
+ == end_siblings.end()) {
DEBUG_PRINTF("%zu is odd\n", h[s].index);
- continue;
- }
- } else {
+ continue;
+ }
+ } else {
DEBUG_PRINTF("%zu is bad\n", h[s].index);
- continue;
- }
-
- v_cr |= cr;
- clear_vertex(s, h);
- changes = true;
- }
-
- if (changes) {
- v_cr |= UTF_CONT_CR; /* we need to add in cont reach */
- v_cr.set(0xc0); /* we can also add in the forbidden bytes as we require
- * valid unicode data */
- v_cr.set(0xc1);
- v_cr |= CharReach(0xf5, 0xff);
- }
-
- return changes;
-}
-
-/** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex
- * where possible, based on the assumption that we will always be matching
- * against well-formed input. */
-void utf8DotRestoration(NGHolder &h, bool som) {
- vector<NFAVertex> seeds; /* cyclic ascii vertices */
- findSeeds(h, som, &seeds);
-
- bool changes = false;
- for (auto v : seeds) {
- changes |= expandCyclic(h, v);
- }
-
- if (changes) {
- pruneUseless(h);
- }
-}
-
-} // namespace ue2
+ continue;
+ }
+
+ v_cr |= cr;
+ clear_vertex(s, h);
+ changes = true;
+ }
+
+ if (changes) {
+ v_cr |= UTF_CONT_CR; /* we need to add in cont reach */
+ v_cr.set(0xc0); /* we can also add in the forbidden bytes as we require
+ * valid unicode data */
+ v_cr.set(0xc1);
+ v_cr |= CharReach(0xf5, 0xff);
+ }
+
+ return changes;
+}
+
+/** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex
+ * where possible, based on the assumption that we will always be matching
+ * against well-formed input. */
+void utf8DotRestoration(NGHolder &h, bool som) {
+ vector<NFAVertex> seeds; /* cyclic ascii vertices */
+ findSeeds(h, som, &seeds);
+
+ bool changes = false;
+ for (auto v : seeds) {
+ changes |= expandCyclic(h, v);
+ }
+
+ if (changes) {
+ pruneUseless(h);
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_utf8.h b/contrib/libs/hyperscan/src/nfagraph/ng_utf8.h
index 0300088039..7c4288336f 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_utf8.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_utf8.h
@@ -1,57 +1,57 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief UTF-8 transforms and operations.
- */
-
-#ifndef NG_UTF8_H
-#define NG_UTF8_H
-
-namespace ue2 {
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief UTF-8 transforms and operations.
+ */
+
+#ifndef NG_UTF8_H
+#define NG_UTF8_H
+
+namespace ue2 {
+
class ExpressionInfo;
-class NGHolder;
-
-/** \brief Relax forbidden UTF-8 sequences.
- *
- * Some byte sequences can not appear in valid UTF-8 as they encode code points
- * above \\x{10ffff} or they represent overlong encodings. As we require valid
- * UTF-8 input, we have no defined behaviour in these cases, as a result we can
- * accept them if it simplifies the graph. */
+class NGHolder;
+
+/** \brief Relax forbidden UTF-8 sequences.
+ *
+ * Some byte sequences can not appear in valid UTF-8 as they encode code points
+ * above \\x{10ffff} or they represent overlong encodings. As we require valid
+ * UTF-8 input, we have no defined behaviour in these cases, as a result we can
+ * accept them if it simplifies the graph. */
void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr);
-
-/** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex
- * where possible, based on the assumption that we will always be matching
- * against well-formed input.
- */
-void utf8DotRestoration(NGHolder &h, bool som);
-
-} // namespace ue2
-
-#endif
+
+/** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex
+ * where possible, based on the assumption that we will always be matching
+ * against well-formed input.
+ */
+void utf8DotRestoration(NGHolder &h, bool som);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_util.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_util.cpp
index 630193b19b..cb2b710358 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_util.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_util.cpp
@@ -1,191 +1,191 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Miscellaneous NFA graph utilities.
- */
-#include "ng_util.h"
-
-#include "grey.h"
-#include "ng_dump.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Miscellaneous NFA graph utilities.
+ */
+#include "ng_util.h"
+
+#include "grey.h"
+#include "ng_dump.h"
#include "ng_prune.h"
-#include "ue2common.h"
-#include "nfa/limex_limits.h" // for NFA_MAX_TOP_MASKS.
-#include "parser/position.h"
-#include "util/graph_range.h"
+#include "ue2common.h"
+#include "nfa/limex_limits.h" // for NFA_MAX_TOP_MASKS.
+#include "parser/position.h"
+#include "util/graph_range.h"
#include "util/graph_small_color_map.h"
-#include "util/make_unique.h"
-#include "util/order_check.h"
-#include "util/ue2string.h"
-#include "util/report_manager.h"
-
+#include "util/make_unique.h"
+#include "util/order_check.h"
+#include "util/ue2string.h"
+#include "util/report_manager.h"
+
#include <limits>
-#include <map>
-#include <set>
+#include <map>
+#include <set>
#include <unordered_map>
#include <unordered_set>
-#include <boost/graph/filtered_graph.hpp>
-#include <boost/graph/topological_sort.hpp>
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
+#include <boost/graph/filtered_graph.hpp>
+#include <boost/graph/topological_sort.hpp>
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
using boost::make_filtered_graph;
-using boost::make_assoc_property_map;
-
-namespace ue2 {
-
-NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex a) {
+using boost::make_assoc_property_map;
+
+namespace ue2 {
+
+NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex a) {
assert(a != NGHolder::null_vertex());
-
+
NGHolder::out_edge_iterator ii, iie;
- tie(ii, iie) = out_edges(a, g);
- if (ii == iie) {
+ tie(ii, iie) = out_edges(a, g);
+ if (ii == iie) {
return NGHolder::null_vertex();
- }
- NFAVertex b = target(*ii, g);
- if (a == b) {
- ++ii;
- if (ii == iie) {
+ }
+ NFAVertex b = target(*ii, g);
+ if (a == b) {
+ ++ii;
+ if (ii == iie) {
return NGHolder::null_vertex();
- }
-
- b = target(*ii, g);
- if (++ii != iie) {
+ }
+
+ b = target(*ii, g);
+ if (++ii != iie) {
return NGHolder::null_vertex();
- }
- } else if (++ii != iie && (target(*ii, g) != a || ++ii != iie)) {
+ }
+ } else if (++ii != iie && (target(*ii, g) != a || ++ii != iie)) {
return NGHolder::null_vertex();
- }
-
- assert(a != b);
- return b;
-}
-
-NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex a) {
+ }
+
+ assert(a != b);
+ return b;
+}
+
+NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex a) {
assert(a != NGHolder::null_vertex());
-
- u32 idegree = in_degree(a, g);
- if (idegree != 1 && !(idegree == 2 && hasSelfLoop(a, g))) {
+
+ u32 idegree = in_degree(a, g);
+ if (idegree != 1 && !(idegree == 2 && hasSelfLoop(a, g))) {
return NGHolder::null_vertex();
- }
-
+ }
+
NGHolder::in_edge_iterator ii, iie;
- tie(ii, iie) = in_edges(a, g);
- if (ii == iie) {
+ tie(ii, iie) = in_edges(a, g);
+ if (ii == iie) {
return NGHolder::null_vertex();
- }
- NFAVertex b = source(*ii, g);
- if (a == b) {
- ++ii;
- if (ii == iie) {
+ }
+ NFAVertex b = source(*ii, g);
+ if (a == b) {
+ ++ii;
+ if (ii == iie) {
return NGHolder::null_vertex();
- }
-
- b = source(*ii, g);
- }
-
- assert(a != b);
- return b;
-}
-
-NFAVertex clone_vertex(NGHolder &g, NFAVertex v) {
- NFAVertex clone = add_vertex(g);
- u32 idx = g[clone].index;
- g[clone] = g[v];
- g[clone].index = idx;
-
- return clone;
-}
-
-void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest) {
- for (const auto &e : out_edges_range(source, g)) {
- NFAVertex t = target(e, g);
- if (edge(dest, t, g).second) {
- continue;
- }
+ }
+
+ b = source(*ii, g);
+ }
+
+ assert(a != b);
+ return b;
+}
+
+NFAVertex clone_vertex(NGHolder &g, NFAVertex v) {
+ NFAVertex clone = add_vertex(g);
+ u32 idx = g[clone].index;
+ g[clone] = g[v];
+ g[clone].index = idx;
+
+ return clone;
+}
+
+void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest) {
+ for (const auto &e : out_edges_range(source, g)) {
+ NFAVertex t = target(e, g);
+ if (edge(dest, t, g).second) {
+ continue;
+ }
NFAEdge clone = add_edge(dest, t, g);
- u32 idx = g[clone].index;
- g[clone] = g[e];
- g[clone].index = idx;
- }
-}
-
-void clone_in_edges(NGHolder &g, NFAVertex s, NFAVertex dest) {
- for (const auto &e : in_edges_range(s, g)) {
- NFAVertex ss = source(e, g);
- assert(!edge(ss, dest, g).second);
+ u32 idx = g[clone].index;
+ g[clone] = g[e];
+ g[clone].index = idx;
+ }
+}
+
+void clone_in_edges(NGHolder &g, NFAVertex s, NFAVertex dest) {
+ for (const auto &e : in_edges_range(s, g)) {
+ NFAVertex ss = source(e, g);
+ assert(!edge(ss, dest, g).second);
NFAEdge clone = add_edge(ss, dest, g);
- u32 idx = g[clone].index;
- g[clone] = g[e];
- g[clone].index = idx;
- }
-}
-
-bool onlyOneTop(const NGHolder &g) {
+ u32 idx = g[clone].index;
+ g[clone] = g[e];
+ g[clone].index = idx;
+ }
+}
+
+bool onlyOneTop(const NGHolder &g) {
return getTops(g).size() == 1;
-}
-
-namespace {
-struct CycleFound {};
-struct DetectCycles : public boost::default_dfs_visitor {
- explicit DetectCycles(const NGHolder &g) : startDs(g.startDs) {}
+}
+
+namespace {
+struct CycleFound {};
+struct DetectCycles : public boost::default_dfs_visitor {
+ explicit DetectCycles(const NGHolder &g) : startDs(g.startDs) {}
void back_edge(const NFAEdge &e, const NGHolder &g) const {
- NFAVertex u = source(e, g), v = target(e, g);
- // We ignore the startDs self-loop.
- if (u == startDs && v == startDs) {
- return;
- }
- // Any other back-edge indicates a cycle.
+ NFAVertex u = source(e, g), v = target(e, g);
+ // We ignore the startDs self-loop.
+ if (u == startDs && v == startDs) {
+ return;
+ }
+ // Any other back-edge indicates a cycle.
DEBUG_PRINTF("back edge %zu->%zu found\n", g[u].index, g[v].index);
- throw CycleFound();
- }
-private:
- const NFAVertex startDs;
-};
-} // namespace
-
-bool isVacuous(const NGHolder &h) {
- return edge(h.start, h.accept, h).second
- || edge(h.start, h.acceptEod, h).second
- || edge(h.startDs, h.accept, h).second
- || edge(h.startDs, h.acceptEod, h).second;
-}
-
-bool isAnchored(const NGHolder &g) {
- for (auto v : adjacent_vertices_range(g.startDs, g)) {
- if (v != g.startDs) {
- return false;
- }
- }
- return true;
-}
-
+ throw CycleFound();
+ }
+private:
+ const NFAVertex startDs;
+};
+} // namespace
+
+bool isVacuous(const NGHolder &h) {
+ return edge(h.start, h.accept, h).second
+ || edge(h.start, h.acceptEod, h).second
+ || edge(h.startDs, h.accept, h).second
+ || edge(h.startDs, h.acceptEod, h).second;
+}
+
+bool isAnchored(const NGHolder &g) {
+ for (auto v : adjacent_vertices_range(g.startDs, g)) {
+ if (v != g.startDs) {
+ return false;
+ }
+ }
+ return true;
+}
+
bool isFloating(const NGHolder &g) {
for (auto v : adjacent_vertices_range(g.start, g)) {
if (v != g.startDs && !edge(g.startDs, v, g).second) {
@@ -195,99 +195,99 @@ bool isFloating(const NGHolder &g) {
return true;
}
-bool isAcyclic(const NGHolder &g) {
- try {
+bool isAcyclic(const NGHolder &g) {
+ try {
boost::depth_first_search(g, DetectCycles(g), make_small_color_map(g),
g.start);
- } catch (const CycleFound &) {
- return false;
- }
-
- return true;
-}
-
-/** True if the graph has a cycle reachable from the given source vertex. */
-bool hasReachableCycle(const NGHolder &g, NFAVertex src) {
- assert(hasCorrectlyNumberedVertices(g));
-
- try {
- // Use depth_first_visit, rather than depth_first_search, so that we
- // only search from src.
+ } catch (const CycleFound &) {
+ return false;
+ }
+
+ return true;
+}
+
+/** True if the graph has a cycle reachable from the given source vertex. */
+bool hasReachableCycle(const NGHolder &g, NFAVertex src) {
+ assert(hasCorrectlyNumberedVertices(g));
+
+ try {
+ // Use depth_first_visit, rather than depth_first_search, so that we
+ // only search from src.
boost::depth_first_visit(g, src, DetectCycles(g),
make_small_color_map(g));
} catch (const CycleFound &) {
- return true;
- }
-
- return false;
-}
-
-bool hasBigCycles(const NGHolder &g) {
- assert(hasCorrectlyNumberedVertices(g));
- set<NFAEdge> dead;
- BackEdges<set<NFAEdge>> backEdgeVisitor(dead);
+ return true;
+ }
+
+ return false;
+}
+
+bool hasBigCycles(const NGHolder &g) {
+ assert(hasCorrectlyNumberedVertices(g));
+ set<NFAEdge> dead;
+ BackEdges<set<NFAEdge>> backEdgeVisitor(dead);
boost::depth_first_search(g, backEdgeVisitor, make_small_color_map(g),
g.start);
-
- for (const auto &e : dead) {
- if (source(e, g) != target(e, g)) {
- return true;
- }
- }
-
- return false;
-}
-
+
+ for (const auto &e : dead) {
+ if (source(e, g) != target(e, g)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool hasNarrowReachVertex(const NGHolder &g, size_t max_reach_count) {
return any_of_in(vertices_range(g), [&](NFAVertex v) {
return !is_special(v, g) && g[v].char_reach.count() < max_reach_count;
});
-}
-
-bool can_never_match(const NGHolder &g) {
- assert(edge(g.accept, g.acceptEod, g).second);
+}
+
+bool can_never_match(const NGHolder &g) {
+ assert(edge(g.accept, g.acceptEod, g).second);
if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) {
- DEBUG_PRINTF("no paths into accept\n");
- return true;
- }
-
- return false;
-}
-
-bool can_match_at_eod(const NGHolder &h) {
+ DEBUG_PRINTF("no paths into accept\n");
+ return true;
+ }
+
+ return false;
+}
+
+bool can_match_at_eod(const NGHolder &h) {
if (in_degree(h.acceptEod, h) > 1) {
- DEBUG_PRINTF("more than one edge to acceptEod\n");
- return true;
- }
-
- for (auto e : in_edges_range(h.accept, h)) {
- if (h[e].assert_flags) {
- DEBUG_PRINTF("edge to accept has assert flags %d\n",
- h[e].assert_flags);
- return true;
- }
- }
-
- return false;
-}
-
-bool can_only_match_at_eod(const NGHolder &g) {
+ DEBUG_PRINTF("more than one edge to acceptEod\n");
+ return true;
+ }
+
+ for (auto e : in_edges_range(h.accept, h)) {
+ if (h[e].assert_flags) {
+ DEBUG_PRINTF("edge to accept has assert flags %d\n",
+ h[e].assert_flags);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool can_only_match_at_eod(const NGHolder &g) {
NGHolder::in_edge_iterator ie, ee;
- tie(ie, ee) = in_edges(g.accept, g);
-
- return ie == ee;
-}
-
-bool matches_everywhere(const NGHolder &h) {
+ tie(ie, ee) = in_edges(g.accept, g);
+
+ return ie == ee;
+}
+
+bool matches_everywhere(const NGHolder &h) {
NFAEdge e = edge(h.startDs, h.accept, h);
-
+
return e && !h[e].assert_flags;
-}
-
-bool is_virtual_start(NFAVertex v, const NGHolder &g) {
- return g[v].assert_flags & POS_FLAG_VIRTUAL_START;
-}
-
+}
+
+bool is_virtual_start(NFAVertex v, const NGHolder &g) {
+ return g[v].assert_flags & POS_FLAG_VIRTUAL_START;
+}
+
static
void reorderSpecials(const NGHolder &g, vector<NFAVertex> &topoOrder) {
// Start is last element of reverse topo ordering.
@@ -329,110 +329,110 @@ void reorderSpecials(const NGHolder &g, vector<NFAVertex> &topoOrder) {
}
}
-vector<NFAVertex> getTopoOrdering(const NGHolder &g) {
- assert(hasCorrectlyNumberedVertices(g));
-
- // Use the same colour map for both DFS and topological_sort below: avoids
- // having to reallocate it, etc.
+vector<NFAVertex> getTopoOrdering(const NGHolder &g) {
+ assert(hasCorrectlyNumberedVertices(g));
+
+ // Use the same colour map for both DFS and topological_sort below: avoids
+ // having to reallocate it, etc.
auto colors = make_small_color_map(g);
-
+
using EdgeSet = unordered_set<NFAEdge>;
- EdgeSet backEdges;
- BackEdges<EdgeSet> be(backEdges);
-
+ EdgeSet backEdges;
+ BackEdges<EdgeSet> be(backEdges);
+
depth_first_search(g, visitor(be).root_vertex(g.start).color_map(colors));
-
+
auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&backEdges));
-
- vector<NFAVertex> ordering;
+
+ vector<NFAVertex> ordering;
ordering.reserve(num_vertices(g));
topological_sort(acyclic_g, back_inserter(ordering), color_map(colors));
-
+
reorderSpecials(g, ordering);
- return ordering;
-}
-
-static
-void mustBeSetBefore_int(NFAVertex u, const NGHolder &g,
+ return ordering;
+}
+
+static
+void mustBeSetBefore_int(NFAVertex u, const NGHolder &g,
decltype(make_small_color_map(NGHolder())) &colors) {
- set<NFAVertex> s;
- insert(&s, adjacent_vertices(u, g));
-
- set<NFAEdge> dead; // Edges leading to u or u's successors.
-
- for (auto v : inv_adjacent_vertices_range(u, g)) {
- for (const auto &e : out_edges_range(v, g)) {
- NFAVertex t = target(e, g);
- if (t == u || contains(s, t)) {
- dead.insert(e);
- }
- }
- }
-
+ set<NFAVertex> s;
+ insert(&s, adjacent_vertices(u, g));
+
+ set<NFAEdge> dead; // Edges leading to u or u's successors.
+
+ for (auto v : inv_adjacent_vertices_range(u, g)) {
+ for (const auto &e : out_edges_range(v, g)) {
+ NFAVertex t = target(e, g);
+ if (t == u || contains(s, t)) {
+ dead.insert(e);
+ }
+ }
+ }
+
auto prefix = make_filtered_graph(g, make_bad_edge_filter(&dead));
-
+
depth_first_visit(prefix, g.start, make_dfs_visitor(boost::null_visitor()),
colors);
-}
-
-bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g,
- mbsb_cache &cache) {
- assert(&cache.g == &g);
- auto key = make_pair(g[u].index, g[v].index);
- DEBUG_PRINTF("cache checking (%zu)\n", cache.cache.size());
- if (contains(cache.cache, key)) {
- DEBUG_PRINTF("cache hit\n");
- return cache.cache[key];
- }
-
+}
+
+bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g,
+ mbsb_cache &cache) {
+ assert(&cache.g == &g);
+ auto key = make_pair(g[u].index, g[v].index);
+ DEBUG_PRINTF("cache checking (%zu)\n", cache.cache.size());
+ if (contains(cache.cache, key)) {
+ DEBUG_PRINTF("cache hit\n");
+ return cache.cache[key];
+ }
+
auto colors = make_small_color_map(g);
mustBeSetBefore_int(u, g, colors);
-
- for (auto vi : vertices_range(g)) {
+
+ for (auto vi : vertices_range(g)) {
auto key2 = make_pair(g[u].index, g[vi].index);
DEBUG_PRINTF("adding %zu %zu\n", key2.first, key2.second);
- assert(!contains(cache.cache, key2));
+ assert(!contains(cache.cache, key2));
bool value = get(colors, vi) == small_color::white;
- cache.cache[key2] = value;
- assert(contains(cache.cache, key2));
- }
+ cache.cache[key2] = value;
+ assert(contains(cache.cache, key2));
+ }
DEBUG_PRINTF("cache miss %zu %zu (%zu)\n", key.first, key.second,
- cache.cache.size());
- return cache.cache[key];
-}
-
-void appendLiteral(NGHolder &h, const ue2_literal &s) {
- DEBUG_PRINTF("adding '%s' to graph\n", dumpString(s).c_str());
- vector<NFAVertex> tail;
- assert(in_degree(h.acceptEod, h) == 1);
- for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
- tail.push_back(v);
- }
- assert(!tail.empty());
-
- for (auto v : tail) {
- remove_edge(v, h.accept, h);
- }
-
- for (const auto &c : s) {
- NFAVertex v = add_vertex(h);
- h[v].char_reach = c;
- for (auto u : tail) {
- add_edge(u, v, h);
- }
- tail.clear();
- tail.push_back(v);
- }
-
- for (auto v : tail) {
- add_edge(v, h.accept, h);
- }
-}
-
+ cache.cache.size());
+ return cache.cache[key];
+}
+
+void appendLiteral(NGHolder &h, const ue2_literal &s) {
+ DEBUG_PRINTF("adding '%s' to graph\n", dumpString(s).c_str());
+ vector<NFAVertex> tail;
+ assert(in_degree(h.acceptEod, h) == 1);
+ for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
+ tail.push_back(v);
+ }
+ assert(!tail.empty());
+
+ for (auto v : tail) {
+ remove_edge(v, h.accept, h);
+ }
+
+ for (const auto &c : s) {
+ NFAVertex v = add_vertex(h);
+ h[v].char_reach = c;
+ for (auto u : tail) {
+ add_edge(u, v, h);
+ }
+ tail.clear();
+ tail.push_back(v);
+ }
+
+ for (auto v : tail) {
+ add_edge(v, h.accept, h);
+ }
+}
+
flat_set<u32> getTops(const NGHolder &h) {
flat_set<u32> tops;
- for (const auto &e : out_edges_range(h.start, h)) {
+ for (const auto &e : out_edges_range(h.start, h)) {
insert(&tops, h[e].tops);
}
return tops;
@@ -442,165 +442,165 @@ void setTops(NGHolder &h, u32 top) {
for (const auto &e : out_edges_range(h.start, h)) {
assert(h[e].tops.empty());
if (target(e, h) == h.startDs) {
- continue;
- }
+ continue;
+ }
h[e].tops.insert(top);
- }
-}
-
-void clearReports(NGHolder &g) {
- DEBUG_PRINTF("clearing reports without an accept edge\n");
+ }
+}
+
+void clearReports(NGHolder &g) {
+ DEBUG_PRINTF("clearing reports without an accept edge\n");
unordered_set<NFAVertex> allow;
- insert(&allow, inv_adjacent_vertices(g.accept, g));
- insert(&allow, inv_adjacent_vertices(g.acceptEod, g));
- allow.erase(g.accept); // due to stylised edge.
-
- for (auto v : vertices_range(g)) {
- if (contains(allow, v)) {
- continue;
- }
- g[v].reports.clear();
- }
-}
-
-void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new) {
- for (auto v : vertices_range(g)) {
- auto &reports = g[v].reports;
- if (contains(reports, r_old)) {
- reports.insert(r_new);
- }
- }
-}
-
-static
-void fillHolderOutEdges(NGHolder &out, const NGHolder &in,
+ insert(&allow, inv_adjacent_vertices(g.accept, g));
+ insert(&allow, inv_adjacent_vertices(g.acceptEod, g));
+ allow.erase(g.accept); // due to stylised edge.
+
+ for (auto v : vertices_range(g)) {
+ if (contains(allow, v)) {
+ continue;
+ }
+ g[v].reports.clear();
+ }
+}
+
+void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new) {
+ for (auto v : vertices_range(g)) {
+ auto &reports = g[v].reports;
+ if (contains(reports, r_old)) {
+ reports.insert(r_new);
+ }
+ }
+}
+
+static
+void fillHolderOutEdges(NGHolder &out, const NGHolder &in,
const unordered_map<NFAVertex, NFAVertex> &v_map,
- NFAVertex u) {
- NFAVertex u_new = v_map.at(u);
-
- for (auto e : out_edges_range(u, in)) {
- NFAVertex v = target(e, in);
-
- if (is_special(u, in) && is_special(v, in)) {
- continue;
- }
-
- auto it = v_map.find(v);
- if (it == v_map.end()) {
- continue;
- }
- NFAVertex v_new = it->second;
- assert(!edge(u_new, v_new, out).second);
- add_edge(u_new, v_new, in[e], out);
- }
-}
-
-void fillHolder(NGHolder *outp, const NGHolder &in, const deque<NFAVertex> &vv,
+ NFAVertex u) {
+ NFAVertex u_new = v_map.at(u);
+
+ for (auto e : out_edges_range(u, in)) {
+ NFAVertex v = target(e, in);
+
+ if (is_special(u, in) && is_special(v, in)) {
+ continue;
+ }
+
+ auto it = v_map.find(v);
+ if (it == v_map.end()) {
+ continue;
+ }
+ NFAVertex v_new = it->second;
+ assert(!edge(u_new, v_new, out).second);
+ add_edge(u_new, v_new, in[e], out);
+ }
+}
+
+void fillHolder(NGHolder *outp, const NGHolder &in, const deque<NFAVertex> &vv,
unordered_map<NFAVertex, NFAVertex> *v_map_out) {
- NGHolder &out = *outp;
+ NGHolder &out = *outp;
unordered_map<NFAVertex, NFAVertex> &v_map = *v_map_out;
-
- out.kind = in.kind;
-
- for (auto v : vv) {
- if (is_special(v, in)) {
- continue;
- }
- v_map[v] = add_vertex(in[v], out);
- }
-
- for (u32 i = 0; i < N_SPECIALS; i++) {
- v_map[in.getSpecialVertex(i)] = out.getSpecialVertex(i);
- }
-
- DEBUG_PRINTF("copied %zu vertices to NG graph\n", v_map.size());
-
- fillHolderOutEdges(out, in, v_map, in.start);
- fillHolderOutEdges(out, in, v_map, in.startDs);
-
- for (auto u : vv) {
- if (is_special(u, in)) {
- continue;
- }
- fillHolderOutEdges(out, in, v_map, u);
- }
-
+
+ out.kind = in.kind;
+
+ for (auto v : vv) {
+ if (is_special(v, in)) {
+ continue;
+ }
+ v_map[v] = add_vertex(in[v], out);
+ }
+
+ for (u32 i = 0; i < N_SPECIALS; i++) {
+ v_map[in.getSpecialVertex(i)] = out.getSpecialVertex(i);
+ }
+
+ DEBUG_PRINTF("copied %zu vertices to NG graph\n", v_map.size());
+
+ fillHolderOutEdges(out, in, v_map, in.start);
+ fillHolderOutEdges(out, in, v_map, in.startDs);
+
+ for (auto u : vv) {
+ if (is_special(u, in)) {
+ continue;
+ }
+ fillHolderOutEdges(out, in, v_map, u);
+ }
+
renumber_edges(out);
renumber_vertices(out);
-}
-
-void cloneHolder(NGHolder &out, const NGHolder &in) {
- assert(hasCorrectlyNumberedVertices(in));
+}
+
+void cloneHolder(NGHolder &out, const NGHolder &in) {
+ assert(hasCorrectlyNumberedVertices(in));
assert(hasCorrectlyNumberedVertices(out));
- out.kind = in.kind;
-
- // Note: depending on the state of the input graph, some stylized edges
- // (e.g. start->startDs) may not exist. This must be propagated to the
- // output graph as well.
-
- /* remove the existing special edges */
- clear_vertex(out.startDs, out);
- clear_vertex(out.accept, out);
+ out.kind = in.kind;
+
+ // Note: depending on the state of the input graph, some stylized edges
+ // (e.g. start->startDs) may not exist. This must be propagated to the
+ // output graph as well.
+
+ /* remove the existing special edges */
+ clear_vertex(out.startDs, out);
+ clear_vertex(out.accept, out);
renumber_edges(out);
-
- vector<NFAVertex> out_mapping(num_vertices(in));
- out_mapping[NODE_START] = out.start;
- out_mapping[NODE_START_DOTSTAR] = out.startDs;
- out_mapping[NODE_ACCEPT] = out.accept;
- out_mapping[NODE_ACCEPT_EOD] = out.acceptEod;
-
- for (auto v : vertices_range(in)) {
- u32 i = in[v].index;
-
- /* special vertices are already in the out graph */
- if (i >= N_SPECIALS) {
- assert(!out_mapping[i]);
- out_mapping[i] = add_vertex(in[v], out);
- }
-
- out[out_mapping[i]] = in[v];
- }
-
- for (auto e : edges_range(in)) {
- u32 si = in[source(e, in)].index;
- u32 ti = in[target(e, in)].index;
-
- DEBUG_PRINTF("adding edge %u->%u\n", si, ti);
-
- NFAVertex s = out_mapping[si];
- NFAVertex t = out_mapping[ti];
+
+ vector<NFAVertex> out_mapping(num_vertices(in));
+ out_mapping[NODE_START] = out.start;
+ out_mapping[NODE_START_DOTSTAR] = out.startDs;
+ out_mapping[NODE_ACCEPT] = out.accept;
+ out_mapping[NODE_ACCEPT_EOD] = out.acceptEod;
+
+ for (auto v : vertices_range(in)) {
+ u32 i = in[v].index;
+
+ /* special vertices are already in the out graph */
+ if (i >= N_SPECIALS) {
+ assert(!out_mapping[i]);
+ out_mapping[i] = add_vertex(in[v], out);
+ }
+
+ out[out_mapping[i]] = in[v];
+ }
+
+ for (auto e : edges_range(in)) {
+ u32 si = in[source(e, in)].index;
+ u32 ti = in[target(e, in)].index;
+
+ DEBUG_PRINTF("adding edge %u->%u\n", si, ti);
+
+ NFAVertex s = out_mapping[si];
+ NFAVertex t = out_mapping[ti];
NFAEdge e2 = add_edge(s, t, out);
- out[e2] = in[e];
- }
-
- // Safety checks.
+ out[e2] = in[e];
+ }
+
+ // Safety checks.
assert(num_vertices(in) == num_vertices(out));
assert(num_edges(in) == num_edges(out));
- assert(hasCorrectlyNumberedVertices(out));
-}
-
-void cloneHolder(NGHolder &out, const NGHolder &in,
+ assert(hasCorrectlyNumberedVertices(out));
+}
+
+void cloneHolder(NGHolder &out, const NGHolder &in,
unordered_map<NFAVertex, NFAVertex> *mapping) {
- cloneHolder(out, in);
- vector<NFAVertex> out_verts(num_vertices(in));
- for (auto v : vertices_range(out)) {
- out_verts[out[v].index] = v;
- }
-
- mapping->clear();
-
- for (auto v : vertices_range(in)) {
- (*mapping)[v] = out_verts[in[v].index];
- assert((*mapping)[v]);
- }
-}
-
-unique_ptr<NGHolder> cloneHolder(const NGHolder &in) {
- unique_ptr<NGHolder> h = ue2::make_unique<NGHolder>();
- cloneHolder(*h, in);
- return h;
-}
-
+ cloneHolder(out, in);
+ vector<NFAVertex> out_verts(num_vertices(in));
+ for (auto v : vertices_range(out)) {
+ out_verts[out[v].index] = v;
+ }
+
+ mapping->clear();
+
+ for (auto v : vertices_range(in)) {
+ (*mapping)[v] = out_verts[in[v].index];
+ assert((*mapping)[v]);
+ }
+}
+
+unique_ptr<NGHolder> cloneHolder(const NGHolder &in) {
+ unique_ptr<NGHolder> h = ue2::make_unique<NGHolder>();
+ cloneHolder(*h, in);
+ return h;
+}
+
void reverseHolder(const NGHolder &g_in, NGHolder &g) {
// Make the BGL do the grunt work.
unordered_map<NFAVertex, NFAVertex> vertexMap;
@@ -734,58 +734,58 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
return delay;
}
-#ifndef NDEBUG
+#ifndef NDEBUG
-bool allMatchStatesHaveReports(const NGHolder &g) {
+bool allMatchStatesHaveReports(const NGHolder &g) {
unordered_set<NFAVertex> reporters;
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- if (g[v].reports.empty()) {
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ if (g[v].reports.empty()) {
DEBUG_PRINTF("vertex %zu has no reports!\n", g[v].index);
- return false;
- }
+ return false;
+ }
reporters.insert(v);
- }
+ }
- for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
- if (v == g.accept) {
- continue; // stylised edge
- }
- if (g[v].reports.empty()) {
+ for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
+ if (v == g.accept) {
+ continue; // stylised edge
+ }
+ if (g[v].reports.empty()) {
DEBUG_PRINTF("vertex %zu has no reports!\n", g[v].index);
- return false;
- }
+ return false;
+ }
reporters.insert(v);
- }
-
- for (auto v : vertices_range(g)) {
+ }
+
+ for (auto v : vertices_range(g)) {
if (!contains(reporters, v) && !g[v].reports.empty()) {
DEBUG_PRINTF("vertex %zu is not a match state, but has reports!\n",
g[v].index);
return false;
- }
- }
+ }
+ }
return true;
-}
-
+}
+
bool isCorrectlyTopped(const NGHolder &g) {
if (is_triggered(g)) {
for (const auto &e : out_edges_range(g.start, g)) {
if (g[e].tops.empty() != (target(e, g) == g.startDs)) {
return false;
}
- }
+ }
} else {
for (const auto &e : out_edges_range(g.start, g)) {
if (!g[e].tops.empty()) {
return false;
}
}
- }
+ }
return true;
-}
+}
+
+#endif // NDEBUG
-#endif // NDEBUG
-
-} // namespace ue2
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_util.h b/contrib/libs/hyperscan/src/nfagraph/ng_util.h
index 0f89b64dc9..a2d0d9b7d6 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_util.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_util.h
@@ -1,44 +1,44 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Miscellaneous NFA graph utilities.
- */
-#ifndef NG_UTIL_H
-#define NG_UTIL_H
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Miscellaneous NFA graph utilities.
+ */
+#ifndef NG_UTIL_H
+#define NG_UTIL_H
+
#include "ng_depth.h"
-#include "ng_holder.h"
-#include "ue2common.h"
+#include "ng_holder.h"
+#include "ue2common.h"
#include "util/flat_containers.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-
+#include "util/graph.h"
+#include "util/graph_range.h"
+
#include <boost/graph/depth_first_search.hpp> // for default_dfs_visitor
#include <algorithm>
@@ -46,12 +46,12 @@
#include <unordered_map>
#include <vector>
-namespace ue2 {
-
-struct Grey;
-struct ue2_literal;
-class ReportManager;
-
+namespace ue2 {
+
+struct Grey;
+struct ue2_literal;
+class ReportManager;
+
template<class VertexDepth>
depth maxDistFromInit(const VertexDepth &vd) {
if (vd.fromStart.max.is_unreachable()) {
@@ -62,7 +62,7 @@ depth maxDistFromInit(const VertexDepth &vd) {
return std::max(vd.fromStartDotStar.max, vd.fromStart.max);
}
}
-
+
template<class VertexDepth>
depth maxDistFromStartOfData(const VertexDepth &vd) {
if (vd.fromStartDotStar.max.is_reachable()) {
@@ -73,21 +73,21 @@ depth maxDistFromStartOfData(const VertexDepth &vd) {
}
}
-/** True if the given vertex is a dot (reachable on any character). */
-template<class GraphT>
-static really_inline
-bool is_dot(NFAVertex v, const GraphT &g) {
- return g[v].char_reach.all();
-}
-
-/** adds successors of v to s */
-template<class U>
-static really_inline
-void succ(const NGHolder &g, NFAVertex v, U *s) {
+/** True if the given vertex is a dot (reachable on any character). */
+template<class GraphT>
+static really_inline
+bool is_dot(NFAVertex v, const GraphT &g) {
+ return g[v].char_reach.all();
+}
+
+/** adds successors of v to s */
+template<class U>
+static really_inline
+void succ(const NGHolder &g, NFAVertex v, U *s) {
auto rv = adjacent_vertices(v, g);
s->insert(rv.first, rv.second);
-}
-
+}
+
template<class ContTemp = flat_set<NFAVertex>>
ContTemp succs(NFAVertex u, const NGHolder &g) {
ContTemp rv;
@@ -95,14 +95,14 @@ ContTemp succs(NFAVertex u, const NGHolder &g) {
return rv;
}
-/** adds predecessors of v to s */
-template<class U>
-static really_inline
-void pred(const NGHolder &g, NFAVertex v, U *p) {
+/** adds predecessors of v to s */
+template<class U>
+static really_inline
+void pred(const NGHolder &g, NFAVertex v, U *p) {
auto rv = inv_adjacent_vertices(v, g);
p->insert(rv.first, rv.second);
-}
-
+}
+
template<class ContTemp = flat_set<NFAVertex>>
ContTemp preds(NFAVertex u, const NGHolder &g) {
ContTemp rv;
@@ -110,15 +110,15 @@ ContTemp preds(NFAVertex u, const NGHolder &g) {
return rv;
}
-/** returns a vertex with an out edge from v and is not v.
- * v must have exactly one out-edge excluding self-loops.
+/** returns a vertex with an out edge from v and is not v.
+ * v must have exactly one out-edge excluding self-loops.
* will return NGHolder::null_vertex() if the preconditions don't hold.
- */
-NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex v);
-
-/** Like getSoleDestVertex but for in-edges */
-NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex v);
-
+ */
+NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex v);
+
+/** Like getSoleDestVertex but for in-edges */
+NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex v);
+
/** \brief edge filtered graph.
*
* This will give you a view over the graph that has none of the edges from
@@ -159,159 +159,159 @@ bad_vertex_filter<VertexSet> make_bad_vertex_filter(const VertexSet *v) {
return bad_vertex_filter<VertexSet>(v);
}
-/** Visitor that records back edges */
-template <typename BackEdgeSet>
-class BackEdges : public boost::default_dfs_visitor {
-public:
- explicit BackEdges(BackEdgeSet &edges) : backEdges(edges) {}
- template <class EdgeT, class GraphT>
- void back_edge(const EdgeT &e, const GraphT &) {
- backEdges.insert(e); // Remove this back edge only
- }
- BackEdgeSet &backEdges;
-};
-
-/** Returns true if the vertex is either of the real starts (NODE_START,
- * NODE_START_DOTSTAR). */
-template <typename GraphT>
-static really_inline
+/** Visitor that records back edges */
+template <typename BackEdgeSet>
+class BackEdges : public boost::default_dfs_visitor {
+public:
+ explicit BackEdges(BackEdgeSet &edges) : backEdges(edges) {}
+ template <class EdgeT, class GraphT>
+ void back_edge(const EdgeT &e, const GraphT &) {
+ backEdges.insert(e); // Remove this back edge only
+ }
+ BackEdgeSet &backEdges;
+};
+
+/** Returns true if the vertex is either of the real starts (NODE_START,
+ * NODE_START_DOTSTAR). */
+template <typename GraphT>
+static really_inline
bool is_any_start(typename GraphT::vertex_descriptor v, const GraphT &g) {
- u32 i = g[v].index;
- return i == NODE_START || i == NODE_START_DOTSTAR;
-}
-
-bool is_virtual_start(NFAVertex v, const NGHolder &g);
-
-template <typename GraphT>
+ u32 i = g[v].index;
+ return i == NODE_START || i == NODE_START_DOTSTAR;
+}
+
+bool is_virtual_start(NFAVertex v, const NGHolder &g);
+
+template <typename GraphT>
bool is_any_accept(typename GraphT::vertex_descriptor v, const GraphT &g) {
- u32 i = g[v].index;
- return i == NODE_ACCEPT || i == NODE_ACCEPT_EOD;
-}
-
-/** returns true iff v has an edge to accept or acceptEod */
-template <typename GraphT>
+ u32 i = g[v].index;
+ return i == NODE_ACCEPT || i == NODE_ACCEPT_EOD;
+}
+
+/** returns true iff v has an edge to accept or acceptEod */
+template <typename GraphT>
bool is_match_vertex(typename GraphT::vertex_descriptor v, const GraphT &g) {
- return edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second;
-}
-
-/** Generate a reverse topological ordering for a back-edge filtered version of
+ return edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second;
+}
+
+/** Generate a reverse topological ordering for a back-edge filtered version of
* our graph (as it must be a DAG and correctly numbered).
*
* Note: we ensure that we produce a topo ordering that begins with acceptEod
* and accept (if present) and ends with startDs followed by start.
*/
-std::vector<NFAVertex> getTopoOrdering(const NGHolder &g);
-
-bool onlyOneTop(const NGHolder &g);
-
+std::vector<NFAVertex> getTopoOrdering(const NGHolder &g);
+
+bool onlyOneTop(const NGHolder &g);
+
/** Return the set of the tops on the given graph. */
-flat_set<u32> getTops(const NGHolder &h);
-
+flat_set<u32> getTops(const NGHolder &h);
+
/** Initialise the tops on h to the provide top. Assumes that h is triggered and
* no tops have been set on h. */
void setTops(NGHolder &h, u32 top = DEFAULT_TOP);
-/** adds a vertex to g with all the same vertex properties as \p v (aside from
- * index) */
-NFAVertex clone_vertex(NGHolder &g, NFAVertex v);
-
-/**
- * \brief Copies all out-edges from source to target.
- *
- * Edge properties (aside from index) are preserved and duplicate edges are
- * skipped.
- */
-void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest);
-
-/**
- * \brief Copies all in-edges from source to target.
- *
- * Edge properties (aside from index) are preserved.
- */
-void clone_in_edges(NGHolder &g, NFAVertex source, NFAVertex dest);
-
-/** \brief True if the graph contains an edge from one of {start, startDs} to
- * one of {accept, acceptEod}. */
-bool isVacuous(const NGHolder &h);
-
-/** \brief True if the graph contains no floating vertices (startDs has no
- * proper successors). */
-bool isAnchored(const NGHolder &h);
-
+/** adds a vertex to g with all the same vertex properties as \p v (aside from
+ * index) */
+NFAVertex clone_vertex(NGHolder &g, NFAVertex v);
+
+/**
+ * \brief Copies all out-edges from source to target.
+ *
+ * Edge properties (aside from index) are preserved and duplicate edges are
+ * skipped.
+ */
+void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest);
+
+/**
+ * \brief Copies all in-edges from source to target.
+ *
+ * Edge properties (aside from index) are preserved.
+ */
+void clone_in_edges(NGHolder &g, NFAVertex source, NFAVertex dest);
+
+/** \brief True if the graph contains an edge from one of {start, startDs} to
+ * one of {accept, acceptEod}. */
+bool isVacuous(const NGHolder &h);
+
+/** \brief True if the graph contains no floating vertices (startDs has no
+ * proper successors). */
+bool isAnchored(const NGHolder &h);
+
/** \brief True if the graph contains no anchored vertices (start has no
* successors aside from startDs or vertices connected to startDs). */
bool isFloating(const NGHolder &h);
-/** True if the graph contains no back-edges at all, other than the
- * startDs self-loop. */
-bool isAcyclic(const NGHolder &g);
-
-/** True if the graph has a cycle reachable from the given source vertex. */
-bool hasReachableCycle(const NGHolder &g, NFAVertex src);
-
-/** True if g has any cycles which are not self-loops. */
-bool hasBigCycles(const NGHolder &g);
-
+/** True if the graph contains no back-edges at all, other than the
+ * startDs self-loop. */
+bool isAcyclic(const NGHolder &g);
+
+/** True if the graph has a cycle reachable from the given source vertex. */
+bool hasReachableCycle(const NGHolder &g, NFAVertex src);
+
+/** True if g has any cycles which are not self-loops. */
+bool hasBigCycles(const NGHolder &g);
+
/**
* \brief True if g has at least one non-special vertex with reach smaller than
* max_reach_count. The default of 200 is pretty conservative.
*/
bool hasNarrowReachVertex(const NGHolder &g, size_t max_reach_count = 200);
-/** Returns the set of all vertices that appear in any of the graph's cycles. */
-std::set<NFAVertex> findVerticesInCycles(const NGHolder &g);
-
-bool can_never_match(const NGHolder &g);
-
-/* \brief Does the graph have any edges leading into acceptEod (aside from
- * accept) or will it have after resolving asserts? */
-bool can_match_at_eod(const NGHolder &h);
-
-bool can_only_match_at_eod(const NGHolder &g);
-
-/** \brief Does this graph become a "firehose", matching between every
- * byte? */
-bool matches_everywhere(const NGHolder &h);
-
-
-struct mbsb_cache {
- explicit mbsb_cache(const NGHolder &gg) : g(gg) {}
- std::map<std::pair<u32, u32>, bool> cache;
- const NGHolder &g;
-};
-
-/* weaker than straight domination as allows jump edges */
-bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g,
- mbsb_cache &cache);
-
-/* adds the literal 's' to the end of the graph before h.accept */
-void appendLiteral(NGHolder &h, const ue2_literal &s);
-
-/** \brief Fill graph \a outp with a subset of the vertices in \a in (given in
- * \a in). A vertex mapping is returned in \a v_map_out. */
-void fillHolder(NGHolder *outp, const NGHolder &in,
- const std::deque<NFAVertex> &vv,
+/** Returns the set of all vertices that appear in any of the graph's cycles. */
+std::set<NFAVertex> findVerticesInCycles(const NGHolder &g);
+
+bool can_never_match(const NGHolder &g);
+
+/* \brief Does the graph have any edges leading into acceptEod (aside from
+ * accept) or will it have after resolving asserts? */
+bool can_match_at_eod(const NGHolder &h);
+
+bool can_only_match_at_eod(const NGHolder &g);
+
+/** \brief Does this graph become a "firehose", matching between every
+ * byte? */
+bool matches_everywhere(const NGHolder &h);
+
+
+struct mbsb_cache {
+ explicit mbsb_cache(const NGHolder &gg) : g(gg) {}
+ std::map<std::pair<u32, u32>, bool> cache;
+ const NGHolder &g;
+};
+
+/* weaker than straight domination as allows jump edges */
+bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g,
+ mbsb_cache &cache);
+
+/* adds the literal 's' to the end of the graph before h.accept */
+void appendLiteral(NGHolder &h, const ue2_literal &s);
+
+/** \brief Fill graph \a outp with a subset of the vertices in \a in (given in
+ * \a in). A vertex mapping is returned in \a v_map_out. */
+void fillHolder(NGHolder *outp, const NGHolder &in,
+ const std::deque<NFAVertex> &vv,
std::unordered_map<NFAVertex, NFAVertex> *v_map_out);
-
-/** \brief Clone the graph in \a in into graph \a out, returning a vertex
- * mapping in \a v_map_out. */
-void cloneHolder(NGHolder &out, const NGHolder &in,
+
+/** \brief Clone the graph in \a in into graph \a out, returning a vertex
+ * mapping in \a v_map_out. */
+void cloneHolder(NGHolder &out, const NGHolder &in,
std::unordered_map<NFAVertex, NFAVertex> *v_map_out);
-
-/** \brief Clone the graph in \a in into graph \a out. */
-void cloneHolder(NGHolder &out, const NGHolder &in);
-
-/** \brief Build a clone of graph \a in and return a pointer to it. */
-std::unique_ptr<NGHolder> cloneHolder(const NGHolder &in);
-
-/** \brief Clear all reports on vertices that do not have an edge to accept or
- * acceptEod. */
-void clearReports(NGHolder &g);
-
-/** \brief Add report \a r_new to every vertex that already has report \a
- * r_old. */
-void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new);
-
+
+/** \brief Clone the graph in \a in into graph \a out. */
+void cloneHolder(NGHolder &out, const NGHolder &in);
+
+/** \brief Build a clone of graph \a in and return a pointer to it. */
+std::unique_ptr<NGHolder> cloneHolder(const NGHolder &in);
+
+/** \brief Clear all reports on vertices that do not have an edge to accept or
+ * acceptEod. */
+void clearReports(NGHolder &g);
+
+/** \brief Add report \a r_new to every vertex that already has report \a
+ * r_old. */
+void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new);
+
/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to
* accepts. */
void reverseHolder(const NGHolder &g, NGHolder &out);
@@ -321,8 +321,8 @@ void reverseHolder(const NGHolder &g, NGHolder &out);
u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
u32 max_delay, bool overhang_ok = true);
-#ifndef NDEBUG
-
+#ifndef NDEBUG
+
// Assertions: only available in internal builds.
/**
@@ -330,8 +330,8 @@ u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
* with edges to accept or acceptEod have at least one report ID. Additionally,
* checks that ONLY vertices with edges to accept or acceptEod has reports.
*/
-bool allMatchStatesHaveReports(const NGHolder &g);
-
+bool allMatchStatesHaveReports(const NGHolder &g);
+
/**
* Assertion: returns true if the graph is triggered and all edges out of start
* have tops OR if the graph is not-triggered and all edges out of start have no
@@ -339,7 +339,7 @@ bool allMatchStatesHaveReports(const NGHolder &g);
*/
bool isCorrectlyTopped(const NGHolder &g);
#endif // NDEBUG
-
-} // namespace ue2
-
-#endif
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.cpp
index 71ec2e4bab..d1123dff49 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.cpp
@@ -1,143 +1,143 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Build code for vacuous graphs.
- */
-#include "ng_vacuous.h"
-
-#include "grey.h"
-#include "ng.h"
-#include "ng_util.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Build code for vacuous graphs.
+ */
+#include "ng_vacuous.h"
+
+#include "grey.h"
+#include "ng.h"
+#include "ng_util.h"
#include "compiler/compiler.h"
-
-using namespace std;
-
-namespace ue2 {
-
-static
+
+using namespace std;
+
+namespace ue2 {
+
+static
ReportID getInternalId(ReportManager &rm, const ExpressionInfo &expr) {
Report ir = rm.getBasicInternalReport(expr);
-
- // Apply any extended params.
+
+ // Apply any extended params.
if (expr.min_offset || expr.max_offset != MAX_OFFSET) {
ir.minOffset = expr.min_offset;
ir.maxOffset = expr.max_offset;
- }
-
+ }
+
assert(!expr.min_length); // should be handled elsewhere.
-
- return rm.getInternalId(ir);
-}
-
-static
+
+ return rm.getInternalId(ir);
+}
+
+static
void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr) {
const ReportID r = getInternalId(rm, expr);
-
- boundary.report_at_0_eod.insert(r);
- boundary.report_at_0.insert(r);
-
- // Replace the graph with a '.+'.
-
- clear_graph(g);
- clearReports(g);
- remove_edge(g.start, g.accept, g);
- remove_edge(g.start, g.acceptEod, g);
- remove_edge(g.startDs, g.accept, g);
- remove_edge(g.startDs, g.acceptEod, g);
-
- NFAVertex v = add_vertex(g);
- g[v].char_reach.setall();
- g[v].reports.insert(r);
- add_edge(v, v, g);
- add_edge(g.start, v, g);
- add_edge(g.startDs, v, g);
- add_edge(v, g.accept, g);
-}
-
-static
-void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
+
+ boundary.report_at_0_eod.insert(r);
+ boundary.report_at_0.insert(r);
+
+ // Replace the graph with a '.+'.
+
+ clear_graph(g);
+ clearReports(g);
+ remove_edge(g.start, g.accept, g);
+ remove_edge(g.start, g.acceptEod, g);
+ remove_edge(g.startDs, g.accept, g);
+ remove_edge(g.startDs, g.acceptEod, g);
+
+ NFAVertex v = add_vertex(g);
+ g[v].char_reach.setall();
+ g[v].reports.insert(r);
+ add_edge(v, v, g);
+ add_edge(g.start, v, g);
+ add_edge(g.startDs, v, g);
+ add_edge(v, g.accept, g);
+}
+
+static
+void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
NGHolder &g, const ExpressionInfo &expr) {
boundary.report_at_0.insert(getInternalId(rm, expr));
- remove_edge(g.start, g.accept, g);
- remove_edge(g.start, g.acceptEod, g);
- g[g.start].reports.clear();
-}
-
-static
-void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
+ remove_edge(g.start, g.accept, g);
+ remove_edge(g.start, g.acceptEod, g);
+ g[g.start].reports.clear();
+}
+
+static
+void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
NGHolder &g, const ExpressionInfo &expr) {
boundary.report_at_eod.insert(getInternalId(rm, expr));
- remove_edge(g.startDs, g.acceptEod, g);
- remove_edge(g.start, g.acceptEod, g);
- g[g.start].reports.clear();
- g[g.startDs].reports.clear();
-}
-
-static
-void makeNothingAcceptor(BoundaryReports &boundary, ReportManager &rm,
+ remove_edge(g.startDs, g.acceptEod, g);
+ remove_edge(g.start, g.acceptEod, g);
+ g[g.start].reports.clear();
+ g[g.startDs].reports.clear();
+}
+
+static
+void makeNothingAcceptor(BoundaryReports &boundary, ReportManager &rm,
NGHolder &g, const ExpressionInfo &expr) {
boundary.report_at_0_eod.insert(getInternalId(rm, expr));
- remove_edge(g.start, g.acceptEod, g);
- g[g.start].reports.clear();
-}
-
-bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
+ remove_edge(g.start, g.acceptEod, g);
+ g[g.start].reports.clear();
+}
+
+bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
NGHolder &g, const ExpressionInfo &expr) {
- if (edge(g.startDs, g.accept, g).second) {
- // e.g. '.*'; match "between" every byte
- DEBUG_PRINTF("graph is firehose\n");
+ if (edge(g.startDs, g.accept, g).second) {
+ // e.g. '.*'; match "between" every byte
+ DEBUG_PRINTF("graph is firehose\n");
makeFirehose(boundary, rm, g, expr);
- return true;
- }
-
- bool work_done = false;
-
- if (edge(g.start, g.accept, g).second) {
- DEBUG_PRINTF("creating anchored acceptor\n");
+ return true;
+ }
+
+ bool work_done = false;
+
+ if (edge(g.start, g.accept, g).second) {
+ DEBUG_PRINTF("creating anchored acceptor\n");
makeAnchoredAcceptor(boundary, rm, g, expr);
- work_done = true;
- }
-
- if (edge(g.startDs, g.acceptEod, g).second) {
- DEBUG_PRINTF("creating end-anchored acceptor\n");
+ work_done = true;
+ }
+
+ if (edge(g.startDs, g.acceptEod, g).second) {
+ DEBUG_PRINTF("creating end-anchored acceptor\n");
makeEndAnchoredAcceptor(boundary, rm, g, expr);
- work_done = true;
- }
-
- if (edge(g.start, g.acceptEod, g).second) {
- DEBUG_PRINTF("creating nothing acceptor\n");
+ work_done = true;
+ }
+
+ if (edge(g.start, g.acceptEod, g).second) {
+ DEBUG_PRINTF("creating nothing acceptor\n");
makeNothingAcceptor(boundary, rm, g, expr);
- work_done = true;
- }
-
- return work_done;
-}
-
-} // namespace ue2
+ work_done = true;
+ }
+
+ return work_done;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.h b/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.h
index 12ad62d812..c33cb312de 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_vacuous.h
@@ -1,49 +1,49 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Build code for vacuous graphs.
- */
-
-#ifndef NG_VACUOUS_H
-#define NG_VACUOUS_H
-
-namespace ue2 {
-
-struct BoundaryReports;
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Build code for vacuous graphs.
+ */
+
+#ifndef NG_VACUOUS_H
+#define NG_VACUOUS_H
+
+namespace ue2 {
+
+struct BoundaryReports;
class ExpressionInfo;
class NGHolder;
-class ReportManager;
-
-// Returns true if a "vacuous" reporter was created.
-bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
+class ReportManager;
+
+// Returns true if a "vacuous" reporter was created.
+bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
NGHolder &g, const ExpressionInfo &expr);
-
-} // namespace ue2
-
-#endif // NG_VACUOUS_H
+
+} // namespace ue2
+
+#endif // NG_VACUOUS_H
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_width.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_width.cpp
index f2d4fb73e4..219241ca55 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_width.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_width.cpp
@@ -1,237 +1,237 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Functions for finding the min/max width of the input required to
- * match a pattern.
- */
-#include "ng_width.h"
-
-#include "ng_holder.h"
-#include "ng_util.h"
-#include "ue2common.h"
-#include "util/depth.h"
-#include "util/graph.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Functions for finding the min/max width of the input required to
+ * match a pattern.
+ */
+#include "ng_width.h"
+
+#include "ng_holder.h"
+#include "ng_util.h"
+#include "ue2common.h"
+#include "util/depth.h"
+#include "util/graph.h"
#include "util/graph_small_color_map.h"
-
-#include <deque>
-#include <vector>
-
-#include <boost/graph/breadth_first_search.hpp>
-#include <boost/graph/dag_shortest_paths.hpp>
-#include <boost/graph/filtered_graph.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
-namespace {
-
-/**
- * Filter out special edges, or in the top-specific variant, start edges that
- * don't have the right top set.
- */
-struct SpecialEdgeFilter {
- SpecialEdgeFilter() {}
- explicit SpecialEdgeFilter(const NGHolder &h_in) : h(&h_in) {}
+
+#include <deque>
+#include <vector>
+
+#include <boost/graph/breadth_first_search.hpp>
+#include <boost/graph/dag_shortest_paths.hpp>
+#include <boost/graph/filtered_graph.hpp>
+
+using namespace std;
+
+namespace ue2 {
+
+namespace {
+
+/**
+ * Filter out special edges, or in the top-specific variant, start edges that
+ * don't have the right top set.
+ */
+struct SpecialEdgeFilter {
+ SpecialEdgeFilter() {}
+ explicit SpecialEdgeFilter(const NGHolder &h_in) : h(&h_in) {}
SpecialEdgeFilter(const NGHolder &h_in, u32 top_in)
- : h(&h_in), single_top(true), top(top_in) {}
-
- bool operator()(const NFAEdge &e) const {
+ : h(&h_in), single_top(true), top(top_in) {}
+
+ bool operator()(const NFAEdge &e) const {
NFAVertex u = source(e, *h);
NFAVertex v = target(e, *h);
if ((is_any_start(u, *h) && is_any_start(v, *h)) ||
(is_any_accept(u, *h) && is_any_accept(v, *h))) {
- return false;
- }
- if (single_top) {
+ return false;
+ }
+ if (single_top) {
if (u == h->start && !contains((*h)[e].tops, top)) {
- return false;
- }
- if (u == h->startDs) {
- return false;
- }
- }
- return true;
-
- }
-private:
- const NGHolder *h = nullptr;
- bool single_top = false;
- u32 top = 0;
-};
-
-} // namespace
-
-static
-depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter,
- NFAVertex src) {
- if (isLeafNode(src, h)) {
- return depth::unreachable();
- }
-
+ return false;
+ }
+ if (u == h->startDs) {
+ return false;
+ }
+ }
+ return true;
+
+ }
+private:
+ const NGHolder *h = nullptr;
+ bool single_top = false;
+ u32 top = 0;
+};
+
+} // namespace
+
+static
+depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter,
+ NFAVertex src) {
+ if (isLeafNode(src, h)) {
+ return depth::unreachable();
+ }
+
boost::filtered_graph<NGHolder, SpecialEdgeFilter> g(h, filter);
-
- assert(hasCorrectlyNumberedVertices(h));
- const size_t num = num_vertices(h);
- vector<depth> distance(num, depth::unreachable());
- distance.at(g[src].index) = depth(0);
-
- auto index_map = get(&NFAGraphVertexProps::index, g);
-
- // Since we are interested in the single-source shortest paths on a graph
- // with the same weight on every edge, using BFS will be faster than
- // Dijkstra here.
+
+ assert(hasCorrectlyNumberedVertices(h));
+ const size_t num = num_vertices(h);
+ vector<depth> distance(num, depth::unreachable());
+ distance.at(g[src].index) = depth(0);
+
+ auto index_map = get(&NFAGraphVertexProps::index, g);
+
+ // Since we are interested in the single-source shortest paths on a graph
+ // with the same weight on every edge, using BFS will be faster than
+ // Dijkstra here.
breadth_first_search(g, src,
- visitor(make_bfs_visitor(record_distances(
- make_iterator_property_map(distance.begin(), index_map),
+ visitor(make_bfs_visitor(record_distances(
+ make_iterator_property_map(distance.begin(), index_map),
boost::on_tree_edge()))));
-
- DEBUG_PRINTF("d[accept]=%s, d[acceptEod]=%s\n",
- distance.at(NODE_ACCEPT).str().c_str(),
- distance.at(NODE_ACCEPT_EOD).str().c_str());
-
- depth d = min(distance.at(NODE_ACCEPT), distance.at(NODE_ACCEPT_EOD));
-
- if (d.is_unreachable()) {
- return d;
- }
-
- assert(d.is_finite());
- assert(d > depth(0));
- return d - depth(1);
-}
-
-static
-depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter,
- NFAVertex src) {
+
+ DEBUG_PRINTF("d[accept]=%s, d[acceptEod]=%s\n",
+ distance.at(NODE_ACCEPT).str().c_str(),
+ distance.at(NODE_ACCEPT_EOD).str().c_str());
+
+ depth d = min(distance.at(NODE_ACCEPT), distance.at(NODE_ACCEPT_EOD));
+
+ if (d.is_unreachable()) {
+ return d;
+ }
+
+ assert(d.is_finite());
+ assert(d > depth(0));
+ return d - depth(1);
+}
+
+static
+depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter,
+ NFAVertex src) {
if (isLeafNode(src, h)) {
- return depth::unreachable();
- }
-
- if (hasReachableCycle(h, src)) {
- // There's a cycle reachable from this src, so we have inf width.
- return depth::infinity();
- }
-
+ return depth::unreachable();
+ }
+
+ if (hasReachableCycle(h, src)) {
+ // There's a cycle reachable from this src, so we have inf width.
+ return depth::infinity();
+ }
+
boost::filtered_graph<NGHolder, SpecialEdgeFilter> g(h, filter);
-
- assert(hasCorrectlyNumberedVertices(h));
- const size_t num = num_vertices(h);
- vector<int> distance(num);
+
+ assert(hasCorrectlyNumberedVertices(h));
+ const size_t num = num_vertices(h);
+ vector<int> distance(num);
auto colors = make_small_color_map(h);
-
- auto index_map = get(&NFAGraphVertexProps::index, g);
-
- // DAG shortest paths with negative edge weights.
+
+ auto index_map = get(&NFAGraphVertexProps::index, g);
+
+ // DAG shortest paths with negative edge weights.
dag_shortest_paths(g, src,
- distance_map(make_iterator_property_map(distance.begin(), index_map))
- .weight_map(boost::make_constant_property<NFAEdge>(-1))
+ distance_map(make_iterator_property_map(distance.begin(), index_map))
+ .weight_map(boost::make_constant_property<NFAEdge>(-1))
.color_map(colors));
-
- depth acceptDepth, acceptEodDepth;
+
+ depth acceptDepth, acceptEodDepth;
if (get(colors, h.accept) == small_color::white) {
- acceptDepth = depth::unreachable();
- } else {
+ acceptDepth = depth::unreachable();
+ } else {
acceptDepth = depth(-1 * distance.at(NODE_ACCEPT));
- }
+ }
if (get(colors, h.acceptEod) == small_color::white) {
- acceptEodDepth = depth::unreachable();
- } else {
+ acceptEodDepth = depth::unreachable();
+ } else {
acceptEodDepth = depth(-1 * distance.at(NODE_ACCEPT_EOD));
- }
-
- depth d;
- if (acceptDepth.is_unreachable()) {
- d = acceptEodDepth;
- } else if (acceptEodDepth.is_unreachable()) {
- d = acceptDepth;
- } else {
- d = max(acceptDepth, acceptEodDepth);
- }
-
- if (d.is_unreachable()) {
+ }
+
+ depth d;
+ if (acceptDepth.is_unreachable()) {
+ d = acceptEodDepth;
+ } else if (acceptEodDepth.is_unreachable()) {
+ d = acceptDepth;
+ } else {
+ d = max(acceptDepth, acceptEodDepth);
+ }
+
+ if (d.is_unreachable()) {
assert(findMinWidth(h, filter, src).is_unreachable());
- return d;
- }
-
- // Invert sign and subtract one for start transition.
- assert(d.is_finite() && d > depth(0));
- return d - depth(1);
-}
-
-static
-depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter) {
- depth startDepth = findMinWidth(h, filter, h.start);
- depth dotstarDepth = findMinWidth(h, filter, h.startDs);
- DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(),
- dotstarDepth.str().c_str());
- if (startDepth.is_unreachable()) {
- assert(dotstarDepth.is_finite());
- return dotstarDepth;
- } else if (dotstarDepth.is_unreachable()) {
- assert(startDepth.is_finite());
- return startDepth;
- } else {
- assert(min(startDepth, dotstarDepth).is_finite());
- return min(startDepth, dotstarDepth);
- }
-}
-
-depth findMinWidth(const NGHolder &h) {
- return findMinWidth(h, SpecialEdgeFilter(h));
-}
-
-depth findMinWidth(const NGHolder &h, u32 top) {
- return findMinWidth(h, SpecialEdgeFilter(h, top));
-}
-
-static
-depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter) {
- depth startDepth = findMaxWidth(h, filter, h.start);
- depth dotstarDepth = findMaxWidth(h, filter, h.startDs);
- DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(),
- dotstarDepth.str().c_str());
- if (startDepth.is_unreachable()) {
- return dotstarDepth;
- } else if (dotstarDepth.is_unreachable()) {
- return startDepth;
- } else {
- return max(startDepth, dotstarDepth);
- }
-}
-
-depth findMaxWidth(const NGHolder &h) {
- return findMaxWidth(h, SpecialEdgeFilter(h));
-}
-
-depth findMaxWidth(const NGHolder &h, u32 top) {
- return findMaxWidth(h, SpecialEdgeFilter(h, top));
-}
-
-} // namespace ue2
+ return d;
+ }
+
+ // Invert sign and subtract one for start transition.
+ assert(d.is_finite() && d > depth(0));
+ return d - depth(1);
+}
+
+static
+depth findMinWidth(const NGHolder &h, const SpecialEdgeFilter &filter) {
+ depth startDepth = findMinWidth(h, filter, h.start);
+ depth dotstarDepth = findMinWidth(h, filter, h.startDs);
+ DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(),
+ dotstarDepth.str().c_str());
+ if (startDepth.is_unreachable()) {
+ assert(dotstarDepth.is_finite());
+ return dotstarDepth;
+ } else if (dotstarDepth.is_unreachable()) {
+ assert(startDepth.is_finite());
+ return startDepth;
+ } else {
+ assert(min(startDepth, dotstarDepth).is_finite());
+ return min(startDepth, dotstarDepth);
+ }
+}
+
+depth findMinWidth(const NGHolder &h) {
+ return findMinWidth(h, SpecialEdgeFilter(h));
+}
+
+depth findMinWidth(const NGHolder &h, u32 top) {
+ return findMinWidth(h, SpecialEdgeFilter(h, top));
+}
+
+static
+depth findMaxWidth(const NGHolder &h, const SpecialEdgeFilter &filter) {
+ depth startDepth = findMaxWidth(h, filter, h.start);
+ depth dotstarDepth = findMaxWidth(h, filter, h.startDs);
+ DEBUG_PRINTF("startDepth=%s, dotstarDepth=%s\n", startDepth.str().c_str(),
+ dotstarDepth.str().c_str());
+ if (startDepth.is_unreachable()) {
+ return dotstarDepth;
+ } else if (dotstarDepth.is_unreachable()) {
+ return startDepth;
+ } else {
+ return max(startDepth, dotstarDepth);
+ }
+}
+
+depth findMaxWidth(const NGHolder &h) {
+ return findMaxWidth(h, SpecialEdgeFilter(h));
+}
+
+depth findMaxWidth(const NGHolder &h, u32 top) {
+ return findMaxWidth(h, SpecialEdgeFilter(h, top));
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_width.h b/contrib/libs/hyperscan/src/nfagraph/ng_width.h
index ecc3c100ae..871e8a9343 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_width.h
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_width.h
@@ -1,74 +1,74 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Functions for finding the min/max width of the input required to
- * match a pattern.
- */
-
-#ifndef NG_WIDTH_H
-#define NG_WIDTH_H
-
-#include "ue2common.h"
-#include "util/depth.h"
-
-namespace ue2 {
-
-class NGHolder;
-
-/**
- * \brief Compute the minimum width in bytes of an input that will match the
- * given graph.
- */
-depth findMinWidth(const NGHolder &h);
-
-/**
- * \brief Compute the minimum width in bytes of an input that will match the
- * given graph, considering only paths activated by the given top.
- */
-depth findMinWidth(const NGHolder &h, u32 top);
-
-/**
- * \brief Compute the maximum width in bytes of an input that will match the
- * given graph.
- *
- * If there is no bound on the maximum width, returns infinity.
- */
-depth findMaxWidth(const NGHolder &h);
-
-/**
- * \brief Compute the maximum width in bytes of an input that will match the
- * given graph, considering only paths activated by the given top.
- *
- * If there is no bound on the maximum width, returns infinity.
- */
-depth findMaxWidth(const NGHolder &h, u32 top);
-
-} // namespace ue2
-
-#endif // NG_WIDTH_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Functions for finding the min/max width of the input required to
+ * match a pattern.
+ */
+
+#ifndef NG_WIDTH_H
+#define NG_WIDTH_H
+
+#include "ue2common.h"
+#include "util/depth.h"
+
+namespace ue2 {
+
+class NGHolder;
+
+/**
+ * \brief Compute the minimum width in bytes of an input that will match the
+ * given graph.
+ */
+depth findMinWidth(const NGHolder &h);
+
+/**
+ * \brief Compute the minimum width in bytes of an input that will match the
+ * given graph, considering only paths activated by the given top.
+ */
+depth findMinWidth(const NGHolder &h, u32 top);
+
+/**
+ * \brief Compute the maximum width in bytes of an input that will match the
+ * given graph.
+ *
+ * If there is no bound on the maximum width, returns infinity.
+ */
+depth findMaxWidth(const NGHolder &h);
+
+/**
+ * \brief Compute the maximum width in bytes of an input that will match the
+ * given graph, considering only paths activated by the given top.
+ *
+ * If there is no bound on the maximum width, returns infinity.
+ */
+depth findMaxWidth(const NGHolder &h, u32 top);
+
+} // namespace ue2
+
+#endif // NG_WIDTH_H
diff --git a/contrib/libs/hyperscan/src/parser/AsciiComponentClass.cpp b/contrib/libs/hyperscan/src/parser/AsciiComponentClass.cpp
index 0fa72ec5f0..7cfa6e11b3 100644
--- a/contrib/libs/hyperscan/src/parser/AsciiComponentClass.cpp
+++ b/contrib/libs/hyperscan/src/parser/AsciiComponentClass.cpp
@@ -1,160 +1,160 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Character classes and their mnemonics.
- */
-#include "AsciiComponentClass.h"
-#include "Utf8ComponentClass.h"
-#include "buildstate.h"
-#include "parse_error.h"
-#include "position.h"
-#include "position_info.h"
-#include "nfagraph/ng_builder.h"
-#include "util/charreach_util.h"
-
-using namespace std;
-
-namespace ue2 {
-
-AsciiComponentClass::AsciiComponentClass(const ParseMode &mode_in)
- : ComponentClass(mode_in), position(GlushkovBuildState::POS_UNINITIALIZED) {
- assert(!mode.utf8);
-}
-
-AsciiComponentClass *AsciiComponentClass::clone() const {
- return new AsciiComponentClass(*this);
-}
-
-bool AsciiComponentClass::class_empty(void) const {
- assert(finalized);
- return cr.none();
-}
-
-void AsciiComponentClass::createRange(unichar to) {
- assert(range_start <= 0xff);
- unsigned char from = (u8)range_start;
- if (from > to) {
- throw LocatedParseError("Range out of order in character class");
- }
-
- in_cand_range = false;
- CharReach ncr(from, to);
- if (mode.caseless) {
- make_caseless(&ncr);
- }
- cr |= ncr;
- range_start = INVALID_UNICODE;
-}
-
-void AsciiComponentClass::notePositions(GlushkovBuildState &bs) {
- // We should always be finalized by now.
- assert(finalized);
-
- NFABuilder &builder = bs.getBuilder();
- position = builder.makePositions(1);
-
- builder.addCharReach(position, cr);
- builder.setNodeReportID(position, 0 /* offset adj */);
- recordPosBounds(position, position + 1);
-}
-
-void AsciiComponentClass::buildFollowSet(GlushkovBuildState &,
- const vector<PositionInfo> &) {
- // all follow set construction is handled by firsts/lasts
-}
-
-void AsciiComponentClass::add(PredefinedClass c, bool negative) {
- if (in_cand_range) { // can't form a range here
- throw LocatedParseError("Invalid range in character class");
- }
- DEBUG_PRINTF("getting %u %s\n", (u32)c, negative ? "^" : "");
-
- if (mode.ucp) {
- c = translateForUcpMode(c, mode);
- }
-
- // Note: caselessness is handled by getPredefinedCharReach.
- CharReach pcr = getPredefinedCharReach(c, mode);
- if (negative) {
- pcr.flip();
- }
-
- cr |= pcr;
- range_start = INVALID_UNICODE;
- in_cand_range = false;
-}
-
-void AsciiComponentClass::add(unichar c) {
- DEBUG_PRINTF("adding \\x%02x\n", c);
- if (c > 0xff) { // too big!
- throw LocatedParseError("Hexadecimal value is greater than \\xFF");
- }
-
- if (in_cand_range) {
- createRange(c);
- return;
- }
-
- CharReach ncr(c, c);
- if (mode.caseless) {
- make_caseless(&ncr);
- }
-
- cr |= ncr;
- range_start = c;
-}
-
-void AsciiComponentClass::finalize() {
- if (finalized) {
- return;
- }
-
- // Handle unclosed ranges, like '[a-]' and '[a-\Q\E]' -- in these cases the
- // dash is a literal dash.
- if (in_cand_range) {
- cr.set('-');
- in_cand_range = false;
- }
-
- if (m_negate) {
- cr.flip();
- }
-
- finalized = true;
-}
-
-vector<PositionInfo> AsciiComponentClass::first(void) const {
- return vector<PositionInfo>(1, PositionInfo(position));
-}
-
-vector<PositionInfo> AsciiComponentClass::last(void) const {
- return vector<PositionInfo>(1, PositionInfo(position));
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Character classes and their mnemonics.
+ */
+#include "AsciiComponentClass.h"
+#include "Utf8ComponentClass.h"
+#include "buildstate.h"
+#include "parse_error.h"
+#include "position.h"
+#include "position_info.h"
+#include "nfagraph/ng_builder.h"
+#include "util/charreach_util.h"
+
+using namespace std;
+
+namespace ue2 {
+
+AsciiComponentClass::AsciiComponentClass(const ParseMode &mode_in)
+ : ComponentClass(mode_in), position(GlushkovBuildState::POS_UNINITIALIZED) {
+ assert(!mode.utf8);
+}
+
+AsciiComponentClass *AsciiComponentClass::clone() const {
+ return new AsciiComponentClass(*this);
+}
+
+bool AsciiComponentClass::class_empty(void) const {
+ assert(finalized);
+ return cr.none();
+}
+
+void AsciiComponentClass::createRange(unichar to) {
+ assert(range_start <= 0xff);
+ unsigned char from = (u8)range_start;
+ if (from > to) {
+ throw LocatedParseError("Range out of order in character class");
+ }
+
+ in_cand_range = false;
+ CharReach ncr(from, to);
+ if (mode.caseless) {
+ make_caseless(&ncr);
+ }
+ cr |= ncr;
+ range_start = INVALID_UNICODE;
+}
+
+void AsciiComponentClass::notePositions(GlushkovBuildState &bs) {
+ // We should always be finalized by now.
+ assert(finalized);
+
+ NFABuilder &builder = bs.getBuilder();
+ position = builder.makePositions(1);
+
+ builder.addCharReach(position, cr);
+ builder.setNodeReportID(position, 0 /* offset adj */);
+ recordPosBounds(position, position + 1);
+}
+
+void AsciiComponentClass::buildFollowSet(GlushkovBuildState &,
+ const vector<PositionInfo> &) {
+ // all follow set construction is handled by firsts/lasts
+}
+
+void AsciiComponentClass::add(PredefinedClass c, bool negative) {
+ if (in_cand_range) { // can't form a range here
+ throw LocatedParseError("Invalid range in character class");
+ }
+ DEBUG_PRINTF("getting %u %s\n", (u32)c, negative ? "^" : "");
+
+ if (mode.ucp) {
+ c = translateForUcpMode(c, mode);
+ }
+
+ // Note: caselessness is handled by getPredefinedCharReach.
+ CharReach pcr = getPredefinedCharReach(c, mode);
+ if (negative) {
+ pcr.flip();
+ }
+
+ cr |= pcr;
+ range_start = INVALID_UNICODE;
+ in_cand_range = false;
+}
+
+void AsciiComponentClass::add(unichar c) {
+ DEBUG_PRINTF("adding \\x%02x\n", c);
+ if (c > 0xff) { // too big!
+ throw LocatedParseError("Hexadecimal value is greater than \\xFF");
+ }
+
+ if (in_cand_range) {
+ createRange(c);
+ return;
+ }
+
+ CharReach ncr(c, c);
+ if (mode.caseless) {
+ make_caseless(&ncr);
+ }
+
+ cr |= ncr;
+ range_start = c;
+}
+
+void AsciiComponentClass::finalize() {
+ if (finalized) {
+ return;
+ }
+
+ // Handle unclosed ranges, like '[a-]' and '[a-\Q\E]' -- in these cases the
+ // dash is a literal dash.
+ if (in_cand_range) {
+ cr.set('-');
+ in_cand_range = false;
+ }
+
+ if (m_negate) {
+ cr.flip();
+ }
+
+ finalized = true;
+}
+
+vector<PositionInfo> AsciiComponentClass::first(void) const {
+ return vector<PositionInfo>(1, PositionInfo(position));
+}
+
+vector<PositionInfo> AsciiComponentClass::last(void) const {
+ return vector<PositionInfo>(1, PositionInfo(position));
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/AsciiComponentClass.h b/contrib/libs/hyperscan/src/parser/AsciiComponentClass.h
index 3e6c2dc908..925fa9bff4 100644
--- a/contrib/libs/hyperscan/src/parser/AsciiComponentClass.h
+++ b/contrib/libs/hyperscan/src/parser/AsciiComponentClass.h
@@ -1,89 +1,89 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Character classes and their mnemonics.
- */
-
-#ifndef ASCIICOMPONENTCLASS_H
-#define ASCIICOMPONENTCLASS_H
-
-#include "ComponentClass.h"
-#include "util/charreach.h"
-
-namespace ue2 {
-
-class AsciiComponentClass : public ComponentClass {
- friend class ConstructLiteralVisitor;
- friend class DumpVisitor;
- friend class PrintVisitor;
- friend class CaselessVisitor;
- friend class SimplifyVisitor;
- friend class SimplifyCandidatesVisitor;
-public:
- explicit AsciiComponentClass(const ParseMode &mode_in);
- ~AsciiComponentClass() override {}
- AsciiComponentClass *clone() const override;
-
- Component *accept(ComponentVisitor &v) override {
- Component *c = v.visit(this);
- v.post(this);
- return c;
- }
-
- void accept(ConstComponentVisitor &v) const override {
- v.pre(*this);
- v.during(*this);
- v.post(*this);
- }
-
- bool class_empty(void) const override;
- void add(PredefinedClass c, bool negative) override;
- void add(unichar c) override;
- void finalize(void) override;
- void notePositions(GlushkovBuildState &bs) override;
- void buildFollowSet(GlushkovBuildState &bs,
- const std::vector<PositionInfo> &) override;
- std::vector<PositionInfo> first(void) const override;
- std::vector<PositionInfo> last(void) const override;
-
-protected:
- void createRange(unichar to) override;
-
-private:
- Position position;
- CharReach cr;
-
- // Private copy ctor. Use clone instead.
- AsciiComponentClass(const AsciiComponentClass &other)
- : ComponentClass(other), position(other.position), cr(other.cr) {}
-};
-
-} // namespace ue2
-
-#endif // ASCIICOMPONENTCLASS_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Character classes and their mnemonics.
+ */
+
+#ifndef ASCIICOMPONENTCLASS_H
+#define ASCIICOMPONENTCLASS_H
+
+#include "ComponentClass.h"
+#include "util/charreach.h"
+
+namespace ue2 {
+
+class AsciiComponentClass : public ComponentClass {
+ friend class ConstructLiteralVisitor;
+ friend class DumpVisitor;
+ friend class PrintVisitor;
+ friend class CaselessVisitor;
+ friend class SimplifyVisitor;
+ friend class SimplifyCandidatesVisitor;
+public:
+ explicit AsciiComponentClass(const ParseMode &mode_in);
+ ~AsciiComponentClass() override {}
+ AsciiComponentClass *clone() const override;
+
+ Component *accept(ComponentVisitor &v) override {
+ Component *c = v.visit(this);
+ v.post(this);
+ return c;
+ }
+
+ void accept(ConstComponentVisitor &v) const override {
+ v.pre(*this);
+ v.during(*this);
+ v.post(*this);
+ }
+
+ bool class_empty(void) const override;
+ void add(PredefinedClass c, bool negative) override;
+ void add(unichar c) override;
+ void finalize(void) override;
+ void notePositions(GlushkovBuildState &bs) override;
+ void buildFollowSet(GlushkovBuildState &bs,
+ const std::vector<PositionInfo> &) override;
+ std::vector<PositionInfo> first(void) const override;
+ std::vector<PositionInfo> last(void) const override;
+
+protected:
+ void createRange(unichar to) override;
+
+private:
+ Position position;
+ CharReach cr;
+
+ // Private copy ctor. Use clone instead.
+ AsciiComponentClass(const AsciiComponentClass &other)
+ : ComponentClass(other), position(other.position), cr(other.cr) {}
+};
+
+} // namespace ue2
+
+#endif // ASCIICOMPONENTCLASS_H
diff --git a/contrib/libs/hyperscan/src/parser/Component.cpp b/contrib/libs/hyperscan/src/parser/Component.cpp
index 39cc22cb53..b40ce84d38 100644
--- a/contrib/libs/hyperscan/src/parser/Component.cpp
+++ b/contrib/libs/hyperscan/src/parser/Component.cpp
@@ -1,75 +1,75 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Base class for all components.
- */
-
-
-#include "Component.h"
-
-#include "buildstate.h"
-#include "position.h"
-#include "position_info.h"
-#include "ue2common.h"
-
-using namespace std;
-
-namespace ue2 {
-
-Component::Component()
- : pos_begin(GlushkovBuildState::POS_UNINITIALIZED),
- pos_end(GlushkovBuildState::POS_UNINITIALIZED) {}
-
-Component::~Component() {}
-
-bool Component::repeatable() const {
- return true;
-}
-
-void Component::recordPosBounds(u32 b, u32 e) {
- pos_begin = b;
- pos_end = e;
-}
-
-void Component::optimise(bool) {
-}
-
-bool Component::vacuous_everywhere(void) const {
- return false;
-}
-
-bool Component::checkEmbeddedStartAnchor(bool) const {
- return false;
-}
-
-bool Component::checkEmbeddedEndAnchor(bool) const {
- return false;
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Base class for all components.
+ */
+
+
+#include "Component.h"
+
+#include "buildstate.h"
+#include "position.h"
+#include "position_info.h"
+#include "ue2common.h"
+
+using namespace std;
+
+namespace ue2 {
+
+Component::Component()
+ : pos_begin(GlushkovBuildState::POS_UNINITIALIZED),
+ pos_end(GlushkovBuildState::POS_UNINITIALIZED) {}
+
+Component::~Component() {}
+
+bool Component::repeatable() const {
+ return true;
+}
+
+void Component::recordPosBounds(u32 b, u32 e) {
+ pos_begin = b;
+ pos_end = e;
+}
+
+void Component::optimise(bool) {
+}
+
+bool Component::vacuous_everywhere(void) const {
+ return false;
+}
+
+bool Component::checkEmbeddedStartAnchor(bool) const {
+ return false;
+}
+
+bool Component::checkEmbeddedEndAnchor(bool) const {
+ return false;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/Component.h b/contrib/libs/hyperscan/src/parser/Component.h
index f7160ae630..1ebce677ca 100644
--- a/contrib/libs/hyperscan/src/parser/Component.h
+++ b/contrib/libs/hyperscan/src/parser/Component.h
@@ -1,145 +1,145 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Base class for all components.
- */
-
-#ifndef _RE_COMPONENT_H_
-#define _RE_COMPONENT_H_
-
-#include "ComponentVisitor.h"
-#include "ConstComponentVisitor.h"
-
-#include "position.h"
-#include "ue2common.h"
-
-#include <set>
-#include <string>
-#include <vector>
-
-namespace ue2 {
-
-class GlushkovBuildState;
-class PositionInfo;
-
-enum EmptyPathType {
- NOT_EMPTY, /**< component must consume characters */
- EPS_ONLY_PATHS, /**< eps path with no overhanging asserts */
- BOUNDARY_PATHS /**< eps paths some with overhanging asserts */
-};
-
-/** \brief Base class for regular expression parse tree components. */
-class Component {
- friend class DumpVisitor;
-public:
- /** \brief Constructor. */
- Component();
-
- /** \brief Destructor. */
- virtual ~Component();
-
- /** \brief Returns a newly-allocated deep copy of this component. */
- virtual Component *clone() const = 0;
-
- /** \brief Apply the given visitor functor. */
- virtual Component *accept(ComponentVisitor &v) = 0;
-
- /** \brief Apply the given const visitor functor. */
- virtual void accept(ConstComponentVisitor &v) const = 0;
-
- /** \brief Glushkov construction First() function.
- * \return set of initial positions in this component. */
- virtual std::vector<PositionInfo> first() const = 0;
-
- /** \brief Glushkov construction Last() function.
- * \return set of final positions in this component. */
- virtual std::vector<PositionInfo> last() const = 0;
-
- /** \brief Glushkov construction Empty() function.
- * \return true iff the component accepts epsilon.
- *
- * Note: ^, $, etc are considered empty. */
- virtual bool empty() const = 0;
-
- /** \brief True iff epsilon can pass through the component.
- *
- * Note: ^, $, etc are not vacuous everywhere. */
- virtual bool vacuous_everywhere(void) const;
-
- /** \brief True iff the component is repeatable on its own, without being
- * encapsulated in a sequence first.
- *
- * This is true for most components, but not for repeats, anchors and word
- * boundaries. */
- virtual bool repeatable() const;
-
- /** \brief Optimisation pass on the component tree.
- *
- * Called before \ref notePositions. May modify to the component tree.
- * Assumes no start of match information is required.
- */
- virtual void optimise(bool connected_to_sds);
-
- /** \brief Informs the Glushkov build process of the positions used by this
- * component. */
- virtual void notePositions(GlushkovBuildState &bs) = 0;
-
- /** \brief Glushkov construction Follow() function.
- *
- * Constructs (in \a bs) the set of positions in this component reachable
- * from the positions in \a lastPos.
- *
- * \throw ParseError on failure
- */
- virtual void buildFollowSet(GlushkovBuildState &bs,
- const std::vector<PositionInfo> &lastPos) = 0;
-
- /** \brief Return value is used for chaining, throws if finds embedded
- * anchor. */
- virtual bool checkEmbeddedStartAnchor(bool at_start) const;
-
- /* \brief Return value is used for chaining, throws if finds embedded
- * anchor. */
- virtual bool checkEmbeddedEndAnchor(bool at_end) const;
-
-protected:
- /** \brief Called during \ref notePositions. */
- void recordPosBounds(u32 b, u32 e);
-
- u32 pos_begin;
- u32 pos_end;
-
- // Protected copy ctor. Use clone instead.
- Component(const Component &other)
- : pos_begin(other.pos_begin), pos_end(other.pos_end) {}
-};
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Base class for all components.
+ */
+
+#ifndef _RE_COMPONENT_H_
+#define _RE_COMPONENT_H_
+
+#include "ComponentVisitor.h"
+#include "ConstComponentVisitor.h"
+
+#include "position.h"
+#include "ue2common.h"
+
+#include <set>
+#include <string>
+#include <vector>
+
+namespace ue2 {
+
+class GlushkovBuildState;
+class PositionInfo;
+
+enum EmptyPathType {
+ NOT_EMPTY, /**< component must consume characters */
+ EPS_ONLY_PATHS, /**< eps path with no overhanging asserts */
+ BOUNDARY_PATHS /**< eps paths some with overhanging asserts */
+};
+
+/** \brief Base class for regular expression parse tree components. */
+class Component {
+ friend class DumpVisitor;
+public:
+ /** \brief Constructor. */
+ Component();
+
+ /** \brief Destructor. */
+ virtual ~Component();
+
+ /** \brief Returns a newly-allocated deep copy of this component. */
+ virtual Component *clone() const = 0;
+
+ /** \brief Apply the given visitor functor. */
+ virtual Component *accept(ComponentVisitor &v) = 0;
+
+ /** \brief Apply the given const visitor functor. */
+ virtual void accept(ConstComponentVisitor &v) const = 0;
+
+ /** \brief Glushkov construction First() function.
+ * \return set of initial positions in this component. */
+ virtual std::vector<PositionInfo> first() const = 0;
+
+ /** \brief Glushkov construction Last() function.
+ * \return set of final positions in this component. */
+ virtual std::vector<PositionInfo> last() const = 0;
+
+ /** \brief Glushkov construction Empty() function.
+ * \return true iff the component accepts epsilon.
+ *
+ * Note: ^, $, etc are considered empty. */
+ virtual bool empty() const = 0;
+
+ /** \brief True iff epsilon can pass through the component.
+ *
+ * Note: ^, $, etc are not vacuous everywhere. */
+ virtual bool vacuous_everywhere(void) const;
+
+ /** \brief True iff the component is repeatable on its own, without being
+ * encapsulated in a sequence first.
+ *
+ * This is true for most components, but not for repeats, anchors and word
+ * boundaries. */
+ virtual bool repeatable() const;
+
+ /** \brief Optimisation pass on the component tree.
+ *
+ * Called before \ref notePositions. May modify the component tree.
+ * Assumes no start of match information is required.
+ */
+ virtual void optimise(bool connected_to_sds);
+
+ /** \brief Informs the Glushkov build process of the positions used by this
+ * component. */
+ virtual void notePositions(GlushkovBuildState &bs) = 0;
+
+ /** \brief Glushkov construction Follow() function.
+ *
+ * Constructs (in \a bs) the set of positions in this component reachable
+ * from the positions in \a lastPos.
+ *
+ * \throw ParseError on failure
+ */
+ virtual void buildFollowSet(GlushkovBuildState &bs,
+ const std::vector<PositionInfo> &lastPos) = 0;
+
+ /** \brief Return value is used for chaining, throws if finds embedded
+ * anchor. */
+ virtual bool checkEmbeddedStartAnchor(bool at_start) const;
+
+ /** \brief Return value is used for chaining, throws if finds embedded
+ * anchor. */
+ virtual bool checkEmbeddedEndAnchor(bool at_end) const;
+
+protected:
+ /** \brief Called during \ref notePositions. */
+ void recordPosBounds(u32 b, u32 e);
+
+ u32 pos_begin;
+ u32 pos_end;
+
+ // Protected copy ctor. Use clone instead.
+ Component(const Component &other)
+ : pos_begin(other.pos_begin), pos_end(other.pos_end) {}
+};
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/parser/ComponentAlternation.cpp b/contrib/libs/hyperscan/src/parser/ComponentAlternation.cpp
index c9bd541d55..3e6515fa44 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentAlternation.cpp
+++ b/contrib/libs/hyperscan/src/parser/ComponentAlternation.cpp
@@ -1,190 +1,190 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Alternations (foo|bar|baz).
- */
-
-
-#include "ComponentAlternation.h"
-
-#include "buildstate.h"
-#include "position.h"
-#include "position_info.h"
-#include "nfagraph/ng_builder.h"
-#include "ue2common.h"
-
-#include <algorithm>
-
-using namespace std;
-
-namespace ue2 {
-
-ComponentAlternation::ComponentAlternation() {
- // empty
-}
-
-ComponentAlternation::~ComponentAlternation() {
- // empty
-}
-
-ComponentAlternation::ComponentAlternation(const ComponentAlternation &other)
- : Component(other) {
- for (const auto &c : other.children) {
- assert(c);
- children.push_back(unique_ptr<Component>(c->clone()));
- }
-}
-
-ComponentAlternation * ComponentAlternation::clone() const {
- return new ComponentAlternation(*this);
-}
-
-Component *ComponentAlternation::accept(ComponentVisitor &v) {
- Component *c = v.visit(this);
- if (c != this) {
- v.post(this);
- return c;
- }
-
- for (auto i = children.begin(), e = children.end(); i != e; ++i) {
- Component *child = i->get();
- c = (*i)->accept(v);
- if (c != child) {
- // Child has been replaced (new Component pointer) or we've been
- // instructed to delete it (null).
- i->reset(c);
- }
- }
-
- // Remove deleted children.
- children.erase(remove(children.begin(), children.end(), nullptr),
- children.end());
-
- v.post(this);
- return this;
-}
-
-void ComponentAlternation::accept(ConstComponentVisitor &v) const {
- v.pre(*this);
- for (auto i = children.begin(), e = children.end(); i != e; ++i) {
- (*i)->accept(v);
- if (i + 1 != e) {
- v.during(*this);
- }
- }
-
- v.post(*this);
-}
-
-void ComponentAlternation::append(unique_ptr<Component> component) {
- children.push_back(move(component));
-}
-
-vector<PositionInfo> ComponentAlternation::first() const {
- // firsts come from all our subcomponents in position order. This will
- // maintain left-to-right priority order.
- vector<PositionInfo> firsts, subfirsts;
-
- for (const auto &c : children) {
- subfirsts = c->first();
- firsts.insert(firsts.end(), subfirsts.begin(), subfirsts.end());
- }
- return firsts;
-}
-
-vector<PositionInfo> ComponentAlternation::last() const {
- vector<PositionInfo> lasts, sublasts;
-
- for (const auto &c : children) {
- sublasts = c->last();
- lasts.insert(lasts.end(), sublasts.begin(), sublasts.end());
- }
- return lasts;
-}
-
-bool ComponentAlternation::empty(void) const {
- // an alternation can be empty if any of its components are empty
- for (const auto &c : children) {
- if (c->empty()) {
- return true;
- }
- }
-
- return false;
-}
-
-void ComponentAlternation::notePositions(GlushkovBuildState &bs) {
- u32 pb = bs.getBuilder().numVertices();
- for (auto &c : children) {
- c->notePositions(bs);
- }
- recordPosBounds(pb, bs.getBuilder().numVertices());
-}
-
-void ComponentAlternation::buildFollowSet(GlushkovBuildState &bs,
- const vector<PositionInfo> &lastPos) {
- for (auto &c : children) {
- c->buildFollowSet(bs, lastPos);
- }
-}
-
-bool ComponentAlternation::checkEmbeddedStartAnchor(bool at_start) const {
- bool rv = at_start;
- for (const auto &c : children) {
- rv &= c->checkEmbeddedStartAnchor(at_start);
- }
-
- return rv;
-}
-
-bool ComponentAlternation::checkEmbeddedEndAnchor(bool at_end) const {
- bool rv = at_end;
- for (const auto &c : children) {
- rv &= c->checkEmbeddedEndAnchor(at_end);
- }
-
- return rv;
-}
-
-bool ComponentAlternation::vacuous_everywhere(void) const {
- for (const auto &c : children) {
- if (c->vacuous_everywhere()) {
- return true;
- }
- }
- return false;
-}
-
-void ComponentAlternation::optimise(bool connected_to_sds) {
- for (auto &c : children) {
- c->optimise(connected_to_sds);
- }
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Alternations (foo|bar|baz).
+ */
+
+
+#include "ComponentAlternation.h"
+
+#include "buildstate.h"
+#include "position.h"
+#include "position_info.h"
+#include "nfagraph/ng_builder.h"
+#include "ue2common.h"
+
+#include <algorithm>
+
+using namespace std;
+
+namespace ue2 {
+
+ComponentAlternation::ComponentAlternation() {
+ // empty
+}
+
+ComponentAlternation::~ComponentAlternation() {
+ // empty
+}
+
+ComponentAlternation::ComponentAlternation(const ComponentAlternation &other)
+ : Component(other) {
+ for (const auto &c : other.children) {
+ assert(c);
+ children.push_back(unique_ptr<Component>(c->clone()));
+ }
+}
+
+ComponentAlternation * ComponentAlternation::clone() const {
+ return new ComponentAlternation(*this);
+}
+
+Component *ComponentAlternation::accept(ComponentVisitor &v) {
+ Component *c = v.visit(this);
+ if (c != this) {
+ v.post(this);
+ return c;
+ }
+
+ for (auto i = children.begin(), e = children.end(); i != e; ++i) {
+ Component *child = i->get();
+ c = (*i)->accept(v);
+ if (c != child) {
+ // Child has been replaced (new Component pointer) or we've been
+ // instructed to delete it (null).
+ i->reset(c);
+ }
+ }
+
+ // Remove deleted children.
+ children.erase(remove(children.begin(), children.end(), nullptr),
+ children.end());
+
+ v.post(this);
+ return this;
+}
+
+void ComponentAlternation::accept(ConstComponentVisitor &v) const {
+ v.pre(*this);
+ for (auto i = children.begin(), e = children.end(); i != e; ++i) {
+ (*i)->accept(v);
+ if (i + 1 != e) {
+ v.during(*this);
+ }
+ }
+
+ v.post(*this);
+}
+
+void ComponentAlternation::append(unique_ptr<Component> component) {
+ children.push_back(move(component));
+}
+
+vector<PositionInfo> ComponentAlternation::first() const {
+ // firsts come from all our subcomponents in position order. This will
+ // maintain left-to-right priority order.
+ vector<PositionInfo> firsts, subfirsts;
+
+ for (const auto &c : children) {
+ subfirsts = c->first();
+ firsts.insert(firsts.end(), subfirsts.begin(), subfirsts.end());
+ }
+ return firsts;
+}
+
+vector<PositionInfo> ComponentAlternation::last() const {
+ vector<PositionInfo> lasts, sublasts;
+
+ for (const auto &c : children) {
+ sublasts = c->last();
+ lasts.insert(lasts.end(), sublasts.begin(), sublasts.end());
+ }
+ return lasts;
+}
+
+bool ComponentAlternation::empty(void) const {
+ // an alternation can be empty if any of its components are empty
+ for (const auto &c : children) {
+ if (c->empty()) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void ComponentAlternation::notePositions(GlushkovBuildState &bs) {
+ u32 pb = bs.getBuilder().numVertices();
+ for (auto &c : children) {
+ c->notePositions(bs);
+ }
+ recordPosBounds(pb, bs.getBuilder().numVertices());
+}
+
+void ComponentAlternation::buildFollowSet(GlushkovBuildState &bs,
+ const vector<PositionInfo> &lastPos) {
+ for (auto &c : children) {
+ c->buildFollowSet(bs, lastPos);
+ }
+}
+
+bool ComponentAlternation::checkEmbeddedStartAnchor(bool at_start) const {
+ bool rv = at_start;
+ for (const auto &c : children) {
+ rv &= c->checkEmbeddedStartAnchor(at_start);
+ }
+
+ return rv;
+}
+
+bool ComponentAlternation::checkEmbeddedEndAnchor(bool at_end) const {
+ bool rv = at_end;
+ for (const auto &c : children) {
+ rv &= c->checkEmbeddedEndAnchor(at_end);
+ }
+
+ return rv;
+}
+
+bool ComponentAlternation::vacuous_everywhere(void) const {
+ for (const auto &c : children) {
+ if (c->vacuous_everywhere()) {
+ return true;
+ }
+ }
+ return false;
+}
+
+void ComponentAlternation::optimise(bool connected_to_sds) {
+ for (auto &c : children) {
+ c->optimise(connected_to_sds);
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/ComponentAlternation.h b/contrib/libs/hyperscan/src/parser/ComponentAlternation.h
index cbb168c03d..6c40074850 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentAlternation.h
+++ b/contrib/libs/hyperscan/src/parser/ComponentAlternation.h
@@ -1,79 +1,79 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Alternations (foo|bar|baz).
- */
-
-#ifndef COMPONENT_ALTERNATION_H
-#define COMPONENT_ALTERNATION_H
-
-#include "Component.h"
-#include "position.h"
-
-#include <memory>
-
-namespace ue2 {
-
-class PositionInfo;
-
-class ComponentAlternation : public Component {
- friend class DumpVisitor;
- friend class SimplifyVisitor;
-public:
- ComponentAlternation();
- ~ComponentAlternation() override;
- ComponentAlternation *clone() const override;
- Component *accept(ComponentVisitor &v) override;
- void accept(ConstComponentVisitor &v) const override;
-
- size_t numBranches() const { return children.size(); }
-
- void append(std::unique_ptr<Component> component);
-
- std::vector<PositionInfo> first() const override;
- std::vector<PositionInfo> last() const override;
- bool empty(void) const override;
- bool vacuous_everywhere() const override;
- void notePositions(GlushkovBuildState &bs) override;
- void buildFollowSet(GlushkovBuildState &bs,
- const std::vector<PositionInfo> &lastPos) override;
- bool checkEmbeddedStartAnchor(bool at_start) const override;
- bool checkEmbeddedEndAnchor(bool at_end) const override;
-
- void optimise(bool connected_to_sds) override;
-
-private:
- std::vector<std::unique_ptr<Component>> children;
-
- ComponentAlternation(const ComponentAlternation &other);
-};
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Alternations (foo|bar|baz).
+ */
+
+#ifndef COMPONENT_ALTERNATION_H
+#define COMPONENT_ALTERNATION_H
+
+#include "Component.h"
+#include "position.h"
+
+#include <memory>
+
+namespace ue2 {
+
+class PositionInfo;
+
+class ComponentAlternation : public Component {
+ friend class DumpVisitor;
+ friend class SimplifyVisitor;
+public:
+ ComponentAlternation();
+ ~ComponentAlternation() override;
+ ComponentAlternation *clone() const override;
+ Component *accept(ComponentVisitor &v) override;
+ void accept(ConstComponentVisitor &v) const override;
+
+ size_t numBranches() const { return children.size(); }
+
+ void append(std::unique_ptr<Component> component);
+
+ std::vector<PositionInfo> first() const override;
+ std::vector<PositionInfo> last() const override;
+ bool empty(void) const override;
+ bool vacuous_everywhere() const override;
+ void notePositions(GlushkovBuildState &bs) override;
+ void buildFollowSet(GlushkovBuildState &bs,
+ const std::vector<PositionInfo> &lastPos) override;
+ bool checkEmbeddedStartAnchor(bool at_start) const override;
+ bool checkEmbeddedEndAnchor(bool at_end) const override;
+
+ void optimise(bool connected_to_sds) override;
+
+private:
+ std::vector<std::unique_ptr<Component>> children;
+
+ ComponentAlternation(const ComponentAlternation &other);
+};
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/parser/ComponentAssertion.cpp b/contrib/libs/hyperscan/src/parser/ComponentAssertion.cpp
index ae023dad5a..cadff93264 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentAssertion.cpp
+++ b/contrib/libs/hyperscan/src/parser/ComponentAssertion.cpp
@@ -1,121 +1,121 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Lookahead/lookbehind zero-width assertions.
- */
-#include "ComponentAssertion.h"
-#include "buildstate.h"
-#include "position.h"
-#include "position_info.h"
-#include "ue2common.h"
-
-#include <cassert>
-#include <algorithm>
-
-using namespace std;
-
-namespace ue2 {
-
-ComponentAssertion::ComponentAssertion(enum Direction dir, enum Sense sense)
- : m_dir(dir), m_sense(sense) {}
-
-ComponentAssertion::~ComponentAssertion() { }
-
-ComponentAssertion *ComponentAssertion::clone() const {
- return new ComponentAssertion(*this);
-}
-
-Component * ComponentAssertion::accept(ComponentVisitor &v) {
- Component *c = v.visit(this);
- if (c != this) {
- v.post(this);
- return c;
- }
-
- for (auto i = children.begin(), e = children.end(); i != e; ++i) {
- Component *child = i->get();
- c = (*i)->accept(v);
- if (c != child) {
- // Child has been replaced (new Component pointer) or we've been
- // instructed to delete it (null).
- i->reset(c);
- }
- }
-
- // Remove deleted children.
- children.erase(remove(children.begin(), children.end(), nullptr),
- children.end());
-
- v.post(this);
- return this;
-}
-
-void ComponentAssertion::accept(ConstComponentVisitor &v) const {
- v.pre(*this);
- for (auto i = children.begin(), e = children.end(); i != e; ++i) {
- (*i)->accept(v);
- if (i + 1 != e) {
- v.during(*this);
- }
- }
-
- v.post(*this);
-}
-
-vector<PositionInfo> ComponentAssertion::first() const {
- assert(0);
- return vector<PositionInfo>();
-}
-
-vector<PositionInfo> ComponentAssertion::last() const {
- assert(0);
- return vector<PositionInfo>();
-}
-
-bool ComponentAssertion::empty() const {
- return true;
-}
-
-void ComponentAssertion::notePositions(GlushkovBuildState &) {
- assert(0);
-}
-
-void ComponentAssertion::buildFollowSet(GlushkovBuildState &,
- const vector<PositionInfo> &) {
- assert(0);
-}
-
-bool ComponentAssertion::repeatable() const {
- // If this assertion has no children (it's an empty sequence, like that
- // produced by '(?!)') then PCRE would throw a "nothing to repeat" error.
- // So we do as well.
- return !children.empty();
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Lookahead/lookbehind zero-width assertions.
+ */
+#include "ComponentAssertion.h"
+#include "buildstate.h"
+#include "position.h"
+#include "position_info.h"
+#include "ue2common.h"
+
+#include <cassert>
+#include <algorithm>
+
+using namespace std;
+
+namespace ue2 {
+
+ComponentAssertion::ComponentAssertion(enum Direction dir, enum Sense sense)
+ : m_dir(dir), m_sense(sense) {}
+
+ComponentAssertion::~ComponentAssertion() { }
+
+ComponentAssertion *ComponentAssertion::clone() const {
+ return new ComponentAssertion(*this);
+}
+
+Component * ComponentAssertion::accept(ComponentVisitor &v) {
+ Component *c = v.visit(this);
+ if (c != this) {
+ v.post(this);
+ return c;
+ }
+
+ for (auto i = children.begin(), e = children.end(); i != e; ++i) {
+ Component *child = i->get();
+ c = (*i)->accept(v);
+ if (c != child) {
+ // Child has been replaced (new Component pointer) or we've been
+ // instructed to delete it (null).
+ i->reset(c);
+ }
+ }
+
+ // Remove deleted children.
+ children.erase(remove(children.begin(), children.end(), nullptr),
+ children.end());
+
+ v.post(this);
+ return this;
+}
+
+void ComponentAssertion::accept(ConstComponentVisitor &v) const {
+ v.pre(*this);
+ for (auto i = children.begin(), e = children.end(); i != e; ++i) {
+ (*i)->accept(v);
+ if (i + 1 != e) {
+ v.during(*this);
+ }
+ }
+
+ v.post(*this);
+}
+
+vector<PositionInfo> ComponentAssertion::first() const {
+ assert(0);
+ return vector<PositionInfo>();
+}
+
+vector<PositionInfo> ComponentAssertion::last() const {
+ assert(0);
+ return vector<PositionInfo>();
+}
+
+bool ComponentAssertion::empty() const {
+ return true;
+}
+
+void ComponentAssertion::notePositions(GlushkovBuildState &) {
+ assert(0);
+}
+
+void ComponentAssertion::buildFollowSet(GlushkovBuildState &,
+ const vector<PositionInfo> &) {
+ assert(0);
+}
+
+bool ComponentAssertion::repeatable() const {
+ // If this assertion has no children (it's an empty sequence, like that
+ // produced by '(?!)') then PCRE would throw a "nothing to repeat" error.
+ // So we do as well.
+ return !children.empty();
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/ComponentAssertion.h b/contrib/libs/hyperscan/src/parser/ComponentAssertion.h
index 60b38cded0..fc78de0aac 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentAssertion.h
+++ b/contrib/libs/hyperscan/src/parser/ComponentAssertion.h
@@ -1,76 +1,76 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Lookahead/lookbehind zero-width assertions.
- */
-
-#ifndef _RE_COMPONENTASSERTION_H_
-#define _RE_COMPONENTASSERTION_H_
-
-#include "ComponentSequence.h"
-
-namespace ue2 {
-
-class ComponentAssertion : public ComponentSequence {
- friend class DumpVisitor;
- friend class PrintVisitor;
-public:
- enum Direction {
- LOOKAHEAD, //!< lookahead (forward) assertion
- LOOKBEHIND //!< lookbehind (backward) assertion
- };
-
- enum Sense {
- POS, //!< positive assertion, (?=...) or (?<=...)
- NEG //!< negative assertion, (?!...) or (?<!...)
- };
-
- ComponentAssertion(enum Direction dir, enum Sense sense);
- ~ComponentAssertion() override;
- ComponentAssertion *clone() const override;
- Component *accept(ComponentVisitor &v) override;
- void accept(ConstComponentVisitor &v) const override;
-
- std::vector<PositionInfo> first() const override;
- std::vector<PositionInfo> last() const override;
-
- bool empty() const override;
- void notePositions(GlushkovBuildState &bs) override;
- void buildFollowSet(GlushkovBuildState &bs,
- const std::vector<PositionInfo> &lastPos) override;
- bool repeatable() const override;
-
-private:
- enum Direction m_dir;
- enum Sense m_sense;
-};
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Lookahead/lookbehind zero-width assertions.
+ */
+
+#ifndef _RE_COMPONENTASSERTION_H_
+#define _RE_COMPONENTASSERTION_H_
+
+#include "ComponentSequence.h"
+
+namespace ue2 {
+
+class ComponentAssertion : public ComponentSequence {
+ friend class DumpVisitor;
+ friend class PrintVisitor;
+public:
+ enum Direction {
+ LOOKAHEAD, //!< lookahead (forward) assertion
+ LOOKBEHIND //!< lookbehind (backward) assertion
+ };
+
+ enum Sense {
+ POS, //!< positive assertion, (?=...) or (?<=...)
+ NEG //!< negative assertion, (?!...) or (?<!...)
+ };
+
+ ComponentAssertion(enum Direction dir, enum Sense sense);
+ ~ComponentAssertion() override;
+ ComponentAssertion *clone() const override;
+ Component *accept(ComponentVisitor &v) override;
+ void accept(ConstComponentVisitor &v) const override;
+
+ std::vector<PositionInfo> first() const override;
+ std::vector<PositionInfo> last() const override;
+
+ bool empty() const override;
+ void notePositions(GlushkovBuildState &bs) override;
+ void buildFollowSet(GlushkovBuildState &bs,
+ const std::vector<PositionInfo> &lastPos) override;
+ bool repeatable() const override;
+
+private:
+ enum Direction m_dir;
+ enum Sense m_sense;
+};
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/parser/ComponentAtomicGroup.cpp b/contrib/libs/hyperscan/src/parser/ComponentAtomicGroup.cpp
index 986ca6d9ba..106f24fc4d 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentAtomicGroup.cpp
+++ b/contrib/libs/hyperscan/src/parser/ComponentAtomicGroup.cpp
@@ -1,92 +1,92 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Atomic groups (?>...)
- */
-#include "ComponentAtomicGroup.h"
-#include "buildstate.h"
-#include "position.h"
-
-#include <algorithm>
-
-using namespace std;
-
-namespace ue2 {
-
-ComponentAtomicGroup *ComponentAtomicGroup::clone() const {
- return new ComponentAtomicGroup(*this);
-}
-
-Component *ComponentAtomicGroup::accept(ComponentVisitor &v) {
- Component *c = v.visit(this);
- if (c != this) {
- v.post(this);
- return c;
- }
-
- for (auto i = children.begin(), e = children.end(); i != e; ++i) {
- Component *child = i->get();
- c = (*i)->accept(v);
- if (c != child) {
- // Child has been replaced (new Component pointer) or we've been
- // instructed to delete it (null).
- i->reset(c);
- }
- }
-
- // Remove deleted children.
- children.erase(remove(children.begin(), children.end(), nullptr),
- children.end());
-
- v.post(this);
- return this;
-}
-
-void ComponentAtomicGroup::accept(ConstComponentVisitor &v) const {
- v.pre(*this);
- for (auto i = children.begin(), e = children.end(); i != e; ++i) {
- (*i)->accept(v);
- if (i + 1 != e) {
- v.during(*this);
- }
- }
-
- v.post(*this);
-}
-
-void ComponentAtomicGroup::notePositions(GlushkovBuildState &) {
- assert(0);
-}
-
-void ComponentAtomicGroup::buildFollowSet(GlushkovBuildState &,
- const vector<PositionInfo> &) {
- assert(0);
-}
-
-} // namespace
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Atomic groups (?>...)
+ */
+#include "ComponentAtomicGroup.h"
+#include "buildstate.h"
+#include "position.h"
+
+#include <algorithm>
+
+using namespace std;
+
+namespace ue2 {
+
+ComponentAtomicGroup *ComponentAtomicGroup::clone() const {
+ return new ComponentAtomicGroup(*this);
+}
+
+Component *ComponentAtomicGroup::accept(ComponentVisitor &v) {
+ Component *c = v.visit(this);
+ if (c != this) {
+ v.post(this);
+ return c;
+ }
+
+ for (auto i = children.begin(), e = children.end(); i != e; ++i) {
+ Component *child = i->get();
+ c = (*i)->accept(v);
+ if (c != child) {
+ // Child has been replaced (new Component pointer) or we've been
+ // instructed to delete it (null).
+ i->reset(c);
+ }
+ }
+
+ // Remove deleted children.
+ children.erase(remove(children.begin(), children.end(), nullptr),
+ children.end());
+
+ v.post(this);
+ return this;
+}
+
+void ComponentAtomicGroup::accept(ConstComponentVisitor &v) const {
+ v.pre(*this);
+ for (auto i = children.begin(), e = children.end(); i != e; ++i) {
+ (*i)->accept(v);
+ if (i + 1 != e) {
+ v.during(*this);
+ }
+ }
+
+ v.post(*this);
+}
+
+void ComponentAtomicGroup::notePositions(GlushkovBuildState &) {
+ assert(0);
+}
+
+void ComponentAtomicGroup::buildFollowSet(GlushkovBuildState &,
+ const vector<PositionInfo> &) {
+ assert(0);
+}
+
+} // namespace
diff --git a/contrib/libs/hyperscan/src/parser/ComponentAtomicGroup.h b/contrib/libs/hyperscan/src/parser/ComponentAtomicGroup.h
index e24fb3d99d..d4eab293d9 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentAtomicGroup.h
+++ b/contrib/libs/hyperscan/src/parser/ComponentAtomicGroup.h
@@ -1,58 +1,58 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Atomic groups (?>...)
- */
-
-#ifndef _COMPONENTATOMICGROUP_H_
-#define _COMPONENTATOMICGROUP_H_
-
-#include "ComponentSequence.h"
-
-namespace ue2 {
-
-// The atomic group component is a subclass of sequence that is only buildable
-// in prefilter mode, where we treat it as a standard sequence.
-class ComponentAtomicGroup : public ComponentSequence {
- friend class DumpVisitor;
-public:
- ComponentAtomicGroup() {}
- ~ComponentAtomicGroup() override {}
- ComponentAtomicGroup *clone() const override;
- Component *accept(ComponentVisitor &v) override;
- void accept(ConstComponentVisitor &v) const override;
-
- void notePositions(GlushkovBuildState &bs) override;
- void buildFollowSet(GlushkovBuildState &bs,
- const std::vector<PositionInfo> &lastPos) override;
-};
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Atomic groups (?>...)
+ */
+
+#ifndef _COMPONENTATOMICGROUP_H_
+#define _COMPONENTATOMICGROUP_H_
+
+#include "ComponentSequence.h"
+
+namespace ue2 {
+
+// The atomic group component is a subclass of sequence that is only buildable
+// in prefilter mode, where we treat it as a standard sequence.
+class ComponentAtomicGroup : public ComponentSequence {
+ friend class DumpVisitor;
+public:
+ ComponentAtomicGroup() {}
+ ~ComponentAtomicGroup() override {}
+ ComponentAtomicGroup *clone() const override;
+ Component *accept(ComponentVisitor &v) override;
+ void accept(ConstComponentVisitor &v) const override;
+
+ void notePositions(GlushkovBuildState &bs) override;
+ void buildFollowSet(GlushkovBuildState &bs,
+ const std::vector<PositionInfo> &lastPos) override;
+};
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/parser/ComponentBackReference.cpp b/contrib/libs/hyperscan/src/parser/ComponentBackReference.cpp
index 4cb5f44d23..1edc530d25 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentBackReference.cpp
+++ b/contrib/libs/hyperscan/src/parser/ComponentBackReference.cpp
@@ -1,79 +1,79 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Back-references (/([a-f]{3}).*\\1/)
- */
-
-
-#include "ComponentBackReference.h"
-
-#include "buildstate.h"
-#include "position.h"
-#include "position_info.h"
-#include "nfagraph/ng_builder.h"
-#include "util/charreach.h"
-
-#include <cassert>
-
-using namespace std;
-
-namespace ue2 {
-
-ComponentBackReference::ComponentBackReference(unsigned int id)
- : ref_id(id) {}
-
-ComponentBackReference::ComponentBackReference(const string &s)
- : name(s), ref_id(0) {}
-
-ComponentBackReference * ComponentBackReference::clone() const {
- return new ComponentBackReference(*this);
-}
-
-vector<PositionInfo> ComponentBackReference::first() const {
- assert(0);
- return vector<PositionInfo>();
-}
-
-vector<PositionInfo> ComponentBackReference::last() const {
- assert(0);
- return vector<PositionInfo>();
-}
-
-bool ComponentBackReference::empty(void) const { return true; }
-
-void ComponentBackReference::notePositions(GlushkovBuildState &) {
- assert(0);
-}
-
-void ComponentBackReference::buildFollowSet(GlushkovBuildState &,
- const vector<PositionInfo> &) {
- assert(0);
-}
-
-} // namespace
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Back-references (/([a-f]{3}).*\\1/)
+ */
+
+
+#include "ComponentBackReference.h"
+
+#include "buildstate.h"
+#include "position.h"
+#include "position_info.h"
+#include "nfagraph/ng_builder.h"
+#include "util/charreach.h"
+
+#include <cassert>
+
+using namespace std;
+
+namespace ue2 {
+
+ComponentBackReference::ComponentBackReference(unsigned int id)
+ : ref_id(id) {}
+
+ComponentBackReference::ComponentBackReference(const string &s)
+ : name(s), ref_id(0) {}
+
+ComponentBackReference * ComponentBackReference::clone() const {
+ return new ComponentBackReference(*this);
+}
+
+vector<PositionInfo> ComponentBackReference::first() const {
+ assert(0);
+ return vector<PositionInfo>();
+}
+
+vector<PositionInfo> ComponentBackReference::last() const {
+ assert(0);
+ return vector<PositionInfo>();
+}
+
+bool ComponentBackReference::empty(void) const { return true; }
+
+void ComponentBackReference::notePositions(GlushkovBuildState &) {
+ assert(0);
+}
+
+void ComponentBackReference::buildFollowSet(GlushkovBuildState &,
+ const vector<PositionInfo> &) {
+ assert(0);
+}
+
+} // namespace
diff --git a/contrib/libs/hyperscan/src/parser/ComponentBackReference.h b/contrib/libs/hyperscan/src/parser/ComponentBackReference.h
index d8324d3bc4..d22df7a7f6 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentBackReference.h
+++ b/contrib/libs/hyperscan/src/parser/ComponentBackReference.h
@@ -1,84 +1,84 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Back-references (/([a-f]{3}).*\\1/)
- */
-
-#ifndef _RE_COMPONENTBACKREFERENCE_H_
-#define _RE_COMPONENTBACKREFERENCE_H_
-
-#include "Component.h"
-#include <string>
-
-namespace ue2 {
-
-class ComponentBackReference : public Component {
- friend class DumpVisitor;
- friend class PrintVisitor;
- friend class ReferenceVisitor;
-public:
- explicit ComponentBackReference(unsigned int id);
- explicit ComponentBackReference(const std::string &s);
- ~ComponentBackReference() override {}
- ComponentBackReference *clone() const override;
-
- Component *accept(ComponentVisitor &v) override {
- Component *c = v.visit(this);
- v.post(this);
- return c;
- }
-
- void accept(ConstComponentVisitor &v) const override {
- v.pre(*this);
- v.during(*this);
- v.post(*this);
- }
-
- unsigned int getRefID() const { return ref_id; }
- const std::string &getRefName() const { return name; }
-
- std::vector<PositionInfo> first() const override;
- std::vector<PositionInfo> last() const override;
- bool empty(void) const override;
- void notePositions(GlushkovBuildState &bs) override;
- void buildFollowSet(GlushkovBuildState &bs,
- const std::vector<PositionInfo> &lastPos) override;
-
-private:
- // Private copy ctor. Use clone instead.
- ComponentBackReference(const ComponentBackReference &other)
- : Component(other), name(other.name), ref_id(other.ref_id) {}
-
- std::string name;
- unsigned int ref_id;
-};
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Back-references (/([a-f]{3}).*\\1/)
+ */
+
+#ifndef _RE_COMPONENTBACKREFERENCE_H_
+#define _RE_COMPONENTBACKREFERENCE_H_
+
+#include "Component.h"
+#include <string>
+
+namespace ue2 {
+
+class ComponentBackReference : public Component {
+ friend class DumpVisitor;
+ friend class PrintVisitor;
+ friend class ReferenceVisitor;
+public:
+ explicit ComponentBackReference(unsigned int id);
+ explicit ComponentBackReference(const std::string &s);
+ ~ComponentBackReference() override {}
+ ComponentBackReference *clone() const override;
+
+ Component *accept(ComponentVisitor &v) override {
+ Component *c = v.visit(this);
+ v.post(this);
+ return c;
+ }
+
+ void accept(ConstComponentVisitor &v) const override {
+ v.pre(*this);
+ v.during(*this);
+ v.post(*this);
+ }
+
+ unsigned int getRefID() const { return ref_id; }
+ const std::string &getRefName() const { return name; }
+
+ std::vector<PositionInfo> first() const override;
+ std::vector<PositionInfo> last() const override;
+ bool empty(void) const override;
+ void notePositions(GlushkovBuildState &bs) override;
+ void buildFollowSet(GlushkovBuildState &bs,
+ const std::vector<PositionInfo> &lastPos) override;
+
+private:
+ // Private copy ctor. Use clone instead.
+ ComponentBackReference(const ComponentBackReference &other)
+ : Component(other), name(other.name), ref_id(other.ref_id) {}
+
+ std::string name;
+ unsigned int ref_id;
+};
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/parser/ComponentBoundary.cpp b/contrib/libs/hyperscan/src/parser/ComponentBoundary.cpp
index 6b1c9038dc..efd6bf88dd 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentBoundary.cpp
+++ b/contrib/libs/hyperscan/src/parser/ComponentBoundary.cpp
@@ -1,186 +1,186 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Boundary assertions (^, $, \\A, \\Z, \\z)
- */
-
-
-#include "ComponentBoundary.h"
-
-#include "buildstate.h"
-#include "parse_error.h"
-#include "position.h"
-#include "position_info.h"
-#include "Parser.h"
-#include "util/charreach.h"
-#include "nfagraph/ng_builder.h"
-
-#include <cassert>
-
-using namespace std;
-
-namespace ue2 {
-
-ComponentBoundary::ComponentBoundary(enum Boundary bound)
- : m_bound(bound), m_newline(GlushkovBuildState::POS_UNINITIALIZED) {}
-
-ComponentBoundary::~ComponentBoundary() {
-}
-
-ComponentBoundary::ComponentBoundary(const ComponentBoundary &other)
- : Component(other), m_bound(other.m_bound), m_newline(other.m_newline),
- m_first(other.m_first), m_last(other.m_last) {}
-
-ComponentBoundary * ComponentBoundary::clone() const {
- return new ComponentBoundary(*this);
-}
-
-vector<PositionInfo> ComponentBoundary::first() const {
- return m_first;
-}
-
-vector<PositionInfo> ComponentBoundary::last() const {
- return m_last;
-}
-
-bool ComponentBoundary::empty() const {
- return true;
-}
-
-bool ComponentBoundary::repeatable() const {
- return false;
-}
-
-static
-Position makeNewline(GlushkovBuildState &bs) {
- NFABuilder &builder = bs.getBuilder();
- Position newline = builder.makePositions(1);
- builder.addCharReach(newline, CharReach('\n'));
- return newline;
-}
-
-void ComponentBoundary::notePositions(GlushkovBuildState & bs) {
- NFABuilder &builder = bs.getBuilder();
- const Position startState = builder.getStart();
-
- switch (m_bound) {
- case BEGIN_STRING: // beginning of data stream ('^')
- {
- PositionInfo epsilon(GlushkovBuildState::POS_EPSILON);
- epsilon.flags = POS_FLAG_NOFLOAT;
- m_first.push_back(epsilon);
-
- // We have the start vertex in firsts so that we can discourage
- // the mid-pattern use of boundaries.
- m_first.push_back(startState);
-
- break;
- }
- case BEGIN_LINE: // multiline anchor: beginning of stream or a newline
- {
- PositionInfo epsilon(GlushkovBuildState::POS_EPSILON);
- epsilon.flags = POS_FLAG_NOFLOAT;
- m_first.push_back(epsilon);
-
- // We have the start vertex in firsts so that we can discourage
- // the mid-pattern use of boundaries.
- m_first.push_back(startState);
-
- // Newline
- m_newline = makeNewline(bs);
- builder.setAssertFlag(m_newline, POS_FLAG_MULTILINE_START);
- builder.setAssertFlag(m_newline, POS_FLAG_VIRTUAL_START);
- PositionInfo nl(m_newline);
- nl.flags = POS_FLAG_MUST_FLOAT | POS_FLAG_FIDDLE_ACCEPT;
- m_first.push_back(nl);
- m_last.push_back(nl);
- recordPosBounds(m_newline, m_newline + 1);
- break;
- }
- case END_STRING: // end of data stream ('\z')
- {
- PositionInfo epsilon(GlushkovBuildState::POS_EPSILON);
- epsilon.flags = POS_FLAG_WIRE_EOD | POS_FLAG_NO_NL_EOD |
- POS_FLAG_NO_NL_ACCEPT | POS_FLAG_ONLY_ENDS;
- m_first.push_back(epsilon);
- break;
- }
- case END_STRING_OPTIONAL_LF: // end of data with optional LF ('$')
- {
- PositionInfo epsilon(GlushkovBuildState::POS_EPSILON);
- epsilon.flags = POS_FLAG_WIRE_EOD | POS_FLAG_WIRE_NL_EOD |
- POS_FLAG_NO_NL_ACCEPT | POS_FLAG_ONLY_ENDS;
- m_first.push_back(epsilon);
- break;
- }
- case END_LINE: // multiline anchor: end of data or a newline
- {
- PositionInfo epsilon(GlushkovBuildState::POS_EPSILON);
- epsilon.flags = POS_FLAG_WIRE_EOD | POS_FLAG_WIRE_NL_EOD |
- POS_FLAG_WIRE_NL_ACCEPT | POS_FLAG_ONLY_ENDS;
- m_first.push_back(epsilon);
- break;
- }
- default:
- // unsupported
- assert(0);
- break;
- }
-}
-
-void ComponentBoundary::buildFollowSet(GlushkovBuildState &,
- const vector<PositionInfo> &) {
-
-}
-
-bool ComponentBoundary::checkEmbeddedStartAnchor(bool at_start) const {
- if (at_start) {
- return at_start;
- }
-
- if (m_bound == BEGIN_STRING || m_bound == BEGIN_LINE) {
- throw ParseError("Embedded start anchors not supported.");
- }
-
- return at_start;
-}
-
-bool ComponentBoundary::checkEmbeddedEndAnchor(bool at_end) const {
- if (at_end) {
- return at_end;
- }
-
- if (m_bound != BEGIN_STRING && m_bound != BEGIN_LINE) {
- throw ParseError("Embedded end anchors not supported.");
- }
-
- return at_end;
-}
-
-} // namespace
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Boundary assertions (^, $, \\A, \\Z, \\z)
+ */
+
+
+#include "ComponentBoundary.h"
+
+#include "buildstate.h"
+#include "parse_error.h"
+#include "position.h"
+#include "position_info.h"
+#include "Parser.h"
+#include "util/charreach.h"
+#include "nfagraph/ng_builder.h"
+
+#include <cassert>
+
+using namespace std;
+
+namespace ue2 {
+
+ComponentBoundary::ComponentBoundary(enum Boundary bound)
+ : m_bound(bound), m_newline(GlushkovBuildState::POS_UNINITIALIZED) {}
+
+ComponentBoundary::~ComponentBoundary() {
+}
+
+ComponentBoundary::ComponentBoundary(const ComponentBoundary &other)
+ : Component(other), m_bound(other.m_bound), m_newline(other.m_newline),
+ m_first(other.m_first), m_last(other.m_last) {}
+
+ComponentBoundary * ComponentBoundary::clone() const {
+ return new ComponentBoundary(*this);
+}
+
+vector<PositionInfo> ComponentBoundary::first() const {
+ return m_first;
+}
+
+vector<PositionInfo> ComponentBoundary::last() const {
+ return m_last;
+}
+
+bool ComponentBoundary::empty() const {
+ return true;
+}
+
+bool ComponentBoundary::repeatable() const {
+ return false;
+}
+
+static
+Position makeNewline(GlushkovBuildState &bs) {
+ NFABuilder &builder = bs.getBuilder();
+ Position newline = builder.makePositions(1);
+ builder.addCharReach(newline, CharReach('\n'));
+ return newline;
+}
+
+void ComponentBoundary::notePositions(GlushkovBuildState & bs) {
+ NFABuilder &builder = bs.getBuilder();
+ const Position startState = builder.getStart();
+
+ switch (m_bound) {
+ case BEGIN_STRING: // beginning of data stream ('^')
+ {
+ PositionInfo epsilon(GlushkovBuildState::POS_EPSILON);
+ epsilon.flags = POS_FLAG_NOFLOAT;
+ m_first.push_back(epsilon);
+
+ // We have the start vertex in firsts so that we can discourage
+ // the mid-pattern use of boundaries.
+ m_first.push_back(startState);
+
+ break;
+ }
+ case BEGIN_LINE: // multiline anchor: beginning of stream or a newline
+ {
+ PositionInfo epsilon(GlushkovBuildState::POS_EPSILON);
+ epsilon.flags = POS_FLAG_NOFLOAT;
+ m_first.push_back(epsilon);
+
+ // We have the start vertex in firsts so that we can discourage
+ // the mid-pattern use of boundaries.
+ m_first.push_back(startState);
+
+ // Newline
+ m_newline = makeNewline(bs);
+ builder.setAssertFlag(m_newline, POS_FLAG_MULTILINE_START);
+ builder.setAssertFlag(m_newline, POS_FLAG_VIRTUAL_START);
+ PositionInfo nl(m_newline);
+ nl.flags = POS_FLAG_MUST_FLOAT | POS_FLAG_FIDDLE_ACCEPT;
+ m_first.push_back(nl);
+ m_last.push_back(nl);
+ recordPosBounds(m_newline, m_newline + 1);
+ break;
+ }
+ case END_STRING: // end of data stream ('\z')
+ {
+ PositionInfo epsilon(GlushkovBuildState::POS_EPSILON);
+ epsilon.flags = POS_FLAG_WIRE_EOD | POS_FLAG_NO_NL_EOD |
+ POS_FLAG_NO_NL_ACCEPT | POS_FLAG_ONLY_ENDS;
+ m_first.push_back(epsilon);
+ break;
+ }
+ case END_STRING_OPTIONAL_LF: // end of data with optional LF ('$')
+ {
+ PositionInfo epsilon(GlushkovBuildState::POS_EPSILON);
+ epsilon.flags = POS_FLAG_WIRE_EOD | POS_FLAG_WIRE_NL_EOD |
+ POS_FLAG_NO_NL_ACCEPT | POS_FLAG_ONLY_ENDS;
+ m_first.push_back(epsilon);
+ break;
+ }
+ case END_LINE: // multiline anchor: end of data or a newline
+ {
+ PositionInfo epsilon(GlushkovBuildState::POS_EPSILON);
+ epsilon.flags = POS_FLAG_WIRE_EOD | POS_FLAG_WIRE_NL_EOD |
+ POS_FLAG_WIRE_NL_ACCEPT | POS_FLAG_ONLY_ENDS;
+ m_first.push_back(epsilon);
+ break;
+ }
+ default:
+ // unsupported
+ assert(0);
+ break;
+ }
+}
+
+void ComponentBoundary::buildFollowSet(GlushkovBuildState &,
+ const vector<PositionInfo> &) {
+
+}
+
+bool ComponentBoundary::checkEmbeddedStartAnchor(bool at_start) const {
+ if (at_start) {
+ return at_start;
+ }
+
+ if (m_bound == BEGIN_STRING || m_bound == BEGIN_LINE) {
+ throw ParseError("Embedded start anchors not supported.");
+ }
+
+ return at_start;
+}
+
+bool ComponentBoundary::checkEmbeddedEndAnchor(bool at_end) const {
+ if (at_end) {
+ return at_end;
+ }
+
+ if (m_bound != BEGIN_STRING && m_bound != BEGIN_LINE) {
+ throw ParseError("Embedded end anchors not supported.");
+ }
+
+ return at_end;
+}
+
+} // namespace
diff --git a/contrib/libs/hyperscan/src/parser/ComponentBoundary.h b/contrib/libs/hyperscan/src/parser/ComponentBoundary.h
index cdc7c7d4dd..fea158ee17 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentBoundary.h
+++ b/contrib/libs/hyperscan/src/parser/ComponentBoundary.h
@@ -1,94 +1,94 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Boundary assertions (^, $, \\A, \\Z, \\z)
- */
-
-#ifndef _RE_COMPONENTBOUNDARY_H_
-#define _RE_COMPONENTBOUNDARY_H_
-
-#include "Component.h"
-#include "position.h"
-
-namespace ue2 {
-
-/** \brief Encapsulates a line/string boundary assertion. */
-class ComponentBoundary : public Component {
- friend class DumpVisitor;
- friend class PrintVisitor;
- friend class UnsafeBoundsVisitor;
- friend class MultilineVisitor;
-public:
- enum Boundary {
- BEGIN_STRING, //!< beginning of data stream
- END_STRING, //!< end of data stream
- END_STRING_OPTIONAL_LF, //!< end of data stream with an optional
- // linefeed
- BEGIN_LINE, //!< '(^|\\n)'
- END_LINE //!< '($|\\n)'
- };
-
- explicit ComponentBoundary(enum Boundary bound);
- ~ComponentBoundary() override;
- ComponentBoundary *clone() const override;
-
- Component *accept(ComponentVisitor &v) override {
- Component *c = v.visit(this);
- v.post(this);
- return c;
- }
-
- void accept(ConstComponentVisitor &v) const override {
- v.pre(*this);
- v.during(*this);
- v.post(*this);
- }
-
- std::vector<PositionInfo> first() const override;
- std::vector<PositionInfo> last() const override;
- bool empty() const override;
- bool repeatable() const override;
- void notePositions(GlushkovBuildState &bs) override;
- void buildFollowSet(GlushkovBuildState &bs,
- const std::vector<PositionInfo> &lastPos) override;
- bool checkEmbeddedStartAnchor(bool at_start) const override;
- bool checkEmbeddedEndAnchor(bool at_end) const override;
-
-private:
- enum Boundary m_bound; //!< \brief which assertion is that?
- Position m_newline; //!< \brief special newline state
- std::vector<PositionInfo> m_first; //!< \brief positions returned for first()
- std::vector<PositionInfo> m_last; //!< \brief positions returned for last()
-
- ComponentBoundary(const ComponentBoundary &other);
-};
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Boundary assertions (^, $, \\A, \\Z, \\z)
+ */
+
+#ifndef _RE_COMPONENTBOUNDARY_H_
+#define _RE_COMPONENTBOUNDARY_H_
+
+#include "Component.h"
+#include "position.h"
+
+namespace ue2 {
+
+/** \brief Encapsulates a line/string boundary assertion. */
+class ComponentBoundary : public Component {
+ friend class DumpVisitor;
+ friend class PrintVisitor;
+ friend class UnsafeBoundsVisitor;
+ friend class MultilineVisitor;
+public:
+ enum Boundary {
+ BEGIN_STRING, //!< beginning of data stream
+ END_STRING, //!< end of data stream
+ END_STRING_OPTIONAL_LF, //!< end of data stream with an optional
+ // linefeed
+ BEGIN_LINE, //!< '(^|\\n)'
+ END_LINE //!< '($|\\n)'
+ };
+
+ explicit ComponentBoundary(enum Boundary bound);
+ ~ComponentBoundary() override;
+ ComponentBoundary *clone() const override;
+
+ Component *accept(ComponentVisitor &v) override {
+ Component *c = v.visit(this);
+ v.post(this);
+ return c;
+ }
+
+ void accept(ConstComponentVisitor &v) const override {
+ v.pre(*this);
+ v.during(*this);
+ v.post(*this);
+ }
+
+ std::vector<PositionInfo> first() const override;
+ std::vector<PositionInfo> last() const override;
+ bool empty() const override;
+ bool repeatable() const override;
+ void notePositions(GlushkovBuildState &bs) override;
+ void buildFollowSet(GlushkovBuildState &bs,
+ const std::vector<PositionInfo> &lastPos) override;
+ bool checkEmbeddedStartAnchor(bool at_start) const override;
+ bool checkEmbeddedEndAnchor(bool at_end) const override;
+
+private:
+ enum Boundary m_bound; //!< \brief which assertion is that?
+ Position m_newline; //!< \brief special newline state
+ std::vector<PositionInfo> m_first; //!< \brief positions returned for first()
+ std::vector<PositionInfo> m_last; //!< \brief positions returned for last()
+
+ ComponentBoundary(const ComponentBoundary &other);
+};
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/parser/ComponentByte.cpp b/contrib/libs/hyperscan/src/parser/ComponentByte.cpp
index f4c5b026b5..c55c477284 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentByte.cpp
+++ b/contrib/libs/hyperscan/src/parser/ComponentByte.cpp
@@ -1,70 +1,70 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Single bytes (\\C metachar)
- */
-
-
-#include "ComponentByte.h"
-
-#include "buildstate.h"
-#include "position.h"
-#include "position_info.h"
-#include "nfagraph/ng_builder.h"
-#include "util/charreach.h"
-
-using namespace std;
-
-namespace ue2 {
-
-ComponentByte::ComponentByte()
- : position(GlushkovBuildState::POS_UNINITIALIZED) {}
-
-ComponentByte::~ComponentByte() {}
-
-ComponentByte *ComponentByte::clone() const {
- return new ComponentByte(*this);
-}
-
-vector<PositionInfo> ComponentByte::first() const {
- return vector<PositionInfo>(1, PositionInfo(position));
-}
-
-vector<PositionInfo> ComponentByte::last() const {
- return vector<PositionInfo>(1, PositionInfo(position));
-}
-
-void ComponentByte::notePositions(GlushkovBuildState &bs) {
- NFABuilder &builder = bs.getBuilder();
- position = builder.makePositions(1);
- builder.addCharReach(position, CharReach::dot());
- builder.setNodeReportID(position, 0 /* offset adj */);
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Single bytes (\\C metachar)
+ */
+
+
+#include "ComponentByte.h"
+
+#include "buildstate.h"
+#include "position.h"
+#include "position_info.h"
+#include "nfagraph/ng_builder.h"
+#include "util/charreach.h"
+
+using namespace std;
+
+namespace ue2 {
+
+ComponentByte::ComponentByte()
+ : position(GlushkovBuildState::POS_UNINITIALIZED) {}
+
+ComponentByte::~ComponentByte() {}
+
+ComponentByte *ComponentByte::clone() const {
+ return new ComponentByte(*this);
+}
+
+vector<PositionInfo> ComponentByte::first() const {
+ return vector<PositionInfo>(1, PositionInfo(position));
+}
+
+vector<PositionInfo> ComponentByte::last() const {
+ return vector<PositionInfo>(1, PositionInfo(position));
+}
+
+void ComponentByte::notePositions(GlushkovBuildState &bs) {
+ NFABuilder &builder = bs.getBuilder();
+ position = builder.makePositions(1);
+ builder.addCharReach(position, CharReach::dot());
+ builder.setNodeReportID(position, 0 /* offset adj */);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/ComponentByte.h b/contrib/libs/hyperscan/src/parser/ComponentByte.h
index 2f2f143b16..331e326de4 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentByte.h
+++ b/contrib/libs/hyperscan/src/parser/ComponentByte.h
@@ -1,80 +1,80 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Single bytes (\\C metachar)
- */
-
-#ifndef _RE_COMPONENTBYTE_H_
-#define _RE_COMPONENTBYTE_H_
-
-#include "Component.h"
-
-namespace ue2 {
-
-class ComponentByte : public Component {
- friend class DumpVisitor;
-public:
- ComponentByte(void);
- ~ComponentByte() override;
- ComponentByte *clone() const override;
-
- Component *accept(ComponentVisitor &v) override {
- Component *c = v.visit(this);
- v.post(this);
- return c;
- }
-
- void accept(ConstComponentVisitor &v) const override {
- v.pre(*this);
- v.during(*this);
- v.post(*this);
- }
-
- std::vector<PositionInfo> first() const override;
- std::vector<PositionInfo> last() const override;
-
- bool empty() const override { return false; }
-
- void notePositions(GlushkovBuildState &bs) override;
- void buildFollowSet(GlushkovBuildState &,
- const std::vector<PositionInfo> &) override {
- // all follow set construction is handled by firsts/lasts
- return;
- }
-
-private:
- Position position;
-
- ComponentByte(const ComponentByte &other)
- : Component(other), position(other.position) {}
-};
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Single bytes (\\C metachar)
+ */
+
+#ifndef _RE_COMPONENTBYTE_H_
+#define _RE_COMPONENTBYTE_H_
+
+#include "Component.h"
+
+namespace ue2 {
+
+class ComponentByte : public Component {
+ friend class DumpVisitor;
+public:
+ ComponentByte(void);
+ ~ComponentByte() override;
+ ComponentByte *clone() const override;
+
+ Component *accept(ComponentVisitor &v) override {
+ Component *c = v.visit(this);
+ v.post(this);
+ return c;
+ }
+
+ void accept(ConstComponentVisitor &v) const override {
+ v.pre(*this);
+ v.during(*this);
+ v.post(*this);
+ }
+
+ std::vector<PositionInfo> first() const override;
+ std::vector<PositionInfo> last() const override;
+
+ bool empty() const override { return false; }
+
+ void notePositions(GlushkovBuildState &bs) override;
+ void buildFollowSet(GlushkovBuildState &,
+ const std::vector<PositionInfo> &) override {
+ // all follow set construction is handled by firsts/lasts
+ return;
+ }
+
+private:
+ Position position;
+
+ ComponentByte(const ComponentByte &other)
+ : Component(other), position(other.position) {}
+};
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/parser/ComponentClass.cpp b/contrib/libs/hyperscan/src/parser/ComponentClass.cpp
index c61c7de0a2..a91ae979ff 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentClass.cpp
+++ b/contrib/libs/hyperscan/src/parser/ComponentClass.cpp
@@ -1,452 +1,452 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Character classes and their mnemonics.
- */
-#include "Parser.h"
-#include "ComponentClass.h"
-#include "AsciiComponentClass.h"
-#include "ucp_table.h"
-#include "Utf8ComponentClass.h"
-#include "util/charreach.h"
-#include "util/make_unique.h"
-
-#include <boost/icl/interval_set.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
-static
-CharReach to_cr(const CodePointSet &cps) {
- CharReach cr;
- for (const auto &cp : cps) {
- if (lower(cp) >= CharReach::npos) {
- break;
- }
-
- cr.setRange(lower(cp), MIN(upper(cp), CharReach::npos - 1));
- }
-
- return cr;
-}
-
-CharReach getPredefinedCharReach(PredefinedClass c, const ParseMode &mode) {
- const CharReach lower('a', 'z');
- const CharReach upper('A', 'Z');
- const CharReach number('0', '9');
- switch (c) {
- case CLASS_ALNUM:
- return lower | upper | number;
- case CLASS_ALPHA:
- return lower | upper;
- case CLASS_ANY:
- if (mode.dotall) {
- return ~CharReach();
- } else {
- return ~CharReach('\n');
- }
- case CLASS_ASCII:
- return CharReach(0, 127);
- case CLASS_BLANK:
- return CharReach(" \t");
- case CLASS_CNTRL:
- return CharReach(0, 31) | CharReach(127 /* del */);
- case CLASS_DIGIT:
- return number;
- case CLASS_GRAPH:
- return CharReach(0x21, 0x7e);
- case CLASS_XGRAPH:
- return to_cr(getPredefinedCodePointSet(c, mode));
- case CLASS_HORZ:
- return CharReach("\x09\x20\xA0");
- case CLASS_LOWER:
- if (mode.caseless) {
- return lower | upper;
- } else {
- return lower;
- }
- case CLASS_PRINT:
- return CharReach(0x20, 0x7e);
- case CLASS_XPRINT:
- return to_cr(getPredefinedCodePointSet(c, mode));
- case CLASS_PUNCT:
- return CharReach(0x21, '0' - 1)
- | CharReach('9' + 1, 'A' - 1)
- | CharReach('Z' + 1, 'a' - 1)
- | CharReach('z' + 1, 126);
- case CLASS_XPUNCT:
- return to_cr(getPredefinedCodePointSet(c, mode));
- case CLASS_SPACE:
- return CharReach("\x09\x0a\x0c\x0b\x0d\x20");
- case CLASS_UPPER:
- if (mode.caseless) {
- return lower | upper;
- } else {
- return upper;
- }
- case CLASS_VERT:
- return CharReach("\x0a\x0b\x0c\x0d\x85");
- case CLASS_WORD:
- return lower | upper | number | CharReach('_');
- case CLASS_XDIGIT:
- return CharReach("0123456789abcdefABCDEF");
- case CLASS_UCP_C:
- return to_cr(getUcpC());
- case CLASS_UCP_CC:
- return to_cr(getUcpCc());
- case CLASS_UCP_CF:
- return to_cr(getUcpCf());
- case CLASS_UCP_CN:
- return to_cr(getUcpCn());
- case CLASS_UCP_CO:
- return to_cr(getUcpCo());
- case CLASS_UCP_CS:
- return to_cr(getUcpCs());
- case CLASS_UCP_L:
- return to_cr(getUcpL());
- case CLASS_UCP_L_AND:
- return to_cr(getUcpL_and());
- case CLASS_UCP_LL:
- return to_cr(getUcpLl());
- case CLASS_UCP_LM:
- return to_cr(getUcpLm());
- case CLASS_UCP_LO:
- return to_cr(getUcpLo());
- case CLASS_UCP_LT:
- return to_cr(getUcpLt());
- case CLASS_UCP_LU:
- return to_cr(getUcpLu());
- case CLASS_UCP_M:
- return to_cr(getUcpM());
- case CLASS_UCP_MC:
- return to_cr(getUcpMc());
- case CLASS_UCP_ME:
- return to_cr(getUcpMe());
- case CLASS_UCP_MN:
- return to_cr(getUcpMn());
- case CLASS_UCP_N:
- return to_cr(getUcpN());
- case CLASS_UCP_ND:
- return to_cr(getUcpNd());
- case CLASS_UCP_NL:
- return to_cr(getUcpNl());
- case CLASS_UCP_NO:
- return to_cr(getUcpNo());
- case CLASS_UCP_P:
- return to_cr(getUcpP());
- case CLASS_UCP_PC:
- return to_cr(getUcpPc());
- case CLASS_UCP_PD:
- return to_cr(getUcpPd());
- case CLASS_UCP_PE:
- return to_cr(getUcpPe());
- case CLASS_UCP_PF:
- return to_cr(getUcpPf());
- case CLASS_UCP_PI:
- return to_cr(getUcpPi());
- case CLASS_UCP_PO:
- return to_cr(getUcpPo());
- case CLASS_UCP_PS:
- return to_cr(getUcpPs());
- case CLASS_UCP_S:
- return to_cr(getUcpS());
- case CLASS_UCP_SC:
- return to_cr(getUcpSc());
- case CLASS_UCP_SK:
- return to_cr(getUcpSk());
- case CLASS_UCP_SM:
- return to_cr(getUcpSm());
- case CLASS_UCP_SO:
- return to_cr(getUcpSo());
- case CLASS_UCP_XAN:
- return to_cr(getUcpXan());
- case CLASS_UCP_XPS:
- case CLASS_UCP_XSP:
- return getPredefinedCharReach(CLASS_VERT, mode) | getPredefinedCharReach(CLASS_HORZ, mode);
- case CLASS_UCP_XWD:
- return to_cr(getUcpXwd());
- case CLASS_UCP_Z:
- return to_cr(getUcpZ());
- case CLASS_UCP_ZL:
- return to_cr(getUcpZl());
- case CLASS_UCP_ZP:
- return to_cr(getUcpZp());
- case CLASS_UCP_ZS:
- return to_cr(getUcpZs());
- case CLASS_SCRIPT_ARABIC:
- return to_cr(getUcpArabic());
- case CLASS_SCRIPT_ARMENIAN:
- return to_cr(getUcpArmenian());
- case CLASS_SCRIPT_AVESTAN:
- return to_cr(getUcpAvestan());
- case CLASS_SCRIPT_BALINESE:
- return to_cr(getUcpBalinese());
- case CLASS_SCRIPT_BAMUM:
- return to_cr(getUcpBamum());
- case CLASS_SCRIPT_BATAK:
- return to_cr(getUcpBatak());
- case CLASS_SCRIPT_BENGALI:
- return to_cr(getUcpBengali());
- case CLASS_SCRIPT_BOPOMOFO:
- return to_cr(getUcpBopomofo());
- case CLASS_SCRIPT_BRAHMI:
- return to_cr(getUcpBrahmi());
- case CLASS_SCRIPT_BRAILLE:
- return to_cr(getUcpBraille());
- case CLASS_SCRIPT_BUGINESE:
- return to_cr(getUcpBuginese());
- case CLASS_SCRIPT_BUHID:
- return to_cr(getUcpBuhid());
- case CLASS_SCRIPT_CANADIAN_ABORIGINAL:
- return to_cr(getUcpCanadian_Aboriginal());
- case CLASS_SCRIPT_CARIAN:
- return to_cr(getUcpCarian());
- case CLASS_SCRIPT_CHAM:
- return to_cr(getUcpCham());
- case CLASS_SCRIPT_CHEROKEE:
- return to_cr(getUcpCherokee());
- case CLASS_SCRIPT_COMMON:
- return to_cr(getUcpCommon());
- case CLASS_SCRIPT_COPTIC:
- return to_cr(getUcpCoptic());
- case CLASS_SCRIPT_CUNEIFORM:
- return to_cr(getUcpCuneiform());
- case CLASS_SCRIPT_CYPRIOT:
- return to_cr(getUcpCypriot());
- case CLASS_SCRIPT_CYRILLIC:
- return to_cr(getUcpCyrillic());
- case CLASS_SCRIPT_DESERET:
- return to_cr(getUcpDeseret());
- case CLASS_SCRIPT_DEVANAGARI:
- return to_cr(getUcpDevanagari());
- case CLASS_SCRIPT_EGYPTIAN_HIEROGLYPHS:
- return to_cr(getUcpEgyptian_Hieroglyphs());
- case CLASS_SCRIPT_ETHIOPIC:
- return to_cr(getUcpEthiopic());
- case CLASS_SCRIPT_GEORGIAN:
- return to_cr(getUcpGeorgian());
- case CLASS_SCRIPT_GLAGOLITIC:
- return to_cr(getUcpGlagolitic());
- case CLASS_SCRIPT_GOTHIC:
- return to_cr(getUcpGothic());
- case CLASS_SCRIPT_GREEK:
- return to_cr(getUcpGreek());
- case CLASS_SCRIPT_GUJARATI:
- return to_cr(getUcpGujarati());
- case CLASS_SCRIPT_GURMUKHI:
- return to_cr(getUcpGurmukhi());
- case CLASS_SCRIPT_HAN:
- return to_cr(getUcpHan());
- case CLASS_SCRIPT_HANGUL:
- return to_cr(getUcpHangul());
- case CLASS_SCRIPT_HANUNOO:
- return to_cr(getUcpHanunoo());
- case CLASS_SCRIPT_HEBREW:
- return to_cr(getUcpHebrew());
- case CLASS_SCRIPT_HIRAGANA:
- return to_cr(getUcpHiragana());
- case CLASS_SCRIPT_IMPERIAL_ARAMAIC:
- return to_cr(getUcpImperial_Aramaic());
- case CLASS_SCRIPT_INHERITED:
- return to_cr(getUcpInherited());
- case CLASS_SCRIPT_INSCRIPTIONAL_PAHLAVI:
- return to_cr(getUcpInscriptional_Pahlavi());
- case CLASS_SCRIPT_INSCRIPTIONAL_PARTHIAN:
- return to_cr(getUcpInscriptional_Parthian());
- case CLASS_SCRIPT_JAVANESE:
- return to_cr(getUcpJavanese());
- case CLASS_SCRIPT_KAITHI:
- return to_cr(getUcpKaithi());
- case CLASS_SCRIPT_KANNADA:
- return to_cr(getUcpKannada());
- case CLASS_SCRIPT_KATAKANA:
- return to_cr(getUcpKatakana());
- case CLASS_SCRIPT_KAYAH_LI:
- return to_cr(getUcpKayah_Li());
- case CLASS_SCRIPT_KHAROSHTHI:
- return to_cr(getUcpKharoshthi());
- case CLASS_SCRIPT_KHMER:
- return to_cr(getUcpKhmer());
- case CLASS_SCRIPT_LAO:
- return to_cr(getUcpLao());
- case CLASS_SCRIPT_LATIN:
- return to_cr(getUcpLatin());
- case CLASS_SCRIPT_LEPCHA:
- return to_cr(getUcpLepcha());
- case CLASS_SCRIPT_LIMBU:
- return to_cr(getUcpLimbu());
- case CLASS_SCRIPT_LINEAR_B:
- return to_cr(getUcpLinear_B());
- case CLASS_SCRIPT_LISU:
- return to_cr(getUcpLisu());
- case CLASS_SCRIPT_LYCIAN:
- return to_cr(getUcpLycian());
- case CLASS_SCRIPT_LYDIAN:
- return to_cr(getUcpLydian());
- case CLASS_SCRIPT_MALAYALAM:
- return to_cr(getUcpMalayalam());
- case CLASS_SCRIPT_MANDAIC:
- return to_cr(getUcpMandaic());
- case CLASS_SCRIPT_MEETEI_MAYEK:
- return to_cr(getUcpMeetei_Mayek());
- case CLASS_SCRIPT_MONGOLIAN:
- return to_cr(getUcpMongolian());
- case CLASS_SCRIPT_MYANMAR:
- return to_cr(getUcpMyanmar());
- case CLASS_SCRIPT_NEW_TAI_LUE:
- return to_cr(getUcpNew_Tai_Lue());
- case CLASS_SCRIPT_NKO:
- return to_cr(getUcpNko());
- case CLASS_SCRIPT_OGHAM:
- return to_cr(getUcpOgham());
- case CLASS_SCRIPT_OL_CHIKI:
- return to_cr(getUcpOl_Chiki());
- case CLASS_SCRIPT_OLD_ITALIC:
- return to_cr(getUcpOld_Italic());
- case CLASS_SCRIPT_OLD_PERSIAN:
- return to_cr(getUcpOld_Persian());
- case CLASS_SCRIPT_OLD_SOUTH_ARABIAN:
- return to_cr(getUcpOld_South_Arabian());
- case CLASS_SCRIPT_OLD_TURKIC:
- return to_cr(getUcpOld_Turkic());
- case CLASS_SCRIPT_ORIYA:
- return to_cr(getUcpOriya());
- case CLASS_SCRIPT_OSMANYA:
- return to_cr(getUcpOsmanya());
- case CLASS_SCRIPT_PHAGS_PA:
- return to_cr(getUcpPhags_Pa());
- case CLASS_SCRIPT_PHOENICIAN:
- return to_cr(getUcpPhoenician());
- case CLASS_SCRIPT_REJANG:
- return to_cr(getUcpRejang());
- case CLASS_SCRIPT_RUNIC:
- return to_cr(getUcpRunic());
- case CLASS_SCRIPT_SAMARITAN:
- return to_cr(getUcpSamaritan());
- case CLASS_SCRIPT_SAURASHTRA:
- return to_cr(getUcpSaurashtra());
- case CLASS_SCRIPT_SHAVIAN:
- return to_cr(getUcpShavian());
- case CLASS_SCRIPT_SINHALA:
- return to_cr(getUcpSinhala());
- case CLASS_SCRIPT_SUNDANESE:
- return to_cr(getUcpSundanese());
- case CLASS_SCRIPT_SYLOTI_NAGRI:
- return to_cr(getUcpSyloti_Nagri());
- case CLASS_SCRIPT_SYRIAC:
- return to_cr(getUcpSyriac());
- case CLASS_SCRIPT_TAGALOG:
- return to_cr(getUcpTagalog());
- case CLASS_SCRIPT_TAGBANWA:
- return to_cr(getUcpTagbanwa());
- case CLASS_SCRIPT_TAI_LE:
- return to_cr(getUcpTai_Le());
- case CLASS_SCRIPT_TAI_THAM:
- return to_cr(getUcpTai_Tham());
- case CLASS_SCRIPT_TAI_VIET:
- return to_cr(getUcpTai_Viet());
- case CLASS_SCRIPT_TAMIL:
- return to_cr(getUcpTamil());
- case CLASS_SCRIPT_TELUGU:
- return to_cr(getUcpTelugu());
- case CLASS_SCRIPT_THAANA:
- return to_cr(getUcpThaana());
- case CLASS_SCRIPT_THAI:
- return to_cr(getUcpThai());
- case CLASS_SCRIPT_TIBETAN:
- return to_cr(getUcpTibetan());
- case CLASS_SCRIPT_TIFINAGH:
- return to_cr(getUcpTifinagh());
- case CLASS_SCRIPT_UGARITIC:
- return to_cr(getUcpUgaritic());
- case CLASS_SCRIPT_VAI:
- return to_cr(getUcpVai());
- case CLASS_SCRIPT_YI:
- return to_cr(getUcpYi());
- case CLASS_UCP_ANY: /* always include newline */
- return ~CharReach();
- }
- assert(0);
- return CharReach();
-}
-
-unique_ptr<ComponentClass> getComponentClass(const ParseMode &mode) {
- if (mode.utf8) {
- return ue2::make_unique<UTF8ComponentClass>(mode);
- } else {
- return ue2::make_unique<AsciiComponentClass>(mode);
- }
-}
-
-unique_ptr<ComponentClass> generateComponent(PredefinedClass c, bool negate,
- const ParseMode &mode) {
- auto cc = getComponentClass(mode);
- cc->add(c, negate);
- cc->finalize();
- return cc;
-}
-
-unique_ptr<ComponentClass> getLiteralComponentClass(unsigned char c,
- bool nocase) {
- ParseMode mode;
- mode.caseless = nocase;
- auto cc = getComponentClass(mode);
- cc->add(c);
- cc->finalize();
- return cc;
-}
-
-ComponentClass::ComponentClass(const ParseMode &mode_in)
- : m_negate(false), mode(mode_in), in_cand_range(false),
- range_start(INVALID_UNICODE), finalized(false) {}
-
-ComponentClass::~ComponentClass() { }
-
-void ComponentClass::addDash(void) {
- if (!in_cand_range) {
- // this could be the start of a range
- if (range_start != INVALID_UNICODE) {
- in_cand_range = true;
- } else {
- /* no possible start character for range, this is just a literal */
- add('-');
- }
- } else {
- // already creating a range, so this must be literal '-'
- in_cand_range = false;
- createRange('-');
- }
-}
-
-void ComponentClass::negate() {
- m_negate = true;
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Character classes and their mnemonics.
+ */
+#include "Parser.h"
+#include "ComponentClass.h"
+#include "AsciiComponentClass.h"
+#include "ucp_table.h"
+#include "Utf8ComponentClass.h"
+#include "util/charreach.h"
+#include "util/make_unique.h"
+
+#include <boost/icl/interval_set.hpp>
+
+using namespace std;
+
+namespace ue2 {
+
+static
+CharReach to_cr(const CodePointSet &cps) {
+ CharReach cr;
+ for (const auto &cp : cps) {
+ if (lower(cp) >= CharReach::npos) {
+ break;
+ }
+
+ cr.setRange(lower(cp), MIN(upper(cp), CharReach::npos - 1));
+ }
+
+ return cr;
+}
+
+CharReach getPredefinedCharReach(PredefinedClass c, const ParseMode &mode) {
+ const CharReach lower('a', 'z');
+ const CharReach upper('A', 'Z');
+ const CharReach number('0', '9');
+ switch (c) {
+ case CLASS_ALNUM:
+ return lower | upper | number;
+ case CLASS_ALPHA:
+ return lower | upper;
+ case CLASS_ANY:
+ if (mode.dotall) {
+ return ~CharReach();
+ } else {
+ return ~CharReach('\n');
+ }
+ case CLASS_ASCII:
+ return CharReach(0, 127);
+ case CLASS_BLANK:
+ return CharReach(" \t");
+ case CLASS_CNTRL:
+ return CharReach(0, 31) | CharReach(127 /* del */);
+ case CLASS_DIGIT:
+ return number;
+ case CLASS_GRAPH:
+ return CharReach(0x21, 0x7e);
+ case CLASS_XGRAPH:
+ return to_cr(getPredefinedCodePointSet(c, mode));
+ case CLASS_HORZ:
+ return CharReach("\x09\x20\xA0");
+ case CLASS_LOWER:
+ if (mode.caseless) {
+ return lower | upper;
+ } else {
+ return lower;
+ }
+ case CLASS_PRINT:
+ return CharReach(0x20, 0x7e);
+ case CLASS_XPRINT:
+ return to_cr(getPredefinedCodePointSet(c, mode));
+ case CLASS_PUNCT:
+ return CharReach(0x21, '0' - 1)
+ | CharReach('9' + 1, 'A' - 1)
+ | CharReach('Z' + 1, 'a' - 1)
+ | CharReach('z' + 1, 126);
+ case CLASS_XPUNCT:
+ return to_cr(getPredefinedCodePointSet(c, mode));
+ case CLASS_SPACE:
+ return CharReach("\x09\x0a\x0c\x0b\x0d\x20");
+ case CLASS_UPPER:
+ if (mode.caseless) {
+ return lower | upper;
+ } else {
+ return upper;
+ }
+ case CLASS_VERT:
+ return CharReach("\x0a\x0b\x0c\x0d\x85");
+ case CLASS_WORD:
+ return lower | upper | number | CharReach('_');
+ case CLASS_XDIGIT:
+ return CharReach("0123456789abcdefABCDEF");
+ case CLASS_UCP_C:
+ return to_cr(getUcpC());
+ case CLASS_UCP_CC:
+ return to_cr(getUcpCc());
+ case CLASS_UCP_CF:
+ return to_cr(getUcpCf());
+ case CLASS_UCP_CN:
+ return to_cr(getUcpCn());
+ case CLASS_UCP_CO:
+ return to_cr(getUcpCo());
+ case CLASS_UCP_CS:
+ return to_cr(getUcpCs());
+ case CLASS_UCP_L:
+ return to_cr(getUcpL());
+ case CLASS_UCP_L_AND:
+ return to_cr(getUcpL_and());
+ case CLASS_UCP_LL:
+ return to_cr(getUcpLl());
+ case CLASS_UCP_LM:
+ return to_cr(getUcpLm());
+ case CLASS_UCP_LO:
+ return to_cr(getUcpLo());
+ case CLASS_UCP_LT:
+ return to_cr(getUcpLt());
+ case CLASS_UCP_LU:
+ return to_cr(getUcpLu());
+ case CLASS_UCP_M:
+ return to_cr(getUcpM());
+ case CLASS_UCP_MC:
+ return to_cr(getUcpMc());
+ case CLASS_UCP_ME:
+ return to_cr(getUcpMe());
+ case CLASS_UCP_MN:
+ return to_cr(getUcpMn());
+ case CLASS_UCP_N:
+ return to_cr(getUcpN());
+ case CLASS_UCP_ND:
+ return to_cr(getUcpNd());
+ case CLASS_UCP_NL:
+ return to_cr(getUcpNl());
+ case CLASS_UCP_NO:
+ return to_cr(getUcpNo());
+ case CLASS_UCP_P:
+ return to_cr(getUcpP());
+ case CLASS_UCP_PC:
+ return to_cr(getUcpPc());
+ case CLASS_UCP_PD:
+ return to_cr(getUcpPd());
+ case CLASS_UCP_PE:
+ return to_cr(getUcpPe());
+ case CLASS_UCP_PF:
+ return to_cr(getUcpPf());
+ case CLASS_UCP_PI:
+ return to_cr(getUcpPi());
+ case CLASS_UCP_PO:
+ return to_cr(getUcpPo());
+ case CLASS_UCP_PS:
+ return to_cr(getUcpPs());
+ case CLASS_UCP_S:
+ return to_cr(getUcpS());
+ case CLASS_UCP_SC:
+ return to_cr(getUcpSc());
+ case CLASS_UCP_SK:
+ return to_cr(getUcpSk());
+ case CLASS_UCP_SM:
+ return to_cr(getUcpSm());
+ case CLASS_UCP_SO:
+ return to_cr(getUcpSo());
+ case CLASS_UCP_XAN:
+ return to_cr(getUcpXan());
+ case CLASS_UCP_XPS:
+ case CLASS_UCP_XSP:
+ return getPredefinedCharReach(CLASS_VERT, mode) | getPredefinedCharReach(CLASS_HORZ, mode);
+ case CLASS_UCP_XWD:
+ return to_cr(getUcpXwd());
+ case CLASS_UCP_Z:
+ return to_cr(getUcpZ());
+ case CLASS_UCP_ZL:
+ return to_cr(getUcpZl());
+ case CLASS_UCP_ZP:
+ return to_cr(getUcpZp());
+ case CLASS_UCP_ZS:
+ return to_cr(getUcpZs());
+ case CLASS_SCRIPT_ARABIC:
+ return to_cr(getUcpArabic());
+ case CLASS_SCRIPT_ARMENIAN:
+ return to_cr(getUcpArmenian());
+ case CLASS_SCRIPT_AVESTAN:
+ return to_cr(getUcpAvestan());
+ case CLASS_SCRIPT_BALINESE:
+ return to_cr(getUcpBalinese());
+ case CLASS_SCRIPT_BAMUM:
+ return to_cr(getUcpBamum());
+ case CLASS_SCRIPT_BATAK:
+ return to_cr(getUcpBatak());
+ case CLASS_SCRIPT_BENGALI:
+ return to_cr(getUcpBengali());
+ case CLASS_SCRIPT_BOPOMOFO:
+ return to_cr(getUcpBopomofo());
+ case CLASS_SCRIPT_BRAHMI:
+ return to_cr(getUcpBrahmi());
+ case CLASS_SCRIPT_BRAILLE:
+ return to_cr(getUcpBraille());
+ case CLASS_SCRIPT_BUGINESE:
+ return to_cr(getUcpBuginese());
+ case CLASS_SCRIPT_BUHID:
+ return to_cr(getUcpBuhid());
+ case CLASS_SCRIPT_CANADIAN_ABORIGINAL:
+ return to_cr(getUcpCanadian_Aboriginal());
+ case CLASS_SCRIPT_CARIAN:
+ return to_cr(getUcpCarian());
+ case CLASS_SCRIPT_CHAM:
+ return to_cr(getUcpCham());
+ case CLASS_SCRIPT_CHEROKEE:
+ return to_cr(getUcpCherokee());
+ case CLASS_SCRIPT_COMMON:
+ return to_cr(getUcpCommon());
+ case CLASS_SCRIPT_COPTIC:
+ return to_cr(getUcpCoptic());
+ case CLASS_SCRIPT_CUNEIFORM:
+ return to_cr(getUcpCuneiform());
+ case CLASS_SCRIPT_CYPRIOT:
+ return to_cr(getUcpCypriot());
+ case CLASS_SCRIPT_CYRILLIC:
+ return to_cr(getUcpCyrillic());
+ case CLASS_SCRIPT_DESERET:
+ return to_cr(getUcpDeseret());
+ case CLASS_SCRIPT_DEVANAGARI:
+ return to_cr(getUcpDevanagari());
+ case CLASS_SCRIPT_EGYPTIAN_HIEROGLYPHS:
+ return to_cr(getUcpEgyptian_Hieroglyphs());
+ case CLASS_SCRIPT_ETHIOPIC:
+ return to_cr(getUcpEthiopic());
+ case CLASS_SCRIPT_GEORGIAN:
+ return to_cr(getUcpGeorgian());
+ case CLASS_SCRIPT_GLAGOLITIC:
+ return to_cr(getUcpGlagolitic());
+ case CLASS_SCRIPT_GOTHIC:
+ return to_cr(getUcpGothic());
+ case CLASS_SCRIPT_GREEK:
+ return to_cr(getUcpGreek());
+ case CLASS_SCRIPT_GUJARATI:
+ return to_cr(getUcpGujarati());
+ case CLASS_SCRIPT_GURMUKHI:
+ return to_cr(getUcpGurmukhi());
+ case CLASS_SCRIPT_HAN:
+ return to_cr(getUcpHan());
+ case CLASS_SCRIPT_HANGUL:
+ return to_cr(getUcpHangul());
+ case CLASS_SCRIPT_HANUNOO:
+ return to_cr(getUcpHanunoo());
+ case CLASS_SCRIPT_HEBREW:
+ return to_cr(getUcpHebrew());
+ case CLASS_SCRIPT_HIRAGANA:
+ return to_cr(getUcpHiragana());
+ case CLASS_SCRIPT_IMPERIAL_ARAMAIC:
+ return to_cr(getUcpImperial_Aramaic());
+ case CLASS_SCRIPT_INHERITED:
+ return to_cr(getUcpInherited());
+ case CLASS_SCRIPT_INSCRIPTIONAL_PAHLAVI:
+ return to_cr(getUcpInscriptional_Pahlavi());
+ case CLASS_SCRIPT_INSCRIPTIONAL_PARTHIAN:
+ return to_cr(getUcpInscriptional_Parthian());
+ case CLASS_SCRIPT_JAVANESE:
+ return to_cr(getUcpJavanese());
+ case CLASS_SCRIPT_KAITHI:
+ return to_cr(getUcpKaithi());
+ case CLASS_SCRIPT_KANNADA:
+ return to_cr(getUcpKannada());
+ case CLASS_SCRIPT_KATAKANA:
+ return to_cr(getUcpKatakana());
+ case CLASS_SCRIPT_KAYAH_LI:
+ return to_cr(getUcpKayah_Li());
+ case CLASS_SCRIPT_KHAROSHTHI:
+ return to_cr(getUcpKharoshthi());
+ case CLASS_SCRIPT_KHMER:
+ return to_cr(getUcpKhmer());
+ case CLASS_SCRIPT_LAO:
+ return to_cr(getUcpLao());
+ case CLASS_SCRIPT_LATIN:
+ return to_cr(getUcpLatin());
+ case CLASS_SCRIPT_LEPCHA:
+ return to_cr(getUcpLepcha());
+ case CLASS_SCRIPT_LIMBU:
+ return to_cr(getUcpLimbu());
+ case CLASS_SCRIPT_LINEAR_B:
+ return to_cr(getUcpLinear_B());
+ case CLASS_SCRIPT_LISU:
+ return to_cr(getUcpLisu());
+ case CLASS_SCRIPT_LYCIAN:
+ return to_cr(getUcpLycian());
+ case CLASS_SCRIPT_LYDIAN:
+ return to_cr(getUcpLydian());
+ case CLASS_SCRIPT_MALAYALAM:
+ return to_cr(getUcpMalayalam());
+ case CLASS_SCRIPT_MANDAIC:
+ return to_cr(getUcpMandaic());
+ case CLASS_SCRIPT_MEETEI_MAYEK:
+ return to_cr(getUcpMeetei_Mayek());
+ case CLASS_SCRIPT_MONGOLIAN:
+ return to_cr(getUcpMongolian());
+ case CLASS_SCRIPT_MYANMAR:
+ return to_cr(getUcpMyanmar());
+ case CLASS_SCRIPT_NEW_TAI_LUE:
+ return to_cr(getUcpNew_Tai_Lue());
+ case CLASS_SCRIPT_NKO:
+ return to_cr(getUcpNko());
+ case CLASS_SCRIPT_OGHAM:
+ return to_cr(getUcpOgham());
+ case CLASS_SCRIPT_OL_CHIKI:
+ return to_cr(getUcpOl_Chiki());
+ case CLASS_SCRIPT_OLD_ITALIC:
+ return to_cr(getUcpOld_Italic());
+ case CLASS_SCRIPT_OLD_PERSIAN:
+ return to_cr(getUcpOld_Persian());
+ case CLASS_SCRIPT_OLD_SOUTH_ARABIAN:
+ return to_cr(getUcpOld_South_Arabian());
+ case CLASS_SCRIPT_OLD_TURKIC:
+ return to_cr(getUcpOld_Turkic());
+ case CLASS_SCRIPT_ORIYA:
+ return to_cr(getUcpOriya());
+ case CLASS_SCRIPT_OSMANYA:
+ return to_cr(getUcpOsmanya());
+ case CLASS_SCRIPT_PHAGS_PA:
+ return to_cr(getUcpPhags_Pa());
+ case CLASS_SCRIPT_PHOENICIAN:
+ return to_cr(getUcpPhoenician());
+ case CLASS_SCRIPT_REJANG:
+ return to_cr(getUcpRejang());
+ case CLASS_SCRIPT_RUNIC:
+ return to_cr(getUcpRunic());
+ case CLASS_SCRIPT_SAMARITAN:
+ return to_cr(getUcpSamaritan());
+ case CLASS_SCRIPT_SAURASHTRA:
+ return to_cr(getUcpSaurashtra());
+ case CLASS_SCRIPT_SHAVIAN:
+ return to_cr(getUcpShavian());
+ case CLASS_SCRIPT_SINHALA:
+ return to_cr(getUcpSinhala());
+ case CLASS_SCRIPT_SUNDANESE:
+ return to_cr(getUcpSundanese());
+ case CLASS_SCRIPT_SYLOTI_NAGRI:
+ return to_cr(getUcpSyloti_Nagri());
+ case CLASS_SCRIPT_SYRIAC:
+ return to_cr(getUcpSyriac());
+ case CLASS_SCRIPT_TAGALOG:
+ return to_cr(getUcpTagalog());
+ case CLASS_SCRIPT_TAGBANWA:
+ return to_cr(getUcpTagbanwa());
+ case CLASS_SCRIPT_TAI_LE:
+ return to_cr(getUcpTai_Le());
+ case CLASS_SCRIPT_TAI_THAM:
+ return to_cr(getUcpTai_Tham());
+ case CLASS_SCRIPT_TAI_VIET:
+ return to_cr(getUcpTai_Viet());
+ case CLASS_SCRIPT_TAMIL:
+ return to_cr(getUcpTamil());
+ case CLASS_SCRIPT_TELUGU:
+ return to_cr(getUcpTelugu());
+ case CLASS_SCRIPT_THAANA:
+ return to_cr(getUcpThaana());
+ case CLASS_SCRIPT_THAI:
+ return to_cr(getUcpThai());
+ case CLASS_SCRIPT_TIBETAN:
+ return to_cr(getUcpTibetan());
+ case CLASS_SCRIPT_TIFINAGH:
+ return to_cr(getUcpTifinagh());
+ case CLASS_SCRIPT_UGARITIC:
+ return to_cr(getUcpUgaritic());
+ case CLASS_SCRIPT_VAI:
+ return to_cr(getUcpVai());
+ case CLASS_SCRIPT_YI:
+ return to_cr(getUcpYi());
+ case CLASS_UCP_ANY: /* always include newline */
+ return ~CharReach();
+ }
+ assert(0);
+ return CharReach();
+}
+
+unique_ptr<ComponentClass> getComponentClass(const ParseMode &mode) {
+ if (mode.utf8) {
+ return ue2::make_unique<UTF8ComponentClass>(mode);
+ } else {
+ return ue2::make_unique<AsciiComponentClass>(mode);
+ }
+}
+
+unique_ptr<ComponentClass> generateComponent(PredefinedClass c, bool negate,
+ const ParseMode &mode) {
+ auto cc = getComponentClass(mode);
+ cc->add(c, negate);
+ cc->finalize();
+ return cc;
+}
+
+unique_ptr<ComponentClass> getLiteralComponentClass(unsigned char c,
+ bool nocase) {
+ ParseMode mode;
+ mode.caseless = nocase;
+ auto cc = getComponentClass(mode);
+ cc->add(c);
+ cc->finalize();
+ return cc;
+}
+
+ComponentClass::ComponentClass(const ParseMode &mode_in)
+ : m_negate(false), mode(mode_in), in_cand_range(false),
+ range_start(INVALID_UNICODE), finalized(false) {}
+
+ComponentClass::~ComponentClass() { }
+
+void ComponentClass::addDash(void) {
+ if (!in_cand_range) {
+ // this could be the start of a range
+ if (range_start != INVALID_UNICODE) {
+ in_cand_range = true;
+ } else {
+ /* no possible start character for range, this is just a literal */
+ add('-');
+ }
+ } else {
+ // already creating a range, so this must be literal '-'
+ in_cand_range = false;
+ createRange('-');
+ }
+}
+
+void ComponentClass::negate() {
+ m_negate = true;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/ComponentClass.h b/contrib/libs/hyperscan/src/parser/ComponentClass.h
index 34330a1c29..040e6d786c 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentClass.h
+++ b/contrib/libs/hyperscan/src/parser/ComponentClass.h
@@ -1,280 +1,280 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Character classes and their mnemonics.
- */
-
-#ifndef COMPONENTCLASS_H
-#define COMPONENTCLASS_H
-
-#include <string>
-#include <vector>
-#include <utility>
-
-#include "Component.h"
-#include "Parser.h"
-#include "util/charreach.h"
-#include "util/unicode_def.h"
-#include "ue2common.h"
-
-namespace ue2 {
-
-enum PredefinedClass {
- CLASS_ALNUM,
- CLASS_ALPHA,
- CLASS_ANY, /* dot, not quite any when not in dotall mode */
- CLASS_ASCII,
- CLASS_BLANK,
- CLASS_CNTRL,
- CLASS_DIGIT,
- CLASS_GRAPH,
- CLASS_HORZ,
- CLASS_LOWER,
- CLASS_PRINT,
- CLASS_PUNCT,
- CLASS_SPACE, /* has vertical tab */
- CLASS_UPPER,
- CLASS_VERT,
- CLASS_WORD,
- CLASS_XDIGIT,
- CLASS_XGRAPH, /* [:graph:] in UCP mode */
- CLASS_XPRINT, /* [:print:] in UCP mode */
- CLASS_XPUNCT, /* [:punct:] in UCP mode */
- CLASS_UCP_C,
- CLASS_UCP_CC,
- CLASS_UCP_CF,
- CLASS_UCP_CN, /* unallocated code points */
- CLASS_UCP_CO,
- CLASS_UCP_CS, /* does not contain valid unicode codepoints */
- CLASS_UCP_L,
- CLASS_UCP_LL,
- CLASS_UCP_LM,
- CLASS_UCP_LO,
- CLASS_UCP_LT,
- CLASS_UCP_LU,
- CLASS_UCP_L_AND, /* L& = LL+LU+LT */
- CLASS_UCP_M,
- CLASS_UCP_MC,
- CLASS_UCP_ME,
- CLASS_UCP_MN,
- CLASS_UCP_N,
- CLASS_UCP_ND,
- CLASS_UCP_NL,
- CLASS_UCP_NO,
- CLASS_UCP_P,
- CLASS_UCP_PC,
- CLASS_UCP_PD,
- CLASS_UCP_PE,
- CLASS_UCP_PF,
- CLASS_UCP_PI,
- CLASS_UCP_PO,
- CLASS_UCP_PS,
- CLASS_UCP_S,
- CLASS_UCP_SC,
- CLASS_UCP_SK,
- CLASS_UCP_SM,
- CLASS_UCP_SO,
- CLASS_UCP_Z,
- CLASS_UCP_ZL,
- CLASS_UCP_ZP,
- CLASS_UCP_ZS,
- CLASS_UCP_XAN,
- CLASS_UCP_XPS, /* CLASS_SPACE */
- CLASS_UCP_XSP,
- CLASS_UCP_XWD,
- CLASS_SCRIPT_ARABIC,
- CLASS_SCRIPT_ARMENIAN,
- CLASS_SCRIPT_AVESTAN,
- CLASS_SCRIPT_BALINESE,
- CLASS_SCRIPT_BAMUM,
- CLASS_SCRIPT_BATAK,
- CLASS_SCRIPT_BENGALI,
- CLASS_SCRIPT_BOPOMOFO,
- CLASS_SCRIPT_BRAHMI,
- CLASS_SCRIPT_BRAILLE,
- CLASS_SCRIPT_BUGINESE,
- CLASS_SCRIPT_BUHID,
- CLASS_SCRIPT_CANADIAN_ABORIGINAL,
- CLASS_SCRIPT_CARIAN,
- CLASS_SCRIPT_CHAM,
- CLASS_SCRIPT_CHEROKEE,
- CLASS_SCRIPT_COMMON,
- CLASS_SCRIPT_COPTIC,
- CLASS_SCRIPT_CUNEIFORM,
- CLASS_SCRIPT_CYPRIOT,
- CLASS_SCRIPT_CYRILLIC,
- CLASS_SCRIPT_DESERET,
- CLASS_SCRIPT_DEVANAGARI,
- CLASS_SCRIPT_EGYPTIAN_HIEROGLYPHS,
- CLASS_SCRIPT_ETHIOPIC,
- CLASS_SCRIPT_GEORGIAN,
- CLASS_SCRIPT_GLAGOLITIC,
- CLASS_SCRIPT_GOTHIC,
- CLASS_SCRIPT_GREEK,
- CLASS_SCRIPT_GUJARATI,
- CLASS_SCRIPT_GURMUKHI,
- CLASS_SCRIPT_HAN,
- CLASS_SCRIPT_HANGUL,
- CLASS_SCRIPT_HANUNOO,
- CLASS_SCRIPT_HEBREW,
- CLASS_SCRIPT_HIRAGANA,
- CLASS_SCRIPT_IMPERIAL_ARAMAIC,
- CLASS_SCRIPT_INHERITED,
- CLASS_SCRIPT_INSCRIPTIONAL_PAHLAVI,
- CLASS_SCRIPT_INSCRIPTIONAL_PARTHIAN,
- CLASS_SCRIPT_JAVANESE,
- CLASS_SCRIPT_KAITHI,
- CLASS_SCRIPT_KANNADA,
- CLASS_SCRIPT_KATAKANA,
- CLASS_SCRIPT_KAYAH_LI,
- CLASS_SCRIPT_KHAROSHTHI,
- CLASS_SCRIPT_KHMER,
- CLASS_SCRIPT_LAO,
- CLASS_SCRIPT_LATIN,
- CLASS_SCRIPT_LEPCHA,
- CLASS_SCRIPT_LIMBU,
- CLASS_SCRIPT_LINEAR_B,
- CLASS_SCRIPT_LISU,
- CLASS_SCRIPT_LYCIAN,
- CLASS_SCRIPT_LYDIAN,
- CLASS_SCRIPT_MALAYALAM,
- CLASS_SCRIPT_MANDAIC,
- CLASS_SCRIPT_MEETEI_MAYEK,
- CLASS_SCRIPT_MONGOLIAN,
- CLASS_SCRIPT_MYANMAR,
- CLASS_SCRIPT_NEW_TAI_LUE,
- CLASS_SCRIPT_NKO,
- CLASS_SCRIPT_OGHAM,
- CLASS_SCRIPT_OL_CHIKI,
- CLASS_SCRIPT_OLD_ITALIC,
- CLASS_SCRIPT_OLD_PERSIAN,
- CLASS_SCRIPT_OLD_SOUTH_ARABIAN,
- CLASS_SCRIPT_OLD_TURKIC,
- CLASS_SCRIPT_ORIYA,
- CLASS_SCRIPT_OSMANYA,
- CLASS_SCRIPT_PHAGS_PA,
- CLASS_SCRIPT_PHOENICIAN,
- CLASS_SCRIPT_REJANG,
- CLASS_SCRIPT_RUNIC,
- CLASS_SCRIPT_SAMARITAN,
- CLASS_SCRIPT_SAURASHTRA,
- CLASS_SCRIPT_SHAVIAN,
- CLASS_SCRIPT_SINHALA,
- CLASS_SCRIPT_SUNDANESE,
- CLASS_SCRIPT_SYLOTI_NAGRI,
- CLASS_SCRIPT_SYRIAC,
- CLASS_SCRIPT_TAGALOG,
- CLASS_SCRIPT_TAGBANWA,
- CLASS_SCRIPT_TAI_LE,
- CLASS_SCRIPT_TAI_THAM,
- CLASS_SCRIPT_TAI_VIET,
- CLASS_SCRIPT_TAMIL,
- CLASS_SCRIPT_TELUGU,
- CLASS_SCRIPT_THAANA,
- CLASS_SCRIPT_THAI,
- CLASS_SCRIPT_TIBETAN,
- CLASS_SCRIPT_TIFINAGH,
- CLASS_SCRIPT_UGARITIC,
- CLASS_SCRIPT_VAI,
- CLASS_SCRIPT_YI,
- CLASS_UCP_ANY
-};
-
-CharReach getPredefinedCharReach(PredefinedClass c, const ParseMode &mode);
-
-class ComponentClass;
-class NFABuilder;
-
-/* Caller is responsible for lifecycle management, class finalized */
-std::unique_ptr<ComponentClass>
-generateComponent(PredefinedClass c, bool negated, const ParseMode &mode);
-
-/* Caller is responsible for lifecycle management, class open */
-std::unique_ptr<ComponentClass> getComponentClass(const ParseMode &mode);
-
-/** Common case: generate a component for a single literal character, possibly
- * in caseless mode. Caller is responsible for lifecycle management. */
-std::unique_ptr<ComponentClass> getLiteralComponentClass(unsigned char c,
- bool nocase);
-
-class ComponentClass : public Component {
- friend class DumpVisitor;
-protected:
- explicit ComponentClass(const ParseMode &mode_in);
-public:
- ~ComponentClass() override;
- ComponentClass *clone() const override = 0;
-
- Component *accept(ComponentVisitor &v) override = 0;
- void accept(ConstComponentVisitor &v) const override = 0;
-
- /** \brief True if the class contains no members (i.e. it will not match
- * against anything). This function can only be called on a finalized
- * class.
- *
- * Note: This is a different concept to Component::empty.
- */
- virtual bool class_empty(void) const = 0;
-
- virtual void add(PredefinedClass c, bool negated) = 0;
- virtual void add(unichar c) = 0; /* may throw LocatedParseError */
- void addDash(void);
-
- void negate(void);
- virtual void finalize(void) = 0;
-
- bool isNegated() const { return m_negate; }
-
- std::vector<PositionInfo> first() const override = 0;
- std::vector<PositionInfo> last() const override = 0;
- bool empty() const override { return false; } /* always 1 codepoint wide */
-
- void notePositions(GlushkovBuildState &bs) override = 0;
- void buildFollowSet(GlushkovBuildState &bs,
- const std::vector<PositionInfo> &) override = 0;
-
-protected:
- bool m_negate;
- const ParseMode mode;
- bool in_cand_range;
- unichar range_start;
- bool finalized;
-
- virtual void createRange(unichar) = 0;
-
- // Protected copy ctor. Use clone instead.
- ComponentClass(const ComponentClass &other)
- : Component(other), m_negate(other.m_negate), mode(other.mode),
- in_cand_range(other.in_cand_range), range_start(other.range_start),
- finalized(other.finalized) {}
-};
-
-} // namespace ue2
-
-#endif // COMPONENTCLASS_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Character classes and their mnemonics.
+ */
+
+#ifndef COMPONENTCLASS_H
+#define COMPONENTCLASS_H
+
+#include <string>
+#include <vector>
+#include <utility>
+
+#include "Component.h"
+#include "Parser.h"
+#include "util/charreach.h"
+#include "util/unicode_def.h"
+#include "ue2common.h"
+
+namespace ue2 {
+
+enum PredefinedClass {
+ CLASS_ALNUM,
+ CLASS_ALPHA,
+ CLASS_ANY, /* dot, not quite any when not in dotall mode */
+ CLASS_ASCII,
+ CLASS_BLANK,
+ CLASS_CNTRL,
+ CLASS_DIGIT,
+ CLASS_GRAPH,
+ CLASS_HORZ,
+ CLASS_LOWER,
+ CLASS_PRINT,
+ CLASS_PUNCT,
+ CLASS_SPACE, /* has vertical tab */
+ CLASS_UPPER,
+ CLASS_VERT,
+ CLASS_WORD,
+ CLASS_XDIGIT,
+ CLASS_XGRAPH, /* [:graph:] in UCP mode */
+ CLASS_XPRINT, /* [:print:] in UCP mode */
+ CLASS_XPUNCT, /* [:punct:] in UCP mode */
+ CLASS_UCP_C,
+ CLASS_UCP_CC,
+ CLASS_UCP_CF,
+ CLASS_UCP_CN, /* unallocated code points */
+ CLASS_UCP_CO,
+ CLASS_UCP_CS, /* does not contain valid unicode codepoints */
+ CLASS_UCP_L,
+ CLASS_UCP_LL,
+ CLASS_UCP_LM,
+ CLASS_UCP_LO,
+ CLASS_UCP_LT,
+ CLASS_UCP_LU,
+ CLASS_UCP_L_AND, /* L& = LL+LU+LT */
+ CLASS_UCP_M,
+ CLASS_UCP_MC,
+ CLASS_UCP_ME,
+ CLASS_UCP_MN,
+ CLASS_UCP_N,
+ CLASS_UCP_ND,
+ CLASS_UCP_NL,
+ CLASS_UCP_NO,
+ CLASS_UCP_P,
+ CLASS_UCP_PC,
+ CLASS_UCP_PD,
+ CLASS_UCP_PE,
+ CLASS_UCP_PF,
+ CLASS_UCP_PI,
+ CLASS_UCP_PO,
+ CLASS_UCP_PS,
+ CLASS_UCP_S,
+ CLASS_UCP_SC,
+ CLASS_UCP_SK,
+ CLASS_UCP_SM,
+ CLASS_UCP_SO,
+ CLASS_UCP_Z,
+ CLASS_UCP_ZL,
+ CLASS_UCP_ZP,
+ CLASS_UCP_ZS,
+ CLASS_UCP_XAN,
+ CLASS_UCP_XPS, /* CLASS_SPACE */
+ CLASS_UCP_XSP,
+ CLASS_UCP_XWD,
+ CLASS_SCRIPT_ARABIC,
+ CLASS_SCRIPT_ARMENIAN,
+ CLASS_SCRIPT_AVESTAN,
+ CLASS_SCRIPT_BALINESE,
+ CLASS_SCRIPT_BAMUM,
+ CLASS_SCRIPT_BATAK,
+ CLASS_SCRIPT_BENGALI,
+ CLASS_SCRIPT_BOPOMOFO,
+ CLASS_SCRIPT_BRAHMI,
+ CLASS_SCRIPT_BRAILLE,
+ CLASS_SCRIPT_BUGINESE,
+ CLASS_SCRIPT_BUHID,
+ CLASS_SCRIPT_CANADIAN_ABORIGINAL,
+ CLASS_SCRIPT_CARIAN,
+ CLASS_SCRIPT_CHAM,
+ CLASS_SCRIPT_CHEROKEE,
+ CLASS_SCRIPT_COMMON,
+ CLASS_SCRIPT_COPTIC,
+ CLASS_SCRIPT_CUNEIFORM,
+ CLASS_SCRIPT_CYPRIOT,
+ CLASS_SCRIPT_CYRILLIC,
+ CLASS_SCRIPT_DESERET,
+ CLASS_SCRIPT_DEVANAGARI,
+ CLASS_SCRIPT_EGYPTIAN_HIEROGLYPHS,
+ CLASS_SCRIPT_ETHIOPIC,
+ CLASS_SCRIPT_GEORGIAN,
+ CLASS_SCRIPT_GLAGOLITIC,
+ CLASS_SCRIPT_GOTHIC,
+ CLASS_SCRIPT_GREEK,
+ CLASS_SCRIPT_GUJARATI,
+ CLASS_SCRIPT_GURMUKHI,
+ CLASS_SCRIPT_HAN,
+ CLASS_SCRIPT_HANGUL,
+ CLASS_SCRIPT_HANUNOO,
+ CLASS_SCRIPT_HEBREW,
+ CLASS_SCRIPT_HIRAGANA,
+ CLASS_SCRIPT_IMPERIAL_ARAMAIC,
+ CLASS_SCRIPT_INHERITED,
+ CLASS_SCRIPT_INSCRIPTIONAL_PAHLAVI,
+ CLASS_SCRIPT_INSCRIPTIONAL_PARTHIAN,
+ CLASS_SCRIPT_JAVANESE,
+ CLASS_SCRIPT_KAITHI,
+ CLASS_SCRIPT_KANNADA,
+ CLASS_SCRIPT_KATAKANA,
+ CLASS_SCRIPT_KAYAH_LI,
+ CLASS_SCRIPT_KHAROSHTHI,
+ CLASS_SCRIPT_KHMER,
+ CLASS_SCRIPT_LAO,
+ CLASS_SCRIPT_LATIN,
+ CLASS_SCRIPT_LEPCHA,
+ CLASS_SCRIPT_LIMBU,
+ CLASS_SCRIPT_LINEAR_B,
+ CLASS_SCRIPT_LISU,
+ CLASS_SCRIPT_LYCIAN,
+ CLASS_SCRIPT_LYDIAN,
+ CLASS_SCRIPT_MALAYALAM,
+ CLASS_SCRIPT_MANDAIC,
+ CLASS_SCRIPT_MEETEI_MAYEK,
+ CLASS_SCRIPT_MONGOLIAN,
+ CLASS_SCRIPT_MYANMAR,
+ CLASS_SCRIPT_NEW_TAI_LUE,
+ CLASS_SCRIPT_NKO,
+ CLASS_SCRIPT_OGHAM,
+ CLASS_SCRIPT_OL_CHIKI,
+ CLASS_SCRIPT_OLD_ITALIC,
+ CLASS_SCRIPT_OLD_PERSIAN,
+ CLASS_SCRIPT_OLD_SOUTH_ARABIAN,
+ CLASS_SCRIPT_OLD_TURKIC,
+ CLASS_SCRIPT_ORIYA,
+ CLASS_SCRIPT_OSMANYA,
+ CLASS_SCRIPT_PHAGS_PA,
+ CLASS_SCRIPT_PHOENICIAN,
+ CLASS_SCRIPT_REJANG,
+ CLASS_SCRIPT_RUNIC,
+ CLASS_SCRIPT_SAMARITAN,
+ CLASS_SCRIPT_SAURASHTRA,
+ CLASS_SCRIPT_SHAVIAN,
+ CLASS_SCRIPT_SINHALA,
+ CLASS_SCRIPT_SUNDANESE,
+ CLASS_SCRIPT_SYLOTI_NAGRI,
+ CLASS_SCRIPT_SYRIAC,
+ CLASS_SCRIPT_TAGALOG,
+ CLASS_SCRIPT_TAGBANWA,
+ CLASS_SCRIPT_TAI_LE,
+ CLASS_SCRIPT_TAI_THAM,
+ CLASS_SCRIPT_TAI_VIET,
+ CLASS_SCRIPT_TAMIL,
+ CLASS_SCRIPT_TELUGU,
+ CLASS_SCRIPT_THAANA,
+ CLASS_SCRIPT_THAI,
+ CLASS_SCRIPT_TIBETAN,
+ CLASS_SCRIPT_TIFINAGH,
+ CLASS_SCRIPT_UGARITIC,
+ CLASS_SCRIPT_VAI,
+ CLASS_SCRIPT_YI,
+ CLASS_UCP_ANY
+};
+
+CharReach getPredefinedCharReach(PredefinedClass c, const ParseMode &mode);
+
+class ComponentClass;
+class NFABuilder;
+
+/* Caller is responsible for lifecycle management, class finalized */
+std::unique_ptr<ComponentClass>
+generateComponent(PredefinedClass c, bool negated, const ParseMode &mode);
+
+/* Caller is responsible for lifecycle management, class open */
+std::unique_ptr<ComponentClass> getComponentClass(const ParseMode &mode);
+
+/** Common case: generate a component for a single literal character, possibly
+ * in caseless mode. Caller is responsible for lifecycle management. */
+std::unique_ptr<ComponentClass> getLiteralComponentClass(unsigned char c,
+ bool nocase);
+
+class ComponentClass : public Component {
+ friend class DumpVisitor;
+protected:
+ explicit ComponentClass(const ParseMode &mode_in);
+public:
+ ~ComponentClass() override;
+ ComponentClass *clone() const override = 0;
+
+ Component *accept(ComponentVisitor &v) override = 0;
+ void accept(ConstComponentVisitor &v) const override = 0;
+
+ /** \brief True if the class contains no members (i.e. it will not match
+ * against anything). This function can only be called on a finalized
+ * class.
+ *
+ * Note: This is a different concept to Component::empty.
+ */
+ virtual bool class_empty(void) const = 0;
+
+ virtual void add(PredefinedClass c, bool negated) = 0;
+ virtual void add(unichar c) = 0; /* may throw LocatedParseError */
+ void addDash(void);
+
+ void negate(void);
+ virtual void finalize(void) = 0;
+
+ bool isNegated() const { return m_negate; }
+
+ std::vector<PositionInfo> first() const override = 0;
+ std::vector<PositionInfo> last() const override = 0;
+ bool empty() const override { return false; } /* always 1 codepoint wide */
+
+ void notePositions(GlushkovBuildState &bs) override = 0;
+ void buildFollowSet(GlushkovBuildState &bs,
+ const std::vector<PositionInfo> &) override = 0;
+
+protected:
+ bool m_negate;
+ const ParseMode mode;
+ bool in_cand_range;
+ unichar range_start;
+ bool finalized;
+
+ virtual void createRange(unichar) = 0;
+
+ // Protected copy ctor. Use clone instead.
+ ComponentClass(const ComponentClass &other)
+ : Component(other), m_negate(other.m_negate), mode(other.mode),
+ in_cand_range(other.in_cand_range), range_start(other.range_start),
+ finalized(other.finalized) {}
+};
+
+} // namespace ue2
+
+#endif // COMPONENTCLASS_H
diff --git a/contrib/libs/hyperscan/src/parser/ComponentCondReference.cpp b/contrib/libs/hyperscan/src/parser/ComponentCondReference.cpp
index d4fbb8711c..2a2ed4e093 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentCondReference.cpp
+++ b/contrib/libs/hyperscan/src/parser/ComponentCondReference.cpp
@@ -1,166 +1,166 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Conditional reference.
- */
-#include "ComponentCondReference.h"
-#include "ComponentAlternation.h"
-#include "ComponentAssertion.h"
-#include "parse_error.h"
-#include "position_info.h"
-
-#include <algorithm>
-#include <cassert>
-#include <memory>
-
-using namespace std;
-
-namespace ue2 {
-
-ComponentCondReference::ComponentCondReference(unsigned ref)
- : kind(CONDITION_NUMBER), ref_id(ref), hasBothBranches(false) {}
-
-ComponentCondReference::ComponentCondReference(const string &name)
- : kind(CONDITION_NAME), ref_id(0), ref_name(name), hasBothBranches(false) {}
-
-ComponentCondReference::ComponentCondReference(unique_ptr<Component> c)
- : kind(CONDITION_ASSERTION), ref_id(0), assertion(move(c)),
- hasBothBranches(false) {}
-
-ComponentCondReference::~ComponentCondReference() {}
-
-ComponentCondReference::ComponentCondReference(
- const ComponentCondReference &other)
- : ComponentSequence(other), kind(other.kind), ref_id(other.ref_id),
- ref_name(other.ref_name), hasBothBranches(other.hasBothBranches) {
- if (kind == CONDITION_ASSERTION) {
- assert(other.assertion);
- assertion.reset(other.assertion->clone());
- } else {
- assert(!other.assertion);
- }
-}
-
-ComponentCondReference *ComponentCondReference::clone() const {
- return new ComponentCondReference(*this);
-}
-
-Component *ComponentCondReference::accept(ComponentVisitor &v) {
- Component *c = v.visit(this);
- if (c != this) {
- v.post(this);
- return c;
- }
-
- if (kind == CONDITION_ASSERTION) {
- Component *a = assertion.get();
- c = assertion->accept(v);
- if (c != a) {
- assertion.reset(c);
- }
- }
-
- for (auto i = children.begin(), e = children.end(); i != e; ++i) {
- Component *child = i->get();
- c = (*i)->accept(v);
- if (c != child) {
- // Child has been replaced (new Component pointer) or we've been
- // instructed to delete it (null).
- i->reset(c);
- }
- }
-
- // Remove deleted children.
- children.erase(remove(children.begin(), children.end(), nullptr),
- children.end());
-
- v.post(this);
- return this;
-}
-
-void ComponentCondReference::accept(ConstComponentVisitor &v) const {
- v.pre(*this);
-
- if (kind == CONDITION_ASSERTION) {
- assertion->accept(v);
- v.during(*this); // FIXME: a good idea?
- }
-
- for (auto i = children.begin(), e = children.end(); i != e; ++i) {
- (*i)->accept(v);
- if (i + 1 != e) {
- v.during(*this);
- }
- }
-
- v.post(*this);
-}
-
-void ComponentCondReference::addAlternation() {
- if (alternation) {
- if (ref_name == "DEFINE") {
- throw LocatedParseError("DEFINE conditional group with more than "
- "one branch");
- }
-
- if (alternation->numBranches() >= 2) {
- throw LocatedParseError("Conditional with more than two branches");
- }
- }
- hasBothBranches = true;
- ComponentSequence::addAlternation();
-}
-
-vector<PositionInfo> ComponentCondReference::first() const {
- assert(0);
- return vector<PositionInfo>();
-}
-
-vector<PositionInfo> ComponentCondReference::last() const {
- assert(0);
- return vector<PositionInfo>();
-}
-
-bool ComponentCondReference::empty() const { return true; }
-
-void ComponentCondReference::notePositions(GlushkovBuildState &) { assert(0); }
-
-void ComponentCondReference::buildFollowSet(GlushkovBuildState &,
- const vector<PositionInfo> &) {
- assert(0);
-}
-
-bool ComponentCondReference::repeatable() const {
- // If this assertion has no children (it's an empty sequence, like that
- // produced by '(?!)') then PCRE would throw a "nothing to repeat" error.
- // So we do as well.
- return !children.empty();
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Conditional reference.
+ */
+#include "ComponentCondReference.h"
+#include "ComponentAlternation.h"
+#include "ComponentAssertion.h"
+#include "parse_error.h"
+#include "position_info.h"
+
+#include <algorithm>
+#include <cassert>
+#include <memory>
+
+using namespace std;
+
+namespace ue2 {
+
+ComponentCondReference::ComponentCondReference(unsigned ref)
+ : kind(CONDITION_NUMBER), ref_id(ref), hasBothBranches(false) {}
+
+ComponentCondReference::ComponentCondReference(const string &name)
+ : kind(CONDITION_NAME), ref_id(0), ref_name(name), hasBothBranches(false) {}
+
+ComponentCondReference::ComponentCondReference(unique_ptr<Component> c)
+ : kind(CONDITION_ASSERTION), ref_id(0), assertion(move(c)),
+ hasBothBranches(false) {}
+
+ComponentCondReference::~ComponentCondReference() {}
+
+ComponentCondReference::ComponentCondReference(
+ const ComponentCondReference &other)
+ : ComponentSequence(other), kind(other.kind), ref_id(other.ref_id),
+ ref_name(other.ref_name), hasBothBranches(other.hasBothBranches) {
+ if (kind == CONDITION_ASSERTION) {
+ assert(other.assertion);
+ assertion.reset(other.assertion->clone());
+ } else {
+ assert(!other.assertion);
+ }
+}
+
+ComponentCondReference *ComponentCondReference::clone() const {
+ return new ComponentCondReference(*this);
+}
+
+Component *ComponentCondReference::accept(ComponentVisitor &v) {
+ Component *c = v.visit(this);
+ if (c != this) {
+ v.post(this);
+ return c;
+ }
+
+ if (kind == CONDITION_ASSERTION) {
+ Component *a = assertion.get();
+ c = assertion->accept(v);
+ if (c != a) {
+ assertion.reset(c);
+ }
+ }
+
+ for (auto i = children.begin(), e = children.end(); i != e; ++i) {
+ Component *child = i->get();
+ c = (*i)->accept(v);
+ if (c != child) {
+ // Child has been replaced (new Component pointer) or we've been
+ // instructed to delete it (null).
+ i->reset(c);
+ }
+ }
+
+ // Remove deleted children.
+ children.erase(remove(children.begin(), children.end(), nullptr),
+ children.end());
+
+ v.post(this);
+ return this;
+}
+
+void ComponentCondReference::accept(ConstComponentVisitor &v) const {
+ v.pre(*this);
+
+ if (kind == CONDITION_ASSERTION) {
+ assertion->accept(v);
+ v.during(*this); // FIXME: a good idea?
+ }
+
+ for (auto i = children.begin(), e = children.end(); i != e; ++i) {
+ (*i)->accept(v);
+ if (i + 1 != e) {
+ v.during(*this);
+ }
+ }
+
+ v.post(*this);
+}
+
+void ComponentCondReference::addAlternation() {
+ if (alternation) {
+ if (ref_name == "DEFINE") {
+ throw LocatedParseError("DEFINE conditional group with more than "
+ "one branch");
+ }
+
+ if (alternation->numBranches() >= 2) {
+ throw LocatedParseError("Conditional with more than two branches");
+ }
+ }
+ hasBothBranches = true;
+ ComponentSequence::addAlternation();
+}
+
+vector<PositionInfo> ComponentCondReference::first() const {
+ assert(0);
+ return vector<PositionInfo>();
+}
+
+vector<PositionInfo> ComponentCondReference::last() const {
+ assert(0);
+ return vector<PositionInfo>();
+}
+
+bool ComponentCondReference::empty() const { return true; }
+
+void ComponentCondReference::notePositions(GlushkovBuildState &) { assert(0); }
+
+void ComponentCondReference::buildFollowSet(GlushkovBuildState &,
+ const vector<PositionInfo> &) {
+ assert(0);
+}
+
+bool ComponentCondReference::repeatable() const {
+ // If this assertion has no children (it's an empty sequence, like that
+ // produced by '(?!)') then PCRE would throw a "nothing to repeat" error.
+ // So we do as well.
+ return !children.empty();
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/ComponentCondReference.h b/contrib/libs/hyperscan/src/parser/ComponentCondReference.h
index bcdb87382b..c0ee9ac3ac 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentCondReference.h
+++ b/contrib/libs/hyperscan/src/parser/ComponentCondReference.h
@@ -1,91 +1,91 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Conditional reference.
- */
-
-#ifndef PARSER_COMPONENTCONDREFERENCE_H_
-#define PARSER_COMPONENTCONDREFERENCE_H_
-
-#include "ComponentSequence.h"
-
-#include <memory>
-#include <string>
-
-namespace ue2 {
-
-class ComponentCondReference : public ComponentSequence {
- friend class DumpVisitor;
- friend class PrefilterVisitor;
- friend class ReferenceVisitor;
- friend class PrintVisitor;
-public:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Conditional reference.
+ */
+
+#ifndef PARSER_COMPONENTCONDREFERENCE_H_
+#define PARSER_COMPONENTCONDREFERENCE_H_
+
+#include "ComponentSequence.h"
+
+#include <memory>
+#include <string>
+
+namespace ue2 {
+
+class ComponentCondReference : public ComponentSequence {
+ friend class DumpVisitor;
+ friend class PrefilterVisitor;
+ friend class ReferenceVisitor;
+ friend class PrintVisitor;
+public:
explicit ComponentCondReference(unsigned ref);
explicit ComponentCondReference(const std::string &name);
explicit ComponentCondReference(std::unique_ptr<Component> c);
-
- ~ComponentCondReference() override;
- ComponentCondReference *clone() const override;
- Component *accept(ComponentVisitor &v) override;
- void accept(ConstComponentVisitor &v) const override;
-
- void addAlternation() override;
-
- std::vector<PositionInfo> first() const override;
- std::vector<PositionInfo> last() const override;
-
- bool empty() const override;
- void notePositions(GlushkovBuildState &bs) override;
- void buildFollowSet(GlushkovBuildState &bs,
- const std::vector<PositionInfo> &lastPos) override;
- bool repeatable() const override;
-
-private:
- ComponentCondReference(const ComponentCondReference &other);
-
- enum Condition {
- CONDITION_NUMBER,
- CONDITION_NAME,
- CONDITION_ASSERTION
- };
-
- enum Condition kind;
-
- unsigned ref_id;
- std::string ref_name;
- std::unique_ptr<Component> assertion;
-
- /** True if an alternation has been added, which means we have both a YES
- * and a NO branch. */
- bool hasBothBranches;
-};
-
-} // namespace ue2
-
-#endif // PARSER_COMPONENTCONDREFERENCE_H_
+
+ ~ComponentCondReference() override;
+ ComponentCondReference *clone() const override;
+ Component *accept(ComponentVisitor &v) override;
+ void accept(ConstComponentVisitor &v) const override;
+
+ void addAlternation() override;
+
+ std::vector<PositionInfo> first() const override;
+ std::vector<PositionInfo> last() const override;
+
+ bool empty() const override;
+ void notePositions(GlushkovBuildState &bs) override;
+ void buildFollowSet(GlushkovBuildState &bs,
+ const std::vector<PositionInfo> &lastPos) override;
+ bool repeatable() const override;
+
+private:
+ ComponentCondReference(const ComponentCondReference &other);
+
+ enum Condition {
+ CONDITION_NUMBER,
+ CONDITION_NAME,
+ CONDITION_ASSERTION
+ };
+
+ enum Condition kind;
+
+ unsigned ref_id;
+ std::string ref_name;
+ std::unique_ptr<Component> assertion;
+
+ /** True if an alternation has been added, which means we have both a YES
+ * and a NO branch. */
+ bool hasBothBranches;
+};
+
+} // namespace ue2
+
+#endif // PARSER_COMPONENTCONDREFERENCE_H_
diff --git a/contrib/libs/hyperscan/src/parser/ComponentEUS.cpp b/contrib/libs/hyperscan/src/parser/ComponentEUS.cpp
index 86c762de7d..27f30d7eb7 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentEUS.cpp
+++ b/contrib/libs/hyperscan/src/parser/ComponentEUS.cpp
@@ -1,75 +1,75 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Extended Unicode sequences (\\X)
- */
-
-
-#include "ComponentEUS.h"
-
-#include "buildstate.h"
-#include "position.h"
-#include "position_info.h"
-#include "Parser.h"
-#include "nfagraph/ng_builder.h"
-#include "util/charreach.h"
-
-using namespace std;
-
-namespace ue2 {
-
-ComponentEUS::ComponentEUS(u32 loc_in, const ParseMode &mode)
- : loc(loc_in), utf8(mode.utf8),
- position(GlushkovBuildState::POS_UNINITIALIZED) {}
-
-ComponentEUS::~ComponentEUS() {}
-
-ComponentEUS * ComponentEUS::clone() const {
- return new ComponentEUS(*this);
-}
-
-vector<PositionInfo> ComponentEUS::first() const {
- return vector<PositionInfo>(1, PositionInfo(position));
-}
-
-vector<PositionInfo> ComponentEUS::last() const {
- return vector<PositionInfo>(1, PositionInfo(position));
-}
-
-void ComponentEUS::notePositions(GlushkovBuildState &bs) {
- NFABuilder &builder = bs.getBuilder();
- position = builder.makePositions(1);
- builder.addCharReach(position, CharReach::dot());
- builder.setNodeReportID(position, 0 /* offset adj */);
- if (utf8) { /* we are prefiltering, turn to.+ */
- builder.addEdge(position, position);
- }
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Extended Unicode sequences (\\X)
+ */
+
+
+#include "ComponentEUS.h"
+
+#include "buildstate.h"
+#include "position.h"
+#include "position_info.h"
+#include "Parser.h"
+#include "nfagraph/ng_builder.h"
+#include "util/charreach.h"
+
+using namespace std;
+
+namespace ue2 {
+
+ComponentEUS::ComponentEUS(u32 loc_in, const ParseMode &mode)
+ : loc(loc_in), utf8(mode.utf8),
+ position(GlushkovBuildState::POS_UNINITIALIZED) {}
+
+ComponentEUS::~ComponentEUS() {}
+
+ComponentEUS * ComponentEUS::clone() const {
+ return new ComponentEUS(*this);
+}
+
+vector<PositionInfo> ComponentEUS::first() const {
+ return vector<PositionInfo>(1, PositionInfo(position));
+}
+
+vector<PositionInfo> ComponentEUS::last() const {
+ return vector<PositionInfo>(1, PositionInfo(position));
+}
+
+void ComponentEUS::notePositions(GlushkovBuildState &bs) {
+ NFABuilder &builder = bs.getBuilder();
+ position = builder.makePositions(1);
+ builder.addCharReach(position, CharReach::dot());
+ builder.setNodeReportID(position, 0 /* offset adj */);
+ if (utf8) { /* we are prefiltering, turn to.+ */
+ builder.addEdge(position, position);
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/ComponentEUS.h b/contrib/libs/hyperscan/src/parser/ComponentEUS.h
index 3f8d920625..a71922de9e 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentEUS.h
+++ b/contrib/libs/hyperscan/src/parser/ComponentEUS.h
@@ -1,86 +1,86 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Extended Unicode sequences (\\X)
- */
-
-#ifndef _RE_COMPONENTEXTENDEDUNICODESEQUENCE_H_
-#define _RE_COMPONENTEXTENDEDUNICODESEQUENCE_H_
-
-#include "Component.h"
-
-namespace ue2 {
-
-struct ParseMode;
-
-class ComponentEUS : public Component {
- friend class DumpVisitor;
- friend class UnsupportedVisitor;
-public:
- ComponentEUS(u32 loc, const ParseMode &mode);
- ~ComponentEUS() override;
- ComponentEUS *clone() const override;
-
- Component *accept(ComponentVisitor &v) override {
- Component *c = v.visit(this);
- v.post(this);
- return c;
- }
-
- void accept(ConstComponentVisitor &v) const override {
- v.pre(*this);
- v.during(*this);
- v.post(*this);
- }
-
- std::vector<PositionInfo> first() const override;
- std::vector<PositionInfo> last() const override;
-
- bool empty() const override { return false; }
-
- void notePositions(GlushkovBuildState &bs) override;
- void buildFollowSet(GlushkovBuildState &,
- const std::vector<PositionInfo> &) override {
- // all follow set construction is handled by firsts/lasts
- return;
- }
-
-private:
- u32 loc;
- bool utf8;
- Position position;
-
- ComponentEUS(const ComponentEUS &other)
- : Component(other), loc(other.loc), utf8(other.utf8),
- position(other.position) {}
-};
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Extended Unicode sequences (\\X)
+ */
+
+#ifndef _RE_COMPONENTEXTENDEDUNICODESEQUENCE_H_
+#define _RE_COMPONENTEXTENDEDUNICODESEQUENCE_H_
+
+#include "Component.h"
+
+namespace ue2 {
+
+struct ParseMode;
+
+class ComponentEUS : public Component {
+ friend class DumpVisitor;
+ friend class UnsupportedVisitor;
+public:
+ ComponentEUS(u32 loc, const ParseMode &mode);
+ ~ComponentEUS() override;
+ ComponentEUS *clone() const override;
+
+ Component *accept(ComponentVisitor &v) override {
+ Component *c = v.visit(this);
+ v.post(this);
+ return c;
+ }
+
+ void accept(ConstComponentVisitor &v) const override {
+ v.pre(*this);
+ v.during(*this);
+ v.post(*this);
+ }
+
+ std::vector<PositionInfo> first() const override;
+ std::vector<PositionInfo> last() const override;
+
+ bool empty() const override { return false; }
+
+ void notePositions(GlushkovBuildState &bs) override;
+ void buildFollowSet(GlushkovBuildState &,
+ const std::vector<PositionInfo> &) override {
+ // all follow set construction is handled by firsts/lasts
+ return;
+ }
+
+private:
+ u32 loc;
+ bool utf8;
+ Position position;
+
+ ComponentEUS(const ComponentEUS &other)
+ : Component(other), loc(other.loc), utf8(other.utf8),
+ position(other.position) {}
+};
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/parser/ComponentEmpty.cpp b/contrib/libs/hyperscan/src/parser/ComponentEmpty.cpp
index f650d38a46..8664450b89 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentEmpty.cpp
+++ b/contrib/libs/hyperscan/src/parser/ComponentEmpty.cpp
@@ -1,93 +1,93 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Represents an empty regex element, like (?m)
- */
-#include <cassert>
-
-#include "ComponentEmpty.h"
-#include "position.h"
-#include "position_info.h"
-#include "buildstate.h"
-#include "ue2common.h"
-
-using namespace std;
-
-namespace ue2 {
-
-ComponentEmpty::ComponentEmpty() {
- // Surprise, it's EMPTY!
-}
-
-ComponentEmpty::~ComponentEmpty() {
- // Surprise, it's EMPTY!
-}
-
-ComponentEmpty *ComponentEmpty::clone() const { return new ComponentEmpty(); }
-
-bool ComponentEmpty::empty() const {
- return true;
-}
-
-bool ComponentEmpty::vacuous_everywhere(void) const {
- return true;
-}
-
-bool ComponentEmpty::repeatable() const {
- // This is the whole point of this class. Empty constructs like '(?m)' are
- // not repeatable.
- return false;
-}
-
-vector<PositionInfo> ComponentEmpty::first() const {
- return vector<PositionInfo>(1, GlushkovBuildState::POS_EPSILON);
-}
-
-vector<PositionInfo> ComponentEmpty::last() const {
- return vector<PositionInfo>();
-}
-
-void ComponentEmpty::notePositions(GlushkovBuildState &) {
- // Nothing to do.
-}
-
-void ComponentEmpty::buildFollowSet(GlushkovBuildState &,
- const vector<PositionInfo> &) {
- // Nothing to do.
-}
-
-bool ComponentEmpty::checkEmbeddedStartAnchor(bool at_start) const {
- return at_start;
-}
-
-bool ComponentEmpty::checkEmbeddedEndAnchor(bool at_end) const {
- return at_end;
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Represents an empty regex element, like (?m)
+ */
+#include <cassert>
+
+#include "ComponentEmpty.h"
+#include "position.h"
+#include "position_info.h"
+#include "buildstate.h"
+#include "ue2common.h"
+
+using namespace std;
+
+namespace ue2 {
+
+ComponentEmpty::ComponentEmpty() {
+ // Surprise, it's EMPTY!
+}
+
+ComponentEmpty::~ComponentEmpty() {
+ // Surprise, it's EMPTY!
+}
+
+ComponentEmpty *ComponentEmpty::clone() const { return new ComponentEmpty(); }
+
+bool ComponentEmpty::empty() const {
+ return true;
+}
+
+bool ComponentEmpty::vacuous_everywhere(void) const {
+ return true;
+}
+
+bool ComponentEmpty::repeatable() const {
+ // This is the whole point of this class. Empty constructs like '(?m)' are
+ // not repeatable.
+ return false;
+}
+
+vector<PositionInfo> ComponentEmpty::first() const {
+ return vector<PositionInfo>(1, GlushkovBuildState::POS_EPSILON);
+}
+
+vector<PositionInfo> ComponentEmpty::last() const {
+ return vector<PositionInfo>();
+}
+
+void ComponentEmpty::notePositions(GlushkovBuildState &) {
+ // Nothing to do.
+}
+
+void ComponentEmpty::buildFollowSet(GlushkovBuildState &,
+ const vector<PositionInfo> &) {
+ // Nothing to do.
+}
+
+bool ComponentEmpty::checkEmbeddedStartAnchor(bool at_start) const {
+ return at_start;
+}
+
+bool ComponentEmpty::checkEmbeddedEndAnchor(bool at_end) const {
+ return at_end;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/ComponentEmpty.h b/contrib/libs/hyperscan/src/parser/ComponentEmpty.h
index db3bcafaab..17564c6d62 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentEmpty.h
+++ b/contrib/libs/hyperscan/src/parser/ComponentEmpty.h
@@ -1,75 +1,75 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Represents an empty regex element, like (?m)
- */
-
-#ifndef PARSER_COMPONENT_EMPTY_H_
-#define PARSER_COMPONENT_EMPTY_H_
-
-#include "Component.h"
-
-namespace ue2 {
-
-class ComponentEmpty : public Component {
- friend class DumpVisitor;
-public:
- ComponentEmpty();
- ~ComponentEmpty() override;
- ComponentEmpty *clone() const override;
-
- Component *accept(ComponentVisitor &v) override {
- Component *c = v.visit(this);
- v.post(this);
- return c;
- }
-
- void accept(ConstComponentVisitor &v) const override {
- v.pre(*this);
- v.during(*this);
- v.post(*this);
- }
-
- std::vector<PositionInfo> first() const override;
- std::vector<PositionInfo> last() const override;
- bool empty() const override;
- bool vacuous_everywhere() const override;
- bool repeatable() const override;
- void notePositions(GlushkovBuildState &bs) override;
- void buildFollowSet(GlushkovBuildState &bs,
- const std::vector<PositionInfo> &lastPos) override;
-
- bool checkEmbeddedStartAnchor(bool at_start) const override;
- bool checkEmbeddedEndAnchor(bool at_end) const override;
-
-};
-
-} // namespace ue2
-
-#endif // PARSER_COMPONENT_EMPTY_H_
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Represents an empty regex element, like (?m)
+ */
+
+#ifndef PARSER_COMPONENT_EMPTY_H_
+#define PARSER_COMPONENT_EMPTY_H_
+
+#include "Component.h"
+
+namespace ue2 {
+
+class ComponentEmpty : public Component {
+ friend class DumpVisitor;
+public:
+ ComponentEmpty();
+ ~ComponentEmpty() override;
+ ComponentEmpty *clone() const override;
+
+ Component *accept(ComponentVisitor &v) override {
+ Component *c = v.visit(this);
+ v.post(this);
+ return c;
+ }
+
+ void accept(ConstComponentVisitor &v) const override {
+ v.pre(*this);
+ v.during(*this);
+ v.post(*this);
+ }
+
+ std::vector<PositionInfo> first() const override;
+ std::vector<PositionInfo> last() const override;
+ bool empty() const override;
+ bool vacuous_everywhere() const override;
+ bool repeatable() const override;
+ void notePositions(GlushkovBuildState &bs) override;
+ void buildFollowSet(GlushkovBuildState &bs,
+ const std::vector<PositionInfo> &lastPos) override;
+
+ bool checkEmbeddedStartAnchor(bool at_start) const override;
+ bool checkEmbeddedEndAnchor(bool at_end) const override;
+
+};
+
+} // namespace ue2
+
+#endif // PARSER_COMPONENT_EMPTY_H_
diff --git a/contrib/libs/hyperscan/src/parser/ComponentRepeat.cpp b/contrib/libs/hyperscan/src/parser/ComponentRepeat.cpp
index 3d58c60eb9..09f59d05ec 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentRepeat.cpp
+++ b/contrib/libs/hyperscan/src/parser/ComponentRepeat.cpp
@@ -1,188 +1,188 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Repeats ('*', '+', '?', '{M,N}', etc)
- */
-
-
-#include "ComponentRepeat.h"
-
-#include "buildstate.h"
-#include "nfagraph/ng_builder.h"
-#include "parse_error.h"
-#include "Parser.h"
-#include "position.h"
-#include "position_dump.h"
-#include "position_info.h"
-#include "ue2common.h"
-#include "util/make_unique.h"
-
-#include <algorithm>
-#include <cassert>
-
-using namespace std;
-
-namespace ue2 {
-
-/** \brief Hard limit on the maximum repeat for bounded repeats. */
-static constexpr u32 MAX_REPEAT = 32767;
-
-/** \brief If expanding a repeat would lead to this many positions being
- * generated, we fail the pattern. */
-static constexpr u32 MAX_POSITIONS_EXPANDED = 500000; // arbitrarily huge
-
-/* no edge priorities means that if our subcomponent can be empty, our min
- * extent is effectively zero. */
-ComponentRepeat::ComponentRepeat(unique_ptr<Component> sub_comp_in, u32 min,
- u32 max, enum RepeatType t)
- : type(t), sub_comp(move(sub_comp_in)), m_min(min), m_max(max),
- posFirst(GlushkovBuildState::POS_UNINITIALIZED),
- posLast(GlushkovBuildState::POS_UNINITIALIZED) {
- assert(sub_comp);
- assert(max > 0);
- assert(m_min <= m_max);
-
- if (m_min > MAX_REPEAT) {
- throw ParseError("Bounded repeat is too large.");
- }
- if (m_max != NoLimit && m_max > MAX_REPEAT) {
- throw ParseError("Bounded repeat is too large.");
- }
-}
-
-ComponentRepeat::~ComponentRepeat() {}
-
-ComponentRepeat *ComponentRepeat::clone() const {
- return new ComponentRepeat(*this);
-}
-
-ComponentRepeat::ComponentRepeat(const ComponentRepeat &other)
- : Component(other),
- type(other.type), sub_comp(unique_ptr<Component>(other.sub_comp->clone())),
- m_min(other.m_min), m_max(other.m_max),
- m_firsts(other.m_firsts), m_lasts(other.m_lasts),
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Repeats ('*', '+', '?', '{M,N}', etc)
+ */
+
+
+#include "ComponentRepeat.h"
+
+#include "buildstate.h"
+#include "nfagraph/ng_builder.h"
+#include "parse_error.h"
+#include "Parser.h"
+#include "position.h"
+#include "position_dump.h"
+#include "position_info.h"
+#include "ue2common.h"
+#include "util/make_unique.h"
+
+#include <algorithm>
+#include <cassert>
+
+using namespace std;
+
+namespace ue2 {
+
+/** \brief Hard limit on the maximum repeat for bounded repeats. */
+static constexpr u32 MAX_REPEAT = 32767;
+
+/** \brief If expanding a repeat would lead to this many positions being
+ * generated, we fail the pattern. */
+static constexpr u32 MAX_POSITIONS_EXPANDED = 500000; // arbitrarily huge
+
+/* no edge priorities means that if our subcomponent can be empty, our min
+ * extent is effectively zero. */
+ComponentRepeat::ComponentRepeat(unique_ptr<Component> sub_comp_in, u32 min,
+ u32 max, enum RepeatType t)
+ : type(t), sub_comp(move(sub_comp_in)), m_min(min), m_max(max),
+ posFirst(GlushkovBuildState::POS_UNINITIALIZED),
+ posLast(GlushkovBuildState::POS_UNINITIALIZED) {
+ assert(sub_comp);
+ assert(max > 0);
+ assert(m_min <= m_max);
+
+ if (m_min > MAX_REPEAT) {
+ throw ParseError("Bounded repeat is too large.");
+ }
+ if (m_max != NoLimit && m_max > MAX_REPEAT) {
+ throw ParseError("Bounded repeat is too large.");
+ }
+}
+
+ComponentRepeat::~ComponentRepeat() {}
+
+ComponentRepeat *ComponentRepeat::clone() const {
+ return new ComponentRepeat(*this);
+}
+
+ComponentRepeat::ComponentRepeat(const ComponentRepeat &other)
+ : Component(other),
+ type(other.type), sub_comp(unique_ptr<Component>(other.sub_comp->clone())),
+ m_min(other.m_min), m_max(other.m_max),
+ m_firsts(other.m_firsts), m_lasts(other.m_lasts),
posFirst(other.posFirst), posLast(other.posLast) {}
-
-bool ComponentRepeat::empty() const {
- return m_min == 0 || sub_comp->empty();
-}
-
-bool ComponentRepeat::repeatable() const {
- return false;
-}
-
-static
-void addBase(Position base, vector<PositionInfo> &firsts,
- vector<PositionInfo> &lasts) {
- for (auto &e : firsts) {
- if (e.pos != GlushkovBuildState::POS_EPSILON) {
- e.pos += base;
- }
- }
- for (auto &e : lasts) {
- e.pos += base;
- }
-}
-
-static
-void checkPositions(vector<PositionInfo> &v, const GlushkovBuildState &bs) {
- const NFABuilder& builder = bs.getBuilder();
- for (const auto &e : v) {
- if (builder.isSpecialState(e.pos)) {
- throw ParseError("Embedded anchors not supported.");
- }
- }
-}
-
-void ComponentRepeat::notePositions(GlushkovBuildState &bs) {
- assert(m_max > 0);
- assert(m_max == NoLimit || m_max < MAX_REPEAT);
-
- /* Note: We can construct smaller subgraphs if we're not maintaining edge
- * priorities. */
-
- // We create one copy only through a recursive call to notePositions(),
- // first() and last(). Then we clone its positions and store the
- // appropriate firsts and lasts values for the copies.
- posFirst = bs.getBuilder().numVertices();
- sub_comp->notePositions(bs);
-
- u32 copies = m_max < NoLimit ? m_max : MAX(m_min, 1);
- DEBUG_PRINTF("building %u copies of repeated region\n", copies);
- m_firsts.clear();
- m_lasts.clear();
- m_firsts.resize(copies);
- m_lasts.resize(copies);
-
- m_firsts[0] = sub_comp->first();
- m_lasts[0] = sub_comp->last();
-
- postSubNotePositionHook();
-
- posLast = bs.getBuilder().numVertices() - 1;
- u32 vcount = posLast + 1 - posFirst;
-
- // If we're making more than one copy, then our firsts and lasts must only
- // contain vertices inside [posFirst, posLast]: anything else means we have
- // an embedded anchor or otherwise weird situation.
- if (copies > 1) {
- checkPositions(m_firsts[0], bs);
- checkPositions(m_lasts[0], bs);
- }
-
- // Avoid enormous expansions
- if (vcount * copies > MAX_POSITIONS_EXPANDED) {
- throw ParseError("Bounded repeat is too large.");
- }
-
- // Add positions for the rest of the copies
- size_t copyPositions = vcount * (copies - 1);
- bs.getBuilder().makePositions(copyPositions);
-
- // Calculate our firsts and lasts for the copies
- for (u32 i = 1; i < copies; ++i) {
- m_firsts[i] = m_firsts[0];
- m_lasts[i] = m_lasts[0];
- u32 base = i * vcount;
- addBase(base, m_firsts[i], m_lasts[i]);
- }
-
- recordPosBounds(posFirst, bs.getBuilder().numVertices());
+
+bool ComponentRepeat::empty() const {
+ return m_min == 0 || sub_comp->empty();
+}
+
+bool ComponentRepeat::repeatable() const {
+ return false;
+}
+
+static
+void addBase(Position base, vector<PositionInfo> &firsts,
+ vector<PositionInfo> &lasts) {
+ for (auto &e : firsts) {
+ if (e.pos != GlushkovBuildState::POS_EPSILON) {
+ e.pos += base;
+ }
+ }
+ for (auto &e : lasts) {
+ e.pos += base;
+ }
+}
+
+static
+void checkPositions(vector<PositionInfo> &v, const GlushkovBuildState &bs) {
+ const NFABuilder& builder = bs.getBuilder();
+ for (const auto &e : v) {
+ if (builder.isSpecialState(e.pos)) {
+ throw ParseError("Embedded anchors not supported.");
+ }
+ }
+}
+
+void ComponentRepeat::notePositions(GlushkovBuildState &bs) {
+ assert(m_max > 0);
+ assert(m_max == NoLimit || m_max < MAX_REPEAT);
+
+ /* Note: We can construct smaller subgraphs if we're not maintaining edge
+ * priorities. */
+
+ // We create one copy only through a recursive call to notePositions(),
+ // first() and last(). Then we clone its positions and store the
+ // appropriate firsts and lasts values for the copies.
+ posFirst = bs.getBuilder().numVertices();
+ sub_comp->notePositions(bs);
+
+ u32 copies = m_max < NoLimit ? m_max : MAX(m_min, 1);
+ DEBUG_PRINTF("building %u copies of repeated region\n", copies);
+ m_firsts.clear();
+ m_lasts.clear();
+ m_firsts.resize(copies);
+ m_lasts.resize(copies);
+
+ m_firsts[0] = sub_comp->first();
+ m_lasts[0] = sub_comp->last();
+
+ postSubNotePositionHook();
+
+ posLast = bs.getBuilder().numVertices() - 1;
+ u32 vcount = posLast + 1 - posFirst;
+
+ // If we're making more than one copy, then our firsts and lasts must only
+ // contain vertices inside [posFirst, posLast]: anything else means we have
+ // an embedded anchor or otherwise weird situation.
+ if (copies > 1) {
+ checkPositions(m_firsts[0], bs);
+ checkPositions(m_lasts[0], bs);
+ }
+
+ // Avoid enormous expansions
+ if (vcount * copies > MAX_POSITIONS_EXPANDED) {
+ throw ParseError("Bounded repeat is too large.");
+ }
+
+ // Add positions for the rest of the copies
+ size_t copyPositions = vcount * (copies - 1);
+ bs.getBuilder().makePositions(copyPositions);
+
+ // Calculate our firsts and lasts for the copies
+ for (u32 i = 1; i < copies; ++i) {
+ m_firsts[i] = m_firsts[0];
+ m_lasts[i] = m_lasts[0];
+ u32 base = i * vcount;
+ addBase(base, m_firsts[i], m_lasts[i]);
+ }
+
+ recordPosBounds(posFirst, bs.getBuilder().numVertices());
// Each optional repeat has an epsilon at the end of its firsts list.
for (u32 i = m_min; i < m_firsts.size(); i++) {
m_firsts[i].push_back(GlushkovBuildState::POS_EPSILON);
}
-}
-
-vector<PositionInfo> ComponentRepeat::first() const {
+}
+
+vector<PositionInfo> ComponentRepeat::first() const {
if (!m_max) {
return {};
}
@@ -192,177 +192,177 @@ vector<PositionInfo> ComponentRepeat::first() const {
DEBUG_PRINTF("firsts = %s\n",
dumpPositions(begin(firsts), end(firsts)).c_str());
return firsts;
-}
-
-void ComponentRepeat::buildFollowSet(GlushkovBuildState &bs,
- const vector<PositionInfo> &lastPos) {
- if (!m_max) {
- return;
- }
- DEBUG_PRINTF("enter\n");
-
- // Wire up the first (the "real") entry
-
- DEBUG_PRINTF("initial repeat\n");
- sub_comp->buildFollowSet(bs, lastPos);
-
- // Clone the subgraph we just added N times, where N is the minimum extent
- // of the graph minus one, wiring them up in a linear sequence
-
- u32 copies = m_firsts.size();
- DEBUG_PRINTF("cloning %u copies of repeat\n", copies - 1);
- for (u32 rep = 1; rep < copies; rep++) {
- u32 offset = (posLast + 1 - posFirst) * rep;
- if (offset > 0) {
- bs.cloneFollowSet(posFirst, posLast, offset);
- }
- }
-
+}
+
+void ComponentRepeat::buildFollowSet(GlushkovBuildState &bs,
+ const vector<PositionInfo> &lastPos) {
+ if (!m_max) {
+ return;
+ }
+ DEBUG_PRINTF("enter\n");
+
+ // Wire up the first (the "real") entry
+
+ DEBUG_PRINTF("initial repeat\n");
+ sub_comp->buildFollowSet(bs, lastPos);
+
+ // Clone the subgraph we just added N times, where N is the minimum extent
+ // of the graph minus one, wiring them up in a linear sequence
+
+ u32 copies = m_firsts.size();
+ DEBUG_PRINTF("cloning %u copies of repeat\n", copies - 1);
+ for (u32 rep = 1; rep < copies; rep++) {
+ u32 offset = (posLast + 1 - posFirst) * rep;
+ if (offset > 0) {
+ bs.cloneFollowSet(posFirst, posLast, offset);
+ }
+ }
+
wireRepeats(bs);
-
- DEBUG_PRINTF("leave\n");
-}
-
-void ComponentRepeat::optimise(bool connected_to_sds) {
- DEBUG_PRINTF("opt %d\n", (int)connected_to_sds);
- if (!connected_to_sds) {
- return;
- }
-
- DEBUG_PRINTF("setting m_max to %u\n", m_min);
- m_max = m_min;
-}
-
-bool ComponentRepeat::vacuous_everywhere() const {
+
+ DEBUG_PRINTF("leave\n");
+}
+
+void ComponentRepeat::optimise(bool connected_to_sds) {
+ DEBUG_PRINTF("opt %d\n", (int)connected_to_sds);
+ if (!connected_to_sds) {
+ return;
+ }
+
+ DEBUG_PRINTF("setting m_max to %u\n", m_min);
+ m_max = m_min;
+}
+
+bool ComponentRepeat::vacuous_everywhere() const {
return !m_min || sub_comp->vacuous_everywhere();
-}
-
-bool ComponentRepeat::checkEmbeddedStartAnchor(bool at_start) const {
- at_start = sub_comp->checkEmbeddedStartAnchor(at_start);
-
- if (m_max > 1) {
- at_start = sub_comp->checkEmbeddedStartAnchor(at_start);
- }
-
- return at_start;
-}
-
-bool ComponentRepeat::checkEmbeddedEndAnchor(bool at_end) const {
- at_end = sub_comp->checkEmbeddedEndAnchor(at_end);
-
- if (m_max > 1) {
- at_end = sub_comp->checkEmbeddedEndAnchor(at_end);
- }
-
- return at_end;
-}
-
-Component *ComponentRepeat::accept(ComponentVisitor &v) {
- Component *c = v.visit(this);
- if (c != this) {
- v.post(this);
- return c;
- }
-
- c = sub_comp->accept(v);
- if (c != sub_comp.get()) {
- sub_comp.reset(c);
- }
-
- v.post(this);
- return !sub_comp ? nullptr : this;
-}
-
-void ComponentRepeat::accept(ConstComponentVisitor &v) const {
- v.pre(*this);
- sub_comp->accept(v);
- v.post(*this);
-}
-
-vector<PositionInfo> ComponentRepeat::last() const {
- vector<PositionInfo> lasts;
- if (!m_max) {
- return lasts;
- }
-
- assert(!m_firsts.empty()); // notePositions should already have run
- assert(!m_lasts.empty());
-
+}
+
+bool ComponentRepeat::checkEmbeddedStartAnchor(bool at_start) const {
+ at_start = sub_comp->checkEmbeddedStartAnchor(at_start);
+
+ if (m_max > 1) {
+ at_start = sub_comp->checkEmbeddedStartAnchor(at_start);
+ }
+
+ return at_start;
+}
+
+bool ComponentRepeat::checkEmbeddedEndAnchor(bool at_end) const {
+ at_end = sub_comp->checkEmbeddedEndAnchor(at_end);
+
+ if (m_max > 1) {
+ at_end = sub_comp->checkEmbeddedEndAnchor(at_end);
+ }
+
+ return at_end;
+}
+
+Component *ComponentRepeat::accept(ComponentVisitor &v) {
+ Component *c = v.visit(this);
+ if (c != this) {
+ v.post(this);
+ return c;
+ }
+
+ c = sub_comp->accept(v);
+ if (c != sub_comp.get()) {
+ sub_comp.reset(c);
+ }
+
+ v.post(this);
+ return !sub_comp ? nullptr : this;
+}
+
+void ComponentRepeat::accept(ConstComponentVisitor &v) const {
+ v.pre(*this);
+ sub_comp->accept(v);
+ v.post(*this);
+}
+
+vector<PositionInfo> ComponentRepeat::last() const {
+ vector<PositionInfo> lasts;
+ if (!m_max) {
+ return lasts;
+ }
+
+ assert(!m_firsts.empty()); // notePositions should already have run
+ assert(!m_lasts.empty());
+
const auto &l = m_min ? m_lasts[m_min - 1] : m_lasts[0];
lasts.insert(lasts.end(), l.begin(), l.end());
- if (!m_min || m_min != m_lasts.size()) {
- lasts.insert(lasts.end(), m_lasts.back().begin(), m_lasts.back().end());
- }
+ if (!m_min || m_min != m_lasts.size()) {
+ lasts.insert(lasts.end(), m_lasts.back().begin(), m_lasts.back().end());
+ }
DEBUG_PRINTF("lasts = %s\n",
dumpPositions(lasts.begin(), lasts.end()).c_str());
- return lasts;
-}
-
+ return lasts;
+}
+
void ComponentRepeat::wireRepeats(GlushkovBuildState &bs) {
- /* note: m_lasts[0] already valid */
- u32 copies = m_firsts.size();
- const bool isEmpty = sub_comp->empty();
+ /* note: m_lasts[0] already valid */
+ u32 copies = m_firsts.size();
+ const bool isEmpty = sub_comp->empty();
const vector<PositionInfo> &optLasts =
m_min ? m_lasts[m_min - 1] : m_lasts[0];
-
- if (!copies) {
- goto inf_check;
- }
-
- DEBUG_PRINTF("wiring up %u mand repeats\n", m_min);
- for (u32 rep = 1; rep < m_min; rep++) {
- bs.connectRegions(m_lasts[rep - 1], m_firsts[rep]);
-
- if (isEmpty) {
- m_lasts[rep].insert(m_lasts[rep].end(), m_lasts[rep - 1].begin(),
- m_lasts[rep - 1].end());
- }
- }
-
- DEBUG_PRINTF("wiring up %d optional repeats\n", copies - m_min);
- for (u32 rep = MAX(m_min, 1); rep < copies; rep++) {
- vector<PositionInfo> lasts = m_lasts[rep - 1];
+
+ if (!copies) {
+ goto inf_check;
+ }
+
+ DEBUG_PRINTF("wiring up %u mand repeats\n", m_min);
+ for (u32 rep = 1; rep < m_min; rep++) {
+ bs.connectRegions(m_lasts[rep - 1], m_firsts[rep]);
+
+ if (isEmpty) {
+ m_lasts[rep].insert(m_lasts[rep].end(), m_lasts[rep - 1].begin(),
+ m_lasts[rep - 1].end());
+ }
+ }
+
+ DEBUG_PRINTF("wiring up %d optional repeats\n", copies - m_min);
+ for (u32 rep = MAX(m_min, 1); rep < copies; rep++) {
+ vector<PositionInfo> lasts = m_lasts[rep - 1];
if (rep != m_min) {
- lasts.insert(lasts.end(), optLasts.begin(), optLasts.end());
- sort(lasts.begin(), lasts.end());
- lasts.erase(unique(lasts.begin(), lasts.end()), lasts.end());
- }
- bs.connectRegions(lasts, m_firsts[rep]);
- }
-
-inf_check:
- // If we have no max bound, we need a self-loop as well.
- if (m_max == NoLimit) {
- DEBUG_PRINTF("final repeat self-loop\n");
- bs.connectRegions(m_lasts.back(), m_firsts.back());
- }
-}
-
-static
-bool hasPositionFlags(const Component &c) {
- for (const auto &e : c.first()) {
- if (e.flags) {
- return true;
- }
- }
- return false;
-}
-
-void ComponentRepeat::postSubNotePositionHook() {
- // UE-444 optimization: we can REWRITE m_min under various circumstances,
- // so that we create smaller NFA graphs. Note that this is _not_ possible
- // if our subcomponent contains a flagged position, e.g. nofloat.
- if (!hasPositionFlags(*sub_comp) && sub_comp->empty()) {
- m_min = 0;
- }
-}
-
-unique_ptr<ComponentRepeat> makeComponentRepeat(unique_ptr<Component> sub_comp,
- u32 min, u32 max,
- ComponentRepeat::RepeatType t) {
- return ue2::make_unique<ComponentRepeat>(move(sub_comp), min, max, t);
-}
-
-} // namespace ue2
+ lasts.insert(lasts.end(), optLasts.begin(), optLasts.end());
+ sort(lasts.begin(), lasts.end());
+ lasts.erase(unique(lasts.begin(), lasts.end()), lasts.end());
+ }
+ bs.connectRegions(lasts, m_firsts[rep]);
+ }
+
+inf_check:
+ // If we have no max bound, we need a self-loop as well.
+ if (m_max == NoLimit) {
+ DEBUG_PRINTF("final repeat self-loop\n");
+ bs.connectRegions(m_lasts.back(), m_firsts.back());
+ }
+}
+
+static
+bool hasPositionFlags(const Component &c) {
+ for (const auto &e : c.first()) {
+ if (e.flags) {
+ return true;
+ }
+ }
+ return false;
+}
+
+void ComponentRepeat::postSubNotePositionHook() {
+ // UE-444 optimization: we can REWRITE m_min under various circumstances,
+ // so that we create smaller NFA graphs. Note that this is _not_ possible
+ // if our subcomponent contains a flagged position, e.g. nofloat.
+ if (!hasPositionFlags(*sub_comp) && sub_comp->empty()) {
+ m_min = 0;
+ }
+}
+
+unique_ptr<ComponentRepeat> makeComponentRepeat(unique_ptr<Component> sub_comp,
+ u32 min, u32 max,
+ ComponentRepeat::RepeatType t) {
+ return ue2::make_unique<ComponentRepeat>(move(sub_comp), min, max, t);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/ComponentRepeat.h b/contrib/libs/hyperscan/src/parser/ComponentRepeat.h
index 824a986386..8905bfcf5e 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentRepeat.h
+++ b/contrib/libs/hyperscan/src/parser/ComponentRepeat.h
@@ -1,65 +1,65 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Repeats ('*', '+', '?', '{M,N}', etc)
- */
-
-#ifndef RE_COMPONENTREPEAT_H
-#define RE_COMPONENTREPEAT_H
-
-#include "Component.h"
-#include "position.h"
-#include "ue2common.h"
-
-#include <memory>
-#include <utility>
-
-namespace ue2 {
-
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Repeats ('*', '+', '?', '{M,N}', etc)
+ */
+
+#ifndef RE_COMPONENTREPEAT_H
+#define RE_COMPONENTREPEAT_H
+
+#include "Component.h"
+#include "position.h"
+#include "ue2common.h"
+
+#include <memory>
+#include <utility>
+
+namespace ue2 {
+
/**
* \brief Encapsulates a repeat of a subexpression ('*', '+', '?', '{M,N}',
- * etc).
- *
+ * etc).
+ *
* ASCII Art Time:
- *
- * Our standard representation of standard repeats. Other constructions (fan-in
- * vs fan-out) would also be possible and equivalent for our purposes.
- *
- * {n,m}
- *
+ *
+ * Our standard representation of standard repeats. Other constructions (fan-in
+ * vs fan-out) would also be possible and equivalent for our purposes.
+ *
+ * {n,m}
+ *
* S->M->M->M->O->O->O->T
* | ^ ^ ^
* | | | |
* \-----------/
- *
- * {0,m}
- *
+ *
+ * {0,m}
+ *
* /-----------\
* | |
* | V
@@ -67,78 +67,78 @@ namespace ue2 {
* | ^ ^ ^
* | | | |
* \--------/
- *
- */
-class ComponentRepeat : public Component {
- friend class ConstructLiteralVisitor;
- friend class DumpVisitor;
- friend class PrintVisitor;
- friend class SimplifyVisitor;
-public:
- /** \brief Value representing no maximum bound. */
- static constexpr u32 NoLimit = 0xffffffff;
-
- /** \brief Type of this repeat, characterising its
- * greediness/possessiveness. */
- enum RepeatType {
- /** Minimising repeat, like 'a*?'. */
- REPEAT_NONGREEDY,
- /** Maximising repeat, like 'a*'. This is the default in PCRE. */
- REPEAT_GREEDY,
- /** Possessive, maximising repeat, like 'a*+'. Possessive repeats are
- * only currently supported in prefiltering mode, where we treat them
- * the same way we treat normal greedy repeats. */
- REPEAT_POSSESSIVE,
- };
-
- ComponentRepeat(std::unique_ptr<Component> sub_comp, u32 min, u32 max,
- RepeatType t);
- ~ComponentRepeat() override;
- ComponentRepeat *clone() const override;
- Component *accept(ComponentVisitor &v) override;
- void accept(ConstComponentVisitor &v) const override;
-
- std::vector<PositionInfo> first() const override;
- std::vector<PositionInfo> last() const override;
- bool empty() const override;
- bool repeatable() const override;
- bool vacuous_everywhere() const override;
- void notePositions(GlushkovBuildState &bs) override;
- void buildFollowSet(GlushkovBuildState &bs,
- const std::vector<PositionInfo> &lastPos) override;
- bool checkEmbeddedStartAnchor(bool at_start) const override;
- bool checkEmbeddedEndAnchor(bool at_end) const override;
-
- void optimise(bool connected_to_sds) override;
-
- virtual std::pair<u32, u32> getBounds() const {
- return std::make_pair(m_min, m_max);
- }
-
- /** \brief From declared behaviour (not taking into account the
- * sub-component). */
- enum RepeatType type;
-
-protected:
- void postSubNotePositionHook();
+ *
+ */
+class ComponentRepeat : public Component {
+ friend class ConstructLiteralVisitor;
+ friend class DumpVisitor;
+ friend class PrintVisitor;
+ friend class SimplifyVisitor;
+public:
+ /** \brief Value representing no maximum bound. */
+ static constexpr u32 NoLimit = 0xffffffff;
+
+ /** \brief Type of this repeat, characterising its
+ * greediness/possessiveness. */
+ enum RepeatType {
+ /** Minimising repeat, like 'a*?'. */
+ REPEAT_NONGREEDY,
+ /** Maximising repeat, like 'a*'. This is the default in PCRE. */
+ REPEAT_GREEDY,
+ /** Possessive, maximising repeat, like 'a*+'. Possessive repeats are
+ * only currently supported in prefiltering mode, where we treat them
+ * the same way we treat normal greedy repeats. */
+ REPEAT_POSSESSIVE,
+ };
+
+ ComponentRepeat(std::unique_ptr<Component> sub_comp, u32 min, u32 max,
+ RepeatType t);
+ ~ComponentRepeat() override;
+ ComponentRepeat *clone() const override;
+ Component *accept(ComponentVisitor &v) override;
+ void accept(ConstComponentVisitor &v) const override;
+
+ std::vector<PositionInfo> first() const override;
+ std::vector<PositionInfo> last() const override;
+ bool empty() const override;
+ bool repeatable() const override;
+ bool vacuous_everywhere() const override;
+ void notePositions(GlushkovBuildState &bs) override;
+ void buildFollowSet(GlushkovBuildState &bs,
+ const std::vector<PositionInfo> &lastPos) override;
+ bool checkEmbeddedStartAnchor(bool at_start) const override;
+ bool checkEmbeddedEndAnchor(bool at_end) const override;
+
+ void optimise(bool connected_to_sds) override;
+
+ virtual std::pair<u32, u32> getBounds() const {
+ return std::make_pair(m_min, m_max);
+ }
+
+ /** \brief From declared behaviour (not taking into account the
+ * sub-component). */
+ enum RepeatType type;
+
+protected:
+ void postSubNotePositionHook();
void wireRepeats(GlushkovBuildState &bs);
-
- std::unique_ptr<Component> sub_comp;
- u32 m_min;
- u32 m_max;
-
- std::vector<std::vector<PositionInfo> > m_firsts;
- std::vector<std::vector<PositionInfo> > m_lasts;
- Position posFirst;
- Position posLast;
-
- ComponentRepeat(const ComponentRepeat &other);
-};
-
-std::unique_ptr<ComponentRepeat>
-makeComponentRepeat(std::unique_ptr<Component> sub_comp, u32 min, u32 max,
- ComponentRepeat::RepeatType t);
-
-} // namespace ue2
-
-#endif // _RE_COMPONENTREPEAT_H_
+
+ std::unique_ptr<Component> sub_comp;
+ u32 m_min;
+ u32 m_max;
+
+ std::vector<std::vector<PositionInfo> > m_firsts;
+ std::vector<std::vector<PositionInfo> > m_lasts;
+ Position posFirst;
+ Position posLast;
+
+ ComponentRepeat(const ComponentRepeat &other);
+};
+
+std::unique_ptr<ComponentRepeat>
+makeComponentRepeat(std::unique_ptr<Component> sub_comp, u32 min, u32 max,
+ ComponentRepeat::RepeatType t);
+
+} // namespace ue2
+
+#endif // _RE_COMPONENTREPEAT_H_
diff --git a/contrib/libs/hyperscan/src/parser/ComponentSequence.cpp b/contrib/libs/hyperscan/src/parser/ComponentSequence.cpp
index fd198b222e..b0b5b13935 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentSequence.cpp
+++ b/contrib/libs/hyperscan/src/parser/ComponentSequence.cpp
@@ -1,376 +1,376 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Sequence of Component objects.
- */
-
-
-#include "ComponentSequence.h"
-
-#include "buildstate.h"
-#include "ComponentAlternation.h"
-#include "ComponentRepeat.h"
-#include "Parser.h"
-#include "ue2common.h"
-#include "parse_error.h"
-#include "position_dump.h"
-#include "position_info.h"
-#include "nfagraph/ng_builder.h"
-#include "util/container.h"
-#include "util/make_unique.h"
-
-#include <algorithm>
-#include <cassert>
-
-using namespace std;
-
-namespace ue2 {
-
-ComponentSequence::ComponentSequence() : capture_index(NOT_CAPTURED) {}
-
-ComponentSequence::~ComponentSequence() {}
-
-ComponentSequence::ComponentSequence(const ComponentSequence &other)
- : Component(other), capture_index(other.capture_index) {
- // Deep copy children.
- for (const auto &c : other.children) {
- assert(c);
- children.push_back(unique_ptr<Component>(c->clone()));
- }
- if (other.alternation) {
- const ComponentAlternation &c = *other.alternation;
- alternation.reset(c.clone());
- }
-}
-
-ComponentSequence *ComponentSequence::clone() const {
- return new ComponentSequence(*this);
-}
-
-Component *ComponentSequence::accept(ComponentVisitor &v) {
- assert(!alternation); // Sequence must be finalized first.
-
- Component *c = v.visit(this);
- if (c != this) {
- v.post(this);
- return c;
- }
-
- for (auto i = children.begin(), e = children.end(); i != e; ++i) {
- Component *child = i->get();
- c = (*i)->accept(v);
- if (c != child) {
- // Child has been replaced (new Component pointer) or we've been
- // instructed to delete it (null).
- i->reset(c);
- }
- }
-
- // Remove deleted children.
- children.erase(remove(children.begin(), children.end(), nullptr),
- children.end());
-
- v.post(this);
- return this;
-}
-
-void ComponentSequence::accept(ConstComponentVisitor &v) const {
- assert(!alternation); // Sequence must be finalized first.
-
- v.pre(*this);
-
- for (auto i = children.begin(), e = children.end(); i != e; ++i) {
- (*i)->accept(v);
-
- if (i + 1 != e) {
- v.during(*this);
- }
- }
-
- v.post(*this);
-}
-
-void ComponentSequence::addComponent(unique_ptr<Component> comp) {
- children.push_back(move(comp));
-}
-
-bool ComponentSequence::addRepeat(u32 min, u32 max,
- ComponentRepeat::RepeatType type) {
- if (children.empty() || min > max || max == 0) {
- return false;
- }
-
- // We can't apply a repeat to some types of component.
- assert(children.back());
- if (!children.back()->repeatable()) {
- return false;
- }
-
- children.back() = makeComponentRepeat(move(children.back()), min, max,
- type);
- assert(children.back());
- return true;
-}
-
-void ComponentSequence::addAlternation() {
- if (!alternation) {
- alternation = ue2::make_unique<ComponentAlternation>();
- }
-
- auto seq = ue2::make_unique<ComponentSequence>();
- seq->children.swap(children);
- alternation->append(move(seq));
-}
-
-void ComponentSequence::finalize() {
- if (alternation) {
- addAlternation();
- assert(children.empty());
- children.push_back(move(alternation));
- alternation = nullptr;
- }
-}
-
-vector<PositionInfo> ComponentSequence::first() const {
- vector<PositionInfo> firsts, subfirsts;
-
- for (const auto &c : children) {
- subfirsts = c->first();
- replaceEpsilons(firsts, subfirsts);
- if (!c->empty()) {
- break;
- }
- }
-
- if (firsts.empty()) {
- DEBUG_PRINTF("trivial empty sequence %zu\n", firsts.size());
- assert(children.empty());
- firsts.push_back(GlushkovBuildState::POS_EPSILON);
- }
-
- DEBUG_PRINTF("%zu firsts\n", firsts.size());
- return firsts;
-}
-
-namespace {
-struct eps_info {
- eps_info() : flags(0U) {}
- u32 flags;
-};
-}
-
-static
-void epsilonVisit(vector<eps_info> *info, const vector<PositionInfo> &f) {
- vector<eps_info> out;
- out.reserve(info->size());
-
- set<u32> seen_flags;
-
- assert(!info->empty());
- for (auto eps = find(f.begin(), f.end(), GlushkovBuildState::POS_EPSILON);
- eps != f.end();
- eps = find(eps + 1, f.end(), GlushkovBuildState::POS_EPSILON)) {
- for (auto it = info->begin(); it != info->end(); ++it) {
- u32 flags = it->flags | eps->flags;
- if (contains(seen_flags, flags)) {
- continue;
- }
-
- out.push_back(*it);
- out.back().flags = flags;
- seen_flags.insert(flags);
- }
- }
-
- info->swap(out);
- assert(!info->empty());
-}
-
-static
-void applyEpsilonVisits(vector<PositionInfo> &lasts,
- const vector<eps_info> &eps_visits) {
- vector<PositionInfo> out;
- out.reserve(lasts.size() * eps_visits.size());
-
- for (const auto &last : lasts) {
- for (const auto &e : eps_visits) {
- out.push_back(last);
- out.back().flags |= e.flags;
- }
- }
-
- cleanupPositions(out);
- lasts.swap(out);
-}
-
-vector<PositionInfo> ComponentSequence::last() const {
- vector<PositionInfo> lasts, sublasts;
- vector<eps_info> visits(1);
-
- auto i = children.rbegin(), e = children.rend();
- for (; i != e; ++i) {
- sublasts = (*i)->last();
- applyEpsilonVisits(sublasts, visits);
- lasts.insert(lasts.end(), sublasts.begin(), sublasts.end());
- if ((*i)->empty()) {
- // this epsilon's flags should propagate to subsequent lasts'
- // enter/exit lists
- epsilonVisit(&visits, (*i)->first());
- } else {
- break;
- }
- }
-
- DEBUG_PRINTF("lasts = %s\n",
- dumpPositions(lasts.begin(), lasts.end()).c_str());
- return lasts;
-}
-
-bool ComponentSequence::empty(void) const {
- // a sequence can be empty if all its subcomponents can be empty
- for (const auto &c : children) {
- if (!c->empty()) {
- return false;
- }
- }
- return true;
-}
-
-void ComponentSequence::notePositions(GlushkovBuildState &bs) {
- u32 pb = bs.getBuilder().numVertices();
- for (auto &c : children) {
- c->notePositions(bs);
- }
- recordPosBounds(pb, bs.getBuilder().numVertices());
-}
-
-void ComponentSequence::buildFollowSet(GlushkovBuildState &bs,
- const vector<PositionInfo> &lastPos) {
- DEBUG_PRINTF("sequence of %zu components\n", children.size());
-
- // If no components, no work to do.
- if (children.empty()) {
- return;
- }
-
- // First element
- children.front()->buildFollowSet(bs, lastPos);
- if (children.size() == 1) {
- // If our sequence contains precisely one component, then we've done
- // all our work. Hooking up its firsts and lasts will be done by our
- // parent component.
- return;
- }
-
- // Remaining elements, wiring last to first in sequence.
-
- vector<PositionInfo> prevLasts = children.front()->last();
-
- for (auto it = next(children.begin()), ite = children.end(); it != ite; ++it) {
- assert(*it);
- Component &c = *(*it);
-
- // Build subcomponent follow set
- c.buildFollowSet(bs, prevLasts);
-
- // FIRST(curr)
- vector<PositionInfo> currFirsts(c.first());
-
- // LAST(prev) => FIRST(curr)
- DEBUG_PRINTF("connecting lasts (|| %zu) to firsts of comp %zd\n",
- prevLasts.size(), it - children.begin());
- bs.connectRegions(prevLasts, currFirsts);
-
- // Generate a new LAST(prev) for the next iteration; either c->last()
- // on its own if it can't be empty or c->last unioned with the previous
- // last if c can be empty
- vector<PositionInfo> currLasts(c.last());
-
- if (!c.empty()) {
- // Current component can't be empty, so use its lasts only
- prevLasts.swap(currLasts);
- DEBUG_PRINTF("swapped lasts\n");
- } else {
- // Add current lasts to previous lasts
- DEBUG_PRINTF("doing stuff for empty comp\n");
- prevLasts.insert(prevLasts.end(), currLasts.begin(), currLasts.end());
- DEBUG_PRINTF("done stuff for empty comp\n");
- }
- }
-}
-
-bool ComponentSequence::checkEmbeddedStartAnchor(bool at_start) const {
- for (const auto &c : children) {
- at_start = c->checkEmbeddedStartAnchor(at_start);
- }
-
- return at_start;
-}
-
-bool ComponentSequence::checkEmbeddedEndAnchor(bool at_end) const {
- // Note reversed ordering.
- for (auto i = children.rbegin(), e = children.rend(); i != e; ++i) {
- at_end = (*i)->checkEmbeddedEndAnchor(at_end);
- }
-
- return at_end;
-}
-
-bool ComponentSequence::vacuous_everywhere() const {
- for (const auto &c : children) {
- if (!c->vacuous_everywhere()) {
- return false;
- }
- }
- return true;
-}
-
-void ComponentSequence::optimise(bool connected_to_sds) {
- DEBUG_PRINTF("opt %d\n", (int)connected_to_sds);
- for (u32 i = 0; i < children.size();) {
- DEBUG_PRINTF("opt %u: ctsds: %d\n", i, (int)connected_to_sds);
- Component &sub = *children[i];
-
- sub.optimise(connected_to_sds);
-
- bool vacuous = sub.vacuous_everywhere();
-
- if (connected_to_sds && vacuous) {
- DEBUG_PRINTF("delete opt %u\n", i);
- auto it = children.begin() + i;
- children.erase(it);
- continue;
- }
-
- connected_to_sds = connected_to_sds && vacuous;
- i++;
- }
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Sequence of Component objects.
+ */
+
+
+#include "ComponentSequence.h"
+
+#include "buildstate.h"
+#include "ComponentAlternation.h"
+#include "ComponentRepeat.h"
+#include "Parser.h"
+#include "ue2common.h"
+#include "parse_error.h"
+#include "position_dump.h"
+#include "position_info.h"
+#include "nfagraph/ng_builder.h"
+#include "util/container.h"
+#include "util/make_unique.h"
+
+#include <algorithm>
+#include <cassert>
+
+using namespace std;
+
+namespace ue2 {
+
+ComponentSequence::ComponentSequence() : capture_index(NOT_CAPTURED) {}
+
+ComponentSequence::~ComponentSequence() {}
+
+ComponentSequence::ComponentSequence(const ComponentSequence &other)
+ : Component(other), capture_index(other.capture_index) {
+ // Deep copy children.
+ for (const auto &c : other.children) {
+ assert(c);
+ children.push_back(unique_ptr<Component>(c->clone()));
+ }
+ if (other.alternation) {
+ const ComponentAlternation &c = *other.alternation;
+ alternation.reset(c.clone());
+ }
+}
+
+ComponentSequence *ComponentSequence::clone() const {
+ return new ComponentSequence(*this);
+}
+
+Component *ComponentSequence::accept(ComponentVisitor &v) {
+ assert(!alternation); // Sequence must be finalized first.
+
+ Component *c = v.visit(this);
+ if (c != this) {
+ v.post(this);
+ return c;
+ }
+
+ for (auto i = children.begin(), e = children.end(); i != e; ++i) {
+ Component *child = i->get();
+ c = (*i)->accept(v);
+ if (c != child) {
+ // Child has been replaced (new Component pointer) or we've been
+ // instructed to delete it (null).
+ i->reset(c);
+ }
+ }
+
+ // Remove deleted children.
+ children.erase(remove(children.begin(), children.end(), nullptr),
+ children.end());
+
+ v.post(this);
+ return this;
+}
+
+void ComponentSequence::accept(ConstComponentVisitor &v) const {
+ assert(!alternation); // Sequence must be finalized first.
+
+ v.pre(*this);
+
+ for (auto i = children.begin(), e = children.end(); i != e; ++i) {
+ (*i)->accept(v);
+
+ if (i + 1 != e) {
+ v.during(*this);
+ }
+ }
+
+ v.post(*this);
+}
+
+void ComponentSequence::addComponent(unique_ptr<Component> comp) {
+ children.push_back(move(comp));
+}
+
+bool ComponentSequence::addRepeat(u32 min, u32 max,
+ ComponentRepeat::RepeatType type) {
+ if (children.empty() || min > max || max == 0) {
+ return false;
+ }
+
+ // We can't apply a repeat to some types of component.
+ assert(children.back());
+ if (!children.back()->repeatable()) {
+ return false;
+ }
+
+ children.back() = makeComponentRepeat(move(children.back()), min, max,
+ type);
+ assert(children.back());
+ return true;
+}
+
+void ComponentSequence::addAlternation() {
+ if (!alternation) {
+ alternation = ue2::make_unique<ComponentAlternation>();
+ }
+
+ auto seq = ue2::make_unique<ComponentSequence>();
+ seq->children.swap(children);
+ alternation->append(move(seq));
+}
+
+void ComponentSequence::finalize() {
+ if (alternation) {
+ addAlternation();
+ assert(children.empty());
+ children.push_back(move(alternation));
+ alternation = nullptr;
+ }
+}
+
+vector<PositionInfo> ComponentSequence::first() const {
+ vector<PositionInfo> firsts, subfirsts;
+
+ for (const auto &c : children) {
+ subfirsts = c->first();
+ replaceEpsilons(firsts, subfirsts);
+ if (!c->empty()) {
+ break;
+ }
+ }
+
+ if (firsts.empty()) {
+ DEBUG_PRINTF("trivial empty sequence %zu\n", firsts.size());
+ assert(children.empty());
+ firsts.push_back(GlushkovBuildState::POS_EPSILON);
+ }
+
+ DEBUG_PRINTF("%zu firsts\n", firsts.size());
+ return firsts;
+}
+
+namespace {
+struct eps_info {
+ eps_info() : flags(0U) {}
+ u32 flags;
+};
+}
+
+static
+void epsilonVisit(vector<eps_info> *info, const vector<PositionInfo> &f) {
+ vector<eps_info> out;
+ out.reserve(info->size());
+
+ set<u32> seen_flags;
+
+ assert(!info->empty());
+ for (auto eps = find(f.begin(), f.end(), GlushkovBuildState::POS_EPSILON);
+ eps != f.end();
+ eps = find(eps + 1, f.end(), GlushkovBuildState::POS_EPSILON)) {
+ for (auto it = info->begin(); it != info->end(); ++it) {
+ u32 flags = it->flags | eps->flags;
+ if (contains(seen_flags, flags)) {
+ continue;
+ }
+
+ out.push_back(*it);
+ out.back().flags = flags;
+ seen_flags.insert(flags);
+ }
+ }
+
+ info->swap(out);
+ assert(!info->empty());
+}
+
+static
+void applyEpsilonVisits(vector<PositionInfo> &lasts,
+ const vector<eps_info> &eps_visits) {
+ vector<PositionInfo> out;
+ out.reserve(lasts.size() * eps_visits.size());
+
+ for (const auto &last : lasts) {
+ for (const auto &e : eps_visits) {
+ out.push_back(last);
+ out.back().flags |= e.flags;
+ }
+ }
+
+ cleanupPositions(out);
+ lasts.swap(out);
+}
+
+vector<PositionInfo> ComponentSequence::last() const {
+ vector<PositionInfo> lasts, sublasts;
+ vector<eps_info> visits(1);
+
+ auto i = children.rbegin(), e = children.rend();
+ for (; i != e; ++i) {
+ sublasts = (*i)->last();
+ applyEpsilonVisits(sublasts, visits);
+ lasts.insert(lasts.end(), sublasts.begin(), sublasts.end());
+ if ((*i)->empty()) {
+ // this epsilon's flags should propagate to subsequent lasts'
+ // enter/exit lists
+ epsilonVisit(&visits, (*i)->first());
+ } else {
+ break;
+ }
+ }
+
+ DEBUG_PRINTF("lasts = %s\n",
+ dumpPositions(lasts.begin(), lasts.end()).c_str());
+ return lasts;
+}
+
+bool ComponentSequence::empty(void) const {
+ // a sequence can be empty if all its subcomponents can be empty
+ for (const auto &c : children) {
+ if (!c->empty()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+void ComponentSequence::notePositions(GlushkovBuildState &bs) {
+ u32 pb = bs.getBuilder().numVertices();
+ for (auto &c : children) {
+ c->notePositions(bs);
+ }
+ recordPosBounds(pb, bs.getBuilder().numVertices());
+}
+
+void ComponentSequence::buildFollowSet(GlushkovBuildState &bs,
+ const vector<PositionInfo> &lastPos) {
+ DEBUG_PRINTF("sequence of %zu components\n", children.size());
+
+ // If no components, no work to do.
+ if (children.empty()) {
+ return;
+ }
+
+ // First element
+ children.front()->buildFollowSet(bs, lastPos);
+ if (children.size() == 1) {
+ // If our sequence contains precisely one component, then we've done
+ // all our work. Hooking up its firsts and lasts will be done by our
+ // parent component.
+ return;
+ }
+
+ // Remaining elements, wiring last to first in sequence.
+
+ vector<PositionInfo> prevLasts = children.front()->last();
+
+ for (auto it = next(children.begin()), ite = children.end(); it != ite; ++it) {
+ assert(*it);
+ Component &c = *(*it);
+
+ // Build subcomponent follow set
+ c.buildFollowSet(bs, prevLasts);
+
+ // FIRST(curr)
+ vector<PositionInfo> currFirsts(c.first());
+
+ // LAST(prev) => FIRST(curr)
+ DEBUG_PRINTF("connecting lasts (|| %zu) to firsts of comp %zd\n",
+ prevLasts.size(), it - children.begin());
+ bs.connectRegions(prevLasts, currFirsts);
+
+ // Generate a new LAST(prev) for the next iteration; either c->last()
+ // on its own if it can't be empty or c->last unioned with the previous
+ // last if c can be empty
+ vector<PositionInfo> currLasts(c.last());
+
+ if (!c.empty()) {
+ // Current component can't be empty, so use its lasts only
+ prevLasts.swap(currLasts);
+ DEBUG_PRINTF("swapped lasts\n");
+ } else {
+ // Add current lasts to previous lasts
+ DEBUG_PRINTF("doing stuff for empty comp\n");
+ prevLasts.insert(prevLasts.end(), currLasts.begin(), currLasts.end());
+ DEBUG_PRINTF("done stuff for empty comp\n");
+ }
+ }
+}
+
+bool ComponentSequence::checkEmbeddedStartAnchor(bool at_start) const {
+ for (const auto &c : children) {
+ at_start = c->checkEmbeddedStartAnchor(at_start);
+ }
+
+ return at_start;
+}
+
+bool ComponentSequence::checkEmbeddedEndAnchor(bool at_end) const {
+ // Note reversed ordering.
+ for (auto i = children.rbegin(), e = children.rend(); i != e; ++i) {
+ at_end = (*i)->checkEmbeddedEndAnchor(at_end);
+ }
+
+ return at_end;
+}
+
+bool ComponentSequence::vacuous_everywhere() const {
+ for (const auto &c : children) {
+ if (!c->vacuous_everywhere()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+void ComponentSequence::optimise(bool connected_to_sds) {
+ DEBUG_PRINTF("opt %d\n", (int)connected_to_sds);
+ for (u32 i = 0; i < children.size();) {
+ DEBUG_PRINTF("opt %u: ctsds: %d\n", i, (int)connected_to_sds);
+ Component &sub = *children[i];
+
+ sub.optimise(connected_to_sds);
+
+ bool vacuous = sub.vacuous_everywhere();
+
+ if (connected_to_sds && vacuous) {
+ DEBUG_PRINTF("delete opt %u\n", i);
+ auto it = children.begin() + i;
+ children.erase(it);
+ continue;
+ }
+
+ connected_to_sds = connected_to_sds && vacuous;
+ i++;
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/ComponentSequence.h b/contrib/libs/hyperscan/src/parser/ComponentSequence.h
index 12c35f607a..08e57d0a3d 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentSequence.h
+++ b/contrib/libs/hyperscan/src/parser/ComponentSequence.h
@@ -1,108 +1,108 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Sequence of Component objects.
- */
-
-#ifndef COMPONENT_SEQUENCE_H
-#define COMPONENT_SEQUENCE_H
-
-#include "Component.h"
-#include "ComponentRepeat.h" // for ComponentRepeat::RepeatType
-#include "ue2common.h"
-
-#include <memory>
-#include <set>
-#include <vector>
-
-namespace ue2 {
-
-class ComponentAlternation;
-class GlushkovBuildState;
-
-// Encapsulates a number of sub expressions to be applied sequentially
-class ComponentSequence : public Component {
- friend class DumpVisitor;
- friend class PrintVisitor;
- friend class SimplifyVisitor;
-public:
- /** \brief capture index representing a sequence that ISN'T capturing */
- static constexpr unsigned int NOT_CAPTURED = 65536;
-
- ComponentSequence();
- ~ComponentSequence() override;
- ComponentSequence *clone() const override;
- Component *accept(ComponentVisitor &v) override;
- void accept(ConstComponentVisitor &v) const override;
-
- bool addRepeat(u32 min, u32 max, ComponentRepeat::RepeatType type);
-
- // overridden by ComponentCondReference, which can only have 1 or 2
- // branches.
- virtual void addAlternation();
-
- virtual void finalize();
-
- void addComponent(std::unique_ptr<Component> comp);
-
- std::vector<PositionInfo> first() const override;
- std::vector<PositionInfo> last() const override;
- bool empty(void) const override;
- bool vacuous_everywhere() const override;
- void notePositions(GlushkovBuildState &bs) override;
- void buildFollowSet(GlushkovBuildState &bs,
- const std::vector<PositionInfo> &lastPos) override;
- bool checkEmbeddedStartAnchor(bool at_start) const override;
- bool checkEmbeddedEndAnchor(bool at_end) const override;
-
- void optimise(bool connected_to_sds) override;
-
- void setCaptureIndex(unsigned int idx) { capture_index = idx; }
- unsigned int getCaptureIndex() const { return capture_index; }
- void setCaptureName(const std::string &s) { capture_name = s; }
- const std::string &getCaptureName() const { return capture_name; }
-
- virtual const std::vector<std::unique_ptr<Component>> &getChildren() const {
- return children;
- }
-
-protected:
- ComponentSequence(const ComponentSequence &other);
-
- std::vector<std::unique_ptr<Component>> children;
- std::unique_ptr<ComponentAlternation> alternation;
-
-private:
- unsigned int capture_index;
- std::string capture_name; //!< empty means no name
-};
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Sequence of Component objects.
+ */
+
+#ifndef COMPONENT_SEQUENCE_H
+#define COMPONENT_SEQUENCE_H
+
+#include "Component.h"
+#include "ComponentRepeat.h" // for ComponentRepeat::RepeatType
+#include "ue2common.h"
+
+#include <memory>
+#include <set>
+#include <vector>
+
+namespace ue2 {
+
+class ComponentAlternation;
+class GlushkovBuildState;
+
+// Encapsulates a number of sub expressions to be applied sequentially
+class ComponentSequence : public Component {
+ friend class DumpVisitor;
+ friend class PrintVisitor;
+ friend class SimplifyVisitor;
+public:
+ /** \brief capture index representing a sequence that ISN'T capturing */
+ static constexpr unsigned int NOT_CAPTURED = 65536;
+
+ ComponentSequence();
+ ~ComponentSequence() override;
+ ComponentSequence *clone() const override;
+ Component *accept(ComponentVisitor &v) override;
+ void accept(ConstComponentVisitor &v) const override;
+
+ bool addRepeat(u32 min, u32 max, ComponentRepeat::RepeatType type);
+
+ // overridden by ComponentCondReference, which can only have 1 or 2
+ // branches.
+ virtual void addAlternation();
+
+ virtual void finalize();
+
+ void addComponent(std::unique_ptr<Component> comp);
+
+ std::vector<PositionInfo> first() const override;
+ std::vector<PositionInfo> last() const override;
+ bool empty(void) const override;
+ bool vacuous_everywhere() const override;
+ void notePositions(GlushkovBuildState &bs) override;
+ void buildFollowSet(GlushkovBuildState &bs,
+ const std::vector<PositionInfo> &lastPos) override;
+ bool checkEmbeddedStartAnchor(bool at_start) const override;
+ bool checkEmbeddedEndAnchor(bool at_end) const override;
+
+ void optimise(bool connected_to_sds) override;
+
+ void setCaptureIndex(unsigned int idx) { capture_index = idx; }
+ unsigned int getCaptureIndex() const { return capture_index; }
+ void setCaptureName(const std::string &s) { capture_name = s; }
+ const std::string &getCaptureName() const { return capture_name; }
+
+ virtual const std::vector<std::unique_ptr<Component>> &getChildren() const {
+ return children;
+ }
+
+protected:
+ ComponentSequence(const ComponentSequence &other);
+
+ std::vector<std::unique_ptr<Component>> children;
+ std::unique_ptr<ComponentAlternation> alternation;
+
+private:
+ unsigned int capture_index;
+ std::string capture_name; //!< empty means no name
+};
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/parser/ComponentVisitor.cpp b/contrib/libs/hyperscan/src/parser/ComponentVisitor.cpp
index d1a82af22a..b7bbba23ee 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentVisitor.cpp
+++ b/contrib/libs/hyperscan/src/parser/ComponentVisitor.cpp
@@ -1,76 +1,76 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "AsciiComponentClass.h"
-#include "ComponentVisitor.h"
-#include "ComponentAlternation.h"
-#include "ComponentAssertion.h"
-#include "ComponentAtomicGroup.h"
-#include "ComponentBackReference.h"
-#include "ComponentBoundary.h"
-#include "ComponentByte.h"
-#include "ComponentCondReference.h"
-#include "ComponentClass.h"
-#include "ComponentEmpty.h"
-#include "ComponentEUS.h"
-#include "ComponentRepeat.h"
-#include "ComponentSequence.h"
-#include "ComponentWordBoundary.h"
-#include "Utf8ComponentClass.h"
-
-namespace ue2 {
-
-ComponentVisitor::~ComponentVisitor() {
- // empty
-}
-
-// Default implementations.
-
-DefaultComponentVisitor::DefaultComponentVisitor() {}
-DefaultComponentVisitor::~DefaultComponentVisitor() {}
-
-#define DEFAULT_FUNCS(comp) \
- Component *DefaultComponentVisitor::visit(comp *c) { return c; } \
- void DefaultComponentVisitor::post(comp *) {}
-
-DEFAULT_FUNCS(AsciiComponentClass)
-DEFAULT_FUNCS(ComponentAlternation)
-DEFAULT_FUNCS(ComponentAssertion)
-DEFAULT_FUNCS(ComponentAtomicGroup)
-DEFAULT_FUNCS(ComponentBackReference)
-DEFAULT_FUNCS(ComponentBoundary)
-DEFAULT_FUNCS(ComponentByte)
-DEFAULT_FUNCS(ComponentCondReference)
-DEFAULT_FUNCS(ComponentEmpty)
-DEFAULT_FUNCS(ComponentEUS)
-DEFAULT_FUNCS(ComponentRepeat)
-DEFAULT_FUNCS(ComponentSequence)
-DEFAULT_FUNCS(ComponentWordBoundary)
-DEFAULT_FUNCS(UTF8ComponentClass)
-
-} // namespace
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "AsciiComponentClass.h"
+#include "ComponentVisitor.h"
+#include "ComponentAlternation.h"
+#include "ComponentAssertion.h"
+#include "ComponentAtomicGroup.h"
+#include "ComponentBackReference.h"
+#include "ComponentBoundary.h"
+#include "ComponentByte.h"
+#include "ComponentCondReference.h"
+#include "ComponentClass.h"
+#include "ComponentEmpty.h"
+#include "ComponentEUS.h"
+#include "ComponentRepeat.h"
+#include "ComponentSequence.h"
+#include "ComponentWordBoundary.h"
+#include "Utf8ComponentClass.h"
+
+namespace ue2 {
+
+ComponentVisitor::~ComponentVisitor() {
+ // empty
+}
+
+// Default implementations.
+
+DefaultComponentVisitor::DefaultComponentVisitor() {}
+DefaultComponentVisitor::~DefaultComponentVisitor() {}
+
+#define DEFAULT_FUNCS(comp) \
+ Component *DefaultComponentVisitor::visit(comp *c) { return c; } \
+ void DefaultComponentVisitor::post(comp *) {}
+
+DEFAULT_FUNCS(AsciiComponentClass)
+DEFAULT_FUNCS(ComponentAlternation)
+DEFAULT_FUNCS(ComponentAssertion)
+DEFAULT_FUNCS(ComponentAtomicGroup)
+DEFAULT_FUNCS(ComponentBackReference)
+DEFAULT_FUNCS(ComponentBoundary)
+DEFAULT_FUNCS(ComponentByte)
+DEFAULT_FUNCS(ComponentCondReference)
+DEFAULT_FUNCS(ComponentEmpty)
+DEFAULT_FUNCS(ComponentEUS)
+DEFAULT_FUNCS(ComponentRepeat)
+DEFAULT_FUNCS(ComponentSequence)
+DEFAULT_FUNCS(ComponentWordBoundary)
+DEFAULT_FUNCS(UTF8ComponentClass)
+
+} // namespace
diff --git a/contrib/libs/hyperscan/src/parser/ComponentVisitor.h b/contrib/libs/hyperscan/src/parser/ComponentVisitor.h
index e906c7fc4d..be28d33610 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentVisitor.h
+++ b/contrib/libs/hyperscan/src/parser/ComponentVisitor.h
@@ -1,150 +1,150 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Visitor base class for working with the component tree.
- */
-
-#ifndef COMPONENTVISITOR_H
-#define COMPONENTVISITOR_H
-
-namespace ue2 {
-
-class AsciiComponentClass;
-class Component;
-class ComponentAlternation;
-class ComponentAssertion;
-class ComponentAtomicGroup;
-class ComponentBackReference;
-class ComponentBoundary;
-class ComponentByte;
-class ComponentClass;
-class ComponentCondReference;
-class ComponentEmpty;
-class ComponentEUS;
-class ComponentRepeat;
-class ComponentSequence;
-class ComponentWordBoundary;
-class UTF8ComponentClass;
-
-/**
- * \brief Visitor base class for working with the component tree.
- *
- * Our approach to implementing the visitor pattern for traversing (and
- * optionally mutating) the Component tree for a pattern. Each _visit_ function
- * takes a Component subclass pointer in and returns a Component pointer. That
- * pointer can have several values, dictating what the containing Component
- * should do:
- *
- * 1. If ptr == c, then do nothing.
- * 2. If ptr == nullptr, then remove c from the tree.
- * 3. If ptr != c && ptr != nullptr, then replace c with ptr.
- *
- * Traversal order is pre-order.
- *
- * After a Component's subcomponents have been visited, the _post_ function for
- * that Component will be called.
- */
-class ComponentVisitor {
-public:
- virtual ~ComponentVisitor();
-
- virtual Component *visit(AsciiComponentClass *c) = 0;
- virtual Component *visit(ComponentAlternation *c) = 0;
- virtual Component *visit(ComponentAssertion *c) = 0;
- virtual Component *visit(ComponentAtomicGroup *c) = 0;
- virtual Component *visit(ComponentBackReference *c) = 0;
- virtual Component *visit(ComponentBoundary *c) = 0;
- virtual Component *visit(ComponentByte *c) = 0;
- virtual Component *visit(ComponentCondReference *c) = 0;
- virtual Component *visit(ComponentEmpty *c) = 0;
- virtual Component *visit(ComponentEUS *c) = 0;
- virtual Component *visit(ComponentRepeat *c) = 0;
- virtual Component *visit(ComponentSequence *c) = 0;
- virtual Component *visit(ComponentWordBoundary *c) = 0;
- virtual Component *visit(UTF8ComponentClass *c) = 0;
-
- virtual void post(AsciiComponentClass *c) = 0;
- virtual void post(ComponentAlternation *c) = 0;
- virtual void post(ComponentAssertion *c) = 0;
- virtual void post(ComponentAtomicGroup *c) = 0;
- virtual void post(ComponentBackReference *c) = 0;
- virtual void post(ComponentBoundary *c) = 0;
- virtual void post(ComponentByte *c) = 0;
- virtual void post(ComponentCondReference *c) = 0;
- virtual void post(ComponentEmpty *c) = 0;
- virtual void post(ComponentEUS *c) = 0;
- virtual void post(ComponentRepeat *c) = 0;
- virtual void post(ComponentSequence *c) = 0;
- virtual void post(ComponentWordBoundary *c) = 0;
- virtual void post(UTF8ComponentClass *c) = 0;
-};
-
-/**
- * \brief Concrete subclass of ComponentVisitor with default behaviour,
- * allowing you to just implement the member functions you need.
- */
-class DefaultComponentVisitor : public ComponentVisitor {
-public:
- DefaultComponentVisitor();
- ~DefaultComponentVisitor() override;
-
- Component *visit(AsciiComponentClass *c) override;
- Component *visit(ComponentAlternation *c) override;
- Component *visit(ComponentAssertion *c) override;
- Component *visit(ComponentAtomicGroup *c) override;
- Component *visit(ComponentBackReference *c) override;
- Component *visit(ComponentBoundary *c) override;
- Component *visit(ComponentByte *c) override;
- Component *visit(ComponentCondReference *c) override;
- Component *visit(ComponentEmpty *c) override;
- Component *visit(ComponentEUS *c) override;
- Component *visit(ComponentRepeat *c) override;
- Component *visit(ComponentSequence *c) override;
- Component *visit(ComponentWordBoundary *c) override;
- Component *visit(UTF8ComponentClass *c) override;
-
- void post(AsciiComponentClass *c) override;
- void post(ComponentAlternation *c) override;
- void post(ComponentAssertion *c) override;
- void post(ComponentAtomicGroup *c) override;
- void post(ComponentBackReference *c) override;
- void post(ComponentBoundary *c) override;
- void post(ComponentByte *c) override;
- void post(ComponentCondReference *c) override;
- void post(ComponentEmpty *c) override;
- void post(ComponentEUS *c) override;
- void post(ComponentRepeat *c) override;
- void post(ComponentSequence *c) override;
- void post(ComponentWordBoundary *c) override;
- void post(UTF8ComponentClass *c) override;
-};
-
-} // namespace ue2
-
-#endif // COMPONENTVISITOR_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Visitor base class for working with the component tree.
+ */
+
+#ifndef COMPONENTVISITOR_H
+#define COMPONENTVISITOR_H
+
+namespace ue2 {
+
+class AsciiComponentClass;
+class Component;
+class ComponentAlternation;
+class ComponentAssertion;
+class ComponentAtomicGroup;
+class ComponentBackReference;
+class ComponentBoundary;
+class ComponentByte;
+class ComponentClass;
+class ComponentCondReference;
+class ComponentEmpty;
+class ComponentEUS;
+class ComponentRepeat;
+class ComponentSequence;
+class ComponentWordBoundary;
+class UTF8ComponentClass;
+
+/**
+ * \brief Visitor base class for working with the component tree.
+ *
+ * Our approach to implementing the visitor pattern for traversing (and
+ * optionally mutating) the Component tree for a pattern. Each _visit_ function
+ * takes a Component subclass pointer in and returns a Component pointer. That
+ * pointer can have several values, dictating what the containing Component
+ * should do:
+ *
+ * 1. If ptr == c, then do nothing.
+ * 2. If ptr == nullptr, then remove c from the tree.
+ * 3. If ptr != c && ptr != nullptr, then replace c with ptr.
+ *
+ * Traversal order is pre-order.
+ *
+ * After a Component's subcomponents have been visited, the _post_ function for
+ * that Component will be called.
+ */
+class ComponentVisitor {
+public:
+ virtual ~ComponentVisitor();
+
+ virtual Component *visit(AsciiComponentClass *c) = 0;
+ virtual Component *visit(ComponentAlternation *c) = 0;
+ virtual Component *visit(ComponentAssertion *c) = 0;
+ virtual Component *visit(ComponentAtomicGroup *c) = 0;
+ virtual Component *visit(ComponentBackReference *c) = 0;
+ virtual Component *visit(ComponentBoundary *c) = 0;
+ virtual Component *visit(ComponentByte *c) = 0;
+ virtual Component *visit(ComponentCondReference *c) = 0;
+ virtual Component *visit(ComponentEmpty *c) = 0;
+ virtual Component *visit(ComponentEUS *c) = 0;
+ virtual Component *visit(ComponentRepeat *c) = 0;
+ virtual Component *visit(ComponentSequence *c) = 0;
+ virtual Component *visit(ComponentWordBoundary *c) = 0;
+ virtual Component *visit(UTF8ComponentClass *c) = 0;
+
+ virtual void post(AsciiComponentClass *c) = 0;
+ virtual void post(ComponentAlternation *c) = 0;
+ virtual void post(ComponentAssertion *c) = 0;
+ virtual void post(ComponentAtomicGroup *c) = 0;
+ virtual void post(ComponentBackReference *c) = 0;
+ virtual void post(ComponentBoundary *c) = 0;
+ virtual void post(ComponentByte *c) = 0;
+ virtual void post(ComponentCondReference *c) = 0;
+ virtual void post(ComponentEmpty *c) = 0;
+ virtual void post(ComponentEUS *c) = 0;
+ virtual void post(ComponentRepeat *c) = 0;
+ virtual void post(ComponentSequence *c) = 0;
+ virtual void post(ComponentWordBoundary *c) = 0;
+ virtual void post(UTF8ComponentClass *c) = 0;
+};
+
+/**
+ * \brief Concrete subclass of ComponentVisitor with default behaviour,
+ * allowing you to just implement the member functions you need.
+ */
+class DefaultComponentVisitor : public ComponentVisitor {
+public:
+ DefaultComponentVisitor();
+ ~DefaultComponentVisitor() override;
+
+ Component *visit(AsciiComponentClass *c) override;
+ Component *visit(ComponentAlternation *c) override;
+ Component *visit(ComponentAssertion *c) override;
+ Component *visit(ComponentAtomicGroup *c) override;
+ Component *visit(ComponentBackReference *c) override;
+ Component *visit(ComponentBoundary *c) override;
+ Component *visit(ComponentByte *c) override;
+ Component *visit(ComponentCondReference *c) override;
+ Component *visit(ComponentEmpty *c) override;
+ Component *visit(ComponentEUS *c) override;
+ Component *visit(ComponentRepeat *c) override;
+ Component *visit(ComponentSequence *c) override;
+ Component *visit(ComponentWordBoundary *c) override;
+ Component *visit(UTF8ComponentClass *c) override;
+
+ void post(AsciiComponentClass *c) override;
+ void post(ComponentAlternation *c) override;
+ void post(ComponentAssertion *c) override;
+ void post(ComponentAtomicGroup *c) override;
+ void post(ComponentBackReference *c) override;
+ void post(ComponentBoundary *c) override;
+ void post(ComponentByte *c) override;
+ void post(ComponentCondReference *c) override;
+ void post(ComponentEmpty *c) override;
+ void post(ComponentEUS *c) override;
+ void post(ComponentRepeat *c) override;
+ void post(ComponentSequence *c) override;
+ void post(ComponentWordBoundary *c) override;
+ void post(UTF8ComponentClass *c) override;
+};
+
+} // namespace ue2
+
+#endif // COMPONENTVISITOR_H
diff --git a/contrib/libs/hyperscan/src/parser/ComponentWordBoundary.cpp b/contrib/libs/hyperscan/src/parser/ComponentWordBoundary.cpp
index adad41b308..168a2aad8e 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentWordBoundary.cpp
+++ b/contrib/libs/hyperscan/src/parser/ComponentWordBoundary.cpp
@@ -1,105 +1,105 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Word Boundary Assertion (\\b or \\B)
- */
-#include "ComponentWordBoundary.h"
-#include "buildstate.h"
-#include "parse_error.h"
-#include "Parser.h"
-#include "position_info.h"
-#include "nfagraph/ng_builder.h"
-
-using namespace std;
-
-namespace ue2 {
-
-ComponentWordBoundary::ComponentWordBoundary(u32 loc_in, bool neg,
- const ParseMode &mode)
- : loc(loc_in), position(GlushkovBuildState::POS_UNINITIALIZED),
- negated(neg), ucp(mode.ucp), prefilter(false) {}
-
-ComponentWordBoundary::~ComponentWordBoundary() {
- // empty
-}
-
-ComponentWordBoundary * ComponentWordBoundary::clone() const {
- return new ComponentWordBoundary(*this);
-}
-
-vector<PositionInfo> ComponentWordBoundary::first() const {
- vector<PositionInfo> firsts;
- firsts.push_back(position);
- return firsts;
-}
-
-vector<PositionInfo> ComponentWordBoundary::last() const {
- // Same as firsts
- return first();
-}
-
-bool ComponentWordBoundary::empty() const {
- return false;
-}
-
-bool ComponentWordBoundary::repeatable() const {
- return false;
-}
-
-void ComponentWordBoundary::notePositions(GlushkovBuildState &bs) {
- NFABuilder &builder = bs.getBuilder();
- position = builder.makePositions(1);
-
- if (ucp) {
- assert(prefilter); // only in prefiltering mode!
- if (negated) {
- builder.setAssertFlag(position, POS_FLAG_ASSERT_WORD_TO_WORD_UCP
- | POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP);
- } else {
- builder.setAssertFlag(position, POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP
- | POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP);
- }
- } else {
- if (negated) {
- builder.setAssertFlag(position, POS_FLAG_ASSERT_WORD_TO_WORD
- | POS_FLAG_ASSERT_NONWORD_TO_NONWORD);
- } else {
- builder.setAssertFlag(position, POS_FLAG_ASSERT_WORD_TO_NONWORD
- | POS_FLAG_ASSERT_NONWORD_TO_WORD);
- }
- }
- recordPosBounds(position, position + 1);
-}
-
-void ComponentWordBoundary::buildFollowSet(GlushkovBuildState&,
- const vector<PositionInfo>&) {
- // No internal connections, nowt to do
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Word Boundary Assertion (\\b or \\B)
+ */
+#include "ComponentWordBoundary.h"
+#include "buildstate.h"
+#include "parse_error.h"
+#include "Parser.h"
+#include "position_info.h"
+#include "nfagraph/ng_builder.h"
+
+using namespace std;
+
+namespace ue2 {
+
+ComponentWordBoundary::ComponentWordBoundary(u32 loc_in, bool neg,
+ const ParseMode &mode)
+ : loc(loc_in), position(GlushkovBuildState::POS_UNINITIALIZED),
+ negated(neg), ucp(mode.ucp), prefilter(false) {}
+
+ComponentWordBoundary::~ComponentWordBoundary() {
+ // empty
+}
+
+ComponentWordBoundary * ComponentWordBoundary::clone() const {
+ return new ComponentWordBoundary(*this);
+}
+
+vector<PositionInfo> ComponentWordBoundary::first() const {
+ vector<PositionInfo> firsts;
+ firsts.push_back(position);
+ return firsts;
+}
+
+vector<PositionInfo> ComponentWordBoundary::last() const {
+ // Same as firsts
+ return first();
+}
+
+bool ComponentWordBoundary::empty() const {
+ return false;
+}
+
+bool ComponentWordBoundary::repeatable() const {
+ return false;
+}
+
+void ComponentWordBoundary::notePositions(GlushkovBuildState &bs) {
+ NFABuilder &builder = bs.getBuilder();
+ position = builder.makePositions(1);
+
+ if (ucp) {
+ assert(prefilter); // only in prefiltering mode!
+ if (negated) {
+ builder.setAssertFlag(position, POS_FLAG_ASSERT_WORD_TO_WORD_UCP
+ | POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP);
+ } else {
+ builder.setAssertFlag(position, POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP
+ | POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP);
+ }
+ } else {
+ if (negated) {
+ builder.setAssertFlag(position, POS_FLAG_ASSERT_WORD_TO_WORD
+ | POS_FLAG_ASSERT_NONWORD_TO_NONWORD);
+ } else {
+ builder.setAssertFlag(position, POS_FLAG_ASSERT_WORD_TO_NONWORD
+ | POS_FLAG_ASSERT_NONWORD_TO_WORD);
+ }
+ }
+ recordPosBounds(position, position + 1);
+}
+
+void ComponentWordBoundary::buildFollowSet(GlushkovBuildState&,
+ const vector<PositionInfo>&) {
+ // No internal connections, nowt to do
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/ComponentWordBoundary.h b/contrib/libs/hyperscan/src/parser/ComponentWordBoundary.h
index 40c317793c..8cf7654666 100644
--- a/contrib/libs/hyperscan/src/parser/ComponentWordBoundary.h
+++ b/contrib/libs/hyperscan/src/parser/ComponentWordBoundary.h
@@ -1,90 +1,90 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Word Boundary Assertion (\\b or \\B)
- */
-
-#ifndef _RE_COMPONENTWORDBOUNDARY_H_
-#define _RE_COMPONENTWORDBOUNDARY_H_
-
-#include "Component.h"
-#include "position.h"
-
-namespace ue2 {
-
-struct ParseMode;
-
-/** \brief Encapsulates a positive (\\b) or negative (\\B) word boundary
- * assertion. */
-class ComponentWordBoundary : public Component {
- friend class DumpVisitor;
- friend class PrintVisitor;
- friend class UnsupportedVisitor;
-public:
- ComponentWordBoundary(u32 loc, bool negated, const ParseMode &mode);
- ~ComponentWordBoundary() override;
- ComponentWordBoundary *clone() const override;
-
- Component *accept(ComponentVisitor &v) override {
- Component *c = v.visit(this);
- v.post(this);
- return c;
- }
-
- void accept(ConstComponentVisitor &v) const override {
- v.pre(*this);
- v.during(*this);
- v.post(*this);
- }
-
- std::vector<PositionInfo> first() const override;
- std::vector<PositionInfo> last() const override;
- bool empty() const override;
- bool repeatable() const override;
- void notePositions(GlushkovBuildState &bs) override;
- void buildFollowSet(GlushkovBuildState &bs,
- const std::vector<PositionInfo> &lastPos) override;
-
- void setPrefilter(bool p) { prefilter = p; }
-
-private:
- u32 loc; //!< location in pattern for error reporting.
- Position position;
- bool negated;
- bool ucp;
- bool prefilter; //!< set by PrefilterVisitor, this is ugly
-
- ComponentWordBoundary(const ComponentWordBoundary &other)
- : Component(other), loc(other.loc), position(other.position),
- negated(other.negated), ucp(other.ucp), prefilter(other.prefilter) {}
-};
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Word Boundary Assertion (\\b or \\B)
+ */
+
+#ifndef _RE_COMPONENTWORDBOUNDARY_H_
+#define _RE_COMPONENTWORDBOUNDARY_H_
+
+#include "Component.h"
+#include "position.h"
+
+namespace ue2 {
+
+struct ParseMode;
+
+/** \brief Encapsulates a positive (\\b) or negative (\\B) word boundary
+ * assertion. */
+class ComponentWordBoundary : public Component {
+ friend class DumpVisitor;
+ friend class PrintVisitor;
+ friend class UnsupportedVisitor;
+public:
+ ComponentWordBoundary(u32 loc, bool negated, const ParseMode &mode);
+ ~ComponentWordBoundary() override;
+ ComponentWordBoundary *clone() const override;
+
+ Component *accept(ComponentVisitor &v) override {
+ Component *c = v.visit(this);
+ v.post(this);
+ return c;
+ }
+
+ void accept(ConstComponentVisitor &v) const override {
+ v.pre(*this);
+ v.during(*this);
+ v.post(*this);
+ }
+
+ std::vector<PositionInfo> first() const override;
+ std::vector<PositionInfo> last() const override;
+ bool empty() const override;
+ bool repeatable() const override;
+ void notePositions(GlushkovBuildState &bs) override;
+ void buildFollowSet(GlushkovBuildState &bs,
+ const std::vector<PositionInfo> &lastPos) override;
+
+ void setPrefilter(bool p) { prefilter = p; }
+
+private:
+ u32 loc; //!< location in pattern for error reporting.
+ Position position;
+ bool negated;
+ bool ucp;
+ bool prefilter; //!< set by PrefilterVisitor, this is ugly
+
+ ComponentWordBoundary(const ComponentWordBoundary &other)
+ : Component(other), loc(other.loc), position(other.position),
+ negated(other.negated), ucp(other.ucp), prefilter(other.prefilter) {}
+};
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/parser/ConstComponentVisitor.cpp b/contrib/libs/hyperscan/src/parser/ConstComponentVisitor.cpp
index 735289916c..b6413ecdb3 100644
--- a/contrib/libs/hyperscan/src/parser/ConstComponentVisitor.cpp
+++ b/contrib/libs/hyperscan/src/parser/ConstComponentVisitor.cpp
@@ -1,78 +1,78 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "ConstComponentVisitor.h"
-
-#include "AsciiComponentClass.h"
-#include "ComponentAlternation.h"
-#include "ComponentAssertion.h"
-#include "ComponentAtomicGroup.h"
-#include "ComponentBackReference.h"
-#include "ComponentBoundary.h"
-#include "ComponentByte.h"
-#include "ComponentCondReference.h"
-#include "ComponentClass.h"
-#include "ComponentEmpty.h"
-#include "ComponentEUS.h"
-#include "ComponentRepeat.h"
-#include "ComponentSequence.h"
-#include "ComponentWordBoundary.h"
-#include "Utf8ComponentClass.h"
-
-namespace ue2 {
-
-ConstComponentVisitor::~ConstComponentVisitor() {
- // empty
-}
-
-// Default implementations.
-
-DefaultConstComponentVisitor::DefaultConstComponentVisitor() {}
-DefaultConstComponentVisitor::~DefaultConstComponentVisitor() {}
-
-#define DEFAULT_FUNCS(comp) \
- void DefaultConstComponentVisitor::pre(const comp &) {} \
- void DefaultConstComponentVisitor::during(const comp &) {} \
- void DefaultConstComponentVisitor::post(const comp &) {}
-
-DEFAULT_FUNCS(AsciiComponentClass)
-DEFAULT_FUNCS(ComponentAlternation)
-DEFAULT_FUNCS(ComponentAssertion)
-DEFAULT_FUNCS(ComponentAtomicGroup)
-DEFAULT_FUNCS(ComponentBackReference)
-DEFAULT_FUNCS(ComponentBoundary)
-DEFAULT_FUNCS(ComponentByte)
-DEFAULT_FUNCS(ComponentCondReference)
-DEFAULT_FUNCS(ComponentEmpty)
-DEFAULT_FUNCS(ComponentEUS)
-DEFAULT_FUNCS(ComponentRepeat)
-DEFAULT_FUNCS(ComponentSequence)
-DEFAULT_FUNCS(ComponentWordBoundary)
-DEFAULT_FUNCS(UTF8ComponentClass)
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ConstComponentVisitor.h"
+
+#include "AsciiComponentClass.h"
+#include "ComponentAlternation.h"
+#include "ComponentAssertion.h"
+#include "ComponentAtomicGroup.h"
+#include "ComponentBackReference.h"
+#include "ComponentBoundary.h"
+#include "ComponentByte.h"
+#include "ComponentCondReference.h"
+#include "ComponentClass.h"
+#include "ComponentEmpty.h"
+#include "ComponentEUS.h"
+#include "ComponentRepeat.h"
+#include "ComponentSequence.h"
+#include "ComponentWordBoundary.h"
+#include "Utf8ComponentClass.h"
+
+namespace ue2 {
+
+ConstComponentVisitor::~ConstComponentVisitor() {
+ // empty
+}
+
+// Default implementations.
+
+DefaultConstComponentVisitor::DefaultConstComponentVisitor() {}
+DefaultConstComponentVisitor::~DefaultConstComponentVisitor() {}
+
+#define DEFAULT_FUNCS(comp) \
+ void DefaultConstComponentVisitor::pre(const comp &) {} \
+ void DefaultConstComponentVisitor::during(const comp &) {} \
+ void DefaultConstComponentVisitor::post(const comp &) {}
+
+DEFAULT_FUNCS(AsciiComponentClass)
+DEFAULT_FUNCS(ComponentAlternation)
+DEFAULT_FUNCS(ComponentAssertion)
+DEFAULT_FUNCS(ComponentAtomicGroup)
+DEFAULT_FUNCS(ComponentBackReference)
+DEFAULT_FUNCS(ComponentBoundary)
+DEFAULT_FUNCS(ComponentByte)
+DEFAULT_FUNCS(ComponentCondReference)
+DEFAULT_FUNCS(ComponentEmpty)
+DEFAULT_FUNCS(ComponentEUS)
+DEFAULT_FUNCS(ComponentRepeat)
+DEFAULT_FUNCS(ComponentSequence)
+DEFAULT_FUNCS(ComponentWordBoundary)
+DEFAULT_FUNCS(UTF8ComponentClass)
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/ConstComponentVisitor.h b/contrib/libs/hyperscan/src/parser/ConstComponentVisitor.h
index 54026c92a9..c26f589e88 100644
--- a/contrib/libs/hyperscan/src/parser/ConstComponentVisitor.h
+++ b/contrib/libs/hyperscan/src/parser/ConstComponentVisitor.h
@@ -1,170 +1,170 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Visitor base class for working with the component tree.
- */
-
-#ifndef CONSTCOMPONENTVISITOR_H
-#define CONSTCOMPONENTVISITOR_H
-
-namespace ue2 {
-
-class AsciiComponentClass;
-class Component;
-class ComponentAlternation;
-class ComponentAssertion;
-class ComponentAtomicGroup;
-class ComponentBackReference;
-class ComponentBoundary;
-class ComponentByte;
-class ComponentCondReference;
-class ComponentClass;
-class ComponentEmpty;
-class ComponentEUS;
-class ComponentRepeat;
-class ComponentSequence;
-class ComponentWordBoundary;
-class UTF8ComponentClass;
-
-/**
- * \brief Visitor base class for traversing an immutable component tree.
- *
- * Our approach to implementing the visitor pattern for traversing the
- * Component tree for a pattern. This version operates on an immutable tree;
- * use \ref ComponentVisitor if you need to make changes to components during
- * traversal.
- */
-class ConstComponentVisitor {
-public:
- virtual ~ConstComponentVisitor();
-
- virtual void pre(const AsciiComponentClass &c) = 0;
- virtual void pre(const ComponentAlternation &c) = 0;
- virtual void pre(const ComponentAssertion &c) = 0;
- virtual void pre(const ComponentAtomicGroup &c) = 0;
- virtual void pre(const ComponentBackReference &c) = 0;
- virtual void pre(const ComponentBoundary &c) = 0;
- virtual void pre(const ComponentByte &c) = 0;
- virtual void pre(const ComponentCondReference &c) = 0;
- virtual void pre(const ComponentEmpty &c) = 0;
- virtual void pre(const ComponentEUS &c) = 0;
- virtual void pre(const ComponentRepeat &c) = 0;
- virtual void pre(const ComponentSequence &c) = 0;
- virtual void pre(const ComponentWordBoundary &c) = 0;
- virtual void pre(const UTF8ComponentClass &c) = 0;
-
- virtual void during(const AsciiComponentClass &c) = 0;
- virtual void during(const ComponentAlternation &c) = 0;
- virtual void during(const ComponentAssertion &c) = 0;
- virtual void during(const ComponentAtomicGroup &c) = 0;
- virtual void during(const ComponentBackReference &c) = 0;
- virtual void during(const ComponentBoundary &c) = 0;
- virtual void during(const ComponentByte &c) = 0;
- virtual void during(const ComponentCondReference &c) = 0;
- virtual void during(const ComponentEmpty &c) = 0;
- virtual void during(const ComponentEUS &c) = 0;
- virtual void during(const ComponentRepeat &c) = 0;
- virtual void during(const ComponentSequence &c) = 0;
- virtual void during(const ComponentWordBoundary &c) = 0;
- virtual void during(const UTF8ComponentClass &c) = 0;
-
- virtual void post(const AsciiComponentClass &c) = 0;
- virtual void post(const ComponentAlternation &c) = 0;
- virtual void post(const ComponentAssertion &c) = 0;
- virtual void post(const ComponentAtomicGroup &c) = 0;
- virtual void post(const ComponentBackReference &c) = 0;
- virtual void post(const ComponentBoundary &c) = 0;
- virtual void post(const ComponentByte &c) = 0;
- virtual void post(const ComponentCondReference &c) = 0;
- virtual void post(const ComponentEmpty &c) = 0;
- virtual void post(const ComponentEUS &c) = 0;
- virtual void post(const ComponentRepeat &c) = 0;
- virtual void post(const ComponentSequence &c) = 0;
- virtual void post(const ComponentWordBoundary &c) = 0;
- virtual void post(const UTF8ComponentClass &c) = 0;
-};
-
-/**
- * \brief Concrete subclass of ConstComponentVisitor with default behaviour,
- * allowing you to just implement the member functions you need.
- */
-class DefaultConstComponentVisitor : public ConstComponentVisitor {
-public:
- DefaultConstComponentVisitor();
- ~DefaultConstComponentVisitor() override;
-
- void pre(const AsciiComponentClass &c) override;
- void pre(const ComponentAlternation &c) override;
- void pre(const ComponentAssertion &c) override;
- void pre(const ComponentAtomicGroup &c) override;
- void pre(const ComponentBackReference &c) override;
- void pre(const ComponentBoundary &c) override;
- void pre(const ComponentByte &c) override;
- void pre(const ComponentCondReference &c) override;
- void pre(const ComponentEmpty &c) override;
- void pre(const ComponentEUS &c) override;
- void pre(const ComponentRepeat &c) override;
- void pre(const ComponentSequence &c) override;
- void pre(const ComponentWordBoundary &c) override;
- void pre(const UTF8ComponentClass &c) override;
-
- void during(const AsciiComponentClass &c) override;
- void during(const ComponentAlternation &c) override;
- void during(const ComponentAssertion &c) override;
- void during(const ComponentAtomicGroup &c) override;
- void during(const ComponentBackReference &c) override;
- void during(const ComponentBoundary &c) override;
- void during(const ComponentByte &c) override;
- void during(const ComponentCondReference &c) override;
- void during(const ComponentEmpty &c) override;
- void during(const ComponentEUS &c) override;
- void during(const ComponentRepeat &c) override;
- void during(const ComponentSequence &c) override;
- void during(const ComponentWordBoundary &c) override;
- void during(const UTF8ComponentClass &c) override;
-
- void post(const AsciiComponentClass &c) override;
- void post(const ComponentAlternation &c) override;
- void post(const ComponentAssertion &c) override;
- void post(const ComponentAtomicGroup &c) override;
- void post(const ComponentBackReference &c) override;
- void post(const ComponentBoundary &c) override;
- void post(const ComponentByte &c) override;
- void post(const ComponentCondReference &c) override;
- void post(const ComponentEmpty &c) override;
- void post(const ComponentEUS &c) override;
- void post(const ComponentRepeat &c) override;
- void post(const ComponentSequence &c) override;
- void post(const ComponentWordBoundary &c) override;
- void post(const UTF8ComponentClass &c) override;
-};
-
-} // namespace ue2
-
-#endif // CONSTCOMPONENTVISITOR_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Visitor base class for working with the component tree.
+ */
+
+#ifndef CONSTCOMPONENTVISITOR_H
+#define CONSTCOMPONENTVISITOR_H
+
+namespace ue2 {
+
+class AsciiComponentClass;
+class Component;
+class ComponentAlternation;
+class ComponentAssertion;
+class ComponentAtomicGroup;
+class ComponentBackReference;
+class ComponentBoundary;
+class ComponentByte;
+class ComponentCondReference;
+class ComponentClass;
+class ComponentEmpty;
+class ComponentEUS;
+class ComponentRepeat;
+class ComponentSequence;
+class ComponentWordBoundary;
+class UTF8ComponentClass;
+
+/**
+ * \brief Visitor base class for traversing an immutable component tree.
+ *
+ * Our approach to implementing the visitor pattern for traversing the
+ * Component tree for a pattern. This version operates on an immutable tree;
+ * use \ref ComponentVisitor if you need to make changes to components during
+ * traversal.
+ */
+class ConstComponentVisitor {
+public:
+ virtual ~ConstComponentVisitor();
+
+ virtual void pre(const AsciiComponentClass &c) = 0;
+ virtual void pre(const ComponentAlternation &c) = 0;
+ virtual void pre(const ComponentAssertion &c) = 0;
+ virtual void pre(const ComponentAtomicGroup &c) = 0;
+ virtual void pre(const ComponentBackReference &c) = 0;
+ virtual void pre(const ComponentBoundary &c) = 0;
+ virtual void pre(const ComponentByte &c) = 0;
+ virtual void pre(const ComponentCondReference &c) = 0;
+ virtual void pre(const ComponentEmpty &c) = 0;
+ virtual void pre(const ComponentEUS &c) = 0;
+ virtual void pre(const ComponentRepeat &c) = 0;
+ virtual void pre(const ComponentSequence &c) = 0;
+ virtual void pre(const ComponentWordBoundary &c) = 0;
+ virtual void pre(const UTF8ComponentClass &c) = 0;
+
+ virtual void during(const AsciiComponentClass &c) = 0;
+ virtual void during(const ComponentAlternation &c) = 0;
+ virtual void during(const ComponentAssertion &c) = 0;
+ virtual void during(const ComponentAtomicGroup &c) = 0;
+ virtual void during(const ComponentBackReference &c) = 0;
+ virtual void during(const ComponentBoundary &c) = 0;
+ virtual void during(const ComponentByte &c) = 0;
+ virtual void during(const ComponentCondReference &c) = 0;
+ virtual void during(const ComponentEmpty &c) = 0;
+ virtual void during(const ComponentEUS &c) = 0;
+ virtual void during(const ComponentRepeat &c) = 0;
+ virtual void during(const ComponentSequence &c) = 0;
+ virtual void during(const ComponentWordBoundary &c) = 0;
+ virtual void during(const UTF8ComponentClass &c) = 0;
+
+ virtual void post(const AsciiComponentClass &c) = 0;
+ virtual void post(const ComponentAlternation &c) = 0;
+ virtual void post(const ComponentAssertion &c) = 0;
+ virtual void post(const ComponentAtomicGroup &c) = 0;
+ virtual void post(const ComponentBackReference &c) = 0;
+ virtual void post(const ComponentBoundary &c) = 0;
+ virtual void post(const ComponentByte &c) = 0;
+ virtual void post(const ComponentCondReference &c) = 0;
+ virtual void post(const ComponentEmpty &c) = 0;
+ virtual void post(const ComponentEUS &c) = 0;
+ virtual void post(const ComponentRepeat &c) = 0;
+ virtual void post(const ComponentSequence &c) = 0;
+ virtual void post(const ComponentWordBoundary &c) = 0;
+ virtual void post(const UTF8ComponentClass &c) = 0;
+};
+
+/**
+ * \brief Concrete subclass of ConstComponentVisitor with default behaviour,
+ * allowing you to just implement the member functions you need.
+ */
+class DefaultConstComponentVisitor : public ConstComponentVisitor {
+public:
+ DefaultConstComponentVisitor();
+ ~DefaultConstComponentVisitor() override;
+
+ void pre(const AsciiComponentClass &c) override;
+ void pre(const ComponentAlternation &c) override;
+ void pre(const ComponentAssertion &c) override;
+ void pre(const ComponentAtomicGroup &c) override;
+ void pre(const ComponentBackReference &c) override;
+ void pre(const ComponentBoundary &c) override;
+ void pre(const ComponentByte &c) override;
+ void pre(const ComponentCondReference &c) override;
+ void pre(const ComponentEmpty &c) override;
+ void pre(const ComponentEUS &c) override;
+ void pre(const ComponentRepeat &c) override;
+ void pre(const ComponentSequence &c) override;
+ void pre(const ComponentWordBoundary &c) override;
+ void pre(const UTF8ComponentClass &c) override;
+
+ void during(const AsciiComponentClass &c) override;
+ void during(const ComponentAlternation &c) override;
+ void during(const ComponentAssertion &c) override;
+ void during(const ComponentAtomicGroup &c) override;
+ void during(const ComponentBackReference &c) override;
+ void during(const ComponentBoundary &c) override;
+ void during(const ComponentByte &c) override;
+ void during(const ComponentCondReference &c) override;
+ void during(const ComponentEmpty &c) override;
+ void during(const ComponentEUS &c) override;
+ void during(const ComponentRepeat &c) override;
+ void during(const ComponentSequence &c) override;
+ void during(const ComponentWordBoundary &c) override;
+ void during(const UTF8ComponentClass &c) override;
+
+ void post(const AsciiComponentClass &c) override;
+ void post(const ComponentAlternation &c) override;
+ void post(const ComponentAssertion &c) override;
+ void post(const ComponentAtomicGroup &c) override;
+ void post(const ComponentBackReference &c) override;
+ void post(const ComponentBoundary &c) override;
+ void post(const ComponentByte &c) override;
+ void post(const ComponentCondReference &c) override;
+ void post(const ComponentEmpty &c) override;
+ void post(const ComponentEUS &c) override;
+ void post(const ComponentRepeat &c) override;
+ void post(const ComponentSequence &c) override;
+ void post(const ComponentWordBoundary &c) override;
+ void post(const UTF8ComponentClass &c) override;
+};
+
+} // namespace ue2
+
+#endif // CONSTCOMPONENTVISITOR_H
diff --git a/contrib/libs/hyperscan/src/parser/Parser.h b/contrib/libs/hyperscan/src/parser/Parser.h
index f66506b396..a034a18fc1 100644
--- a/contrib/libs/hyperscan/src/parser/Parser.h
+++ b/contrib/libs/hyperscan/src/parser/Parser.h
@@ -1,76 +1,76 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Interface to Parser.
- */
-
-#ifndef _RE_PARSER_H_
-#define _RE_PARSER_H_
-
-#include "ue2common.h"
-
-#include <memory>
-
-namespace ue2 {
-
-class Component;
-
-/** \brief Represents the current "mode flags" at any point in the parsing
- * process.
- *
- * This is necessary as some modes can be changed part-way through an
- * expression, such as in:
- *
- * /foo(?i)bar/
- */
-struct ParseMode {
- ParseMode() {}
- explicit ParseMode(u32 hs_flags);
-
- bool caseless = false;
- bool dotall = false;
- bool ignore_space = false;
- bool multiline = false;
- bool ucp = false;
- bool utf8 = false;
-};
-
-/** \brief Parse the given regular expression into a \ref Component tree.
- *
- * The \a mode parameter should contain the initial mode flags, and will be
- * updated by the parser if additional global flags are introduced in the
- * expression (for example, via "(*UTF8)".)
- *
- * This call will throw a ParseError on failure.
- */
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Interface to Parser.
+ */
+
+#ifndef _RE_PARSER_H_
+#define _RE_PARSER_H_
+
+#include "ue2common.h"
+
+#include <memory>
+
+namespace ue2 {
+
+class Component;
+
+/** \brief Represents the current "mode flags" at any point in the parsing
+ * process.
+ *
+ * This is necessary as some modes can be changed part-way through an
+ * expression, such as in:
+ *
+ * /foo(?i)bar/
+ */
+struct ParseMode {
+ ParseMode() {}
+ explicit ParseMode(u32 hs_flags);
+
+ bool caseless = false;
+ bool dotall = false;
+ bool ignore_space = false;
+ bool multiline = false;
+ bool ucp = false;
+ bool utf8 = false;
+};
+
+/** \brief Parse the given regular expression into a \ref Component tree.
+ *
+ * The \a mode parameter should contain the initial mode flags, and will be
+ * updated by the parser if additional global flags are introduced in the
+ * expression (for example, via "(*UTF8)".)
+ *
+ * This call will throw a ParseError on failure.
+ */
std::unique_ptr<Component> parse(const char *ptr, ParseMode &mode);
-
-} // namespace ue2
-
-#endif // _RE_PARSER_H_
+
+} // namespace ue2
+
+#endif // _RE_PARSER_H_
diff --git a/contrib/libs/hyperscan/src/parser/Parser.rl6 b/contrib/libs/hyperscan/src/parser/Parser.rl6
index e923549407..8643aebfc6 100644
--- a/contrib/libs/hyperscan/src/parser/Parser.rl6
+++ b/contrib/libs/hyperscan/src/parser/Parser.rl6
@@ -1,565 +1,565 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Parser code (generated with Ragel from Parser.rl).
- */
-
-#include "config.h"
-
-/* Parser.cpp is a built source, may not be in same dir as parser files */
-#include "parser/check_refs.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Parser code (generated with Ragel from Parser.rl).
+ */
+
+#include "config.h"
+
+/* Parser.cpp is a built source, may not be in same dir as parser files */
+#include "parser/check_refs.h"
#include "parser/control_verbs.h"
-#include "parser/ComponentAlternation.h"
-#include "parser/ComponentAssertion.h"
-#include "parser/ComponentAtomicGroup.h"
-#include "parser/ComponentBackReference.h"
-#include "parser/ComponentBoundary.h"
-#include "parser/ComponentByte.h"
-#include "parser/ComponentClass.h"
-#include "parser/ComponentCondReference.h"
-#include "parser/ComponentEmpty.h"
-#include "parser/ComponentEUS.h"
-#include "parser/Component.h"
-#include "parser/ComponentRepeat.h"
-#include "parser/ComponentSequence.h"
-#include "parser/ComponentWordBoundary.h"
-#include "parser/parse_error.h"
-#include "parser/Parser.h"
-#include "ue2common.h"
-#include "util/compare.h"
+#include "parser/ComponentAlternation.h"
+#include "parser/ComponentAssertion.h"
+#include "parser/ComponentAtomicGroup.h"
+#include "parser/ComponentBackReference.h"
+#include "parser/ComponentBoundary.h"
+#include "parser/ComponentByte.h"
+#include "parser/ComponentClass.h"
+#include "parser/ComponentCondReference.h"
+#include "parser/ComponentEmpty.h"
+#include "parser/ComponentEUS.h"
+#include "parser/Component.h"
+#include "parser/ComponentRepeat.h"
+#include "parser/ComponentSequence.h"
+#include "parser/ComponentWordBoundary.h"
+#include "parser/parse_error.h"
+#include "parser/Parser.h"
+#include "ue2common.h"
+#include "util/compare.h"
#include "util/flat_containers.h"
-#include "util/make_unique.h"
-#include "util/unicode_def.h"
-#include "util/verify_types.h"
-
-#include <cassert>
-#include <cctype>
-#include <cstring>
-#include <cstdlib>
-#include <map>
-#include <sstream>
-#include <string>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-#define PUSH_SEQUENCE do {\
- sequences.push_back(ExprState(currentSeq, (size_t)(ts - ptr), \
- mode)); \
- } while(0)
-#define POP_SEQUENCE do {\
- currentSeq = sequences.back().seq; \
- mode = sequences.back().mode; \
- sequences.pop_back(); \
- } while(0)
-
-namespace {
-
-/** \brief Structure representing current state as we're parsing (current
- * sequence, current options). Stored in the 'sequences' vector. */
-struct ExprState {
- ExprState(ComponentSequence *seq_in, size_t offset,
- const ParseMode &mode_in) :
- seq(seq_in), seqOffset(offset), mode(mode_in) {}
-
- ComponentSequence *seq; //!< current sequence
- size_t seqOffset; //!< offset seq was entered, for error reporting
- ParseMode mode; //!< current mode flags
-};
-
-} // namespace
-
-static
-unsigned parseAsDecimal(unsigned oct) {
- // The input was parsed as octal, but should have been parsed as decimal.
- // Deconstruct the octal number and reconstruct into decimal
- unsigned ret = 0;
- unsigned multiplier = 1;
- while (oct) {
- ret += (oct & 0x7) * multiplier;
- oct >>= 3;
- multiplier *= 10;
- }
- return ret;
-}
-
-/** \brief Maximum value for a positive integer. We use INT_MAX, as that's what
- * PCRE uses. */
-static constexpr u32 MAX_NUMBER = INT_MAX;
-
-static
+#include "util/make_unique.h"
+#include "util/unicode_def.h"
+#include "util/verify_types.h"
+
+#include <cassert>
+#include <cctype>
+#include <cstring>
+#include <cstdlib>
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+#define PUSH_SEQUENCE do {\
+ sequences.push_back(ExprState(currentSeq, (size_t)(ts - ptr), \
+ mode)); \
+ } while(0)
+#define POP_SEQUENCE do {\
+ currentSeq = sequences.back().seq; \
+ mode = sequences.back().mode; \
+ sequences.pop_back(); \
+ } while(0)
+
+namespace {
+
+/** \brief Structure representing current state as we're parsing (current
+ * sequence, current options). Stored in the 'sequences' vector. */
+struct ExprState {
+ ExprState(ComponentSequence *seq_in, size_t offset,
+ const ParseMode &mode_in) :
+ seq(seq_in), seqOffset(offset), mode(mode_in) {}
+
+ ComponentSequence *seq; //!< current sequence
+ size_t seqOffset; //!< offset seq was entered, for error reporting
+ ParseMode mode; //!< current mode flags
+};
+
+} // namespace
+
+static
+unsigned parseAsDecimal(unsigned oct) {
+ // The input was parsed as octal, but should have been parsed as decimal.
+ // Deconstruct the octal number and reconstruct into decimal
+ unsigned ret = 0;
+ unsigned multiplier = 1;
+ while (oct) {
+ ret += (oct & 0x7) * multiplier;
+ oct >>= 3;
+ multiplier *= 10;
+ }
+ return ret;
+}
+
+/** \brief Maximum value for a positive integer. We use INT_MAX, as that's what
+ * PCRE uses. */
+static constexpr u32 MAX_NUMBER = INT_MAX;
+
+static
void pushDec(u32 *acc, char raw_digit) {
- assert(raw_digit >= '0' && raw_digit <= '9');
- u32 digit_val = raw_digit - '0';
-
- // Ensure that we don't overflow.
- u64a val = ((u64a)*acc * 10) + digit_val;
- if (val > MAX_NUMBER) {
- throw LocatedParseError("Number is too big");
- }
-
- *acc = verify_u32(val);
-}
-
-static
+ assert(raw_digit >= '0' && raw_digit <= '9');
+ u32 digit_val = raw_digit - '0';
+
+ // Ensure that we don't overflow.
+ u64a val = ((u64a)*acc * 10) + digit_val;
+ if (val > MAX_NUMBER) {
+ throw LocatedParseError("Number is too big");
+ }
+
+ *acc = verify_u32(val);
+}
+
+static
void pushOct(u32 *acc, char raw_digit) {
- assert(raw_digit >= '0' && raw_digit <= '7');
- u32 digit_val = raw_digit - '0';
-
- // Ensure that we don't overflow.
- u64a val = ((u64a)*acc * 8) + digit_val;
- if (val > MAX_NUMBER) {
- throw LocatedParseError("Number is too big");
- }
-
- *acc = verify_u32(val);
-}
-
-static
-void throwInvalidRepeat(void) {
- throw LocatedParseError("Invalid repeat");
-}
-
-static
-void throwInvalidUtf8(void) {
- throw ParseError("Expression is not valid UTF-8.");
-}
-
-/**
- * Adds the given child component to the parent sequence, returning a pointer
- * to the new (child) "current sequence".
- */
-static
-ComponentSequence *enterSequence(ComponentSequence *parent,
- unique_ptr<ComponentSequence> child) {
- assert(parent);
- assert(child);
-
- ComponentSequence *seq = child.get();
- parent->addComponent(move(child));
- return seq;
-}
-
-static
+ assert(raw_digit >= '0' && raw_digit <= '7');
+ u32 digit_val = raw_digit - '0';
+
+ // Ensure that we don't overflow.
+ u64a val = ((u64a)*acc * 8) + digit_val;
+ if (val > MAX_NUMBER) {
+ throw LocatedParseError("Number is too big");
+ }
+
+ *acc = verify_u32(val);
+}
+
+static
+void throwInvalidRepeat(void) {
+ throw LocatedParseError("Invalid repeat");
+}
+
+static
+void throwInvalidUtf8(void) {
+ throw ParseError("Expression is not valid UTF-8.");
+}
+
+/**
+ * Adds the given child component to the parent sequence, returning a pointer
+ * to the new (child) "current sequence".
+ */
+static
+ComponentSequence *enterSequence(ComponentSequence *parent,
+ unique_ptr<ComponentSequence> child) {
+ assert(parent);
+ assert(child);
+
+ ComponentSequence *seq = child.get();
+ parent->addComponent(move(child));
+ return seq;
+}
+
+static
void addLiteral(ComponentSequence *currentSeq, char c, const ParseMode &mode) {
- if (mode.utf8 && mode.caseless) {
- /* leverage ComponentClass to generate the vertices */
- auto cc = getComponentClass(mode);
- assert(cc);
- cc->add(c);
- cc->finalize();
- currentSeq->addComponent(move(cc));
- } else {
- currentSeq->addComponent(getLiteralComponentClass(c, mode.caseless));
- }
-}
-
-static
-void addEscaped(ComponentSequence *currentSeq, unichar accum,
- const ParseMode &mode, const char *err_msg) {
- if (mode.utf8) {
- /* leverage ComponentClass to generate the vertices */
- auto cc = getComponentClass(mode);
- assert(cc);
- cc->add(accum);
- cc->finalize();
- currentSeq->addComponent(move(cc));
- } else {
- if (accum > 255) {
- throw LocatedParseError(err_msg);
- }
+ if (mode.utf8 && mode.caseless) {
+ /* leverage ComponentClass to generate the vertices */
+ auto cc = getComponentClass(mode);
+ assert(cc);
+ cc->add(c);
+ cc->finalize();
+ currentSeq->addComponent(move(cc));
+ } else {
+ currentSeq->addComponent(getLiteralComponentClass(c, mode.caseless));
+ }
+}
+
+static
+void addEscaped(ComponentSequence *currentSeq, unichar accum,
+ const ParseMode &mode, const char *err_msg) {
+ if (mode.utf8) {
+ /* leverage ComponentClass to generate the vertices */
+ auto cc = getComponentClass(mode);
+ assert(cc);
+ cc->add(accum);
+ cc->finalize();
+ currentSeq->addComponent(move(cc));
+ } else {
+ if (accum > 255) {
+ throw LocatedParseError(err_msg);
+ }
addLiteral(currentSeq, (char)accum, mode);
- }
-}
-
-static
-void addEscapedOctal(ComponentSequence *currentSeq, unichar accum,
- const ParseMode &mode) {
- addEscaped(currentSeq, accum, mode, "Octal value is greater than \\377");
-}
-
-static
-void addEscapedHex(ComponentSequence *currentSeq, unichar accum,
- const ParseMode &mode) {
- addEscaped(currentSeq, accum, mode,
- "Hexadecimal value is greater than \\xFF");
-}
-
-#define SLASH_C_ERROR "\\c must be followed by an ASCII character"
-
-static
+ }
+}
+
+static
+void addEscapedOctal(ComponentSequence *currentSeq, unichar accum,
+ const ParseMode &mode) {
+ addEscaped(currentSeq, accum, mode, "Octal value is greater than \\377");
+}
+
+static
+void addEscapedHex(ComponentSequence *currentSeq, unichar accum,
+ const ParseMode &mode) {
+ addEscaped(currentSeq, accum, mode,
+ "Hexadecimal value is greater than \\xFF");
+}
+
+#define SLASH_C_ERROR "\\c must be followed by an ASCII character"
+
+static
u8 decodeCtrl(char raw) {
- if (raw & 0x80) {
- throw LocatedParseError(SLASH_C_ERROR);
- }
- return mytoupper(raw) ^ 0x40;
-}
-
-static
+ if (raw & 0x80) {
+ throw LocatedParseError(SLASH_C_ERROR);
+ }
+ return mytoupper(raw) ^ 0x40;
+}
+
+static
unichar readUtf8CodePoint2c(const char *s) {
auto *ts = (const u8 *)s;
- assert(ts[0] >= 0xc0 && ts[0] < 0xe0);
- assert(ts[1] >= 0x80 && ts[1] < 0xc0);
- unichar val = ts[0] & 0x1f;
- val <<= 6;
- val |= ts[1] & 0x3f;
- DEBUG_PRINTF("utf8 %02hhx %02hhx ->\\x{%x}\n", ts[0],
- ts[1], val);
- return val;
-}
-
-static
+ assert(ts[0] >= 0xc0 && ts[0] < 0xe0);
+ assert(ts[1] >= 0x80 && ts[1] < 0xc0);
+ unichar val = ts[0] & 0x1f;
+ val <<= 6;
+ val |= ts[1] & 0x3f;
+ DEBUG_PRINTF("utf8 %02hhx %02hhx ->\\x{%x}\n", ts[0],
+ ts[1], val);
+ return val;
+}
+
+static
unichar readUtf8CodePoint3c(const char *s) {
auto *ts = (const u8 *)s;
- assert(ts[0] >= 0xe0 && ts[0] < 0xf0);
- assert(ts[1] >= 0x80 && ts[1] < 0xc0);
- assert(ts[2] >= 0x80 && ts[2] < 0xc0);
- unichar val = ts[0] & 0x0f;
- val <<= 6;
- val |= ts[1] & 0x3f;
- val <<= 6;
- val |= ts[2] & 0x3f;
- DEBUG_PRINTF("utf8 %02hhx %02hhx %02hhx ->\\x{%x}\n", ts[0],
- ts[1], ts[2], val);
- return val;
-}
-
-static
+ assert(ts[0] >= 0xe0 && ts[0] < 0xf0);
+ assert(ts[1] >= 0x80 && ts[1] < 0xc0);
+ assert(ts[2] >= 0x80 && ts[2] < 0xc0);
+ unichar val = ts[0] & 0x0f;
+ val <<= 6;
+ val |= ts[1] & 0x3f;
+ val <<= 6;
+ val |= ts[2] & 0x3f;
+ DEBUG_PRINTF("utf8 %02hhx %02hhx %02hhx ->\\x{%x}\n", ts[0],
+ ts[1], ts[2], val);
+ return val;
+}
+
+static
unichar readUtf8CodePoint4c(const char *s) {
auto *ts = (const u8 *)s;
- assert(ts[0] >= 0xf0 && ts[0] < 0xf8);
- assert(ts[1] >= 0x80 && ts[1] < 0xc0);
- assert(ts[2] >= 0x80 && ts[2] < 0xc0);
- assert(ts[3] >= 0x80 && ts[3] < 0xc0);
- unichar val = ts[0] & 0x07;
- val <<= 6;
- val |= ts[1] & 0x3f;
- val <<= 6;
- val |= ts[2] & 0x3f;
- val <<= 6;
- val |= ts[3] & 0x3f;
- DEBUG_PRINTF("utf8 %02hhx %02hhx %02hhx %02hhx ->\\x{%x}\n", ts[0],
- ts[1], ts[2], ts[3], val);
- return val;
-}
-
-%%{
- machine regex;
-
- action throwUnsupportedEscape {
- ostringstream str;
+ assert(ts[0] >= 0xf0 && ts[0] < 0xf8);
+ assert(ts[1] >= 0x80 && ts[1] < 0xc0);
+ assert(ts[2] >= 0x80 && ts[2] < 0xc0);
+ assert(ts[3] >= 0x80 && ts[3] < 0xc0);
+ unichar val = ts[0] & 0x07;
+ val <<= 6;
+ val |= ts[1] & 0x3f;
+ val <<= 6;
+ val |= ts[2] & 0x3f;
+ val <<= 6;
+ val |= ts[3] & 0x3f;
+ DEBUG_PRINTF("utf8 %02hhx %02hhx %02hhx %02hhx ->\\x{%x}\n", ts[0],
+ ts[1], ts[2], ts[3], val);
+ return val;
+}
+
+%%{
+ machine regex;
+
+ action throwUnsupportedEscape {
+ ostringstream str;
str << "'\\" << *(ts + 1) << "' at index " << ts - ptr
<< " not supported in a character class.";
- throw ParseError(str.str());
- }
- action unsupportedProperty {
- throw LocatedParseError("Character property not supported");
- }
- action clearLabel { label.clear();}
- action appendLabelCharacter { label.push_back(fc);}
- action clearOctAccumulator { octAccumulator = 0;}
- action clearAccumulator { accumulator = 0;}
- action setOctAccumulator {
- octAccumulator = 0;
- pushOct(&octAccumulator, fc);
- }
- action setDecAccumulator {
- accumulator = 0;
- pushDec(&accumulator, fc);
- }
- action clearNM { repeatN = 0; repeatM = 0; }
- action appendN { pushDec(&repeatN, fc); }
- action appendM { pushDec(&repeatM, fc); }
- action appendAccumulatorOctDigit { pushOct(&octAccumulator, fc); }
- action appendAccumulatorDecDigit { pushDec(&accumulator, fc); }
- action appendAccumulatorHexDigit {
- accumulator *= 16;
- accumulator += fc - '0';
- }
- action appendAccumulatorHexL {
- accumulator *= 16;
- accumulator += 10 + fc - 'a';
- }
- action appendAccumulatorHexU {
- accumulator *= 16;
- accumulator += 10 + fc - 'A';
- }
-
- # enter a comment group, where we just scan for a close paren.
- action enterComment {
- inComment = true;
- fgoto readComment;
- }
-
- # enter an extended mode comment, where we just scan for a newline.
- action enterNewlineTerminatedComment {
- inComment = true;
- fgoto readNewlineTerminatedComment;
- }
-
- # enter a CAPTURING group ( e.g. '(blah)' )
- action enterCapturingGroup {
- PUSH_SEQUENCE;
- auto seq = ue2::make_unique<ComponentSequence>();
- seq->setCaptureIndex(groupIndex++);
- currentSeq = enterSequence(currentSeq, move(seq));
- }
-
- # enter a NAMED CAPTURING group ( e.g. (?'<hatstand>blah) )
- action enterNamedGroup {
- assert(!label.empty()); // should be guaranteed by machine
- char c = *label.begin();
- if (c >= '0' && c <= '9') {
- throw LocatedParseError("Group name cannot begin with a digit");
- }
- if (!groupNames.insert(label).second) {
- throw LocatedParseError("Two named subpatterns use the name '" + label + "'");
- }
- PUSH_SEQUENCE;
- auto seq = ue2::make_unique<ComponentSequence>();
- seq->setCaptureIndex(groupIndex++);
- seq->setCaptureName(label);
- currentSeq = enterSequence(currentSeq, move(seq));
- }
-
- # enter a NON-CAPTURING group where we're modifying flags
- # ( e.g. '(?i:blah)' ). Standard non-capturing groups use this path
- # as well.
- action enterModifiedGroup {
- PUSH_SEQUENCE;
- mode = newMode;
- currentSeq =
- enterSequence(currentSeq, ue2::make_unique<ComponentSequence>());
- }
-
- action exitGroup {
- if (sequences.empty()) {
- throw LocatedParseError("Unmatched parentheses");
- }
- currentSeq->finalize();
- POP_SEQUENCE;
- }
- action enterZWLookAhead {
- PUSH_SEQUENCE;
- currentSeq = enterSequence(currentSeq,
- ue2::make_unique<ComponentAssertion>(ComponentAssertion::LOOKAHEAD,
- ComponentAssertion::POS));
- }
- action enterZWNegLookAhead {
- PUSH_SEQUENCE;
- currentSeq = enterSequence(currentSeq,
- ue2::make_unique<ComponentAssertion>(ComponentAssertion::LOOKAHEAD,
- ComponentAssertion::NEG));
- }
- action enterZWLookBehind {
- PUSH_SEQUENCE;
- currentSeq = enterSequence(currentSeq,
- ue2::make_unique<ComponentAssertion>(ComponentAssertion::LOOKBEHIND,
- ComponentAssertion::POS));
- }
- action enterZWNegLookBehind {
- PUSH_SEQUENCE;
- currentSeq = enterSequence(currentSeq,
- ue2::make_unique<ComponentAssertion>(ComponentAssertion::LOOKBEHIND,
- ComponentAssertion::NEG));
- }
- action enterEmbeddedCode {
- throw LocatedParseError("Embedded code is not supported");
- }
- action enterConditionUnsupported {
- throw LocatedParseError("Conditional subpattern unsupported");
- }
- action enterReferenceUnsupported {
- throw LocatedParseError("Subpattern reference unsupported");
- }
- action enterNumberedConditionalRef {
- if (accumulator == 0) {
- throw LocatedParseError("Numbered reference cannot be zero");
- }
- PUSH_SEQUENCE;
- currentSeq = enterSequence(currentSeq,
- ue2::make_unique<ComponentCondReference>(accumulator));
- }
- action enterNamedConditionalRef {
- PUSH_SEQUENCE;
- assert(!label.empty());
- currentSeq = enterSequence(currentSeq,
- ue2::make_unique<ComponentCondReference>(label));
- }
- action enterAtomicGroup {
- PUSH_SEQUENCE;
- currentSeq = enterSequence(currentSeq,
- ue2::make_unique<ComponentAtomicGroup>());
- }
- action eatClass {
- assert(!currentCls);
- assert(!inCharClass); // not reentrant
- currentCls = getComponentClass(mode);
- inCharClass = true;
- inCharClassEarly = true;
- currentClsBegin = ts;
- fgoto readClass;
- }
- action resetModifiers {
- newMode = mode;
- }
- action applyModifiers {
- mode = newMode;
- currentSeq->addComponent(ue2::make_unique<ComponentEmpty>());
- }
- action modifyMatchPositive {
- switch (fc) {
- case 'i':
- newMode.caseless = true;
- break;
- case 'm':
- newMode.multiline = true;
- break;
- case 's':
- newMode.dotall = true;
- break;
- case 'x':
- newMode.ignore_space = true;
- break;
- default:
- assert(0); // this action only called for [imsx]
- break;
- }
- }
- action modifyMatchNegative {
- switch (fc) {
- case 'i':
- newMode.caseless = false;
- break;
- case 'm':
- newMode.multiline = false;
- break;
- case 's':
- newMode.dotall = false;
- break;
- case 'x':
- newMode.ignore_space = false;
- break;
- default:
- assert(0); // this action only called for [imsx]
- break;
- }
- }
- action is_utf8 { mode.utf8 }
- action is_ignore_space { mode.ignore_space }
- action is_early_charclass { inCharClassEarly }
-
- action addNumberedBackRef {
- if (accumulator == 0) {
- throw LocatedParseError("Numbered reference cannot be zero");
- }
- currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(accumulator));
- }
-
- action addNegativeNumberedBackRef {
- // Accumulator is a negative offset.
- if (accumulator == 0) {
- throw LocatedParseError("Numbered reference cannot be zero");
- }
- if (accumulator >= groupIndex) {
- throw LocatedParseError("Invalid reference");
- }
- unsigned idx = groupIndex - accumulator;
- currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(idx));
- }
-
- action addNamedBackRef {
- currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(label));
- }
-
- escapedOctal0 = '\\0' @clearOctAccumulator [0-7]{0,2} $appendAccumulatorOctDigit;
- escapedOctal2 = '\\' [1-7] $setOctAccumulator [0-7]{1,2} $appendAccumulatorOctDigit;
- escapedOctal2c = '\\' [1-7] $setOctAccumulator [0-7]{0,2} $appendAccumulatorOctDigit;
- backRefIdSingle = [1-7] $setDecAccumulator;
- backRefId = [1-9] $setDecAccumulator [0-9]+ $appendAccumulatorDecDigit;
- escapedHex = '\\x' @clearAccumulator ([0-9] $appendAccumulatorHexDigit | [a-f] $appendAccumulatorHexL | [A-F] $appendAccumulatorHexU){0,2};
- escapedCtrl = '\\c' any?;
- escapedUnsupported = '\\' [NluLU];
- repeatNM1 = '\{' @clearNM [0-9]+ $appendN ('}' @{repeatM = repeatN;} | ',' '\}' @{repeatM = ComponentRepeat::NoLimit;} | ',' [0-9]+ $appendM '}');
-
- backReferenceG = '\\g' @clearAccumulator [0-9]{1,3} $appendAccumulatorDecDigit;
- backReferenceGNegative = '\\g-' @clearAccumulator [0-9]{1,3} $appendAccumulatorDecDigit;
- backReferenceGBracket = '\\g{' @clearAccumulator [0-9]{1,3} $appendAccumulatorDecDigit '}';
- backReferenceGBracket2 = '\\g{-' @clearAccumulator [0-9]{1,3} $appendAccumulatorDecDigit '}';
- backReferenceGBracketName = '\\g{' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '}';
- backReferenceKBracketName = '\\k{' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '}';
- backReferenceKBracketName2 = '\\k<' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '>';
- backReferenceKBracketName3 = '\\k\'' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '\'';
- backReferenceP = '(?P=' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter ')';
-
- namedGroup1 = '(?<' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '>';
- namedGroup2 = '(?\'' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '\'';
- namedGroup3 = '(?P<' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '>';
-
- namedConditionalRef1 = '(?(<' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '>)';
- namedConditionalRef2 = '(?(\'' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '\')';
- namedConditionalRef3 = '(?(' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter ')';
-
- numberedSubExpression = '(?' [+\-]? [0-9]+ ')';
- namedSubExpression = '(?' ('&'|'P>') [A-Za-z0-9_]+ ')';
-
- positiveMatchModifiers = [imsx]+ $modifyMatchPositive;
- negativeMatchModifiers = '-' [imsx]+ $modifyMatchNegative;
- matchModifiers = positiveMatchModifiers ? negativeMatchModifiers ?;
-
- utf8_cont = 0x80..0xbf;
- utf8_2c = 0xc0..0xdf utf8_cont;
- utf8_3c = 0xe0..0xef utf8_cont utf8_cont;
- utf8_4c = 0xf0..0xf7 utf8_cont utf8_cont utf8_cont;
- hi_byte = 0x80..0xff;
-
- whitespace = [\t\n\v\f\r ];
-
- #############################################################
- # Trivial parser to read Perl 5.10+ control verbs, introduced
- # by '(*'.
- #############################################################
- readVerb := |*
- 'UTF8)' => {
+ throw ParseError(str.str());
+ }
+ action unsupportedProperty {
+ throw LocatedParseError("Character property not supported");
+ }
+ action clearLabel { label.clear();}
+ action appendLabelCharacter { label.push_back(fc);}
+ action clearOctAccumulator { octAccumulator = 0;}
+ action clearAccumulator { accumulator = 0;}
+ action setOctAccumulator {
+ octAccumulator = 0;
+ pushOct(&octAccumulator, fc);
+ }
+ action setDecAccumulator {
+ accumulator = 0;
+ pushDec(&accumulator, fc);
+ }
+ action clearNM { repeatN = 0; repeatM = 0; }
+ action appendN { pushDec(&repeatN, fc); }
+ action appendM { pushDec(&repeatM, fc); }
+ action appendAccumulatorOctDigit { pushOct(&octAccumulator, fc); }
+ action appendAccumulatorDecDigit { pushDec(&accumulator, fc); }
+ action appendAccumulatorHexDigit {
+ accumulator *= 16;
+ accumulator += fc - '0';
+ }
+ action appendAccumulatorHexL {
+ accumulator *= 16;
+ accumulator += 10 + fc - 'a';
+ }
+ action appendAccumulatorHexU {
+ accumulator *= 16;
+ accumulator += 10 + fc - 'A';
+ }
+
+ # enter a comment group, where we just scan for a close paren.
+ action enterComment {
+ inComment = true;
+ fgoto readComment;
+ }
+
+ # enter an extended mode comment, where we just scan for a newline.
+ action enterNewlineTerminatedComment {
+ inComment = true;
+ fgoto readNewlineTerminatedComment;
+ }
+
+ # enter a CAPTURING group ( e.g. '(blah)' )
+ action enterCapturingGroup {
+ PUSH_SEQUENCE;
+ auto seq = ue2::make_unique<ComponentSequence>();
+ seq->setCaptureIndex(groupIndex++);
+ currentSeq = enterSequence(currentSeq, move(seq));
+ }
+
+ # enter a NAMED CAPTURING group ( e.g. (?'<hatstand>blah) )
+ action enterNamedGroup {
+ assert(!label.empty()); // should be guaranteed by machine
+ char c = *label.begin();
+ if (c >= '0' && c <= '9') {
+ throw LocatedParseError("Group name cannot begin with a digit");
+ }
+ if (!groupNames.insert(label).second) {
+ throw LocatedParseError("Two named subpatterns use the name '" + label + "'");
+ }
+ PUSH_SEQUENCE;
+ auto seq = ue2::make_unique<ComponentSequence>();
+ seq->setCaptureIndex(groupIndex++);
+ seq->setCaptureName(label);
+ currentSeq = enterSequence(currentSeq, move(seq));
+ }
+
+ # enter a NON-CAPTURING group where we're modifying flags
+ # ( e.g. '(?i:blah)' ). Standard non-capturing groups use this path
+ # as well.
+ action enterModifiedGroup {
+ PUSH_SEQUENCE;
+ mode = newMode;
+ currentSeq =
+ enterSequence(currentSeq, ue2::make_unique<ComponentSequence>());
+ }
+
+ action exitGroup {
+ if (sequences.empty()) {
+ throw LocatedParseError("Unmatched parentheses");
+ }
+ currentSeq->finalize();
+ POP_SEQUENCE;
+ }
+ action enterZWLookAhead {
+ PUSH_SEQUENCE;
+ currentSeq = enterSequence(currentSeq,
+ ue2::make_unique<ComponentAssertion>(ComponentAssertion::LOOKAHEAD,
+ ComponentAssertion::POS));
+ }
+ action enterZWNegLookAhead {
+ PUSH_SEQUENCE;
+ currentSeq = enterSequence(currentSeq,
+ ue2::make_unique<ComponentAssertion>(ComponentAssertion::LOOKAHEAD,
+ ComponentAssertion::NEG));
+ }
+ action enterZWLookBehind {
+ PUSH_SEQUENCE;
+ currentSeq = enterSequence(currentSeq,
+ ue2::make_unique<ComponentAssertion>(ComponentAssertion::LOOKBEHIND,
+ ComponentAssertion::POS));
+ }
+ action enterZWNegLookBehind {
+ PUSH_SEQUENCE;
+ currentSeq = enterSequence(currentSeq,
+ ue2::make_unique<ComponentAssertion>(ComponentAssertion::LOOKBEHIND,
+ ComponentAssertion::NEG));
+ }
+ action enterEmbeddedCode {
+ throw LocatedParseError("Embedded code is not supported");
+ }
+ action enterConditionUnsupported {
+ throw LocatedParseError("Conditional subpattern unsupported");
+ }
+ action enterReferenceUnsupported {
+ throw LocatedParseError("Subpattern reference unsupported");
+ }
+ action enterNumberedConditionalRef {
+ if (accumulator == 0) {
+ throw LocatedParseError("Numbered reference cannot be zero");
+ }
+ PUSH_SEQUENCE;
+ currentSeq = enterSequence(currentSeq,
+ ue2::make_unique<ComponentCondReference>(accumulator));
+ }
+ action enterNamedConditionalRef {
+ PUSH_SEQUENCE;
+ assert(!label.empty());
+ currentSeq = enterSequence(currentSeq,
+ ue2::make_unique<ComponentCondReference>(label));
+ }
+ action enterAtomicGroup {
+ PUSH_SEQUENCE;
+ currentSeq = enterSequence(currentSeq,
+ ue2::make_unique<ComponentAtomicGroup>());
+ }
+ action eatClass {
+ assert(!currentCls);
+ assert(!inCharClass); // not reentrant
+ currentCls = getComponentClass(mode);
+ inCharClass = true;
+ inCharClassEarly = true;
+ currentClsBegin = ts;
+ fgoto readClass;
+ }
+ action resetModifiers {
+ newMode = mode;
+ }
+ action applyModifiers {
+ mode = newMode;
+ currentSeq->addComponent(ue2::make_unique<ComponentEmpty>());
+ }
+ action modifyMatchPositive {
+ switch (fc) {
+ case 'i':
+ newMode.caseless = true;
+ break;
+ case 'm':
+ newMode.multiline = true;
+ break;
+ case 's':
+ newMode.dotall = true;
+ break;
+ case 'x':
+ newMode.ignore_space = true;
+ break;
+ default:
+ assert(0); // this action only called for [imsx]
+ break;
+ }
+ }
+ action modifyMatchNegative {
+ switch (fc) {
+ case 'i':
+ newMode.caseless = false;
+ break;
+ case 'm':
+ newMode.multiline = false;
+ break;
+ case 's':
+ newMode.dotall = false;
+ break;
+ case 'x':
+ newMode.ignore_space = false;
+ break;
+ default:
+ assert(0); // this action only called for [imsx]
+ break;
+ }
+ }
+ action is_utf8 { mode.utf8 }
+ action is_ignore_space { mode.ignore_space }
+ action is_early_charclass { inCharClassEarly }
+
+ action addNumberedBackRef {
+ if (accumulator == 0) {
+ throw LocatedParseError("Numbered reference cannot be zero");
+ }
+ currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(accumulator));
+ }
+
+ action addNegativeNumberedBackRef {
+ // Accumulator is a negative offset.
+ if (accumulator == 0) {
+ throw LocatedParseError("Numbered reference cannot be zero");
+ }
+ if (accumulator >= groupIndex) {
+ throw LocatedParseError("Invalid reference");
+ }
+ unsigned idx = groupIndex - accumulator;
+ currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(idx));
+ }
+
+ action addNamedBackRef {
+ currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(label));
+ }
+
+ escapedOctal0 = '\\0' @clearOctAccumulator [0-7]{0,2} $appendAccumulatorOctDigit;
+ escapedOctal2 = '\\' [1-7] $setOctAccumulator [0-7]{1,2} $appendAccumulatorOctDigit;
+ escapedOctal2c = '\\' [1-7] $setOctAccumulator [0-7]{0,2} $appendAccumulatorOctDigit;
+ backRefIdSingle = [1-7] $setDecAccumulator;
+ backRefId = [1-9] $setDecAccumulator [0-9]+ $appendAccumulatorDecDigit;
+ escapedHex = '\\x' @clearAccumulator ([0-9] $appendAccumulatorHexDigit | [a-f] $appendAccumulatorHexL | [A-F] $appendAccumulatorHexU){0,2};
+ escapedCtrl = '\\c' any?;
+ escapedUnsupported = '\\' [NluLU];
+ repeatNM1 = '\{' @clearNM [0-9]+ $appendN ('}' @{repeatM = repeatN;} | ',' '\}' @{repeatM = ComponentRepeat::NoLimit;} | ',' [0-9]+ $appendM '}');
+
+ backReferenceG = '\\g' @clearAccumulator [0-9]{1,3} $appendAccumulatorDecDigit;
+ backReferenceGNegative = '\\g-' @clearAccumulator [0-9]{1,3} $appendAccumulatorDecDigit;
+ backReferenceGBracket = '\\g{' @clearAccumulator [0-9]{1,3} $appendAccumulatorDecDigit '}';
+ backReferenceGBracket2 = '\\g{-' @clearAccumulator [0-9]{1,3} $appendAccumulatorDecDigit '}';
+ backReferenceGBracketName = '\\g{' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '}';
+ backReferenceKBracketName = '\\k{' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '}';
+ backReferenceKBracketName2 = '\\k<' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '>';
+ backReferenceKBracketName3 = '\\k\'' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '\'';
+ backReferenceP = '(?P=' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter ')';
+
+ namedGroup1 = '(?<' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '>';
+ namedGroup2 = '(?\'' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '\'';
+ namedGroup3 = '(?P<' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '>';
+
+ namedConditionalRef1 = '(?(<' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '>)';
+ namedConditionalRef2 = '(?(\'' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '\')';
+ namedConditionalRef3 = '(?(' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter ')';
+
+ numberedSubExpression = '(?' [+\-]? [0-9]+ ')';
+ namedSubExpression = '(?' ('&'|'P>') [A-Za-z0-9_]+ ')';
+
+ positiveMatchModifiers = [imsx]+ $modifyMatchPositive;
+ negativeMatchModifiers = '-' [imsx]+ $modifyMatchNegative;
+ matchModifiers = positiveMatchModifiers ? negativeMatchModifiers ?;
+
+ utf8_cont = 0x80..0xbf;
+ utf8_2c = 0xc0..0xdf utf8_cont;
+ utf8_3c = 0xe0..0xef utf8_cont utf8_cont;
+ utf8_4c = 0xf0..0xf7 utf8_cont utf8_cont utf8_cont;
+ hi_byte = 0x80..0xff;
+
+ whitespace = [\t\n\v\f\r ];
+
+ #############################################################
+ # Trivial parser to read Perl 5.10+ control verbs, introduced
+ # by '(*'.
+ #############################################################
+ readVerb := |*
+ 'UTF8)' => {
throw LocatedParseError("(*UTF8) must be at start of "
"expression, encountered");
- };
+ };
'UTF)' => {
throw LocatedParseError("(*UTF) must be at start of "
"expression, encountered");
};
- 'UCP)' => {
+ 'UCP)' => {
throw LocatedParseError("(*UCP) must be at start of "
"expression, encountered");
- };
+ };
# Use the control verb mini-parser to report an error for this
# unsupported/unknown verb.
[^)]+ ')' => {
@@ -568,414 +568,414 @@ unichar readUtf8CodePoint4c(const char *s) {
read_control_verbs(ts - 2, te, (ts - 2 - ptr), temp_mode);
assert(0); // Should have thrown a parse error.
throw LocatedParseError("Unknown control verb");
- };
- any => {
- throw LocatedParseError("Unknown control verb");
- };
- *|;
-
- #############################################################
- # Parser to read UCP
- #############################################################
- readUCP := |*
- 'C' => { currentCls->add(CLASS_UCP_C, negated); fret; };
- 'Cc' => { currentCls->add(CLASS_UCP_CC, negated); fret; };
- 'Cf' => { currentCls->add(CLASS_UCP_CF, negated); fret; };
- 'Cn' => { currentCls->add(CLASS_UCP_CN, negated); fret; };
- 'Co' => { currentCls->add(CLASS_UCP_CO, negated); fret; };
- 'Cs' => { currentCls->add(CLASS_UCP_CS, negated); fret; };
- 'L' => { currentCls->add(CLASS_UCP_L, negated); fret; };
- 'Ll' => { currentCls->add(CLASS_UCP_LL, negated); fret; };
- 'Lm' => { currentCls->add(CLASS_UCP_LM, negated); fret; };
- 'Lo' => { currentCls->add(CLASS_UCP_LO, negated); fret; };
- 'Lt' => { currentCls->add(CLASS_UCP_LT, negated); fret; };
- 'Lu' => { currentCls->add(CLASS_UCP_LU, negated); fret; };
- 'L&' => { currentCls->add(CLASS_UCP_L_AND, negated); fret; };
- 'M' => { currentCls->add(CLASS_UCP_M, negated); fret; };
- 'Mc' => { currentCls->add(CLASS_UCP_MC, negated); fret; };
- 'Me' => { currentCls->add(CLASS_UCP_ME, negated); fret; };
- 'Mn' => { currentCls->add(CLASS_UCP_MN, negated); fret; };
- 'N' => { currentCls->add(CLASS_UCP_N, negated); fret; };
- 'Nd' => { currentCls->add(CLASS_UCP_ND, negated); fret; };
- 'Nl' => { currentCls->add(CLASS_UCP_NL, negated); fret; };
- 'No' => { currentCls->add(CLASS_UCP_NO, negated); fret; };
- 'P' => { currentCls->add(CLASS_UCP_P, negated); fret; };
- 'Pc' => { currentCls->add(CLASS_UCP_PC, negated); fret; };
- 'Pd' => { currentCls->add(CLASS_UCP_PD, negated); fret; };
- 'Pe' => { currentCls->add(CLASS_UCP_PE, negated); fret; };
- 'Pf' => { currentCls->add(CLASS_UCP_PF, negated); fret; };
- 'Pi' => { currentCls->add(CLASS_UCP_PI, negated); fret; };
- 'Po' => { currentCls->add(CLASS_UCP_PO, negated); fret; };
- 'Ps' => { currentCls->add(CLASS_UCP_PS, negated); fret; };
- 'S' => { currentCls->add(CLASS_UCP_S, negated); fret; };
- 'Sc' => { currentCls->add(CLASS_UCP_SC, negated); fret; };
- 'Sk' => { currentCls->add(CLASS_UCP_SK, negated); fret; };
- 'Sm' => { currentCls->add(CLASS_UCP_SM, negated); fret; };
- 'So' => { currentCls->add(CLASS_UCP_SO, negated); fret; };
- 'Z' => { currentCls->add(CLASS_UCP_Z, negated); fret; };
- 'Zl' => { currentCls->add(CLASS_UCP_ZL, negated); fret; };
- 'Zp' => { currentCls->add(CLASS_UCP_ZP, negated); fret; };
- 'Zs' => { currentCls->add(CLASS_UCP_ZS, negated); fret; };
- 'Xan' => { currentCls->add(CLASS_UCP_XAN, negated); fret; };
- 'Xps' => { currentCls->add(CLASS_UCP_XPS, negated); fret; };
- 'Xsp' => { currentCls->add(CLASS_UCP_XSP, negated); fret; };
- 'Xwd' => { currentCls->add(CLASS_UCP_XWD, negated); fret; };
- 'Arabic' => { currentCls->add(CLASS_SCRIPT_ARABIC, negated); fret; };
- 'Armenian' => { currentCls->add(CLASS_SCRIPT_ARMENIAN, negated); fret; };
- 'Avestan' => { currentCls->add(CLASS_SCRIPT_AVESTAN, negated); fret; };
- 'Balinese' => { currentCls->add(CLASS_SCRIPT_BALINESE, negated); fret; };
- 'Bamum' => { currentCls->add(CLASS_SCRIPT_BAMUM, negated); fret; };
- 'Batak' => { currentCls->add(CLASS_SCRIPT_BATAK, negated); fret; };
- 'Bengali' => { currentCls->add(CLASS_SCRIPT_BENGALI, negated); fret; };
- 'Bopomofo' => { currentCls->add(CLASS_SCRIPT_BOPOMOFO, negated); fret; };
- 'Brahmi' => { currentCls->add(CLASS_SCRIPT_BRAHMI, negated); fret; };
- 'Braille' => { currentCls->add(CLASS_SCRIPT_BRAILLE, negated); fret; };
- 'Buginese' => { currentCls->add(CLASS_SCRIPT_BUGINESE, negated); fret; };
- 'Buhid' => { currentCls->add(CLASS_SCRIPT_BUHID, negated); fret; };
- 'Canadian_Aboriginal' => { currentCls->add(CLASS_SCRIPT_CANADIAN_ABORIGINAL, negated); fret; };
- 'Carian' => { currentCls->add(CLASS_SCRIPT_CARIAN, negated); fret; };
- 'Cham' => { currentCls->add(CLASS_SCRIPT_CHAM, negated); fret; };
- 'Cherokee' => { currentCls->add(CLASS_SCRIPT_CHEROKEE, negated); fret; };
- 'Common' => { currentCls->add(CLASS_SCRIPT_COMMON, negated); fret; };
- 'Coptic' => { currentCls->add(CLASS_SCRIPT_COPTIC, negated); fret; };
- 'Cuneiform' => { currentCls->add(CLASS_SCRIPT_CUNEIFORM, negated); fret; };
- 'Cypriot' => { currentCls->add(CLASS_SCRIPT_CYPRIOT, negated); fret; };
- 'Cyrillic' => { currentCls->add(CLASS_SCRIPT_CYRILLIC, negated); fret; };
- 'Deseret' => { currentCls->add(CLASS_SCRIPT_DESERET, negated); fret; };
- 'Devanagari' => { currentCls->add(CLASS_SCRIPT_DEVANAGARI, negated); fret; };
- 'Egyptian_Hieroglyphs' => { currentCls->add(CLASS_SCRIPT_EGYPTIAN_HIEROGLYPHS, negated); fret; };
- 'Ethiopic' => { currentCls->add(CLASS_SCRIPT_ETHIOPIC, negated); fret; };
- 'Georgian' => { currentCls->add(CLASS_SCRIPT_GEORGIAN, negated); fret; };
- 'Glagolitic' => { currentCls->add(CLASS_SCRIPT_GLAGOLITIC, negated); fret; };
- 'Gothic' => { currentCls->add(CLASS_SCRIPT_GOTHIC, negated); fret; };
- 'Greek' => { currentCls->add(CLASS_SCRIPT_GREEK, negated); fret; };
- 'Gujarati' => { currentCls->add(CLASS_SCRIPT_GUJARATI, negated); fret; };
- 'Gurmukhi' => { currentCls->add(CLASS_SCRIPT_GURMUKHI, negated); fret; };
- 'Han' => { currentCls->add(CLASS_SCRIPT_HAN, negated); fret; };
- 'Hangul' => { currentCls->add(CLASS_SCRIPT_HANGUL, negated); fret; };
- 'Hanunoo' => { currentCls->add(CLASS_SCRIPT_HANUNOO, negated); fret; };
- 'Hebrew' => { currentCls->add(CLASS_SCRIPT_HEBREW, negated); fret; };
- 'Hiragana' => { currentCls->add(CLASS_SCRIPT_HIRAGANA, negated); fret; };
- 'Imperial_Aramaic' => { currentCls->add(CLASS_SCRIPT_IMPERIAL_ARAMAIC, negated); fret; };
- 'Inherited' => { currentCls->add(CLASS_SCRIPT_INHERITED, negated); fret; };
- 'Inscriptional_Pahlavi' => { currentCls->add(CLASS_SCRIPT_INSCRIPTIONAL_PAHLAVI, negated); fret; };
- 'Inscriptional_Parthian' => { currentCls->add(CLASS_SCRIPT_INSCRIPTIONAL_PARTHIAN, negated); fret; };
- 'Javanese' => { currentCls->add(CLASS_SCRIPT_JAVANESE, negated); fret; };
- 'Kaithi' => { currentCls->add(CLASS_SCRIPT_KAITHI, negated); fret; };
- 'Kannada' => { currentCls->add(CLASS_SCRIPT_KANNADA, negated); fret; };
- 'Katakana' => { currentCls->add(CLASS_SCRIPT_KATAKANA, negated); fret; };
- 'Kayah_Li' => { currentCls->add(CLASS_SCRIPT_KAYAH_LI, negated); fret; };
- 'Kharoshthi' => { currentCls->add(CLASS_SCRIPT_KHAROSHTHI, negated); fret; };
- 'Khmer' => { currentCls->add(CLASS_SCRIPT_KHMER, negated); fret; };
- 'Lao' => { currentCls->add(CLASS_SCRIPT_LAO, negated); fret; };
- 'Latin' => { currentCls->add(CLASS_SCRIPT_LATIN, negated); fret; };
- 'Lepcha' => { currentCls->add(CLASS_SCRIPT_LEPCHA, negated); fret; };
- 'Limbu' => { currentCls->add(CLASS_SCRIPT_LIMBU, negated); fret; };
- 'Linear_B' => { currentCls->add(CLASS_SCRIPT_LINEAR_B, negated); fret; };
- 'Lisu' => { currentCls->add(CLASS_SCRIPT_LISU, negated); fret; };
- 'Lycian' => { currentCls->add(CLASS_SCRIPT_LYCIAN, negated); fret; };
- 'Lydian' => { currentCls->add(CLASS_SCRIPT_LYDIAN, negated); fret; };
- 'Malayalam' => { currentCls->add(CLASS_SCRIPT_MALAYALAM, negated); fret; };
- 'Mandaic' => { currentCls->add(CLASS_SCRIPT_MANDAIC, negated); fret; };
- 'Meetei_Mayek' => { currentCls->add(CLASS_SCRIPT_MEETEI_MAYEK, negated); fret; };
- 'Mongolian' => { currentCls->add(CLASS_SCRIPT_MONGOLIAN, negated); fret; };
- 'Myanmar' => { currentCls->add(CLASS_SCRIPT_MYANMAR, negated); fret; };
- 'New_Tai_Lue' => { currentCls->add(CLASS_SCRIPT_NEW_TAI_LUE, negated); fret; };
- 'Nko' => { currentCls->add(CLASS_SCRIPT_NKO, negated); fret; };
- 'Ogham' => { currentCls->add(CLASS_SCRIPT_OGHAM, negated); fret; };
- 'Ol_Chiki' => { currentCls->add(CLASS_SCRIPT_OL_CHIKI, negated); fret; };
- 'Old_Italic' => { currentCls->add(CLASS_SCRIPT_OLD_ITALIC, negated); fret; };
- 'Old_Persian' => { currentCls->add(CLASS_SCRIPT_OLD_PERSIAN, negated); fret; };
- 'Old_South_Arabian' => { currentCls->add(CLASS_SCRIPT_OLD_SOUTH_ARABIAN, negated); fret; };
- 'Old_Turkic' => { currentCls->add(CLASS_SCRIPT_OLD_TURKIC, negated); fret; };
- 'Oriya' => { currentCls->add(CLASS_SCRIPT_ORIYA, negated); fret; };
- 'Osmanya' => { currentCls->add(CLASS_SCRIPT_OSMANYA, negated); fret; };
- 'Phags_Pa' => { currentCls->add(CLASS_SCRIPT_PHAGS_PA, negated); fret; };
- 'Phoenician' => { currentCls->add(CLASS_SCRIPT_PHOENICIAN, negated); fret; };
- 'Rejang' => { currentCls->add(CLASS_SCRIPT_REJANG, negated); fret; };
- 'Runic' => { currentCls->add(CLASS_SCRIPT_RUNIC, negated); fret; };
- 'Samaritan' => { currentCls->add(CLASS_SCRIPT_SAMARITAN, negated); fret; };
- 'Saurashtra' => { currentCls->add(CLASS_SCRIPT_SAURASHTRA, negated); fret; };
- 'Shavian' => { currentCls->add(CLASS_SCRIPT_SHAVIAN, negated); fret; };
- 'Sinhala' => { currentCls->add(CLASS_SCRIPT_SINHALA, negated); fret; };
- 'Sundanese' => { currentCls->add(CLASS_SCRIPT_SUNDANESE, negated); fret; };
- 'Syloti_Nagri' => { currentCls->add(CLASS_SCRIPT_SYLOTI_NAGRI, negated); fret; };
- 'Syriac' => { currentCls->add(CLASS_SCRIPT_SYRIAC, negated); fret; };
- 'Tagalog' => { currentCls->add(CLASS_SCRIPT_TAGALOG, negated); fret; };
- 'Tagbanwa' => { currentCls->add(CLASS_SCRIPT_TAGBANWA, negated); fret; };
- 'Tai_Le' => { currentCls->add(CLASS_SCRIPT_TAI_LE, negated); fret; };
- 'Tai_Tham' => { currentCls->add(CLASS_SCRIPT_TAI_THAM, negated); fret; };
- 'Tai_Viet' => { currentCls->add(CLASS_SCRIPT_TAI_VIET, negated); fret; };
- 'Tamil' => { currentCls->add(CLASS_SCRIPT_TAMIL, negated); fret; };
- 'Telugu' => { currentCls->add(CLASS_SCRIPT_TELUGU, negated); fret; };
- 'Thaana' => { currentCls->add(CLASS_SCRIPT_THAANA, negated); fret; };
- 'Thai' => { currentCls->add(CLASS_SCRIPT_THAI, negated); fret; };
- 'Tibetan' => { currentCls->add(CLASS_SCRIPT_TIBETAN, negated); fret; };
- 'Tifinagh' => { currentCls->add(CLASS_SCRIPT_TIFINAGH, negated); fret; };
- 'Ugaritic' => { currentCls->add(CLASS_SCRIPT_UGARITIC, negated); fret; };
- 'Vai' => { currentCls->add(CLASS_SCRIPT_VAI, negated); fret; };
- 'Yi' => { currentCls->add(CLASS_SCRIPT_YI, negated); fret; };
- 'Any' => { currentCls->add(CLASS_UCP_ANY, negated); fret; };
- any => { throw LocatedParseError("Unknown property"); };
- *|;
-
- readBracedUCP := ('{'
- ('^' ${ negated = !negated; }) ?
- ([^^] ${ fhold; fcall readUCP; })
- '}' ${ if (!inCharClass) { // not inside [..]
- currentCls->finalize();
- currentSeq->addComponent(move(currentCls));
- }
- fret;
- })
- $^{ throw LocatedParseError("Malformed property"); };
-
- readUCPSingle := |*
- 'C' => {
- currentCls->add(CLASS_UCP_C, negated);
- if (!inCharClass) {
- currentCls->finalize();
- currentSeq->addComponent(move(currentCls));
- }
- fret;
- };
- 'L' => {
- currentCls->add(CLASS_UCP_L, negated);
- if (!inCharClass) {
- currentCls->finalize();
- currentSeq->addComponent(move(currentCls));
- }
- fret;
- };
- 'M' => {
- currentCls->add(CLASS_UCP_M, negated);
- if (!inCharClass) {
- currentCls->finalize();
- currentSeq->addComponent(move(currentCls));
- }
- fret;
- };
- 'N' => {
- currentCls->add(CLASS_UCP_N, negated);
- if (!inCharClass) {
- currentCls->finalize();
- currentSeq->addComponent(move(currentCls));
- }
+ };
+ any => {
+ throw LocatedParseError("Unknown control verb");
+ };
+ *|;
+
+ #############################################################
+ # Parser to read UCP
+ #############################################################
+ readUCP := |*
+ 'C' => { currentCls->add(CLASS_UCP_C, negated); fret; };
+ 'Cc' => { currentCls->add(CLASS_UCP_CC, negated); fret; };
+ 'Cf' => { currentCls->add(CLASS_UCP_CF, negated); fret; };
+ 'Cn' => { currentCls->add(CLASS_UCP_CN, negated); fret; };
+ 'Co' => { currentCls->add(CLASS_UCP_CO, negated); fret; };
+ 'Cs' => { currentCls->add(CLASS_UCP_CS, negated); fret; };
+ 'L' => { currentCls->add(CLASS_UCP_L, negated); fret; };
+ 'Ll' => { currentCls->add(CLASS_UCP_LL, negated); fret; };
+ 'Lm' => { currentCls->add(CLASS_UCP_LM, negated); fret; };
+ 'Lo' => { currentCls->add(CLASS_UCP_LO, negated); fret; };
+ 'Lt' => { currentCls->add(CLASS_UCP_LT, negated); fret; };
+ 'Lu' => { currentCls->add(CLASS_UCP_LU, negated); fret; };
+ 'L&' => { currentCls->add(CLASS_UCP_L_AND, negated); fret; };
+ 'M' => { currentCls->add(CLASS_UCP_M, negated); fret; };
+ 'Mc' => { currentCls->add(CLASS_UCP_MC, negated); fret; };
+ 'Me' => { currentCls->add(CLASS_UCP_ME, negated); fret; };
+ 'Mn' => { currentCls->add(CLASS_UCP_MN, negated); fret; };
+ 'N' => { currentCls->add(CLASS_UCP_N, negated); fret; };
+ 'Nd' => { currentCls->add(CLASS_UCP_ND, negated); fret; };
+ 'Nl' => { currentCls->add(CLASS_UCP_NL, negated); fret; };
+ 'No' => { currentCls->add(CLASS_UCP_NO, negated); fret; };
+ 'P' => { currentCls->add(CLASS_UCP_P, negated); fret; };
+ 'Pc' => { currentCls->add(CLASS_UCP_PC, negated); fret; };
+ 'Pd' => { currentCls->add(CLASS_UCP_PD, negated); fret; };
+ 'Pe' => { currentCls->add(CLASS_UCP_PE, negated); fret; };
+ 'Pf' => { currentCls->add(CLASS_UCP_PF, negated); fret; };
+ 'Pi' => { currentCls->add(CLASS_UCP_PI, negated); fret; };
+ 'Po' => { currentCls->add(CLASS_UCP_PO, negated); fret; };
+ 'Ps' => { currentCls->add(CLASS_UCP_PS, negated); fret; };
+ 'S' => { currentCls->add(CLASS_UCP_S, negated); fret; };
+ 'Sc' => { currentCls->add(CLASS_UCP_SC, negated); fret; };
+ 'Sk' => { currentCls->add(CLASS_UCP_SK, negated); fret; };
+ 'Sm' => { currentCls->add(CLASS_UCP_SM, negated); fret; };
+ 'So' => { currentCls->add(CLASS_UCP_SO, negated); fret; };
+ 'Z' => { currentCls->add(CLASS_UCP_Z, negated); fret; };
+ 'Zl' => { currentCls->add(CLASS_UCP_ZL, negated); fret; };
+ 'Zp' => { currentCls->add(CLASS_UCP_ZP, negated); fret; };
+ 'Zs' => { currentCls->add(CLASS_UCP_ZS, negated); fret; };
+ 'Xan' => { currentCls->add(CLASS_UCP_XAN, negated); fret; };
+ 'Xps' => { currentCls->add(CLASS_UCP_XPS, negated); fret; };
+ 'Xsp' => { currentCls->add(CLASS_UCP_XSP, negated); fret; };
+ 'Xwd' => { currentCls->add(CLASS_UCP_XWD, negated); fret; };
+ 'Arabic' => { currentCls->add(CLASS_SCRIPT_ARABIC, negated); fret; };
+ 'Armenian' => { currentCls->add(CLASS_SCRIPT_ARMENIAN, negated); fret; };
+ 'Avestan' => { currentCls->add(CLASS_SCRIPT_AVESTAN, negated); fret; };
+ 'Balinese' => { currentCls->add(CLASS_SCRIPT_BALINESE, negated); fret; };
+ 'Bamum' => { currentCls->add(CLASS_SCRIPT_BAMUM, negated); fret; };
+ 'Batak' => { currentCls->add(CLASS_SCRIPT_BATAK, negated); fret; };
+ 'Bengali' => { currentCls->add(CLASS_SCRIPT_BENGALI, negated); fret; };
+ 'Bopomofo' => { currentCls->add(CLASS_SCRIPT_BOPOMOFO, negated); fret; };
+ 'Brahmi' => { currentCls->add(CLASS_SCRIPT_BRAHMI, negated); fret; };
+ 'Braille' => { currentCls->add(CLASS_SCRIPT_BRAILLE, negated); fret; };
+ 'Buginese' => { currentCls->add(CLASS_SCRIPT_BUGINESE, negated); fret; };
+ 'Buhid' => { currentCls->add(CLASS_SCRIPT_BUHID, negated); fret; };
+ 'Canadian_Aboriginal' => { currentCls->add(CLASS_SCRIPT_CANADIAN_ABORIGINAL, negated); fret; };
+ 'Carian' => { currentCls->add(CLASS_SCRIPT_CARIAN, negated); fret; };
+ 'Cham' => { currentCls->add(CLASS_SCRIPT_CHAM, negated); fret; };
+ 'Cherokee' => { currentCls->add(CLASS_SCRIPT_CHEROKEE, negated); fret; };
+ 'Common' => { currentCls->add(CLASS_SCRIPT_COMMON, negated); fret; };
+ 'Coptic' => { currentCls->add(CLASS_SCRIPT_COPTIC, negated); fret; };
+ 'Cuneiform' => { currentCls->add(CLASS_SCRIPT_CUNEIFORM, negated); fret; };
+ 'Cypriot' => { currentCls->add(CLASS_SCRIPT_CYPRIOT, negated); fret; };
+ 'Cyrillic' => { currentCls->add(CLASS_SCRIPT_CYRILLIC, negated); fret; };
+ 'Deseret' => { currentCls->add(CLASS_SCRIPT_DESERET, negated); fret; };
+ 'Devanagari' => { currentCls->add(CLASS_SCRIPT_DEVANAGARI, negated); fret; };
+ 'Egyptian_Hieroglyphs' => { currentCls->add(CLASS_SCRIPT_EGYPTIAN_HIEROGLYPHS, negated); fret; };
+ 'Ethiopic' => { currentCls->add(CLASS_SCRIPT_ETHIOPIC, negated); fret; };
+ 'Georgian' => { currentCls->add(CLASS_SCRIPT_GEORGIAN, negated); fret; };
+ 'Glagolitic' => { currentCls->add(CLASS_SCRIPT_GLAGOLITIC, negated); fret; };
+ 'Gothic' => { currentCls->add(CLASS_SCRIPT_GOTHIC, negated); fret; };
+ 'Greek' => { currentCls->add(CLASS_SCRIPT_GREEK, negated); fret; };
+ 'Gujarati' => { currentCls->add(CLASS_SCRIPT_GUJARATI, negated); fret; };
+ 'Gurmukhi' => { currentCls->add(CLASS_SCRIPT_GURMUKHI, negated); fret; };
+ 'Han' => { currentCls->add(CLASS_SCRIPT_HAN, negated); fret; };
+ 'Hangul' => { currentCls->add(CLASS_SCRIPT_HANGUL, negated); fret; };
+ 'Hanunoo' => { currentCls->add(CLASS_SCRIPT_HANUNOO, negated); fret; };
+ 'Hebrew' => { currentCls->add(CLASS_SCRIPT_HEBREW, negated); fret; };
+ 'Hiragana' => { currentCls->add(CLASS_SCRIPT_HIRAGANA, negated); fret; };
+ 'Imperial_Aramaic' => { currentCls->add(CLASS_SCRIPT_IMPERIAL_ARAMAIC, negated); fret; };
+ 'Inherited' => { currentCls->add(CLASS_SCRIPT_INHERITED, negated); fret; };
+ 'Inscriptional_Pahlavi' => { currentCls->add(CLASS_SCRIPT_INSCRIPTIONAL_PAHLAVI, negated); fret; };
+ 'Inscriptional_Parthian' => { currentCls->add(CLASS_SCRIPT_INSCRIPTIONAL_PARTHIAN, negated); fret; };
+ 'Javanese' => { currentCls->add(CLASS_SCRIPT_JAVANESE, negated); fret; };
+ 'Kaithi' => { currentCls->add(CLASS_SCRIPT_KAITHI, negated); fret; };
+ 'Kannada' => { currentCls->add(CLASS_SCRIPT_KANNADA, negated); fret; };
+ 'Katakana' => { currentCls->add(CLASS_SCRIPT_KATAKANA, negated); fret; };
+ 'Kayah_Li' => { currentCls->add(CLASS_SCRIPT_KAYAH_LI, negated); fret; };
+ 'Kharoshthi' => { currentCls->add(CLASS_SCRIPT_KHAROSHTHI, negated); fret; };
+ 'Khmer' => { currentCls->add(CLASS_SCRIPT_KHMER, negated); fret; };
+ 'Lao' => { currentCls->add(CLASS_SCRIPT_LAO, negated); fret; };
+ 'Latin' => { currentCls->add(CLASS_SCRIPT_LATIN, negated); fret; };
+ 'Lepcha' => { currentCls->add(CLASS_SCRIPT_LEPCHA, negated); fret; };
+ 'Limbu' => { currentCls->add(CLASS_SCRIPT_LIMBU, negated); fret; };
+ 'Linear_B' => { currentCls->add(CLASS_SCRIPT_LINEAR_B, negated); fret; };
+ 'Lisu' => { currentCls->add(CLASS_SCRIPT_LISU, negated); fret; };
+ 'Lycian' => { currentCls->add(CLASS_SCRIPT_LYCIAN, negated); fret; };
+ 'Lydian' => { currentCls->add(CLASS_SCRIPT_LYDIAN, negated); fret; };
+ 'Malayalam' => { currentCls->add(CLASS_SCRIPT_MALAYALAM, negated); fret; };
+ 'Mandaic' => { currentCls->add(CLASS_SCRIPT_MANDAIC, negated); fret; };
+ 'Meetei_Mayek' => { currentCls->add(CLASS_SCRIPT_MEETEI_MAYEK, negated); fret; };
+ 'Mongolian' => { currentCls->add(CLASS_SCRIPT_MONGOLIAN, negated); fret; };
+ 'Myanmar' => { currentCls->add(CLASS_SCRIPT_MYANMAR, negated); fret; };
+ 'New_Tai_Lue' => { currentCls->add(CLASS_SCRIPT_NEW_TAI_LUE, negated); fret; };
+ 'Nko' => { currentCls->add(CLASS_SCRIPT_NKO, negated); fret; };
+ 'Ogham' => { currentCls->add(CLASS_SCRIPT_OGHAM, negated); fret; };
+ 'Ol_Chiki' => { currentCls->add(CLASS_SCRIPT_OL_CHIKI, negated); fret; };
+ 'Old_Italic' => { currentCls->add(CLASS_SCRIPT_OLD_ITALIC, negated); fret; };
+ 'Old_Persian' => { currentCls->add(CLASS_SCRIPT_OLD_PERSIAN, negated); fret; };
+ 'Old_South_Arabian' => { currentCls->add(CLASS_SCRIPT_OLD_SOUTH_ARABIAN, negated); fret; };
+ 'Old_Turkic' => { currentCls->add(CLASS_SCRIPT_OLD_TURKIC, negated); fret; };
+ 'Oriya' => { currentCls->add(CLASS_SCRIPT_ORIYA, negated); fret; };
+ 'Osmanya' => { currentCls->add(CLASS_SCRIPT_OSMANYA, negated); fret; };
+ 'Phags_Pa' => { currentCls->add(CLASS_SCRIPT_PHAGS_PA, negated); fret; };
+ 'Phoenician' => { currentCls->add(CLASS_SCRIPT_PHOENICIAN, negated); fret; };
+ 'Rejang' => { currentCls->add(CLASS_SCRIPT_REJANG, negated); fret; };
+ 'Runic' => { currentCls->add(CLASS_SCRIPT_RUNIC, negated); fret; };
+ 'Samaritan' => { currentCls->add(CLASS_SCRIPT_SAMARITAN, negated); fret; };
+ 'Saurashtra' => { currentCls->add(CLASS_SCRIPT_SAURASHTRA, negated); fret; };
+ 'Shavian' => { currentCls->add(CLASS_SCRIPT_SHAVIAN, negated); fret; };
+ 'Sinhala' => { currentCls->add(CLASS_SCRIPT_SINHALA, negated); fret; };
+ 'Sundanese' => { currentCls->add(CLASS_SCRIPT_SUNDANESE, negated); fret; };
+ 'Syloti_Nagri' => { currentCls->add(CLASS_SCRIPT_SYLOTI_NAGRI, negated); fret; };
+ 'Syriac' => { currentCls->add(CLASS_SCRIPT_SYRIAC, negated); fret; };
+ 'Tagalog' => { currentCls->add(CLASS_SCRIPT_TAGALOG, negated); fret; };
+ 'Tagbanwa' => { currentCls->add(CLASS_SCRIPT_TAGBANWA, negated); fret; };
+ 'Tai_Le' => { currentCls->add(CLASS_SCRIPT_TAI_LE, negated); fret; };
+ 'Tai_Tham' => { currentCls->add(CLASS_SCRIPT_TAI_THAM, negated); fret; };
+ 'Tai_Viet' => { currentCls->add(CLASS_SCRIPT_TAI_VIET, negated); fret; };
+ 'Tamil' => { currentCls->add(CLASS_SCRIPT_TAMIL, negated); fret; };
+ 'Telugu' => { currentCls->add(CLASS_SCRIPT_TELUGU, negated); fret; };
+ 'Thaana' => { currentCls->add(CLASS_SCRIPT_THAANA, negated); fret; };
+ 'Thai' => { currentCls->add(CLASS_SCRIPT_THAI, negated); fret; };
+ 'Tibetan' => { currentCls->add(CLASS_SCRIPT_TIBETAN, negated); fret; };
+ 'Tifinagh' => { currentCls->add(CLASS_SCRIPT_TIFINAGH, negated); fret; };
+ 'Ugaritic' => { currentCls->add(CLASS_SCRIPT_UGARITIC, negated); fret; };
+ 'Vai' => { currentCls->add(CLASS_SCRIPT_VAI, negated); fret; };
+ 'Yi' => { currentCls->add(CLASS_SCRIPT_YI, negated); fret; };
+ 'Any' => { currentCls->add(CLASS_UCP_ANY, negated); fret; };
+ any => { throw LocatedParseError("Unknown property"); };
+ *|;
+
+ readBracedUCP := ('{'
+ ('^' ${ negated = !negated; }) ?
+ ([^^] ${ fhold; fcall readUCP; })
+ '}' ${ if (!inCharClass) { // not inside [..]
+ currentCls->finalize();
+ currentSeq->addComponent(move(currentCls));
+ }
+ fret;
+ })
+ $^{ throw LocatedParseError("Malformed property"); };
+
+ readUCPSingle := |*
+ 'C' => {
+ currentCls->add(CLASS_UCP_C, negated);
+ if (!inCharClass) {
+ currentCls->finalize();
+ currentSeq->addComponent(move(currentCls));
+ }
+ fret;
+ };
+ 'L' => {
+ currentCls->add(CLASS_UCP_L, negated);
+ if (!inCharClass) {
+ currentCls->finalize();
+ currentSeq->addComponent(move(currentCls));
+ }
fret;
- };
- 'P' => {
- currentCls->add(CLASS_UCP_P, negated);
- if (!inCharClass) {
- currentCls->finalize();
- currentSeq->addComponent(move(currentCls));
- }
- fret;
- };
- 'S' => {
- currentCls->add(CLASS_UCP_S, negated);
- if (!inCharClass) {
- currentCls->finalize();
- currentSeq->addComponent(move(currentCls));
- }
- fret;
- };
- 'Z' => {
- currentCls->add(CLASS_UCP_Z, negated);
- if (!inCharClass) {
- currentCls->finalize();
- currentSeq->addComponent(move(currentCls));
- }
- fret;
- };
-
- any => { throw LocatedParseError("Unknown property"); };
- *|;
- charClassGuts := |*
- # We don't support POSIX collating elements (neither does PCRE
- # or Perl). These look like [.ch.] or [=ch=].
- '\[\.' ( '\\]' | [^\]] )* '\.\]' |
- '\[=' ( '\\]' | [^\]] )* '=\]' => {
- throw LocatedParseError("Unsupported POSIX collating "
- "element");
- };
- # Named sets
- # Adding these may cause the charclass to close, hence the
- # finalized check - UE-2276
- '[:alnum:]' => {
- currentCls->add(CLASS_ALNUM, false);
- };
- '[:^alnum:]' => {
- currentCls->add(CLASS_ALNUM, true);
- };
- '[:alpha:]' => {
- currentCls->add(CLASS_ALPHA, false);
- };
- '[:^alpha:]' => {
- currentCls->add(CLASS_ALPHA, true);
- };
- '[:ascii:]' => {
- currentCls->add(CLASS_ASCII, false);
- };
- '[:^ascii:]' => {
- currentCls->add(CLASS_ASCII, true);
- };
- '[:blank:]' => {
- currentCls->add(CLASS_BLANK, false);
- };
- '[:^blank:]' => {
- currentCls->add(CLASS_BLANK, true);
- };
- '[:cntrl:]' => {
- currentCls->add(CLASS_CNTRL, false);
- };
- '[:^cntrl:]' => {
- currentCls->add(CLASS_CNTRL, true);
- };
- '[:digit:]' => {
- currentCls->add(CLASS_DIGIT, false);
- };
- '[:^digit:]' => {
- currentCls->add(CLASS_DIGIT, true);
- };
- '[:graph:]' => {
- currentCls->add(CLASS_GRAPH, false);
- };
- '[:^graph:]' => {
- currentCls->add(CLASS_GRAPH, true);
- };
- '[:lower:]' => {
- currentCls->add(CLASS_LOWER, false);
- };
- '[:^lower:]' => {
- currentCls->add(CLASS_LOWER, true);
- };
- '[:print:]' => {
- currentCls->add(CLASS_PRINT, false);
- };
- '[:^print:]' => {
- currentCls->add(CLASS_PRINT, true);
- };
- '[:punct:]' => {
- currentCls->add(CLASS_PUNCT, false);
- };
- '[:^punct:]' => {
- currentCls->add(CLASS_PUNCT, true);
- };
- # Posix SPACE covers 9, 10, 11, 12, 13, 32
- '[:space:]' => {
- currentCls->add(CLASS_SPACE, false);
- };
- '[:^space:]' => {
- currentCls->add(CLASS_SPACE, true);
- };
- '[:upper:]' => {
- currentCls->add(CLASS_UPPER, false);
- };
- '[:^upper:]' => {
- currentCls->add(CLASS_UPPER, true);
- };
- '[:word:]' => {
- currentCls->add(CLASS_WORD, false);
- };
- '[:^word:]' => {
- currentCls->add(CLASS_WORD, true);
- };
- '[:xdigit:]' => {
- currentCls->add(CLASS_XDIGIT, false);
- };
- '[:^xdigit:]' => {
- currentCls->add(CLASS_XDIGIT, true);
- };
- # Anything else between "[:" and ":]" is an invalid POSIX class.
- # Note that "\]" counts as a literal char here.
- '\[:' ( '\\]' | [^\]] )* ':\]' => {
- throw LocatedParseError("Invalid POSIX named class");
- };
- '\\Q' => {
- fcall readQuotedClass;
- };
- '\\E' => { /*noop*/};
- # Backspace (this is only valid for \b in char classes)
- '\\b' => {
- currentCls->add('\x08');
- };
- # Tab
- '\\t' => {
- currentCls->add('\x09');
- };
- # Newline
- '\\n' => {
- currentCls->add('\x0a');
- };
- # Carriage return
- '\\r' => {
- currentCls->add('\x0d');
- };
- # Form feed
- '\\f' => {
- currentCls->add('\x0c');
- };
- # Bell
- '\\a' => {
- currentCls->add('\x07');
- };
- # Escape
- '\\e' => {
- currentCls->add('\x1b');
- };
- # Horizontal whitespace
- '\\h' => {
- currentCls->add(CLASS_HORZ, false);
- };
- # Not horizontal whitespace
- '\\H' => {
- currentCls->add(CLASS_HORZ, true);
- };
- # Vertical whitespace
- '\\v' => {
- currentCls->add(CLASS_VERT, false);
- };
- # Not vertical whitespace
- '\\V' => {
- currentCls->add(CLASS_VERT, true);
- };
-
- '\\p{' => {
- negated = false;
- fhold;
- fcall readBracedUCP;
- };
-
- '\\p' any => {
- negated = false;
- fhold;
- fcall readUCPSingle;
- };
-
- '\\P{' => {
- negated = true;
- fhold;
- fcall readBracedUCP;
- };
-
- '\\P'any => {
- negated = true;
- fhold;
- fcall readUCPSingle;
- };
-
- '\\P' => { throw LocatedParseError("Malformed property"); };
- '\\p' => { throw LocatedParseError("Malformed property"); };
-
- # Octal
- escapedOctal0 => {
- currentCls->add(octAccumulator);
- };
- escapedOctal2c => {
- currentCls->add(octAccumulator);
- };
-
- '\\o{' [0-7]+ '}' => {
+ };
+ 'M' => {
+ currentCls->add(CLASS_UCP_M, negated);
+ if (!inCharClass) {
+ currentCls->finalize();
+ currentSeq->addComponent(move(currentCls));
+ }
+ fret;
+ };
+ 'N' => {
+ currentCls->add(CLASS_UCP_N, negated);
+ if (!inCharClass) {
+ currentCls->finalize();
+ currentSeq->addComponent(move(currentCls));
+ }
+ fret;
+ };
+ 'P' => {
+ currentCls->add(CLASS_UCP_P, negated);
+ if (!inCharClass) {
+ currentCls->finalize();
+ currentSeq->addComponent(move(currentCls));
+ }
+ fret;
+ };
+ 'S' => {
+ currentCls->add(CLASS_UCP_S, negated);
+ if (!inCharClass) {
+ currentCls->finalize();
+ currentSeq->addComponent(move(currentCls));
+ }
+ fret;
+ };
+ 'Z' => {
+ currentCls->add(CLASS_UCP_Z, negated);
+ if (!inCharClass) {
+ currentCls->finalize();
+ currentSeq->addComponent(move(currentCls));
+ }
+ fret;
+ };
+
+ any => { throw LocatedParseError("Unknown property"); };
+ *|;
+ charClassGuts := |*
+ # We don't support POSIX collating elements (neither does PCRE
+ # or Perl). These look like [.ch.] or [=ch=].
+ '\[\.' ( '\\]' | [^\]] )* '\.\]' |
+ '\[=' ( '\\]' | [^\]] )* '=\]' => {
+ throw LocatedParseError("Unsupported POSIX collating "
+ "element");
+ };
+ # Named sets
+ # Adding these may cause the charclass to close, hence the
+ # finalized check - UE-2276
+ '[:alnum:]' => {
+ currentCls->add(CLASS_ALNUM, false);
+ };
+ '[:^alnum:]' => {
+ currentCls->add(CLASS_ALNUM, true);
+ };
+ '[:alpha:]' => {
+ currentCls->add(CLASS_ALPHA, false);
+ };
+ '[:^alpha:]' => {
+ currentCls->add(CLASS_ALPHA, true);
+ };
+ '[:ascii:]' => {
+ currentCls->add(CLASS_ASCII, false);
+ };
+ '[:^ascii:]' => {
+ currentCls->add(CLASS_ASCII, true);
+ };
+ '[:blank:]' => {
+ currentCls->add(CLASS_BLANK, false);
+ };
+ '[:^blank:]' => {
+ currentCls->add(CLASS_BLANK, true);
+ };
+ '[:cntrl:]' => {
+ currentCls->add(CLASS_CNTRL, false);
+ };
+ '[:^cntrl:]' => {
+ currentCls->add(CLASS_CNTRL, true);
+ };
+ '[:digit:]' => {
+ currentCls->add(CLASS_DIGIT, false);
+ };
+ '[:^digit:]' => {
+ currentCls->add(CLASS_DIGIT, true);
+ };
+ '[:graph:]' => {
+ currentCls->add(CLASS_GRAPH, false);
+ };
+ '[:^graph:]' => {
+ currentCls->add(CLASS_GRAPH, true);
+ };
+ '[:lower:]' => {
+ currentCls->add(CLASS_LOWER, false);
+ };
+ '[:^lower:]' => {
+ currentCls->add(CLASS_LOWER, true);
+ };
+ '[:print:]' => {
+ currentCls->add(CLASS_PRINT, false);
+ };
+ '[:^print:]' => {
+ currentCls->add(CLASS_PRINT, true);
+ };
+ '[:punct:]' => {
+ currentCls->add(CLASS_PUNCT, false);
+ };
+ '[:^punct:]' => {
+ currentCls->add(CLASS_PUNCT, true);
+ };
+ # Posix SPACE covers 9, 10, 11, 12, 13, 32
+ '[:space:]' => {
+ currentCls->add(CLASS_SPACE, false);
+ };
+ '[:^space:]' => {
+ currentCls->add(CLASS_SPACE, true);
+ };
+ '[:upper:]' => {
+ currentCls->add(CLASS_UPPER, false);
+ };
+ '[:^upper:]' => {
+ currentCls->add(CLASS_UPPER, true);
+ };
+ '[:word:]' => {
+ currentCls->add(CLASS_WORD, false);
+ };
+ '[:^word:]' => {
+ currentCls->add(CLASS_WORD, true);
+ };
+ '[:xdigit:]' => {
+ currentCls->add(CLASS_XDIGIT, false);
+ };
+ '[:^xdigit:]' => {
+ currentCls->add(CLASS_XDIGIT, true);
+ };
+ # Anything else between "[:" and ":]" is an invalid POSIX class.
+ # Note that "\]" counts as a literal char here.
+ '\[:' ( '\\]' | [^\]] )* ':\]' => {
+ throw LocatedParseError("Invalid POSIX named class");
+ };
+ '\\Q' => {
+ fcall readQuotedClass;
+ };
+ '\\E' => { /*noop*/};
+ # Backspace (this is only valid for \b in char classes)
+ '\\b' => {
+ currentCls->add('\x08');
+ };
+ # Tab
+ '\\t' => {
+ currentCls->add('\x09');
+ };
+ # Newline
+ '\\n' => {
+ currentCls->add('\x0a');
+ };
+ # Carriage return
+ '\\r' => {
+ currentCls->add('\x0d');
+ };
+ # Form feed
+ '\\f' => {
+ currentCls->add('\x0c');
+ };
+ # Bell
+ '\\a' => {
+ currentCls->add('\x07');
+ };
+ # Escape
+ '\\e' => {
+ currentCls->add('\x1b');
+ };
+ # Horizontal whitespace
+ '\\h' => {
+ currentCls->add(CLASS_HORZ, false);
+ };
+ # Not horizontal whitespace
+ '\\H' => {
+ currentCls->add(CLASS_HORZ, true);
+ };
+ # Vertical whitespace
+ '\\v' => {
+ currentCls->add(CLASS_VERT, false);
+ };
+ # Not vertical whitespace
+ '\\V' => {
+ currentCls->add(CLASS_VERT, true);
+ };
+
+ '\\p{' => {
+ negated = false;
+ fhold;
+ fcall readBracedUCP;
+ };
+
+ '\\p' any => {
+ negated = false;
+ fhold;
+ fcall readUCPSingle;
+ };
+
+ '\\P{' => {
+ negated = true;
+ fhold;
+ fcall readBracedUCP;
+ };
+
+ '\\P'any => {
+ negated = true;
+ fhold;
+ fcall readUCPSingle;
+ };
+
+ '\\P' => { throw LocatedParseError("Malformed property"); };
+ '\\p' => { throw LocatedParseError("Malformed property"); };
+
+ # Octal
+ escapedOctal0 => {
+ currentCls->add(octAccumulator);
+ };
+ escapedOctal2c => {
+ currentCls->add(octAccumulator);
+ };
+
+ '\\o{' [0-7]+ '}' => {
string oct(ts + 3, te - ts - 4);
unsigned long val;
try {
@@ -983,29 +983,29 @@ unichar readUtf8CodePoint4c(const char *s) {
} catch (const std::out_of_range &) {
val = MAX_UNICODE + 1;
}
- if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) {
- throw LocatedParseError("Value in \\o{...} sequence is too large");
- }
- currentCls->add((unichar)val);
- };
-
- # And for when it goes wrong
- '\\o' => {
- throw LocatedParseError("Value in \\o{...} sequence is non-octal or missing braces");
- };
-
- # Hex
- escapedHex => {
- currentCls->add(accumulator);
- };
- # not a back-ref, not octal, just PCRE madness
- '\\' [89] => {
- // whatever we found here
- currentCls->add(*(ts + 1));
-
- };
- # Unicode Hex
- '\\x{' xdigit+ '}' => {
+ if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) {
+ throw LocatedParseError("Value in \\o{...} sequence is too large");
+ }
+ currentCls->add((unichar)val);
+ };
+
+ # And for when it goes wrong
+ '\\o' => {
+ throw LocatedParseError("Value in \\o{...} sequence is non-octal or missing braces");
+ };
+
+ # Hex
+ escapedHex => {
+ currentCls->add(accumulator);
+ };
+ # not a back-ref, not octal, just PCRE madness
+ '\\' [89] => {
+ // whatever we found here
+ currentCls->add(*(ts + 1));
+
+ };
+ # Unicode Hex
+ '\\x{' xdigit+ '}' => {
string hex(ts + 3, te - ts - 4);
unsigned long val;
try {
@@ -1013,148 +1013,148 @@ unichar readUtf8CodePoint4c(const char *s) {
} catch (const std::out_of_range &) {
val = MAX_UNICODE + 1;
}
- if (val > MAX_UNICODE) {
- throw LocatedParseError("Value in \\x{...} sequence is too large");
- }
- currentCls->add((unichar)val);
- };
- # And for when it goes wrong
- '\\x{' => {
- throw LocatedParseError("Value in \\x{...} sequence is non-hex or missing }");
- };
- # Control characters
- escapedCtrl => {
- if (te - ts < 3) {
- assert(te - ts == 2);
- throw LocatedParseError(SLASH_C_ERROR);
- } else {
- assert(te - ts == 3);
- currentCls->add(decodeCtrl(ts[2]));
- }
- };
- # Word character
- '\\w' => {
- currentCls->add(CLASS_WORD, false);
- };
- # Non word character
- '\\W' => {
- currentCls->add(CLASS_WORD, true);
- };
- # Whitespace character (except VT)
- '\\s' => {
- currentCls->add(CLASS_SPACE, false);
- };
- # Non whitespace character
- '\\S' => {
- currentCls->add(CLASS_SPACE, true);
- };
- # Digit character
- '\\d' => {
- currentCls->add(CLASS_DIGIT, false);
- };
- # Non digit character
- '\\D' => {
- currentCls->add(CLASS_DIGIT, true);
- };
- '\-' => {
- currentCls->addDash();
- };
-
- # A bunch of unsupported (for now) escapes
- escapedUnsupported - '\\X' => throwUnsupportedEscape;
-
- # PCRE appears to discard escaped g in a char class (a backref bug?)
- '\\g' => throwUnsupportedEscape;
-
- # the too-hard basket: UE-944, UE-1134, UE-1157
- # many escaped single char literals shold be benign, but PCRE
- # breaks with them when adding to ranges, so unless they have
- # defined special meaning in a char-class we reject them to be
- # safe.
- '\\' alpha => throwUnsupportedEscape;
-
- '\\' any => {
- // add the literal char
- currentCls->add(*(ts + 1));
- };
-
- #unicode chars
- utf8_2c when is_utf8 => {
- assert(mode.utf8);
- currentCls->add(readUtf8CodePoint2c(ts));
- };
-
- utf8_3c when is_utf8 => {
- assert(mode.utf8);
- currentCls->add(readUtf8CodePoint3c(ts));
- };
-
- utf8_4c when is_utf8 => {
- assert(mode.utf8);
- currentCls->add(readUtf8CodePoint4c(ts));
- };
-
- hi_byte when is_utf8 => {
- assert(mode.utf8);
- throwInvalidUtf8();
- };
-
- # Literal character
- (any - ']') => {
+ if (val > MAX_UNICODE) {
+ throw LocatedParseError("Value in \\x{...} sequence is too large");
+ }
+ currentCls->add((unichar)val);
+ };
+ # And for when it goes wrong
+ '\\x{' => {
+ throw LocatedParseError("Value in \\x{...} sequence is non-hex or missing }");
+ };
+ # Control characters
+ escapedCtrl => {
+ if (te - ts < 3) {
+ assert(te - ts == 2);
+ throw LocatedParseError(SLASH_C_ERROR);
+ } else {
+ assert(te - ts == 3);
+ currentCls->add(decodeCtrl(ts[2]));
+ }
+ };
+ # Word character
+ '\\w' => {
+ currentCls->add(CLASS_WORD, false);
+ };
+ # Non word character
+ '\\W' => {
+ currentCls->add(CLASS_WORD, true);
+ };
+ # Whitespace character (except VT)
+ '\\s' => {
+ currentCls->add(CLASS_SPACE, false);
+ };
+ # Non whitespace character
+ '\\S' => {
+ currentCls->add(CLASS_SPACE, true);
+ };
+ # Digit character
+ '\\d' => {
+ currentCls->add(CLASS_DIGIT, false);
+ };
+ # Non digit character
+ '\\D' => {
+ currentCls->add(CLASS_DIGIT, true);
+ };
+ '\-' => {
+ currentCls->addDash();
+ };
+
+ # A bunch of unsupported (for now) escapes
+ escapedUnsupported - '\\X' => throwUnsupportedEscape;
+
+ # PCRE appears to discard escaped g in a char class (a backref bug?)
+ '\\g' => throwUnsupportedEscape;
+
+ # the too-hard basket: UE-944, UE-1134, UE-1157
+ # many escaped single char literals should be benign, but PCRE
+ # breaks with them when adding to ranges, so unless they have
+ # defined special meaning in a char-class we reject them to be
+ # safe.
+ '\\' alpha => throwUnsupportedEscape;
+
+ '\\' any => {
+ // add the literal char
+ currentCls->add(*(ts + 1));
+ };
+
+ #unicode chars
+ utf8_2c when is_utf8 => {
+ assert(mode.utf8);
+ currentCls->add(readUtf8CodePoint2c(ts));
+ };
+
+ utf8_3c when is_utf8 => {
+ assert(mode.utf8);
+ currentCls->add(readUtf8CodePoint3c(ts));
+ };
+
+ utf8_4c when is_utf8 => {
+ assert(mode.utf8);
+ currentCls->add(readUtf8CodePoint4c(ts));
+ };
+
+ hi_byte when is_utf8 => {
+ assert(mode.utf8);
+ throwInvalidUtf8();
+ };
+
+ # Literal character
+ (any - ']') => {
currentCls->add((u8)*ts);
- };
-
- ']' => {
- currentCls->finalize();
- currentSeq->addComponent(move(currentCls));
- inCharClass = false;
- fgoto main;
- };
- *|;
-
- #############################################################
- # Parser to read stuff from a character class
- #############################################################
- readClass := |*
- # A caret at the beginning of the class means that the rest of the
- # class is negated.
- '\^' when is_early_charclass => {
- if (currentCls->isNegated()) {
- // Already seen a caret; the second one is not a meta-character.
- inCharClassEarly = false;
- fhold; fgoto charClassGuts;
- } else {
- currentCls->negate();
- // Note: we cannot switch off inCharClassEarly here, as /[^]]/
- // needs to use the right square bracket path below.
- }
- };
- # A right square bracket before anything "real" is interpreted as a
- # literal right square bracket.
- ']' when is_early_charclass => {
- currentCls->add(']');
- inCharClassEarly = false;
- };
- # if we hit a quote before anything "real", handle it
- '\\Q' => { fcall readQuotedClass; };
- '\\E' => { /*noop*/};
-
- # time for the real work to happen
- any => {
- inCharClassEarly = false;
- fhold;
- fgoto charClassGuts;
- };
- *|;
-
- #############################################################
- # Parser to read a quoted literal
- #############################################################
- readQuotedLiteral := |*
- # Escape sequence
- '\\E' => {
- fgoto main;
- };
+ };
+
+ ']' => {
+ currentCls->finalize();
+ currentSeq->addComponent(move(currentCls));
+ inCharClass = false;
+ fgoto main;
+ };
+ *|;
+
+ #############################################################
+ # Parser to read stuff from a character class
+ #############################################################
+ readClass := |*
+ # A caret at the beginning of the class means that the rest of the
+ # class is negated.
+ '\^' when is_early_charclass => {
+ if (currentCls->isNegated()) {
+ // Already seen a caret; the second one is not a meta-character.
+ inCharClassEarly = false;
+ fhold; fgoto charClassGuts;
+ } else {
+ currentCls->negate();
+ // Note: we cannot switch off inCharClassEarly here, as /[^]]/
+ // needs to use the right square bracket path below.
+ }
+ };
+ # A right square bracket before anything "real" is interpreted as a
+ # literal right square bracket.
+ ']' when is_early_charclass => {
+ currentCls->add(']');
+ inCharClassEarly = false;
+ };
+ # if we hit a quote before anything "real", handle it
+ '\\Q' => { fcall readQuotedClass; };
+ '\\E' => { /*noop*/};
+
+ # time for the real work to happen
+ any => {
+ inCharClassEarly = false;
+ fhold;
+ fgoto charClassGuts;
+ };
+ *|;
+
+ #############################################################
+ # Parser to read a quoted literal
+ #############################################################
+ readQuotedLiteral := |*
+ # Escape sequence
+ '\\E' => {
+ fgoto main;
+ };
#unicode chars
utf8_2c when is_utf8 => {
@@ -1189,20 +1189,20 @@ unichar readUtf8CodePoint4c(const char *s) {
throwInvalidUtf8();
};
- # Literal character
- any => {
- addLiteral(currentSeq, *ts, mode);
- };
- *|;
-
- #############################################################
- # Parser to read a quoted class
- #############################################################
- readQuotedClass := |*
- # Escape sequence
- '\\E' => {
- fret;
- };
+ # Literal character
+ any => {
+ addLiteral(currentSeq, *ts, mode);
+ };
+ *|;
+
+ #############################################################
+ # Parser to read a quoted class
+ #############################################################
+ readQuotedClass := |*
+ # Escape sequence
+ '\\E' => {
+ fret;
+ };
#unicode chars
utf8_2c when is_utf8 => {
@@ -1228,337 +1228,337 @@ unichar readUtf8CodePoint4c(const char *s) {
throwInvalidUtf8();
};
- # Literal character
- any => {
- currentCls->add(*ts);
- inCharClassEarly = false;
- };
- *|;
-
-
- #############################################################
- # Parser to read (and ignore) a comment block
- #############################################################
- readComment := |*
- # Right paren
- '\)' => { inComment = false; fgoto main; };
-
- # absolutely everything gets ignored until we see a right
- # paren
- any;
- *|;
-
- #############################################################
- # Parser to read (and ignore) a newline-terminated comment
- # block
- #############################################################
- readNewlineTerminatedComment := |*
- '\n' => { inComment = false; fgoto main; };
-
- # absolutely everything gets ignored until we see a
- # newline
- any;
- *|;
-
- #############################################################
- # Parser for standard components
- #############################################################
- main := |*
- #############################################################
- # Standard components
- #############################################################
- # Begin capturing group (non-capturing handled further down)
- '\(' => enterCapturingGroup;
- # End group
- '\)' => exitGroup;
- # Mark alternation
- '\|' => {
- currentSeq->addAlternation();
- };
- # POSIX named elements should only be used inside a class. Note
- # that we need to be able to reject /[:\]:]/ here.
- '\[:' ( '\\]' | [^\]] )* ':\]' => {
- throw LocatedParseError("POSIX named classes are only "
- "supported inside a class");
- };
- # We don't support POSIX collating elements (neither does PCRE
- # or Perl). These look like [.ch.] or [=ch=].
- '\[\.' ( '\\]' | [^\]] )* '\.\]' |
- '\[=' ( '\\]' | [^\]] )* '=\]' => {
- throw LocatedParseError("Unsupported POSIX collating "
- "element");
- };
- # Begin eating characters for class
- '\[' => eatClass;
- # Begin quoted literal
- '\\Q' => {
- fgoto readQuotedLiteral;
- };
+ # Literal character
+ any => {
+ currentCls->add(*ts);
+ inCharClassEarly = false;
+ };
+ *|;
+
+
+ #############################################################
+ # Parser to read (and ignore) a comment block
+ #############################################################
+ readComment := |*
+ # Right paren
+ '\)' => { inComment = false; fgoto main; };
+
+ # absolutely everything gets ignored until we see a right
+ # paren
+ any;
+ *|;
+
+ #############################################################
+ # Parser to read (and ignore) a newline-terminated comment
+ # block
+ #############################################################
+ readNewlineTerminatedComment := |*
+ '\n' => { inComment = false; fgoto main; };
+
+ # absolutely everything gets ignored until we see a
+ # newline
+ any;
+ *|;
+
+ #############################################################
+ # Parser for standard components
+ #############################################################
+ main := |*
+ #############################################################
+ # Standard components
+ #############################################################
+ # Begin capturing group (non-capturing handled further down)
+ '\(' => enterCapturingGroup;
+ # End group
+ '\)' => exitGroup;
+ # Mark alternation
+ '\|' => {
+ currentSeq->addAlternation();
+ };
+ # POSIX named elements should only be used inside a class. Note
+ # that we need to be able to reject /[:\]:]/ here.
+ '\[:' ( '\\]' | [^\]] )* ':\]' => {
+ throw LocatedParseError("POSIX named classes are only "
+ "supported inside a class");
+ };
+ # We don't support POSIX collating elements (neither does PCRE
+ # or Perl). These look like [.ch.] or [=ch=].
+ '\[\.' ( '\\]' | [^\]] )* '\.\]' |
+ '\[=' ( '\\]' | [^\]] )* '=\]' => {
+ throw LocatedParseError("Unsupported POSIX collating "
+ "element");
+ };
+ # Begin eating characters for class
+ '\[' => eatClass;
+ # Begin quoted literal
+ '\\Q' => {
+ fgoto readQuotedLiteral;
+ };
# An \E that is not preceded by a \Q is ignored
'\\E' => { /* noop */ };
- # Match any character
- '\.' => {
- currentSeq->addComponent(generateComponent(CLASS_ANY, false, mode));
- };
- # Match one byte
- '\\C' => {
- if (mode.utf8) {
- throw LocatedParseError("\\C is unsupported in UTF8");
- }
- currentSeq->addComponent(ue2::make_unique<ComponentByte>());
- };
- # Match 0 or more times (greedy)
- '\*' => {
- if (!currentSeq->addRepeat(0, ComponentRepeat::NoLimit,
- ComponentRepeat::REPEAT_GREEDY)) {
- throwInvalidRepeat();
- }
- };
- # Match 0 or more times (non-greedy)
- '\*\?' => {
- if (!currentSeq->addRepeat(0, ComponentRepeat::NoLimit,
- ComponentRepeat::REPEAT_NONGREEDY)) {
- throwInvalidRepeat();
- }
- };
- # Match 0 or more times (possessive)
- '\*\+' => {
- if (!currentSeq->addRepeat(0, ComponentRepeat::NoLimit,
- ComponentRepeat::REPEAT_POSSESSIVE)) {
- throwInvalidRepeat();
- }
- };
- # Match 1 or more times (greedy)
- '\+' => {
- if (!currentSeq->addRepeat(1, ComponentRepeat::NoLimit,
- ComponentRepeat::REPEAT_GREEDY)) {
- throwInvalidRepeat();
- }
- };
- # Match 1 or more times (non-greedy)
- '\+\?' => {
- if (!currentSeq->addRepeat(1, ComponentRepeat::NoLimit,
- ComponentRepeat::REPEAT_NONGREEDY)) {
- throwInvalidRepeat();
- }
- };
- # Match 1 or more times (possessive)
- '\+\+' => {
- if (!currentSeq->addRepeat(1, ComponentRepeat::NoLimit,
- ComponentRepeat::REPEAT_POSSESSIVE)) {
- throwInvalidRepeat();
- }
- };
- # Match 0 or 1 times (greedy)
- '\?' => {
- if (!currentSeq->addRepeat(
- 0, 1, ComponentRepeat::REPEAT_GREEDY)) {
- throwInvalidRepeat();
- }
- };
- # Match 0 or 1 times (non-greedy)
- '\?\?' => {
- if (!currentSeq->addRepeat(
- 0, 1, ComponentRepeat::REPEAT_NONGREEDY)) {
- throwInvalidRepeat();
- }
- };
- # Match 0 or 1 times (possessive)
- '\?\+' => {
- if (!currentSeq->addRepeat(
- 0, 1, ComponentRepeat::REPEAT_POSSESSIVE)) {
- throwInvalidRepeat();
- }
- };
- # Match {n}|{n,}|{n,m} times (greedy)
- repeatNM1 => {
- if (repeatN > repeatM || repeatM == 0) {
- throwInvalidRepeat();
- } else if (!currentSeq->addRepeat(
- repeatN, repeatM,
- ComponentRepeat::REPEAT_GREEDY)) {
- throwInvalidRepeat();
- }
- };
- # Match {n}|{n,}|{n,m} times (non-greedy)
- repeatNM1 '\?' => {
- if (repeatN > repeatM || repeatM == 0) {
- throwInvalidRepeat();
- } else if (!currentSeq->addRepeat(
- repeatN, repeatM,
- ComponentRepeat::REPEAT_NONGREEDY)) {
- throwInvalidRepeat();
- }
- };
- # Match {n}|{n,}|{n,m} times (possessive)
- repeatNM1 '\+' => {
- if (repeatN > repeatM || repeatM == 0) {
- throwInvalidRepeat();
- } else if (!currentSeq->addRepeat(
- repeatN, repeatM,
- ComponentRepeat::REPEAT_POSSESSIVE)) {
- throwInvalidRepeat();
- }
- };
-
- # In ignore_space mode, an unescaped # character introduces a
- # comment that runs until the next newline or the end of the
- # pattern.
- '\#' when is_ignore_space => enterNewlineTerminatedComment;
-
- # Perl 5.10 Special Backtracking Control Verbs: we support
- # UTF8/UCP, none of the others
- '(*' [^)] => { fhold; fcall readVerb; };
-
- # Earlier parser code checked for the terminating NULL and exited
- # explicitly.
- '\0' => { assert(0); fbreak; };
-
- #############################################################
- # Boundaries
- #############################################################
-
- # Start of data; also after internal newline in multiline mode
- '\^' => {
- auto bound = mode.multiline ? ComponentBoundary::BEGIN_LINE
- : ComponentBoundary::BEGIN_STRING;
- currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound));
- };
- # End of data (with optional internal newline); also before
- # internal newline in multiline mode
- '\$' => {
- auto bound = mode.multiline ? ComponentBoundary::END_LINE
- : ComponentBoundary::END_STRING_OPTIONAL_LF;
- currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound));
- };
- # Beginning of data
- '\\A' => {
- auto bound = ComponentBoundary::BEGIN_STRING;
- currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound));
- };
- # End of data (with optional internal newline)
- '\\Z' => {
- auto bound = ComponentBoundary::END_STRING_OPTIONAL_LF;
- currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound));
- };
- # End of data
- '\\z' => {
- auto bound = ComponentBoundary::END_STRING;
- currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound));
- };
- # Word boundary
- '\\b' => {
- currentSeq->addComponent(
- ue2::make_unique<ComponentWordBoundary>(ts - ptr, false, mode));
- };
- # Non-word boundary
- '\\B' => {
- currentSeq->addComponent(
- ue2::make_unique<ComponentWordBoundary>(ts - ptr, true, mode));
- };
-
- #############################################################
- # Escaped chars
- #############################################################
-
- # Tab
- '\\t' => {
- addLiteral(currentSeq, '\x09', mode);
- };
- # Newline
- '\\n' => {
- addLiteral(currentSeq, '\x0a', mode);
- };
- # Carriage return
- '\\r' => {
- addLiteral(currentSeq, '\x0d', mode);
- };
- # Form feed
- '\\f' => {
- addLiteral(currentSeq, '\x0c', mode);
- };
- # Bell
- '\\a' => {
- addLiteral(currentSeq, '\x07', mode);
- };
- # Escape
- '\\e' => {
- addLiteral(currentSeq, '\x1b', mode);
- };
- # Octal
- escapedOctal0 => {
- addLiteral(currentSeq, octAccumulator, mode);
- };
- escapedOctal2 => {
- // If there are enough capturing sub expressions, this may be
- // a back reference
- accumulator = parseAsDecimal(octAccumulator);
- if (accumulator < groupIndex) {
- currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(accumulator));
- } else {
- addEscapedOctal(currentSeq, octAccumulator, mode);
- }
- };
-
- # Numeric back reference
- # everything less than 8 is a straight up back ref, even if
- # it is a forwards backward reference (aieeee!)
- # Note that \8 and \9 are the literal chars '8' and '9'.
- '\\' backRefIdSingle => addNumberedBackRef;
- # otherwise we need to munge through the possible backref
- '\\' backRefId => {
- // if there are enough left parens to this point, back ref
- if (accumulator < groupIndex) {
- currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(accumulator));
- } else {
- // Otherwise, we interpret the first three digits as an
- // octal escape, and the remaining characters stand for
- // themselves as literals.
+ # Match any character
+ '\.' => {
+ currentSeq->addComponent(generateComponent(CLASS_ANY, false, mode));
+ };
+ # Match one byte
+ '\\C' => {
+ if (mode.utf8) {
+ throw LocatedParseError("\\C is unsupported in UTF8");
+ }
+ currentSeq->addComponent(ue2::make_unique<ComponentByte>());
+ };
+ # Match 0 or more times (greedy)
+ '\*' => {
+ if (!currentSeq->addRepeat(0, ComponentRepeat::NoLimit,
+ ComponentRepeat::REPEAT_GREEDY)) {
+ throwInvalidRepeat();
+ }
+ };
+ # Match 0 or more times (non-greedy)
+ '\*\?' => {
+ if (!currentSeq->addRepeat(0, ComponentRepeat::NoLimit,
+ ComponentRepeat::REPEAT_NONGREEDY)) {
+ throwInvalidRepeat();
+ }
+ };
+ # Match 0 or more times (possessive)
+ '\*\+' => {
+ if (!currentSeq->addRepeat(0, ComponentRepeat::NoLimit,
+ ComponentRepeat::REPEAT_POSSESSIVE)) {
+ throwInvalidRepeat();
+ }
+ };
+ # Match 1 or more times (greedy)
+ '\+' => {
+ if (!currentSeq->addRepeat(1, ComponentRepeat::NoLimit,
+ ComponentRepeat::REPEAT_GREEDY)) {
+ throwInvalidRepeat();
+ }
+ };
+ # Match 1 or more times (non-greedy)
+ '\+\?' => {
+ if (!currentSeq->addRepeat(1, ComponentRepeat::NoLimit,
+ ComponentRepeat::REPEAT_NONGREEDY)) {
+ throwInvalidRepeat();
+ }
+ };
+ # Match 1 or more times (possessive)
+ '\+\+' => {
+ if (!currentSeq->addRepeat(1, ComponentRepeat::NoLimit,
+ ComponentRepeat::REPEAT_POSSESSIVE)) {
+ throwInvalidRepeat();
+ }
+ };
+ # Match 0 or 1 times (greedy)
+ '\?' => {
+ if (!currentSeq->addRepeat(
+ 0, 1, ComponentRepeat::REPEAT_GREEDY)) {
+ throwInvalidRepeat();
+ }
+ };
+ # Match 0 or 1 times (non-greedy)
+ '\?\?' => {
+ if (!currentSeq->addRepeat(
+ 0, 1, ComponentRepeat::REPEAT_NONGREEDY)) {
+ throwInvalidRepeat();
+ }
+ };
+ # Match 0 or 1 times (possessive)
+ '\?\+' => {
+ if (!currentSeq->addRepeat(
+ 0, 1, ComponentRepeat::REPEAT_POSSESSIVE)) {
+ throwInvalidRepeat();
+ }
+ };
+ # Match {n}|{n,}|{n,m} times (greedy)
+ repeatNM1 => {
+ if (repeatN > repeatM || repeatM == 0) {
+ throwInvalidRepeat();
+ } else if (!currentSeq->addRepeat(
+ repeatN, repeatM,
+ ComponentRepeat::REPEAT_GREEDY)) {
+ throwInvalidRepeat();
+ }
+ };
+ # Match {n}|{n,}|{n,m} times (non-greedy)
+ repeatNM1 '\?' => {
+ if (repeatN > repeatM || repeatM == 0) {
+ throwInvalidRepeat();
+ } else if (!currentSeq->addRepeat(
+ repeatN, repeatM,
+ ComponentRepeat::REPEAT_NONGREEDY)) {
+ throwInvalidRepeat();
+ }
+ };
+ # Match {n}|{n,}|{n,m} times (possessive)
+ repeatNM1 '\+' => {
+ if (repeatN > repeatM || repeatM == 0) {
+ throwInvalidRepeat();
+ } else if (!currentSeq->addRepeat(
+ repeatN, repeatM,
+ ComponentRepeat::REPEAT_POSSESSIVE)) {
+ throwInvalidRepeat();
+ }
+ };
+
+ # In ignore_space mode, an unescaped # character introduces a
+ # comment that runs until the next newline or the end of the
+ # pattern.
+ '\#' when is_ignore_space => enterNewlineTerminatedComment;
+
+ # Perl 5.10 Special Backtracking Control Verbs: we support
+ # UTF8/UCP, none of the others
+ '(*' [^)] => { fhold; fcall readVerb; };
+
+ # Earlier parser code checked for the terminating NUL and exited
+ # explicitly.
+ '\0' => { assert(0); fbreak; };
+
+ #############################################################
+ # Boundaries
+ #############################################################
+
+ # Start of data; also after internal newline in multiline mode
+ '\^' => {
+ auto bound = mode.multiline ? ComponentBoundary::BEGIN_LINE
+ : ComponentBoundary::BEGIN_STRING;
+ currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound));
+ };
+ # End of data (with optional internal newline); also before
+ # internal newline in multiline mode
+ '\$' => {
+ auto bound = mode.multiline ? ComponentBoundary::END_LINE
+ : ComponentBoundary::END_STRING_OPTIONAL_LF;
+ currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound));
+ };
+ # Beginning of data
+ '\\A' => {
+ auto bound = ComponentBoundary::BEGIN_STRING;
+ currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound));
+ };
+ # End of data (with optional internal newline)
+ '\\Z' => {
+ auto bound = ComponentBoundary::END_STRING_OPTIONAL_LF;
+ currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound));
+ };
+ # End of data
+ '\\z' => {
+ auto bound = ComponentBoundary::END_STRING;
+ currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound));
+ };
+ # Word boundary
+ '\\b' => {
+ currentSeq->addComponent(
+ ue2::make_unique<ComponentWordBoundary>(ts - ptr, false, mode));
+ };
+ # Non-word boundary
+ '\\B' => {
+ currentSeq->addComponent(
+ ue2::make_unique<ComponentWordBoundary>(ts - ptr, true, mode));
+ };
+
+ #############################################################
+ # Escaped chars
+ #############################################################
+
+ # Tab
+ '\\t' => {
+ addLiteral(currentSeq, '\x09', mode);
+ };
+ # Newline
+ '\\n' => {
+ addLiteral(currentSeq, '\x0a', mode);
+ };
+ # Carriage return
+ '\\r' => {
+ addLiteral(currentSeq, '\x0d', mode);
+ };
+ # Form feed
+ '\\f' => {
+ addLiteral(currentSeq, '\x0c', mode);
+ };
+ # Bell
+ '\\a' => {
+ addLiteral(currentSeq, '\x07', mode);
+ };
+ # Escape
+ '\\e' => {
+ addLiteral(currentSeq, '\x1b', mode);
+ };
+ # Octal
+ escapedOctal0 => {
+ addLiteral(currentSeq, octAccumulator, mode);
+ };
+ escapedOctal2 => {
+ // If there are enough capturing sub expressions, this may be
+ // a back reference
+ accumulator = parseAsDecimal(octAccumulator);
+ if (accumulator < groupIndex) {
+ currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(accumulator));
+ } else {
+ addEscapedOctal(currentSeq, octAccumulator, mode);
+ }
+ };
+
+ # Numeric back reference
+ # everything less than 8 is a straight up back ref, even if
+ # it is a forwards backward reference (aieeee!)
+ # Note that \8 and \9 are the literal chars '8' and '9'.
+ '\\' backRefIdSingle => addNumberedBackRef;
+ # otherwise we need to munge through the possible backref
+ '\\' backRefId => {
+ // if there are enough left parens to this point, back ref
+ if (accumulator < groupIndex) {
+ currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(accumulator));
+ } else {
+ // Otherwise, we interpret the first three digits as an
+ // octal escape, and the remaining characters stand for
+ // themselves as literals.
const char *s = ts;
- unsigned int accum = 0;
- unsigned int oct_digits = 0;
+ unsigned int accum = 0;
+ unsigned int oct_digits = 0;
assert(*s == '\\'); // token starts at backslash
for (++s; s < te && oct_digits < 3; ++oct_digits, ++s) {
u8 digit = *s - '0';
- if (digit < 8) {
- accum = digit + accum * 8;
- } else {
- break;
- }
- }
-
- if (oct_digits > 0) {
- addEscapedOctal(currentSeq, accum, mode);
- }
-
- // And then the rest of the digits, if any, are literal.
+ if (digit < 8) {
+ accum = digit + accum * 8;
+ } else {
+ break;
+ }
+ }
+
+ if (oct_digits > 0) {
+ addEscapedOctal(currentSeq, accum, mode);
+ }
+
+ // And then the rest of the digits, if any, are literal.
for (; s < te; ++s) {
addLiteral(currentSeq, *s, mode);
- }
- }
- };
- backReferenceG => addNumberedBackRef;
- backReferenceGNegative => addNegativeNumberedBackRef;
- backReferenceGBracket => addNumberedBackRef;
- backReferenceGBracket2 => addNegativeNumberedBackRef;
- backReferenceGBracketName => addNamedBackRef;
- backReferenceKBracketName => addNamedBackRef;
- backReferenceKBracketName2 => addNamedBackRef;
- backReferenceKBracketName3 => addNamedBackRef;
- backReferenceP => addNamedBackRef;
- # Oniguruma - either angle braces or single quotes for this one
- ('\\g<' [^>]*? '>'|'\\g\'' [^\']*? '\'') => {
- ostringstream str;
- str << "Onigiruma subroutine call at index " << ts - ptr <<
- " not supported.";
- throw ParseError(str.str());
- };
- # Fallthrough: a \g that hasn't been caught by one of the above
- # is invalid syntax. Without this rule, we would accept /A\g/.
- '\\g' => {
- throw LocatedParseError("Invalid reference after \\g");
- };
- '\\o{' [0-7]+ '}' => {
+ }
+ }
+ };
+ backReferenceG => addNumberedBackRef;
+ backReferenceGNegative => addNegativeNumberedBackRef;
+ backReferenceGBracket => addNumberedBackRef;
+ backReferenceGBracket2 => addNegativeNumberedBackRef;
+ backReferenceGBracketName => addNamedBackRef;
+ backReferenceKBracketName => addNamedBackRef;
+ backReferenceKBracketName2 => addNamedBackRef;
+ backReferenceKBracketName3 => addNamedBackRef;
+ backReferenceP => addNamedBackRef;
+ # Oniguruma - either angle braces or single quotes for this one
+ ('\\g<' [^>]*? '>'|'\\g\'' [^\']*? '\'') => {
+ ostringstream str;
+ str << "Onigiruma subroutine call at index " << ts - ptr <<
+ " not supported.";
+ throw ParseError(str.str());
+ };
+ # Fallthrough: a \g that hasn't been caught by one of the above
+ # is invalid syntax. Without this rule, we would accept /A\g/.
+ '\\g' => {
+ throw LocatedParseError("Invalid reference after \\g");
+ };
+ '\\o{' [0-7]+ '}' => {
string oct(ts + 3, te - ts - 4);
unsigned long val;
try {
@@ -1566,21 +1566,21 @@ unichar readUtf8CodePoint4c(const char *s) {
} catch (const std::out_of_range &) {
val = MAX_UNICODE + 1;
}
- if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) {
- throw LocatedParseError("Value in \\o{...} sequence is too large");
- }
- addEscapedOctal(currentSeq, (unichar)val, mode);
- };
- # And for when it goes wrong
- '\\o' => {
- throw LocatedParseError("Value in \\o{...} sequence is non-octal or missing braces");
- };
- # Hex
- escapedHex => {
- addEscapedHex(currentSeq, accumulator, mode);
- };
- # Unicode Hex
- '\\x{' xdigit+ '}' => {
+ if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) {
+ throw LocatedParseError("Value in \\o{...} sequence is too large");
+ }
+ addEscapedOctal(currentSeq, (unichar)val, mode);
+ };
+ # And for when it goes wrong
+ '\\o' => {
+ throw LocatedParseError("Value in \\o{...} sequence is non-octal or missing braces");
+ };
+ # Hex
+ escapedHex => {
+ addEscapedHex(currentSeq, accumulator, mode);
+ };
+ # Unicode Hex
+ '\\x{' xdigit+ '}' => {
string hex(ts + 3, te - ts - 4);
unsigned long val;
try {
@@ -1588,330 +1588,330 @@ unichar readUtf8CodePoint4c(const char *s) {
} catch (const std::out_of_range &) {
val = MAX_UNICODE + 1;
}
- if (val > MAX_UNICODE) {
- throw LocatedParseError("Value in \\x{...} sequence is too large");
- }
- addEscapedHex(currentSeq, (unichar)val, mode);
- };
- # And for when it goes wrong
- '\\x{' => {
- throw LocatedParseError("Value in \\x{...} sequence is non-hex or missing }");
- };
- # Control characters
- escapedCtrl => {
- if (te - ts < 3) {
- assert(te - ts == 2);
- throw LocatedParseError(SLASH_C_ERROR);
- } else {
- assert(te - ts == 3);
- addLiteral(currentSeq, decodeCtrl(ts[2]), mode);
- }
- };
- # A bunch of unsupported (for now) escapes
- escapedUnsupported => {
- ostringstream str;
+ if (val > MAX_UNICODE) {
+ throw LocatedParseError("Value in \\x{...} sequence is too large");
+ }
+ addEscapedHex(currentSeq, (unichar)val, mode);
+ };
+ # And for when it goes wrong
+ '\\x{' => {
+ throw LocatedParseError("Value in \\x{...} sequence is non-hex or missing }");
+ };
+ # Control characters
+ escapedCtrl => {
+ if (te - ts < 3) {
+ assert(te - ts == 2);
+ throw LocatedParseError(SLASH_C_ERROR);
+ } else {
+ assert(te - ts == 3);
+ addLiteral(currentSeq, decodeCtrl(ts[2]), mode);
+ }
+ };
+ # A bunch of unsupported (for now) escapes
+ escapedUnsupported => {
+ ostringstream str;
str << "'\\" << *(ts + 1) << "' at index " << ts - ptr
<< " not supported.";
- throw ParseError(str.str());
- };
-
- # Word character
- '\\w' => {
- auto cc = generateComponent(CLASS_WORD, false, mode);
- currentSeq->addComponent(move(cc));
- };
- # Non word character
- '\\W' => {
- auto cc = generateComponent(CLASS_WORD, true, mode);
- currentSeq->addComponent(move(cc));
- };
- # Whitespace character
- '\\s' => {
- auto cc = generateComponent(CLASS_SPACE, false, mode);
- currentSeq->addComponent(move(cc));
- };
- # Non whitespace character
- '\\S' => {
- auto cc = generateComponent(CLASS_SPACE, true, mode);
- currentSeq->addComponent(move(cc));
- };
- # Digit character
- '\\d' => {
- auto cc = generateComponent(CLASS_DIGIT, false, mode);
- currentSeq->addComponent(move(cc));
- };
- # Non digit character
- '\\D' => {
- auto cc = generateComponent(CLASS_DIGIT, true, mode);
- currentSeq->addComponent(move(cc));
- };
- # Horizontal whitespace
- '\\h' => {
- auto cc = generateComponent(CLASS_HORZ, false, mode);
- currentSeq->addComponent(move(cc));
- };
- # Not horizontal whitespace
- '\\H' => {
- auto cc = generateComponent(CLASS_HORZ, true, mode);
- currentSeq->addComponent(move(cc));
- };
- # Vertical whitespace
- '\\v' => {
- auto cc = generateComponent(CLASS_VERT, false, mode);
- currentSeq->addComponent(move(cc));
- };
- # Not vertical whitespace
- '\\V' => {
- auto cc = generateComponent(CLASS_VERT, true, mode);
- currentSeq->addComponent(move(cc));
- };
-
- '\\p{' => {
- assert(!currentCls && !inCharClass);
- currentCls = getComponentClass(mode);
- negated = false;
- fhold;
- fcall readBracedUCP;
- };
-
- '\\p' any => {
- assert(!currentCls && !inCharClass);
- currentCls = getComponentClass(mode);
- negated = false;
- fhold;
- fcall readUCPSingle;
- };
-
- '\\P{' => {
- assert(!currentCls && !inCharClass);
- currentCls = getComponentClass(mode);
- negated = true;
- fhold;
- fcall readBracedUCP;
- };
-
- '\\P' any => {
- assert(!currentCls && !inCharClass);
- currentCls = getComponentClass(mode);
- negated = true;
- fhold;
- fcall readUCPSingle;
- };
-
- '\\P' => { throw LocatedParseError("Malformed property"); };
- '\\p' => { throw LocatedParseError("Malformed property"); };
-
- # Newline sequence, hairy semantics that we don't do
- '\\R' => {
- ostringstream str;
- str << "\\R at index " << ts - ptr << " not supported.";
- throw ParseError(str.str());
- };
-
- # Reset start of match, also hairy semantics that we don't do
- '\\K' => {
- ostringstream str;
- str << "\\K at index " << ts - ptr << " not supported.";
- throw ParseError(str.str());
- };
-
- # \k without a backref is bugged in PCRE so we have no
- # idea what our semantics should be on it
- '\\k' => {
- ostringstream str;
- str << "\\k at index " << ts - ptr << " not supported.";
- throw ParseError(str.str());
- };
-
- # \G is more hairy pcre-api stuff, DO NOT WANT
- '\\G' => {
- ostringstream str;
- str << "\\G at index " << ts - ptr << " not supported.";
- throw ParseError(str.str());
- };
-
- '\\X' => {
- currentSeq->addComponent(ue2::make_unique<ComponentEUS>(ts - ptr, mode));
- };
-
- # Fall through general escaped character
- '\\' any => {
- addLiteral(currentSeq, *(ts + 1), mode);
- };
-
- # A backslash with no follower is not allowed
- '\\' => {
- assert(ts + 1 == pe);
- ostringstream str;
- str << "Unescaped \\ at end of input, index " << ts - ptr << ".";
- throw ParseError(str.str());
- };
-
- #############################################################
- # Extended patterns
- #############################################################
-
- # Comment
- '\(\?\#' => enterComment;
- # Match modifiers
- '\(\?' matchModifiers >resetModifiers ')' => applyModifiers;
- # Non-capturing group, with flag modifiers
- '\(\?' matchModifiers >resetModifiers ':' => enterModifiedGroup;
- # Zero width look ahead assertion
- '\(\?=' => enterZWLookAhead;
- # Zero width negative look ahead assertion
- '\(\?\!' => enterZWNegLookAhead;
- # Zero width look behind assertion
- '\(\?\<=' => enterZWLookBehind;
- # Zero width negative look behind assertion
- '\(\?\<\!' => enterZWNegLookBehind;
- # Code (TOTALLY unsupported... for good reason)
- '\(\?\{' => enterEmbeddedCode;
- '\(\?\?\{' => enterEmbeddedCode;
- # Atomic group
- '\(\?\>' => enterAtomicGroup;
-
- # Named capturing groups
- ( namedGroup1 |
- namedGroup2 |
- namedGroup3 ) => enterNamedGroup;
-
- # named/numbered subroutine references
- numberedSubExpression => enterReferenceUnsupported;
- namedSubExpression => enterReferenceUnsupported;
-
- # Conditional reference with a positive lookahead assertion
- '(?(?=' => {
- auto a = ue2::make_unique<ComponentAssertion>(
- ComponentAssertion::LOOKAHEAD, ComponentAssertion::POS);
- ComponentAssertion *a_seq = a.get();
- PUSH_SEQUENCE;
- currentSeq = enterSequence(currentSeq,
- ue2::make_unique<ComponentCondReference>(move(a)));
- PUSH_SEQUENCE;
- currentSeq = a_seq;
- };
- # Conditional reference with a negative lookahead assertion
- '(?(?!' => {
- auto a = ue2::make_unique<ComponentAssertion>(
- ComponentAssertion::LOOKAHEAD, ComponentAssertion::NEG);
- ComponentAssertion *a_seq = a.get();
- PUSH_SEQUENCE;
- currentSeq = enterSequence(currentSeq,
- ue2::make_unique<ComponentCondReference>(move(a)));
- PUSH_SEQUENCE;
- currentSeq = a_seq;
- };
- # Conditional reference with a positive lookbehind assertion
- '(?(?<=' => {
- auto a = ue2::make_unique<ComponentAssertion>(
- ComponentAssertion::LOOKBEHIND, ComponentAssertion::POS);
- ComponentAssertion *a_seq = a.get();
- PUSH_SEQUENCE;
- currentSeq = enterSequence(currentSeq,
- ue2::make_unique<ComponentCondReference>(move(a)));
- PUSH_SEQUENCE;
- currentSeq = a_seq;
- };
- # Conditional reference with a negative lookbehind assertion
- '(?(?<!' => {
- auto a = ue2::make_unique<ComponentAssertion>(
- ComponentAssertion::LOOKBEHIND, ComponentAssertion::NEG);
- ComponentAssertion *a_seq = a.get();
- PUSH_SEQUENCE;
- currentSeq = enterSequence(currentSeq,
- ue2::make_unique<ComponentCondReference>(move(a)));
- PUSH_SEQUENCE;
- currentSeq = a_seq;
- };
-
- # Recursive conditional references (unsupported)
- '(?(R' ( [0-9]+ | ('&' [A-Za-z0-9_]+) ) ? ')' => {
- throw LocatedParseError("Pattern recursion not supported");
- };
-
- # Conditional references
- # numbered
- '\(\?\(' (backRefIdSingle | backRefId) ')' => enterNumberedConditionalRef;
- # named
- ( namedConditionalRef1 |
- namedConditionalRef2 |
- namedConditionalRef3 ) => enterNamedConditionalRef;
-
- # Conditions (unsupported)
- '\(\?\(' => enterConditionUnsupported;
-
- # Callouts (unsupported)
- '\(\?C' [0-9]* '\)' => {
- ostringstream str;
- str << "Callout at index " << ts - ptr << " not supported.";
- throw ParseError(str.str());
- };
-
- # Any other char after '(?' is a pattern modifier we don't
- # recognise.
- '\(\?' any => {
- throw LocatedParseError("Unrecognised character after (?");
- };
-
- #unicode chars
- utf8_2c when is_utf8 => {
- assert(mode.utf8);
- /* leverage ComponentClass to generate the vertices */
- auto cc = getComponentClass(mode);
- cc->add(readUtf8CodePoint2c(ts));
- cc->finalize();
- currentSeq->addComponent(move(cc));
- };
-
- utf8_3c when is_utf8 => {
- assert(mode.utf8);
- /* leverage ComponentClass to generate the vertices */
- auto cc = getComponentClass(mode);
- cc->add(readUtf8CodePoint3c(ts));
- cc->finalize();
- currentSeq->addComponent(move(cc));
- };
-
- utf8_4c when is_utf8 => {
- assert(mode.utf8);
- /* leverage ComponentClass to generate the vertices */
- auto cc = getComponentClass(mode);
- cc->add(readUtf8CodePoint4c(ts));
- cc->finalize();
- currentSeq->addComponent(move(cc));
- };
-
- hi_byte when is_utf8 => {
- assert(mode.utf8);
- throwInvalidUtf8();
- };
-
- #############################################################
- # Literal character
- #############################################################
- # literal character
- whitespace => {
- if (mode.ignore_space == false) {
- addLiteral(currentSeq, *ts, mode);
- }
- };
- any => {
- addLiteral(currentSeq, *ts, mode);
- };
- *|;
-
- prepush {
- DEBUG_PRINTF("stack %zu top %d\n", stack.size(), top);
- if ((int)stack.size() == top) {
- stack.resize(2 * (top + 1));
- }
- }
-}%%
-
-%% write data nofinal;
-
-/** \brief Main parser call, returns root Component or nullptr. */
+ throw ParseError(str.str());
+ };
+
+ # Word character
+ '\\w' => {
+ auto cc = generateComponent(CLASS_WORD, false, mode);
+ currentSeq->addComponent(move(cc));
+ };
+ # Non word character
+ '\\W' => {
+ auto cc = generateComponent(CLASS_WORD, true, mode);
+ currentSeq->addComponent(move(cc));
+ };
+ # Whitespace character
+ '\\s' => {
+ auto cc = generateComponent(CLASS_SPACE, false, mode);
+ currentSeq->addComponent(move(cc));
+ };
+ # Non whitespace character
+ '\\S' => {
+ auto cc = generateComponent(CLASS_SPACE, true, mode);
+ currentSeq->addComponent(move(cc));
+ };
+ # Digit character
+ '\\d' => {
+ auto cc = generateComponent(CLASS_DIGIT, false, mode);
+ currentSeq->addComponent(move(cc));
+ };
+ # Non digit character
+ '\\D' => {
+ auto cc = generateComponent(CLASS_DIGIT, true, mode);
+ currentSeq->addComponent(move(cc));
+ };
+ # Horizontal whitespace
+ '\\h' => {
+ auto cc = generateComponent(CLASS_HORZ, false, mode);
+ currentSeq->addComponent(move(cc));
+ };
+ # Not horizontal whitespace
+ '\\H' => {
+ auto cc = generateComponent(CLASS_HORZ, true, mode);
+ currentSeq->addComponent(move(cc));
+ };
+ # Vertical whitespace
+ '\\v' => {
+ auto cc = generateComponent(CLASS_VERT, false, mode);
+ currentSeq->addComponent(move(cc));
+ };
+ # Not vertical whitespace
+ '\\V' => {
+ auto cc = generateComponent(CLASS_VERT, true, mode);
+ currentSeq->addComponent(move(cc));
+ };
+
+ '\\p{' => {
+ assert(!currentCls && !inCharClass);
+ currentCls = getComponentClass(mode);
+ negated = false;
+ fhold;
+ fcall readBracedUCP;
+ };
+
+ '\\p' any => {
+ assert(!currentCls && !inCharClass);
+ currentCls = getComponentClass(mode);
+ negated = false;
+ fhold;
+ fcall readUCPSingle;
+ };
+
+ '\\P{' => {
+ assert(!currentCls && !inCharClass);
+ currentCls = getComponentClass(mode);
+ negated = true;
+ fhold;
+ fcall readBracedUCP;
+ };
+
+ '\\P' any => {
+ assert(!currentCls && !inCharClass);
+ currentCls = getComponentClass(mode);
+ negated = true;
+ fhold;
+ fcall readUCPSingle;
+ };
+
+ '\\P' => { throw LocatedParseError("Malformed property"); };
+ '\\p' => { throw LocatedParseError("Malformed property"); };
+
+ # Newline sequence, hairy semantics that we don't do
+ '\\R' => {
+ ostringstream str;
+ str << "\\R at index " << ts - ptr << " not supported.";
+ throw ParseError(str.str());
+ };
+
+ # Reset start of match, also hairy semantics that we don't do
+ '\\K' => {
+ ostringstream str;
+ str << "\\K at index " << ts - ptr << " not supported.";
+ throw ParseError(str.str());
+ };
+
+ # \k without a backref is bugged in PCRE so we have no
+ # idea what our semantics should be on it
+ '\\k' => {
+ ostringstream str;
+ str << "\\k at index " << ts - ptr << " not supported.";
+ throw ParseError(str.str());
+ };
+
+ # \G is more hairy pcre-api stuff, DO NOT WANT
+ '\\G' => {
+ ostringstream str;
+ str << "\\G at index " << ts - ptr << " not supported.";
+ throw ParseError(str.str());
+ };
+
+ '\\X' => {
+ currentSeq->addComponent(ue2::make_unique<ComponentEUS>(ts - ptr, mode));
+ };
+
+ # Fall through general escaped character
+ '\\' any => {
+ addLiteral(currentSeq, *(ts + 1), mode);
+ };
+
+ # A backslash with no follower is not allowed
+ '\\' => {
+ assert(ts + 1 == pe);
+ ostringstream str;
+ str << "Unescaped \\ at end of input, index " << ts - ptr << ".";
+ throw ParseError(str.str());
+ };
+
+ #############################################################
+ # Extended patterns
+ #############################################################
+
+ # Comment
+ '\(\?\#' => enterComment;
+ # Match modifiers
+ '\(\?' matchModifiers >resetModifiers ')' => applyModifiers;
+ # Non-capturing group, with flag modifiers
+ '\(\?' matchModifiers >resetModifiers ':' => enterModifiedGroup;
+ # Zero width look ahead assertion
+ '\(\?=' => enterZWLookAhead;
+ # Zero width negative look ahead assertion
+ '\(\?\!' => enterZWNegLookAhead;
+ # Zero width look behind assertion
+ '\(\?\<=' => enterZWLookBehind;
+ # Zero width negative look behind assertion
+ '\(\?\<\!' => enterZWNegLookBehind;
+ # Code (TOTALLY unsupported... for good reason)
+ '\(\?\{' => enterEmbeddedCode;
+ '\(\?\?\{' => enterEmbeddedCode;
+ # Atomic group
+ '\(\?\>' => enterAtomicGroup;
+
+ # Named capturing groups
+ ( namedGroup1 |
+ namedGroup2 |
+ namedGroup3 ) => enterNamedGroup;
+
+ # named/numbered subroutine references
+ numberedSubExpression => enterReferenceUnsupported;
+ namedSubExpression => enterReferenceUnsupported;
+
+ # Conditional reference with a positive lookahead assertion
+ '(?(?=' => {
+ auto a = ue2::make_unique<ComponentAssertion>(
+ ComponentAssertion::LOOKAHEAD, ComponentAssertion::POS);
+ ComponentAssertion *a_seq = a.get();
+ PUSH_SEQUENCE;
+ currentSeq = enterSequence(currentSeq,
+ ue2::make_unique<ComponentCondReference>(move(a)));
+ PUSH_SEQUENCE;
+ currentSeq = a_seq;
+ };
+ # Conditional reference with a negative lookahead assertion
+ '(?(?!' => {
+ auto a = ue2::make_unique<ComponentAssertion>(
+ ComponentAssertion::LOOKAHEAD, ComponentAssertion::NEG);
+ ComponentAssertion *a_seq = a.get();
+ PUSH_SEQUENCE;
+ currentSeq = enterSequence(currentSeq,
+ ue2::make_unique<ComponentCondReference>(move(a)));
+ PUSH_SEQUENCE;
+ currentSeq = a_seq;
+ };
+ # Conditional reference with a positive lookbehind assertion
+ '(?(?<=' => {
+ auto a = ue2::make_unique<ComponentAssertion>(
+ ComponentAssertion::LOOKBEHIND, ComponentAssertion::POS);
+ ComponentAssertion *a_seq = a.get();
+ PUSH_SEQUENCE;
+ currentSeq = enterSequence(currentSeq,
+ ue2::make_unique<ComponentCondReference>(move(a)));
+ PUSH_SEQUENCE;
+ currentSeq = a_seq;
+ };
+ # Conditional reference with a negative lookbehind assertion
+ '(?(?<!' => {
+ auto a = ue2::make_unique<ComponentAssertion>(
+ ComponentAssertion::LOOKBEHIND, ComponentAssertion::NEG);
+ ComponentAssertion *a_seq = a.get();
+ PUSH_SEQUENCE;
+ currentSeq = enterSequence(currentSeq,
+ ue2::make_unique<ComponentCondReference>(move(a)));
+ PUSH_SEQUENCE;
+ currentSeq = a_seq;
+ };
+
+ # Recursive conditional references (unsupported)
+ '(?(R' ( [0-9]+ | ('&' [A-Za-z0-9_]+) ) ? ')' => {
+ throw LocatedParseError("Pattern recursion not supported");
+ };
+
+ # Conditional references
+ # numbered
+ '\(\?\(' (backRefIdSingle | backRefId) ')' => enterNumberedConditionalRef;
+ # named
+ ( namedConditionalRef1 |
+ namedConditionalRef2 |
+ namedConditionalRef3 ) => enterNamedConditionalRef;
+
+ # Conditions (unsupported)
+ '\(\?\(' => enterConditionUnsupported;
+
+ # Callouts (unsupported)
+ '\(\?C' [0-9]* '\)' => {
+ ostringstream str;
+ str << "Callout at index " << ts - ptr << " not supported.";
+ throw ParseError(str.str());
+ };
+
+ # Any other char after '(?' is a pattern modifier we don't
+ # recognise.
+ '\(\?' any => {
+ throw LocatedParseError("Unrecognised character after (?");
+ };
+
+ #unicode chars
+ utf8_2c when is_utf8 => {
+ assert(mode.utf8);
+ /* leverage ComponentClass to generate the vertices */
+ auto cc = getComponentClass(mode);
+ cc->add(readUtf8CodePoint2c(ts));
+ cc->finalize();
+ currentSeq->addComponent(move(cc));
+ };
+
+ utf8_3c when is_utf8 => {
+ assert(mode.utf8);
+ /* leverage ComponentClass to generate the vertices */
+ auto cc = getComponentClass(mode);
+ cc->add(readUtf8CodePoint3c(ts));
+ cc->finalize();
+ currentSeq->addComponent(move(cc));
+ };
+
+ utf8_4c when is_utf8 => {
+ assert(mode.utf8);
+ /* leverage ComponentClass to generate the vertices */
+ auto cc = getComponentClass(mode);
+ cc->add(readUtf8CodePoint4c(ts));
+ cc->finalize();
+ currentSeq->addComponent(move(cc));
+ };
+
+ hi_byte when is_utf8 => {
+ assert(mode.utf8);
+ throwInvalidUtf8();
+ };
+
+ #############################################################
+ # Literal character
+ #############################################################
+ # literal character
+ whitespace => {
+ if (mode.ignore_space == false) {
+ addLiteral(currentSeq, *ts, mode);
+ }
+ };
+ any => {
+ addLiteral(currentSeq, *ts, mode);
+ };
+ *|;
+
+ prepush {
+ DEBUG_PRINTF("stack %zu top %d\n", stack.size(), top);
+ if ((int)stack.size() == top) {
+ stack.resize(2 * (top + 1));
+ }
+ }
+}%%
+
+%% write data nofinal;
+
+/** \brief Main parser call, returns root Component or nullptr. */
unique_ptr<Component> parse(const char *ptr, ParseMode &globalMode) {
assert(ptr);
@@ -1923,116 +1923,116 @@ unique_ptr<Component> parse(const char *ptr, ParseMode &globalMode) {
p = read_control_verbs(p, pe, 0, globalMode);
const char *eof = pe;
- int cs;
- UNUSED int act;
- int top;
- vector<int> stack;
+ int cs;
+ UNUSED int act;
+ int top;
+ vector<int> stack;
const char *ts, *te;
- unichar accumulator = 0;
- unichar octAccumulator = 0; /* required as we are also accumulating for
- * back ref when looking for octals */
- unsigned repeatN = 0;
- unsigned repeatM = 0;
- string label;
-
- ParseMode mode = globalMode;
- ParseMode newMode;
-
- bool negated = false;
- bool inComment = false;
-
- // Stack of sequences and flags used to store state when we enter
- // sub-sequences.
- vector<ExprState> sequences;
-
- // Index of the next capturing group. Note that zero is reserved for the
- // root sequence.
- unsigned groupIndex = 1;
-
- // Set storing group names that are currently in use.
+ unichar accumulator = 0;
+ unichar octAccumulator = 0; /* required as we are also accumulating for
+ * back ref when looking for octals */
+ unsigned repeatN = 0;
+ unsigned repeatM = 0;
+ string label;
+
+ ParseMode mode = globalMode;
+ ParseMode newMode;
+
+ bool negated = false;
+ bool inComment = false;
+
+ // Stack of sequences and flags used to store state when we enter
+ // sub-sequences.
+ vector<ExprState> sequences;
+
+ // Index of the next capturing group. Note that zero is reserved for the
+ // root sequence.
+ unsigned groupIndex = 1;
+
+ // Set storing group names that are currently in use.
flat_set<string> groupNames;
-
- // Root sequence.
- unique_ptr<ComponentSequence> rootSeq = ue2::make_unique<ComponentSequence>();
- rootSeq->setCaptureIndex(0);
-
- // Current sequence being appended to
- ComponentSequence *currentSeq = rootSeq.get();
-
- // The current character class being appended to. This is used as the
- // accumulator for both character class and UCP properties.
- unique_ptr<ComponentClass> currentCls;
-
- // True if the machine is currently inside a character class, i.e. square
- // brackets [..].
- bool inCharClass = false;
-
- // True if the machine is inside a character class but it has not processed
- // any "real" elements yet, i.e. it's still processing meta-characters like
- // '^'.
- bool inCharClassEarly = false;
-
- // Location at which the current character class began.
+
+ // Root sequence.
+ unique_ptr<ComponentSequence> rootSeq = ue2::make_unique<ComponentSequence>();
+ rootSeq->setCaptureIndex(0);
+
+ // Current sequence being appended to
+ ComponentSequence *currentSeq = rootSeq.get();
+
+ // The current character class being appended to. This is used as the
+ // accumulator for both character class and UCP properties.
+ unique_ptr<ComponentClass> currentCls;
+
+ // True if the machine is currently inside a character class, i.e. square
+ // brackets [..].
+ bool inCharClass = false;
+
+ // True if the machine is inside a character class but it has not processed
+ // any "real" elements yet, i.e. it's still processing meta-characters like
+ // '^'.
+ bool inCharClassEarly = false;
+
+ // Location at which the current character class began.
const char *currentClsBegin = p;
-
- // We throw exceptions on various parsing failures beyond this point: we
- // use a try/catch block here to clean up our allocated memory before we
- // re-throw the exception to the caller.
- try {
- // Embed the Ragel machine here
- %% write init;
- %% write exec;
-
- if (p != pe && *p != '\0') {
- // didn't make it to the end of our input, but we didn't throw a ParseError?
- assert(0);
- ostringstream str;
- str << "Parse error at index " << (p - ptr) << ".";
- throw ParseError(str.str());
- }
-
- if (currentCls) {
- assert(inCharClass);
- assert(currentClsBegin);
- ostringstream oss;
- oss << "Unterminated character class starting at index "
- << currentClsBegin - ptr << ".";
- throw ParseError(oss.str());
- }
-
- if (inComment) {
- throw ParseError("Unterminated comment.");
- }
-
- if (!sequences.empty()) {
- ostringstream str;
- str << "Missing close parenthesis for group started at index "
- << sequences.back().seqOffset << ".";
- throw ParseError(str.str());
- }
-
- // Unlikely, but possible
- if (groupIndex > 65535) {
- throw ParseError("The maximum number of capturing subexpressions is 65535.");
- }
-
- // Finalize the top-level sequence, which will take care of any
- // top-level alternation.
- currentSeq->finalize();
- assert(currentSeq == rootSeq.get());
-
- // Ensure that all references are valid.
- checkReferences(*rootSeq, groupIndex, groupNames);
-
- return move(rootSeq);
- } catch (LocatedParseError &error) {
- if (ts >= ptr && ts <= pe) {
- error.locate(ts - ptr);
- } else {
- error.locate(0);
- }
- throw;
- }
-}
-
-} // namespace ue2
+
+ // We throw exceptions on various parsing failures beyond this point: we
+ // use a try/catch block here to clean up our allocated memory before we
+ // re-throw the exception to the caller.
+ try {
+ // Embed the Ragel machine here
+ %% write init;
+ %% write exec;
+
+ if (p != pe && *p != '\0') {
+ // didn't make it to the end of our input, but we didn't throw a ParseError?
+ assert(0);
+ ostringstream str;
+ str << "Parse error at index " << (p - ptr) << ".";
+ throw ParseError(str.str());
+ }
+
+ if (currentCls) {
+ assert(inCharClass);
+ assert(currentClsBegin);
+ ostringstream oss;
+ oss << "Unterminated character class starting at index "
+ << currentClsBegin - ptr << ".";
+ throw ParseError(oss.str());
+ }
+
+ if (inComment) {
+ throw ParseError("Unterminated comment.");
+ }
+
+ if (!sequences.empty()) {
+ ostringstream str;
+ str << "Missing close parenthesis for group started at index "
+ << sequences.back().seqOffset << ".";
+ throw ParseError(str.str());
+ }
+
+ // Unlikely, but possible
+ if (groupIndex > 65535) {
+ throw ParseError("The maximum number of capturing subexpressions is 65535.");
+ }
+
+ // Finalize the top-level sequence, which will take care of any
+ // top-level alternation.
+ currentSeq->finalize();
+ assert(currentSeq == rootSeq.get());
+
+ // Ensure that all references are valid.
+ checkReferences(*rootSeq, groupIndex, groupNames);
+
+ return move(rootSeq);
+ } catch (LocatedParseError &error) {
+ if (ts >= ptr && ts <= pe) {
+ error.locate(ts - ptr);
+ } else {
+ error.locate(0);
+ }
+ throw;
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/Utf8ComponentClass.cpp b/contrib/libs/hyperscan/src/parser/Utf8ComponentClass.cpp
index c5149fd702..cdfc974acd 100644
--- a/contrib/libs/hyperscan/src/parser/Utf8ComponentClass.cpp
+++ b/contrib/libs/hyperscan/src/parser/Utf8ComponentClass.cpp
@@ -1,1173 +1,1173 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Character class in UTF-8 mode.
- */
-
-
-#include "Utf8ComponentClass.h"
-
-#include "buildstate.h"
-#include "Parser.h"
-#include "parse_error.h"
-#include "position.h"
-#include "position_info.h"
-#include "nfagraph/ng_builder.h"
-#include "util/compare.h"
-#include "util/unicode_def.h"
-
-#include <cstring>
-
-#include "ucp_table.h"
-
-using namespace std;
-
-namespace ue2 {
-
-PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode) {
- /* Note: the mapping used here for mapping posix character classes
- * matches the observed behaviour of PCRE (lower and upper going to \p{L}
- * is not documented by pcre).
- *
- * Note: this mapping is quite different from both of the mappings
- * recommended in the unicode regex tech report (TR-18) appendix C
- */
- switch (in) {
- case CLASS_ALNUM:
- return CLASS_UCP_XAN;
- case CLASS_ALPHA:
- return CLASS_UCP_L;
- case CLASS_BLANK:
- return CLASS_HORZ;
- case CLASS_DIGIT:
- return CLASS_UCP_ND;
- case CLASS_GRAPH:
- return CLASS_XGRAPH;
- case CLASS_LOWER:
- if (mode.caseless) { /* we also pick up uppercase titlecase and others */
- return CLASS_UCP_L;
- } else {
- return CLASS_UCP_LL;
- }
- case CLASS_PRINT:
- return CLASS_XPRINT;
- case CLASS_PUNCT:
- return CLASS_XPUNCT;
- case CLASS_SPACE:
- return CLASS_UCP_XPS;
- case CLASS_UPPER:
- if (mode.caseless) { /* we also pick up lowercase titlecase and others */
- return CLASS_UCP_L;
- } else {
- return CLASS_UCP_LU;
- }
- case CLASS_WORD:
- return CLASS_UCP_XWD;
- default:
- return in;
- }
-}
-
-CodePointSet getPredefinedCodePointSet(PredefinedClass c,
- const ParseMode &mode) {
- /* TODO: support properly PCRE_UCP mode and non PCRE_UCP mode */
- switch (c) {
- case CLASS_ANY:
- if (mode.dotall) {
- return CodePointSet(CodePointSet::interval(0, MAX_UNICODE));
- } else {
- CodePointSet rv;
- rv.set('\n');
- rv.flip();
- return rv;
- }
- case CLASS_XGRAPH: {
- CodePointSet rv;
- rv = getUcpZ();
- rv |= getUcpC();
- rv.flip();
- // most of Cf, except for ...
- CodePointSet cf = getUcpCf();
- cf.unset(0x061c);
- cf.unset(0x180e);
- cf.unsetRange(0x2066, 0x2069);
- rv |= cf;
- return rv;
- }
- case CLASS_XPRINT: {
- // Same as graph, plus everything with the Zs property.
- CodePointSet rv = getPredefinedCodePointSet(CLASS_XGRAPH, mode);
- rv |= getUcpZs();
- rv.set(0x180e); // Also included in this class by PCRE 8.38.
- return rv;
- }
- case CLASS_XPUNCT: {
- // Everything with the P (punctuation) property, plus code points in S
- // (symbols) that are < 128.
- CodePointSet rv = getUcpP();
- CodePointSet symbols = getUcpS();
- symbols.unsetRange(128, MAX_UNICODE);
- rv |= symbols;
- return rv;
- }
- case CLASS_HORZ: {
- CodePointSet rv;
- rv.set(0x0009); /* Horizontal tab */
- rv.set(0x0020); /* Space */
- rv.set(0x00A0); /* Non-break space */
- rv.set(0x1680); /* Ogham space mark */
- rv.set(0x180E); /* Mongolian vowel separator */
- rv.set(0x2000); /* En quad */
- rv.set(0x2001); /* Em quad */
- rv.set(0x2002); /* En space */
- rv.set(0x2003); /* Em space */
- rv.set(0x2004); /* Three-per-em space */
- rv.set(0x2005); /* Four-per-em space */
- rv.set(0x2006); /* Six-per-em space */
- rv.set(0x2007); /* Figure space */
- rv.set(0x2008); /* Punctuation space */
- rv.set(0x2009); /* Thin space */
- rv.set(0x200A); /* Hair space */
- rv.set(0x202F); /* Narrow no-break space */
- rv.set(0x205F); /* Medium mathematical space */
- rv.set(0x3000); /* Ideographic space */
- return rv;
- }
- case CLASS_VERT: {
- CodePointSet rv;
- rv.set(0x000A); /* Linefeed */
- rv.set(0x000B); /* Vertical tab */
- rv.set(0x000C); /* Formfeed */
- rv.set(0x000D); /* Carriage return */
- rv.set(0x0085); /* Next line */
- rv.set(0x2028); /* Line separator */
- rv.set(0x2029); /* Paragraph separator */
- return rv;
- }
- case CLASS_UCP_XPS:
- case CLASS_UCP_XSP: {
- CodePointSet rv;
- rv.set(0x0009); /* Horizontal tab */
- rv.set(0x0020); /* Space */
- rv.set(0x00A0); /* Non-break space */
- rv.set(0x1680); /* Ogham space mark */
- rv.set(0x180E); /* Mongolian vowel separator */
- rv.set(0x2000); /* En quad */
- rv.set(0x2001); /* Em quad */
- rv.set(0x2002); /* En space */
- rv.set(0x2003); /* Em space */
- rv.set(0x2004); /* Three-per-em space */
- rv.set(0x2005); /* Four-per-em space */
- rv.set(0x2006); /* Six-per-em space */
- rv.set(0x2007); /* Figure space */
- rv.set(0x2008); /* Punctuation space */
- rv.set(0x2009); /* Thin space */
- rv.set(0x200A); /* Hair space */
- rv.set(0x202F); /* Narrow no-break space */
- rv.set(0x205F); /* Medium mathematical space */
- rv.set(0x3000); /* Ideographic space */
- rv.set(0x000A); /* Linefeed */
- rv.set(0x000B); /* Vertical tab */
- rv.set(0x000C); /* Formfeed */
- rv.set(0x000D); /* Carriage return */
- rv.set(0x0085); /* Next line */
- rv.set(0x2028); /* Line separator */
- rv.set(0x2029); /* Paragraph separator */
- return rv;
- }
- case CLASS_UCP_C:
- return getUcpC();
- case CLASS_UCP_CC:
- return getUcpCc();
- case CLASS_UCP_CF:
- return getUcpCf();
- case CLASS_UCP_CN:
- return getUcpCn();
- case CLASS_UCP_CO:
- return getUcpCo();
- case CLASS_UCP_CS:
- return getUcpCs();
- case CLASS_UCP_L:
- return getUcpL();
- case CLASS_UCP_L_AND:
- return getUcpL_and();
- case CLASS_UCP_LL:
- return getUcpLl();
- case CLASS_UCP_LM:
- return getUcpLm();
- case CLASS_UCP_LO:
- return getUcpLo();
- case CLASS_UCP_LT:
- return getUcpLt();
- case CLASS_UCP_LU:
- return getUcpLu();
- case CLASS_UCP_M:
- return getUcpM();
- case CLASS_UCP_MC:
- return getUcpMc();
- case CLASS_UCP_ME:
- return getUcpMe();
- case CLASS_UCP_MN:
- return getUcpMn();
- case CLASS_UCP_N:
- return getUcpN();
- case CLASS_UCP_ND:
- return getUcpNd();
- case CLASS_UCP_NL:
- return getUcpNl();
- case CLASS_UCP_NO:
- return getUcpNo();
- case CLASS_UCP_P:
- return getUcpP();
- case CLASS_UCP_PC:
- return getUcpPc();
- case CLASS_UCP_PD:
- return getUcpPd();
- case CLASS_UCP_PE:
- return getUcpPe();
- case CLASS_UCP_PF:
- return getUcpPf();
- case CLASS_UCP_PI:
- return getUcpPi();
- case CLASS_UCP_PO:
- return getUcpPo();
- case CLASS_UCP_PS:
- return getUcpPs();
- case CLASS_UCP_S:
- return getUcpS();
- case CLASS_UCP_SC:
- return getUcpSc();
- case CLASS_UCP_SK:
- return getUcpSk();
- case CLASS_UCP_SM:
- return getUcpSm();
- case CLASS_UCP_SO:
- return getUcpSo();
- case CLASS_UCP_XAN:
- return getUcpXan();
- case CLASS_UCP_XWD:
- return getUcpXwd();
- case CLASS_UCP_Z:
- return getUcpZ();
- case CLASS_UCP_ZL:
- return getUcpZl();
- case CLASS_UCP_ZP:
- return getUcpZp();
- case CLASS_UCP_ZS:
- return getUcpZs();
- case CLASS_SCRIPT_ARABIC:
- return getUcpArabic();
- case CLASS_SCRIPT_ARMENIAN:
- return getUcpArmenian();
- case CLASS_SCRIPT_AVESTAN:
- return getUcpAvestan();
- case CLASS_SCRIPT_BALINESE:
- return getUcpBalinese();
- case CLASS_SCRIPT_BAMUM:
- return getUcpBamum();
- case CLASS_SCRIPT_BATAK:
- return getUcpBatak();
- case CLASS_SCRIPT_BENGALI:
- return getUcpBengali();
- case CLASS_SCRIPT_BOPOMOFO:
- return getUcpBopomofo();
- case CLASS_SCRIPT_BRAHMI:
- return getUcpBrahmi();
- case CLASS_SCRIPT_BRAILLE:
- return getUcpBraille();
- case CLASS_SCRIPT_BUGINESE:
- return getUcpBuginese();
- case CLASS_SCRIPT_BUHID:
- return getUcpBuhid();
- case CLASS_SCRIPT_CANADIAN_ABORIGINAL:
- return getUcpCanadian_Aboriginal();
- case CLASS_SCRIPT_CARIAN:
- return getUcpCarian();
- case CLASS_SCRIPT_CHAM:
- return getUcpCham();
- case CLASS_SCRIPT_CHEROKEE:
- return getUcpCherokee();
- case CLASS_SCRIPT_COMMON:
- return getUcpCommon();
- case CLASS_SCRIPT_COPTIC:
- return getUcpCoptic();
- case CLASS_SCRIPT_CUNEIFORM:
- return getUcpCuneiform();
- case CLASS_SCRIPT_CYPRIOT:
- return getUcpCypriot();
- case CLASS_SCRIPT_CYRILLIC:
- return getUcpCyrillic();
- case CLASS_SCRIPT_DESERET:
- return getUcpDeseret();
- case CLASS_SCRIPT_DEVANAGARI:
- return getUcpDevanagari();
- case CLASS_SCRIPT_EGYPTIAN_HIEROGLYPHS:
- return getUcpEgyptian_Hieroglyphs();
- case CLASS_SCRIPT_ETHIOPIC:
- return getUcpEthiopic();
- case CLASS_SCRIPT_GEORGIAN:
- return getUcpGeorgian();
- case CLASS_SCRIPT_GLAGOLITIC:
- return getUcpGlagolitic();
- case CLASS_SCRIPT_GOTHIC:
- return getUcpGothic();
- case CLASS_SCRIPT_GREEK:
- return getUcpGreek();
- case CLASS_SCRIPT_GUJARATI:
- return getUcpGujarati();
- case CLASS_SCRIPT_GURMUKHI:
- return getUcpGurmukhi();
- case CLASS_SCRIPT_HAN:
- return getUcpHan();
- case CLASS_SCRIPT_HANGUL:
- return getUcpHangul();
- case CLASS_SCRIPT_HANUNOO:
- return getUcpHanunoo();
- case CLASS_SCRIPT_HEBREW:
- return getUcpHebrew();
- case CLASS_SCRIPT_HIRAGANA:
- return getUcpHiragana();
- case CLASS_SCRIPT_IMPERIAL_ARAMAIC:
- return getUcpImperial_Aramaic();
- case CLASS_SCRIPT_INHERITED:
- return getUcpInherited();
- case CLASS_SCRIPT_INSCRIPTIONAL_PAHLAVI:
- return getUcpInscriptional_Pahlavi();
- case CLASS_SCRIPT_INSCRIPTIONAL_PARTHIAN:
- return getUcpInscriptional_Parthian();
- case CLASS_SCRIPT_JAVANESE:
- return getUcpJavanese();
- case CLASS_SCRIPT_KAITHI:
- return getUcpKaithi();
- case CLASS_SCRIPT_KANNADA:
- return getUcpKannada();
- case CLASS_SCRIPT_KATAKANA:
- return getUcpKatakana();
- case CLASS_SCRIPT_KAYAH_LI:
- return getUcpKayah_Li();
- case CLASS_SCRIPT_KHAROSHTHI:
- return getUcpKharoshthi();
- case CLASS_SCRIPT_KHMER:
- return getUcpKhmer();
- case CLASS_SCRIPT_LAO:
- return getUcpLao();
- case CLASS_SCRIPT_LATIN:
- return getUcpLatin();
- case CLASS_SCRIPT_LEPCHA:
- return getUcpLepcha();
- case CLASS_SCRIPT_LIMBU:
- return getUcpLimbu();
- case CLASS_SCRIPT_LINEAR_B:
- return getUcpLinear_B();
- case CLASS_SCRIPT_LISU:
- return getUcpLisu();
- case CLASS_SCRIPT_LYCIAN:
- return getUcpLycian();
- case CLASS_SCRIPT_LYDIAN:
- return getUcpLydian();
- case CLASS_SCRIPT_MALAYALAM:
- return getUcpMalayalam();
- case CLASS_SCRIPT_MANDAIC:
- return getUcpMandaic();
- case CLASS_SCRIPT_MEETEI_MAYEK:
- return getUcpMeetei_Mayek();
- case CLASS_SCRIPT_MONGOLIAN:
- return getUcpMongolian();
- case CLASS_SCRIPT_MYANMAR:
- return getUcpMyanmar();
- case CLASS_SCRIPT_NEW_TAI_LUE:
- return getUcpNew_Tai_Lue();
- case CLASS_SCRIPT_NKO:
- return getUcpNko();
- case CLASS_SCRIPT_OGHAM:
- return getUcpOgham();
- case CLASS_SCRIPT_OL_CHIKI:
- return getUcpOl_Chiki();
- case CLASS_SCRIPT_OLD_ITALIC:
- return getUcpOld_Italic();
- case CLASS_SCRIPT_OLD_PERSIAN:
- return getUcpOld_Persian();
- case CLASS_SCRIPT_OLD_SOUTH_ARABIAN:
- return getUcpOld_South_Arabian();
- case CLASS_SCRIPT_OLD_TURKIC:
- return getUcpOld_Turkic();
- case CLASS_SCRIPT_ORIYA:
- return getUcpOriya();
- case CLASS_SCRIPT_OSMANYA:
- return getUcpOsmanya();
- case CLASS_SCRIPT_PHAGS_PA:
- return getUcpPhags_Pa();
- case CLASS_SCRIPT_PHOENICIAN:
- return getUcpPhoenician();
- case CLASS_SCRIPT_REJANG:
- return getUcpRejang();
- case CLASS_SCRIPT_RUNIC:
- return getUcpRunic();
- case CLASS_SCRIPT_SAMARITAN:
- return getUcpSamaritan();
- case CLASS_SCRIPT_SAURASHTRA:
- return getUcpSaurashtra();
- case CLASS_SCRIPT_SHAVIAN:
- return getUcpShavian();
- case CLASS_SCRIPT_SINHALA:
- return getUcpSinhala();
- case CLASS_SCRIPT_SUNDANESE:
- return getUcpSundanese();
- case CLASS_SCRIPT_SYLOTI_NAGRI:
- return getUcpSyloti_Nagri();
- case CLASS_SCRIPT_SYRIAC:
- return getUcpSyriac();
- case CLASS_SCRIPT_TAGALOG:
- return getUcpTagalog();
- case CLASS_SCRIPT_TAGBANWA:
- return getUcpTagbanwa();
- case CLASS_SCRIPT_TAI_LE:
- return getUcpTai_Le();
- case CLASS_SCRIPT_TAI_THAM:
- return getUcpTai_Tham();
- case CLASS_SCRIPT_TAI_VIET:
- return getUcpTai_Viet();
- case CLASS_SCRIPT_TAMIL:
- return getUcpTamil();
- case CLASS_SCRIPT_TELUGU:
- return getUcpTelugu();
- case CLASS_SCRIPT_THAANA:
- return getUcpThaana();
- case CLASS_SCRIPT_THAI:
- return getUcpThai();
- case CLASS_SCRIPT_TIBETAN:
- return getUcpTibetan();
- case CLASS_SCRIPT_TIFINAGH:
- return getUcpTifinagh();
- case CLASS_SCRIPT_UGARITIC:
- return getUcpUgaritic();
- case CLASS_SCRIPT_VAI:
- return getUcpVai();
- case CLASS_SCRIPT_YI:
- return getUcpYi();
- case CLASS_UCP_ANY:
- return CodePointSet(CodePointSet::interval(0, MAX_UNICODE));
-
- default: { /* currently uses ascii defns */
- CharReach cr = getPredefinedCharReach(c, mode);
- CodePointSet rv;
- for (u32 i = cr.find_first(); i != CharReach::npos;
- i = cr.find_next(i)) {
- rv.set(i);
- }
- return rv;
- }
- }
-}
-
-UTF8ComponentClass::UTF8ComponentClass(const ParseMode &mode_in)
- : ComponentClass(mode_in),
- single_pos( GlushkovBuildState::POS_UNINITIALIZED),
- one_dot_trailer( GlushkovBuildState::POS_UNINITIALIZED),
- two_dot_trailer( GlushkovBuildState::POS_UNINITIALIZED),
- three_dot_trailer( GlushkovBuildState::POS_UNINITIALIZED),
- two_char_dot_head( GlushkovBuildState::POS_UNINITIALIZED),
- three_char_dot_head(GlushkovBuildState::POS_UNINITIALIZED),
- four_char_dot_head( GlushkovBuildState::POS_UNINITIALIZED) {
- assert(mode.utf8);
-}
-
-UTF8ComponentClass *UTF8ComponentClass::clone() const {
- return new UTF8ComponentClass(*this);
-}
-
-bool UTF8ComponentClass::class_empty(void) const {
- assert(finalized);
- return cps.none();
-}
-
-void UTF8ComponentClass::createRange(unichar to) {
- assert(range_start != INVALID_UNICODE);
- unichar from = range_start;
- if (from > to) {
- throw LocatedParseError("Range out of order in character class");
- }
-
- in_cand_range = false;
- CodePointSet ncps;
- ncps.setRange(from, to);
- if (mode.caseless) {
- make_caseless(&ncps);
- }
- cps |= ncps;
- range_start = INVALID_UNICODE;
-}
-
-void UTF8ComponentClass::add(PredefinedClass c, bool negative) {
- if (in_cand_range) { // can't form a range here
- throw LocatedParseError("Invalid range in character class");
- }
-
- if (mode.ucp) {
- c = translateForUcpMode(c, mode);
- }
-
- // caselessness is handled inside this call - don't apply make_caseless
- // to the result
- CodePointSet pcps = getPredefinedCodePointSet(c, mode);
- if (negative) {
- pcps.flip();
- }
-
- cps |= pcps;
-
- range_start = INVALID_UNICODE;
- in_cand_range = false;
-}
-
-void UTF8ComponentClass::add(unichar c) {
- DEBUG_PRINTF("adding \\x%08x\n", c);
- if (c > MAX_UNICODE) { // too big!
- throw LocatedParseError("Hexadecimal value is greater than \\x10FFFF");
- }
-
- if (in_cand_range) {
- createRange(c);
- return;
- }
-
- CodePointSet ncps;
- ncps.set(c);
- if (mode.caseless) {
- make_caseless(&ncps);
- }
- cps |= ncps;
- range_start = c;
-}
-
-void UTF8ComponentClass::finalize() {
- if (finalized) {
- return;
- }
-
- // Handle unclosed ranges, like '[a-]' and '[a-\Q\E]' -- in these cases the
- // dash is a literal dash.
- if (in_cand_range) {
- cps.set('-');
- in_cand_range = false;
- }
-
- if (m_negate) {
- cps.flip();
- }
-
- finalized = true;
-}
-
-Position UTF8ComponentClass::getHead(NFABuilder &builder, u8 first_byte) {
- map<u8, Position>::const_iterator it = heads.find(first_byte);
- if (it != heads.end()) {
- return it->second;
- }
-
- Position head = builder.makePositions(1);
- assert(heads.find(first_byte) == heads.end());
- builder.addCharReach(head, CharReach(first_byte));
- /* no report id as head can not be directly wired to accept */
-
- heads[first_byte] = head;
- return head;
-}
-
-void UTF8ComponentClass::ensureDotTrailer(GlushkovBuildState &bs) {
- NFABuilder &builder = bs.getBuilder();
- if (one_dot_trailer != GlushkovBuildState::POS_UNINITIALIZED) {
- return;
- }
-
- one_dot_trailer = builder.makePositions(1);
- builder.setNodeReportID(one_dot_trailer, 0);
- builder.addCharReach(one_dot_trailer, CharReach(0x80, 0xbf));
- tails.insert(one_dot_trailer);
-}
-
-void UTF8ComponentClass::ensureTwoDotTrailer(GlushkovBuildState &bs) {
- NFABuilder &builder = bs.getBuilder();
- if (two_dot_trailer != GlushkovBuildState::POS_UNINITIALIZED) {
- return;
- }
-
- ensureDotTrailer(bs);
-
- two_dot_trailer = builder.makePositions(1);
- builder.addCharReach(two_dot_trailer, CharReach(0x80, 0xbf));
- bs.addSuccessor(two_dot_trailer, one_dot_trailer);
-}
-
-void UTF8ComponentClass::ensureThreeDotTrailer(GlushkovBuildState &bs) {
- NFABuilder &builder = bs.getBuilder();
- if (three_dot_trailer != GlushkovBuildState::POS_UNINITIALIZED) {
- return;
- }
-
- ensureTwoDotTrailer(bs);
-
- three_dot_trailer = builder.makePositions(1);
- builder.addCharReach(three_dot_trailer, CharReach(0x80, 0xbf));
- bs.addSuccessor(three_dot_trailer, two_dot_trailer);
-}
-
-void UTF8ComponentClass::buildOneByte(GlushkovBuildState &bs) {
- NFABuilder &builder = bs.getBuilder();
- for (CodePointSet::const_iterator it = cps.begin(); it != cps.end(); ++it) {
- unichar b = lower(*it);
- unichar e = upper(*it) + 1;
- if (b >= UTF_2CHAR_MIN) {
- continue;
- }
-
- DEBUG_PRINTF("building vertices for [%u, %u)\n", b, e);
-
- if (single_pos == GlushkovBuildState::POS_UNINITIALIZED) {
- single_pos = builder.makePositions(1);
- builder.setNodeReportID(single_pos, 0 /* offset adj */);
- tails.insert(single_pos);
- }
- CharReach cr(b, MIN(e, UTF_2CHAR_MIN) - 1);
- builder.addCharReach(single_pos, cr);
- }
-}
-
-void UTF8ComponentClass::addToTail(GlushkovBuildState &bs,
- map<Position, Position> &finals,
- Position prev, unichar b, unichar e) {
- NFABuilder &builder = bs.getBuilder();
- Position tail;
- if (finals.find(prev) == finals.end()) {
- tail = builder.makePositions(1);
- builder.setNodeReportID(tail, 0 /* offset adj */);
- bs.addSuccessor(prev, tail);
- finals[prev] = tail;
- tails.insert(tail);
- } else {
- tail = finals[prev];
- }
-
- u8 bb = makeContByte(b);
- u8 ee = makeContByte(e - 1);
- builder.addCharReach(tail, CharReach(bb, ee));
-}
-
-void UTF8ComponentClass::buildTwoByte(GlushkovBuildState &bs) {
- NFABuilder &builder = bs.getBuilder();
- map<Position, Position> finals;
-
- for (auto it = cps.begin(); it != cps.end(); ++it) {
- unichar b = lower(*it);
- unichar e = upper(*it) + 1;
-
- b = MAX(b, UTF_2CHAR_MIN);
- e = MIN(e, UTF_3CHAR_MIN);
-
- if (b >= e) {
- continue; /* we're done here */
- }
-
- /* raise b to the start of the next tail byte boundary */
- if (b & UTF_CONT_BYTE_VALUE_MASK) {
- unichar bb = MIN(e, ROUNDUP_N(b, UTF_CONT_BYTE_RANGE));
- u8 first_byte = UTF_TWO_BYTE_HEADER | (b >> UTF_CONT_SHIFT);
- assert(first_byte > 0xc1 && first_byte <= 0xdf);
-
- Position head = getHead(builder, first_byte);
- addToTail(bs, finals, head, b, bb);
-
- b = bb;
- }
-
- if (b == e) {
- continue; /* we're done here */
- }
- assert(b < e);
-
- /* lower e to the end of a tail byte boundary */
- if (e & UTF_CONT_BYTE_VALUE_MASK) {
- unichar ee = e & ~UTF_CONT_BYTE_VALUE_MASK;
- assert(ee >= b);
-
- u8 first_byte = UTF_TWO_BYTE_HEADER | (ee >> UTF_CONT_SHIFT);
- assert(first_byte > 0xc1 && first_byte <= 0xdf);
-
- Position head = getHead(builder, first_byte);
- addToTail(bs, finals, head, ee, e);
-
- e = ee;
- }
-
- if (b == e) {
- continue; /* we're done here */
- }
- assert(b < e);
-
- /* middle section just goes to a common full vertex */
- ensureDotTrailer(bs);
-
- if (two_char_dot_head == GlushkovBuildState::POS_UNINITIALIZED) {
- two_char_dot_head = builder.makePositions(1);
- bs.addSuccessor(two_char_dot_head, one_dot_trailer);
- }
-
- u8 min_first_byte = UTF_TWO_BYTE_HEADER | (b >> UTF_CONT_SHIFT);
- u8 max_first_byte = UTF_TWO_BYTE_HEADER | ((e - 1) >> UTF_CONT_SHIFT);
-
- assert(min_first_byte > 0xc1 && min_first_byte <= 0xdf);
- assert(max_first_byte > 0xc1 && max_first_byte <= 0xdf);
-
- builder.addCharReach(two_char_dot_head,
- CharReach(min_first_byte, max_first_byte));
- }
-}
-
-static
-Position getMid(GlushkovBuildState &bs, map<Position, map<u8, Position> > &mids,
- const Position &prev, u8 byte_val) {
- NFABuilder &builder = bs.getBuilder();
- map<u8, Position> &by_byte = mids[prev];
-
- map<u8, Position>::const_iterator it = by_byte.find(byte_val);
- if (it != by_byte.end()) {
- return it->second;
- }
-
- Position mid = builder.makePositions(1);
- builder.addCharReach(mid, CharReach(byte_val));
- bs.addSuccessor(prev, mid);
- /* no report id as mid can not be directly wired to accept */
-
- by_byte[byte_val] = mid;
- return mid;
-}
-
-void UTF8ComponentClass::buildThreeByte(GlushkovBuildState &bs) {
- NFABuilder &builder = bs.getBuilder();
-
- map<Position, map<u8, Position> > mids;
- map<Position, Position> finals;
-
- for (auto it = cps.begin(); it != cps.end(); ++it) {
- unichar b = lower(*it);
- unichar e = upper(*it) + 1;
-
- b = MAX(b, UTF_3CHAR_MIN);
- e = MIN(e, UTF_4CHAR_MIN);
-
- if (b >= e) {
- continue; /* we're done here */
- }
-
- /* raise b to the start of the next tail byte boundary */
- if (b & UTF_CONT_BYTE_VALUE_MASK) {
- unichar bb = MIN(e, ROUNDUP_N(b, UTF_CONT_BYTE_RANGE));
-
- u8 first_byte = UTF_THREE_BYTE_HEADER | (b >> (2 * UTF_CONT_SHIFT));
- assert(first_byte >= 0xe0 && first_byte <= 0xef);
- Position head = getHead(builder, first_byte);
-
- u8 second_byte = makeContByte(b >> UTF_CONT_SHIFT);
- Position mid = getMid(bs, mids, head, second_byte);
-
- addToTail(bs, finals, mid, b, bb);
-
- b = bb;
- }
-
- if (b == e) {
- continue; /* we're done here */
- }
- assert(b < e);
-
- /* lower e to the end of a tail byte boundary */
- if (e & UTF_CONT_BYTE_VALUE_MASK) {
- unichar ee = e & ~UTF_CONT_BYTE_VALUE_MASK;
- assert(ee >= b);
-
- u8 first_byte = UTF_THREE_BYTE_HEADER
- | (ee >> (2 * UTF_CONT_SHIFT));
- assert(first_byte >= 0xe0 && first_byte <= 0xef);
- Position head = getHead(builder, first_byte);
-
- u8 second_byte = makeContByte(ee >> UTF_CONT_SHIFT);
- Position mid = getMid(bs, mids, head, second_byte);
-
- addToTail(bs, finals, mid, ee, e);
-
- e = ee;
- }
-
- if (b == e) {
- continue; /* we're done here */
- }
- assert(b < e);
-
- /* from here on in the last byte is always full */
- ensureDotTrailer(bs);
-
- /* raise b to the start of the next mid byte boundary */
- if (b & ((1 << (2 * UTF_CONT_SHIFT)) - 1)) {
- unichar bb = MIN(e, ROUNDUP_N(b, 1 << (2 * UTF_CONT_SHIFT)));
-
- u8 first_byte = UTF_THREE_BYTE_HEADER | (b >> (2 * UTF_CONT_SHIFT));
- Position head = getHead(builder, first_byte);
-
- Position mid = builder.makePositions(1);
- bs.addSuccessor(head, mid);
- bs.addSuccessor(mid, one_dot_trailer);
- /* no report id as mid can not be directly wired to accept,
- * not adding to mids as we are completely filling its downstream */
- u8 second_min = makeContByte(b >> UTF_CONT_SHIFT);
- u8 second_max = makeContByte((bb - 1) >> UTF_CONT_SHIFT);
-
- builder.addCharReach(mid, CharReach(second_min, second_max));
-
- b = bb;
- }
-
- if (b == e) {
- continue; /* we're done here */
- }
- assert(b < e);
-
- /* lower e to the end of a mid byte boundary */
- if (e & ((1 << (2 * UTF_CONT_SHIFT)) - 1)) {
- unichar ee = e & ~((1 << (2 * UTF_CONT_SHIFT)) - 1);
- assert(ee >= b);
-
- u8 first_byte = UTF_THREE_BYTE_HEADER
- | (ee >> (2 * UTF_CONT_SHIFT));
- Position head = getHead(builder, first_byte);
-
- Position mid = builder.makePositions(1);
- bs.addSuccessor(head, mid);
- bs.addSuccessor(mid, one_dot_trailer);
- /* no report id as mid can not be directly wired to accept,
- * not adding to mids as we are completely filling its downstream */
- u8 second_min = makeContByte(ee >> UTF_CONT_SHIFT);
- u8 second_max = makeContByte((e - 1) >> UTF_CONT_SHIFT);
-
- builder.addCharReach(mid, CharReach(second_min, second_max));
-
- e = ee;
- }
-
- if (b == e) {
- continue; /* we're done here */
- }
- assert(b < e);
-
- /* now we just have to wire head to a common dot trailer */
- ensureTwoDotTrailer(bs);
- if (three_char_dot_head == GlushkovBuildState::POS_UNINITIALIZED) {
- three_char_dot_head = builder.makePositions(1);
- bs.addSuccessor(three_char_dot_head, two_dot_trailer);
- }
-
- u8 min_first_byte = UTF_THREE_BYTE_HEADER
- | (b >> (2 * UTF_CONT_SHIFT));
- u8 max_first_byte = UTF_THREE_BYTE_HEADER
- | ((e - 1) >> (2 * UTF_CONT_SHIFT));
-
- assert(min_first_byte > 0xdf && min_first_byte <= 0xef);
- assert(max_first_byte > 0xdf && max_first_byte <= 0xef);
-
- builder.addCharReach(three_char_dot_head,
- CharReach(min_first_byte, max_first_byte));
- }
-}
-
-static
-u8 makeFirstByteOfFour(unichar raw) {
- u8 first_byte = UTF_FOUR_BYTE_HEADER | (raw >> (3 * UTF_CONT_SHIFT));
- assert(first_byte > 0xef && first_byte <= 0xf7);
- return first_byte;
-}
-
-static
-bool isTwoContAligned(unichar raw) {
- return !(raw & ((1 << (2 * UTF_CONT_SHIFT)) - 1));
-}
-
-static
-bool isThreeContAligned(unichar raw) {
- return !(raw & ((1 << (3 * UTF_CONT_SHIFT)) - 1));
-}
-
-void UTF8ComponentClass::buildFourByte(GlushkovBuildState &bs) {
- NFABuilder &builder = bs.getBuilder();
- map<Position, map<u8, Position> > mids;
- map<Position, Position> finals;
-
- for (auto it = cps.begin(); it != cps.end(); ++it) {
- unichar b = lower(*it);
- unichar e = upper(*it) + 1;
-
- b = MAX(b, UTF_4CHAR_MIN);
- e = MIN(e, MAX_UNICODE + 1);
-
- if (b >= e) {
- continue;
- }
-
- /* raise b to the start of the next tail byte boundary */
- if (b & UTF_CONT_BYTE_VALUE_MASK) {
- unichar bb = MIN(e, ROUNDUP_N(b, UTF_CONT_BYTE_RANGE));
-
- u8 first_byte = makeFirstByteOfFour(b);
- Position head = getHead(builder, first_byte);
-
- u8 second_byte = makeContByte(b >> (2 * UTF_CONT_SHIFT));
- Position mid1 = getMid(bs, mids, head, second_byte);
-
- u8 third_byte = makeContByte(b >> UTF_CONT_SHIFT);
- Position mid2 = getMid(bs, mids, mid1, third_byte);
-
- addToTail(bs, finals, mid2, b, bb);
-
- b = bb;
- }
-
- if (b == e) {
- continue; /* we're done here */
- }
- assert(b < e);
-
- /* lower e to the end of a tail byte boundary */
- if (e & UTF_CONT_BYTE_VALUE_MASK) {
- unichar ee = e & ~UTF_CONT_BYTE_VALUE_MASK;
- assert(ee >= b);
-
- u8 first_byte = makeFirstByteOfFour(ee);
- Position head = getHead(builder, first_byte);
-
- u8 second_byte = makeContByte(ee >> (2 * UTF_CONT_SHIFT));
- Position mid1 = getMid(bs, mids, head, second_byte);
-
- u8 third_byte = makeContByte(ee >> UTF_CONT_SHIFT);
- Position mid2 = getMid(bs, mids, mid1, third_byte);
-
- addToTail(bs, finals, mid2, ee, e);
-
- e = ee;
- }
-
- if (b == e) {
- continue; /* we're done here */
- }
- assert(b < e);
-
- /* from here on in the last byte is always full */
- ensureDotTrailer(bs);
-
- /* raise b to the start of the next mid byte boundary */
- if (!isTwoContAligned(b)) {
- unichar bb = MIN(e, ROUNDUP_N(b, 1 << (2 * UTF_CONT_SHIFT)));
-
- u8 first_byte = makeFirstByteOfFour(b);
- Position head = getHead(builder, first_byte);
-
- u8 second_byte = makeContByte(b >> (2 * UTF_CONT_SHIFT));
- Position mid1 = getMid(bs, mids, head, second_byte);
-
- Position mid2 = builder.makePositions(1);
- bs.addSuccessor(mid1, mid2);
- bs.addSuccessor(mid2, one_dot_trailer);
- /* no report id as mid can not be directly wired to accept,
- * not adding to mids as we are completely filling its downstream */
- u8 byte_min = makeContByte(b >> UTF_CONT_SHIFT);
- u8 byte_max = makeContByte((bb - 1) >> UTF_CONT_SHIFT);
-
- builder.addCharReach(mid2, CharReach(byte_min, byte_max));
-
- b = bb;
- }
-
- if (b == e) {
- continue; /* we're done here */
- }
- assert(b < e);
-
- /* lower e to the end of a mid byte boundary */
- if (!isTwoContAligned(e)) {
- unichar ee = e & ~((1 << (2 * UTF_CONT_SHIFT)) - 1);
- assert(ee >= b);
-
- u8 first_byte = makeFirstByteOfFour(ee);
- Position head = getHead(builder, first_byte);
-
- u8 second_byte = makeContByte(ee >> (2 * UTF_CONT_SHIFT));
- Position mid1 = getMid(bs, mids, head, second_byte);
-
- Position mid2 = builder.makePositions(1);
- bs.addSuccessor(mid1, mid2);
- bs.addSuccessor(mid2, one_dot_trailer);
- /* no report id as mid can not be directly wired to accept,
- * not adding to mids as we are completely filling its downstream */
- u8 byte_min = makeContByte(ee >> UTF_CONT_SHIFT);
- u8 byte_max = makeContByte((e - 1) >> UTF_CONT_SHIFT);
-
- builder.addCharReach(mid2, CharReach(byte_min, byte_max));
-
- e = ee;
- }
-
- if (b == e) {
- continue; /* we're done here */
- }
- assert(b < e);
-
- ensureTwoDotTrailer(bs);
-
- /* raise b to the next byte boundary */
- if (!isThreeContAligned(b)) {
- unichar bb = MIN(e, ROUNDUP_N(b, 1 << (3 * UTF_CONT_SHIFT)));
-
- u8 first_byte = makeFirstByteOfFour(b);
- Position head = getHead(builder, first_byte);
-
- Position mid1 = builder.makePositions(1);
- bs.addSuccessor(head, mid1);
- bs.addSuccessor(mid1, two_dot_trailer);
- /* no report id as mid can not be directly wired to accept,
- * not adding to mids as we are completely filling its downstream */
- u8 byte_min = makeContByte(b >> (2 * UTF_CONT_SHIFT));
- u8 byte_max = makeContByte((bb - 1) >> (2 * UTF_CONT_SHIFT));
-
- builder.addCharReach(mid1, CharReach(byte_min, byte_max));
-
- b = bb;
- }
-
- if (b == e) {
- continue; /* we're done here */
- }
- assert(b < e);
-
- /* lower e to the next byte boundary */
- if (!isThreeContAligned(e)) {
- unichar ee = e & ~((1 << (3 * UTF_CONT_SHIFT)) - 1);
- assert(ee >= b);
-
- u8 first_byte = makeFirstByteOfFour(ee);
- Position head = getHead(builder, first_byte);
- Position mid1 = builder.makePositions(1);
- bs.addSuccessor(head, mid1);
- bs.addSuccessor(mid1, two_dot_trailer);
- /* no report id as mid can not be directly wired to accept,
- * not adding to mids as we are completely filling its downstream */
- u8 byte_min = makeContByte(ee >> (2 * UTF_CONT_SHIFT));
- u8 byte_max = makeContByte((e - 1) >> (2 * UTF_CONT_SHIFT));
-
- builder.addCharReach(mid1, CharReach(byte_min, byte_max));
-
- e = ee;
- }
-
- if (b == e) {
- continue; /* we're done here */
- }
- assert(b < e);
-
- /* now we just have to wire head to a common dot trailer */
- ensureThreeDotTrailer(bs);
- if (four_char_dot_head == GlushkovBuildState::POS_UNINITIALIZED) {
- four_char_dot_head = builder.makePositions(1);
- bs.addSuccessor(four_char_dot_head, three_dot_trailer);
- }
-
- u8 min_first_byte = makeFirstByteOfFour(b);
- u8 max_first_byte = makeFirstByteOfFour(e - 1);
-
- builder.addCharReach(four_char_dot_head,
- CharReach(min_first_byte, max_first_byte));
- }
-}
-
-void UTF8ComponentClass::notePositions(GlushkovBuildState &bs) {
- // We should always be finalized by now.
- assert(finalized);
-
- // An empty class is a special case; this would be generated by something
- // like /[\s\S]/8, which can never match. We treat these like we do the non
- // UTF-8 version: add a vertex with empty reach (to ensure we create a
- // connected graph) and pick it up later on.
- if (class_empty()) {
- DEBUG_PRINTF("empty class!\n");
- assert(single_pos == GlushkovBuildState::POS_UNINITIALIZED);
- NFABuilder &builder = bs.getBuilder();
- single_pos = builder.makePositions(1);
- builder.setNodeReportID(single_pos, 0 /* offset adj */);
- builder.addCharReach(single_pos, CharReach());
- tails.insert(single_pos);
- return;
- }
-
- buildOneByte(bs);
- buildTwoByte(bs);
- buildThreeByte(bs);
- buildFourByte(bs);
-}
-
-void UTF8ComponentClass::buildFollowSet(GlushkovBuildState &,
- const vector<PositionInfo> &) {
- /* states are wired in notePositions as all belong to this component. */
-}
-
-vector<PositionInfo> UTF8ComponentClass::first(void) const {
- vector<PositionInfo> rv;
- if (single_pos != GlushkovBuildState::POS_UNINITIALIZED) {
- rv.push_back(single_pos);
- }
- if (two_char_dot_head != GlushkovBuildState::POS_UNINITIALIZED) {
- rv.push_back(two_char_dot_head);
- }
- if (three_char_dot_head != GlushkovBuildState::POS_UNINITIALIZED) {
- rv.push_back(three_char_dot_head);
- }
- if (four_char_dot_head != GlushkovBuildState::POS_UNINITIALIZED) {
- rv.push_back(four_char_dot_head);
- }
-
- for (auto it = heads.begin(); it != heads.end(); ++it) {
- rv.push_back(it->second);
- }
- return rv;
-}
-
-vector<PositionInfo> UTF8ComponentClass::last(void) const {
- vector<PositionInfo> rv;
-
- rv.insert(rv.end(), tails.begin(), tails.end());
- return rv;
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Character class in UTF-8 mode.
+ */
+
+
+#include "Utf8ComponentClass.h"
+
+#include "buildstate.h"
+#include "Parser.h"
+#include "parse_error.h"
+#include "position.h"
+#include "position_info.h"
+#include "nfagraph/ng_builder.h"
+#include "util/compare.h"
+#include "util/unicode_def.h"
+
+#include <cstring>
+
+#include "ucp_table.h"
+
+using namespace std;
+
+namespace ue2 {
+
+PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode) {
+ /* Note: the mapping used here for mapping posix character classes
+ * matches the observed behaviour of PCRE (lower and upper going to \p{L}
+ * is not documented by pcre).
+ *
+ * Note: this mapping is quite different from both of the mappings
+ * recommended in the unicode regex tech report (TR-18) appendix C
+ */
+ switch (in) {
+ case CLASS_ALNUM:
+ return CLASS_UCP_XAN;
+ case CLASS_ALPHA:
+ return CLASS_UCP_L;
+ case CLASS_BLANK:
+ return CLASS_HORZ;
+ case CLASS_DIGIT:
+ return CLASS_UCP_ND;
+ case CLASS_GRAPH:
+ return CLASS_XGRAPH;
+ case CLASS_LOWER:
+ if (mode.caseless) { /* we also pick up uppercase titlecase and others */
+ return CLASS_UCP_L;
+ } else {
+ return CLASS_UCP_LL;
+ }
+ case CLASS_PRINT:
+ return CLASS_XPRINT;
+ case CLASS_PUNCT:
+ return CLASS_XPUNCT;
+ case CLASS_SPACE:
+ return CLASS_UCP_XPS;
+ case CLASS_UPPER:
+ if (mode.caseless) { /* we also pick up lowercase titlecase and others */
+ return CLASS_UCP_L;
+ } else {
+ return CLASS_UCP_LU;
+ }
+ case CLASS_WORD:
+ return CLASS_UCP_XWD;
+ default:
+ return in;
+ }
+}
+
+CodePointSet getPredefinedCodePointSet(PredefinedClass c,
+ const ParseMode &mode) {
+ /* TODO: support properly PCRE_UCP mode and non PCRE_UCP mode */
+ switch (c) {
+ case CLASS_ANY:
+ if (mode.dotall) {
+ return CodePointSet(CodePointSet::interval(0, MAX_UNICODE));
+ } else {
+ CodePointSet rv;
+ rv.set('\n');
+ rv.flip();
+ return rv;
+ }
+ case CLASS_XGRAPH: {
+ CodePointSet rv;
+ rv = getUcpZ();
+ rv |= getUcpC();
+ rv.flip();
+ // most of Cf, except for ...
+ CodePointSet cf = getUcpCf();
+ cf.unset(0x061c);
+ cf.unset(0x180e);
+ cf.unsetRange(0x2066, 0x2069);
+ rv |= cf;
+ return rv;
+ }
+ case CLASS_XPRINT: {
+ // Same as graph, plus everything with the Zs property.
+ CodePointSet rv = getPredefinedCodePointSet(CLASS_XGRAPH, mode);
+ rv |= getUcpZs();
+ rv.set(0x180e); // Also included in this class by PCRE 8.38.
+ return rv;
+ }
+ case CLASS_XPUNCT: {
+ // Everything with the P (punctuation) property, plus code points in S
+ // (symbols) that are < 128.
+ CodePointSet rv = getUcpP();
+ CodePointSet symbols = getUcpS();
+ symbols.unsetRange(128, MAX_UNICODE);
+ rv |= symbols;
+ return rv;
+ }
+ case CLASS_HORZ: {
+ CodePointSet rv;
+ rv.set(0x0009); /* Horizontal tab */
+ rv.set(0x0020); /* Space */
+ rv.set(0x00A0); /* Non-break space */
+ rv.set(0x1680); /* Ogham space mark */
+ rv.set(0x180E); /* Mongolian vowel separator */
+ rv.set(0x2000); /* En quad */
+ rv.set(0x2001); /* Em quad */
+ rv.set(0x2002); /* En space */
+ rv.set(0x2003); /* Em space */
+ rv.set(0x2004); /* Three-per-em space */
+ rv.set(0x2005); /* Four-per-em space */
+ rv.set(0x2006); /* Six-per-em space */
+ rv.set(0x2007); /* Figure space */
+ rv.set(0x2008); /* Punctuation space */
+ rv.set(0x2009); /* Thin space */
+ rv.set(0x200A); /* Hair space */
+ rv.set(0x202F); /* Narrow no-break space */
+ rv.set(0x205F); /* Medium mathematical space */
+ rv.set(0x3000); /* Ideographic space */
+ return rv;
+ }
+ case CLASS_VERT: {
+ CodePointSet rv;
+ rv.set(0x000A); /* Linefeed */
+ rv.set(0x000B); /* Vertical tab */
+ rv.set(0x000C); /* Formfeed */
+ rv.set(0x000D); /* Carriage return */
+ rv.set(0x0085); /* Next line */
+ rv.set(0x2028); /* Line separator */
+ rv.set(0x2029); /* Paragraph separator */
+ return rv;
+ }
+ case CLASS_UCP_XPS:
+ case CLASS_UCP_XSP: {
+ CodePointSet rv;
+ rv.set(0x0009); /* Horizontal tab */
+ rv.set(0x0020); /* Space */
+ rv.set(0x00A0); /* Non-break space */
+ rv.set(0x1680); /* Ogham space mark */
+ rv.set(0x180E); /* Mongolian vowel separator */
+ rv.set(0x2000); /* En quad */
+ rv.set(0x2001); /* Em quad */
+ rv.set(0x2002); /* En space */
+ rv.set(0x2003); /* Em space */
+ rv.set(0x2004); /* Three-per-em space */
+ rv.set(0x2005); /* Four-per-em space */
+ rv.set(0x2006); /* Six-per-em space */
+ rv.set(0x2007); /* Figure space */
+ rv.set(0x2008); /* Punctuation space */
+ rv.set(0x2009); /* Thin space */
+ rv.set(0x200A); /* Hair space */
+ rv.set(0x202F); /* Narrow no-break space */
+ rv.set(0x205F); /* Medium mathematical space */
+ rv.set(0x3000); /* Ideographic space */
+ rv.set(0x000A); /* Linefeed */
+ rv.set(0x000B); /* Vertical tab */
+ rv.set(0x000C); /* Formfeed */
+ rv.set(0x000D); /* Carriage return */
+ rv.set(0x0085); /* Next line */
+ rv.set(0x2028); /* Line separator */
+ rv.set(0x2029); /* Paragraph separator */
+ return rv;
+ }
+ case CLASS_UCP_C:
+ return getUcpC();
+ case CLASS_UCP_CC:
+ return getUcpCc();
+ case CLASS_UCP_CF:
+ return getUcpCf();
+ case CLASS_UCP_CN:
+ return getUcpCn();
+ case CLASS_UCP_CO:
+ return getUcpCo();
+ case CLASS_UCP_CS:
+ return getUcpCs();
+ case CLASS_UCP_L:
+ return getUcpL();
+ case CLASS_UCP_L_AND:
+ return getUcpL_and();
+ case CLASS_UCP_LL:
+ return getUcpLl();
+ case CLASS_UCP_LM:
+ return getUcpLm();
+ case CLASS_UCP_LO:
+ return getUcpLo();
+ case CLASS_UCP_LT:
+ return getUcpLt();
+ case CLASS_UCP_LU:
+ return getUcpLu();
+ case CLASS_UCP_M:
+ return getUcpM();
+ case CLASS_UCP_MC:
+ return getUcpMc();
+ case CLASS_UCP_ME:
+ return getUcpMe();
+ case CLASS_UCP_MN:
+ return getUcpMn();
+ case CLASS_UCP_N:
+ return getUcpN();
+ case CLASS_UCP_ND:
+ return getUcpNd();
+ case CLASS_UCP_NL:
+ return getUcpNl();
+ case CLASS_UCP_NO:
+ return getUcpNo();
+ case CLASS_UCP_P:
+ return getUcpP();
+ case CLASS_UCP_PC:
+ return getUcpPc();
+ case CLASS_UCP_PD:
+ return getUcpPd();
+ case CLASS_UCP_PE:
+ return getUcpPe();
+ case CLASS_UCP_PF:
+ return getUcpPf();
+ case CLASS_UCP_PI:
+ return getUcpPi();
+ case CLASS_UCP_PO:
+ return getUcpPo();
+ case CLASS_UCP_PS:
+ return getUcpPs();
+ case CLASS_UCP_S:
+ return getUcpS();
+ case CLASS_UCP_SC:
+ return getUcpSc();
+ case CLASS_UCP_SK:
+ return getUcpSk();
+ case CLASS_UCP_SM:
+ return getUcpSm();
+ case CLASS_UCP_SO:
+ return getUcpSo();
+ case CLASS_UCP_XAN:
+ return getUcpXan();
+ case CLASS_UCP_XWD:
+ return getUcpXwd();
+ case CLASS_UCP_Z:
+ return getUcpZ();
+ case CLASS_UCP_ZL:
+ return getUcpZl();
+ case CLASS_UCP_ZP:
+ return getUcpZp();
+ case CLASS_UCP_ZS:
+ return getUcpZs();
+ case CLASS_SCRIPT_ARABIC:
+ return getUcpArabic();
+ case CLASS_SCRIPT_ARMENIAN:
+ return getUcpArmenian();
+ case CLASS_SCRIPT_AVESTAN:
+ return getUcpAvestan();
+ case CLASS_SCRIPT_BALINESE:
+ return getUcpBalinese();
+ case CLASS_SCRIPT_BAMUM:
+ return getUcpBamum();
+ case CLASS_SCRIPT_BATAK:
+ return getUcpBatak();
+ case CLASS_SCRIPT_BENGALI:
+ return getUcpBengali();
+ case CLASS_SCRIPT_BOPOMOFO:
+ return getUcpBopomofo();
+ case CLASS_SCRIPT_BRAHMI:
+ return getUcpBrahmi();
+ case CLASS_SCRIPT_BRAILLE:
+ return getUcpBraille();
+ case CLASS_SCRIPT_BUGINESE:
+ return getUcpBuginese();
+ case CLASS_SCRIPT_BUHID:
+ return getUcpBuhid();
+ case CLASS_SCRIPT_CANADIAN_ABORIGINAL:
+ return getUcpCanadian_Aboriginal();
+ case CLASS_SCRIPT_CARIAN:
+ return getUcpCarian();
+ case CLASS_SCRIPT_CHAM:
+ return getUcpCham();
+ case CLASS_SCRIPT_CHEROKEE:
+ return getUcpCherokee();
+ case CLASS_SCRIPT_COMMON:
+ return getUcpCommon();
+ case CLASS_SCRIPT_COPTIC:
+ return getUcpCoptic();
+ case CLASS_SCRIPT_CUNEIFORM:
+ return getUcpCuneiform();
+ case CLASS_SCRIPT_CYPRIOT:
+ return getUcpCypriot();
+ case CLASS_SCRIPT_CYRILLIC:
+ return getUcpCyrillic();
+ case CLASS_SCRIPT_DESERET:
+ return getUcpDeseret();
+ case CLASS_SCRIPT_DEVANAGARI:
+ return getUcpDevanagari();
+ case CLASS_SCRIPT_EGYPTIAN_HIEROGLYPHS:
+ return getUcpEgyptian_Hieroglyphs();
+ case CLASS_SCRIPT_ETHIOPIC:
+ return getUcpEthiopic();
+ case CLASS_SCRIPT_GEORGIAN:
+ return getUcpGeorgian();
+ case CLASS_SCRIPT_GLAGOLITIC:
+ return getUcpGlagolitic();
+ case CLASS_SCRIPT_GOTHIC:
+ return getUcpGothic();
+ case CLASS_SCRIPT_GREEK:
+ return getUcpGreek();
+ case CLASS_SCRIPT_GUJARATI:
+ return getUcpGujarati();
+ case CLASS_SCRIPT_GURMUKHI:
+ return getUcpGurmukhi();
+ case CLASS_SCRIPT_HAN:
+ return getUcpHan();
+ case CLASS_SCRIPT_HANGUL:
+ return getUcpHangul();
+ case CLASS_SCRIPT_HANUNOO:
+ return getUcpHanunoo();
+ case CLASS_SCRIPT_HEBREW:
+ return getUcpHebrew();
+ case CLASS_SCRIPT_HIRAGANA:
+ return getUcpHiragana();
+ case CLASS_SCRIPT_IMPERIAL_ARAMAIC:
+ return getUcpImperial_Aramaic();
+ case CLASS_SCRIPT_INHERITED:
+ return getUcpInherited();
+ case CLASS_SCRIPT_INSCRIPTIONAL_PAHLAVI:
+ return getUcpInscriptional_Pahlavi();
+ case CLASS_SCRIPT_INSCRIPTIONAL_PARTHIAN:
+ return getUcpInscriptional_Parthian();
+ case CLASS_SCRIPT_JAVANESE:
+ return getUcpJavanese();
+ case CLASS_SCRIPT_KAITHI:
+ return getUcpKaithi();
+ case CLASS_SCRIPT_KANNADA:
+ return getUcpKannada();
+ case CLASS_SCRIPT_KATAKANA:
+ return getUcpKatakana();
+ case CLASS_SCRIPT_KAYAH_LI:
+ return getUcpKayah_Li();
+ case CLASS_SCRIPT_KHAROSHTHI:
+ return getUcpKharoshthi();
+ case CLASS_SCRIPT_KHMER:
+ return getUcpKhmer();
+ case CLASS_SCRIPT_LAO:
+ return getUcpLao();
+ case CLASS_SCRIPT_LATIN:
+ return getUcpLatin();
+ case CLASS_SCRIPT_LEPCHA:
+ return getUcpLepcha();
+ case CLASS_SCRIPT_LIMBU:
+ return getUcpLimbu();
+ case CLASS_SCRIPT_LINEAR_B:
+ return getUcpLinear_B();
+ case CLASS_SCRIPT_LISU:
+ return getUcpLisu();
+ case CLASS_SCRIPT_LYCIAN:
+ return getUcpLycian();
+ case CLASS_SCRIPT_LYDIAN:
+ return getUcpLydian();
+ case CLASS_SCRIPT_MALAYALAM:
+ return getUcpMalayalam();
+ case CLASS_SCRIPT_MANDAIC:
+ return getUcpMandaic();
+ case CLASS_SCRIPT_MEETEI_MAYEK:
+ return getUcpMeetei_Mayek();
+ case CLASS_SCRIPT_MONGOLIAN:
+ return getUcpMongolian();
+ case CLASS_SCRIPT_MYANMAR:
+ return getUcpMyanmar();
+ case CLASS_SCRIPT_NEW_TAI_LUE:
+ return getUcpNew_Tai_Lue();
+ case CLASS_SCRIPT_NKO:
+ return getUcpNko();
+ case CLASS_SCRIPT_OGHAM:
+ return getUcpOgham();
+ case CLASS_SCRIPT_OL_CHIKI:
+ return getUcpOl_Chiki();
+ case CLASS_SCRIPT_OLD_ITALIC:
+ return getUcpOld_Italic();
+ case CLASS_SCRIPT_OLD_PERSIAN:
+ return getUcpOld_Persian();
+ case CLASS_SCRIPT_OLD_SOUTH_ARABIAN:
+ return getUcpOld_South_Arabian();
+ case CLASS_SCRIPT_OLD_TURKIC:
+ return getUcpOld_Turkic();
+ case CLASS_SCRIPT_ORIYA:
+ return getUcpOriya();
+ case CLASS_SCRIPT_OSMANYA:
+ return getUcpOsmanya();
+ case CLASS_SCRIPT_PHAGS_PA:
+ return getUcpPhags_Pa();
+ case CLASS_SCRIPT_PHOENICIAN:
+ return getUcpPhoenician();
+ case CLASS_SCRIPT_REJANG:
+ return getUcpRejang();
+ case CLASS_SCRIPT_RUNIC:
+ return getUcpRunic();
+ case CLASS_SCRIPT_SAMARITAN:
+ return getUcpSamaritan();
+ case CLASS_SCRIPT_SAURASHTRA:
+ return getUcpSaurashtra();
+ case CLASS_SCRIPT_SHAVIAN:
+ return getUcpShavian();
+ case CLASS_SCRIPT_SINHALA:
+ return getUcpSinhala();
+ case CLASS_SCRIPT_SUNDANESE:
+ return getUcpSundanese();
+ case CLASS_SCRIPT_SYLOTI_NAGRI:
+ return getUcpSyloti_Nagri();
+ case CLASS_SCRIPT_SYRIAC:
+ return getUcpSyriac();
+ case CLASS_SCRIPT_TAGALOG:
+ return getUcpTagalog();
+ case CLASS_SCRIPT_TAGBANWA:
+ return getUcpTagbanwa();
+ case CLASS_SCRIPT_TAI_LE:
+ return getUcpTai_Le();
+ case CLASS_SCRIPT_TAI_THAM:
+ return getUcpTai_Tham();
+ case CLASS_SCRIPT_TAI_VIET:
+ return getUcpTai_Viet();
+ case CLASS_SCRIPT_TAMIL:
+ return getUcpTamil();
+ case CLASS_SCRIPT_TELUGU:
+ return getUcpTelugu();
+ case CLASS_SCRIPT_THAANA:
+ return getUcpThaana();
+ case CLASS_SCRIPT_THAI:
+ return getUcpThai();
+ case CLASS_SCRIPT_TIBETAN:
+ return getUcpTibetan();
+ case CLASS_SCRIPT_TIFINAGH:
+ return getUcpTifinagh();
+ case CLASS_SCRIPT_UGARITIC:
+ return getUcpUgaritic();
+ case CLASS_SCRIPT_VAI:
+ return getUcpVai();
+ case CLASS_SCRIPT_YI:
+ return getUcpYi();
+ case CLASS_UCP_ANY:
+ return CodePointSet(CodePointSet::interval(0, MAX_UNICODE));
+
+ default: { /* currently uses ascii defns */
+ CharReach cr = getPredefinedCharReach(c, mode);
+ CodePointSet rv;
+ for (u32 i = cr.find_first(); i != CharReach::npos;
+ i = cr.find_next(i)) {
+ rv.set(i);
+ }
+ return rv;
+ }
+ }
+}
+
+/**
+ * Construct an empty UTF-8 character class for the given parse mode.
+ *
+ * Every cached graph position starts out as POS_UNINITIALIZED; the build* and
+ * ensure* helpers create the corresponding vertices on demand.
+ */
+UTF8ComponentClass::UTF8ComponentClass(const ParseMode &mode_in)
+ : ComponentClass(mode_in),
+ single_pos( GlushkovBuildState::POS_UNINITIALIZED),
+ one_dot_trailer( GlushkovBuildState::POS_UNINITIALIZED),
+ two_dot_trailer( GlushkovBuildState::POS_UNINITIALIZED),
+ three_dot_trailer( GlushkovBuildState::POS_UNINITIALIZED),
+ two_char_dot_head( GlushkovBuildState::POS_UNINITIALIZED),
+ three_char_dot_head(GlushkovBuildState::POS_UNINITIALIZED),
+ four_char_dot_head( GlushkovBuildState::POS_UNINITIALIZED) {
+ /* this class must only be instantiated when the mode has utf8 set */
+ assert(mode.utf8);
+}
+
+/**
+ * Copy-construct a duplicate of this class on the heap.
+ *
+ * The result is a raw pointer, so the caller is presumably responsible for
+ * releasing it.
+ */
+UTF8ComponentClass *UTF8ComponentClass::clone() const {
+    UTF8ComponentClass *copy = new UTF8ComponentClass(*this);
+    return copy;
+}
+
+/** Returns true if the (already finalized) class holds no code points. */
+bool UTF8ComponentClass::class_empty(void) const {
+    assert(finalized);
+    const bool no_code_points = cps.none();
+    return no_code_points;
+}
+
+/**
+ * Close the pending range [range_start, to] and merge it into the class.
+ *
+ * Throws LocatedParseError if the end point lies below the start point. In
+ * caseless mode the range is case-expanded before it is merged.
+ */
+void UTF8ComponentClass::createRange(unichar to) {
+    assert(range_start != INVALID_UNICODE);
+    const unichar range_lo = range_start;
+    const unichar range_hi = to;
+    if (range_hi < range_lo) {
+        throw LocatedParseError("Range out of order in character class");
+    }
+
+    /* the candidate range is consumed from here on */
+    in_cand_range = false;
+
+    CodePointSet range_cps;
+    range_cps.setRange(range_lo, range_hi);
+    if (mode.caseless) {
+        make_caseless(&range_cps);
+    }
+    cps |= range_cps;
+
+    range_start = INVALID_UNICODE;
+}
+
+/**
+ * Merge a predefined class (optionally negated) into this class.
+ *
+ * Throws LocatedParseError if a range is pending, since a predefined class
+ * cannot serve as a range end point. In UCP mode the class is first passed
+ * through translateForUcpMode().
+ */
+void UTF8ComponentClass::add(PredefinedClass c, bool negative) {
+    if (in_cand_range) {
+        /* a predefined class can't close a range */
+        throw LocatedParseError("Invalid range in character class");
+    }
+
+    const PredefinedClass effective =
+        mode.ucp ? translateForUcpMode(c, mode) : c;
+
+    /* case-insensitivity is dealt with by getPredefinedCodePointSet itself,
+     * so make_caseless must not be applied to its result */
+    CodePointSet class_cps = getPredefinedCodePointSet(effective, mode);
+    if (negative) {
+        class_cps.flip();
+    }
+    cps |= class_cps;
+
+    /* a predefined class can't start a range either */
+    range_start = INVALID_UNICODE;
+    in_cand_range = false;
+}
+
+/**
+ * Add a single code point to the class, or use it to close a pending range.
+ *
+ * Throws LocatedParseError if the value lies beyond MAX_UNICODE. When no
+ * range is pending, the code point is remembered as a possible range start.
+ */
+void UTF8ComponentClass::add(unichar c) {
+    DEBUG_PRINTF("adding \\x%08x\n", c);
+    if (c > MAX_UNICODE) {
+        /* outside the Unicode code space */
+        throw LocatedParseError("Hexadecimal value is greater than \\x10FFFF");
+    }
+
+    if (in_cand_range) {
+        /* this code point is the upper end of "x-c" */
+        createRange(c);
+    } else {
+        CodePointSet single_cps;
+        single_cps.set(c);
+        if (mode.caseless) {
+            make_caseless(&single_cps);
+        }
+        cps |= single_cps;
+        range_start = c;
+    }
+}
+
+/**
+ * Finish parsing of the class: resolve a dangling '-' and apply negation.
+ *
+ * Calling this more than once is harmless; only the first call has an
+ * effect.
+ */
+void UTF8ComponentClass::finalize() {
+    if (!finalized) {
+        /* An unclosed range such as '[a-]' or '[a-\Q\E]' means the dash was
+         * just a literal dash. */
+        if (in_cand_range) {
+            cps.set('-');
+            in_cand_range = false;
+        }
+
+        if (m_negate) {
+            cps.flip();
+        }
+
+        finalized = true;
+    }
+}
+
+/**
+ * Return the head position that matches exactly first_byte, creating and
+ * caching it in heads on first use.
+ */
+Position UTF8ComponentClass::getHead(NFABuilder &builder, u8 first_byte) {
+    const auto cached = heads.find(first_byte);
+    const bool already_built = cached != heads.end();
+    if (!already_built) {
+        const Position fresh_head = builder.makePositions(1);
+        assert(heads.find(first_byte) == heads.end());
+        builder.addCharReach(fresh_head, CharReach(first_byte));
+        /* a head never gets a report id: it can't be wired straight to
+         * accept */
+
+        heads[first_byte] = fresh_head;
+        return fresh_head;
+    }
+
+    return cached->second;
+}
+
+/**
+ * Make sure one_dot_trailer exists: a reporting position that accepts any
+ * byte in 0x80-0xbf and is registered in tails.
+ */
+void UTF8ComponentClass::ensureDotTrailer(GlushkovBuildState &bs) {
+    NFABuilder &builder = bs.getBuilder();
+    if (one_dot_trailer == GlushkovBuildState::POS_UNINITIALIZED) {
+        const CharReach cont_bytes(0x80, 0xbf);
+
+        one_dot_trailer = builder.makePositions(1);
+        builder.setNodeReportID(one_dot_trailer, 0);
+        builder.addCharReach(one_dot_trailer, cont_bytes);
+        tails.insert(one_dot_trailer);
+    }
+}
+
+/**
+ * Make sure two_dot_trailer exists: a position accepting any byte in
+ * 0x80-0xbf whose successor is one_dot_trailer (created first if needed).
+ */
+void UTF8ComponentClass::ensureTwoDotTrailer(GlushkovBuildState &bs) {
+    NFABuilder &builder = bs.getBuilder();
+    if (two_dot_trailer == GlushkovBuildState::POS_UNINITIALIZED) {
+        /* the vertex we chain into has to exist before we do */
+        ensureDotTrailer(bs);
+
+        const CharReach cont_bytes(0x80, 0xbf);
+        two_dot_trailer = builder.makePositions(1);
+        builder.addCharReach(two_dot_trailer, cont_bytes);
+        bs.addSuccessor(two_dot_trailer, one_dot_trailer);
+    }
+}
+
+/**
+ * Make sure three_dot_trailer exists: a position accepting any byte in
+ * 0x80-0xbf whose successor is two_dot_trailer (created first if needed).
+ */
+void UTF8ComponentClass::ensureThreeDotTrailer(GlushkovBuildState &bs) {
+    NFABuilder &builder = bs.getBuilder();
+    if (three_dot_trailer == GlushkovBuildState::POS_UNINITIALIZED) {
+        /* the vertex we chain into has to exist before we do */
+        ensureTwoDotTrailer(bs);
+
+        const CharReach cont_bytes(0x80, 0xbf);
+        three_dot_trailer = builder.makePositions(1);
+        builder.addCharReach(three_dot_trailer, cont_bytes);
+        bs.addSuccessor(three_dot_trailer, two_dot_trailer);
+    }
+}
+
+/**
+ * Build the vertex for the part of the class that encodes as a single byte,
+ * i.e. code points below UTF_2CHAR_MIN.
+ *
+ * All such code points share single_pos, a reporting position that is
+ * created the first time it is needed.
+ */
+void UTF8ComponentClass::buildOneByte(GlushkovBuildState &bs) {
+    NFABuilder &builder = bs.getBuilder();
+    for (const auto &interval : cps) {
+        const unichar b = lower(interval);
+        const unichar e = upper(interval) + 1; /* half-open: [b, e) */
+
+        if (b < UTF_2CHAR_MIN) {
+            DEBUG_PRINTF("building vertices for [%u, %u)\n", b, e);
+
+            if (single_pos == GlushkovBuildState::POS_UNINITIALIZED) {
+                single_pos = builder.makePositions(1);
+                builder.setNodeReportID(single_pos, 0 /* offset adj */);
+                tails.insert(single_pos);
+            }
+
+            /* only the single-byte portion of the interval belongs here */
+            builder.addCharReach(single_pos,
+                                 CharReach(b, MIN(e, UTF_2CHAR_MIN) - 1));
+        }
+    }
+}
+
+/**
+ * Allow the final continuation bytes for code points [b, e) after prev.
+ *
+ * Each prev position owns at most one reporting tail, remembered in finals;
+ * it is created, wired after prev and added to tails on first use. The
+ * continuation bytes of b through e - 1 are then added to its reach.
+ */
+void UTF8ComponentClass::addToTail(GlushkovBuildState &bs,
+                                   map<Position, Position> &finals,
+                                   Position prev, unichar b, unichar e) {
+    NFABuilder &builder = bs.getBuilder();
+
+    Position tail;
+    const auto known = finals.find(prev);
+    if (known != finals.end()) {
+        tail = known->second;
+    } else {
+        tail = builder.makePositions(1);
+        builder.setNodeReportID(tail, 0 /* offset adj */);
+        bs.addSuccessor(prev, tail);
+        finals[prev] = tail;
+        tails.insert(tail);
+    }
+
+    const u8 first_cont = makeContByte(b);
+    const u8 last_cont = makeContByte(e - 1); /* e is exclusive */
+    builder.addCharReach(tail, CharReach(first_cont, last_cont));
+}
+
+/**
+ * Build the vertices for the part of the class that encodes as two UTF-8
+ * bytes, i.e. code points in [UTF_2CHAR_MIN, UTF_3CHAR_MIN).
+ *
+ * Each interval is clamped to that window and cut into up to three pieces: a
+ * leading piece that starts part-way through a continuation byte's range, a
+ * trailing piece that ends part-way through one, and a middle piece made only
+ * of whole continuation-byte ranges. The partial pieces get a head for their
+ * exact first byte (getHead) and a tail covering just the bytes needed
+ * (addToTail); the middle piece adds its first bytes to the shared
+ * two_char_dot_head, which leads to the shared one_dot_trailer.
+ */
+void UTF8ComponentClass::buildTwoByte(GlushkovBuildState &bs) {
+ NFABuilder &builder = bs.getBuilder();
+ /* head position -> its tail, shared by all intervals handled in this call */
+ map<Position, Position> finals;
+
+ for (auto it = cps.begin(); it != cps.end(); ++it) {
+ /* work on the half-open range [b, e) */
+ unichar b = lower(*it);
+ unichar e = upper(*it) + 1;
+
+ /* clamp to the two-byte window */
+ b = MAX(b, UTF_2CHAR_MIN);
+ e = MIN(e, UTF_3CHAR_MIN);
+
+ if (b >= e) {
+ continue; /* we're done here */
+ }
+
+ /* raise b to the start of the next tail byte boundary */
+ if (b & UTF_CONT_BYTE_VALUE_MASK) {
+ unichar bb = MIN(e, ROUNDUP_N(b, UTF_CONT_BYTE_RANGE));
+ u8 first_byte = UTF_TWO_BYTE_HEADER | (b >> UTF_CONT_SHIFT);
+ assert(first_byte > 0xc1 && first_byte <= 0xdf);
+
+ Position head = getHead(builder, first_byte);
+ addToTail(bs, finals, head, b, bb);
+
+ b = bb;
+ }
+
+ if (b == e) {
+ continue; /* we're done here */
+ }
+ assert(b < e);
+
+ /* lower e to the end of a tail byte boundary */
+ if (e & UTF_CONT_BYTE_VALUE_MASK) {
+ unichar ee = e & ~UTF_CONT_BYTE_VALUE_MASK;
+ assert(ee >= b);
+
+ u8 first_byte = UTF_TWO_BYTE_HEADER | (ee >> UTF_CONT_SHIFT);
+ assert(first_byte > 0xc1 && first_byte <= 0xdf);
+
+ Position head = getHead(builder, first_byte);
+ addToTail(bs, finals, head, ee, e);
+
+ e = ee;
+ }
+
+ if (b == e) {
+ continue; /* we're done here */
+ }
+ assert(b < e);
+
+ /* middle section just goes to a common full vertex */
+ ensureDotTrailer(bs);
+
+ if (two_char_dot_head == GlushkovBuildState::POS_UNINITIALIZED) {
+ two_char_dot_head = builder.makePositions(1);
+ bs.addSuccessor(two_char_dot_head, one_dot_trailer);
+ }
+
+ /* e is exclusive, so the last first byte comes from e - 1 */
+ u8 min_first_byte = UTF_TWO_BYTE_HEADER | (b >> UTF_CONT_SHIFT);
+ u8 max_first_byte = UTF_TWO_BYTE_HEADER | ((e - 1) >> UTF_CONT_SHIFT);
+
+ assert(min_first_byte > 0xc1 && min_first_byte <= 0xdf);
+ assert(max_first_byte > 0xc1 && max_first_byte <= 0xdf);
+
+ builder.addCharReach(two_char_dot_head,
+ CharReach(min_first_byte, max_first_byte));
+ }
+}
+
+/**
+ * Return the middle position that follows prev and matches exactly
+ * byte_val, creating it (and wiring it after prev) on first use.
+ *
+ * mids caches the positions per predecessor and per byte value.
+ */
+static
+Position getMid(GlushkovBuildState &bs, map<Position, map<u8, Position> > &mids,
+                const Position &prev, u8 byte_val) {
+    NFABuilder &builder = bs.getBuilder();
+    map<u8, Position> &mids_after_prev = mids[prev];
+
+    const auto cached = mids_after_prev.find(byte_val);
+    const bool already_built = cached != mids_after_prev.end();
+    if (!already_built) {
+        const Position fresh_mid = builder.makePositions(1);
+        builder.addCharReach(fresh_mid, CharReach(byte_val));
+        bs.addSuccessor(prev, fresh_mid);
+        /* a mid never gets a report id: it can't be wired straight to
+         * accept */
+
+        mids_after_prev[byte_val] = fresh_mid;
+        return fresh_mid;
+    }
+
+    return cached->second;
+}
+
+/**
+ * Build the vertices for the part of the class that encodes as three UTF-8
+ * bytes, i.e. code points in [UTF_3CHAR_MIN, UTF_4CHAR_MIN).
+ *
+ * Works like buildTwoByte() with one more level. First the pieces that are
+ * partial in the last byte are split off (head -> mid -> tail via getHead,
+ * getMid and addToTail). Then pieces that are partial only in the second byte
+ * get a private mid vertex feeding the shared one_dot_trailer. Whatever is
+ * left is a run of whole first-byte ranges and is added to the shared
+ * three_char_dot_head, which is followed by two_dot_trailer.
+ */
+void UTF8ComponentClass::buildThreeByte(GlushkovBuildState &bs) {
+ NFABuilder &builder = bs.getBuilder();
+
+ /* caches shared by all intervals handled in this call: exact-byte mid
+ * positions per predecessor, and the tail owned by each mid */
+ map<Position, map<u8, Position> > mids;
+ map<Position, Position> finals;
+
+ for (auto it = cps.begin(); it != cps.end(); ++it) {
+ /* work on the half-open range [b, e) */
+ unichar b = lower(*it);
+ unichar e = upper(*it) + 1;
+
+ /* clamp to the three-byte window */
+ b = MAX(b, UTF_3CHAR_MIN);
+ e = MIN(e, UTF_4CHAR_MIN);
+
+ if (b >= e) {
+ continue; /* we're done here */
+ }
+
+ /* raise b to the start of the next tail byte boundary */
+ if (b & UTF_CONT_BYTE_VALUE_MASK) {
+ unichar bb = MIN(e, ROUNDUP_N(b, UTF_CONT_BYTE_RANGE));
+
+ u8 first_byte = UTF_THREE_BYTE_HEADER | (b >> (2 * UTF_CONT_SHIFT));
+ assert(first_byte >= 0xe0 && first_byte <= 0xef);
+ Position head = getHead(builder, first_byte);
+
+ u8 second_byte = makeContByte(b >> UTF_CONT_SHIFT);
+ Position mid = getMid(bs, mids, head, second_byte);
+
+ addToTail(bs, finals, mid, b, bb);
+
+ b = bb;
+ }
+
+ if (b == e) {
+ continue; /* we're done here */
+ }
+ assert(b < e);
+
+ /* lower e to the end of a tail byte boundary */
+ if (e & UTF_CONT_BYTE_VALUE_MASK) {
+ unichar ee = e & ~UTF_CONT_BYTE_VALUE_MASK;
+ assert(ee >= b);
+
+ u8 first_byte = UTF_THREE_BYTE_HEADER
+ | (ee >> (2 * UTF_CONT_SHIFT));
+ assert(first_byte >= 0xe0 && first_byte <= 0xef);
+ Position head = getHead(builder, first_byte);
+
+ u8 second_byte = makeContByte(ee >> UTF_CONT_SHIFT);
+ Position mid = getMid(bs, mids, head, second_byte);
+
+ addToTail(bs, finals, mid, ee, e);
+
+ e = ee;
+ }
+
+ if (b == e) {
+ continue; /* we're done here */
+ }
+ assert(b < e);
+
+ /* from here on in the last byte is always full */
+ ensureDotTrailer(bs);
+
+ /* raise b to the start of the next mid byte boundary */
+ if (b & ((1 << (2 * UTF_CONT_SHIFT)) - 1)) {
+ unichar bb = MIN(e, ROUNDUP_N(b, 1 << (2 * UTF_CONT_SHIFT)));
+
+ u8 first_byte = UTF_THREE_BYTE_HEADER | (b >> (2 * UTF_CONT_SHIFT));
+ Position head = getHead(builder, first_byte);
+
+ Position mid = builder.makePositions(1);
+ bs.addSuccessor(head, mid);
+ bs.addSuccessor(mid, one_dot_trailer);
+ /* no report id as mid can not be directly wired to accept,
+ * not adding to mids as we are completely filling its downstream */
+ u8 second_min = makeContByte(b >> UTF_CONT_SHIFT);
+ u8 second_max = makeContByte((bb - 1) >> UTF_CONT_SHIFT);
+
+ builder.addCharReach(mid, CharReach(second_min, second_max));
+
+ b = bb;
+ }
+
+ if (b == e) {
+ continue; /* we're done here */
+ }
+ assert(b < e);
+
+ /* lower e to the end of a mid byte boundary */
+ if (e & ((1 << (2 * UTF_CONT_SHIFT)) - 1)) {
+ unichar ee = e & ~((1 << (2 * UTF_CONT_SHIFT)) - 1);
+ assert(ee >= b);
+
+ u8 first_byte = UTF_THREE_BYTE_HEADER
+ | (ee >> (2 * UTF_CONT_SHIFT));
+ Position head = getHead(builder, first_byte);
+
+ Position mid = builder.makePositions(1);
+ bs.addSuccessor(head, mid);
+ bs.addSuccessor(mid, one_dot_trailer);
+ /* no report id as mid can not be directly wired to accept,
+ * not adding to mids as we are completely filling its downstream */
+ u8 second_min = makeContByte(ee >> UTF_CONT_SHIFT);
+ u8 second_max = makeContByte((e - 1) >> UTF_CONT_SHIFT);
+
+ builder.addCharReach(mid, CharReach(second_min, second_max));
+
+ e = ee;
+ }
+
+ if (b == e) {
+ continue; /* we're done here */
+ }
+ assert(b < e);
+
+ /* now we just have to wire head to a common dot trailer */
+ ensureTwoDotTrailer(bs);
+ if (three_char_dot_head == GlushkovBuildState::POS_UNINITIALIZED) {
+ three_char_dot_head = builder.makePositions(1);
+ bs.addSuccessor(three_char_dot_head, two_dot_trailer);
+ }
+
+ /* e is exclusive, so the last first byte comes from e - 1 */
+ u8 min_first_byte = UTF_THREE_BYTE_HEADER
+ | (b >> (2 * UTF_CONT_SHIFT));
+ u8 max_first_byte = UTF_THREE_BYTE_HEADER
+ | ((e - 1) >> (2 * UTF_CONT_SHIFT));
+
+ assert(min_first_byte > 0xdf && min_first_byte <= 0xef);
+ assert(max_first_byte > 0xdf && max_first_byte <= 0xef);
+
+ builder.addCharReach(three_char_dot_head,
+ CharReach(min_first_byte, max_first_byte));
+ }
+}
+
+/**
+ * Compute the first byte of the four-byte encoding of raw: the four-byte
+ * header combined with the bits of raw above the three continuation bytes.
+ */
+static
+u8 makeFirstByteOfFour(unichar raw) {
+    const u8 lead = UTF_FOUR_BYTE_HEADER | (raw >> (3 * UTF_CONT_SHIFT));
+    assert(lead >= 0xf0 && lead <= 0xf7);
+    return lead;
+}
+
+/**
+ * True if the bits of raw that are carried by the last two continuation
+ * bytes are all zero.
+ */
+static
+bool isTwoContAligned(unichar raw) {
+    const unichar low_bits = raw & ((1 << (2 * UTF_CONT_SHIFT)) - 1);
+    return low_bits == 0;
+}
+
+/**
+ * True if the bits of raw that are carried by the last three continuation
+ * bytes are all zero.
+ */
+static
+bool isThreeContAligned(unichar raw) {
+    const unichar low_bits = raw & ((1 << (3 * UTF_CONT_SHIFT)) - 1);
+    return low_bits == 0;
+}
+
+/**
+ * Build the vertices for the part of the class that encodes as four UTF-8
+ * bytes, i.e. code points in [UTF_4CHAR_MIN, MAX_UNICODE].
+ *
+ * Same scheme as buildThreeByte() with one more level. The interval is
+ * trimmed from both ends in three rounds: pieces partial in the last byte
+ * (head -> mid1 -> mid2 -> tail), pieces partial in the third byte (private
+ * mid2 feeding one_dot_trailer) and pieces partial in the second byte
+ * (private mid1 feeding two_dot_trailer). The remainder is a run of whole
+ * first-byte ranges and is added to the shared four_char_dot_head, which is
+ * followed by three_dot_trailer.
+ */
+void UTF8ComponentClass::buildFourByte(GlushkovBuildState &bs) {
+ NFABuilder &builder = bs.getBuilder();
+ /* caches shared by all intervals handled in this call: exact-byte mid
+ * positions per predecessor, and the tail owned by each mid2 */
+ map<Position, map<u8, Position> > mids;
+ map<Position, Position> finals;
+
+ for (auto it = cps.begin(); it != cps.end(); ++it) {
+ /* work on the half-open range [b, e) */
+ unichar b = lower(*it);
+ unichar e = upper(*it) + 1;
+
+ /* clamp to the four-byte window */
+ b = MAX(b, UTF_4CHAR_MIN);
+ e = MIN(e, MAX_UNICODE + 1);
+
+ if (b >= e) {
+ continue;
+ }
+
+ /* raise b to the start of the next tail byte boundary */
+ if (b & UTF_CONT_BYTE_VALUE_MASK) {
+ unichar bb = MIN(e, ROUNDUP_N(b, UTF_CONT_BYTE_RANGE));
+
+ u8 first_byte = makeFirstByteOfFour(b);
+ Position head = getHead(builder, first_byte);
+
+ u8 second_byte = makeContByte(b >> (2 * UTF_CONT_SHIFT));
+ Position mid1 = getMid(bs, mids, head, second_byte);
+
+ u8 third_byte = makeContByte(b >> UTF_CONT_SHIFT);
+ Position mid2 = getMid(bs, mids, mid1, third_byte);
+
+ addToTail(bs, finals, mid2, b, bb);
+
+ b = bb;
+ }
+
+ if (b == e) {
+ continue; /* we're done here */
+ }
+ assert(b < e);
+
+ /* lower e to the end of a tail byte boundary */
+ if (e & UTF_CONT_BYTE_VALUE_MASK) {
+ unichar ee = e & ~UTF_CONT_BYTE_VALUE_MASK;
+ assert(ee >= b);
+
+ u8 first_byte = makeFirstByteOfFour(ee);
+ Position head = getHead(builder, first_byte);
+
+ u8 second_byte = makeContByte(ee >> (2 * UTF_CONT_SHIFT));
+ Position mid1 = getMid(bs, mids, head, second_byte);
+
+ u8 third_byte = makeContByte(ee >> UTF_CONT_SHIFT);
+ Position mid2 = getMid(bs, mids, mid1, third_byte);
+
+ addToTail(bs, finals, mid2, ee, e);
+
+ e = ee;
+ }
+
+ if (b == e) {
+ continue; /* we're done here */
+ }
+ assert(b < e);
+
+ /* from here on in the last byte is always full */
+ ensureDotTrailer(bs);
+
+ /* raise b to the start of the next mid byte boundary */
+ if (!isTwoContAligned(b)) {
+ unichar bb = MIN(e, ROUNDUP_N(b, 1 << (2 * UTF_CONT_SHIFT)));
+
+ u8 first_byte = makeFirstByteOfFour(b);
+ Position head = getHead(builder, first_byte);
+
+ u8 second_byte = makeContByte(b >> (2 * UTF_CONT_SHIFT));
+ Position mid1 = getMid(bs, mids, head, second_byte);
+
+ Position mid2 = builder.makePositions(1);
+ bs.addSuccessor(mid1, mid2);
+ bs.addSuccessor(mid2, one_dot_trailer);
+ /* no report id as mid can not be directly wired to accept,
+ * not adding to mids as we are completely filling its downstream */
+ u8 byte_min = makeContByte(b >> UTF_CONT_SHIFT);
+ u8 byte_max = makeContByte((bb - 1) >> UTF_CONT_SHIFT);
+
+ builder.addCharReach(mid2, CharReach(byte_min, byte_max));
+
+ b = bb;
+ }
+
+ if (b == e) {
+ continue; /* we're done here */
+ }
+ assert(b < e);
+
+ /* lower e to the end of a mid byte boundary */
+ if (!isTwoContAligned(e)) {
+ unichar ee = e & ~((1 << (2 * UTF_CONT_SHIFT)) - 1);
+ assert(ee >= b);
+
+ u8 first_byte = makeFirstByteOfFour(ee);
+ Position head = getHead(builder, first_byte);
+
+ u8 second_byte = makeContByte(ee >> (2 * UTF_CONT_SHIFT));
+ Position mid1 = getMid(bs, mids, head, second_byte);
+
+ Position mid2 = builder.makePositions(1);
+ bs.addSuccessor(mid1, mid2);
+ bs.addSuccessor(mid2, one_dot_trailer);
+ /* no report id as mid can not be directly wired to accept,
+ * not adding to mids as we are completely filling its downstream */
+ u8 byte_min = makeContByte(ee >> UTF_CONT_SHIFT);
+ u8 byte_max = makeContByte((e - 1) >> UTF_CONT_SHIFT);
+
+ builder.addCharReach(mid2, CharReach(byte_min, byte_max));
+
+ e = ee;
+ }
+
+ if (b == e) {
+ continue; /* we're done here */
+ }
+ assert(b < e);
+
+ /* from here on the last two bytes are always full */
+ ensureTwoDotTrailer(bs);
+
+ /* raise b to the next byte boundary */
+ if (!isThreeContAligned(b)) {
+ unichar bb = MIN(e, ROUNDUP_N(b, 1 << (3 * UTF_CONT_SHIFT)));
+
+ u8 first_byte = makeFirstByteOfFour(b);
+ Position head = getHead(builder, first_byte);
+
+ Position mid1 = builder.makePositions(1);
+ bs.addSuccessor(head, mid1);
+ bs.addSuccessor(mid1, two_dot_trailer);
+ /* no report id as mid can not be directly wired to accept,
+ * not adding to mids as we are completely filling its downstream */
+ u8 byte_min = makeContByte(b >> (2 * UTF_CONT_SHIFT));
+ u8 byte_max = makeContByte((bb - 1) >> (2 * UTF_CONT_SHIFT));
+
+ builder.addCharReach(mid1, CharReach(byte_min, byte_max));
+
+ b = bb;
+ }
+
+ if (b == e) {
+ continue; /* we're done here */
+ }
+ assert(b < e);
+
+ /* lower e to the next byte boundary */
+ if (!isThreeContAligned(e)) {
+ unichar ee = e & ~((1 << (3 * UTF_CONT_SHIFT)) - 1);
+ assert(ee >= b);
+
+ u8 first_byte = makeFirstByteOfFour(ee);
+ Position head = getHead(builder, first_byte);
+ Position mid1 = builder.makePositions(1);
+ bs.addSuccessor(head, mid1);
+ bs.addSuccessor(mid1, two_dot_trailer);
+ /* no report id as mid can not be directly wired to accept,
+ * not adding to mids as we are completely filling its downstream */
+ u8 byte_min = makeContByte(ee >> (2 * UTF_CONT_SHIFT));
+ u8 byte_max = makeContByte((e - 1) >> (2 * UTF_CONT_SHIFT));
+
+ builder.addCharReach(mid1, CharReach(byte_min, byte_max));
+
+ e = ee;
+ }
+
+ if (b == e) {
+ continue; /* we're done here */
+ }
+ assert(b < e);
+
+ /* now we just have to wire head to a common dot trailer */
+ ensureThreeDotTrailer(bs);
+ if (four_char_dot_head == GlushkovBuildState::POS_UNINITIALIZED) {
+ four_char_dot_head = builder.makePositions(1);
+ bs.addSuccessor(four_char_dot_head, three_dot_trailer);
+ }
+
+ /* e is exclusive, so the last first byte comes from e - 1 */
+ u8 min_first_byte = makeFirstByteOfFour(b);
+ u8 max_first_byte = makeFirstByteOfFour(e - 1);
+
+ builder.addCharReach(four_char_dot_head,
+ CharReach(min_first_byte, max_first_byte));
+ }
+}
+
+/**
+ * Create all graph positions for this class.
+ *
+ * A non-empty class is built by the one- to four-byte builders in turn. An
+ * empty class gets a single reporting position with empty reach instead.
+ */
+void UTF8ComponentClass::notePositions(GlushkovBuildState &bs) {
+    /* finalize() must already have run */
+    assert(finalized);
+
+    if (!class_empty()) {
+        buildOneByte(bs);
+        buildTwoByte(bs);
+        buildThreeByte(bs);
+        buildFourByte(bs);
+        return;
+    }
+
+    /* Special case: a class with no members at all, as produced by something
+     * like /[^\s\S]/8, can never match. As in the non UTF-8 version, we add
+     * a vertex with empty reach so that the graph stays connected and the
+     * vertex can be picked up later on. */
+    DEBUG_PRINTF("empty class!\n");
+    assert(single_pos == GlushkovBuildState::POS_UNINITIALIZED);
+
+    NFABuilder &builder = bs.getBuilder();
+    single_pos = builder.makePositions(1);
+    builder.setNodeReportID(single_pos, 0 /* offset adj */);
+    builder.addCharReach(single_pos, CharReach());
+    tails.insert(single_pos);
+}
+
+/**
+ * Intentionally a no-op: both parameters are ignored because the edges
+ * between this class's positions are already added while the positions are
+ * created (see the addSuccessor calls in the build* and ensure* helpers).
+ */
+void UTF8ComponentClass::buildFollowSet(GlushkovBuildState &,
+ const vector<PositionInfo> &) {
+ /* states are wired in notePositions as all belong to this component. */
+}
+
+/**
+ * Collect the positions a match of this class can start at: the single-byte
+ * position, the shared two/three/four-byte dot heads (each only if it has
+ * been created) and every exact-byte head, in that order.
+ */
+vector<PositionInfo> UTF8ComponentClass::first(void) const {
+    const Position unset = GlushkovBuildState::POS_UNINITIALIZED;
+    vector<PositionInfo> starts;
+
+    if (single_pos != unset) {
+        starts.push_back(single_pos);
+    }
+    if (two_char_dot_head != unset) {
+        starts.push_back(two_char_dot_head);
+    }
+    if (three_char_dot_head != unset) {
+        starts.push_back(three_char_dot_head);
+    }
+    if (four_char_dot_head != unset) {
+        starts.push_back(four_char_dot_head);
+    }
+
+    for (const auto &byte_and_head : heads) {
+        starts.push_back(byte_and_head.second);
+    }
+    return starts;
+}
+
+/**
+ * Collect the positions a match of this class can end at, i.e. everything
+ * recorded in tails, in the set's iteration order.
+ */
+vector<PositionInfo> UTF8ComponentClass::last(void) const {
+    vector<PositionInfo> ends;
+    for (const Position &tail : tails) {
+        ends.insert(ends.end(), tail);
+    }
+    return ends;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/Utf8ComponentClass.h b/contrib/libs/hyperscan/src/parser/Utf8ComponentClass.h
index 72c16b0095..f4e7ea328d 100644
--- a/contrib/libs/hyperscan/src/parser/Utf8ComponentClass.h
+++ b/contrib/libs/hyperscan/src/parser/Utf8ComponentClass.h
@@ -1,116 +1,116 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Character class in UTF-8 mode.
- */
-
-#ifndef UTF8_COMPONENT_CLASS_H
-#define UTF8_COMPONENT_CLASS_H
-
-#include "ComponentClass.h"
-#include "ue2common.h"
-#include "util/unicode_set.h"
-
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-
-namespace ue2 {
-
-class UTF8ComponentClass : public ComponentClass {
- friend class DumpVisitor;
- friend class PrintVisitor;
- friend class CaselessVisitor;
- friend class SimplifyVisitor;
- friend class SimplifyCandidatesVisitor;
-public:
- explicit UTF8ComponentClass(const ParseMode &mode);
- ~UTF8ComponentClass() override {}
- UTF8ComponentClass *clone() const override;
-
- Component *accept(ComponentVisitor &v) override {
- Component *c = v.visit(this);
- v.post(this);
- return c;
- }
-
- void accept(ConstComponentVisitor &v) const override {
- v.pre(*this);
- v.during(*this);
- v.post(*this);
- }
-
- bool class_empty(void) const override;
- void add(PredefinedClass c, bool negative) override;
- void add(unichar c) override;
- void finalize(void) override;
- void notePositions(GlushkovBuildState &bs) override;
- void buildFollowSet(GlushkovBuildState &bs,
- const std::vector<PositionInfo> &) override;
- std::vector<PositionInfo> first(void) const override;
- std::vector<PositionInfo> last(void) const override;
-
-protected:
- void createRange(unichar to) override;
-
-private:
- Position getHead(NFABuilder &builder, u8 first_byte);
- void addToTail(GlushkovBuildState &bs, std::map<Position, Position> &finals,
- Position prev, unichar b, unichar e);
- void ensureDotTrailer(GlushkovBuildState &bs);
- void ensureTwoDotTrailer(GlushkovBuildState &bs);
- void ensureThreeDotTrailer(GlushkovBuildState &bs);
- void buildOneByte(GlushkovBuildState &bs);
- void buildTwoByte(GlushkovBuildState &bs);
- void buildThreeByte(GlushkovBuildState &bs);
- void buildFourByte(GlushkovBuildState &bs);
-
- CodePointSet cps;
-
- std::map<u8, Position> heads;
- Position single_pos;
- Position one_dot_trailer;
- Position two_dot_trailer;
- Position three_dot_trailer;
-
- Position two_char_dot_head;
- Position three_char_dot_head;
- Position four_char_dot_head;
- std::set<Position> tails;
-};
-
-PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode);
-
-CodePointSet getPredefinedCodePointSet(PredefinedClass c,
- const ParseMode &mode);
-
-} // namespace
-
-#endif // UTF8_COMPONENT_CLASS_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Character class in UTF-8 mode.
+ */
+
+#ifndef UTF8_COMPONENT_CLASS_H
+#define UTF8_COMPONENT_CLASS_H
+
+#include "ComponentClass.h"
+#include "ue2common.h"
+#include "util/unicode_set.h"
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace ue2 {
+
+/**
+ * \brief Character class component used when the pattern is in UTF-8 mode.
+ *
+ * The class members are kept as a set of code points (cps). When positions
+ * are noted, that set is turned into byte-level graph positions covering the
+ * one- to four-byte UTF-8 encodings of its members.
+ */
+class UTF8ComponentClass : public ComponentClass {
+ friend class DumpVisitor;
+ friend class PrintVisitor;
+ friend class CaselessVisitor;
+ friend class SimplifyVisitor;
+ friend class SimplifyCandidatesVisitor;
+public:
+ explicit UTF8ComponentClass(const ParseMode &mode);
+ ~UTF8ComponentClass() override {}
+ UTF8ComponentClass *clone() const override;
+
+ /** Visitor entry point: visit(this) followed by post(this). */
+ Component *accept(ComponentVisitor &v) override {
+ Component *c = v.visit(this);
+ v.post(this);
+ return c;
+ }
+
+ /** Const visitor entry point: pre, during and post on this object. */
+ void accept(ConstComponentVisitor &v) const override {
+ v.pre(*this);
+ v.during(*this);
+ v.post(*this);
+ }
+
+ /** True if the finalized class contains no code points. */
+ bool class_empty(void) const override;
+ /** Add a predefined class, optionally negated. */
+ void add(PredefinedClass c, bool negative) override;
+ /** Add one code point, or close a pending range with it. */
+ void add(unichar c) override;
+ /** Resolve a dangling '-' and apply negation; later calls do nothing. */
+ void finalize(void) override;
+ /** Create the graph positions (and their internal edges) for the class. */
+ void notePositions(GlushkovBuildState &bs) override;
+ /** No-op: the internal edges are added by notePositions. */
+ void buildFollowSet(GlushkovBuildState &bs,
+ const std::vector<PositionInfo> &) override;
+ /** Positions a match of this class can start at. */
+ std::vector<PositionInfo> first(void) const override;
+ /** Positions a match of this class can end at (the tails). */
+ std::vector<PositionInfo> last(void) const override;
+
+protected:
+ /** Close the pending range with end point \a to. */
+ void createRange(unichar to) override;
+
+private:
+ /** Get or create the head position matching exactly \a first_byte. */
+ Position getHead(NFABuilder &builder, u8 first_byte);
+ /** Allow the last bytes of code points [b, e) on the tail after \a prev. */
+ void addToTail(GlushkovBuildState &bs, std::map<Position, Position> &finals,
+ Position prev, unichar b, unichar e);
+ /* create the shared continuation-byte trailers on demand */
+ void ensureDotTrailer(GlushkovBuildState &bs);
+ void ensureTwoDotTrailer(GlushkovBuildState &bs);
+ void ensureThreeDotTrailer(GlushkovBuildState &bs);
+ /* build the positions for code points of each encoded length */
+ void buildOneByte(GlushkovBuildState &bs);
+ void buildTwoByte(GlushkovBuildState &bs);
+ void buildThreeByte(GlushkovBuildState &bs);
+ void buildFourByte(GlushkovBuildState &bs);
+
+ /* code points in the class; flipped by finalize() for a negated class */
+ CodePointSet cps;
+
+ /* exact first byte -> head position, filled in by getHead() */
+ std::map<u8, Position> heads;
+ /* position for single-byte code points (or for the empty class) */
+ Position single_pos;
+ /* shared trailers accepting any byte in 0x80-0xbf; the three-dot trailer
+ * leads to the two-dot trailer, which leads to the one-dot trailer */
+ Position one_dot_trailer;
+ Position two_dot_trailer;
+ Position three_dot_trailer;
+
+ /* shared heads for first bytes whose continuation bytes are unrestricted */
+ Position two_char_dot_head;
+ Position three_char_dot_head;
+ Position four_char_dot_head;
+ /* every position this class can end on; returned by last() */
+ std::set<Position> tails;
+};
+
+/**
+ * Maps a predefined class to the class to use in place of it. The UTF-8
+ * character class applies this only when the parse mode has ucp set.
+ * NOTE(review): presumably selects the Unicode-property counterpart of the
+ * class; the definition is not visible here, confirm against it.
+ */
+PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode);
+
+/**
+ * Returns the set of code points matched by predefined class \a c under the
+ * given parse mode. Classes without a dedicated Unicode definition are
+ * derived from their getPredefinedCharReach() definition.
+ */
+CodePointSet getPredefinedCodePointSet(PredefinedClass c,
+ const ParseMode &mode);
+
+} // namespace
+
+#endif // UTF8_COMPONENT_CLASS_H
diff --git a/contrib/libs/hyperscan/src/parser/buildstate.cpp b/contrib/libs/hyperscan/src/parser/buildstate.cpp
index 3e2cbd9a39..75cfbb7b2d 100644
--- a/contrib/libs/hyperscan/src/parser/buildstate.cpp
+++ b/contrib/libs/hyperscan/src/parser/buildstate.cpp
@@ -1,529 +1,529 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Glushkov construction.
- */
-#include "buildstate.h"
-#include "position.h"
-#include "position_dump.h"
-#include "position_info.h"
-#include "parse_error.h"
-#include "hs_internal.h"
-#include "ue2common.h"
-#include "nfagraph/ng_builder.h"
-#include "util/charreach.h"
-#include "util/container.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Glushkov construction.
+ */
+#include "buildstate.h"
+#include "position.h"
+#include "position_dump.h"
+#include "position_info.h"
+#include "parse_error.h"
+#include "hs_internal.h"
+#include "ue2common.h"
+#include "nfagraph/ng_builder.h"
+#include "util/charreach.h"
+#include "util/container.h"
#include "util/flat_containers.h"
#include "util/hash.h"
-#include "util/make_unique.h"
+#include "util/make_unique.h"
#include "util/unordered.h"
-
-#include <algorithm>
-#include <iterator>
-#include <limits>
-#include <map>
-#include <utility>
-
-#if defined(DEBUG) || defined(DUMP_SUPPORT)
-#include <ostream>
-#include <sstream>
-#endif
-
-using namespace std;
-
-namespace ue2 {
-
-/** \brief Represents an uninitialized state. */
-const Position GlushkovBuildState::POS_UNINITIALIZED =
- numeric_limits<Position>::max();
-
-/** \brief Represents an epsilon transition in the firsts of a component. */
-const Position GlushkovBuildState::POS_EPSILON =
- numeric_limits<Position>::max() - 1;
-
-GlushkovBuildState::~GlushkovBuildState() { }
-
-namespace /* anonymous */ {
-
-class CheckPositionFlags {
-public:
- explicit CheckPositionFlags(int fl) : flags(fl) {}
- bool operator()(const PositionInfo &p) const {
- return (p.flags & flags) == flags;
- }
-private:
- int flags;
-};
-
-class CheckUnflaggedEpsilon {
-public:
- bool operator()(const PositionInfo &p) const {
- return p.pos == GlushkovBuildState::POS_EPSILON && p.flags == 0;
- }
-};
-
-/** \brief Concrete impl of the GlushkovBuildState interface. */
-class GlushkovBuildStateImpl : public GlushkovBuildState {
-public:
- GlushkovBuildStateImpl(NFABuilder &b, bool prefilter);
-
- /** \brief Returns a reference to the NFABuilder being used. */
- NFABuilder &getBuilder() override { return builder; }
-
- /** \brief Returns a const reference to the NFABuilder being used. */
- const NFABuilder &getBuilder() const override { return builder; }
-
- /** \brief Wire up the lasts of one component to the firsts of another. */
- void connectRegions(const vector<PositionInfo> &lasts,
- const vector<PositionInfo> &firsts) override;
-
- /** \brief Wire the lasts of the main sequence to accepts. */
- void connectAccepts(const vector<PositionInfo> &lasts) override;
-
- /** \brief Wire up a single last to a list of firsts. */
- void connectSuccessors(const PositionInfo &last,
- vector<PositionInfo> firsts);
-
- /** Wire up a pair of positions. */
- void addSuccessor(Position from, Position to) override;
-
- /** \brief Clone the vertex properties and edges of all vertices between
- * two positions. */
- void cloneFollowSet(Position from, Position to, unsigned offset) override;
-
- /** \brief Build the prioritised list of edges out of our successor map. */
- void buildEdges() override;
-
- /** Construct an edge, called internally by \ref buildEdges. */
- void buildEdge(Position from, const PositionInfo &to);
-
- Position startState;
- Position startDotstarState;
- Position acceptState;
- Position acceptEodState;
- Position acceptNlEodState;
- Position acceptNlState;
-
- NFABuilder &builder; //!< \brief builder for the NFAGraph
-
- bool doPrefilter; //!< \brief we're building a prefiltering pattern
-
- /** \brief Map storing successors for each position. */
- map<Position, flat_set<PositionInfo>> successors;
-};
-
-} // namespace
-
-GlushkovBuildStateImpl::GlushkovBuildStateImpl(NFABuilder &b,
- bool prefilter) :
- startState(b.getStart()),
- startDotstarState(b.getStartDotStar()),
- acceptState(b.getAccept()),
- acceptEodState(b.getAcceptEOD()),
- acceptNlEodState(POS_UNINITIALIZED),
- acceptNlState(POS_UNINITIALIZED),
- builder(b),
- doPrefilter(prefilter)
-{
- // Our special nodes need special relationships.
- vector<PositionInfo> lasts, firsts;
-
- // start->startDs and startDs self-loop.
- lasts.push_back(startState);
- lasts.push_back(startDotstarState);
- firsts.push_back(startDotstarState);
- connectRegions(lasts, firsts);
-
- // accept to acceptEod edges already wired
-
- // XXX: a small hack to support vacuous NFAs: give start and startDs an
- // initial report ID.
- builder.setNodeReportID(startState, 0);
- builder.setNodeReportID(startDotstarState, 0);
-}
-
-static
-void checkEmbeddedEndAnchor(const PositionInfo &from,
- const vector<PositionInfo> &firsts) {
- if (!(from.flags & POS_FLAG_ONLY_ENDS)) {
- return;
- }
-
- for (const auto &first : firsts) {
- if (first.pos != GlushkovBuildStateImpl::POS_EPSILON) {
- /* can make it through the parse tree */
- throw ParseError("Embedded end anchors not supported.");
- }
- }
-}
-
-// Wire up the lasts of one component to the firsts of another
-void
-GlushkovBuildStateImpl::connectRegions(const vector<PositionInfo> &lasts,
- const vector<PositionInfo> &firsts) {
- for (const auto &last : lasts) {
- checkEmbeddedEndAnchor(last, firsts);
- connectSuccessors(last, firsts);
- }
-}
-
-static
-void filterEdges(const GlushkovBuildStateImpl &bs, const PositionInfo &from,
- vector<PositionInfo> &tolist) {
- if (from.pos == bs.startDotstarState) {
- // If we're connecting from start-dotstar, remove all caret flavoured
- // positions.
- CheckPositionFlags check(POS_FLAG_NOFLOAT);
- tolist.erase(remove_if(tolist.begin(), tolist.end(), check),
- tolist.end());
- if (from.flags & POS_FLAG_NOFLOAT) {
- tolist.clear();
- }
- } else if (from.pos == bs.startState) {
- // If we're connecting from start, we should remove any epsilons that
- // aren't caret flavoured.
- CheckUnflaggedEpsilon check;
- tolist.erase(remove_if(tolist.begin(), tolist.end(), check),
- tolist.end());
- CheckPositionFlags check2(POS_FLAG_MUST_FLOAT | POS_FLAG_NOFLOAT);
- tolist.erase(remove_if(tolist.begin(), tolist.end(), check2),
- tolist.end());
- }
-
- if (bs.builder.getAssertFlag(from.pos) & POS_FLAG_MULTILINE_START) {
- // If we have a (mildly boneheaded) pattern like /^$/m, we're right up
- // against the edge of what we can do without true assertion support.
- // Here we have an evil hack to prevent us plugging the \n generated by
- // the caret right into acceptEod (which is in the firsts of the
- // dollar).
- /* This is due to the 'interesting quirk' that multiline ^ does not
- * not match a newline at the end of buffer. */
- DEBUG_PRINTF("multiline start - no eod\n");
- tolist.erase(remove(tolist.begin(), tolist.end(), bs.acceptEodState),
- tolist.end());
- }
-}
-
-static
-Position makeNewlineAssertPos(GlushkovBuildState &bs) {
- NFABuilder &builder = bs.getBuilder();
- Position newline = builder.makePositions(1);
- builder.addCharReach(newline, CharReach('\n'));
- builder.setAssertFlag(newline, POS_FLAG_FIDDLE_ACCEPT);
- builder.setNodeReportID(newline, -1);
- return newline;
-}
-
-static
-void generateAccepts(GlushkovBuildStateImpl &bs, const PositionInfo &from,
- vector<PositionInfo> *tolist) {
- NFABuilder &builder = bs.getBuilder();
- u32 flags = from.flags;
-
- bool require_eod = flags & POS_FLAG_WIRE_EOD;
- bool require_nl_eod = flags & POS_FLAG_WIRE_NL_EOD
- && !(flags & POS_FLAG_NO_NL_EOD);
- bool require_nl_accept = (flags & POS_FLAG_WIRE_NL_ACCEPT)
- && !(flags & POS_FLAG_NO_NL_ACCEPT);
-
- bool require_accept = !(flags & POS_FLAG_ONLY_ENDS);
-
- if (require_eod) {
- tolist->push_back(bs.acceptEodState);
- }
-
- if (require_nl_accept) {
- if (bs.acceptNlState == GlushkovBuildState::POS_UNINITIALIZED) {
- Position newline = makeNewlineAssertPos(bs);
- bs.addSuccessor(newline, builder.getAccept());
- bs.acceptNlState = newline;
- }
- tolist->push_back(bs.acceptNlState);
- }
-
- if (require_nl_eod) {
- if (bs.acceptNlEodState == GlushkovBuildState::POS_UNINITIALIZED) {
- Position newline = makeNewlineAssertPos(bs);
- bs.addSuccessor(newline, builder.getAcceptEOD());
- bs.acceptNlEodState = newline;
- }
- tolist->push_back(bs.acceptNlEodState);
- }
-
- if (require_accept) {
- tolist->push_back(bs.acceptState);
- }
-}
-
-void GlushkovBuildStateImpl::connectAccepts(const vector<PositionInfo> &lasts) {
- for (const auto &last : lasts) {
- vector<PositionInfo> accepts;
- generateAccepts(*this, last, &accepts);
- connectSuccessors(last, accepts);
- }
-}
-
-#if defined(DEBUG) || defined(DUMP_SUPPORT)
-
-static UNUSED
-string dumpCaptures(const PositionInfo &p) {
- ostringstream oss;
-
- if (p.flags & POS_FLAG_NOFLOAT) {
- oss << "<nofloat>";
- }
- if (p.flags & POS_FLAG_MUST_FLOAT) {
- oss << "<must_float>";
- }
- if (p.flags & POS_FLAG_FIDDLE_ACCEPT) {
- oss << "<fiddle_accept>";
- }
- if (p.flags & POS_FLAG_ONLY_ENDS) {
- oss << "<only_ends>";
- }
- if (p.flags & POS_FLAG_NO_NL_EOD) {
- oss << "<no_nl_eod>";
- }
- if (p.flags & POS_FLAG_NO_NL_ACCEPT) {
- oss << "<no_nl_acc>";
- }
-
- return oss.str();
-}
-
-#endif // DEBUG || DUMP_SUPPORT
-
-void GlushkovBuildStateImpl::connectSuccessors(const PositionInfo &from,
- vector<PositionInfo> tolist) {
- /* note: tolist maybe modified for our own internal use -> not a reference */
- assert(from.pos != POS_EPSILON);
- assert(from.pos != POS_UNINITIALIZED);
- assert(find(tolist.begin(), tolist.end(), POS_UNINITIALIZED)
- == tolist.end());
-
- DEBUG_PRINTF("FROM = %u%s TO = %s\n", from.pos, dumpCaptures(from).c_str(),
- dumpPositions(tolist.begin(), tolist.end()).c_str());
-
- /* prevent creation of edges with invalid assertions */
- filterEdges(*this, from, tolist);
-
- if (from.flags & POS_FLAG_FIDDLE_ACCEPT) {
- auto accept = find(tolist.begin(), tolist.end(), acceptState);
- if (accept != tolist.end()) {
- DEBUG_PRINTF("accept through -1 offset-adjusting dot\n");
- Position fakedot = builder.makePositions(1);
- builder.addCharReach(fakedot, CharReach(0x00, 0xff));
- builder.setNodeReportID(fakedot, -1);
- addSuccessor(fakedot, acceptState);
- *accept = fakedot;
- } else {
- // We might lead to accept via an assertion vertex, so we add the
- // offset adj to this vertex itself. Used for cases like /^\B/m,
- // which should match only at 0 for '\n'.
- builder.setNodeReportID(from.pos, -1);
- }
-
- assert(find(tolist.begin(), tolist.end(), acceptState) == tolist.end());
- }
-
- auto &succ = successors[from.pos];
-
- DEBUG_PRINTF("connect %u -> %s\n", from.pos,
- dumpPositions(tolist.begin(), tolist.end()).c_str());
- DEBUG_PRINTF("%u curr succ: %s\n", from.pos,
- dumpPositions(begin(succ), end(succ)).c_str());
-
- for (const auto &to : tolist) {
- if (to.pos != POS_EPSILON) {
- succ.insert(to);
- }
- }
-
- DEBUG_PRINTF("%u succ: %s\n", from.pos,
- dumpPositions(begin(succ), end(succ)).c_str());
-}
-
-void GlushkovBuildStateImpl::addSuccessor(Position from, Position to) {
- DEBUG_PRINTF("connect %u -> %u\n", from, to);
- assert(from != POS_EPSILON && from != POS_UNINITIALIZED);
- assert(to != POS_EPSILON && to != POS_UNINITIALIZED);
-
- auto &succ = successors[from];
- succ.insert(to);
-
- DEBUG_PRINTF("%u succ: %s\n", from,
- dumpPositions(begin(succ), end(succ)).c_str());
-}
-
-void GlushkovBuildStateImpl::cloneFollowSet(Position first, Position last,
- unsigned offset) {
- assert(first <= last);
-
- // Clone vertex properties (reachability, etc)
- builder.cloneRegion(first, last, offset);
-
- /* Clone the successors of all the positions between first and last
- * inclusive, producing a new set of positions starting at (first +
- * offset). */
- for (Position i = first; i <= last; i++) {
- // This should be a new position.
- assert(successors[i + offset].empty());
-
- for (const PositionInfo &to : successors[i]) {
- if (to.pos >= first && to.pos <= last) {
- PositionInfo clone(to);
- clone.pos += offset;
- DEBUG_PRINTF("clone: %u -> %u\n", i + offset, clone.pos);
- successors[i + offset].insert(clone);
- } else {
- // There shouldn't be any stray edges leading out of this
- // region!
- assert(0);
- }
- }
- }
-}
-
-void GlushkovBuildStateImpl::buildEdge(Position from, const PositionInfo &to) {
- // Guard against embedded anchors
- if (to == startState) {
- /* can make it through the parse tree */
- throw ParseError("Embedded start anchors not supported.");
- }
-
- assert(to.pos != POS_UNINITIALIZED);
- assert(to.pos != POS_EPSILON);
-
- if (builder.hasEdge(from, to.pos)) {
- return;
- }
-
- builder.addEdge(from, to.pos);
-}
-
-void GlushkovBuildStateImpl::buildEdges() {
- // Create all the edges and track which vertices are asserts which need to
- // be removed later.
- for (const auto &m : successors) {
- const Position from = m.first;
- for (const auto &to : m.second) {
- buildEdge(from, to);
- }
- }
-}
-
-// Construct a usable GlushkovBuildState for the outside world.
-unique_ptr<GlushkovBuildState> makeGlushkovBuildState(NFABuilder &b,
- bool prefilter) {
- return ue2::make_unique<GlushkovBuildStateImpl>(b, prefilter);
-}
-
-// free functions for utility use
-
-/** \brief Eliminate lower-priority duplicate PositionInfo entries.
- *
- * Scans through a list of positions and retains only the highest priority
- * version of a given (position, flags) entry. */
-void cleanupPositions(vector<PositionInfo> &a) {
+
+#include <algorithm>
+#include <iterator>
+#include <limits>
+#include <map>
+#include <utility>
+
+#if defined(DEBUG) || defined(DUMP_SUPPORT)
+#include <ostream>
+#include <sstream>
+#endif
+
+using namespace std;
+
+namespace ue2 {
+
+/** \brief Represents an uninitialized state. */
+const Position GlushkovBuildState::POS_UNINITIALIZED =
+ numeric_limits<Position>::max();
+
+/** \brief Represents an epsilon transition in the firsts of a component. */
+const Position GlushkovBuildState::POS_EPSILON =
+ numeric_limits<Position>::max() - 1;
+
+GlushkovBuildState::~GlushkovBuildState() { }
+
+namespace /* anonymous */ {
+
+class CheckPositionFlags {
+public:
+ explicit CheckPositionFlags(int fl) : flags(fl) {}
+ bool operator()(const PositionInfo &p) const {
+ return (p.flags & flags) == flags;
+ }
+private:
+ int flags;
+};
+
+class CheckUnflaggedEpsilon {
+public:
+ bool operator()(const PositionInfo &p) const {
+ return p.pos == GlushkovBuildState::POS_EPSILON && p.flags == 0;
+ }
+};
+
+/** \brief Concrete impl of the GlushkovBuildState interface. */
+class GlushkovBuildStateImpl : public GlushkovBuildState {
+public:
+ GlushkovBuildStateImpl(NFABuilder &b, bool prefilter);
+
+ /** \brief Returns a reference to the NFABuilder being used. */
+ NFABuilder &getBuilder() override { return builder; }
+
+ /** \brief Returns a const reference to the NFABuilder being used. */
+ const NFABuilder &getBuilder() const override { return builder; }
+
+ /** \brief Wire up the lasts of one component to the firsts of another. */
+ void connectRegions(const vector<PositionInfo> &lasts,
+ const vector<PositionInfo> &firsts) override;
+
+ /** \brief Wire the lasts of the main sequence to accepts. */
+ void connectAccepts(const vector<PositionInfo> &lasts) override;
+
+ /** \brief Wire up a single last to a list of firsts. */
+ void connectSuccessors(const PositionInfo &last,
+ vector<PositionInfo> firsts);
+
+ /** Wire up a pair of positions. */
+ void addSuccessor(Position from, Position to) override;
+
+ /** \brief Clone the vertex properties and edges of all vertices between
+ * two positions. */
+ void cloneFollowSet(Position from, Position to, unsigned offset) override;
+
+ /** \brief Build the prioritised list of edges out of our successor map. */
+ void buildEdges() override;
+
+ /** Construct an edge, called internally by \ref buildEdges. */
+ void buildEdge(Position from, const PositionInfo &to);
+
+ Position startState;
+ Position startDotstarState;
+ Position acceptState;
+ Position acceptEodState;
+ Position acceptNlEodState;
+ Position acceptNlState;
+
+ NFABuilder &builder; //!< \brief builder for the NFAGraph
+
+ bool doPrefilter; //!< \brief we're building a prefiltering pattern
+
+ /** \brief Map storing successors for each position. */
+ map<Position, flat_set<PositionInfo>> successors;
+};
+
+} // namespace
+
+GlushkovBuildStateImpl::GlushkovBuildStateImpl(NFABuilder &b,
+ bool prefilter) :
+ startState(b.getStart()),
+ startDotstarState(b.getStartDotStar()),
+ acceptState(b.getAccept()),
+ acceptEodState(b.getAcceptEOD()),
+ acceptNlEodState(POS_UNINITIALIZED),
+ acceptNlState(POS_UNINITIALIZED),
+ builder(b),
+ doPrefilter(prefilter)
+{
+ // Our special nodes need special relationships.
+ vector<PositionInfo> lasts, firsts;
+
+ // start->startDs and startDs self-loop.
+ lasts.push_back(startState);
+ lasts.push_back(startDotstarState);
+ firsts.push_back(startDotstarState);
+ connectRegions(lasts, firsts);
+
+ // accept to acceptEod edges already wired
+
+ // XXX: a small hack to support vacuous NFAs: give start and startDs an
+ // initial report ID.
+ builder.setNodeReportID(startState, 0);
+ builder.setNodeReportID(startDotstarState, 0);
+}
+
+static
+void checkEmbeddedEndAnchor(const PositionInfo &from,
+ const vector<PositionInfo> &firsts) {
+ if (!(from.flags & POS_FLAG_ONLY_ENDS)) {
+ return;
+ }
+
+ for (const auto &first : firsts) {
+ if (first.pos != GlushkovBuildStateImpl::POS_EPSILON) {
+ /* can make it through the parse tree */
+ throw ParseError("Embedded end anchors not supported.");
+ }
+ }
+}
+
+// Wire up the lasts of one component to the firsts of another
+void
+GlushkovBuildStateImpl::connectRegions(const vector<PositionInfo> &lasts,
+ const vector<PositionInfo> &firsts) {
+ for (const auto &last : lasts) {
+ checkEmbeddedEndAnchor(last, firsts);
+ connectSuccessors(last, firsts);
+ }
+}
+
+static
+void filterEdges(const GlushkovBuildStateImpl &bs, const PositionInfo &from,
+ vector<PositionInfo> &tolist) {
+ if (from.pos == bs.startDotstarState) {
+ // If we're connecting from start-dotstar, remove all caret flavoured
+ // positions.
+ CheckPositionFlags check(POS_FLAG_NOFLOAT);
+ tolist.erase(remove_if(tolist.begin(), tolist.end(), check),
+ tolist.end());
+ if (from.flags & POS_FLAG_NOFLOAT) {
+ tolist.clear();
+ }
+ } else if (from.pos == bs.startState) {
+ // If we're connecting from start, we should remove any epsilons that
+ // aren't caret flavoured.
+ CheckUnflaggedEpsilon check;
+ tolist.erase(remove_if(tolist.begin(), tolist.end(), check),
+ tolist.end());
+ CheckPositionFlags check2(POS_FLAG_MUST_FLOAT | POS_FLAG_NOFLOAT);
+ tolist.erase(remove_if(tolist.begin(), tolist.end(), check2),
+ tolist.end());
+ }
+
+ if (bs.builder.getAssertFlag(from.pos) & POS_FLAG_MULTILINE_START) {
+ // If we have a (mildly boneheaded) pattern like /^$/m, we're right up
+ // against the edge of what we can do without true assertion support.
+ // Here we have an evil hack to prevent us plugging the \n generated by
+ // the caret right into acceptEod (which is in the firsts of the
+ // dollar).
+ /* This is due to the 'interesting quirk' that multiline ^ does not
+ * not match a newline at the end of buffer. */
+ DEBUG_PRINTF("multiline start - no eod\n");
+ tolist.erase(remove(tolist.begin(), tolist.end(), bs.acceptEodState),
+ tolist.end());
+ }
+}
+
+static
+Position makeNewlineAssertPos(GlushkovBuildState &bs) {
+ NFABuilder &builder = bs.getBuilder();
+ Position newline = builder.makePositions(1);
+ builder.addCharReach(newline, CharReach('\n'));
+ builder.setAssertFlag(newline, POS_FLAG_FIDDLE_ACCEPT);
+ builder.setNodeReportID(newline, -1);
+ return newline;
+}
+
+static
+void generateAccepts(GlushkovBuildStateImpl &bs, const PositionInfo &from,
+ vector<PositionInfo> *tolist) {
+ NFABuilder &builder = bs.getBuilder();
+ u32 flags = from.flags;
+
+ bool require_eod = flags & POS_FLAG_WIRE_EOD;
+ bool require_nl_eod = flags & POS_FLAG_WIRE_NL_EOD
+ && !(flags & POS_FLAG_NO_NL_EOD);
+ bool require_nl_accept = (flags & POS_FLAG_WIRE_NL_ACCEPT)
+ && !(flags & POS_FLAG_NO_NL_ACCEPT);
+
+ bool require_accept = !(flags & POS_FLAG_ONLY_ENDS);
+
+ if (require_eod) {
+ tolist->push_back(bs.acceptEodState);
+ }
+
+ if (require_nl_accept) {
+ if (bs.acceptNlState == GlushkovBuildState::POS_UNINITIALIZED) {
+ Position newline = makeNewlineAssertPos(bs);
+ bs.addSuccessor(newline, builder.getAccept());
+ bs.acceptNlState = newline;
+ }
+ tolist->push_back(bs.acceptNlState);
+ }
+
+ if (require_nl_eod) {
+ if (bs.acceptNlEodState == GlushkovBuildState::POS_UNINITIALIZED) {
+ Position newline = makeNewlineAssertPos(bs);
+ bs.addSuccessor(newline, builder.getAcceptEOD());
+ bs.acceptNlEodState = newline;
+ }
+ tolist->push_back(bs.acceptNlEodState);
+ }
+
+ if (require_accept) {
+ tolist->push_back(bs.acceptState);
+ }
+}
+
+void GlushkovBuildStateImpl::connectAccepts(const vector<PositionInfo> &lasts) {
+ for (const auto &last : lasts) {
+ vector<PositionInfo> accepts;
+ generateAccepts(*this, last, &accepts);
+ connectSuccessors(last, accepts);
+ }
+}
+
+#if defined(DEBUG) || defined(DUMP_SUPPORT)
+
+static UNUSED
+string dumpCaptures(const PositionInfo &p) {
+ ostringstream oss;
+
+ if (p.flags & POS_FLAG_NOFLOAT) {
+ oss << "<nofloat>";
+ }
+ if (p.flags & POS_FLAG_MUST_FLOAT) {
+ oss << "<must_float>";
+ }
+ if (p.flags & POS_FLAG_FIDDLE_ACCEPT) {
+ oss << "<fiddle_accept>";
+ }
+ if (p.flags & POS_FLAG_ONLY_ENDS) {
+ oss << "<only_ends>";
+ }
+ if (p.flags & POS_FLAG_NO_NL_EOD) {
+ oss << "<no_nl_eod>";
+ }
+ if (p.flags & POS_FLAG_NO_NL_ACCEPT) {
+ oss << "<no_nl_acc>";
+ }
+
+ return oss.str();
+}
+
+#endif // DEBUG || DUMP_SUPPORT
+
+void GlushkovBuildStateImpl::connectSuccessors(const PositionInfo &from,
+ vector<PositionInfo> tolist) {
+ /* note: tolist maybe modified for our own internal use -> not a reference */
+ assert(from.pos != POS_EPSILON);
+ assert(from.pos != POS_UNINITIALIZED);
+ assert(find(tolist.begin(), tolist.end(), POS_UNINITIALIZED)
+ == tolist.end());
+
+ DEBUG_PRINTF("FROM = %u%s TO = %s\n", from.pos, dumpCaptures(from).c_str(),
+ dumpPositions(tolist.begin(), tolist.end()).c_str());
+
+ /* prevent creation of edges with invalid assertions */
+ filterEdges(*this, from, tolist);
+
+ if (from.flags & POS_FLAG_FIDDLE_ACCEPT) {
+ auto accept = find(tolist.begin(), tolist.end(), acceptState);
+ if (accept != tolist.end()) {
+ DEBUG_PRINTF("accept through -1 offset-adjusting dot\n");
+ Position fakedot = builder.makePositions(1);
+ builder.addCharReach(fakedot, CharReach(0x00, 0xff));
+ builder.setNodeReportID(fakedot, -1);
+ addSuccessor(fakedot, acceptState);
+ *accept = fakedot;
+ } else {
+ // We might lead to accept via an assertion vertex, so we add the
+ // offset adj to this vertex itself. Used for cases like /^\B/m,
+ // which should match only at 0 for '\n'.
+ builder.setNodeReportID(from.pos, -1);
+ }
+
+ assert(find(tolist.begin(), tolist.end(), acceptState) == tolist.end());
+ }
+
+ auto &succ = successors[from.pos];
+
+ DEBUG_PRINTF("connect %u -> %s\n", from.pos,
+ dumpPositions(tolist.begin(), tolist.end()).c_str());
+ DEBUG_PRINTF("%u curr succ: %s\n", from.pos,
+ dumpPositions(begin(succ), end(succ)).c_str());
+
+ for (const auto &to : tolist) {
+ if (to.pos != POS_EPSILON) {
+ succ.insert(to);
+ }
+ }
+
+ DEBUG_PRINTF("%u succ: %s\n", from.pos,
+ dumpPositions(begin(succ), end(succ)).c_str());
+}
+
+void GlushkovBuildStateImpl::addSuccessor(Position from, Position to) {
+ DEBUG_PRINTF("connect %u -> %u\n", from, to);
+ assert(from != POS_EPSILON && from != POS_UNINITIALIZED);
+ assert(to != POS_EPSILON && to != POS_UNINITIALIZED);
+
+ auto &succ = successors[from];
+ succ.insert(to);
+
+ DEBUG_PRINTF("%u succ: %s\n", from,
+ dumpPositions(begin(succ), end(succ)).c_str());
+}
+
+void GlushkovBuildStateImpl::cloneFollowSet(Position first, Position last,
+ unsigned offset) {
+ assert(first <= last);
+
+ // Clone vertex properties (reachability, etc)
+ builder.cloneRegion(first, last, offset);
+
+ /* Clone the successors of all the positions between first and last
+ * inclusive, producing a new set of positions starting at (first +
+ * offset). */
+ for (Position i = first; i <= last; i++) {
+ // This should be a new position.
+ assert(successors[i + offset].empty());
+
+ for (const PositionInfo &to : successors[i]) {
+ if (to.pos >= first && to.pos <= last) {
+ PositionInfo clone(to);
+ clone.pos += offset;
+ DEBUG_PRINTF("clone: %u -> %u\n", i + offset, clone.pos);
+ successors[i + offset].insert(clone);
+ } else {
+ // There shouldn't be any stray edges leading out of this
+ // region!
+ assert(0);
+ }
+ }
+ }
+}
+
+void GlushkovBuildStateImpl::buildEdge(Position from, const PositionInfo &to) {
+ // Guard against embedded anchors
+ if (to == startState) {
+ /* can make it through the parse tree */
+ throw ParseError("Embedded start anchors not supported.");
+ }
+
+ assert(to.pos != POS_UNINITIALIZED);
+ assert(to.pos != POS_EPSILON);
+
+ if (builder.hasEdge(from, to.pos)) {
+ return;
+ }
+
+ builder.addEdge(from, to.pos);
+}
+
+void GlushkovBuildStateImpl::buildEdges() {
+ // Create all the edges and track which vertices are asserts which need to
+ // be removed later.
+ for (const auto &m : successors) {
+ const Position from = m.first;
+ for (const auto &to : m.second) {
+ buildEdge(from, to);
+ }
+ }
+}
+
+// Construct a usable GlushkovBuildState for the outside world.
+unique_ptr<GlushkovBuildState> makeGlushkovBuildState(NFABuilder &b,
+ bool prefilter) {
+ return ue2::make_unique<GlushkovBuildStateImpl>(b, prefilter);
+}
+
+// free functions for utility use
+
+/** \brief Eliminate lower-priority duplicate PositionInfo entries.
+ *
+ * Scans through a list of positions and retains only the highest priority
+ * version of a given (position, flags) entry. */
+void cleanupPositions(vector<PositionInfo> &a) {
ue2_unordered_set<pair<Position, int>> seen;
-
- vector<PositionInfo> out;
- out.reserve(a.size()); // output should be close to input in size.
-
- for (const auto &p : a) {
- if (seen.emplace(p.pos, p.flags).second) {
- out.push_back(p); // first encounter
- }
- }
-
- DEBUG_PRINTF("in %zu; out %zu\n", a.size(), out.size());
- a.swap(out);
-}
-
-static
-vector<PositionInfo>::iterator
-replaceElemWithSequence(vector<PositionInfo> &dest,
- vector<PositionInfo>::iterator &victim,
- const vector<PositionInfo> &replacement) {
- auto past = dest.erase(victim);
- size_t d = distance(dest.begin(), past) + replacement.size();
- dest.insert(past, replacement.begin(), replacement.end());
- /* recalc past as iterator may have been invalidated */
- return dest.begin() + d;
-}
-
-/** \brief Replace all epsilons with the given positions.
- *
- * Replace epsilons in a firsts list with another given firsts list. Note: the
- * firsts lists must come from disjoint sets of components. If no epsilons are
- * in the first firsts list the source is appended to the end.
- */
-void replaceEpsilons(vector<PositionInfo> &target,
- const vector<PositionInfo> &source) {
- auto found =
- find(target.begin(), target.end(), GlushkovBuildState::POS_EPSILON);
-
- if (found == target.end()) {
- // no epsilons to replace, push on to the end
- target.insert(target.end(), source.begin(), source.end());
- return;
- }
-
- while (found != target.end()) {
- checkEmbeddedEndAnchor(*found, source);
-
- // replace this epsilon with a copy of source with the same flags
- vector<PositionInfo> newsource(source);
- for (auto &pos : newsource) {
- pos.flags |= found->flags;
- }
-
- found = replaceElemWithSequence(target, found, newsource);
- // find the next epsilon
- found = find(found, target.end(), GlushkovBuildState::POS_EPSILON);
- }
-
- cleanupPositions(target);
-}
-
-#ifdef DUMP_SUPPORT
-
-void dump(ostream &os, const PositionInfo &p) {
- if (p.pos == GlushkovBuildState::POS_EPSILON) {
- os << "epsilon";
- } else {
- os << p.pos;
- }
-
- os << dumpCaptures(p);
-}
-
-#endif // DUMP_SUPPORT
-
-} // namespace ue2
+
+ vector<PositionInfo> out;
+ out.reserve(a.size()); // output should be close to input in size.
+
+ for (const auto &p : a) {
+ if (seen.emplace(p.pos, p.flags).second) {
+ out.push_back(p); // first encounter
+ }
+ }
+
+ DEBUG_PRINTF("in %zu; out %zu\n", a.size(), out.size());
+ a.swap(out);
+}
+
+static
+vector<PositionInfo>::iterator
+replaceElemWithSequence(vector<PositionInfo> &dest,
+ vector<PositionInfo>::iterator &victim,
+ const vector<PositionInfo> &replacement) {
+ auto past = dest.erase(victim);
+ size_t d = distance(dest.begin(), past) + replacement.size();
+ dest.insert(past, replacement.begin(), replacement.end());
+ /* recalc past as iterator may have been invalidated */
+ return dest.begin() + d;
+}
+
+/** \brief Replace all epsilons with the given positions.
+ *
+ * Replace epsilons in a firsts list with another given firsts list. Note: the
+ * firsts lists must come from disjoint sets of components. If no epsilons are
+ * in the first firsts list the source is appended to the end.
+ */
+void replaceEpsilons(vector<PositionInfo> &target,
+ const vector<PositionInfo> &source) {
+ auto found =
+ find(target.begin(), target.end(), GlushkovBuildState::POS_EPSILON);
+
+ if (found == target.end()) {
+ // no epsilons to replace, push on to the end
+ target.insert(target.end(), source.begin(), source.end());
+ return;
+ }
+
+ while (found != target.end()) {
+ checkEmbeddedEndAnchor(*found, source);
+
+ // replace this epsilon with a copy of source with the same flags
+ vector<PositionInfo> newsource(source);
+ for (auto &pos : newsource) {
+ pos.flags |= found->flags;
+ }
+
+ found = replaceElemWithSequence(target, found, newsource);
+ // find the next epsilon
+ found = find(found, target.end(), GlushkovBuildState::POS_EPSILON);
+ }
+
+ cleanupPositions(target);
+}
+
+#ifdef DUMP_SUPPORT
+
+void dump(ostream &os, const PositionInfo &p) {
+ if (p.pos == GlushkovBuildState::POS_EPSILON) {
+ os << "epsilon";
+ } else {
+ os << p.pos;
+ }
+
+ os << dumpCaptures(p);
+}
+
+#endif // DUMP_SUPPORT
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/buildstate.h b/contrib/libs/hyperscan/src/parser/buildstate.h
index 89d71f7160..5ddaf9b238 100644
--- a/contrib/libs/hyperscan/src/parser/buildstate.h
+++ b/contrib/libs/hyperscan/src/parser/buildstate.h
@@ -1,103 +1,103 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Glushkov construction.
- */
-
-#ifndef BUILDSTATE_H
-#define BUILDSTATE_H
-
-#include "ue2common.h"
-#include "position.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Glushkov construction.
+ */
+
+#ifndef BUILDSTATE_H
+#define BUILDSTATE_H
+
+#include "ue2common.h"
+#include "position.h"
#include "util/noncopyable.h"
-
-#include <memory>
-#include <vector>
-
-namespace ue2 {
-
-class NFABuilder;
-class PositionInfo;
-
-/** \brief Machinery for Glushkov construction.
- *
- * Abstract base class; use \ref makeGlushkovBuildState to get one of these you
- * can use. */
+
+#include <memory>
+#include <vector>
+
+namespace ue2 {
+
+class NFABuilder;
+class PositionInfo;
+
+/** \brief Machinery for Glushkov construction.
+ *
+ * Abstract base class; use \ref makeGlushkovBuildState to get one of these you
+ * can use. */
class GlushkovBuildState : noncopyable {
-public:
- /** \brief Represents an uninitialized state. */
- static const Position POS_UNINITIALIZED;
-
- /** \brief Represents an epsilon transition in the firsts of a component. */
- static const Position POS_EPSILON;
-
- virtual ~GlushkovBuildState();
-
- /** \brief Returns a reference to the NFABuilder being used. */
- virtual NFABuilder &getBuilder() = 0;
-
- /** \brief Returns a const reference to the NFABuilder being used. */
- virtual const NFABuilder &getBuilder() const = 0;
-
- /** \brief Wire up edges from the lasts of one component to the firsts of
- * another. */
- virtual void connectRegions(const std::vector<PositionInfo> &lasts,
- const std::vector<PositionInfo> &firsts) = 0;
-
- /** \brief Wire the lasts of the main sequence to accepts. */
- virtual void connectAccepts(const std::vector<PositionInfo> &lasts) = 0;
-
- /** \brief Wire up a pair of positions. */
- virtual void addSuccessor(Position from, Position to) = 0;
-
- /** \brief Clone the vertex properties and edges of all vertices between
- * two positions. */
- virtual void cloneFollowSet(Position from, Position to, u32 offset) = 0;
-
- /** \brief Build the prioritised list of edges out of our successor map. */
- virtual void buildEdges() = 0;
-};
-
-/** \brief Returns a new GlushkovBuildState object. */
-std::unique_ptr<GlushkovBuildState> makeGlushkovBuildState(NFABuilder &b,
- bool prefilter);
-
-/** \brief Replace all epsilons with the given positions. */
-void replaceEpsilons(std::vector<PositionInfo> &target,
- const std::vector<PositionInfo> &source);
-
-/** \brief Eliminate lower-priority duplicate PositionInfo entries.
- *
- * Scans through a list of positions and retains only the highest priority
- * version of a given (position, flags) entry. */
-void cleanupPositions(std::vector<PositionInfo> &a);
-
-} // namespace ue2
-
-#endif
+public:
+ /** \brief Represents an uninitialized state. */
+ static const Position POS_UNINITIALIZED;
+
+ /** \brief Represents an epsilon transition in the firsts of a component. */
+ static const Position POS_EPSILON;
+
+ virtual ~GlushkovBuildState();
+
+ /** \brief Returns a reference to the NFABuilder being used. */
+ virtual NFABuilder &getBuilder() = 0;
+
+ /** \brief Returns a const reference to the NFABuilder being used. */
+ virtual const NFABuilder &getBuilder() const = 0;
+
+ /** \brief Wire up edges from the lasts of one component to the firsts of
+ * another. */
+ virtual void connectRegions(const std::vector<PositionInfo> &lasts,
+ const std::vector<PositionInfo> &firsts) = 0;
+
+ /** \brief Wire the lasts of the main sequence to accepts. */
+ virtual void connectAccepts(const std::vector<PositionInfo> &lasts) = 0;
+
+ /** \brief Wire up a pair of positions. */
+ virtual void addSuccessor(Position from, Position to) = 0;
+
+ /** \brief Clone the vertex properties and edges of all vertices between
+ * two positions. */
+ virtual void cloneFollowSet(Position from, Position to, u32 offset) = 0;
+
+ /** \brief Build the prioritised list of edges out of our successor map. */
+ virtual void buildEdges() = 0;
+};
+
+/** \brief Returns a new GlushkovBuildState object. */
+std::unique_ptr<GlushkovBuildState> makeGlushkovBuildState(NFABuilder &b,
+ bool prefilter);
+
+/** \brief Replace all epsilons with the given positions. */
+void replaceEpsilons(std::vector<PositionInfo> &target,
+ const std::vector<PositionInfo> &source);
+
+/** \brief Eliminate lower-priority duplicate PositionInfo entries.
+ *
+ * Scans through a list of positions and retains only the highest priority
+ * version of a given (position, flags) entry. */
+void cleanupPositions(std::vector<PositionInfo> &a);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/parser/check_refs.cpp b/contrib/libs/hyperscan/src/parser/check_refs.cpp
index 6a99fb3291..60b5b6ba77 100644
--- a/contrib/libs/hyperscan/src/parser/check_refs.cpp
+++ b/contrib/libs/hyperscan/src/parser/check_refs.cpp
@@ -1,122 +1,122 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Component tree analysis that checks that references (such as
- * back-refs, conditionals) have valid referents.
- */
-#include "check_refs.h"
-#include "ComponentBackReference.h"
-#include "ComponentCondReference.h"
-#include "ConstComponentVisitor.h"
-#include "parse_error.h"
-#include "util/container.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Component tree analysis that checks that references (such as
+ * back-refs, conditionals) have valid referents.
+ */
+#include "check_refs.h"
+#include "ComponentBackReference.h"
+#include "ComponentCondReference.h"
+#include "ConstComponentVisitor.h"
+#include "parse_error.h"
+#include "util/container.h"
#include "util/flat_containers.h"
-
-#include <sstream>
-
-using namespace std;
-
-namespace ue2 {
-
-/**
- * \brief Visitor that checks the validity of references against a known list
- * of indices and labels.
- */
-class ReferenceVisitor: public DefaultConstComponentVisitor {
-private:
- const size_t num_ids;
- const flat_set<string> &names;
-
-public:
- ReferenceVisitor(size_t num_groups, const flat_set<string> &targets)
- : num_ids(num_groups), names(targets) {}
-
- ~ReferenceVisitor() override;
-
- void invalid_index(const char *component, unsigned id) {
- assert(component);
- ostringstream str;
- str << "Invalid " << component << " to expression " << id << ".";
- throw ParseError(str.str());
- }
-
- void invalid_label(const char *component, const std::string &label) {
- assert(component);
- ostringstream str;
- str << "Invalid " << component << " to label '" << label << "'.";
- throw ParseError(str.str());
- }
-
+
+#include <sstream>
+
+using namespace std;
+
+namespace ue2 {
+
+/**
+ * \brief Visitor that checks the validity of references against a known list
+ * of indices and labels.
+ */
+class ReferenceVisitor: public DefaultConstComponentVisitor {
+private:
+ const size_t num_ids;
+ const flat_set<string> &names;
+
+public:
+ ReferenceVisitor(size_t num_groups, const flat_set<string> &targets)
+ : num_ids(num_groups), names(targets) {}
+
+ ~ReferenceVisitor() override;
+
+ void invalid_index(const char *component, unsigned id) {
+ assert(component);
+ ostringstream str;
+ str << "Invalid " << component << " to expression " << id << ".";
+ throw ParseError(str.str());
+ }
+
+ void invalid_label(const char *component, const std::string &label) {
+ assert(component);
+ ostringstream str;
+ str << "Invalid " << component << " to label '" << label << "'.";
+ throw ParseError(str.str());
+ }
+
using DefaultConstComponentVisitor::pre;
- void pre(const ComponentBackReference &c) override {
- if (c.ref_id) {
- if (c.ref_id >= num_ids) {
- invalid_index("back reference", c.ref_id);
- }
- } else {
- if (!contains(names, c.name)) {
- invalid_label("back reference", c.name);
- }
- }
- }
-
- void pre(const ComponentCondReference &c) override {
- switch (c.kind) {
- case ComponentCondReference::CONDITION_NUMBER:
- if (c.ref_id >= num_ids) {
- invalid_index("conditional reference", c.ref_id);
- }
- break;
- case ComponentCondReference::CONDITION_NAME:
- if (c.ref_name == "DEFINE") {
- // The string "DEFINE" is a special "always false" condition
- // used to define subroutines.
- break;
- }
- if (!contains(names, c.ref_name)) {
- invalid_label("conditional reference", c.ref_name);
- }
- break;
- case ComponentCondReference::CONDITION_ASSERTION:
- break;
- }
- }
-};
-
-// Out-of-line destructor to silence weak vtable warnings.
-ReferenceVisitor::~ReferenceVisitor() {}
-
-void checkReferences(const Component &root, unsigned int groupIndices,
+ void pre(const ComponentBackReference &c) override {
+ if (c.ref_id) {
+ if (c.ref_id >= num_ids) {
+ invalid_index("back reference", c.ref_id);
+ }
+ } else {
+ if (!contains(names, c.name)) {
+ invalid_label("back reference", c.name);
+ }
+ }
+ }
+
+ void pre(const ComponentCondReference &c) override {
+ switch (c.kind) {
+ case ComponentCondReference::CONDITION_NUMBER:
+ if (c.ref_id >= num_ids) {
+ invalid_index("conditional reference", c.ref_id);
+ }
+ break;
+ case ComponentCondReference::CONDITION_NAME:
+ if (c.ref_name == "DEFINE") {
+ // The string "DEFINE" is a special "always false" condition
+ // used to define subroutines.
+ break;
+ }
+ if (!contains(names, c.ref_name)) {
+ invalid_label("conditional reference", c.ref_name);
+ }
+ break;
+ case ComponentCondReference::CONDITION_ASSERTION:
+ break;
+ }
+ }
+};
+
+// Out-of-line destructor to silence weak vtable warnings.
+ReferenceVisitor::~ReferenceVisitor() {}
+
+void checkReferences(const Component &root, unsigned int groupIndices,
const flat_set<std::string> &groupNames) {
- ReferenceVisitor vis(groupIndices, groupNames);
- root.accept(vis);
-}
-
-} // namespace ue2
+ ReferenceVisitor vis(groupIndices, groupNames);
+ root.accept(vis);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/check_refs.h b/contrib/libs/hyperscan/src/parser/check_refs.h
index c7958119b3..26912fb8e4 100644
--- a/contrib/libs/hyperscan/src/parser/check_refs.h
+++ b/contrib/libs/hyperscan/src/parser/check_refs.h
@@ -1,52 +1,52 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Component tree analysis that checks that references (such as
- * back-refs, conditionals) have valid referents.
- */
-
+ * \brief Component tree analysis that checks that references (such as
+ * back-refs, conditionals) have valid referents.
+ */
+
#ifndef PARSER_CHECK_REFS_H
#define PARSER_CHECK_REFS_H
-
+
#include "util/flat_containers.h"
-#include <string>
-
-namespace ue2 {
-
-class Component;
-class ComponentSequence;
-
-void checkReferences(const Component &root, unsigned int groupIndices,
+#include <string>
+
+namespace ue2 {
+
+class Component;
+class ComponentSequence;
+
+void checkReferences(const Component &root, unsigned int groupIndices,
const flat_set<std::string> &groupNames);
-
-} // namespace ue2
-
+
+} // namespace ue2
+
#endif // PARSER_CHECK_REFS_H
diff --git a/contrib/libs/hyperscan/src/parser/dump.h b/contrib/libs/hyperscan/src/parser/dump.h
index 538a843e02..c920849272 100644
--- a/contrib/libs/hyperscan/src/parser/dump.h
+++ b/contrib/libs/hyperscan/src/parser/dump.h
@@ -1,48 +1,48 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef PARSER_DUMP_H_
-#define PARSER_DUMP_H_
-
-#ifdef DUMP_SUPPORT
-
-#include <ostream>
-
-namespace ue2 {
-
-class Component;
-
-/** \brief Dump a text representation of the given component tree. Only
- * available in DUMP_SUPPORT builds. */
-void dumpTree(std::ostream &os, const Component *const root);
-
-} // namespace ue2
-
-#endif // DUMP_SUPPORT
-
-#endif // PARSER_DUMP_H_
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef PARSER_DUMP_H_
+#define PARSER_DUMP_H_
+
+#ifdef DUMP_SUPPORT
+
+#include <ostream>
+
+namespace ue2 {
+
+class Component;
+
+/** \brief Dump a text representation of the given component tree. Only
+ * available in DUMP_SUPPORT builds. */
+void dumpTree(std::ostream &os, const Component *const root);
+
+} // namespace ue2
+
+#endif // DUMP_SUPPORT
+
+#endif // PARSER_DUMP_H_
diff --git a/contrib/libs/hyperscan/src/parser/parse_error.cpp b/contrib/libs/hyperscan/src/parser/parse_error.cpp
index 68725a586e..e7f60b2645 100644
--- a/contrib/libs/hyperscan/src/parser/parse_error.cpp
+++ b/contrib/libs/hyperscan/src/parser/parse_error.cpp
@@ -1,56 +1,56 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Parse/Compile exceptions.
- */
-
-
-#include "parse_error.h"
-
-#include <sstream>
-
-namespace ue2 {
-
-// this is just to get these out of the .h to avoid weak vtables
-
-ParseError::~ParseError() {}
-
-LocatedParseError::~LocatedParseError() {}
-
-void LocatedParseError::locate(size_t offset) {
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Parse/Compile exceptions.
+ */
+
+
+#include "parse_error.h"
+
+#include <sstream>
+
+namespace ue2 {
+
+// this is just to get these out of the .h to avoid weak vtables
+
+ParseError::~ParseError() {}
+
+LocatedParseError::~LocatedParseError() {}
+
+void LocatedParseError::locate(size_t offset) {
if (finalized) {
return;
}
- std::ostringstream str;
- str << reason << " at index " << offset << ".";
- reason = str.str();
+ std::ostringstream str;
+ str << reason << " at index " << offset << ".";
+ reason = str.str();
finalized = true;
-}
-
-}
+}
+
+}
diff --git a/contrib/libs/hyperscan/src/parser/parse_error.h b/contrib/libs/hyperscan/src/parser/parse_error.h
index 07169a6dc3..4556ed5e04 100644
--- a/contrib/libs/hyperscan/src/parser/parse_error.h
+++ b/contrib/libs/hyperscan/src/parser/parse_error.h
@@ -1,67 +1,67 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Parse/Compile exceptions.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Parse/Compile exceptions.
+ */
+
#ifndef PARSE_ERROR_H
#define PARSE_ERROR_H
-
-#include "util/compile_error.h"
-
-#include <string>
-
-namespace ue2 {
-
-/** \brief Error thrown internally by the Parser interface. */
-class ParseError : public CompileError {
-public:
- // Note: 'why' should describe why the error occurred and end with a
- // full stop, but no line break.
+
+#include "util/compile_error.h"
+
+#include <string>
+
+namespace ue2 {
+
+/** \brief Error thrown internally by the Parser interface. */
+class ParseError : public CompileError {
+public:
+ // Note: 'why' should describe why the error occurred and end with a
+ // full stop, but no line break.
explicit ParseError(std::string why) : CompileError(std::move(why)) {}
-
- ~ParseError() override;
-};
-
-class LocatedParseError : public ParseError {
-public:
+
+ ~ParseError() override;
+};
+
+class LocatedParseError : public ParseError {
+public:
explicit LocatedParseError(std::string why) : ParseError(".") {
reason = std::move(why); // don't use ParseError ctor
- }
-
- ~LocatedParseError() override;
-
- void locate(size_t offset);
+ }
+
+ ~LocatedParseError() override;
+
+ void locate(size_t offset);
private:
bool finalized = false; //!< true when locate() has been called.
-};
-
-} // namespace ue2
-
+};
+
+} // namespace ue2
+
#endif /* PARSE_ERROR_H */
diff --git a/contrib/libs/hyperscan/src/parser/parser_util.cpp b/contrib/libs/hyperscan/src/parser/parser_util.cpp
index 214b361af7..a064b9743b 100644
--- a/contrib/libs/hyperscan/src/parser/parser_util.cpp
+++ b/contrib/libs/hyperscan/src/parser/parser_util.cpp
@@ -1,48 +1,48 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Utilities (currently just ParseMode constructor)
- */
-
-
-#include "hs.h"
-#include "Parser.h"
-#include "ue2common.h"
-
-namespace ue2 {
-
-ParseMode::ParseMode(u32 hs_flags) :
- caseless(hs_flags & HS_FLAG_CASELESS),
- dotall(hs_flags & HS_FLAG_DOTALL),
- ignore_space(false),
- multiline(hs_flags & HS_FLAG_MULTILINE),
- ucp(hs_flags & HS_FLAG_UCP),
- utf8(hs_flags & HS_FLAG_UTF8) {}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Utilities (currently just ParseMode constructor)
+ */
+
+
+#include "hs.h"
+#include "Parser.h"
+#include "ue2common.h"
+
+namespace ue2 {
+
+ParseMode::ParseMode(u32 hs_flags) :
+ caseless(hs_flags & HS_FLAG_CASELESS),
+ dotall(hs_flags & HS_FLAG_DOTALL),
+ ignore_space(false),
+ multiline(hs_flags & HS_FLAG_MULTILINE),
+ ucp(hs_flags & HS_FLAG_UCP),
+ utf8(hs_flags & HS_FLAG_UTF8) {}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/position.h b/contrib/libs/hyperscan/src/parser/position.h
index 1913e88e65..184cdb291e 100644
--- a/contrib/libs/hyperscan/src/parser/position.h
+++ b/contrib/libs/hyperscan/src/parser/position.h
@@ -1,107 +1,107 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Per-position flags used during Glushkov construction, PositionInfo class.
- */
-
-#ifndef PARSER_POSITION_H
-#define PARSER_POSITION_H
-
-#include "ue2common.h"
-
-#include <set>
-
-namespace ue2 {
-
-#define POS_FLAG_NOFLOAT (1 << 0) //!< don't wire to start-dotstar
-#define POS_FLAG_MUST_FLOAT (1 << 1) //!< don't wire solely to start
-#define POS_FLAG_FIDDLE_ACCEPT (1 << 2) //!< add a dot with an offset adjustment when wiring to accept
-#define POS_FLAG_ASSERT_WORD_TO_NONWORD (1 << 3) //!< epsilon for word to nonword transition
-#define POS_FLAG_ASSERT_NONWORD_TO_WORD (1 << 4) //!< epsilon for nonword to word transition
-#define POS_FLAG_ASSERT_WORD_TO_WORD (1 << 5) //!< epsilon for word to word transition
-#define POS_FLAG_ASSERT_NONWORD_TO_NONWORD (1 << 6) //!< epsilon for nonword to nonword transition
-
-/** vertex created by cloning startDs, not considered part of the match.
- * mirrors POS_FLAG_FIDDLE_ACCEPT */
-#define POS_FLAG_VIRTUAL_START (1 << 7)
-
-/** multi-line ^ does not match \\n at end of buffer. As a result, we must never
- * wire the \\n from ^ to eod */
-#define POS_FLAG_MULTILINE_START (1 << 8)
-
-#define POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP (1 << 9)
-#define POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP (1 << 10)
-#define POS_FLAG_ASSERT_WORD_TO_WORD_UCP (1 << 11)
-#define POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP (1 << 12)
-
-#define POS_FLAG_ASSERT_NONWORD_TO_ANY (POS_FLAG_ASSERT_NONWORD_TO_NONWORD \
- | POS_FLAG_ASSERT_NONWORD_TO_WORD)
-#define POS_FLAG_ASSERT_WORD_TO_ANY (POS_FLAG_ASSERT_WORD_TO_NONWORD \
- | POS_FLAG_ASSERT_WORD_TO_WORD)
-
-#define POS_FLAG_ASSERT_ANY_TO_NONWORD (POS_FLAG_ASSERT_NONWORD_TO_NONWORD \
- | POS_FLAG_ASSERT_WORD_TO_NONWORD)
-#define POS_FLAG_ASSERT_ANY_TO_WORD (POS_FLAG_ASSERT_NONWORD_TO_WORD \
- | POS_FLAG_ASSERT_WORD_TO_WORD)
-
-#define POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP \
- (POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP \
- | POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP)
-#define POS_FLAG_ASSERT_WORD_TO_ANY_UCP (POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP \
- | POS_FLAG_ASSERT_WORD_TO_WORD_UCP)
-
-#define POS_FLAG_ASSERT_ANY_TO_NONWORD_UCP \
- (POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP \
- | POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP)
-#define POS_FLAG_ASSERT_ANY_TO_WORD_UCP (POS_FLAG_ASSERT_WORD_TO_WORD_UCP \
- | POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP)
-
-#define UCP_ASSERT_FLAGS (POS_FLAG_ASSERT_WORD_TO_ANY_UCP \
- | POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP)
-
-#define NON_UCP_ASSERT_FLAGS (POS_FLAG_ASSERT_WORD_TO_ANY \
- | POS_FLAG_ASSERT_NONWORD_TO_ANY)
-
-/** do not wire to accept or other pos; may still wire to eod, etc if
- * instructed */
-#define POS_FLAG_ONLY_ENDS (1 << 23)
-
-#define POS_FLAG_WIRE_EOD (1 << 24) /**< wire to accept eod */
-#define POS_FLAG_WIRE_NL_EOD (1 << 25) /**< wire to nl before accept eod */
-#define POS_FLAG_WIRE_NL_ACCEPT (1 << 26) /**< wire to nl before accept */
-#define POS_FLAG_NO_NL_EOD (1 << 27) /**< disallow nl before accept eod */
-#define POS_FLAG_NO_NL_ACCEPT (1 << 28) /**< disallow nl before accept */
-
-/** \brief Parse and Glushkov construction use only. State number within the
- * NFA as it is being constructed. */
-typedef u32 Position;
-
-} // namespace ue2
-
-#endif // PARSER_POSITION_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Per-position flags used during Glushkov construction, PositionInfo class.
+ */
+
+#ifndef PARSER_POSITION_H
+#define PARSER_POSITION_H
+
+#include "ue2common.h"
+
+#include <set>
+
+namespace ue2 {
+
+#define POS_FLAG_NOFLOAT (1 << 0) //!< don't wire to start-dotstar
+#define POS_FLAG_MUST_FLOAT (1 << 1) //!< don't wire solely to start
+#define POS_FLAG_FIDDLE_ACCEPT (1 << 2) //!< add a dot with an offset adjustment when wiring to accept
+#define POS_FLAG_ASSERT_WORD_TO_NONWORD (1 << 3) //!< epsilon for word to nonword transition
+#define POS_FLAG_ASSERT_NONWORD_TO_WORD (1 << 4) //!< epsilon for nonword to word transition
+#define POS_FLAG_ASSERT_WORD_TO_WORD (1 << 5) //!< epsilon for word to word transition
+#define POS_FLAG_ASSERT_NONWORD_TO_NONWORD (1 << 6) //!< epsilon for nonword to nonword transition
+
+/** vertex created by cloning startDs, not considered part of the match.
+ * mirrors POS_FLAG_FIDDLE_ACCEPT */
+#define POS_FLAG_VIRTUAL_START (1 << 7)
+
+/** multi-line ^ does not match \\n at end of buffer. As a result, we must never
+ * wire the \\n from ^ to eod */
+#define POS_FLAG_MULTILINE_START (1 << 8)
+
+#define POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP (1 << 9)
+#define POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP (1 << 10)
+#define POS_FLAG_ASSERT_WORD_TO_WORD_UCP (1 << 11)
+#define POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP (1 << 12)
+
+#define POS_FLAG_ASSERT_NONWORD_TO_ANY (POS_FLAG_ASSERT_NONWORD_TO_NONWORD \
+ | POS_FLAG_ASSERT_NONWORD_TO_WORD)
+#define POS_FLAG_ASSERT_WORD_TO_ANY (POS_FLAG_ASSERT_WORD_TO_NONWORD \
+ | POS_FLAG_ASSERT_WORD_TO_WORD)
+
+#define POS_FLAG_ASSERT_ANY_TO_NONWORD (POS_FLAG_ASSERT_NONWORD_TO_NONWORD \
+ | POS_FLAG_ASSERT_WORD_TO_NONWORD)
+#define POS_FLAG_ASSERT_ANY_TO_WORD (POS_FLAG_ASSERT_NONWORD_TO_WORD \
+ | POS_FLAG_ASSERT_WORD_TO_WORD)
+
+#define POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP \
+ (POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP \
+ | POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP)
+#define POS_FLAG_ASSERT_WORD_TO_ANY_UCP (POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP \
+ | POS_FLAG_ASSERT_WORD_TO_WORD_UCP)
+
+#define POS_FLAG_ASSERT_ANY_TO_NONWORD_UCP \
+ (POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP \
+ | POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP)
+#define POS_FLAG_ASSERT_ANY_TO_WORD_UCP (POS_FLAG_ASSERT_WORD_TO_WORD_UCP \
+ | POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP)
+
+#define UCP_ASSERT_FLAGS (POS_FLAG_ASSERT_WORD_TO_ANY_UCP \
+ | POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP)
+
+#define NON_UCP_ASSERT_FLAGS (POS_FLAG_ASSERT_WORD_TO_ANY \
+ | POS_FLAG_ASSERT_NONWORD_TO_ANY)
+
+/** do not wire to accept or other pos; may still wire to eod, etc if
+ * instructed */
+#define POS_FLAG_ONLY_ENDS (1 << 23)
+
+#define POS_FLAG_WIRE_EOD (1 << 24) /**< wire to accept eod */
+#define POS_FLAG_WIRE_NL_EOD (1 << 25) /**< wire to nl before accept eod */
+#define POS_FLAG_WIRE_NL_ACCEPT (1 << 26) /**< wire to nl before accept */
+#define POS_FLAG_NO_NL_EOD (1 << 27) /**< disallow nl before accept eod */
+#define POS_FLAG_NO_NL_ACCEPT (1 << 28) /**< disallow nl before accept */
+
+/** \brief Parse and Glushkov construction use only. State number within the
+ * NFA as it is being constructed. */
+typedef u32 Position;
+
+} // namespace ue2
+
+#endif // PARSER_POSITION_H
diff --git a/contrib/libs/hyperscan/src/parser/position_dump.h b/contrib/libs/hyperscan/src/parser/position_dump.h
index 12e5c896e4..e4afbf415b 100644
--- a/contrib/libs/hyperscan/src/parser/position_dump.h
+++ b/contrib/libs/hyperscan/src/parser/position_dump.h
@@ -1,63 +1,63 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef POSITION_DUMP_H
-#define POSITION_DUMP_H
-
-#include <sstream>
-
-namespace ue2 {
-
-#ifdef DUMP_SUPPORT
-// implemented in buildstate.cpp
-void dump(std::ostream &os, const PositionInfo &p);
-#endif
-
-#if defined(DUMP_SUPPORT) || defined(DEBUG)
-
-template<class Iterator>
-static UNUSED
-std::string dumpPositions(const Iterator &begin, const Iterator &end) {
- std::ostringstream oss;
- oss << '[';
- for (Iterator i = begin; i != end; ++i) {
- if (i != begin) {
- oss << ' ';
- }
- dump(oss, *i);
- }
- oss << ']';
- return oss.str();
-}
-
-#endif
-
-} // namespace ue2
-
-#endif /* POSITION_DUMP_H */
-
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef POSITION_DUMP_H
+#define POSITION_DUMP_H
+
+#include <sstream>
+
+namespace ue2 {
+
+#ifdef DUMP_SUPPORT
+// implemented in buildstate.cpp
+void dump(std::ostream &os, const PositionInfo &p);
+#endif
+
+#if defined(DUMP_SUPPORT) || defined(DEBUG)
+
+template<class Iterator>
+static UNUSED
+std::string dumpPositions(const Iterator &begin, const Iterator &end) {
+ std::ostringstream oss;
+ oss << '[';
+ for (Iterator i = begin; i != end; ++i) {
+ if (i != begin) {
+ oss << ' ';
+ }
+ dump(oss, *i);
+ }
+ oss << ']';
+ return oss.str();
+}
+
+#endif
+
+} // namespace ue2
+
+#endif /* POSITION_DUMP_H */
+
diff --git a/contrib/libs/hyperscan/src/parser/position_info.h b/contrib/libs/hyperscan/src/parser/position_info.h
index 2a1f078473..4e64a28029 100644
--- a/contrib/libs/hyperscan/src/parser/position_info.h
+++ b/contrib/libs/hyperscan/src/parser/position_info.h
@@ -1,57 +1,57 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef POSITION_INFO_H
-#define POSITION_INFO_H
-
-#include "ue2common.h"
-#include "position.h"
-
-namespace ue2 {
-
-/** Class representing a component state. */
-class PositionInfo {
-public:
- PositionInfo(unsigned int p) : pos(p), flags(0) {}
-
- bool operator<(const PositionInfo &other) const {
- return pos < other.pos;
- }
-
- bool operator==(const PositionInfo &other) const {
- return pos == other.pos;
- }
-
- Position pos; //!< state number
- int flags; //!< from POS_FLAG_* above
-};
-
-} // namespace ue2
-
-#endif /* POSITION_INFO_H */
-
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef POSITION_INFO_H
+#define POSITION_INFO_H
+
+#include "ue2common.h"
+#include "position.h"
+
+namespace ue2 {
+
+/** Class representing a component state. */
+class PositionInfo {
+public:
+ PositionInfo(unsigned int p) : pos(p), flags(0) {}
+
+ bool operator<(const PositionInfo &other) const {
+ return pos < other.pos;
+ }
+
+ bool operator==(const PositionInfo &other) const {
+ return pos == other.pos;
+ }
+
+ Position pos; //!< state number
+ int flags; //!< from POS_FLAG_* above
+};
+
+} // namespace ue2
+
+#endif /* POSITION_INFO_H */
+
diff --git a/contrib/libs/hyperscan/src/parser/prefilter.cpp b/contrib/libs/hyperscan/src/parser/prefilter.cpp
index 94e73ccfd5..f69362e4e3 100644
--- a/contrib/libs/hyperscan/src/parser/prefilter.cpp
+++ b/contrib/libs/hyperscan/src/parser/prefilter.cpp
@@ -1,300 +1,300 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Prefiltering component tree transformation.
- */
-#include "ComponentAssertion.h"
-#include "ComponentAtomicGroup.h"
-#include "ComponentBackReference.h"
-#include "ComponentBoundary.h"
-#include "ComponentClass.h"
-#include "ComponentCondReference.h"
-#include "ComponentRepeat.h"
-#include "ComponentSequence.h"
-#include "ComponentVisitor.h"
-#include "ComponentWordBoundary.h"
-#include "ConstComponentVisitor.h"
-#include "Parser.h"
-#include "prefilter.h"
-
-#include <algorithm>
-#include <stack>
-
-using namespace std;
-
-namespace ue2 {
-
-/** \brief Max number of positions a referent can have to be considered safe to
- * replace a reference in prefiltering mode. */
-static const size_t MAX_REFERENT_POSITIONS = 1;
-
-/** \brief Constructs a \ref ComponentClass that matches a dot (any
- * byte/codepoint, depending on whether UTF-8). */
-static
-unique_ptr<ComponentClass> makeDotClass(const ParseMode &mode_in) {
- ParseMode mode(mode_in);
- mode.dotall = true;
- return generateComponent(CLASS_ANY, false, mode);
-}
-
-namespace {
-
-/**
- * \brief Visitor used to determine if a given referent component is safe to
- * replace its reference in prefiltering mode. Throws
- * SafeReferentVisitor::Unsafe to terminate early on unsafe cases. */
-class SafeReferentVisitor : public DefaultConstComponentVisitor {
-public:
- struct Unsafe {};
-
- SafeReferentVisitor() : numPositions(0) {}
-
- bool is_safe() const {
- DEBUG_PRINTF("numPositions = %zu\n", numPositions);
- return numPositions <= MAX_REFERENT_POSITIONS;
- }
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Prefiltering component tree transformation.
+ */
+#include "ComponentAssertion.h"
+#include "ComponentAtomicGroup.h"
+#include "ComponentBackReference.h"
+#include "ComponentBoundary.h"
+#include "ComponentClass.h"
+#include "ComponentCondReference.h"
+#include "ComponentRepeat.h"
+#include "ComponentSequence.h"
+#include "ComponentVisitor.h"
+#include "ComponentWordBoundary.h"
+#include "ConstComponentVisitor.h"
+#include "Parser.h"
+#include "prefilter.h"
+
+#include <algorithm>
+#include <stack>
+
+using namespace std;
+
+namespace ue2 {
+
+/** \brief Max number of positions a referent can have to be considered safe to
+ * replace a reference in prefiltering mode. */
+static const size_t MAX_REFERENT_POSITIONS = 1;
+
+/** \brief Constructs a \ref ComponentClass that matches a dot (any
+ * byte/codepoint, depending on whether UTF-8). */
+static
+unique_ptr<ComponentClass> makeDotClass(const ParseMode &mode_in) {
+ ParseMode mode(mode_in);
+ mode.dotall = true;
+ return generateComponent(CLASS_ANY, false, mode);
+}
+
+namespace {
+
+/**
+ * \brief Visitor used to determine if a given referent component is safe to
+ * replace its reference in prefiltering mode. Throws
+ * SafeReferentVisitor::Unsafe to terminate early on unsafe cases. */
+class SafeReferentVisitor : public DefaultConstComponentVisitor {
+public:
+ struct Unsafe {};
+
+ SafeReferentVisitor() : numPositions(0) {}
+
+ bool is_safe() const {
+ DEBUG_PRINTF("numPositions = %zu\n", numPositions);
+ return numPositions <= MAX_REFERENT_POSITIONS;
+ }
+
using DefaultConstComponentVisitor::pre;
using DefaultConstComponentVisitor::post;
- void pre(const AsciiComponentClass &) override {
- numPositions++;
- }
-
- void pre(const UTF8ComponentClass &) override {
- // FIXME: we should be able to tell precisely how many positions this
- // class will use. Right now, use the worst case.
- numPositions += 4;
- }
-
- void pre(const ComponentBoundary &) override {
- numPositions++;
- }
-
- void pre(const ComponentByte &) override {
- numPositions++;
- }
-
- void pre(const ComponentEUS &) override {
- numPositions++;
- }
-
- void pre(const ComponentRepeat &) override {
- // Record the number of positions used before we visit the contents of
- // the repeat.
- countStack.push(numPositions);
- }
-
- void post(const ComponentRepeat &c) override {
- assert(!countStack.empty());
- size_t before = countStack.top();
- countStack.pop();
- assert(before <= numPositions);
-
- std::pair<u32, u32> bounds = c.getBounds();
- size_t subPositions = numPositions - before;
- size_t copies = bounds.second < ComponentRepeat::NoLimit
- ? bounds.second
- : max(bounds.first, 1U);
- numPositions = before + (subPositions * copies);
- }
-
- void pre(const ComponentWordBoundary &) override {
- // not quite accurate, as these are expanded out in assert
- // resolution...
- numPositions++;
- }
-
- void pre(const ComponentBackReference &) override {
- throw Unsafe();
- }
-
- void pre(const ComponentCondReference &) override {
- throw Unsafe();
- }
-
-private:
- size_t numPositions;
-
- // For temporary use
- std::stack<size_t> countStack;
-};
-
-static
-bool isSafeReferent(const Component &c) {
- try {
- SafeReferentVisitor vis;
- c.accept(vis);
- return vis.is_safe();
- }
- catch (const SafeReferentVisitor::Unsafe &) {
- return false;
- }
-}
-
-/**
- * \brief Visitor to find the \ref ComponentSequence with a given reference ID
- * or name: if found, the visitor will throw a const ptr to it.
- */
-class FindSequenceVisitor : public DefaultConstComponentVisitor {
-public:
- explicit FindSequenceVisitor(unsigned ref_id) : id(ref_id) {}
- explicit FindSequenceVisitor(const std::string &s) : name(s) {}
-
+ void pre(const AsciiComponentClass &) override {
+ numPositions++;
+ }
+
+ void pre(const UTF8ComponentClass &) override {
+ // FIXME: we should be able to tell precisely how many positions this
+ // class will use. Right now, use the worst case.
+ numPositions += 4;
+ }
+
+ void pre(const ComponentBoundary &) override {
+ numPositions++;
+ }
+
+ void pre(const ComponentByte &) override {
+ numPositions++;
+ }
+
+ void pre(const ComponentEUS &) override {
+ numPositions++;
+ }
+
+ void pre(const ComponentRepeat &) override {
+ // Record the number of positions used before we visit the contents of
+ // the repeat.
+ countStack.push(numPositions);
+ }
+
+ void post(const ComponentRepeat &c) override {
+ assert(!countStack.empty());
+ size_t before = countStack.top();
+ countStack.pop();
+ assert(before <= numPositions);
+
+ std::pair<u32, u32> bounds = c.getBounds();
+ size_t subPositions = numPositions - before;
+ size_t copies = bounds.second < ComponentRepeat::NoLimit
+ ? bounds.second
+ : max(bounds.first, 1U);
+ numPositions = before + (subPositions * copies);
+ }
+
+ void pre(const ComponentWordBoundary &) override {
+ // not quite accurate, as these are expanded out in assert
+ // resolution...
+ numPositions++;
+ }
+
+ void pre(const ComponentBackReference &) override {
+ throw Unsafe();
+ }
+
+ void pre(const ComponentCondReference &) override {
+ throw Unsafe();
+ }
+
+private:
+ size_t numPositions;
+
+ // For temporary use
+ std::stack<size_t> countStack;
+};
+
+static
+bool isSafeReferent(const Component &c) {
+ try {
+ SafeReferentVisitor vis;
+ c.accept(vis);
+ return vis.is_safe();
+ }
+ catch (const SafeReferentVisitor::Unsafe &) {
+ return false;
+ }
+}
+
+/**
+ * \brief Visitor to find the \ref ComponentSequence with a given reference ID
+ * or name: if found, the visitor will throw a const ptr to it.
+ */
+class FindSequenceVisitor : public DefaultConstComponentVisitor {
+public:
+ explicit FindSequenceVisitor(unsigned ref_id) : id(ref_id) {}
+ explicit FindSequenceVisitor(const std::string &s) : name(s) {}
+
using DefaultConstComponentVisitor::pre;
- void pre(const ComponentSequence &c) override {
- if (!name.empty()) {
- if (c.getCaptureName() == name) {
- throw &c;
- }
- } else if (c.getCaptureIndex() == id) {
- throw &c;
- }
- }
-private:
- const std::string name;
- const unsigned id = 0;
-};
-
-static
-const ComponentSequence *findCapturingGroup(const Component *root,
- FindSequenceVisitor &vis) {
- try {
- root->accept(vis);
- DEBUG_PRINTF("group not found\n");
- return nullptr;
- } catch (const ComponentSequence *seq) {
- return seq;
- }
-}
-
-} // namespace
-
-/**
- * \brief Visitor to apply prefilter reductions, swapping components for which
- * we don't have real implementations with implementable ones. Any such
- * replacement should produce a superset of the matches that would be produced
- * by the original.
- */
-class PrefilterVisitor : public DefaultComponentVisitor {
-public:
- PrefilterVisitor(Component *c, const ParseMode &m) : root(c), mode(m) {}
- ~PrefilterVisitor() override;
-
+ void pre(const ComponentSequence &c) override {
+ if (!name.empty()) {
+ if (c.getCaptureName() == name) {
+ throw &c;
+ }
+ } else if (c.getCaptureIndex() == id) {
+ throw &c;
+ }
+ }
+private:
+ const std::string name;
+ const unsigned id = 0;
+};
+
+static
+const ComponentSequence *findCapturingGroup(const Component *root,
+ FindSequenceVisitor &vis) {
+ try {
+ root->accept(vis);
+ DEBUG_PRINTF("group not found\n");
+ return nullptr;
+ } catch (const ComponentSequence *seq) {
+ return seq;
+ }
+}
+
+} // namespace
+
+/**
+ * \brief Visitor to apply prefilter reductions, swapping components for which
+ * we don't have real implementations with implementable ones. Any such
+ * replacement should produce a superset of the matches that would be produced
+ * by the original.
+ */
+class PrefilterVisitor : public DefaultComponentVisitor {
+public:
+ PrefilterVisitor(Component *c, const ParseMode &m) : root(c), mode(m) {}
+ ~PrefilterVisitor() override;
+
using DefaultComponentVisitor::visit;
- /** \brief Calls the visitor (recursively) on a new replacement component
- * we've just created. Takes care of freeing it if the sequence is itself
- * replaced. */
- template<class T>
- Component *visit_replacement(T *r) {
- Component *c = r->accept(*this);
- if (c != r) {
- delete r;
- }
- return c;
- }
-
- Component *visit(ComponentBackReference *c) override {
- assert(c);
-
- // If the referent is simple (represents a single position), then we
- // replace the back-reference with a copy of it.
- const ComponentSequence *ref = nullptr;
- const std::string &ref_name = c->getRefName();
- const unsigned ref_id = c->getRefID();
- if (!ref_name.empty()) {
- FindSequenceVisitor vis(ref_name);
- ref = findCapturingGroup(root, vis);
- } else if (ref_id > 0) {
- FindSequenceVisitor vis(ref_id);
- ref = findCapturingGroup(root, vis);
- }
-
- if (ref && isSafeReferent(*ref)) {
- DEBUG_PRINTF("found safe ref %p\n", ref);
- ComponentSequence *seq = ref->clone();
- // Remove labels from cloned sequence.
- seq->setCaptureName("");
- seq->setCaptureIndex(ComponentSequence::NOT_CAPTURED);
-
- return visit_replacement(seq);
- }
-
- // Replace with ".*".
- auto rep = makeComponentRepeat(makeDotClass(mode), 0,
- ComponentRepeat::NoLimit,
- ComponentRepeat::REPEAT_GREEDY);
- return rep.release(); // FIXME: owning raw ptr
- }
-
- Component *visit(UNUSED ComponentAssertion *c) override {
- assert(c);
- // Replace with an empty sequence.
- return new ComponentSequence();
- }
-
- Component *visit(ComponentRepeat *c) override {
- assert(c);
- // Possessive repeats become greedy.
- if (c->type == ComponentRepeat::REPEAT_POSSESSIVE) {
- c->type = ComponentRepeat::REPEAT_GREEDY;
- }
- return c;
- }
-
- Component *visit(ComponentAtomicGroup *c) override {
- assert(c);
- // Replace with a plain sequence containing the atomic group's
- // children.
- ComponentSequence *seq = new ComponentSequence();
- const auto &children = c->getChildren();
- for (const auto &child : children) {
- assert(child);
- seq->addComponent(unique_ptr<Component>(child->clone()));
- }
-
- return visit_replacement(seq);
- }
-
- Component *visit(UNUSED ComponentEUS *c) override {
- assert(c);
- // Replace with ".+".
- auto rep = makeComponentRepeat(makeDotClass(mode), 1,
- ComponentRepeat::NoLimit,
- ComponentRepeat::REPEAT_GREEDY);
- return rep.release(); // FIXME: owning raw ptr
- }
-
- Component *visit(ComponentWordBoundary *c) override {
- assert(c);
+ /** \brief Calls the visitor (recursively) on a new replacement component
+ * we've just created. Takes care of freeing it if the sequence is itself
+ * replaced. */
+ template<class T>
+ Component *visit_replacement(T *r) {
+ Component *c = r->accept(*this);
+ if (c != r) {
+ delete r;
+ }
+ return c;
+ }
+
+ Component *visit(ComponentBackReference *c) override {
+ assert(c);
+
+ // If the referent is simple (represents a single position), then we
+ // replace the back-reference with a copy of it.
+ const ComponentSequence *ref = nullptr;
+ const std::string &ref_name = c->getRefName();
+ const unsigned ref_id = c->getRefID();
+ if (!ref_name.empty()) {
+ FindSequenceVisitor vis(ref_name);
+ ref = findCapturingGroup(root, vis);
+ } else if (ref_id > 0) {
+ FindSequenceVisitor vis(ref_id);
+ ref = findCapturingGroup(root, vis);
+ }
+
+ if (ref && isSafeReferent(*ref)) {
+ DEBUG_PRINTF("found safe ref %p\n", ref);
+ ComponentSequence *seq = ref->clone();
+ // Remove labels from cloned sequence.
+ seq->setCaptureName("");
+ seq->setCaptureIndex(ComponentSequence::NOT_CAPTURED);
+
+ return visit_replacement(seq);
+ }
+
+ // Replace with ".*".
+ auto rep = makeComponentRepeat(makeDotClass(mode), 0,
+ ComponentRepeat::NoLimit,
+ ComponentRepeat::REPEAT_GREEDY);
+ return rep.release(); // FIXME: owning raw ptr
+ }
+
+ Component *visit(UNUSED ComponentAssertion *c) override {
+ assert(c);
+ // Replace with an empty sequence.
+ return new ComponentSequence();
+ }
+
+ Component *visit(ComponentRepeat *c) override {
+ assert(c);
+ // Possessive repeats become greedy.
+ if (c->type == ComponentRepeat::REPEAT_POSSESSIVE) {
+ c->type = ComponentRepeat::REPEAT_GREEDY;
+ }
+ return c;
+ }
+
+ Component *visit(ComponentAtomicGroup *c) override {
+ assert(c);
+ // Replace with a plain sequence containing the atomic group's
+ // children.
+ ComponentSequence *seq = new ComponentSequence();
+ const auto &children = c->getChildren();
+ for (const auto &child : children) {
+ assert(child);
+ seq->addComponent(unique_ptr<Component>(child->clone()));
+ }
+
+ return visit_replacement(seq);
+ }
+
+ Component *visit(UNUSED ComponentEUS *c) override {
+ assert(c);
+ // Replace with ".+".
+ auto rep = makeComponentRepeat(makeDotClass(mode), 1,
+ ComponentRepeat::NoLimit,
+ ComponentRepeat::REPEAT_GREEDY);
+ return rep.release(); // FIXME: owning raw ptr
+ }
+
+ Component *visit(ComponentWordBoundary *c) override {
+ assert(c);
// TODO: Right now, we do not have correct code for resolving these
// when prefiltering is on, UCP is on, and UTF-8 is *off*. For now, we
@@ -305,52 +305,52 @@ public:
}
// All other cases can be prefiltered.
- c->setPrefilter(true);
- return c;
- }
-
- Component *visit(ComponentCondReference *c) override {
- assert(c);
- // Replace with a plain sequence containing the conditional reference's
- // children.
- ComponentSequence *seq = new ComponentSequence();
- const auto &children = c->getChildren();
-
- // Empty children is accepted by PCRE as a "do nothing" case.
- if (children.empty()) {
- return seq;
- }
-
- for (const auto &child : children) {
- assert(child);
- seq->addComponent(unique_ptr<Component>(child->clone()));
- }
-
- // If the conditional reference had just a YES branch, we want this to
- // be an alternation with an empty sequence (the NO branch).
- if (!c->hasBothBranches) {
- seq->addAlternation();
- seq->finalize();
- }
-
- return visit_replacement(seq);
- }
-
-private:
- Component *root;
- const ParseMode &mode;
-};
-
-PrefilterVisitor::~PrefilterVisitor() {}
-
-void prefilterTree(unique_ptr<Component> &root, const ParseMode &mode) {
- assert(root);
- PrefilterVisitor vis(root.get(), mode);
-
- Component *c = root->accept(vis);
- if (c != root.get()) {
- root.reset(c);
- }
-}
-
-} // namespace ue2
+ c->setPrefilter(true);
+ return c;
+ }
+
+ Component *visit(ComponentCondReference *c) override {
+ assert(c);
+ // Replace with a plain sequence containing the conditional reference's
+ // children.
+ ComponentSequence *seq = new ComponentSequence();
+ const auto &children = c->getChildren();
+
+ // Empty children is accepted by PCRE as a "do nothing" case.
+ if (children.empty()) {
+ return seq;
+ }
+
+ for (const auto &child : children) {
+ assert(child);
+ seq->addComponent(unique_ptr<Component>(child->clone()));
+ }
+
+ // If the conditional reference had just a YES branch, we want this to
+ // be an alternation with an empty sequence (the NO branch).
+ if (!c->hasBothBranches) {
+ seq->addAlternation();
+ seq->finalize();
+ }
+
+ return visit_replacement(seq);
+ }
+
+private:
+ Component *root;
+ const ParseMode &mode;
+};
+
+PrefilterVisitor::~PrefilterVisitor() {}
+
+void prefilterTree(unique_ptr<Component> &root, const ParseMode &mode) {
+ assert(root);
+ PrefilterVisitor vis(root.get(), mode);
+
+ Component *c = root->accept(vis);
+ if (c != root.get()) {
+ root.reset(c);
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/prefilter.h b/contrib/libs/hyperscan/src/parser/prefilter.h
index d7d61fc272..c2665f4096 100644
--- a/contrib/libs/hyperscan/src/parser/prefilter.h
+++ b/contrib/libs/hyperscan/src/parser/prefilter.h
@@ -1,48 +1,48 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef PARSER_PREFILTER_H
-#define PARSER_PREFILTER_H
-
-#include <memory>
-
-namespace ue2 {
-
-class Component;
-struct ParseMode;
-
-/**
- * \brief Applies prefiltering transformations to the given component.
- *
- * May reseat the given Component pointer.
- */
-void prefilterTree(std::unique_ptr<Component> &root, const ParseMode &mode);
-
-} // namespace ue2
-
-#endif // PARSER_PREFILTER_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef PARSER_PREFILTER_H
+#define PARSER_PREFILTER_H
+
+#include <memory>
+
+namespace ue2 {
+
+class Component;
+struct ParseMode;
+
+/**
+ * \brief Applies prefiltering transformations to the given component.
+ *
+ * May reseat the given Component pointer.
+ */
+void prefilterTree(std::unique_ptr<Component> &root, const ParseMode &mode);
+
+} // namespace ue2
+
+#endif // PARSER_PREFILTER_H
diff --git a/contrib/libs/hyperscan/src/parser/shortcut_literal.cpp b/contrib/libs/hyperscan/src/parser/shortcut_literal.cpp
index 0f0a1663e2..a5d67f30d8 100644
--- a/contrib/libs/hyperscan/src/parser/shortcut_literal.cpp
+++ b/contrib/libs/hyperscan/src/parser/shortcut_literal.cpp
@@ -1,205 +1,205 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Shortcut literal pass: directly add literal components to Rose.
- */
-#include "AsciiComponentClass.h"
-#include "Utf8ComponentClass.h"
-#include "ComponentAssertion.h"
-#include "ComponentAtomicGroup.h"
-#include "ComponentBackReference.h"
-#include "ComponentBoundary.h"
-#include "ComponentClass.h"
-#include "ComponentCondReference.h"
-#include "ComponentRepeat.h"
-#include "ComponentSequence.h"
-#include "ComponentVisitor.h"
-#include "ComponentWordBoundary.h"
-#include "ConstComponentVisitor.h"
-#include "parse_error.h"
-#include "shortcut_literal.h"
-#include "grey.h"
-#include "nfagraph/ng.h"
-#include "compiler/compiler.h"
-#include "util/ue2string.h"
-#include "ue2common.h"
-
-#include <stack>
-
-using namespace std;
-
-namespace ue2 {
-
-/**
- * \brief Visitor that constructs a ue2_literal from a component tree.
- *
- * If a component that can't be part of a literal is encountered, this visitor
- * will throw ConstructLiteralVisitor::NotLiteral.
- */
-class ConstructLiteralVisitor : public ConstComponentVisitor {
-public:
- ~ConstructLiteralVisitor() override;
-
- /** \brief Thrown if this component does not represent a literal. */
- struct NotLiteral {};
-
- void pre(const AsciiComponentClass &c) override {
- const CharReach &cr = c.cr;
- const size_t width = cr.count();
- if (width == 1) {
- lit.push_back(cr.find_first(), false);
- } else if (width == 2 && cr.isCaselessChar()) {
- lit.push_back(cr.find_first(), true);
- } else {
- throw NotLiteral();
- }
- }
-
- void pre(const ComponentRepeat &c) override {
- if (c.m_min == 0 || c.m_min != c.m_max) {
- throw NotLiteral();
- }
-
- if (c.m_max < ComponentRepeat::NoLimit && c.m_max > 32767) {
- throw ParseError("Bounded repeat is too large.");
- }
-
- // Store the current length of the literal; in this repeat's post()
- // call we will append N-1 more copies of [index..end].
- repeat_stack.push(lit.length());
- }
-
- void post(const ComponentRepeat &c) override {
- // Add N-1 copies of the string between the entry to the repeat and the
- // current end of the literal.
- assert(!repeat_stack.empty());
- const ue2_literal suffix = lit.substr(repeat_stack.top());
- repeat_stack.pop();
-
- for (unsigned i = 1; i < c.m_min; i++) {
- lit += suffix;
- }
- }
-
- void pre(const ComponentSequence &) override {
- // Pass through.
- }
-
- void pre(const ComponentAlternation &) override { throw NotLiteral(); }
- void pre(const ComponentAssertion &) override { throw NotLiteral(); }
- void pre(const ComponentAtomicGroup &) override { throw NotLiteral(); }
- void pre(const ComponentBackReference &) override { throw NotLiteral(); }
- void pre(const ComponentBoundary &) override { throw NotLiteral(); }
- void pre(const ComponentByte &) override { throw NotLiteral(); }
- void pre(const ComponentCondReference &) override { throw NotLiteral(); }
- void pre(const ComponentEmpty &) override { throw NotLiteral(); }
- void pre(const ComponentEUS &) override { throw NotLiteral(); }
- void pre(const ComponentWordBoundary &) override { throw NotLiteral(); }
- void pre(const UTF8ComponentClass &) override { throw NotLiteral(); }
-
- void during(const AsciiComponentClass &) override {}
- void during(const ComponentAlternation &) override {}
- void during(const ComponentAssertion &) override {}
- void during(const ComponentAtomicGroup &) override {}
- void during(const ComponentBackReference &) override {}
- void during(const ComponentBoundary &) override {}
- void during(const ComponentByte &) override {}
- void during(const ComponentCondReference &) override {}
- void during(const ComponentEmpty &) override {}
- void during(const ComponentEUS &) override {}
- void during(const ComponentRepeat &) override {}
- void during(const ComponentSequence &) override {}
- void during(const ComponentWordBoundary &) override {}
- void during(const UTF8ComponentClass &) override {}
-
- void post(const AsciiComponentClass &) override {}
- void post(const ComponentAlternation &) override {}
- void post(const ComponentAssertion &) override {}
- void post(const ComponentAtomicGroup &) override {}
- void post(const ComponentBackReference &) override {}
- void post(const ComponentBoundary &) override {}
- void post(const ComponentByte &) override {}
- void post(const ComponentCondReference &) override {}
- void post(const ComponentEmpty &) override {}
- void post(const ComponentEUS &) override {}
- void post(const ComponentSequence &) override {}
- void post(const ComponentWordBoundary &) override {}
- void post(const UTF8ComponentClass &) override {}
-
- ue2_literal lit;
- stack<size_t> repeat_stack; //!< index of entry to repeat.
-};
-
-ConstructLiteralVisitor::~ConstructLiteralVisitor() {}
-
-/** \brief True if the literal expression \a expr could be added to Rose. */
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Shortcut literal pass: directly add literal components to Rose.
+ */
+#include "AsciiComponentClass.h"
+#include "Utf8ComponentClass.h"
+#include "ComponentAssertion.h"
+#include "ComponentAtomicGroup.h"
+#include "ComponentBackReference.h"
+#include "ComponentBoundary.h"
+#include "ComponentClass.h"
+#include "ComponentCondReference.h"
+#include "ComponentRepeat.h"
+#include "ComponentSequence.h"
+#include "ComponentVisitor.h"
+#include "ComponentWordBoundary.h"
+#include "ConstComponentVisitor.h"
+#include "parse_error.h"
+#include "shortcut_literal.h"
+#include "grey.h"
+#include "nfagraph/ng.h"
+#include "compiler/compiler.h"
+#include "util/ue2string.h"
+#include "ue2common.h"
+
+#include <stack>
+
+using namespace std;
+
+namespace ue2 {
+
+/**
+ * \brief Visitor that constructs a ue2_literal from a component tree.
+ *
+ * If a component that can't be part of a literal is encountered, this visitor
+ * will throw ConstructLiteralVisitor::NotLiteral.
+ */
+class ConstructLiteralVisitor : public ConstComponentVisitor {
+public:
+ ~ConstructLiteralVisitor() override;
+
+ /** \brief Thrown if this component does not represent a literal. */
+ struct NotLiteral {};
+
+ void pre(const AsciiComponentClass &c) override {
+ const CharReach &cr = c.cr;
+ const size_t width = cr.count();
+ if (width == 1) {
+ lit.push_back(cr.find_first(), false);
+ } else if (width == 2 && cr.isCaselessChar()) {
+ lit.push_back(cr.find_first(), true);
+ } else {
+ throw NotLiteral();
+ }
+ }
+
+ void pre(const ComponentRepeat &c) override {
+ if (c.m_min == 0 || c.m_min != c.m_max) {
+ throw NotLiteral();
+ }
+
+ if (c.m_max < ComponentRepeat::NoLimit && c.m_max > 32767) {
+ throw ParseError("Bounded repeat is too large.");
+ }
+
+ // Store the current length of the literal; in this repeat's post()
+ // call we will append N-1 more copies of [index..end].
+ repeat_stack.push(lit.length());
+ }
+
+ void post(const ComponentRepeat &c) override {
+ // Add N-1 copies of the string between the entry to the repeat and the
+ // current end of the literal.
+ assert(!repeat_stack.empty());
+ const ue2_literal suffix = lit.substr(repeat_stack.top());
+ repeat_stack.pop();
+
+ for (unsigned i = 1; i < c.m_min; i++) {
+ lit += suffix;
+ }
+ }
+
+ void pre(const ComponentSequence &) override {
+ // Pass through.
+ }
+
+ void pre(const ComponentAlternation &) override { throw NotLiteral(); }
+ void pre(const ComponentAssertion &) override { throw NotLiteral(); }
+ void pre(const ComponentAtomicGroup &) override { throw NotLiteral(); }
+ void pre(const ComponentBackReference &) override { throw NotLiteral(); }
+ void pre(const ComponentBoundary &) override { throw NotLiteral(); }
+ void pre(const ComponentByte &) override { throw NotLiteral(); }
+ void pre(const ComponentCondReference &) override { throw NotLiteral(); }
+ void pre(const ComponentEmpty &) override { throw NotLiteral(); }
+ void pre(const ComponentEUS &) override { throw NotLiteral(); }
+ void pre(const ComponentWordBoundary &) override { throw NotLiteral(); }
+ void pre(const UTF8ComponentClass &) override { throw NotLiteral(); }
+
+ void during(const AsciiComponentClass &) override {}
+ void during(const ComponentAlternation &) override {}
+ void during(const ComponentAssertion &) override {}
+ void during(const ComponentAtomicGroup &) override {}
+ void during(const ComponentBackReference &) override {}
+ void during(const ComponentBoundary &) override {}
+ void during(const ComponentByte &) override {}
+ void during(const ComponentCondReference &) override {}
+ void during(const ComponentEmpty &) override {}
+ void during(const ComponentEUS &) override {}
+ void during(const ComponentRepeat &) override {}
+ void during(const ComponentSequence &) override {}
+ void during(const ComponentWordBoundary &) override {}
+ void during(const UTF8ComponentClass &) override {}
+
+ void post(const AsciiComponentClass &) override {}
+ void post(const ComponentAlternation &) override {}
+ void post(const ComponentAssertion &) override {}
+ void post(const ComponentAtomicGroup &) override {}
+ void post(const ComponentBackReference &) override {}
+ void post(const ComponentBoundary &) override {}
+ void post(const ComponentByte &) override {}
+ void post(const ComponentCondReference &) override {}
+ void post(const ComponentEmpty &) override {}
+ void post(const ComponentEUS &) override {}
+ void post(const ComponentSequence &) override {}
+ void post(const ComponentWordBoundary &) override {}
+ void post(const UTF8ComponentClass &) override {}
+
+ ue2_literal lit;
+ stack<size_t> repeat_stack; //!< index of entry to repeat.
+};
+
+ConstructLiteralVisitor::~ConstructLiteralVisitor() {}
+
+/** \brief True if the literal expression \a expr could be added to Rose. */
bool shortcutLiteral(NG &ng, const ParsedExpression &pe) {
assert(pe.component);
-
+
if (!ng.cc.grey.allowLiteral) {
- return false;
- }
-
+ return false;
+ }
+
const auto &expr = pe.expr;
- // XXX: don't shortcut literals with extended params (yet)
+ // XXX: don't shortcut literals with extended params (yet)
if (expr.min_offset || expr.max_offset != MAX_OFFSET || expr.min_length ||
expr.edit_distance || expr.hamm_distance) {
- DEBUG_PRINTF("extended params not allowed\n");
- return false;
- }
-
- ConstructLiteralVisitor vis;
- try {
+ DEBUG_PRINTF("extended params not allowed\n");
+ return false;
+ }
+
+ ConstructLiteralVisitor vis;
+ try {
assert(pe.component);
pe.component->accept(vis);
- assert(vis.repeat_stack.empty());
- } catch (const ConstructLiteralVisitor::NotLiteral&) {
- DEBUG_PRINTF("not a literal\n");
- return false;
- }
-
- const ue2_literal &lit = vis.lit;
-
- if (lit.empty()) {
- DEBUG_PRINTF("empty literal\n");
- return false;
- }
-
- if (expr.highlander && lit.length() <= 1) {
- DEBUG_PRINTF("not shortcutting SEP literal\n");
- return false;
- }
-
- DEBUG_PRINTF("constructed literal %s\n", dumpString(lit).c_str());
+ assert(vis.repeat_stack.empty());
+ } catch (const ConstructLiteralVisitor::NotLiteral&) {
+ DEBUG_PRINTF("not a literal\n");
+ return false;
+ }
+
+ const ue2_literal &lit = vis.lit;
+
+ if (lit.empty()) {
+ DEBUG_PRINTF("empty literal\n");
+ return false;
+ }
+
+ if (expr.highlander && lit.length() <= 1) {
+ DEBUG_PRINTF("not shortcutting SEP literal\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("constructed literal %s\n", dumpString(lit).c_str());
return ng.addLiteral(lit, expr.index, expr.report, expr.highlander,
expr.som, expr.quiet);
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/shortcut_literal.h b/contrib/libs/hyperscan/src/parser/shortcut_literal.h
index 2129475211..60cffd00ed 100644
--- a/contrib/libs/hyperscan/src/parser/shortcut_literal.h
+++ b/contrib/libs/hyperscan/src/parser/shortcut_literal.h
@@ -1,46 +1,46 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Shortcut literal pass: directly add literal components to Rose.
- */
-
-#ifndef SHORTCUT_LITERAL_H
-#define SHORTCUT_LITERAL_H
-
-namespace ue2 {
-
-class NG;
-class ParsedExpression;
-
-/** \brief True if the literal expression \a expr could be added to Rose. */
-bool shortcutLiteral(NG &ng, const ParsedExpression &expr);
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Shortcut literal pass: directly add literal components to Rose.
+ */
+
+#ifndef SHORTCUT_LITERAL_H
+#define SHORTCUT_LITERAL_H
+
+namespace ue2 {
+
+class NG;
+class ParsedExpression;
+
+/** \brief True if the literal expression \a expr could be added to Rose. */
+bool shortcutLiteral(NG &ng, const ParsedExpression &expr);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/parser/ucp_table.cpp b/contrib/libs/hyperscan/src/parser/ucp_table.cpp
index cdc683a607..fc1330fe7f 100644
--- a/contrib/libs/hyperscan/src/parser/ucp_table.cpp
+++ b/contrib/libs/hyperscan/src/parser/ucp_table.cpp
@@ -1,132 +1,132 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "Utf8ComponentClass.h"
-
-#include <algorithm>
-
-using namespace std;
-
-namespace ue2 {
-
-#define UCP_FN(cat) \
-CodePointSet getUcp##cat(void) { \
- CodePointSet rv; \
- for (u32 i = 0; i < ARRAY_LENGTH(ucp_##cat##_def); i += 2) { \
- rv.setRange(ucp_##cat##_def[i], ucp_##cat##_def[i + 1]); \
- } \
- return rv; \
-}
-
-struct unicase {
- unichar base;
- unichar caseless;
-};
-
-} // namespace ue2
-
-#define UCP_TABLE_DEFINE_FN
-#include "ucp_table.h"
-
-namespace ue2 {
-
-static
-bool operator<(const unicase &a, const unicase &b) {
- if (a.base < b.base) {
- return true;
- }
-
- if (a.base > b.base) {
- return false;
- }
-
- return a.caseless < b.caseless;
-}
-
-void make_caseless(CodePointSet *cps) {
- assert(cps);
- DEBUG_PRINTF("hello\n");
- // Cheap optimisation: if we are empty or a dot, we're already caseless.
- if (cps->begin() == cps->end()) {
- DEBUG_PRINTF("empty\n");
- return;
- }
- if (lower(*cps->begin()) == 0 && upper(*cps->begin()) == MAX_UNICODE) {
- DEBUG_PRINTF("dot\n");
- return;
- }
-
- CodePointSet base = *cps;
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "Utf8ComponentClass.h"
+
+#include <algorithm>
+
+using namespace std;
+
+namespace ue2 {
+
+#define UCP_FN(cat) \
+CodePointSet getUcp##cat(void) { \
+ CodePointSet rv; \
+ for (u32 i = 0; i < ARRAY_LENGTH(ucp_##cat##_def); i += 2) { \
+ rv.setRange(ucp_##cat##_def[i], ucp_##cat##_def[i + 1]); \
+ } \
+ return rv; \
+}
+
+struct unicase {
+ unichar base;
+ unichar caseless;
+};
+
+} // namespace ue2
+
+#define UCP_TABLE_DEFINE_FN
+#include "ucp_table.h"
+
+namespace ue2 {
+
+static
+bool operator<(const unicase &a, const unicase &b) {
+ if (a.base < b.base) {
+ return true;
+ }
+
+ if (a.base > b.base) {
+ return false;
+ }
+
+ return a.caseless < b.caseless;
+}
+
+void make_caseless(CodePointSet *cps) {
+ assert(cps);
+ DEBUG_PRINTF("hello\n");
+ // Cheap optimisation: if we are empty or a dot, we're already caseless.
+ if (cps->begin() == cps->end()) {
+ DEBUG_PRINTF("empty\n");
+ return;
+ }
+ if (lower(*cps->begin()) == 0 && upper(*cps->begin()) == MAX_UNICODE) {
+ DEBUG_PRINTF("dot\n");
+ return;
+ }
+
+ CodePointSet base = *cps;
+
auto uc_begin = begin(ucp_caseless_def);
auto uc_end = end(ucp_caseless_def);
DEBUG_PRINTF("uc len %zd\n", distance(uc_begin, uc_end));
-
+
for (const auto &elem : base) {
unichar b = lower(elem);
unichar e = upper(elem) + 1;
-
- for (; b < e; b++) {
- DEBUG_PRINTF("decasing %x\n", b);
- unicase test = {b, 0}; /* NUL is not a caseless version of anything,
- * so we are ok */
- uc_begin = lower_bound(uc_begin, uc_end, test);
- if (uc_begin == uc_end) {
- DEBUG_PRINTF("EOL\n");
- return;
- }
+
+ for (; b < e; b++) {
+ DEBUG_PRINTF("decasing %x\n", b);
+ unicase test = {b, 0}; /* NUL is not a caseless version of anything,
+ * so we are ok */
+ uc_begin = lower_bound(uc_begin, uc_end, test);
+ if (uc_begin == uc_end) {
+ DEBUG_PRINTF("EOL\n");
+ return;
+ }
while (uc_begin != uc_end && uc_begin->base == b) {
- DEBUG_PRINTF("at {%x,%x}\n", uc_begin->base, uc_begin->caseless);
- cps->set(uc_begin->caseless);
- ++uc_begin;
- }
- }
- }
-}
-
-/** \brief Flip the case of the codepoint in c, if possible.
- *
- * Note that this assumes a one-to-one case mapping, which (though not
- * realistic) is what PCRE does. */
-bool flip_case(unichar *c) {
- assert(c);
-
+ DEBUG_PRINTF("at {%x,%x}\n", uc_begin->base, uc_begin->caseless);
+ cps->set(uc_begin->caseless);
+ ++uc_begin;
+ }
+ }
+ }
+}
+
+/** \brief Flip the case of the codepoint in c, if possible.
+ *
+ * Note that this assumes a one-to-one case mapping, which (though not
+ * realistic) is what PCRE does. */
+bool flip_case(unichar *c) {
+ assert(c);
+
const unicase test = { *c, 0 };
-
+
const auto uc_begin = begin(ucp_caseless_def);
const auto uc_end = end(ucp_caseless_def);
const auto f = lower_bound(uc_begin, uc_end, test);
if (f != uc_end && f->base == *c) {
- DEBUG_PRINTF("flipped c=%x to %x\n", *c, f->caseless);
- *c = f->caseless;
- return true;
- }
- return false;
-}
-
-} // namespace ue2
+ DEBUG_PRINTF("flipped c=%x to %x\n", *c, f->caseless);
+ *c = f->caseless;
+ return true;
+ }
+ return false;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/ucp_table.h b/contrib/libs/hyperscan/src/parser/ucp_table.h
index d420db2b5d..269a971cb1 100644
--- a/contrib/libs/hyperscan/src/parser/ucp_table.h
+++ b/contrib/libs/hyperscan/src/parser/ucp_table.h
@@ -1,11043 +1,11043 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef UCP_TABLE_H
-#define UCP_TABLE_H
-
-/* Generated by tools/scripts/ucp.py based on unicode database
- *
- * Do not hand edit
- */
-
-namespace ue2 {
-
-class CodePointSet;
-void make_caseless(CodePointSet *cps);
-bool flip_case(unichar *c);
-
-CodePointSet getUcpC(void);
-CodePointSet getUcpCc(void);
-CodePointSet getUcpCf(void);
-CodePointSet getUcpCn(void);
-CodePointSet getUcpCo(void);
-CodePointSet getUcpCs(void);
-CodePointSet getUcpL(void);
-CodePointSet getUcpL_and(void);
-CodePointSet getUcpLl(void);
-CodePointSet getUcpLm(void);
-CodePointSet getUcpLo(void);
-CodePointSet getUcpLt(void);
-CodePointSet getUcpLu(void);
-CodePointSet getUcpM(void);
-CodePointSet getUcpMc(void);
-CodePointSet getUcpMe(void);
-CodePointSet getUcpMn(void);
-CodePointSet getUcpN(void);
-CodePointSet getUcpNd(void);
-CodePointSet getUcpNl(void);
-CodePointSet getUcpNo(void);
-CodePointSet getUcpP(void);
-CodePointSet getUcpPc(void);
-CodePointSet getUcpPd(void);
-CodePointSet getUcpPe(void);
-CodePointSet getUcpPf(void);
-CodePointSet getUcpPi(void);
-CodePointSet getUcpPo(void);
-CodePointSet getUcpPs(void);
-CodePointSet getUcpS(void);
-CodePointSet getUcpSc(void);
-CodePointSet getUcpSk(void);
-CodePointSet getUcpSm(void);
-CodePointSet getUcpSo(void);
-CodePointSet getUcpXan(void);
-CodePointSet getUcpXps(void);
-CodePointSet getUcpXsp(void);
-CodePointSet getUcpXwd(void);
-CodePointSet getUcpZ(void);
-CodePointSet getUcpZl(void);
-CodePointSet getUcpZp(void);
-CodePointSet getUcpZs(void);
-CodePointSet getUcpArabic(void);
-CodePointSet getUcpArmenian(void);
-CodePointSet getUcpAvestan(void);
-CodePointSet getUcpBalinese(void);
-CodePointSet getUcpBamum(void);
-CodePointSet getUcpBassa_Vah(void);
-CodePointSet getUcpBatak(void);
-CodePointSet getUcpBengali(void);
-CodePointSet getUcpBopomofo(void);
-CodePointSet getUcpBrahmi(void);
-CodePointSet getUcpBraille(void);
-CodePointSet getUcpBuginese(void);
-CodePointSet getUcpBuhid(void);
-CodePointSet getUcpCanadian_Aboriginal(void);
-CodePointSet getUcpCarian(void);
-CodePointSet getUcpCaucasian_Albanian(void);
-CodePointSet getUcpChakma(void);
-CodePointSet getUcpCham(void);
-CodePointSet getUcpCherokee(void);
-CodePointSet getUcpCommon(void);
-CodePointSet getUcpCoptic(void);
-CodePointSet getUcpCuneiform(void);
-CodePointSet getUcpCypriot(void);
-CodePointSet getUcpCyrillic(void);
-CodePointSet getUcpDeseret(void);
-CodePointSet getUcpDevanagari(void);
-CodePointSet getUcpDuployan(void);
-CodePointSet getUcpEgyptian_Hieroglyphs(void);
-CodePointSet getUcpElbasan(void);
-CodePointSet getUcpEthiopic(void);
-CodePointSet getUcpGeorgian(void);
-CodePointSet getUcpGlagolitic(void);
-CodePointSet getUcpGothic(void);
-CodePointSet getUcpGrantha(void);
-CodePointSet getUcpGreek(void);
-CodePointSet getUcpGujarati(void);
-CodePointSet getUcpGurmukhi(void);
-CodePointSet getUcpHan(void);
-CodePointSet getUcpHangul(void);
-CodePointSet getUcpHanunoo(void);
-CodePointSet getUcpHebrew(void);
-CodePointSet getUcpHiragana(void);
-CodePointSet getUcpImperial_Aramaic(void);
-CodePointSet getUcpInherited(void);
-CodePointSet getUcpInscriptional_Pahlavi(void);
-CodePointSet getUcpInscriptional_Parthian(void);
-CodePointSet getUcpJavanese(void);
-CodePointSet getUcpKaithi(void);
-CodePointSet getUcpKannada(void);
-CodePointSet getUcpKatakana(void);
-CodePointSet getUcpKayah_Li(void);
-CodePointSet getUcpKharoshthi(void);
-CodePointSet getUcpKhmer(void);
-CodePointSet getUcpKhojki(void);
-CodePointSet getUcpKhudawadi(void);
-CodePointSet getUcpLao(void);
-CodePointSet getUcpLatin(void);
-CodePointSet getUcpLepcha(void);
-CodePointSet getUcpLimbu(void);
-CodePointSet getUcpLinear_A(void);
-CodePointSet getUcpLinear_B(void);
-CodePointSet getUcpLisu(void);
-CodePointSet getUcpLycian(void);
-CodePointSet getUcpLydian(void);
-CodePointSet getUcpMahajani(void);
-CodePointSet getUcpMalayalam(void);
-CodePointSet getUcpMandaic(void);
-CodePointSet getUcpManichaean(void);
-CodePointSet getUcpMeetei_Mayek(void);
-CodePointSet getUcpMende_Kikakui(void);
-CodePointSet getUcpMeroitic_Cursive(void);
-CodePointSet getUcpMeroitic_Hieroglyphs(void);
-CodePointSet getUcpMiao(void);
-CodePointSet getUcpModi(void);
-CodePointSet getUcpMongolian(void);
-CodePointSet getUcpMro(void);
-CodePointSet getUcpMyanmar(void);
-CodePointSet getUcpNabataean(void);
-CodePointSet getUcpNew_Tai_Lue(void);
-CodePointSet getUcpNko(void);
-CodePointSet getUcpOgham(void);
-CodePointSet getUcpOl_Chiki(void);
-CodePointSet getUcpOld_Italic(void);
-CodePointSet getUcpOld_North_Arabian(void);
-CodePointSet getUcpOld_Permic(void);
-CodePointSet getUcpOld_Persian(void);
-CodePointSet getUcpOld_South_Arabian(void);
-CodePointSet getUcpOld_Turkic(void);
-CodePointSet getUcpOriya(void);
-CodePointSet getUcpOsmanya(void);
-CodePointSet getUcpPahawh_Hmong(void);
-CodePointSet getUcpPalmyrene(void);
-CodePointSet getUcpPau_Cin_Hau(void);
-CodePointSet getUcpPhags_Pa(void);
-CodePointSet getUcpPhoenician(void);
-CodePointSet getUcpPsalter_Pahlavi(void);
-CodePointSet getUcpRejang(void);
-CodePointSet getUcpRunic(void);
-CodePointSet getUcpSamaritan(void);
-CodePointSet getUcpSaurashtra(void);
-CodePointSet getUcpSharada(void);
-CodePointSet getUcpShavian(void);
-CodePointSet getUcpSiddham(void);
-CodePointSet getUcpSinhala(void);
-CodePointSet getUcpSora_Sompeng(void);
-CodePointSet getUcpSundanese(void);
-CodePointSet getUcpSyloti_Nagri(void);
-CodePointSet getUcpSyriac(void);
-CodePointSet getUcpTagalog(void);
-CodePointSet getUcpTagbanwa(void);
-CodePointSet getUcpTai_Le(void);
-CodePointSet getUcpTai_Tham(void);
-CodePointSet getUcpTai_Viet(void);
-CodePointSet getUcpTakri(void);
-CodePointSet getUcpTamil(void);
-CodePointSet getUcpTelugu(void);
-CodePointSet getUcpThaana(void);
-CodePointSet getUcpThai(void);
-CodePointSet getUcpTibetan(void);
-CodePointSet getUcpTifinagh(void);
-CodePointSet getUcpTirhuta(void);
-CodePointSet getUcpUgaritic(void);
-CodePointSet getUcpVai(void);
-CodePointSet getUcpWarang_Citi(void);
-CodePointSet getUcpYi(void);
-
-#ifdef UCP_TABLE_DEFINE_FN
-
-static const unichar ucp_C_def[] = {
- 0x0, 0x1f,
- 0x7f, 0x9f,
- 0xad, 0xad,
- 0x378, 0x379,
- 0x380, 0x383,
- 0x38b, 0x38b,
- 0x38d, 0x38d,
- 0x3a2, 0x3a2,
- 0x530, 0x530,
- 0x557, 0x558,
- 0x560, 0x560,
- 0x588, 0x588,
- 0x58b, 0x58c,
- 0x590, 0x590,
- 0x5c8, 0x5cf,
- 0x5eb, 0x5ef,
- 0x5f5, 0x605,
- 0x61c, 0x61d,
- 0x6dd, 0x6dd,
- 0x70e, 0x70f,
- 0x74b, 0x74c,
- 0x7b2, 0x7bf,
- 0x7fb, 0x7ff,
- 0x82e, 0x82f,
- 0x83f, 0x83f,
- 0x85c, 0x85d,
- 0x85f, 0x89f,
- 0x8b3, 0x8e3,
- 0x984, 0x984,
- 0x98d, 0x98e,
- 0x991, 0x992,
- 0x9a9, 0x9a9,
- 0x9b1, 0x9b1,
- 0x9b3, 0x9b5,
- 0x9ba, 0x9bb,
- 0x9c5, 0x9c6,
- 0x9c9, 0x9ca,
- 0x9cf, 0x9d6,
- 0x9d8, 0x9db,
- 0x9de, 0x9de,
- 0x9e4, 0x9e5,
- 0x9fc, 0xa00,
- 0xa04, 0xa04,
- 0xa0b, 0xa0e,
- 0xa11, 0xa12,
- 0xa29, 0xa29,
- 0xa31, 0xa31,
- 0xa34, 0xa34,
- 0xa37, 0xa37,
- 0xa3a, 0xa3b,
- 0xa3d, 0xa3d,
- 0xa43, 0xa46,
- 0xa49, 0xa4a,
- 0xa4e, 0xa50,
- 0xa52, 0xa58,
- 0xa5d, 0xa5d,
- 0xa5f, 0xa65,
- 0xa76, 0xa80,
- 0xa84, 0xa84,
- 0xa8e, 0xa8e,
- 0xa92, 0xa92,
- 0xaa9, 0xaa9,
- 0xab1, 0xab1,
- 0xab4, 0xab4,
- 0xaba, 0xabb,
- 0xac6, 0xac6,
- 0xaca, 0xaca,
- 0xace, 0xacf,
- 0xad1, 0xadf,
- 0xae4, 0xae5,
- 0xaf2, 0xb00,
- 0xb04, 0xb04,
- 0xb0d, 0xb0e,
- 0xb11, 0xb12,
- 0xb29, 0xb29,
- 0xb31, 0xb31,
- 0xb34, 0xb34,
- 0xb3a, 0xb3b,
- 0xb45, 0xb46,
- 0xb49, 0xb4a,
- 0xb4e, 0xb55,
- 0xb58, 0xb5b,
- 0xb5e, 0xb5e,
- 0xb64, 0xb65,
- 0xb78, 0xb81,
- 0xb84, 0xb84,
- 0xb8b, 0xb8d,
- 0xb91, 0xb91,
- 0xb96, 0xb98,
- 0xb9b, 0xb9b,
- 0xb9d, 0xb9d,
- 0xba0, 0xba2,
- 0xba5, 0xba7,
- 0xbab, 0xbad,
- 0xbba, 0xbbd,
- 0xbc3, 0xbc5,
- 0xbc9, 0xbc9,
- 0xbce, 0xbcf,
- 0xbd1, 0xbd6,
- 0xbd8, 0xbe5,
- 0xbfb, 0xbff,
- 0xc04, 0xc04,
- 0xc0d, 0xc0d,
- 0xc11, 0xc11,
- 0xc29, 0xc29,
- 0xc3a, 0xc3c,
- 0xc45, 0xc45,
- 0xc49, 0xc49,
- 0xc4e, 0xc54,
- 0xc57, 0xc57,
- 0xc5a, 0xc5f,
- 0xc64, 0xc65,
- 0xc70, 0xc77,
- 0xc80, 0xc80,
- 0xc84, 0xc84,
- 0xc8d, 0xc8d,
- 0xc91, 0xc91,
- 0xca9, 0xca9,
- 0xcb4, 0xcb4,
- 0xcba, 0xcbb,
- 0xcc5, 0xcc5,
- 0xcc9, 0xcc9,
- 0xcce, 0xcd4,
- 0xcd7, 0xcdd,
- 0xcdf, 0xcdf,
- 0xce4, 0xce5,
- 0xcf0, 0xcf0,
- 0xcf3, 0xd00,
- 0xd04, 0xd04,
- 0xd0d, 0xd0d,
- 0xd11, 0xd11,
- 0xd3b, 0xd3c,
- 0xd45, 0xd45,
- 0xd49, 0xd49,
- 0xd4f, 0xd56,
- 0xd58, 0xd5f,
- 0xd64, 0xd65,
- 0xd76, 0xd78,
- 0xd80, 0xd81,
- 0xd84, 0xd84,
- 0xd97, 0xd99,
- 0xdb2, 0xdb2,
- 0xdbc, 0xdbc,
- 0xdbe, 0xdbf,
- 0xdc7, 0xdc9,
- 0xdcb, 0xdce,
- 0xdd5, 0xdd5,
- 0xdd7, 0xdd7,
- 0xde0, 0xde5,
- 0xdf0, 0xdf1,
- 0xdf5, 0xe00,
- 0xe3b, 0xe3e,
- 0xe5c, 0xe80,
- 0xe83, 0xe83,
- 0xe85, 0xe86,
- 0xe89, 0xe89,
- 0xe8b, 0xe8c,
- 0xe8e, 0xe93,
- 0xe98, 0xe98,
- 0xea0, 0xea0,
- 0xea4, 0xea4,
- 0xea6, 0xea6,
- 0xea8, 0xea9,
- 0xeac, 0xeac,
- 0xeba, 0xeba,
- 0xebe, 0xebf,
- 0xec5, 0xec5,
- 0xec7, 0xec7,
- 0xece, 0xecf,
- 0xeda, 0xedb,
- 0xee0, 0xeff,
- 0xf48, 0xf48,
- 0xf6d, 0xf70,
- 0xf98, 0xf98,
- 0xfbd, 0xfbd,
- 0xfcd, 0xfcd,
- 0xfdb, 0xfff,
- 0x10c6, 0x10c6,
- 0x10c8, 0x10cc,
- 0x10ce, 0x10cf,
- 0x1249, 0x1249,
- 0x124e, 0x124f,
- 0x1257, 0x1257,
- 0x1259, 0x1259,
- 0x125e, 0x125f,
- 0x1289, 0x1289,
- 0x128e, 0x128f,
- 0x12b1, 0x12b1,
- 0x12b6, 0x12b7,
- 0x12bf, 0x12bf,
- 0x12c1, 0x12c1,
- 0x12c6, 0x12c7,
- 0x12d7, 0x12d7,
- 0x1311, 0x1311,
- 0x1316, 0x1317,
- 0x135b, 0x135c,
- 0x137d, 0x137f,
- 0x139a, 0x139f,
- 0x13f5, 0x13ff,
- 0x169d, 0x169f,
- 0x16f9, 0x16ff,
- 0x170d, 0x170d,
- 0x1715, 0x171f,
- 0x1737, 0x173f,
- 0x1754, 0x175f,
- 0x176d, 0x176d,
- 0x1771, 0x1771,
- 0x1774, 0x177f,
- 0x17de, 0x17df,
- 0x17ea, 0x17ef,
- 0x17fa, 0x17ff,
- 0x180e, 0x180f,
- 0x181a, 0x181f,
- 0x1878, 0x187f,
- 0x18ab, 0x18af,
- 0x18f6, 0x18ff,
- 0x191f, 0x191f,
- 0x192c, 0x192f,
- 0x193c, 0x193f,
- 0x1941, 0x1943,
- 0x196e, 0x196f,
- 0x1975, 0x197f,
- 0x19ac, 0x19af,
- 0x19ca, 0x19cf,
- 0x19db, 0x19dd,
- 0x1a1c, 0x1a1d,
- 0x1a5f, 0x1a5f,
- 0x1a7d, 0x1a7e,
- 0x1a8a, 0x1a8f,
- 0x1a9a, 0x1a9f,
- 0x1aae, 0x1aaf,
- 0x1abf, 0x1aff,
- 0x1b4c, 0x1b4f,
- 0x1b7d, 0x1b7f,
- 0x1bf4, 0x1bfb,
- 0x1c38, 0x1c3a,
- 0x1c4a, 0x1c4c,
- 0x1c80, 0x1cbf,
- 0x1cc8, 0x1ccf,
- 0x1cf7, 0x1cf7,
- 0x1cfa, 0x1cff,
- 0x1df6, 0x1dfb,
- 0x1f16, 0x1f17,
- 0x1f1e, 0x1f1f,
- 0x1f46, 0x1f47,
- 0x1f4e, 0x1f4f,
- 0x1f58, 0x1f58,
- 0x1f5a, 0x1f5a,
- 0x1f5c, 0x1f5c,
- 0x1f5e, 0x1f5e,
- 0x1f7e, 0x1f7f,
- 0x1fb5, 0x1fb5,
- 0x1fc5, 0x1fc5,
- 0x1fd4, 0x1fd5,
- 0x1fdc, 0x1fdc,
- 0x1ff0, 0x1ff1,
- 0x1ff5, 0x1ff5,
- 0x1fff, 0x1fff,
- 0x200b, 0x200f,
- 0x202a, 0x202e,
- 0x2060, 0x206f,
- 0x2072, 0x2073,
- 0x208f, 0x208f,
- 0x209d, 0x209f,
- 0x20be, 0x20cf,
- 0x20f1, 0x20ff,
- 0x218a, 0x218f,
- 0x23fb, 0x23ff,
- 0x2427, 0x243f,
- 0x244b, 0x245f,
- 0x2b74, 0x2b75,
- 0x2b96, 0x2b97,
- 0x2bba, 0x2bbc,
- 0x2bc9, 0x2bc9,
- 0x2bd2, 0x2bff,
- 0x2c2f, 0x2c2f,
- 0x2c5f, 0x2c5f,
- 0x2cf4, 0x2cf8,
- 0x2d26, 0x2d26,
- 0x2d28, 0x2d2c,
- 0x2d2e, 0x2d2f,
- 0x2d68, 0x2d6e,
- 0x2d71, 0x2d7e,
- 0x2d97, 0x2d9f,
- 0x2da7, 0x2da7,
- 0x2daf, 0x2daf,
- 0x2db7, 0x2db7,
- 0x2dbf, 0x2dbf,
- 0x2dc7, 0x2dc7,
- 0x2dcf, 0x2dcf,
- 0x2dd7, 0x2dd7,
- 0x2ddf, 0x2ddf,
- 0x2e43, 0x2e7f,
- 0x2e9a, 0x2e9a,
- 0x2ef4, 0x2eff,
- 0x2fd6, 0x2fef,
- 0x2ffc, 0x2fff,
- 0x3040, 0x3040,
- 0x3097, 0x3098,
- 0x3100, 0x3104,
- 0x312e, 0x3130,
- 0x318f, 0x318f,
- 0x31bb, 0x31bf,
- 0x31e4, 0x31ef,
- 0x321f, 0x321f,
- 0x32ff, 0x32ff,
- 0x4db6, 0x4dbf,
- 0x9fcd, 0x9fff,
- 0xa48d, 0xa48f,
- 0xa4c7, 0xa4cf,
- 0xa62c, 0xa63f,
- 0xa69e, 0xa69e,
- 0xa6f8, 0xa6ff,
- 0xa78f, 0xa78f,
- 0xa7ae, 0xa7af,
- 0xa7b2, 0xa7f6,
- 0xa82c, 0xa82f,
- 0xa83a, 0xa83f,
- 0xa878, 0xa87f,
- 0xa8c5, 0xa8cd,
- 0xa8da, 0xa8df,
- 0xa8fc, 0xa8ff,
- 0xa954, 0xa95e,
- 0xa97d, 0xa97f,
- 0xa9ce, 0xa9ce,
- 0xa9da, 0xa9dd,
- 0xa9ff, 0xa9ff,
- 0xaa37, 0xaa3f,
- 0xaa4e, 0xaa4f,
- 0xaa5a, 0xaa5b,
- 0xaac3, 0xaada,
- 0xaaf7, 0xab00,
- 0xab07, 0xab08,
- 0xab0f, 0xab10,
- 0xab17, 0xab1f,
- 0xab27, 0xab27,
- 0xab2f, 0xab2f,
- 0xab60, 0xab63,
- 0xab66, 0xabbf,
- 0xabee, 0xabef,
- 0xabfa, 0xabff,
- 0xd7a4, 0xd7af,
- 0xd7c7, 0xd7ca,
- 0xd7fc, 0xf8ff,
- 0xfa6e, 0xfa6f,
- 0xfada, 0xfaff,
- 0xfb07, 0xfb12,
- 0xfb18, 0xfb1c,
- 0xfb37, 0xfb37,
- 0xfb3d, 0xfb3d,
- 0xfb3f, 0xfb3f,
- 0xfb42, 0xfb42,
- 0xfb45, 0xfb45,
- 0xfbc2, 0xfbd2,
- 0xfd40, 0xfd4f,
- 0xfd90, 0xfd91,
- 0xfdc8, 0xfdef,
- 0xfdfe, 0xfdff,
- 0xfe1a, 0xfe1f,
- 0xfe2e, 0xfe2f,
- 0xfe53, 0xfe53,
- 0xfe67, 0xfe67,
- 0xfe6c, 0xfe6f,
- 0xfe75, 0xfe75,
- 0xfefd, 0xff00,
- 0xffbf, 0xffc1,
- 0xffc8, 0xffc9,
- 0xffd0, 0xffd1,
- 0xffd8, 0xffd9,
- 0xffdd, 0xffdf,
- 0xffe7, 0xffe7,
- 0xffef, 0xfffb,
- 0xfffe, 0xffff,
- 0x1000c, 0x1000c,
- 0x10027, 0x10027,
- 0x1003b, 0x1003b,
- 0x1003e, 0x1003e,
- 0x1004e, 0x1004f,
- 0x1005e, 0x1007f,
- 0x100fb, 0x100ff,
- 0x10103, 0x10106,
- 0x10134, 0x10136,
- 0x1018d, 0x1018f,
- 0x1019c, 0x1019f,
- 0x101a1, 0x101cf,
- 0x101fe, 0x1027f,
- 0x1029d, 0x1029f,
- 0x102d1, 0x102df,
- 0x102fc, 0x102ff,
- 0x10324, 0x1032f,
- 0x1034b, 0x1034f,
- 0x1037b, 0x1037f,
- 0x1039e, 0x1039e,
- 0x103c4, 0x103c7,
- 0x103d6, 0x103ff,
- 0x1049e, 0x1049f,
- 0x104aa, 0x104ff,
- 0x10528, 0x1052f,
- 0x10564, 0x1056e,
- 0x10570, 0x105ff,
- 0x10737, 0x1073f,
- 0x10756, 0x1075f,
- 0x10768, 0x107ff,
- 0x10806, 0x10807,
- 0x10809, 0x10809,
- 0x10836, 0x10836,
- 0x10839, 0x1083b,
- 0x1083d, 0x1083e,
- 0x10856, 0x10856,
- 0x1089f, 0x108a6,
- 0x108b0, 0x108ff,
- 0x1091c, 0x1091e,
- 0x1093a, 0x1093e,
- 0x10940, 0x1097f,
- 0x109b8, 0x109bd,
- 0x109c0, 0x109ff,
- 0x10a04, 0x10a04,
- 0x10a07, 0x10a0b,
- 0x10a14, 0x10a14,
- 0x10a18, 0x10a18,
- 0x10a34, 0x10a37,
- 0x10a3b, 0x10a3e,
- 0x10a48, 0x10a4f,
- 0x10a59, 0x10a5f,
- 0x10aa0, 0x10abf,
- 0x10ae7, 0x10aea,
- 0x10af7, 0x10aff,
- 0x10b36, 0x10b38,
- 0x10b56, 0x10b57,
- 0x10b73, 0x10b77,
- 0x10b92, 0x10b98,
- 0x10b9d, 0x10ba8,
- 0x10bb0, 0x10bff,
- 0x10c49, 0x10e5f,
- 0x10e7f, 0x10fff,
- 0x1104e, 0x11051,
- 0x11070, 0x1107e,
- 0x110bd, 0x110bd,
- 0x110c2, 0x110cf,
- 0x110e9, 0x110ef,
- 0x110fa, 0x110ff,
- 0x11135, 0x11135,
- 0x11144, 0x1114f,
- 0x11177, 0x1117f,
- 0x111c9, 0x111cc,
- 0x111ce, 0x111cf,
- 0x111db, 0x111e0,
- 0x111f5, 0x111ff,
- 0x11212, 0x11212,
- 0x1123e, 0x112af,
- 0x112eb, 0x112ef,
- 0x112fa, 0x11300,
- 0x11304, 0x11304,
- 0x1130d, 0x1130e,
- 0x11311, 0x11312,
- 0x11329, 0x11329,
- 0x11331, 0x11331,
- 0x11334, 0x11334,
- 0x1133a, 0x1133b,
- 0x11345, 0x11346,
- 0x11349, 0x1134a,
- 0x1134e, 0x11356,
- 0x11358, 0x1135c,
- 0x11364, 0x11365,
- 0x1136d, 0x1136f,
- 0x11375, 0x1147f,
- 0x114c8, 0x114cf,
- 0x114da, 0x1157f,
- 0x115b6, 0x115b7,
- 0x115ca, 0x115ff,
- 0x11645, 0x1164f,
- 0x1165a, 0x1167f,
- 0x116b8, 0x116bf,
- 0x116ca, 0x1189f,
- 0x118f3, 0x118fe,
- 0x11900, 0x11abf,
- 0x11af9, 0x11fff,
- 0x12399, 0x123ff,
- 0x1246f, 0x1246f,
- 0x12475, 0x12fff,
- 0x1342f, 0x167ff,
- 0x16a39, 0x16a3f,
- 0x16a5f, 0x16a5f,
- 0x16a6a, 0x16a6d,
- 0x16a70, 0x16acf,
- 0x16aee, 0x16aef,
- 0x16af6, 0x16aff,
- 0x16b46, 0x16b4f,
- 0x16b5a, 0x16b5a,
- 0x16b62, 0x16b62,
- 0x16b78, 0x16b7c,
- 0x16b90, 0x16eff,
- 0x16f45, 0x16f4f,
- 0x16f7f, 0x16f8e,
- 0x16fa0, 0x1afff,
- 0x1b002, 0x1bbff,
- 0x1bc6b, 0x1bc6f,
- 0x1bc7d, 0x1bc7f,
- 0x1bc89, 0x1bc8f,
- 0x1bc9a, 0x1bc9b,
- 0x1bca0, 0x1cfff,
- 0x1d0f6, 0x1d0ff,
- 0x1d127, 0x1d128,
- 0x1d173, 0x1d17a,
- 0x1d1de, 0x1d1ff,
- 0x1d246, 0x1d2ff,
- 0x1d357, 0x1d35f,
- 0x1d372, 0x1d3ff,
- 0x1d455, 0x1d455,
- 0x1d49d, 0x1d49d,
- 0x1d4a0, 0x1d4a1,
- 0x1d4a3, 0x1d4a4,
- 0x1d4a7, 0x1d4a8,
- 0x1d4ad, 0x1d4ad,
- 0x1d4ba, 0x1d4ba,
- 0x1d4bc, 0x1d4bc,
- 0x1d4c4, 0x1d4c4,
- 0x1d506, 0x1d506,
- 0x1d50b, 0x1d50c,
- 0x1d515, 0x1d515,
- 0x1d51d, 0x1d51d,
- 0x1d53a, 0x1d53a,
- 0x1d53f, 0x1d53f,
- 0x1d545, 0x1d545,
- 0x1d547, 0x1d549,
- 0x1d551, 0x1d551,
- 0x1d6a6, 0x1d6a7,
- 0x1d7cc, 0x1d7cd,
- 0x1d800, 0x1e7ff,
- 0x1e8c5, 0x1e8c6,
- 0x1e8d7, 0x1edff,
- 0x1ee04, 0x1ee04,
- 0x1ee20, 0x1ee20,
- 0x1ee23, 0x1ee23,
- 0x1ee25, 0x1ee26,
- 0x1ee28, 0x1ee28,
- 0x1ee33, 0x1ee33,
- 0x1ee38, 0x1ee38,
- 0x1ee3a, 0x1ee3a,
- 0x1ee3c, 0x1ee41,
- 0x1ee43, 0x1ee46,
- 0x1ee48, 0x1ee48,
- 0x1ee4a, 0x1ee4a,
- 0x1ee4c, 0x1ee4c,
- 0x1ee50, 0x1ee50,
- 0x1ee53, 0x1ee53,
- 0x1ee55, 0x1ee56,
- 0x1ee58, 0x1ee58,
- 0x1ee5a, 0x1ee5a,
- 0x1ee5c, 0x1ee5c,
- 0x1ee5e, 0x1ee5e,
- 0x1ee60, 0x1ee60,
- 0x1ee63, 0x1ee63,
- 0x1ee65, 0x1ee66,
- 0x1ee6b, 0x1ee6b,
- 0x1ee73, 0x1ee73,
- 0x1ee78, 0x1ee78,
- 0x1ee7d, 0x1ee7d,
- 0x1ee7f, 0x1ee7f,
- 0x1ee8a, 0x1ee8a,
- 0x1ee9c, 0x1eea0,
- 0x1eea4, 0x1eea4,
- 0x1eeaa, 0x1eeaa,
- 0x1eebc, 0x1eeef,
- 0x1eef2, 0x1efff,
- 0x1f02c, 0x1f02f,
- 0x1f094, 0x1f09f,
- 0x1f0af, 0x1f0b0,
- 0x1f0c0, 0x1f0c0,
- 0x1f0d0, 0x1f0d0,
- 0x1f0f6, 0x1f0ff,
- 0x1f10d, 0x1f10f,
- 0x1f12f, 0x1f12f,
- 0x1f16c, 0x1f16f,
- 0x1f19b, 0x1f1e5,
- 0x1f203, 0x1f20f,
- 0x1f23b, 0x1f23f,
- 0x1f249, 0x1f24f,
- 0x1f252, 0x1f2ff,
- 0x1f32d, 0x1f32f,
- 0x1f37e, 0x1f37f,
- 0x1f3cf, 0x1f3d3,
- 0x1f3f8, 0x1f3ff,
- 0x1f4ff, 0x1f4ff,
- 0x1f54b, 0x1f54f,
- 0x1f57a, 0x1f57a,
- 0x1f5a4, 0x1f5a4,
- 0x1f643, 0x1f644,
- 0x1f6d0, 0x1f6df,
- 0x1f6ed, 0x1f6ef,
- 0x1f6f4, 0x1f6ff,
- 0x1f774, 0x1f77f,
- 0x1f7d5, 0x1f7ff,
- 0x1f80c, 0x1f80f,
- 0x1f848, 0x1f84f,
- 0x1f85a, 0x1f85f,
- 0x1f888, 0x1f88f,
- 0x1f8ae, 0x1ffff,
- 0x2a6d7, 0x2a6ff,
- 0x2b735, 0x2b73f,
- 0x2b81e, 0x2f7ff,
- 0x2fa1e, 0xe00ff,
- 0xe01f0, 0x10ffff
-};
-UCP_FN(C)
-
-static const unichar ucp_Cc_def[] = {
- 0x0, 0x1f,
- 0x7f, 0x9f
-};
-UCP_FN(Cc)
-
-static const unichar ucp_Cf_def[] = {
- 0xad, 0xad,
- 0x600, 0x605,
- 0x61c, 0x61c,
- 0x6dd, 0x6dd,
- 0x70f, 0x70f,
- 0x180e, 0x180e,
- 0x200b, 0x200f,
- 0x202a, 0x202e,
- 0x2060, 0x2064,
- 0x2066, 0x206f,
- 0xfeff, 0xfeff,
- 0xfff9, 0xfffb,
- 0x110bd, 0x110bd,
- 0x1bca0, 0x1bca3,
- 0x1d173, 0x1d17a,
- 0xe0001, 0xe0001,
- 0xe0020, 0xe007f
-};
-UCP_FN(Cf)
-
-static const unichar ucp_Cn_def[] = {
- 0x378, 0x379,
- 0x380, 0x383,
- 0x38b, 0x38b,
- 0x38d, 0x38d,
- 0x3a2, 0x3a2,
- 0x530, 0x530,
- 0x557, 0x558,
- 0x560, 0x560,
- 0x588, 0x588,
- 0x58b, 0x58c,
- 0x590, 0x590,
- 0x5c8, 0x5cf,
- 0x5eb, 0x5ef,
- 0x5f5, 0x5ff,
- 0x61d, 0x61d,
- 0x70e, 0x70e,
- 0x74b, 0x74c,
- 0x7b2, 0x7bf,
- 0x7fb, 0x7ff,
- 0x82e, 0x82f,
- 0x83f, 0x83f,
- 0x85c, 0x85d,
- 0x85f, 0x89f,
- 0x8b3, 0x8e3,
- 0x984, 0x984,
- 0x98d, 0x98e,
- 0x991, 0x992,
- 0x9a9, 0x9a9,
- 0x9b1, 0x9b1,
- 0x9b3, 0x9b5,
- 0x9ba, 0x9bb,
- 0x9c5, 0x9c6,
- 0x9c9, 0x9ca,
- 0x9cf, 0x9d6,
- 0x9d8, 0x9db,
- 0x9de, 0x9de,
- 0x9e4, 0x9e5,
- 0x9fc, 0xa00,
- 0xa04, 0xa04,
- 0xa0b, 0xa0e,
- 0xa11, 0xa12,
- 0xa29, 0xa29,
- 0xa31, 0xa31,
- 0xa34, 0xa34,
- 0xa37, 0xa37,
- 0xa3a, 0xa3b,
- 0xa3d, 0xa3d,
- 0xa43, 0xa46,
- 0xa49, 0xa4a,
- 0xa4e, 0xa50,
- 0xa52, 0xa58,
- 0xa5d, 0xa5d,
- 0xa5f, 0xa65,
- 0xa76, 0xa80,
- 0xa84, 0xa84,
- 0xa8e, 0xa8e,
- 0xa92, 0xa92,
- 0xaa9, 0xaa9,
- 0xab1, 0xab1,
- 0xab4, 0xab4,
- 0xaba, 0xabb,
- 0xac6, 0xac6,
- 0xaca, 0xaca,
- 0xace, 0xacf,
- 0xad1, 0xadf,
- 0xae4, 0xae5,
- 0xaf2, 0xb00,
- 0xb04, 0xb04,
- 0xb0d, 0xb0e,
- 0xb11, 0xb12,
- 0xb29, 0xb29,
- 0xb31, 0xb31,
- 0xb34, 0xb34,
- 0xb3a, 0xb3b,
- 0xb45, 0xb46,
- 0xb49, 0xb4a,
- 0xb4e, 0xb55,
- 0xb58, 0xb5b,
- 0xb5e, 0xb5e,
- 0xb64, 0xb65,
- 0xb78, 0xb81,
- 0xb84, 0xb84,
- 0xb8b, 0xb8d,
- 0xb91, 0xb91,
- 0xb96, 0xb98,
- 0xb9b, 0xb9b,
- 0xb9d, 0xb9d,
- 0xba0, 0xba2,
- 0xba5, 0xba7,
- 0xbab, 0xbad,
- 0xbba, 0xbbd,
- 0xbc3, 0xbc5,
- 0xbc9, 0xbc9,
- 0xbce, 0xbcf,
- 0xbd1, 0xbd6,
- 0xbd8, 0xbe5,
- 0xbfb, 0xbff,
- 0xc04, 0xc04,
- 0xc0d, 0xc0d,
- 0xc11, 0xc11,
- 0xc29, 0xc29,
- 0xc3a, 0xc3c,
- 0xc45, 0xc45,
- 0xc49, 0xc49,
- 0xc4e, 0xc54,
- 0xc57, 0xc57,
- 0xc5a, 0xc5f,
- 0xc64, 0xc65,
- 0xc70, 0xc77,
- 0xc80, 0xc80,
- 0xc84, 0xc84,
- 0xc8d, 0xc8d,
- 0xc91, 0xc91,
- 0xca9, 0xca9,
- 0xcb4, 0xcb4,
- 0xcba, 0xcbb,
- 0xcc5, 0xcc5,
- 0xcc9, 0xcc9,
- 0xcce, 0xcd4,
- 0xcd7, 0xcdd,
- 0xcdf, 0xcdf,
- 0xce4, 0xce5,
- 0xcf0, 0xcf0,
- 0xcf3, 0xd00,
- 0xd04, 0xd04,
- 0xd0d, 0xd0d,
- 0xd11, 0xd11,
- 0xd3b, 0xd3c,
- 0xd45, 0xd45,
- 0xd49, 0xd49,
- 0xd4f, 0xd56,
- 0xd58, 0xd5f,
- 0xd64, 0xd65,
- 0xd76, 0xd78,
- 0xd80, 0xd81,
- 0xd84, 0xd84,
- 0xd97, 0xd99,
- 0xdb2, 0xdb2,
- 0xdbc, 0xdbc,
- 0xdbe, 0xdbf,
- 0xdc7, 0xdc9,
- 0xdcb, 0xdce,
- 0xdd5, 0xdd5,
- 0xdd7, 0xdd7,
- 0xde0, 0xde5,
- 0xdf0, 0xdf1,
- 0xdf5, 0xe00,
- 0xe3b, 0xe3e,
- 0xe5c, 0xe80,
- 0xe83, 0xe83,
- 0xe85, 0xe86,
- 0xe89, 0xe89,
- 0xe8b, 0xe8c,
- 0xe8e, 0xe93,
- 0xe98, 0xe98,
- 0xea0, 0xea0,
- 0xea4, 0xea4,
- 0xea6, 0xea6,
- 0xea8, 0xea9,
- 0xeac, 0xeac,
- 0xeba, 0xeba,
- 0xebe, 0xebf,
- 0xec5, 0xec5,
- 0xec7, 0xec7,
- 0xece, 0xecf,
- 0xeda, 0xedb,
- 0xee0, 0xeff,
- 0xf48, 0xf48,
- 0xf6d, 0xf70,
- 0xf98, 0xf98,
- 0xfbd, 0xfbd,
- 0xfcd, 0xfcd,
- 0xfdb, 0xfff,
- 0x10c6, 0x10c6,
- 0x10c8, 0x10cc,
- 0x10ce, 0x10cf,
- 0x1249, 0x1249,
- 0x124e, 0x124f,
- 0x1257, 0x1257,
- 0x1259, 0x1259,
- 0x125e, 0x125f,
- 0x1289, 0x1289,
- 0x128e, 0x128f,
- 0x12b1, 0x12b1,
- 0x12b6, 0x12b7,
- 0x12bf, 0x12bf,
- 0x12c1, 0x12c1,
- 0x12c6, 0x12c7,
- 0x12d7, 0x12d7,
- 0x1311, 0x1311,
- 0x1316, 0x1317,
- 0x135b, 0x135c,
- 0x137d, 0x137f,
- 0x139a, 0x139f,
- 0x13f5, 0x13ff,
- 0x169d, 0x169f,
- 0x16f9, 0x16ff,
- 0x170d, 0x170d,
- 0x1715, 0x171f,
- 0x1737, 0x173f,
- 0x1754, 0x175f,
- 0x176d, 0x176d,
- 0x1771, 0x1771,
- 0x1774, 0x177f,
- 0x17de, 0x17df,
- 0x17ea, 0x17ef,
- 0x17fa, 0x17ff,
- 0x180f, 0x180f,
- 0x181a, 0x181f,
- 0x1878, 0x187f,
- 0x18ab, 0x18af,
- 0x18f6, 0x18ff,
- 0x191f, 0x191f,
- 0x192c, 0x192f,
- 0x193c, 0x193f,
- 0x1941, 0x1943,
- 0x196e, 0x196f,
- 0x1975, 0x197f,
- 0x19ac, 0x19af,
- 0x19ca, 0x19cf,
- 0x19db, 0x19dd,
- 0x1a1c, 0x1a1d,
- 0x1a5f, 0x1a5f,
- 0x1a7d, 0x1a7e,
- 0x1a8a, 0x1a8f,
- 0x1a9a, 0x1a9f,
- 0x1aae, 0x1aaf,
- 0x1abf, 0x1aff,
- 0x1b4c, 0x1b4f,
- 0x1b7d, 0x1b7f,
- 0x1bf4, 0x1bfb,
- 0x1c38, 0x1c3a,
- 0x1c4a, 0x1c4c,
- 0x1c80, 0x1cbf,
- 0x1cc8, 0x1ccf,
- 0x1cf7, 0x1cf7,
- 0x1cfa, 0x1cff,
- 0x1df6, 0x1dfb,
- 0x1f16, 0x1f17,
- 0x1f1e, 0x1f1f,
- 0x1f46, 0x1f47,
- 0x1f4e, 0x1f4f,
- 0x1f58, 0x1f58,
- 0x1f5a, 0x1f5a,
- 0x1f5c, 0x1f5c,
- 0x1f5e, 0x1f5e,
- 0x1f7e, 0x1f7f,
- 0x1fb5, 0x1fb5,
- 0x1fc5, 0x1fc5,
- 0x1fd4, 0x1fd5,
- 0x1fdc, 0x1fdc,
- 0x1ff0, 0x1ff1,
- 0x1ff5, 0x1ff5,
- 0x1fff, 0x1fff,
- 0x2065, 0x2065,
- 0x2072, 0x2073,
- 0x208f, 0x208f,
- 0x209d, 0x209f,
- 0x20be, 0x20cf,
- 0x20f1, 0x20ff,
- 0x218a, 0x218f,
- 0x23fb, 0x23ff,
- 0x2427, 0x243f,
- 0x244b, 0x245f,
- 0x2b74, 0x2b75,
- 0x2b96, 0x2b97,
- 0x2bba, 0x2bbc,
- 0x2bc9, 0x2bc9,
- 0x2bd2, 0x2bff,
- 0x2c2f, 0x2c2f,
- 0x2c5f, 0x2c5f,
- 0x2cf4, 0x2cf8,
- 0x2d26, 0x2d26,
- 0x2d28, 0x2d2c,
- 0x2d2e, 0x2d2f,
- 0x2d68, 0x2d6e,
- 0x2d71, 0x2d7e,
- 0x2d97, 0x2d9f,
- 0x2da7, 0x2da7,
- 0x2daf, 0x2daf,
- 0x2db7, 0x2db7,
- 0x2dbf, 0x2dbf,
- 0x2dc7, 0x2dc7,
- 0x2dcf, 0x2dcf,
- 0x2dd7, 0x2dd7,
- 0x2ddf, 0x2ddf,
- 0x2e43, 0x2e7f,
- 0x2e9a, 0x2e9a,
- 0x2ef4, 0x2eff,
- 0x2fd6, 0x2fef,
- 0x2ffc, 0x2fff,
- 0x3040, 0x3040,
- 0x3097, 0x3098,
- 0x3100, 0x3104,
- 0x312e, 0x3130,
- 0x318f, 0x318f,
- 0x31bb, 0x31bf,
- 0x31e4, 0x31ef,
- 0x321f, 0x321f,
- 0x32ff, 0x32ff,
- 0x4db6, 0x4dbf,
- 0x9fcd, 0x9fff,
- 0xa48d, 0xa48f,
- 0xa4c7, 0xa4cf,
- 0xa62c, 0xa63f,
- 0xa69e, 0xa69e,
- 0xa6f8, 0xa6ff,
- 0xa78f, 0xa78f,
- 0xa7ae, 0xa7af,
- 0xa7b2, 0xa7f6,
- 0xa82c, 0xa82f,
- 0xa83a, 0xa83f,
- 0xa878, 0xa87f,
- 0xa8c5, 0xa8cd,
- 0xa8da, 0xa8df,
- 0xa8fc, 0xa8ff,
- 0xa954, 0xa95e,
- 0xa97d, 0xa97f,
- 0xa9ce, 0xa9ce,
- 0xa9da, 0xa9dd,
- 0xa9ff, 0xa9ff,
- 0xaa37, 0xaa3f,
- 0xaa4e, 0xaa4f,
- 0xaa5a, 0xaa5b,
- 0xaac3, 0xaada,
- 0xaaf7, 0xab00,
- 0xab07, 0xab08,
- 0xab0f, 0xab10,
- 0xab17, 0xab1f,
- 0xab27, 0xab27,
- 0xab2f, 0xab2f,
- 0xab60, 0xab63,
- 0xab66, 0xabbf,
- 0xabee, 0xabef,
- 0xabfa, 0xabff,
- 0xd7a4, 0xd7af,
- 0xd7c7, 0xd7ca,
- 0xd7fc, 0xd7ff,
- 0xfa6e, 0xfa6f,
- 0xfada, 0xfaff,
- 0xfb07, 0xfb12,
- 0xfb18, 0xfb1c,
- 0xfb37, 0xfb37,
- 0xfb3d, 0xfb3d,
- 0xfb3f, 0xfb3f,
- 0xfb42, 0xfb42,
- 0xfb45, 0xfb45,
- 0xfbc2, 0xfbd2,
- 0xfd40, 0xfd4f,
- 0xfd90, 0xfd91,
- 0xfdc8, 0xfdef,
- 0xfdfe, 0xfdff,
- 0xfe1a, 0xfe1f,
- 0xfe2e, 0xfe2f,
- 0xfe53, 0xfe53,
- 0xfe67, 0xfe67,
- 0xfe6c, 0xfe6f,
- 0xfe75, 0xfe75,
- 0xfefd, 0xfefe,
- 0xff00, 0xff00,
- 0xffbf, 0xffc1,
- 0xffc8, 0xffc9,
- 0xffd0, 0xffd1,
- 0xffd8, 0xffd9,
- 0xffdd, 0xffdf,
- 0xffe7, 0xffe7,
- 0xffef, 0xfff8,
- 0xfffe, 0xffff,
- 0x1000c, 0x1000c,
- 0x10027, 0x10027,
- 0x1003b, 0x1003b,
- 0x1003e, 0x1003e,
- 0x1004e, 0x1004f,
- 0x1005e, 0x1007f,
- 0x100fb, 0x100ff,
- 0x10103, 0x10106,
- 0x10134, 0x10136,
- 0x1018d, 0x1018f,
- 0x1019c, 0x1019f,
- 0x101a1, 0x101cf,
- 0x101fe, 0x1027f,
- 0x1029d, 0x1029f,
- 0x102d1, 0x102df,
- 0x102fc, 0x102ff,
- 0x10324, 0x1032f,
- 0x1034b, 0x1034f,
- 0x1037b, 0x1037f,
- 0x1039e, 0x1039e,
- 0x103c4, 0x103c7,
- 0x103d6, 0x103ff,
- 0x1049e, 0x1049f,
- 0x104aa, 0x104ff,
- 0x10528, 0x1052f,
- 0x10564, 0x1056e,
- 0x10570, 0x105ff,
- 0x10737, 0x1073f,
- 0x10756, 0x1075f,
- 0x10768, 0x107ff,
- 0x10806, 0x10807,
- 0x10809, 0x10809,
- 0x10836, 0x10836,
- 0x10839, 0x1083b,
- 0x1083d, 0x1083e,
- 0x10856, 0x10856,
- 0x1089f, 0x108a6,
- 0x108b0, 0x108ff,
- 0x1091c, 0x1091e,
- 0x1093a, 0x1093e,
- 0x10940, 0x1097f,
- 0x109b8, 0x109bd,
- 0x109c0, 0x109ff,
- 0x10a04, 0x10a04,
- 0x10a07, 0x10a0b,
- 0x10a14, 0x10a14,
- 0x10a18, 0x10a18,
- 0x10a34, 0x10a37,
- 0x10a3b, 0x10a3e,
- 0x10a48, 0x10a4f,
- 0x10a59, 0x10a5f,
- 0x10aa0, 0x10abf,
- 0x10ae7, 0x10aea,
- 0x10af7, 0x10aff,
- 0x10b36, 0x10b38,
- 0x10b56, 0x10b57,
- 0x10b73, 0x10b77,
- 0x10b92, 0x10b98,
- 0x10b9d, 0x10ba8,
- 0x10bb0, 0x10bff,
- 0x10c49, 0x10e5f,
- 0x10e7f, 0x10fff,
- 0x1104e, 0x11051,
- 0x11070, 0x1107e,
- 0x110c2, 0x110cf,
- 0x110e9, 0x110ef,
- 0x110fa, 0x110ff,
- 0x11135, 0x11135,
- 0x11144, 0x1114f,
- 0x11177, 0x1117f,
- 0x111c9, 0x111cc,
- 0x111ce, 0x111cf,
- 0x111db, 0x111e0,
- 0x111f5, 0x111ff,
- 0x11212, 0x11212,
- 0x1123e, 0x112af,
- 0x112eb, 0x112ef,
- 0x112fa, 0x11300,
- 0x11304, 0x11304,
- 0x1130d, 0x1130e,
- 0x11311, 0x11312,
- 0x11329, 0x11329,
- 0x11331, 0x11331,
- 0x11334, 0x11334,
- 0x1133a, 0x1133b,
- 0x11345, 0x11346,
- 0x11349, 0x1134a,
- 0x1134e, 0x11356,
- 0x11358, 0x1135c,
- 0x11364, 0x11365,
- 0x1136d, 0x1136f,
- 0x11375, 0x1147f,
- 0x114c8, 0x114cf,
- 0x114da, 0x1157f,
- 0x115b6, 0x115b7,
- 0x115ca, 0x115ff,
- 0x11645, 0x1164f,
- 0x1165a, 0x1167f,
- 0x116b8, 0x116bf,
- 0x116ca, 0x1189f,
- 0x118f3, 0x118fe,
- 0x11900, 0x11abf,
- 0x11af9, 0x11fff,
- 0x12399, 0x123ff,
- 0x1246f, 0x1246f,
- 0x12475, 0x12fff,
- 0x1342f, 0x167ff,
- 0x16a39, 0x16a3f,
- 0x16a5f, 0x16a5f,
- 0x16a6a, 0x16a6d,
- 0x16a70, 0x16acf,
- 0x16aee, 0x16aef,
- 0x16af6, 0x16aff,
- 0x16b46, 0x16b4f,
- 0x16b5a, 0x16b5a,
- 0x16b62, 0x16b62,
- 0x16b78, 0x16b7c,
- 0x16b90, 0x16eff,
- 0x16f45, 0x16f4f,
- 0x16f7f, 0x16f8e,
- 0x16fa0, 0x1afff,
- 0x1b002, 0x1bbff,
- 0x1bc6b, 0x1bc6f,
- 0x1bc7d, 0x1bc7f,
- 0x1bc89, 0x1bc8f,
- 0x1bc9a, 0x1bc9b,
- 0x1bca4, 0x1cfff,
- 0x1d0f6, 0x1d0ff,
- 0x1d127, 0x1d128,
- 0x1d1de, 0x1d1ff,
- 0x1d246, 0x1d2ff,
- 0x1d357, 0x1d35f,
- 0x1d372, 0x1d3ff,
- 0x1d455, 0x1d455,
- 0x1d49d, 0x1d49d,
- 0x1d4a0, 0x1d4a1,
- 0x1d4a3, 0x1d4a4,
- 0x1d4a7, 0x1d4a8,
- 0x1d4ad, 0x1d4ad,
- 0x1d4ba, 0x1d4ba,
- 0x1d4bc, 0x1d4bc,
- 0x1d4c4, 0x1d4c4,
- 0x1d506, 0x1d506,
- 0x1d50b, 0x1d50c,
- 0x1d515, 0x1d515,
- 0x1d51d, 0x1d51d,
- 0x1d53a, 0x1d53a,
- 0x1d53f, 0x1d53f,
- 0x1d545, 0x1d545,
- 0x1d547, 0x1d549,
- 0x1d551, 0x1d551,
- 0x1d6a6, 0x1d6a7,
- 0x1d7cc, 0x1d7cd,
- 0x1d800, 0x1e7ff,
- 0x1e8c5, 0x1e8c6,
- 0x1e8d7, 0x1edff,
- 0x1ee04, 0x1ee04,
- 0x1ee20, 0x1ee20,
- 0x1ee23, 0x1ee23,
- 0x1ee25, 0x1ee26,
- 0x1ee28, 0x1ee28,
- 0x1ee33, 0x1ee33,
- 0x1ee38, 0x1ee38,
- 0x1ee3a, 0x1ee3a,
- 0x1ee3c, 0x1ee41,
- 0x1ee43, 0x1ee46,
- 0x1ee48, 0x1ee48,
- 0x1ee4a, 0x1ee4a,
- 0x1ee4c, 0x1ee4c,
- 0x1ee50, 0x1ee50,
- 0x1ee53, 0x1ee53,
- 0x1ee55, 0x1ee56,
- 0x1ee58, 0x1ee58,
- 0x1ee5a, 0x1ee5a,
- 0x1ee5c, 0x1ee5c,
- 0x1ee5e, 0x1ee5e,
- 0x1ee60, 0x1ee60,
- 0x1ee63, 0x1ee63,
- 0x1ee65, 0x1ee66,
- 0x1ee6b, 0x1ee6b,
- 0x1ee73, 0x1ee73,
- 0x1ee78, 0x1ee78,
- 0x1ee7d, 0x1ee7d,
- 0x1ee7f, 0x1ee7f,
- 0x1ee8a, 0x1ee8a,
- 0x1ee9c, 0x1eea0,
- 0x1eea4, 0x1eea4,
- 0x1eeaa, 0x1eeaa,
- 0x1eebc, 0x1eeef,
- 0x1eef2, 0x1efff,
- 0x1f02c, 0x1f02f,
- 0x1f094, 0x1f09f,
- 0x1f0af, 0x1f0b0,
- 0x1f0c0, 0x1f0c0,
- 0x1f0d0, 0x1f0d0,
- 0x1f0f6, 0x1f0ff,
- 0x1f10d, 0x1f10f,
- 0x1f12f, 0x1f12f,
- 0x1f16c, 0x1f16f,
- 0x1f19b, 0x1f1e5,
- 0x1f203, 0x1f20f,
- 0x1f23b, 0x1f23f,
- 0x1f249, 0x1f24f,
- 0x1f252, 0x1f2ff,
- 0x1f32d, 0x1f32f,
- 0x1f37e, 0x1f37f,
- 0x1f3cf, 0x1f3d3,
- 0x1f3f8, 0x1f3ff,
- 0x1f4ff, 0x1f4ff,
- 0x1f54b, 0x1f54f,
- 0x1f57a, 0x1f57a,
- 0x1f5a4, 0x1f5a4,
- 0x1f643, 0x1f644,
- 0x1f6d0, 0x1f6df,
- 0x1f6ed, 0x1f6ef,
- 0x1f6f4, 0x1f6ff,
- 0x1f774, 0x1f77f,
- 0x1f7d5, 0x1f7ff,
- 0x1f80c, 0x1f80f,
- 0x1f848, 0x1f84f,
- 0x1f85a, 0x1f85f,
- 0x1f888, 0x1f88f,
- 0x1f8ae, 0x1ffff,
- 0x2a6d7, 0x2a6ff,
- 0x2b735, 0x2b73f,
- 0x2b81e, 0x2f7ff,
- 0x2fa1e, 0xe0000,
- 0xe0002, 0xe001f,
- 0xe0080, 0xe00ff,
- 0xe01f0, 0xeffff,
- 0xffffe, 0xfffff,
- 0x10fffe, 0x10ffff
-};
-UCP_FN(Cn)
-
-static const unichar ucp_Co_def[] = {
- 0xe000, 0xf8ff,
- 0xf0000, 0xffffd,
- 0x100000, 0x10fffd
-};
-UCP_FN(Co)
-
-static const unichar ucp_Cs_def[] = {
- 0xd800, 0xdfff
-};
-UCP_FN(Cs)
-
-static const unichar ucp_L_def[] = {
- 0x41, 0x5a,
- 0x61, 0x7a,
- 0xaa, 0xaa,
- 0xb5, 0xb5,
- 0xba, 0xba,
- 0xc0, 0xd6,
- 0xd8, 0xf6,
- 0xf8, 0x2c1,
- 0x2c6, 0x2d1,
- 0x2e0, 0x2e4,
- 0x2ec, 0x2ec,
- 0x2ee, 0x2ee,
- 0x370, 0x374,
- 0x376, 0x377,
- 0x37a, 0x37d,
- 0x37f, 0x37f,
- 0x386, 0x386,
- 0x388, 0x38a,
- 0x38c, 0x38c,
- 0x38e, 0x3a1,
- 0x3a3, 0x3f5,
- 0x3f7, 0x481,
- 0x48a, 0x52f,
- 0x531, 0x556,
- 0x559, 0x559,
- 0x561, 0x587,
- 0x5d0, 0x5ea,
- 0x5f0, 0x5f2,
- 0x620, 0x64a,
- 0x66e, 0x66f,
- 0x671, 0x6d3,
- 0x6d5, 0x6d5,
- 0x6e5, 0x6e6,
- 0x6ee, 0x6ef,
- 0x6fa, 0x6fc,
- 0x6ff, 0x6ff,
- 0x710, 0x710,
- 0x712, 0x72f,
- 0x74d, 0x7a5,
- 0x7b1, 0x7b1,
- 0x7ca, 0x7ea,
- 0x7f4, 0x7f5,
- 0x7fa, 0x7fa,
- 0x800, 0x815,
- 0x81a, 0x81a,
- 0x824, 0x824,
- 0x828, 0x828,
- 0x840, 0x858,
- 0x8a0, 0x8b2,
- 0x904, 0x939,
- 0x93d, 0x93d,
- 0x950, 0x950,
- 0x958, 0x961,
- 0x971, 0x980,
- 0x985, 0x98c,
- 0x98f, 0x990,
- 0x993, 0x9a8,
- 0x9aa, 0x9b0,
- 0x9b2, 0x9b2,
- 0x9b6, 0x9b9,
- 0x9bd, 0x9bd,
- 0x9ce, 0x9ce,
- 0x9dc, 0x9dd,
- 0x9df, 0x9e1,
- 0x9f0, 0x9f1,
- 0xa05, 0xa0a,
- 0xa0f, 0xa10,
- 0xa13, 0xa28,
- 0xa2a, 0xa30,
- 0xa32, 0xa33,
- 0xa35, 0xa36,
- 0xa38, 0xa39,
- 0xa59, 0xa5c,
- 0xa5e, 0xa5e,
- 0xa72, 0xa74,
- 0xa85, 0xa8d,
- 0xa8f, 0xa91,
- 0xa93, 0xaa8,
- 0xaaa, 0xab0,
- 0xab2, 0xab3,
- 0xab5, 0xab9,
- 0xabd, 0xabd,
- 0xad0, 0xad0,
- 0xae0, 0xae1,
- 0xb05, 0xb0c,
- 0xb0f, 0xb10,
- 0xb13, 0xb28,
- 0xb2a, 0xb30,
- 0xb32, 0xb33,
- 0xb35, 0xb39,
- 0xb3d, 0xb3d,
- 0xb5c, 0xb5d,
- 0xb5f, 0xb61,
- 0xb71, 0xb71,
- 0xb83, 0xb83,
- 0xb85, 0xb8a,
- 0xb8e, 0xb90,
- 0xb92, 0xb95,
- 0xb99, 0xb9a,
- 0xb9c, 0xb9c,
- 0xb9e, 0xb9f,
- 0xba3, 0xba4,
- 0xba8, 0xbaa,
- 0xbae, 0xbb9,
- 0xbd0, 0xbd0,
- 0xc05, 0xc0c,
- 0xc0e, 0xc10,
- 0xc12, 0xc28,
- 0xc2a, 0xc39,
- 0xc3d, 0xc3d,
- 0xc58, 0xc59,
- 0xc60, 0xc61,
- 0xc85, 0xc8c,
- 0xc8e, 0xc90,
- 0xc92, 0xca8,
- 0xcaa, 0xcb3,
- 0xcb5, 0xcb9,
- 0xcbd, 0xcbd,
- 0xcde, 0xcde,
- 0xce0, 0xce1,
- 0xcf1, 0xcf2,
- 0xd05, 0xd0c,
- 0xd0e, 0xd10,
- 0xd12, 0xd3a,
- 0xd3d, 0xd3d,
- 0xd4e, 0xd4e,
- 0xd60, 0xd61,
- 0xd7a, 0xd7f,
- 0xd85, 0xd96,
- 0xd9a, 0xdb1,
- 0xdb3, 0xdbb,
- 0xdbd, 0xdbd,
- 0xdc0, 0xdc6,
- 0xe01, 0xe30,
- 0xe32, 0xe33,
- 0xe40, 0xe46,
- 0xe81, 0xe82,
- 0xe84, 0xe84,
- 0xe87, 0xe88,
- 0xe8a, 0xe8a,
- 0xe8d, 0xe8d,
- 0xe94, 0xe97,
- 0xe99, 0xe9f,
- 0xea1, 0xea3,
- 0xea5, 0xea5,
- 0xea7, 0xea7,
- 0xeaa, 0xeab,
- 0xead, 0xeb0,
- 0xeb2, 0xeb3,
- 0xebd, 0xebd,
- 0xec0, 0xec4,
- 0xec6, 0xec6,
- 0xedc, 0xedf,
- 0xf00, 0xf00,
- 0xf40, 0xf47,
- 0xf49, 0xf6c,
- 0xf88, 0xf8c,
- 0x1000, 0x102a,
- 0x103f, 0x103f,
- 0x1050, 0x1055,
- 0x105a, 0x105d,
- 0x1061, 0x1061,
- 0x1065, 0x1066,
- 0x106e, 0x1070,
- 0x1075, 0x1081,
- 0x108e, 0x108e,
- 0x10a0, 0x10c5,
- 0x10c7, 0x10c7,
- 0x10cd, 0x10cd,
- 0x10d0, 0x10fa,
- 0x10fc, 0x1248,
- 0x124a, 0x124d,
- 0x1250, 0x1256,
- 0x1258, 0x1258,
- 0x125a, 0x125d,
- 0x1260, 0x1288,
- 0x128a, 0x128d,
- 0x1290, 0x12b0,
- 0x12b2, 0x12b5,
- 0x12b8, 0x12be,
- 0x12c0, 0x12c0,
- 0x12c2, 0x12c5,
- 0x12c8, 0x12d6,
- 0x12d8, 0x1310,
- 0x1312, 0x1315,
- 0x1318, 0x135a,
- 0x1380, 0x138f,
- 0x13a0, 0x13f4,
- 0x1401, 0x166c,
- 0x166f, 0x167f,
- 0x1681, 0x169a,
- 0x16a0, 0x16ea,
- 0x16f1, 0x16f8,
- 0x1700, 0x170c,
- 0x170e, 0x1711,
- 0x1720, 0x1731,
- 0x1740, 0x1751,
- 0x1760, 0x176c,
- 0x176e, 0x1770,
- 0x1780, 0x17b3,
- 0x17d7, 0x17d7,
- 0x17dc, 0x17dc,
- 0x1820, 0x1877,
- 0x1880, 0x18a8,
- 0x18aa, 0x18aa,
- 0x18b0, 0x18f5,
- 0x1900, 0x191e,
- 0x1950, 0x196d,
- 0x1970, 0x1974,
- 0x1980, 0x19ab,
- 0x19c1, 0x19c7,
- 0x1a00, 0x1a16,
- 0x1a20, 0x1a54,
- 0x1aa7, 0x1aa7,
- 0x1b05, 0x1b33,
- 0x1b45, 0x1b4b,
- 0x1b83, 0x1ba0,
- 0x1bae, 0x1baf,
- 0x1bba, 0x1be5,
- 0x1c00, 0x1c23,
- 0x1c4d, 0x1c4f,
- 0x1c5a, 0x1c7d,
- 0x1ce9, 0x1cec,
- 0x1cee, 0x1cf1,
- 0x1cf5, 0x1cf6,
- 0x1d00, 0x1dbf,
- 0x1e00, 0x1f15,
- 0x1f18, 0x1f1d,
- 0x1f20, 0x1f45,
- 0x1f48, 0x1f4d,
- 0x1f50, 0x1f57,
- 0x1f59, 0x1f59,
- 0x1f5b, 0x1f5b,
- 0x1f5d, 0x1f5d,
- 0x1f5f, 0x1f7d,
- 0x1f80, 0x1fb4,
- 0x1fb6, 0x1fbc,
- 0x1fbe, 0x1fbe,
- 0x1fc2, 0x1fc4,
- 0x1fc6, 0x1fcc,
- 0x1fd0, 0x1fd3,
- 0x1fd6, 0x1fdb,
- 0x1fe0, 0x1fec,
- 0x1ff2, 0x1ff4,
- 0x1ff6, 0x1ffc,
- 0x2071, 0x2071,
- 0x207f, 0x207f,
- 0x2090, 0x209c,
- 0x2102, 0x2102,
- 0x2107, 0x2107,
- 0x210a, 0x2113,
- 0x2115, 0x2115,
- 0x2119, 0x211d,
- 0x2124, 0x2124,
- 0x2126, 0x2126,
- 0x2128, 0x2128,
- 0x212a, 0x212d,
- 0x212f, 0x2139,
- 0x213c, 0x213f,
- 0x2145, 0x2149,
- 0x214e, 0x214e,
- 0x2183, 0x2184,
- 0x2c00, 0x2c2e,
- 0x2c30, 0x2c5e,
- 0x2c60, 0x2ce4,
- 0x2ceb, 0x2cee,
- 0x2cf2, 0x2cf3,
- 0x2d00, 0x2d25,
- 0x2d27, 0x2d27,
- 0x2d2d, 0x2d2d,
- 0x2d30, 0x2d67,
- 0x2d6f, 0x2d6f,
- 0x2d80, 0x2d96,
- 0x2da0, 0x2da6,
- 0x2da8, 0x2dae,
- 0x2db0, 0x2db6,
- 0x2db8, 0x2dbe,
- 0x2dc0, 0x2dc6,
- 0x2dc8, 0x2dce,
- 0x2dd0, 0x2dd6,
- 0x2dd8, 0x2dde,
- 0x2e2f, 0x2e2f,
- 0x3005, 0x3006,
- 0x3031, 0x3035,
- 0x303b, 0x303c,
- 0x3041, 0x3096,
- 0x309d, 0x309f,
- 0x30a1, 0x30fa,
- 0x30fc, 0x30ff,
- 0x3105, 0x312d,
- 0x3131, 0x318e,
- 0x31a0, 0x31ba,
- 0x31f0, 0x31ff,
- 0x3400, 0x4db5,
- 0x4e00, 0x9fcc,
- 0xa000, 0xa48c,
- 0xa4d0, 0xa4fd,
- 0xa500, 0xa60c,
- 0xa610, 0xa61f,
- 0xa62a, 0xa62b,
- 0xa640, 0xa66e,
- 0xa67f, 0xa69d,
- 0xa6a0, 0xa6e5,
- 0xa717, 0xa71f,
- 0xa722, 0xa788,
- 0xa78b, 0xa78e,
- 0xa790, 0xa7ad,
- 0xa7b0, 0xa7b1,
- 0xa7f7, 0xa801,
- 0xa803, 0xa805,
- 0xa807, 0xa80a,
- 0xa80c, 0xa822,
- 0xa840, 0xa873,
- 0xa882, 0xa8b3,
- 0xa8f2, 0xa8f7,
- 0xa8fb, 0xa8fb,
- 0xa90a, 0xa925,
- 0xa930, 0xa946,
- 0xa960, 0xa97c,
- 0xa984, 0xa9b2,
- 0xa9cf, 0xa9cf,
- 0xa9e0, 0xa9e4,
- 0xa9e6, 0xa9ef,
- 0xa9fa, 0xa9fe,
- 0xaa00, 0xaa28,
- 0xaa40, 0xaa42,
- 0xaa44, 0xaa4b,
- 0xaa60, 0xaa76,
- 0xaa7a, 0xaa7a,
- 0xaa7e, 0xaaaf,
- 0xaab1, 0xaab1,
- 0xaab5, 0xaab6,
- 0xaab9, 0xaabd,
- 0xaac0, 0xaac0,
- 0xaac2, 0xaac2,
- 0xaadb, 0xaadd,
- 0xaae0, 0xaaea,
- 0xaaf2, 0xaaf4,
- 0xab01, 0xab06,
- 0xab09, 0xab0e,
- 0xab11, 0xab16,
- 0xab20, 0xab26,
- 0xab28, 0xab2e,
- 0xab30, 0xab5a,
- 0xab5c, 0xab5f,
- 0xab64, 0xab65,
- 0xabc0, 0xabe2,
- 0xac00, 0xd7a3,
- 0xd7b0, 0xd7c6,
- 0xd7cb, 0xd7fb,
- 0xf900, 0xfa6d,
- 0xfa70, 0xfad9,
- 0xfb00, 0xfb06,
- 0xfb13, 0xfb17,
- 0xfb1d, 0xfb1d,
- 0xfb1f, 0xfb28,
- 0xfb2a, 0xfb36,
- 0xfb38, 0xfb3c,
- 0xfb3e, 0xfb3e,
- 0xfb40, 0xfb41,
- 0xfb43, 0xfb44,
- 0xfb46, 0xfbb1,
- 0xfbd3, 0xfd3d,
- 0xfd50, 0xfd8f,
- 0xfd92, 0xfdc7,
- 0xfdf0, 0xfdfb,
- 0xfe70, 0xfe74,
- 0xfe76, 0xfefc,
- 0xff21, 0xff3a,
- 0xff41, 0xff5a,
- 0xff66, 0xffbe,
- 0xffc2, 0xffc7,
- 0xffca, 0xffcf,
- 0xffd2, 0xffd7,
- 0xffda, 0xffdc,
- 0x10000, 0x1000b,
- 0x1000d, 0x10026,
- 0x10028, 0x1003a,
- 0x1003c, 0x1003d,
- 0x1003f, 0x1004d,
- 0x10050, 0x1005d,
- 0x10080, 0x100fa,
- 0x10280, 0x1029c,
- 0x102a0, 0x102d0,
- 0x10300, 0x1031f,
- 0x10330, 0x10340,
- 0x10342, 0x10349,
- 0x10350, 0x10375,
- 0x10380, 0x1039d,
- 0x103a0, 0x103c3,
- 0x103c8, 0x103cf,
- 0x10400, 0x1049d,
- 0x10500, 0x10527,
- 0x10530, 0x10563,
- 0x10600, 0x10736,
- 0x10740, 0x10755,
- 0x10760, 0x10767,
- 0x10800, 0x10805,
- 0x10808, 0x10808,
- 0x1080a, 0x10835,
- 0x10837, 0x10838,
- 0x1083c, 0x1083c,
- 0x1083f, 0x10855,
- 0x10860, 0x10876,
- 0x10880, 0x1089e,
- 0x10900, 0x10915,
- 0x10920, 0x10939,
- 0x10980, 0x109b7,
- 0x109be, 0x109bf,
- 0x10a00, 0x10a00,
- 0x10a10, 0x10a13,
- 0x10a15, 0x10a17,
- 0x10a19, 0x10a33,
- 0x10a60, 0x10a7c,
- 0x10a80, 0x10a9c,
- 0x10ac0, 0x10ac7,
- 0x10ac9, 0x10ae4,
- 0x10b00, 0x10b35,
- 0x10b40, 0x10b55,
- 0x10b60, 0x10b72,
- 0x10b80, 0x10b91,
- 0x10c00, 0x10c48,
- 0x11003, 0x11037,
- 0x11083, 0x110af,
- 0x110d0, 0x110e8,
- 0x11103, 0x11126,
- 0x11150, 0x11172,
- 0x11176, 0x11176,
- 0x11183, 0x111b2,
- 0x111c1, 0x111c4,
- 0x111da, 0x111da,
- 0x11200, 0x11211,
- 0x11213, 0x1122b,
- 0x112b0, 0x112de,
- 0x11305, 0x1130c,
- 0x1130f, 0x11310,
- 0x11313, 0x11328,
- 0x1132a, 0x11330,
- 0x11332, 0x11333,
- 0x11335, 0x11339,
- 0x1133d, 0x1133d,
- 0x1135d, 0x11361,
- 0x11480, 0x114af,
- 0x114c4, 0x114c5,
- 0x114c7, 0x114c7,
- 0x11580, 0x115ae,
- 0x11600, 0x1162f,
- 0x11644, 0x11644,
- 0x11680, 0x116aa,
- 0x118a0, 0x118df,
- 0x118ff, 0x118ff,
- 0x11ac0, 0x11af8,
- 0x12000, 0x12398,
- 0x13000, 0x1342e,
- 0x16800, 0x16a38,
- 0x16a40, 0x16a5e,
- 0x16ad0, 0x16aed,
- 0x16b00, 0x16b2f,
- 0x16b40, 0x16b43,
- 0x16b63, 0x16b77,
- 0x16b7d, 0x16b8f,
- 0x16f00, 0x16f44,
- 0x16f50, 0x16f50,
- 0x16f93, 0x16f9f,
- 0x1b000, 0x1b001,
- 0x1bc00, 0x1bc6a,
- 0x1bc70, 0x1bc7c,
- 0x1bc80, 0x1bc88,
- 0x1bc90, 0x1bc99,
- 0x1d400, 0x1d454,
- 0x1d456, 0x1d49c,
- 0x1d49e, 0x1d49f,
- 0x1d4a2, 0x1d4a2,
- 0x1d4a5, 0x1d4a6,
- 0x1d4a9, 0x1d4ac,
- 0x1d4ae, 0x1d4b9,
- 0x1d4bb, 0x1d4bb,
- 0x1d4bd, 0x1d4c3,
- 0x1d4c5, 0x1d505,
- 0x1d507, 0x1d50a,
- 0x1d50d, 0x1d514,
- 0x1d516, 0x1d51c,
- 0x1d51e, 0x1d539,
- 0x1d53b, 0x1d53e,
- 0x1d540, 0x1d544,
- 0x1d546, 0x1d546,
- 0x1d54a, 0x1d550,
- 0x1d552, 0x1d6a5,
- 0x1d6a8, 0x1d6c0,
- 0x1d6c2, 0x1d6da,
- 0x1d6dc, 0x1d6fa,
- 0x1d6fc, 0x1d714,
- 0x1d716, 0x1d734,
- 0x1d736, 0x1d74e,
- 0x1d750, 0x1d76e,
- 0x1d770, 0x1d788,
- 0x1d78a, 0x1d7a8,
- 0x1d7aa, 0x1d7c2,
- 0x1d7c4, 0x1d7cb,
- 0x1e800, 0x1e8c4,
- 0x1ee00, 0x1ee03,
- 0x1ee05, 0x1ee1f,
- 0x1ee21, 0x1ee22,
- 0x1ee24, 0x1ee24,
- 0x1ee27, 0x1ee27,
- 0x1ee29, 0x1ee32,
- 0x1ee34, 0x1ee37,
- 0x1ee39, 0x1ee39,
- 0x1ee3b, 0x1ee3b,
- 0x1ee42, 0x1ee42,
- 0x1ee47, 0x1ee47,
- 0x1ee49, 0x1ee49,
- 0x1ee4b, 0x1ee4b,
- 0x1ee4d, 0x1ee4f,
- 0x1ee51, 0x1ee52,
- 0x1ee54, 0x1ee54,
- 0x1ee57, 0x1ee57,
- 0x1ee59, 0x1ee59,
- 0x1ee5b, 0x1ee5b,
- 0x1ee5d, 0x1ee5d,
- 0x1ee5f, 0x1ee5f,
- 0x1ee61, 0x1ee62,
- 0x1ee64, 0x1ee64,
- 0x1ee67, 0x1ee6a,
- 0x1ee6c, 0x1ee72,
- 0x1ee74, 0x1ee77,
- 0x1ee79, 0x1ee7c,
- 0x1ee7e, 0x1ee7e,
- 0x1ee80, 0x1ee89,
- 0x1ee8b, 0x1ee9b,
- 0x1eea1, 0x1eea3,
- 0x1eea5, 0x1eea9,
- 0x1eeab, 0x1eebb,
- 0x20000, 0x2a6d6,
- 0x2a700, 0x2b734,
- 0x2b740, 0x2b81d,
- 0x2f800, 0x2fa1d
-};
-UCP_FN(L)
-
-static const unichar ucp_L_and_def[] = {
- 0x41, 0x5a,
- 0x61, 0x7a,
- 0xb5, 0xb5,
- 0xc0, 0xd6,
- 0xd8, 0xf6,
- 0xf8, 0x1ba,
- 0x1bc, 0x1bf,
- 0x1c4, 0x293,
- 0x295, 0x2af,
- 0x370, 0x373,
- 0x376, 0x377,
- 0x37b, 0x37d,
- 0x37f, 0x37f,
- 0x386, 0x386,
- 0x388, 0x38a,
- 0x38c, 0x38c,
- 0x38e, 0x3a1,
- 0x3a3, 0x3f5,
- 0x3f7, 0x481,
- 0x48a, 0x52f,
- 0x531, 0x556,
- 0x561, 0x587,
- 0x10a0, 0x10c5,
- 0x10c7, 0x10c7,
- 0x10cd, 0x10cd,
- 0x1d00, 0x1d2b,
- 0x1d6b, 0x1d77,
- 0x1d79, 0x1d9a,
- 0x1e00, 0x1f15,
- 0x1f18, 0x1f1d,
- 0x1f20, 0x1f45,
- 0x1f48, 0x1f4d,
- 0x1f50, 0x1f57,
- 0x1f59, 0x1f59,
- 0x1f5b, 0x1f5b,
- 0x1f5d, 0x1f5d,
- 0x1f5f, 0x1f7d,
- 0x1f80, 0x1fb4,
- 0x1fb6, 0x1fbc,
- 0x1fbe, 0x1fbe,
- 0x1fc2, 0x1fc4,
- 0x1fc6, 0x1fcc,
- 0x1fd0, 0x1fd3,
- 0x1fd6, 0x1fdb,
- 0x1fe0, 0x1fec,
- 0x1ff2, 0x1ff4,
- 0x1ff6, 0x1ffc,
- 0x2102, 0x2102,
- 0x2107, 0x2107,
- 0x210a, 0x2113,
- 0x2115, 0x2115,
- 0x2119, 0x211d,
- 0x2124, 0x2124,
- 0x2126, 0x2126,
- 0x2128, 0x2128,
- 0x212a, 0x212d,
- 0x212f, 0x2134,
- 0x2139, 0x2139,
- 0x213c, 0x213f,
- 0x2145, 0x2149,
- 0x214e, 0x214e,
- 0x2183, 0x2184,
- 0x2c00, 0x2c2e,
- 0x2c30, 0x2c5e,
- 0x2c60, 0x2c7b,
- 0x2c7e, 0x2ce4,
- 0x2ceb, 0x2cee,
- 0x2cf2, 0x2cf3,
- 0x2d00, 0x2d25,
- 0x2d27, 0x2d27,
- 0x2d2d, 0x2d2d,
- 0xa640, 0xa66d,
- 0xa680, 0xa69b,
- 0xa722, 0xa76f,
- 0xa771, 0xa787,
- 0xa78b, 0xa78e,
- 0xa790, 0xa7ad,
- 0xa7b0, 0xa7b1,
- 0xa7fa, 0xa7fa,
- 0xab30, 0xab5a,
- 0xab64, 0xab65,
- 0xfb00, 0xfb06,
- 0xfb13, 0xfb17,
- 0xff21, 0xff3a,
- 0xff41, 0xff5a,
- 0x10400, 0x1044f,
- 0x118a0, 0x118df,
- 0x1d400, 0x1d454,
- 0x1d456, 0x1d49c,
- 0x1d49e, 0x1d49f,
- 0x1d4a2, 0x1d4a2,
- 0x1d4a5, 0x1d4a6,
- 0x1d4a9, 0x1d4ac,
- 0x1d4ae, 0x1d4b9,
- 0x1d4bb, 0x1d4bb,
- 0x1d4bd, 0x1d4c3,
- 0x1d4c5, 0x1d505,
- 0x1d507, 0x1d50a,
- 0x1d50d, 0x1d514,
- 0x1d516, 0x1d51c,
- 0x1d51e, 0x1d539,
- 0x1d53b, 0x1d53e,
- 0x1d540, 0x1d544,
- 0x1d546, 0x1d546,
- 0x1d54a, 0x1d550,
- 0x1d552, 0x1d6a5,
- 0x1d6a8, 0x1d6c0,
- 0x1d6c2, 0x1d6da,
- 0x1d6dc, 0x1d6fa,
- 0x1d6fc, 0x1d714,
- 0x1d716, 0x1d734,
- 0x1d736, 0x1d74e,
- 0x1d750, 0x1d76e,
- 0x1d770, 0x1d788,
- 0x1d78a, 0x1d7a8,
- 0x1d7aa, 0x1d7c2,
- 0x1d7c4, 0x1d7cb
-};
-UCP_FN(L_and)
-
-static const unichar ucp_Ll_def[] = {
- 0x61, 0x7a,
- 0xb5, 0xb5,
- 0xdf, 0xf6,
- 0xf8, 0xff,
- 0x101, 0x101,
- 0x103, 0x103,
- 0x105, 0x105,
- 0x107, 0x107,
- 0x109, 0x109,
- 0x10b, 0x10b,
- 0x10d, 0x10d,
- 0x10f, 0x10f,
- 0x111, 0x111,
- 0x113, 0x113,
- 0x115, 0x115,
- 0x117, 0x117,
- 0x119, 0x119,
- 0x11b, 0x11b,
- 0x11d, 0x11d,
- 0x11f, 0x11f,
- 0x121, 0x121,
- 0x123, 0x123,
- 0x125, 0x125,
- 0x127, 0x127,
- 0x129, 0x129,
- 0x12b, 0x12b,
- 0x12d, 0x12d,
- 0x12f, 0x12f,
- 0x131, 0x131,
- 0x133, 0x133,
- 0x135, 0x135,
- 0x137, 0x138,
- 0x13a, 0x13a,
- 0x13c, 0x13c,
- 0x13e, 0x13e,
- 0x140, 0x140,
- 0x142, 0x142,
- 0x144, 0x144,
- 0x146, 0x146,
- 0x148, 0x149,
- 0x14b, 0x14b,
- 0x14d, 0x14d,
- 0x14f, 0x14f,
- 0x151, 0x151,
- 0x153, 0x153,
- 0x155, 0x155,
- 0x157, 0x157,
- 0x159, 0x159,
- 0x15b, 0x15b,
- 0x15d, 0x15d,
- 0x15f, 0x15f,
- 0x161, 0x161,
- 0x163, 0x163,
- 0x165, 0x165,
- 0x167, 0x167,
- 0x169, 0x169,
- 0x16b, 0x16b,
- 0x16d, 0x16d,
- 0x16f, 0x16f,
- 0x171, 0x171,
- 0x173, 0x173,
- 0x175, 0x175,
- 0x177, 0x177,
- 0x17a, 0x17a,
- 0x17c, 0x17c,
- 0x17e, 0x180,
- 0x183, 0x183,
- 0x185, 0x185,
- 0x188, 0x188,
- 0x18c, 0x18d,
- 0x192, 0x192,
- 0x195, 0x195,
- 0x199, 0x19b,
- 0x19e, 0x19e,
- 0x1a1, 0x1a1,
- 0x1a3, 0x1a3,
- 0x1a5, 0x1a5,
- 0x1a8, 0x1a8,
- 0x1aa, 0x1ab,
- 0x1ad, 0x1ad,
- 0x1b0, 0x1b0,
- 0x1b4, 0x1b4,
- 0x1b6, 0x1b6,
- 0x1b9, 0x1ba,
- 0x1bd, 0x1bf,
- 0x1c6, 0x1c6,
- 0x1c9, 0x1c9,
- 0x1cc, 0x1cc,
- 0x1ce, 0x1ce,
- 0x1d0, 0x1d0,
- 0x1d2, 0x1d2,
- 0x1d4, 0x1d4,
- 0x1d6, 0x1d6,
- 0x1d8, 0x1d8,
- 0x1da, 0x1da,
- 0x1dc, 0x1dd,
- 0x1df, 0x1df,
- 0x1e1, 0x1e1,
- 0x1e3, 0x1e3,
- 0x1e5, 0x1e5,
- 0x1e7, 0x1e7,
- 0x1e9, 0x1e9,
- 0x1eb, 0x1eb,
- 0x1ed, 0x1ed,
- 0x1ef, 0x1f0,
- 0x1f3, 0x1f3,
- 0x1f5, 0x1f5,
- 0x1f9, 0x1f9,
- 0x1fb, 0x1fb,
- 0x1fd, 0x1fd,
- 0x1ff, 0x1ff,
- 0x201, 0x201,
- 0x203, 0x203,
- 0x205, 0x205,
- 0x207, 0x207,
- 0x209, 0x209,
- 0x20b, 0x20b,
- 0x20d, 0x20d,
- 0x20f, 0x20f,
- 0x211, 0x211,
- 0x213, 0x213,
- 0x215, 0x215,
- 0x217, 0x217,
- 0x219, 0x219,
- 0x21b, 0x21b,
- 0x21d, 0x21d,
- 0x21f, 0x21f,
- 0x221, 0x221,
- 0x223, 0x223,
- 0x225, 0x225,
- 0x227, 0x227,
- 0x229, 0x229,
- 0x22b, 0x22b,
- 0x22d, 0x22d,
- 0x22f, 0x22f,
- 0x231, 0x231,
- 0x233, 0x239,
- 0x23c, 0x23c,
- 0x23f, 0x240,
- 0x242, 0x242,
- 0x247, 0x247,
- 0x249, 0x249,
- 0x24b, 0x24b,
- 0x24d, 0x24d,
- 0x24f, 0x293,
- 0x295, 0x2af,
- 0x371, 0x371,
- 0x373, 0x373,
- 0x377, 0x377,
- 0x37b, 0x37d,
- 0x390, 0x390,
- 0x3ac, 0x3ce,
- 0x3d0, 0x3d1,
- 0x3d5, 0x3d7,
- 0x3d9, 0x3d9,
- 0x3db, 0x3db,
- 0x3dd, 0x3dd,
- 0x3df, 0x3df,
- 0x3e1, 0x3e1,
- 0x3e3, 0x3e3,
- 0x3e5, 0x3e5,
- 0x3e7, 0x3e7,
- 0x3e9, 0x3e9,
- 0x3eb, 0x3eb,
- 0x3ed, 0x3ed,
- 0x3ef, 0x3f3,
- 0x3f5, 0x3f5,
- 0x3f8, 0x3f8,
- 0x3fb, 0x3fc,
- 0x430, 0x45f,
- 0x461, 0x461,
- 0x463, 0x463,
- 0x465, 0x465,
- 0x467, 0x467,
- 0x469, 0x469,
- 0x46b, 0x46b,
- 0x46d, 0x46d,
- 0x46f, 0x46f,
- 0x471, 0x471,
- 0x473, 0x473,
- 0x475, 0x475,
- 0x477, 0x477,
- 0x479, 0x479,
- 0x47b, 0x47b,
- 0x47d, 0x47d,
- 0x47f, 0x47f,
- 0x481, 0x481,
- 0x48b, 0x48b,
- 0x48d, 0x48d,
- 0x48f, 0x48f,
- 0x491, 0x491,
- 0x493, 0x493,
- 0x495, 0x495,
- 0x497, 0x497,
- 0x499, 0x499,
- 0x49b, 0x49b,
- 0x49d, 0x49d,
- 0x49f, 0x49f,
- 0x4a1, 0x4a1,
- 0x4a3, 0x4a3,
- 0x4a5, 0x4a5,
- 0x4a7, 0x4a7,
- 0x4a9, 0x4a9,
- 0x4ab, 0x4ab,
- 0x4ad, 0x4ad,
- 0x4af, 0x4af,
- 0x4b1, 0x4b1,
- 0x4b3, 0x4b3,
- 0x4b5, 0x4b5,
- 0x4b7, 0x4b7,
- 0x4b9, 0x4b9,
- 0x4bb, 0x4bb,
- 0x4bd, 0x4bd,
- 0x4bf, 0x4bf,
- 0x4c2, 0x4c2,
- 0x4c4, 0x4c4,
- 0x4c6, 0x4c6,
- 0x4c8, 0x4c8,
- 0x4ca, 0x4ca,
- 0x4cc, 0x4cc,
- 0x4ce, 0x4cf,
- 0x4d1, 0x4d1,
- 0x4d3, 0x4d3,
- 0x4d5, 0x4d5,
- 0x4d7, 0x4d7,
- 0x4d9, 0x4d9,
- 0x4db, 0x4db,
- 0x4dd, 0x4dd,
- 0x4df, 0x4df,
- 0x4e1, 0x4e1,
- 0x4e3, 0x4e3,
- 0x4e5, 0x4e5,
- 0x4e7, 0x4e7,
- 0x4e9, 0x4e9,
- 0x4eb, 0x4eb,
- 0x4ed, 0x4ed,
- 0x4ef, 0x4ef,
- 0x4f1, 0x4f1,
- 0x4f3, 0x4f3,
- 0x4f5, 0x4f5,
- 0x4f7, 0x4f7,
- 0x4f9, 0x4f9,
- 0x4fb, 0x4fb,
- 0x4fd, 0x4fd,
- 0x4ff, 0x4ff,
- 0x501, 0x501,
- 0x503, 0x503,
- 0x505, 0x505,
- 0x507, 0x507,
- 0x509, 0x509,
- 0x50b, 0x50b,
- 0x50d, 0x50d,
- 0x50f, 0x50f,
- 0x511, 0x511,
- 0x513, 0x513,
- 0x515, 0x515,
- 0x517, 0x517,
- 0x519, 0x519,
- 0x51b, 0x51b,
- 0x51d, 0x51d,
- 0x51f, 0x51f,
- 0x521, 0x521,
- 0x523, 0x523,
- 0x525, 0x525,
- 0x527, 0x527,
- 0x529, 0x529,
- 0x52b, 0x52b,
- 0x52d, 0x52d,
- 0x52f, 0x52f,
- 0x561, 0x587,
- 0x1d00, 0x1d2b,
- 0x1d6b, 0x1d77,
- 0x1d79, 0x1d9a,
- 0x1e01, 0x1e01,
- 0x1e03, 0x1e03,
- 0x1e05, 0x1e05,
- 0x1e07, 0x1e07,
- 0x1e09, 0x1e09,
- 0x1e0b, 0x1e0b,
- 0x1e0d, 0x1e0d,
- 0x1e0f, 0x1e0f,
- 0x1e11, 0x1e11,
- 0x1e13, 0x1e13,
- 0x1e15, 0x1e15,
- 0x1e17, 0x1e17,
- 0x1e19, 0x1e19,
- 0x1e1b, 0x1e1b,
- 0x1e1d, 0x1e1d,
- 0x1e1f, 0x1e1f,
- 0x1e21, 0x1e21,
- 0x1e23, 0x1e23,
- 0x1e25, 0x1e25,
- 0x1e27, 0x1e27,
- 0x1e29, 0x1e29,
- 0x1e2b, 0x1e2b,
- 0x1e2d, 0x1e2d,
- 0x1e2f, 0x1e2f,
- 0x1e31, 0x1e31,
- 0x1e33, 0x1e33,
- 0x1e35, 0x1e35,
- 0x1e37, 0x1e37,
- 0x1e39, 0x1e39,
- 0x1e3b, 0x1e3b,
- 0x1e3d, 0x1e3d,
- 0x1e3f, 0x1e3f,
- 0x1e41, 0x1e41,
- 0x1e43, 0x1e43,
- 0x1e45, 0x1e45,
- 0x1e47, 0x1e47,
- 0x1e49, 0x1e49,
- 0x1e4b, 0x1e4b,
- 0x1e4d, 0x1e4d,
- 0x1e4f, 0x1e4f,
- 0x1e51, 0x1e51,
- 0x1e53, 0x1e53,
- 0x1e55, 0x1e55,
- 0x1e57, 0x1e57,
- 0x1e59, 0x1e59,
- 0x1e5b, 0x1e5b,
- 0x1e5d, 0x1e5d,
- 0x1e5f, 0x1e5f,
- 0x1e61, 0x1e61,
- 0x1e63, 0x1e63,
- 0x1e65, 0x1e65,
- 0x1e67, 0x1e67,
- 0x1e69, 0x1e69,
- 0x1e6b, 0x1e6b,
- 0x1e6d, 0x1e6d,
- 0x1e6f, 0x1e6f,
- 0x1e71, 0x1e71,
- 0x1e73, 0x1e73,
- 0x1e75, 0x1e75,
- 0x1e77, 0x1e77,
- 0x1e79, 0x1e79,
- 0x1e7b, 0x1e7b,
- 0x1e7d, 0x1e7d,
- 0x1e7f, 0x1e7f,
- 0x1e81, 0x1e81,
- 0x1e83, 0x1e83,
- 0x1e85, 0x1e85,
- 0x1e87, 0x1e87,
- 0x1e89, 0x1e89,
- 0x1e8b, 0x1e8b,
- 0x1e8d, 0x1e8d,
- 0x1e8f, 0x1e8f,
- 0x1e91, 0x1e91,
- 0x1e93, 0x1e93,
- 0x1e95, 0x1e9d,
- 0x1e9f, 0x1e9f,
- 0x1ea1, 0x1ea1,
- 0x1ea3, 0x1ea3,
- 0x1ea5, 0x1ea5,
- 0x1ea7, 0x1ea7,
- 0x1ea9, 0x1ea9,
- 0x1eab, 0x1eab,
- 0x1ead, 0x1ead,
- 0x1eaf, 0x1eaf,
- 0x1eb1, 0x1eb1,
- 0x1eb3, 0x1eb3,
- 0x1eb5, 0x1eb5,
- 0x1eb7, 0x1eb7,
- 0x1eb9, 0x1eb9,
- 0x1ebb, 0x1ebb,
- 0x1ebd, 0x1ebd,
- 0x1ebf, 0x1ebf,
- 0x1ec1, 0x1ec1,
- 0x1ec3, 0x1ec3,
- 0x1ec5, 0x1ec5,
- 0x1ec7, 0x1ec7,
- 0x1ec9, 0x1ec9,
- 0x1ecb, 0x1ecb,
- 0x1ecd, 0x1ecd,
- 0x1ecf, 0x1ecf,
- 0x1ed1, 0x1ed1,
- 0x1ed3, 0x1ed3,
- 0x1ed5, 0x1ed5,
- 0x1ed7, 0x1ed7,
- 0x1ed9, 0x1ed9,
- 0x1edb, 0x1edb,
- 0x1edd, 0x1edd,
- 0x1edf, 0x1edf,
- 0x1ee1, 0x1ee1,
- 0x1ee3, 0x1ee3,
- 0x1ee5, 0x1ee5,
- 0x1ee7, 0x1ee7,
- 0x1ee9, 0x1ee9,
- 0x1eeb, 0x1eeb,
- 0x1eed, 0x1eed,
- 0x1eef, 0x1eef,
- 0x1ef1, 0x1ef1,
- 0x1ef3, 0x1ef3,
- 0x1ef5, 0x1ef5,
- 0x1ef7, 0x1ef7,
- 0x1ef9, 0x1ef9,
- 0x1efb, 0x1efb,
- 0x1efd, 0x1efd,
- 0x1eff, 0x1f07,
- 0x1f10, 0x1f15,
- 0x1f20, 0x1f27,
- 0x1f30, 0x1f37,
- 0x1f40, 0x1f45,
- 0x1f50, 0x1f57,
- 0x1f60, 0x1f67,
- 0x1f70, 0x1f7d,
- 0x1f80, 0x1f87,
- 0x1f90, 0x1f97,
- 0x1fa0, 0x1fa7,
- 0x1fb0, 0x1fb4,
- 0x1fb6, 0x1fb7,
- 0x1fbe, 0x1fbe,
- 0x1fc2, 0x1fc4,
- 0x1fc6, 0x1fc7,
- 0x1fd0, 0x1fd3,
- 0x1fd6, 0x1fd7,
- 0x1fe0, 0x1fe7,
- 0x1ff2, 0x1ff4,
- 0x1ff6, 0x1ff7,
- 0x210a, 0x210a,
- 0x210e, 0x210f,
- 0x2113, 0x2113,
- 0x212f, 0x212f,
- 0x2134, 0x2134,
- 0x2139, 0x2139,
- 0x213c, 0x213d,
- 0x2146, 0x2149,
- 0x214e, 0x214e,
- 0x2184, 0x2184,
- 0x2c30, 0x2c5e,
- 0x2c61, 0x2c61,
- 0x2c65, 0x2c66,
- 0x2c68, 0x2c68,
- 0x2c6a, 0x2c6a,
- 0x2c6c, 0x2c6c,
- 0x2c71, 0x2c71,
- 0x2c73, 0x2c74,
- 0x2c76, 0x2c7b,
- 0x2c81, 0x2c81,
- 0x2c83, 0x2c83,
- 0x2c85, 0x2c85,
- 0x2c87, 0x2c87,
- 0x2c89, 0x2c89,
- 0x2c8b, 0x2c8b,
- 0x2c8d, 0x2c8d,
- 0x2c8f, 0x2c8f,
- 0x2c91, 0x2c91,
- 0x2c93, 0x2c93,
- 0x2c95, 0x2c95,
- 0x2c97, 0x2c97,
- 0x2c99, 0x2c99,
- 0x2c9b, 0x2c9b,
- 0x2c9d, 0x2c9d,
- 0x2c9f, 0x2c9f,
- 0x2ca1, 0x2ca1,
- 0x2ca3, 0x2ca3,
- 0x2ca5, 0x2ca5,
- 0x2ca7, 0x2ca7,
- 0x2ca9, 0x2ca9,
- 0x2cab, 0x2cab,
- 0x2cad, 0x2cad,
- 0x2caf, 0x2caf,
- 0x2cb1, 0x2cb1,
- 0x2cb3, 0x2cb3,
- 0x2cb5, 0x2cb5,
- 0x2cb7, 0x2cb7,
- 0x2cb9, 0x2cb9,
- 0x2cbb, 0x2cbb,
- 0x2cbd, 0x2cbd,
- 0x2cbf, 0x2cbf,
- 0x2cc1, 0x2cc1,
- 0x2cc3, 0x2cc3,
- 0x2cc5, 0x2cc5,
- 0x2cc7, 0x2cc7,
- 0x2cc9, 0x2cc9,
- 0x2ccb, 0x2ccb,
- 0x2ccd, 0x2ccd,
- 0x2ccf, 0x2ccf,
- 0x2cd1, 0x2cd1,
- 0x2cd3, 0x2cd3,
- 0x2cd5, 0x2cd5,
- 0x2cd7, 0x2cd7,
- 0x2cd9, 0x2cd9,
- 0x2cdb, 0x2cdb,
- 0x2cdd, 0x2cdd,
- 0x2cdf, 0x2cdf,
- 0x2ce1, 0x2ce1,
- 0x2ce3, 0x2ce4,
- 0x2cec, 0x2cec,
- 0x2cee, 0x2cee,
- 0x2cf3, 0x2cf3,
- 0x2d00, 0x2d25,
- 0x2d27, 0x2d27,
- 0x2d2d, 0x2d2d,
- 0xa641, 0xa641,
- 0xa643, 0xa643,
- 0xa645, 0xa645,
- 0xa647, 0xa647,
- 0xa649, 0xa649,
- 0xa64b, 0xa64b,
- 0xa64d, 0xa64d,
- 0xa64f, 0xa64f,
- 0xa651, 0xa651,
- 0xa653, 0xa653,
- 0xa655, 0xa655,
- 0xa657, 0xa657,
- 0xa659, 0xa659,
- 0xa65b, 0xa65b,
- 0xa65d, 0xa65d,
- 0xa65f, 0xa65f,
- 0xa661, 0xa661,
- 0xa663, 0xa663,
- 0xa665, 0xa665,
- 0xa667, 0xa667,
- 0xa669, 0xa669,
- 0xa66b, 0xa66b,
- 0xa66d, 0xa66d,
- 0xa681, 0xa681,
- 0xa683, 0xa683,
- 0xa685, 0xa685,
- 0xa687, 0xa687,
- 0xa689, 0xa689,
- 0xa68b, 0xa68b,
- 0xa68d, 0xa68d,
- 0xa68f, 0xa68f,
- 0xa691, 0xa691,
- 0xa693, 0xa693,
- 0xa695, 0xa695,
- 0xa697, 0xa697,
- 0xa699, 0xa699,
- 0xa69b, 0xa69b,
- 0xa723, 0xa723,
- 0xa725, 0xa725,
- 0xa727, 0xa727,
- 0xa729, 0xa729,
- 0xa72b, 0xa72b,
- 0xa72d, 0xa72d,
- 0xa72f, 0xa731,
- 0xa733, 0xa733,
- 0xa735, 0xa735,
- 0xa737, 0xa737,
- 0xa739, 0xa739,
- 0xa73b, 0xa73b,
- 0xa73d, 0xa73d,
- 0xa73f, 0xa73f,
- 0xa741, 0xa741,
- 0xa743, 0xa743,
- 0xa745, 0xa745,
- 0xa747, 0xa747,
- 0xa749, 0xa749,
- 0xa74b, 0xa74b,
- 0xa74d, 0xa74d,
- 0xa74f, 0xa74f,
- 0xa751, 0xa751,
- 0xa753, 0xa753,
- 0xa755, 0xa755,
- 0xa757, 0xa757,
- 0xa759, 0xa759,
- 0xa75b, 0xa75b,
- 0xa75d, 0xa75d,
- 0xa75f, 0xa75f,
- 0xa761, 0xa761,
- 0xa763, 0xa763,
- 0xa765, 0xa765,
- 0xa767, 0xa767,
- 0xa769, 0xa769,
- 0xa76b, 0xa76b,
- 0xa76d, 0xa76d,
- 0xa76f, 0xa76f,
- 0xa771, 0xa778,
- 0xa77a, 0xa77a,
- 0xa77c, 0xa77c,
- 0xa77f, 0xa77f,
- 0xa781, 0xa781,
- 0xa783, 0xa783,
- 0xa785, 0xa785,
- 0xa787, 0xa787,
- 0xa78c, 0xa78c,
- 0xa78e, 0xa78e,
- 0xa791, 0xa791,
- 0xa793, 0xa795,
- 0xa797, 0xa797,
- 0xa799, 0xa799,
- 0xa79b, 0xa79b,
- 0xa79d, 0xa79d,
- 0xa79f, 0xa79f,
- 0xa7a1, 0xa7a1,
- 0xa7a3, 0xa7a3,
- 0xa7a5, 0xa7a5,
- 0xa7a7, 0xa7a7,
- 0xa7a9, 0xa7a9,
- 0xa7fa, 0xa7fa,
- 0xab30, 0xab5a,
- 0xab64, 0xab65,
- 0xfb00, 0xfb06,
- 0xfb13, 0xfb17,
- 0xff41, 0xff5a,
- 0x10428, 0x1044f,
- 0x118c0, 0x118df,
- 0x1d41a, 0x1d433,
- 0x1d44e, 0x1d454,
- 0x1d456, 0x1d467,
- 0x1d482, 0x1d49b,
- 0x1d4b6, 0x1d4b9,
- 0x1d4bb, 0x1d4bb,
- 0x1d4bd, 0x1d4c3,
- 0x1d4c5, 0x1d4cf,
- 0x1d4ea, 0x1d503,
- 0x1d51e, 0x1d537,
- 0x1d552, 0x1d56b,
- 0x1d586, 0x1d59f,
- 0x1d5ba, 0x1d5d3,
- 0x1d5ee, 0x1d607,
- 0x1d622, 0x1d63b,
- 0x1d656, 0x1d66f,
- 0x1d68a, 0x1d6a5,
- 0x1d6c2, 0x1d6da,
- 0x1d6dc, 0x1d6e1,
- 0x1d6fc, 0x1d714,
- 0x1d716, 0x1d71b,
- 0x1d736, 0x1d74e,
- 0x1d750, 0x1d755,
- 0x1d770, 0x1d788,
- 0x1d78a, 0x1d78f,
- 0x1d7aa, 0x1d7c2,
- 0x1d7c4, 0x1d7c9,
- 0x1d7cb, 0x1d7cb
-};
-UCP_FN(Ll)
-
-static const unichar ucp_Lm_def[] = {
- 0x2b0, 0x2c1,
- 0x2c6, 0x2d1,
- 0x2e0, 0x2e4,
- 0x2ec, 0x2ec,
- 0x2ee, 0x2ee,
- 0x374, 0x374,
- 0x37a, 0x37a,
- 0x559, 0x559,
- 0x640, 0x640,
- 0x6e5, 0x6e6,
- 0x7f4, 0x7f5,
- 0x7fa, 0x7fa,
- 0x81a, 0x81a,
- 0x824, 0x824,
- 0x828, 0x828,
- 0x971, 0x971,
- 0xe46, 0xe46,
- 0xec6, 0xec6,
- 0x10fc, 0x10fc,
- 0x17d7, 0x17d7,
- 0x1843, 0x1843,
- 0x1aa7, 0x1aa7,
- 0x1c78, 0x1c7d,
- 0x1d2c, 0x1d6a,
- 0x1d78, 0x1d78,
- 0x1d9b, 0x1dbf,
- 0x2071, 0x2071,
- 0x207f, 0x207f,
- 0x2090, 0x209c,
- 0x2c7c, 0x2c7d,
- 0x2d6f, 0x2d6f,
- 0x2e2f, 0x2e2f,
- 0x3005, 0x3005,
- 0x3031, 0x3035,
- 0x303b, 0x303b,
- 0x309d, 0x309e,
- 0x30fc, 0x30fe,
- 0xa015, 0xa015,
- 0xa4f8, 0xa4fd,
- 0xa60c, 0xa60c,
- 0xa67f, 0xa67f,
- 0xa69c, 0xa69d,
- 0xa717, 0xa71f,
- 0xa770, 0xa770,
- 0xa788, 0xa788,
- 0xa7f8, 0xa7f9,
- 0xa9cf, 0xa9cf,
- 0xa9e6, 0xa9e6,
- 0xaa70, 0xaa70,
- 0xaadd, 0xaadd,
- 0xaaf3, 0xaaf4,
- 0xab5c, 0xab5f,
- 0xff70, 0xff70,
- 0xff9e, 0xff9f,
- 0x16b40, 0x16b43,
- 0x16f93, 0x16f9f
-};
-UCP_FN(Lm)
-
-static const unichar ucp_Lo_def[] = {
- 0xaa, 0xaa,
- 0xba, 0xba,
- 0x1bb, 0x1bb,
- 0x1c0, 0x1c3,
- 0x294, 0x294,
- 0x5d0, 0x5ea,
- 0x5f0, 0x5f2,
- 0x620, 0x63f,
- 0x641, 0x64a,
- 0x66e, 0x66f,
- 0x671, 0x6d3,
- 0x6d5, 0x6d5,
- 0x6ee, 0x6ef,
- 0x6fa, 0x6fc,
- 0x6ff, 0x6ff,
- 0x710, 0x710,
- 0x712, 0x72f,
- 0x74d, 0x7a5,
- 0x7b1, 0x7b1,
- 0x7ca, 0x7ea,
- 0x800, 0x815,
- 0x840, 0x858,
- 0x8a0, 0x8b2,
- 0x904, 0x939,
- 0x93d, 0x93d,
- 0x950, 0x950,
- 0x958, 0x961,
- 0x972, 0x980,
- 0x985, 0x98c,
- 0x98f, 0x990,
- 0x993, 0x9a8,
- 0x9aa, 0x9b0,
- 0x9b2, 0x9b2,
- 0x9b6, 0x9b9,
- 0x9bd, 0x9bd,
- 0x9ce, 0x9ce,
- 0x9dc, 0x9dd,
- 0x9df, 0x9e1,
- 0x9f0, 0x9f1,
- 0xa05, 0xa0a,
- 0xa0f, 0xa10,
- 0xa13, 0xa28,
- 0xa2a, 0xa30,
- 0xa32, 0xa33,
- 0xa35, 0xa36,
- 0xa38, 0xa39,
- 0xa59, 0xa5c,
- 0xa5e, 0xa5e,
- 0xa72, 0xa74,
- 0xa85, 0xa8d,
- 0xa8f, 0xa91,
- 0xa93, 0xaa8,
- 0xaaa, 0xab0,
- 0xab2, 0xab3,
- 0xab5, 0xab9,
- 0xabd, 0xabd,
- 0xad0, 0xad0,
- 0xae0, 0xae1,
- 0xb05, 0xb0c,
- 0xb0f, 0xb10,
- 0xb13, 0xb28,
- 0xb2a, 0xb30,
- 0xb32, 0xb33,
- 0xb35, 0xb39,
- 0xb3d, 0xb3d,
- 0xb5c, 0xb5d,
- 0xb5f, 0xb61,
- 0xb71, 0xb71,
- 0xb83, 0xb83,
- 0xb85, 0xb8a,
- 0xb8e, 0xb90,
- 0xb92, 0xb95,
- 0xb99, 0xb9a,
- 0xb9c, 0xb9c,
- 0xb9e, 0xb9f,
- 0xba3, 0xba4,
- 0xba8, 0xbaa,
- 0xbae, 0xbb9,
- 0xbd0, 0xbd0,
- 0xc05, 0xc0c,
- 0xc0e, 0xc10,
- 0xc12, 0xc28,
- 0xc2a, 0xc39,
- 0xc3d, 0xc3d,
- 0xc58, 0xc59,
- 0xc60, 0xc61,
- 0xc85, 0xc8c,
- 0xc8e, 0xc90,
- 0xc92, 0xca8,
- 0xcaa, 0xcb3,
- 0xcb5, 0xcb9,
- 0xcbd, 0xcbd,
- 0xcde, 0xcde,
- 0xce0, 0xce1,
- 0xcf1, 0xcf2,
- 0xd05, 0xd0c,
- 0xd0e, 0xd10,
- 0xd12, 0xd3a,
- 0xd3d, 0xd3d,
- 0xd4e, 0xd4e,
- 0xd60, 0xd61,
- 0xd7a, 0xd7f,
- 0xd85, 0xd96,
- 0xd9a, 0xdb1,
- 0xdb3, 0xdbb,
- 0xdbd, 0xdbd,
- 0xdc0, 0xdc6,
- 0xe01, 0xe30,
- 0xe32, 0xe33,
- 0xe40, 0xe45,
- 0xe81, 0xe82,
- 0xe84, 0xe84,
- 0xe87, 0xe88,
- 0xe8a, 0xe8a,
- 0xe8d, 0xe8d,
- 0xe94, 0xe97,
- 0xe99, 0xe9f,
- 0xea1, 0xea3,
- 0xea5, 0xea5,
- 0xea7, 0xea7,
- 0xeaa, 0xeab,
- 0xead, 0xeb0,
- 0xeb2, 0xeb3,
- 0xebd, 0xebd,
- 0xec0, 0xec4,
- 0xedc, 0xedf,
- 0xf00, 0xf00,
- 0xf40, 0xf47,
- 0xf49, 0xf6c,
- 0xf88, 0xf8c,
- 0x1000, 0x102a,
- 0x103f, 0x103f,
- 0x1050, 0x1055,
- 0x105a, 0x105d,
- 0x1061, 0x1061,
- 0x1065, 0x1066,
- 0x106e, 0x1070,
- 0x1075, 0x1081,
- 0x108e, 0x108e,
- 0x10d0, 0x10fa,
- 0x10fd, 0x1248,
- 0x124a, 0x124d,
- 0x1250, 0x1256,
- 0x1258, 0x1258,
- 0x125a, 0x125d,
- 0x1260, 0x1288,
- 0x128a, 0x128d,
- 0x1290, 0x12b0,
- 0x12b2, 0x12b5,
- 0x12b8, 0x12be,
- 0x12c0, 0x12c0,
- 0x12c2, 0x12c5,
- 0x12c8, 0x12d6,
- 0x12d8, 0x1310,
- 0x1312, 0x1315,
- 0x1318, 0x135a,
- 0x1380, 0x138f,
- 0x13a0, 0x13f4,
- 0x1401, 0x166c,
- 0x166f, 0x167f,
- 0x1681, 0x169a,
- 0x16a0, 0x16ea,
- 0x16f1, 0x16f8,
- 0x1700, 0x170c,
- 0x170e, 0x1711,
- 0x1720, 0x1731,
- 0x1740, 0x1751,
- 0x1760, 0x176c,
- 0x176e, 0x1770,
- 0x1780, 0x17b3,
- 0x17dc, 0x17dc,
- 0x1820, 0x1842,
- 0x1844, 0x1877,
- 0x1880, 0x18a8,
- 0x18aa, 0x18aa,
- 0x18b0, 0x18f5,
- 0x1900, 0x191e,
- 0x1950, 0x196d,
- 0x1970, 0x1974,
- 0x1980, 0x19ab,
- 0x19c1, 0x19c7,
- 0x1a00, 0x1a16,
- 0x1a20, 0x1a54,
- 0x1b05, 0x1b33,
- 0x1b45, 0x1b4b,
- 0x1b83, 0x1ba0,
- 0x1bae, 0x1baf,
- 0x1bba, 0x1be5,
- 0x1c00, 0x1c23,
- 0x1c4d, 0x1c4f,
- 0x1c5a, 0x1c77,
- 0x1ce9, 0x1cec,
- 0x1cee, 0x1cf1,
- 0x1cf5, 0x1cf6,
- 0x2135, 0x2138,
- 0x2d30, 0x2d67,
- 0x2d80, 0x2d96,
- 0x2da0, 0x2da6,
- 0x2da8, 0x2dae,
- 0x2db0, 0x2db6,
- 0x2db8, 0x2dbe,
- 0x2dc0, 0x2dc6,
- 0x2dc8, 0x2dce,
- 0x2dd0, 0x2dd6,
- 0x2dd8, 0x2dde,
- 0x3006, 0x3006,
- 0x303c, 0x303c,
- 0x3041, 0x3096,
- 0x309f, 0x309f,
- 0x30a1, 0x30fa,
- 0x30ff, 0x30ff,
- 0x3105, 0x312d,
- 0x3131, 0x318e,
- 0x31a0, 0x31ba,
- 0x31f0, 0x31ff,
- 0x3400, 0x4db5,
- 0x4e00, 0x9fcc,
- 0xa000, 0xa014,
- 0xa016, 0xa48c,
- 0xa4d0, 0xa4f7,
- 0xa500, 0xa60b,
- 0xa610, 0xa61f,
- 0xa62a, 0xa62b,
- 0xa66e, 0xa66e,
- 0xa6a0, 0xa6e5,
- 0xa7f7, 0xa7f7,
- 0xa7fb, 0xa801,
- 0xa803, 0xa805,
- 0xa807, 0xa80a,
- 0xa80c, 0xa822,
- 0xa840, 0xa873,
- 0xa882, 0xa8b3,
- 0xa8f2, 0xa8f7,
- 0xa8fb, 0xa8fb,
- 0xa90a, 0xa925,
- 0xa930, 0xa946,
- 0xa960, 0xa97c,
- 0xa984, 0xa9b2,
- 0xa9e0, 0xa9e4,
- 0xa9e7, 0xa9ef,
- 0xa9fa, 0xa9fe,
- 0xaa00, 0xaa28,
- 0xaa40, 0xaa42,
- 0xaa44, 0xaa4b,
- 0xaa60, 0xaa6f,
- 0xaa71, 0xaa76,
- 0xaa7a, 0xaa7a,
- 0xaa7e, 0xaaaf,
- 0xaab1, 0xaab1,
- 0xaab5, 0xaab6,
- 0xaab9, 0xaabd,
- 0xaac0, 0xaac0,
- 0xaac2, 0xaac2,
- 0xaadb, 0xaadc,
- 0xaae0, 0xaaea,
- 0xaaf2, 0xaaf2,
- 0xab01, 0xab06,
- 0xab09, 0xab0e,
- 0xab11, 0xab16,
- 0xab20, 0xab26,
- 0xab28, 0xab2e,
- 0xabc0, 0xabe2,
- 0xac00, 0xd7a3,
- 0xd7b0, 0xd7c6,
- 0xd7cb, 0xd7fb,
- 0xf900, 0xfa6d,
- 0xfa70, 0xfad9,
- 0xfb1d, 0xfb1d,
- 0xfb1f, 0xfb28,
- 0xfb2a, 0xfb36,
- 0xfb38, 0xfb3c,
- 0xfb3e, 0xfb3e,
- 0xfb40, 0xfb41,
- 0xfb43, 0xfb44,
- 0xfb46, 0xfbb1,
- 0xfbd3, 0xfd3d,
- 0xfd50, 0xfd8f,
- 0xfd92, 0xfdc7,
- 0xfdf0, 0xfdfb,
- 0xfe70, 0xfe74,
- 0xfe76, 0xfefc,
- 0xff66, 0xff6f,
- 0xff71, 0xff9d,
- 0xffa0, 0xffbe,
- 0xffc2, 0xffc7,
- 0xffca, 0xffcf,
- 0xffd2, 0xffd7,
- 0xffda, 0xffdc,
- 0x10000, 0x1000b,
- 0x1000d, 0x10026,
- 0x10028, 0x1003a,
- 0x1003c, 0x1003d,
- 0x1003f, 0x1004d,
- 0x10050, 0x1005d,
- 0x10080, 0x100fa,
- 0x10280, 0x1029c,
- 0x102a0, 0x102d0,
- 0x10300, 0x1031f,
- 0x10330, 0x10340,
- 0x10342, 0x10349,
- 0x10350, 0x10375,
- 0x10380, 0x1039d,
- 0x103a0, 0x103c3,
- 0x103c8, 0x103cf,
- 0x10450, 0x1049d,
- 0x10500, 0x10527,
- 0x10530, 0x10563,
- 0x10600, 0x10736,
- 0x10740, 0x10755,
- 0x10760, 0x10767,
- 0x10800, 0x10805,
- 0x10808, 0x10808,
- 0x1080a, 0x10835,
- 0x10837, 0x10838,
- 0x1083c, 0x1083c,
- 0x1083f, 0x10855,
- 0x10860, 0x10876,
- 0x10880, 0x1089e,
- 0x10900, 0x10915,
- 0x10920, 0x10939,
- 0x10980, 0x109b7,
- 0x109be, 0x109bf,
- 0x10a00, 0x10a00,
- 0x10a10, 0x10a13,
- 0x10a15, 0x10a17,
- 0x10a19, 0x10a33,
- 0x10a60, 0x10a7c,
- 0x10a80, 0x10a9c,
- 0x10ac0, 0x10ac7,
- 0x10ac9, 0x10ae4,
- 0x10b00, 0x10b35,
- 0x10b40, 0x10b55,
- 0x10b60, 0x10b72,
- 0x10b80, 0x10b91,
- 0x10c00, 0x10c48,
- 0x11003, 0x11037,
- 0x11083, 0x110af,
- 0x110d0, 0x110e8,
- 0x11103, 0x11126,
- 0x11150, 0x11172,
- 0x11176, 0x11176,
- 0x11183, 0x111b2,
- 0x111c1, 0x111c4,
- 0x111da, 0x111da,
- 0x11200, 0x11211,
- 0x11213, 0x1122b,
- 0x112b0, 0x112de,
- 0x11305, 0x1130c,
- 0x1130f, 0x11310,
- 0x11313, 0x11328,
- 0x1132a, 0x11330,
- 0x11332, 0x11333,
- 0x11335, 0x11339,
- 0x1133d, 0x1133d,
- 0x1135d, 0x11361,
- 0x11480, 0x114af,
- 0x114c4, 0x114c5,
- 0x114c7, 0x114c7,
- 0x11580, 0x115ae,
- 0x11600, 0x1162f,
- 0x11644, 0x11644,
- 0x11680, 0x116aa,
- 0x118ff, 0x118ff,
- 0x11ac0, 0x11af8,
- 0x12000, 0x12398,
- 0x13000, 0x1342e,
- 0x16800, 0x16a38,
- 0x16a40, 0x16a5e,
- 0x16ad0, 0x16aed,
- 0x16b00, 0x16b2f,
- 0x16b63, 0x16b77,
- 0x16b7d, 0x16b8f,
- 0x16f00, 0x16f44,
- 0x16f50, 0x16f50,
- 0x1b000, 0x1b001,
- 0x1bc00, 0x1bc6a,
- 0x1bc70, 0x1bc7c,
- 0x1bc80, 0x1bc88,
- 0x1bc90, 0x1bc99,
- 0x1e800, 0x1e8c4,
- 0x1ee00, 0x1ee03,
- 0x1ee05, 0x1ee1f,
- 0x1ee21, 0x1ee22,
- 0x1ee24, 0x1ee24,
- 0x1ee27, 0x1ee27,
- 0x1ee29, 0x1ee32,
- 0x1ee34, 0x1ee37,
- 0x1ee39, 0x1ee39,
- 0x1ee3b, 0x1ee3b,
- 0x1ee42, 0x1ee42,
- 0x1ee47, 0x1ee47,
- 0x1ee49, 0x1ee49,
- 0x1ee4b, 0x1ee4b,
- 0x1ee4d, 0x1ee4f,
- 0x1ee51, 0x1ee52,
- 0x1ee54, 0x1ee54,
- 0x1ee57, 0x1ee57,
- 0x1ee59, 0x1ee59,
- 0x1ee5b, 0x1ee5b,
- 0x1ee5d, 0x1ee5d,
- 0x1ee5f, 0x1ee5f,
- 0x1ee61, 0x1ee62,
- 0x1ee64, 0x1ee64,
- 0x1ee67, 0x1ee6a,
- 0x1ee6c, 0x1ee72,
- 0x1ee74, 0x1ee77,
- 0x1ee79, 0x1ee7c,
- 0x1ee7e, 0x1ee7e,
- 0x1ee80, 0x1ee89,
- 0x1ee8b, 0x1ee9b,
- 0x1eea1, 0x1eea3,
- 0x1eea5, 0x1eea9,
- 0x1eeab, 0x1eebb,
- 0x20000, 0x2a6d6,
- 0x2a700, 0x2b734,
- 0x2b740, 0x2b81d,
- 0x2f800, 0x2fa1d
-};
-UCP_FN(Lo)
-
-static const unichar ucp_Lt_def[] = {
- 0x1c5, 0x1c5,
- 0x1c8, 0x1c8,
- 0x1cb, 0x1cb,
- 0x1f2, 0x1f2,
- 0x1f88, 0x1f8f,
- 0x1f98, 0x1f9f,
- 0x1fa8, 0x1faf,
- 0x1fbc, 0x1fbc,
- 0x1fcc, 0x1fcc,
- 0x1ffc, 0x1ffc
-};
-UCP_FN(Lt)
-
-static const unichar ucp_Lu_def[] = {
- 0x41, 0x5a,
- 0xc0, 0xd6,
- 0xd8, 0xde,
- 0x100, 0x100,
- 0x102, 0x102,
- 0x104, 0x104,
- 0x106, 0x106,
- 0x108, 0x108,
- 0x10a, 0x10a,
- 0x10c, 0x10c,
- 0x10e, 0x10e,
- 0x110, 0x110,
- 0x112, 0x112,
- 0x114, 0x114,
- 0x116, 0x116,
- 0x118, 0x118,
- 0x11a, 0x11a,
- 0x11c, 0x11c,
- 0x11e, 0x11e,
- 0x120, 0x120,
- 0x122, 0x122,
- 0x124, 0x124,
- 0x126, 0x126,
- 0x128, 0x128,
- 0x12a, 0x12a,
- 0x12c, 0x12c,
- 0x12e, 0x12e,
- 0x130, 0x130,
- 0x132, 0x132,
- 0x134, 0x134,
- 0x136, 0x136,
- 0x139, 0x139,
- 0x13b, 0x13b,
- 0x13d, 0x13d,
- 0x13f, 0x13f,
- 0x141, 0x141,
- 0x143, 0x143,
- 0x145, 0x145,
- 0x147, 0x147,
- 0x14a, 0x14a,
- 0x14c, 0x14c,
- 0x14e, 0x14e,
- 0x150, 0x150,
- 0x152, 0x152,
- 0x154, 0x154,
- 0x156, 0x156,
- 0x158, 0x158,
- 0x15a, 0x15a,
- 0x15c, 0x15c,
- 0x15e, 0x15e,
- 0x160, 0x160,
- 0x162, 0x162,
- 0x164, 0x164,
- 0x166, 0x166,
- 0x168, 0x168,
- 0x16a, 0x16a,
- 0x16c, 0x16c,
- 0x16e, 0x16e,
- 0x170, 0x170,
- 0x172, 0x172,
- 0x174, 0x174,
- 0x176, 0x176,
- 0x178, 0x179,
- 0x17b, 0x17b,
- 0x17d, 0x17d,
- 0x181, 0x182,
- 0x184, 0x184,
- 0x186, 0x187,
- 0x189, 0x18b,
- 0x18e, 0x191,
- 0x193, 0x194,
- 0x196, 0x198,
- 0x19c, 0x19d,
- 0x19f, 0x1a0,
- 0x1a2, 0x1a2,
- 0x1a4, 0x1a4,
- 0x1a6, 0x1a7,
- 0x1a9, 0x1a9,
- 0x1ac, 0x1ac,
- 0x1ae, 0x1af,
- 0x1b1, 0x1b3,
- 0x1b5, 0x1b5,
- 0x1b7, 0x1b8,
- 0x1bc, 0x1bc,
- 0x1c4, 0x1c4,
- 0x1c7, 0x1c7,
- 0x1ca, 0x1ca,
- 0x1cd, 0x1cd,
- 0x1cf, 0x1cf,
- 0x1d1, 0x1d1,
- 0x1d3, 0x1d3,
- 0x1d5, 0x1d5,
- 0x1d7, 0x1d7,
- 0x1d9, 0x1d9,
- 0x1db, 0x1db,
- 0x1de, 0x1de,
- 0x1e0, 0x1e0,
- 0x1e2, 0x1e2,
- 0x1e4, 0x1e4,
- 0x1e6, 0x1e6,
- 0x1e8, 0x1e8,
- 0x1ea, 0x1ea,
- 0x1ec, 0x1ec,
- 0x1ee, 0x1ee,
- 0x1f1, 0x1f1,
- 0x1f4, 0x1f4,
- 0x1f6, 0x1f8,
- 0x1fa, 0x1fa,
- 0x1fc, 0x1fc,
- 0x1fe, 0x1fe,
- 0x200, 0x200,
- 0x202, 0x202,
- 0x204, 0x204,
- 0x206, 0x206,
- 0x208, 0x208,
- 0x20a, 0x20a,
- 0x20c, 0x20c,
- 0x20e, 0x20e,
- 0x210, 0x210,
- 0x212, 0x212,
- 0x214, 0x214,
- 0x216, 0x216,
- 0x218, 0x218,
- 0x21a, 0x21a,
- 0x21c, 0x21c,
- 0x21e, 0x21e,
- 0x220, 0x220,
- 0x222, 0x222,
- 0x224, 0x224,
- 0x226, 0x226,
- 0x228, 0x228,
- 0x22a, 0x22a,
- 0x22c, 0x22c,
- 0x22e, 0x22e,
- 0x230, 0x230,
- 0x232, 0x232,
- 0x23a, 0x23b,
- 0x23d, 0x23e,
- 0x241, 0x241,
- 0x243, 0x246,
- 0x248, 0x248,
- 0x24a, 0x24a,
- 0x24c, 0x24c,
- 0x24e, 0x24e,
- 0x370, 0x370,
- 0x372, 0x372,
- 0x376, 0x376,
- 0x37f, 0x37f,
- 0x386, 0x386,
- 0x388, 0x38a,
- 0x38c, 0x38c,
- 0x38e, 0x38f,
- 0x391, 0x3a1,
- 0x3a3, 0x3ab,
- 0x3cf, 0x3cf,
- 0x3d2, 0x3d4,
- 0x3d8, 0x3d8,
- 0x3da, 0x3da,
- 0x3dc, 0x3dc,
- 0x3de, 0x3de,
- 0x3e0, 0x3e0,
- 0x3e2, 0x3e2,
- 0x3e4, 0x3e4,
- 0x3e6, 0x3e6,
- 0x3e8, 0x3e8,
- 0x3ea, 0x3ea,
- 0x3ec, 0x3ec,
- 0x3ee, 0x3ee,
- 0x3f4, 0x3f4,
- 0x3f7, 0x3f7,
- 0x3f9, 0x3fa,
- 0x3fd, 0x42f,
- 0x460, 0x460,
- 0x462, 0x462,
- 0x464, 0x464,
- 0x466, 0x466,
- 0x468, 0x468,
- 0x46a, 0x46a,
- 0x46c, 0x46c,
- 0x46e, 0x46e,
- 0x470, 0x470,
- 0x472, 0x472,
- 0x474, 0x474,
- 0x476, 0x476,
- 0x478, 0x478,
- 0x47a, 0x47a,
- 0x47c, 0x47c,
- 0x47e, 0x47e,
- 0x480, 0x480,
- 0x48a, 0x48a,
- 0x48c, 0x48c,
- 0x48e, 0x48e,
- 0x490, 0x490,
- 0x492, 0x492,
- 0x494, 0x494,
- 0x496, 0x496,
- 0x498, 0x498,
- 0x49a, 0x49a,
- 0x49c, 0x49c,
- 0x49e, 0x49e,
- 0x4a0, 0x4a0,
- 0x4a2, 0x4a2,
- 0x4a4, 0x4a4,
- 0x4a6, 0x4a6,
- 0x4a8, 0x4a8,
- 0x4aa, 0x4aa,
- 0x4ac, 0x4ac,
- 0x4ae, 0x4ae,
- 0x4b0, 0x4b0,
- 0x4b2, 0x4b2,
- 0x4b4, 0x4b4,
- 0x4b6, 0x4b6,
- 0x4b8, 0x4b8,
- 0x4ba, 0x4ba,
- 0x4bc, 0x4bc,
- 0x4be, 0x4be,
- 0x4c0, 0x4c1,
- 0x4c3, 0x4c3,
- 0x4c5, 0x4c5,
- 0x4c7, 0x4c7,
- 0x4c9, 0x4c9,
- 0x4cb, 0x4cb,
- 0x4cd, 0x4cd,
- 0x4d0, 0x4d0,
- 0x4d2, 0x4d2,
- 0x4d4, 0x4d4,
- 0x4d6, 0x4d6,
- 0x4d8, 0x4d8,
- 0x4da, 0x4da,
- 0x4dc, 0x4dc,
- 0x4de, 0x4de,
- 0x4e0, 0x4e0,
- 0x4e2, 0x4e2,
- 0x4e4, 0x4e4,
- 0x4e6, 0x4e6,
- 0x4e8, 0x4e8,
- 0x4ea, 0x4ea,
- 0x4ec, 0x4ec,
- 0x4ee, 0x4ee,
- 0x4f0, 0x4f0,
- 0x4f2, 0x4f2,
- 0x4f4, 0x4f4,
- 0x4f6, 0x4f6,
- 0x4f8, 0x4f8,
- 0x4fa, 0x4fa,
- 0x4fc, 0x4fc,
- 0x4fe, 0x4fe,
- 0x500, 0x500,
- 0x502, 0x502,
- 0x504, 0x504,
- 0x506, 0x506,
- 0x508, 0x508,
- 0x50a, 0x50a,
- 0x50c, 0x50c,
- 0x50e, 0x50e,
- 0x510, 0x510,
- 0x512, 0x512,
- 0x514, 0x514,
- 0x516, 0x516,
- 0x518, 0x518,
- 0x51a, 0x51a,
- 0x51c, 0x51c,
- 0x51e, 0x51e,
- 0x520, 0x520,
- 0x522, 0x522,
- 0x524, 0x524,
- 0x526, 0x526,
- 0x528, 0x528,
- 0x52a, 0x52a,
- 0x52c, 0x52c,
- 0x52e, 0x52e,
- 0x531, 0x556,
- 0x10a0, 0x10c5,
- 0x10c7, 0x10c7,
- 0x10cd, 0x10cd,
- 0x1e00, 0x1e00,
- 0x1e02, 0x1e02,
- 0x1e04, 0x1e04,
- 0x1e06, 0x1e06,
- 0x1e08, 0x1e08,
- 0x1e0a, 0x1e0a,
- 0x1e0c, 0x1e0c,
- 0x1e0e, 0x1e0e,
- 0x1e10, 0x1e10,
- 0x1e12, 0x1e12,
- 0x1e14, 0x1e14,
- 0x1e16, 0x1e16,
- 0x1e18, 0x1e18,
- 0x1e1a, 0x1e1a,
- 0x1e1c, 0x1e1c,
- 0x1e1e, 0x1e1e,
- 0x1e20, 0x1e20,
- 0x1e22, 0x1e22,
- 0x1e24, 0x1e24,
- 0x1e26, 0x1e26,
- 0x1e28, 0x1e28,
- 0x1e2a, 0x1e2a,
- 0x1e2c, 0x1e2c,
- 0x1e2e, 0x1e2e,
- 0x1e30, 0x1e30,
- 0x1e32, 0x1e32,
- 0x1e34, 0x1e34,
- 0x1e36, 0x1e36,
- 0x1e38, 0x1e38,
- 0x1e3a, 0x1e3a,
- 0x1e3c, 0x1e3c,
- 0x1e3e, 0x1e3e,
- 0x1e40, 0x1e40,
- 0x1e42, 0x1e42,
- 0x1e44, 0x1e44,
- 0x1e46, 0x1e46,
- 0x1e48, 0x1e48,
- 0x1e4a, 0x1e4a,
- 0x1e4c, 0x1e4c,
- 0x1e4e, 0x1e4e,
- 0x1e50, 0x1e50,
- 0x1e52, 0x1e52,
- 0x1e54, 0x1e54,
- 0x1e56, 0x1e56,
- 0x1e58, 0x1e58,
- 0x1e5a, 0x1e5a,
- 0x1e5c, 0x1e5c,
- 0x1e5e, 0x1e5e,
- 0x1e60, 0x1e60,
- 0x1e62, 0x1e62,
- 0x1e64, 0x1e64,
- 0x1e66, 0x1e66,
- 0x1e68, 0x1e68,
- 0x1e6a, 0x1e6a,
- 0x1e6c, 0x1e6c,
- 0x1e6e, 0x1e6e,
- 0x1e70, 0x1e70,
- 0x1e72, 0x1e72,
- 0x1e74, 0x1e74,
- 0x1e76, 0x1e76,
- 0x1e78, 0x1e78,
- 0x1e7a, 0x1e7a,
- 0x1e7c, 0x1e7c,
- 0x1e7e, 0x1e7e,
- 0x1e80, 0x1e80,
- 0x1e82, 0x1e82,
- 0x1e84, 0x1e84,
- 0x1e86, 0x1e86,
- 0x1e88, 0x1e88,
- 0x1e8a, 0x1e8a,
- 0x1e8c, 0x1e8c,
- 0x1e8e, 0x1e8e,
- 0x1e90, 0x1e90,
- 0x1e92, 0x1e92,
- 0x1e94, 0x1e94,
- 0x1e9e, 0x1e9e,
- 0x1ea0, 0x1ea0,
- 0x1ea2, 0x1ea2,
- 0x1ea4, 0x1ea4,
- 0x1ea6, 0x1ea6,
- 0x1ea8, 0x1ea8,
- 0x1eaa, 0x1eaa,
- 0x1eac, 0x1eac,
- 0x1eae, 0x1eae,
- 0x1eb0, 0x1eb0,
- 0x1eb2, 0x1eb2,
- 0x1eb4, 0x1eb4,
- 0x1eb6, 0x1eb6,
- 0x1eb8, 0x1eb8,
- 0x1eba, 0x1eba,
- 0x1ebc, 0x1ebc,
- 0x1ebe, 0x1ebe,
- 0x1ec0, 0x1ec0,
- 0x1ec2, 0x1ec2,
- 0x1ec4, 0x1ec4,
- 0x1ec6, 0x1ec6,
- 0x1ec8, 0x1ec8,
- 0x1eca, 0x1eca,
- 0x1ecc, 0x1ecc,
- 0x1ece, 0x1ece,
- 0x1ed0, 0x1ed0,
- 0x1ed2, 0x1ed2,
- 0x1ed4, 0x1ed4,
- 0x1ed6, 0x1ed6,
- 0x1ed8, 0x1ed8,
- 0x1eda, 0x1eda,
- 0x1edc, 0x1edc,
- 0x1ede, 0x1ede,
- 0x1ee0, 0x1ee0,
- 0x1ee2, 0x1ee2,
- 0x1ee4, 0x1ee4,
- 0x1ee6, 0x1ee6,
- 0x1ee8, 0x1ee8,
- 0x1eea, 0x1eea,
- 0x1eec, 0x1eec,
- 0x1eee, 0x1eee,
- 0x1ef0, 0x1ef0,
- 0x1ef2, 0x1ef2,
- 0x1ef4, 0x1ef4,
- 0x1ef6, 0x1ef6,
- 0x1ef8, 0x1ef8,
- 0x1efa, 0x1efa,
- 0x1efc, 0x1efc,
- 0x1efe, 0x1efe,
- 0x1f08, 0x1f0f,
- 0x1f18, 0x1f1d,
- 0x1f28, 0x1f2f,
- 0x1f38, 0x1f3f,
- 0x1f48, 0x1f4d,
- 0x1f59, 0x1f59,
- 0x1f5b, 0x1f5b,
- 0x1f5d, 0x1f5d,
- 0x1f5f, 0x1f5f,
- 0x1f68, 0x1f6f,
- 0x1fb8, 0x1fbb,
- 0x1fc8, 0x1fcb,
- 0x1fd8, 0x1fdb,
- 0x1fe8, 0x1fec,
- 0x1ff8, 0x1ffb,
- 0x2102, 0x2102,
- 0x2107, 0x2107,
- 0x210b, 0x210d,
- 0x2110, 0x2112,
- 0x2115, 0x2115,
- 0x2119, 0x211d,
- 0x2124, 0x2124,
- 0x2126, 0x2126,
- 0x2128, 0x2128,
- 0x212a, 0x212d,
- 0x2130, 0x2133,
- 0x213e, 0x213f,
- 0x2145, 0x2145,
- 0x2183, 0x2183,
- 0x2c00, 0x2c2e,
- 0x2c60, 0x2c60,
- 0x2c62, 0x2c64,
- 0x2c67, 0x2c67,
- 0x2c69, 0x2c69,
- 0x2c6b, 0x2c6b,
- 0x2c6d, 0x2c70,
- 0x2c72, 0x2c72,
- 0x2c75, 0x2c75,
- 0x2c7e, 0x2c80,
- 0x2c82, 0x2c82,
- 0x2c84, 0x2c84,
- 0x2c86, 0x2c86,
- 0x2c88, 0x2c88,
- 0x2c8a, 0x2c8a,
- 0x2c8c, 0x2c8c,
- 0x2c8e, 0x2c8e,
- 0x2c90, 0x2c90,
- 0x2c92, 0x2c92,
- 0x2c94, 0x2c94,
- 0x2c96, 0x2c96,
- 0x2c98, 0x2c98,
- 0x2c9a, 0x2c9a,
- 0x2c9c, 0x2c9c,
- 0x2c9e, 0x2c9e,
- 0x2ca0, 0x2ca0,
- 0x2ca2, 0x2ca2,
- 0x2ca4, 0x2ca4,
- 0x2ca6, 0x2ca6,
- 0x2ca8, 0x2ca8,
- 0x2caa, 0x2caa,
- 0x2cac, 0x2cac,
- 0x2cae, 0x2cae,
- 0x2cb0, 0x2cb0,
- 0x2cb2, 0x2cb2,
- 0x2cb4, 0x2cb4,
- 0x2cb6, 0x2cb6,
- 0x2cb8, 0x2cb8,
- 0x2cba, 0x2cba,
- 0x2cbc, 0x2cbc,
- 0x2cbe, 0x2cbe,
- 0x2cc0, 0x2cc0,
- 0x2cc2, 0x2cc2,
- 0x2cc4, 0x2cc4,
- 0x2cc6, 0x2cc6,
- 0x2cc8, 0x2cc8,
- 0x2cca, 0x2cca,
- 0x2ccc, 0x2ccc,
- 0x2cce, 0x2cce,
- 0x2cd0, 0x2cd0,
- 0x2cd2, 0x2cd2,
- 0x2cd4, 0x2cd4,
- 0x2cd6, 0x2cd6,
- 0x2cd8, 0x2cd8,
- 0x2cda, 0x2cda,
- 0x2cdc, 0x2cdc,
- 0x2cde, 0x2cde,
- 0x2ce0, 0x2ce0,
- 0x2ce2, 0x2ce2,
- 0x2ceb, 0x2ceb,
- 0x2ced, 0x2ced,
- 0x2cf2, 0x2cf2,
- 0xa640, 0xa640,
- 0xa642, 0xa642,
- 0xa644, 0xa644,
- 0xa646, 0xa646,
- 0xa648, 0xa648,
- 0xa64a, 0xa64a,
- 0xa64c, 0xa64c,
- 0xa64e, 0xa64e,
- 0xa650, 0xa650,
- 0xa652, 0xa652,
- 0xa654, 0xa654,
- 0xa656, 0xa656,
- 0xa658, 0xa658,
- 0xa65a, 0xa65a,
- 0xa65c, 0xa65c,
- 0xa65e, 0xa65e,
- 0xa660, 0xa660,
- 0xa662, 0xa662,
- 0xa664, 0xa664,
- 0xa666, 0xa666,
- 0xa668, 0xa668,
- 0xa66a, 0xa66a,
- 0xa66c, 0xa66c,
- 0xa680, 0xa680,
- 0xa682, 0xa682,
- 0xa684, 0xa684,
- 0xa686, 0xa686,
- 0xa688, 0xa688,
- 0xa68a, 0xa68a,
- 0xa68c, 0xa68c,
- 0xa68e, 0xa68e,
- 0xa690, 0xa690,
- 0xa692, 0xa692,
- 0xa694, 0xa694,
- 0xa696, 0xa696,
- 0xa698, 0xa698,
- 0xa69a, 0xa69a,
- 0xa722, 0xa722,
- 0xa724, 0xa724,
- 0xa726, 0xa726,
- 0xa728, 0xa728,
- 0xa72a, 0xa72a,
- 0xa72c, 0xa72c,
- 0xa72e, 0xa72e,
- 0xa732, 0xa732,
- 0xa734, 0xa734,
- 0xa736, 0xa736,
- 0xa738, 0xa738,
- 0xa73a, 0xa73a,
- 0xa73c, 0xa73c,
- 0xa73e, 0xa73e,
- 0xa740, 0xa740,
- 0xa742, 0xa742,
- 0xa744, 0xa744,
- 0xa746, 0xa746,
- 0xa748, 0xa748,
- 0xa74a, 0xa74a,
- 0xa74c, 0xa74c,
- 0xa74e, 0xa74e,
- 0xa750, 0xa750,
- 0xa752, 0xa752,
- 0xa754, 0xa754,
- 0xa756, 0xa756,
- 0xa758, 0xa758,
- 0xa75a, 0xa75a,
- 0xa75c, 0xa75c,
- 0xa75e, 0xa75e,
- 0xa760, 0xa760,
- 0xa762, 0xa762,
- 0xa764, 0xa764,
- 0xa766, 0xa766,
- 0xa768, 0xa768,
- 0xa76a, 0xa76a,
- 0xa76c, 0xa76c,
- 0xa76e, 0xa76e,
- 0xa779, 0xa779,
- 0xa77b, 0xa77b,
- 0xa77d, 0xa77e,
- 0xa780, 0xa780,
- 0xa782, 0xa782,
- 0xa784, 0xa784,
- 0xa786, 0xa786,
- 0xa78b, 0xa78b,
- 0xa78d, 0xa78d,
- 0xa790, 0xa790,
- 0xa792, 0xa792,
- 0xa796, 0xa796,
- 0xa798, 0xa798,
- 0xa79a, 0xa79a,
- 0xa79c, 0xa79c,
- 0xa79e, 0xa79e,
- 0xa7a0, 0xa7a0,
- 0xa7a2, 0xa7a2,
- 0xa7a4, 0xa7a4,
- 0xa7a6, 0xa7a6,
- 0xa7a8, 0xa7a8,
- 0xa7aa, 0xa7ad,
- 0xa7b0, 0xa7b1,
- 0xff21, 0xff3a,
- 0x10400, 0x10427,
- 0x118a0, 0x118bf,
- 0x1d400, 0x1d419,
- 0x1d434, 0x1d44d,
- 0x1d468, 0x1d481,
- 0x1d49c, 0x1d49c,
- 0x1d49e, 0x1d49f,
- 0x1d4a2, 0x1d4a2,
- 0x1d4a5, 0x1d4a6,
- 0x1d4a9, 0x1d4ac,
- 0x1d4ae, 0x1d4b5,
- 0x1d4d0, 0x1d4e9,
- 0x1d504, 0x1d505,
- 0x1d507, 0x1d50a,
- 0x1d50d, 0x1d514,
- 0x1d516, 0x1d51c,
- 0x1d538, 0x1d539,
- 0x1d53b, 0x1d53e,
- 0x1d540, 0x1d544,
- 0x1d546, 0x1d546,
- 0x1d54a, 0x1d550,
- 0x1d56c, 0x1d585,
- 0x1d5a0, 0x1d5b9,
- 0x1d5d4, 0x1d5ed,
- 0x1d608, 0x1d621,
- 0x1d63c, 0x1d655,
- 0x1d670, 0x1d689,
- 0x1d6a8, 0x1d6c0,
- 0x1d6e2, 0x1d6fa,
- 0x1d71c, 0x1d734,
- 0x1d756, 0x1d76e,
- 0x1d790, 0x1d7a8,
- 0x1d7ca, 0x1d7ca
-};
-UCP_FN(Lu)
-
-static const unichar ucp_M_def[] = {
- 0x300, 0x36f,
- 0x483, 0x489,
- 0x591, 0x5bd,
- 0x5bf, 0x5bf,
- 0x5c1, 0x5c2,
- 0x5c4, 0x5c5,
- 0x5c7, 0x5c7,
- 0x610, 0x61a,
- 0x64b, 0x65f,
- 0x670, 0x670,
- 0x6d6, 0x6dc,
- 0x6df, 0x6e4,
- 0x6e7, 0x6e8,
- 0x6ea, 0x6ed,
- 0x711, 0x711,
- 0x730, 0x74a,
- 0x7a6, 0x7b0,
- 0x7eb, 0x7f3,
- 0x816, 0x819,
- 0x81b, 0x823,
- 0x825, 0x827,
- 0x829, 0x82d,
- 0x859, 0x85b,
- 0x8e4, 0x903,
- 0x93a, 0x93c,
- 0x93e, 0x94f,
- 0x951, 0x957,
- 0x962, 0x963,
- 0x981, 0x983,
- 0x9bc, 0x9bc,
- 0x9be, 0x9c4,
- 0x9c7, 0x9c8,
- 0x9cb, 0x9cd,
- 0x9d7, 0x9d7,
- 0x9e2, 0x9e3,
- 0xa01, 0xa03,
- 0xa3c, 0xa3c,
- 0xa3e, 0xa42,
- 0xa47, 0xa48,
- 0xa4b, 0xa4d,
- 0xa51, 0xa51,
- 0xa70, 0xa71,
- 0xa75, 0xa75,
- 0xa81, 0xa83,
- 0xabc, 0xabc,
- 0xabe, 0xac5,
- 0xac7, 0xac9,
- 0xacb, 0xacd,
- 0xae2, 0xae3,
- 0xb01, 0xb03,
- 0xb3c, 0xb3c,
- 0xb3e, 0xb44,
- 0xb47, 0xb48,
- 0xb4b, 0xb4d,
- 0xb56, 0xb57,
- 0xb62, 0xb63,
- 0xb82, 0xb82,
- 0xbbe, 0xbc2,
- 0xbc6, 0xbc8,
- 0xbca, 0xbcd,
- 0xbd7, 0xbd7,
- 0xc00, 0xc03,
- 0xc3e, 0xc44,
- 0xc46, 0xc48,
- 0xc4a, 0xc4d,
- 0xc55, 0xc56,
- 0xc62, 0xc63,
- 0xc81, 0xc83,
- 0xcbc, 0xcbc,
- 0xcbe, 0xcc4,
- 0xcc6, 0xcc8,
- 0xcca, 0xccd,
- 0xcd5, 0xcd6,
- 0xce2, 0xce3,
- 0xd01, 0xd03,
- 0xd3e, 0xd44,
- 0xd46, 0xd48,
- 0xd4a, 0xd4d,
- 0xd57, 0xd57,
- 0xd62, 0xd63,
- 0xd82, 0xd83,
- 0xdca, 0xdca,
- 0xdcf, 0xdd4,
- 0xdd6, 0xdd6,
- 0xdd8, 0xddf,
- 0xdf2, 0xdf3,
- 0xe31, 0xe31,
- 0xe34, 0xe3a,
- 0xe47, 0xe4e,
- 0xeb1, 0xeb1,
- 0xeb4, 0xeb9,
- 0xebb, 0xebc,
- 0xec8, 0xecd,
- 0xf18, 0xf19,
- 0xf35, 0xf35,
- 0xf37, 0xf37,
- 0xf39, 0xf39,
- 0xf3e, 0xf3f,
- 0xf71, 0xf84,
- 0xf86, 0xf87,
- 0xf8d, 0xf97,
- 0xf99, 0xfbc,
- 0xfc6, 0xfc6,
- 0x102b, 0x103e,
- 0x1056, 0x1059,
- 0x105e, 0x1060,
- 0x1062, 0x1064,
- 0x1067, 0x106d,
- 0x1071, 0x1074,
- 0x1082, 0x108d,
- 0x108f, 0x108f,
- 0x109a, 0x109d,
- 0x135d, 0x135f,
- 0x1712, 0x1714,
- 0x1732, 0x1734,
- 0x1752, 0x1753,
- 0x1772, 0x1773,
- 0x17b4, 0x17d3,
- 0x17dd, 0x17dd,
- 0x180b, 0x180d,
- 0x18a9, 0x18a9,
- 0x1920, 0x192b,
- 0x1930, 0x193b,
- 0x19b0, 0x19c0,
- 0x19c8, 0x19c9,
- 0x1a17, 0x1a1b,
- 0x1a55, 0x1a5e,
- 0x1a60, 0x1a7c,
- 0x1a7f, 0x1a7f,
- 0x1ab0, 0x1abe,
- 0x1b00, 0x1b04,
- 0x1b34, 0x1b44,
- 0x1b6b, 0x1b73,
- 0x1b80, 0x1b82,
- 0x1ba1, 0x1bad,
- 0x1be6, 0x1bf3,
- 0x1c24, 0x1c37,
- 0x1cd0, 0x1cd2,
- 0x1cd4, 0x1ce8,
- 0x1ced, 0x1ced,
- 0x1cf2, 0x1cf4,
- 0x1cf8, 0x1cf9,
- 0x1dc0, 0x1df5,
- 0x1dfc, 0x1dff,
- 0x20d0, 0x20f0,
- 0x2cef, 0x2cf1,
- 0x2d7f, 0x2d7f,
- 0x2de0, 0x2dff,
- 0x302a, 0x302f,
- 0x3099, 0x309a,
- 0xa66f, 0xa672,
- 0xa674, 0xa67d,
- 0xa69f, 0xa69f,
- 0xa6f0, 0xa6f1,
- 0xa802, 0xa802,
- 0xa806, 0xa806,
- 0xa80b, 0xa80b,
- 0xa823, 0xa827,
- 0xa880, 0xa881,
- 0xa8b4, 0xa8c4,
- 0xa8e0, 0xa8f1,
- 0xa926, 0xa92d,
- 0xa947, 0xa953,
- 0xa980, 0xa983,
- 0xa9b3, 0xa9c0,
- 0xa9e5, 0xa9e5,
- 0xaa29, 0xaa36,
- 0xaa43, 0xaa43,
- 0xaa4c, 0xaa4d,
- 0xaa7b, 0xaa7d,
- 0xaab0, 0xaab0,
- 0xaab2, 0xaab4,
- 0xaab7, 0xaab8,
- 0xaabe, 0xaabf,
- 0xaac1, 0xaac1,
- 0xaaeb, 0xaaef,
- 0xaaf5, 0xaaf6,
- 0xabe3, 0xabea,
- 0xabec, 0xabed,
- 0xfb1e, 0xfb1e,
- 0xfe00, 0xfe0f,
- 0xfe20, 0xfe2d,
- 0x101fd, 0x101fd,
- 0x102e0, 0x102e0,
- 0x10376, 0x1037a,
- 0x10a01, 0x10a03,
- 0x10a05, 0x10a06,
- 0x10a0c, 0x10a0f,
- 0x10a38, 0x10a3a,
- 0x10a3f, 0x10a3f,
- 0x10ae5, 0x10ae6,
- 0x11000, 0x11002,
- 0x11038, 0x11046,
- 0x1107f, 0x11082,
- 0x110b0, 0x110ba,
- 0x11100, 0x11102,
- 0x11127, 0x11134,
- 0x11173, 0x11173,
- 0x11180, 0x11182,
- 0x111b3, 0x111c0,
- 0x1122c, 0x11237,
- 0x112df, 0x112ea,
- 0x11301, 0x11303,
- 0x1133c, 0x1133c,
- 0x1133e, 0x11344,
- 0x11347, 0x11348,
- 0x1134b, 0x1134d,
- 0x11357, 0x11357,
- 0x11362, 0x11363,
- 0x11366, 0x1136c,
- 0x11370, 0x11374,
- 0x114b0, 0x114c3,
- 0x115af, 0x115b5,
- 0x115b8, 0x115c0,
- 0x11630, 0x11640,
- 0x116ab, 0x116b7,
- 0x16af0, 0x16af4,
- 0x16b30, 0x16b36,
- 0x16f51, 0x16f7e,
- 0x16f8f, 0x16f92,
- 0x1bc9d, 0x1bc9e,
- 0x1d165, 0x1d169,
- 0x1d16d, 0x1d172,
- 0x1d17b, 0x1d182,
- 0x1d185, 0x1d18b,
- 0x1d1aa, 0x1d1ad,
- 0x1d242, 0x1d244,
- 0x1e8d0, 0x1e8d6,
- 0xe0100, 0xe01ef
-};
-UCP_FN(M)
-
-static const unichar ucp_Mc_def[] = {
- 0x903, 0x903,
- 0x93b, 0x93b,
- 0x93e, 0x940,
- 0x949, 0x94c,
- 0x94e, 0x94f,
- 0x982, 0x983,
- 0x9be, 0x9c0,
- 0x9c7, 0x9c8,
- 0x9cb, 0x9cc,
- 0x9d7, 0x9d7,
- 0xa03, 0xa03,
- 0xa3e, 0xa40,
- 0xa83, 0xa83,
- 0xabe, 0xac0,
- 0xac9, 0xac9,
- 0xacb, 0xacc,
- 0xb02, 0xb03,
- 0xb3e, 0xb3e,
- 0xb40, 0xb40,
- 0xb47, 0xb48,
- 0xb4b, 0xb4c,
- 0xb57, 0xb57,
- 0xbbe, 0xbbf,
- 0xbc1, 0xbc2,
- 0xbc6, 0xbc8,
- 0xbca, 0xbcc,
- 0xbd7, 0xbd7,
- 0xc01, 0xc03,
- 0xc41, 0xc44,
- 0xc82, 0xc83,
- 0xcbe, 0xcbe,
- 0xcc0, 0xcc4,
- 0xcc7, 0xcc8,
- 0xcca, 0xccb,
- 0xcd5, 0xcd6,
- 0xd02, 0xd03,
- 0xd3e, 0xd40,
- 0xd46, 0xd48,
- 0xd4a, 0xd4c,
- 0xd57, 0xd57,
- 0xd82, 0xd83,
- 0xdcf, 0xdd1,
- 0xdd8, 0xddf,
- 0xdf2, 0xdf3,
- 0xf3e, 0xf3f,
- 0xf7f, 0xf7f,
- 0x102b, 0x102c,
- 0x1031, 0x1031,
- 0x1038, 0x1038,
- 0x103b, 0x103c,
- 0x1056, 0x1057,
- 0x1062, 0x1064,
- 0x1067, 0x106d,
- 0x1083, 0x1084,
- 0x1087, 0x108c,
- 0x108f, 0x108f,
- 0x109a, 0x109c,
- 0x17b6, 0x17b6,
- 0x17be, 0x17c5,
- 0x17c7, 0x17c8,
- 0x1923, 0x1926,
- 0x1929, 0x192b,
- 0x1930, 0x1931,
- 0x1933, 0x1938,
- 0x19b0, 0x19c0,
- 0x19c8, 0x19c9,
- 0x1a19, 0x1a1a,
- 0x1a55, 0x1a55,
- 0x1a57, 0x1a57,
- 0x1a61, 0x1a61,
- 0x1a63, 0x1a64,
- 0x1a6d, 0x1a72,
- 0x1b04, 0x1b04,
- 0x1b35, 0x1b35,
- 0x1b3b, 0x1b3b,
- 0x1b3d, 0x1b41,
- 0x1b43, 0x1b44,
- 0x1b82, 0x1b82,
- 0x1ba1, 0x1ba1,
- 0x1ba6, 0x1ba7,
- 0x1baa, 0x1baa,
- 0x1be7, 0x1be7,
- 0x1bea, 0x1bec,
- 0x1bee, 0x1bee,
- 0x1bf2, 0x1bf3,
- 0x1c24, 0x1c2b,
- 0x1c34, 0x1c35,
- 0x1ce1, 0x1ce1,
- 0x1cf2, 0x1cf3,
- 0x302e, 0x302f,
- 0xa823, 0xa824,
- 0xa827, 0xa827,
- 0xa880, 0xa881,
- 0xa8b4, 0xa8c3,
- 0xa952, 0xa953,
- 0xa983, 0xa983,
- 0xa9b4, 0xa9b5,
- 0xa9ba, 0xa9bb,
- 0xa9bd, 0xa9c0,
- 0xaa2f, 0xaa30,
- 0xaa33, 0xaa34,
- 0xaa4d, 0xaa4d,
- 0xaa7b, 0xaa7b,
- 0xaa7d, 0xaa7d,
- 0xaaeb, 0xaaeb,
- 0xaaee, 0xaaef,
- 0xaaf5, 0xaaf5,
- 0xabe3, 0xabe4,
- 0xabe6, 0xabe7,
- 0xabe9, 0xabea,
- 0xabec, 0xabec,
- 0x11000, 0x11000,
- 0x11002, 0x11002,
- 0x11082, 0x11082,
- 0x110b0, 0x110b2,
- 0x110b7, 0x110b8,
- 0x1112c, 0x1112c,
- 0x11182, 0x11182,
- 0x111b3, 0x111b5,
- 0x111bf, 0x111c0,
- 0x1122c, 0x1122e,
- 0x11232, 0x11233,
- 0x11235, 0x11235,
- 0x112e0, 0x112e2,
- 0x11302, 0x11303,
- 0x1133e, 0x1133f,
- 0x11341, 0x11344,
- 0x11347, 0x11348,
- 0x1134b, 0x1134d,
- 0x11357, 0x11357,
- 0x11362, 0x11363,
- 0x114b0, 0x114b2,
- 0x114b9, 0x114b9,
- 0x114bb, 0x114be,
- 0x114c1, 0x114c1,
- 0x115af, 0x115b1,
- 0x115b8, 0x115bb,
- 0x115be, 0x115be,
- 0x11630, 0x11632,
- 0x1163b, 0x1163c,
- 0x1163e, 0x1163e,
- 0x116ac, 0x116ac,
- 0x116ae, 0x116af,
- 0x116b6, 0x116b6,
- 0x16f51, 0x16f7e,
- 0x1d165, 0x1d166,
- 0x1d16d, 0x1d172
-};
-UCP_FN(Mc)
-
-static const unichar ucp_Me_def[] = {
- 0x488, 0x489,
- 0x1abe, 0x1abe,
- 0x20dd, 0x20e0,
- 0x20e2, 0x20e4,
- 0xa670, 0xa672
-};
-UCP_FN(Me)
-
-static const unichar ucp_Mn_def[] = {
- 0x300, 0x36f,
- 0x483, 0x487,
- 0x591, 0x5bd,
- 0x5bf, 0x5bf,
- 0x5c1, 0x5c2,
- 0x5c4, 0x5c5,
- 0x5c7, 0x5c7,
- 0x610, 0x61a,
- 0x64b, 0x65f,
- 0x670, 0x670,
- 0x6d6, 0x6dc,
- 0x6df, 0x6e4,
- 0x6e7, 0x6e8,
- 0x6ea, 0x6ed,
- 0x711, 0x711,
- 0x730, 0x74a,
- 0x7a6, 0x7b0,
- 0x7eb, 0x7f3,
- 0x816, 0x819,
- 0x81b, 0x823,
- 0x825, 0x827,
- 0x829, 0x82d,
- 0x859, 0x85b,
- 0x8e4, 0x902,
- 0x93a, 0x93a,
- 0x93c, 0x93c,
- 0x941, 0x948,
- 0x94d, 0x94d,
- 0x951, 0x957,
- 0x962, 0x963,
- 0x981, 0x981,
- 0x9bc, 0x9bc,
- 0x9c1, 0x9c4,
- 0x9cd, 0x9cd,
- 0x9e2, 0x9e3,
- 0xa01, 0xa02,
- 0xa3c, 0xa3c,
- 0xa41, 0xa42,
- 0xa47, 0xa48,
- 0xa4b, 0xa4d,
- 0xa51, 0xa51,
- 0xa70, 0xa71,
- 0xa75, 0xa75,
- 0xa81, 0xa82,
- 0xabc, 0xabc,
- 0xac1, 0xac5,
- 0xac7, 0xac8,
- 0xacd, 0xacd,
- 0xae2, 0xae3,
- 0xb01, 0xb01,
- 0xb3c, 0xb3c,
- 0xb3f, 0xb3f,
- 0xb41, 0xb44,
- 0xb4d, 0xb4d,
- 0xb56, 0xb56,
- 0xb62, 0xb63,
- 0xb82, 0xb82,
- 0xbc0, 0xbc0,
- 0xbcd, 0xbcd,
- 0xc00, 0xc00,
- 0xc3e, 0xc40,
- 0xc46, 0xc48,
- 0xc4a, 0xc4d,
- 0xc55, 0xc56,
- 0xc62, 0xc63,
- 0xc81, 0xc81,
- 0xcbc, 0xcbc,
- 0xcbf, 0xcbf,
- 0xcc6, 0xcc6,
- 0xccc, 0xccd,
- 0xce2, 0xce3,
- 0xd01, 0xd01,
- 0xd41, 0xd44,
- 0xd4d, 0xd4d,
- 0xd62, 0xd63,
- 0xdca, 0xdca,
- 0xdd2, 0xdd4,
- 0xdd6, 0xdd6,
- 0xe31, 0xe31,
- 0xe34, 0xe3a,
- 0xe47, 0xe4e,
- 0xeb1, 0xeb1,
- 0xeb4, 0xeb9,
- 0xebb, 0xebc,
- 0xec8, 0xecd,
- 0xf18, 0xf19,
- 0xf35, 0xf35,
- 0xf37, 0xf37,
- 0xf39, 0xf39,
- 0xf71, 0xf7e,
- 0xf80, 0xf84,
- 0xf86, 0xf87,
- 0xf8d, 0xf97,
- 0xf99, 0xfbc,
- 0xfc6, 0xfc6,
- 0x102d, 0x1030,
- 0x1032, 0x1037,
- 0x1039, 0x103a,
- 0x103d, 0x103e,
- 0x1058, 0x1059,
- 0x105e, 0x1060,
- 0x1071, 0x1074,
- 0x1082, 0x1082,
- 0x1085, 0x1086,
- 0x108d, 0x108d,
- 0x109d, 0x109d,
- 0x135d, 0x135f,
- 0x1712, 0x1714,
- 0x1732, 0x1734,
- 0x1752, 0x1753,
- 0x1772, 0x1773,
- 0x17b4, 0x17b5,
- 0x17b7, 0x17bd,
- 0x17c6, 0x17c6,
- 0x17c9, 0x17d3,
- 0x17dd, 0x17dd,
- 0x180b, 0x180d,
- 0x18a9, 0x18a9,
- 0x1920, 0x1922,
- 0x1927, 0x1928,
- 0x1932, 0x1932,
- 0x1939, 0x193b,
- 0x1a17, 0x1a18,
- 0x1a1b, 0x1a1b,
- 0x1a56, 0x1a56,
- 0x1a58, 0x1a5e,
- 0x1a60, 0x1a60,
- 0x1a62, 0x1a62,
- 0x1a65, 0x1a6c,
- 0x1a73, 0x1a7c,
- 0x1a7f, 0x1a7f,
- 0x1ab0, 0x1abd,
- 0x1b00, 0x1b03,
- 0x1b34, 0x1b34,
- 0x1b36, 0x1b3a,
- 0x1b3c, 0x1b3c,
- 0x1b42, 0x1b42,
- 0x1b6b, 0x1b73,
- 0x1b80, 0x1b81,
- 0x1ba2, 0x1ba5,
- 0x1ba8, 0x1ba9,
- 0x1bab, 0x1bad,
- 0x1be6, 0x1be6,
- 0x1be8, 0x1be9,
- 0x1bed, 0x1bed,
- 0x1bef, 0x1bf1,
- 0x1c2c, 0x1c33,
- 0x1c36, 0x1c37,
- 0x1cd0, 0x1cd2,
- 0x1cd4, 0x1ce0,
- 0x1ce2, 0x1ce8,
- 0x1ced, 0x1ced,
- 0x1cf4, 0x1cf4,
- 0x1cf8, 0x1cf9,
- 0x1dc0, 0x1df5,
- 0x1dfc, 0x1dff,
- 0x20d0, 0x20dc,
- 0x20e1, 0x20e1,
- 0x20e5, 0x20f0,
- 0x2cef, 0x2cf1,
- 0x2d7f, 0x2d7f,
- 0x2de0, 0x2dff,
- 0x302a, 0x302d,
- 0x3099, 0x309a,
- 0xa66f, 0xa66f,
- 0xa674, 0xa67d,
- 0xa69f, 0xa69f,
- 0xa6f0, 0xa6f1,
- 0xa802, 0xa802,
- 0xa806, 0xa806,
- 0xa80b, 0xa80b,
- 0xa825, 0xa826,
- 0xa8c4, 0xa8c4,
- 0xa8e0, 0xa8f1,
- 0xa926, 0xa92d,
- 0xa947, 0xa951,
- 0xa980, 0xa982,
- 0xa9b3, 0xa9b3,
- 0xa9b6, 0xa9b9,
- 0xa9bc, 0xa9bc,
- 0xa9e5, 0xa9e5,
- 0xaa29, 0xaa2e,
- 0xaa31, 0xaa32,
- 0xaa35, 0xaa36,
- 0xaa43, 0xaa43,
- 0xaa4c, 0xaa4c,
- 0xaa7c, 0xaa7c,
- 0xaab0, 0xaab0,
- 0xaab2, 0xaab4,
- 0xaab7, 0xaab8,
- 0xaabe, 0xaabf,
- 0xaac1, 0xaac1,
- 0xaaec, 0xaaed,
- 0xaaf6, 0xaaf6,
- 0xabe5, 0xabe5,
- 0xabe8, 0xabe8,
- 0xabed, 0xabed,
- 0xfb1e, 0xfb1e,
- 0xfe00, 0xfe0f,
- 0xfe20, 0xfe2d,
- 0x101fd, 0x101fd,
- 0x102e0, 0x102e0,
- 0x10376, 0x1037a,
- 0x10a01, 0x10a03,
- 0x10a05, 0x10a06,
- 0x10a0c, 0x10a0f,
- 0x10a38, 0x10a3a,
- 0x10a3f, 0x10a3f,
- 0x10ae5, 0x10ae6,
- 0x11001, 0x11001,
- 0x11038, 0x11046,
- 0x1107f, 0x11081,
- 0x110b3, 0x110b6,
- 0x110b9, 0x110ba,
- 0x11100, 0x11102,
- 0x11127, 0x1112b,
- 0x1112d, 0x11134,
- 0x11173, 0x11173,
- 0x11180, 0x11181,
- 0x111b6, 0x111be,
- 0x1122f, 0x11231,
- 0x11234, 0x11234,
- 0x11236, 0x11237,
- 0x112df, 0x112df,
- 0x112e3, 0x112ea,
- 0x11301, 0x11301,
- 0x1133c, 0x1133c,
- 0x11340, 0x11340,
- 0x11366, 0x1136c,
- 0x11370, 0x11374,
- 0x114b3, 0x114b8,
- 0x114ba, 0x114ba,
- 0x114bf, 0x114c0,
- 0x114c2, 0x114c3,
- 0x115b2, 0x115b5,
- 0x115bc, 0x115bd,
- 0x115bf, 0x115c0,
- 0x11633, 0x1163a,
- 0x1163d, 0x1163d,
- 0x1163f, 0x11640,
- 0x116ab, 0x116ab,
- 0x116ad, 0x116ad,
- 0x116b0, 0x116b5,
- 0x116b7, 0x116b7,
- 0x16af0, 0x16af4,
- 0x16b30, 0x16b36,
- 0x16f8f, 0x16f92,
- 0x1bc9d, 0x1bc9e,
- 0x1d167, 0x1d169,
- 0x1d17b, 0x1d182,
- 0x1d185, 0x1d18b,
- 0x1d1aa, 0x1d1ad,
- 0x1d242, 0x1d244,
- 0x1e8d0, 0x1e8d6,
- 0xe0100, 0xe01ef
-};
-UCP_FN(Mn)
-
-static const unichar ucp_N_def[] = {
- 0x30, 0x39,
- 0xb2, 0xb3,
- 0xb9, 0xb9,
- 0xbc, 0xbe,
- 0x660, 0x669,
- 0x6f0, 0x6f9,
- 0x7c0, 0x7c9,
- 0x966, 0x96f,
- 0x9e6, 0x9ef,
- 0x9f4, 0x9f9,
- 0xa66, 0xa6f,
- 0xae6, 0xaef,
- 0xb66, 0xb6f,
- 0xb72, 0xb77,
- 0xbe6, 0xbf2,
- 0xc66, 0xc6f,
- 0xc78, 0xc7e,
- 0xce6, 0xcef,
- 0xd66, 0xd75,
- 0xde6, 0xdef,
- 0xe50, 0xe59,
- 0xed0, 0xed9,
- 0xf20, 0xf33,
- 0x1040, 0x1049,
- 0x1090, 0x1099,
- 0x1369, 0x137c,
- 0x16ee, 0x16f0,
- 0x17e0, 0x17e9,
- 0x17f0, 0x17f9,
- 0x1810, 0x1819,
- 0x1946, 0x194f,
- 0x19d0, 0x19da,
- 0x1a80, 0x1a89,
- 0x1a90, 0x1a99,
- 0x1b50, 0x1b59,
- 0x1bb0, 0x1bb9,
- 0x1c40, 0x1c49,
- 0x1c50, 0x1c59,
- 0x2070, 0x2070,
- 0x2074, 0x2079,
- 0x2080, 0x2089,
- 0x2150, 0x2182,
- 0x2185, 0x2189,
- 0x2460, 0x249b,
- 0x24ea, 0x24ff,
- 0x2776, 0x2793,
- 0x2cfd, 0x2cfd,
- 0x3007, 0x3007,
- 0x3021, 0x3029,
- 0x3038, 0x303a,
- 0x3192, 0x3195,
- 0x3220, 0x3229,
- 0x3248, 0x324f,
- 0x3251, 0x325f,
- 0x3280, 0x3289,
- 0x32b1, 0x32bf,
- 0xa620, 0xa629,
- 0xa6e6, 0xa6ef,
- 0xa830, 0xa835,
- 0xa8d0, 0xa8d9,
- 0xa900, 0xa909,
- 0xa9d0, 0xa9d9,
- 0xa9f0, 0xa9f9,
- 0xaa50, 0xaa59,
- 0xabf0, 0xabf9,
- 0xff10, 0xff19,
- 0x10107, 0x10133,
- 0x10140, 0x10178,
- 0x1018a, 0x1018b,
- 0x102e1, 0x102fb,
- 0x10320, 0x10323,
- 0x10341, 0x10341,
- 0x1034a, 0x1034a,
- 0x103d1, 0x103d5,
- 0x104a0, 0x104a9,
- 0x10858, 0x1085f,
- 0x10879, 0x1087f,
- 0x108a7, 0x108af,
- 0x10916, 0x1091b,
- 0x10a40, 0x10a47,
- 0x10a7d, 0x10a7e,
- 0x10a9d, 0x10a9f,
- 0x10aeb, 0x10aef,
- 0x10b58, 0x10b5f,
- 0x10b78, 0x10b7f,
- 0x10ba9, 0x10baf,
- 0x10e60, 0x10e7e,
- 0x11052, 0x1106f,
- 0x110f0, 0x110f9,
- 0x11136, 0x1113f,
- 0x111d0, 0x111d9,
- 0x111e1, 0x111f4,
- 0x112f0, 0x112f9,
- 0x114d0, 0x114d9,
- 0x11650, 0x11659,
- 0x116c0, 0x116c9,
- 0x118e0, 0x118f2,
- 0x12400, 0x1246e,
- 0x16a60, 0x16a69,
- 0x16b50, 0x16b59,
- 0x16b5b, 0x16b61,
- 0x1d360, 0x1d371,
- 0x1d7ce, 0x1d7ff,
- 0x1e8c7, 0x1e8cf,
- 0x1f100, 0x1f10c
-};
-UCP_FN(N)
-
-static const unichar ucp_Nd_def[] = {
- 0x30, 0x39,
- 0x660, 0x669,
- 0x6f0, 0x6f9,
- 0x7c0, 0x7c9,
- 0x966, 0x96f,
- 0x9e6, 0x9ef,
- 0xa66, 0xa6f,
- 0xae6, 0xaef,
- 0xb66, 0xb6f,
- 0xbe6, 0xbef,
- 0xc66, 0xc6f,
- 0xce6, 0xcef,
- 0xd66, 0xd6f,
- 0xde6, 0xdef,
- 0xe50, 0xe59,
- 0xed0, 0xed9,
- 0xf20, 0xf29,
- 0x1040, 0x1049,
- 0x1090, 0x1099,
- 0x17e0, 0x17e9,
- 0x1810, 0x1819,
- 0x1946, 0x194f,
- 0x19d0, 0x19d9,
- 0x1a80, 0x1a89,
- 0x1a90, 0x1a99,
- 0x1b50, 0x1b59,
- 0x1bb0, 0x1bb9,
- 0x1c40, 0x1c49,
- 0x1c50, 0x1c59,
- 0xa620, 0xa629,
- 0xa8d0, 0xa8d9,
- 0xa900, 0xa909,
- 0xa9d0, 0xa9d9,
- 0xa9f0, 0xa9f9,
- 0xaa50, 0xaa59,
- 0xabf0, 0xabf9,
- 0xff10, 0xff19,
- 0x104a0, 0x104a9,
- 0x11066, 0x1106f,
- 0x110f0, 0x110f9,
- 0x11136, 0x1113f,
- 0x111d0, 0x111d9,
- 0x112f0, 0x112f9,
- 0x114d0, 0x114d9,
- 0x11650, 0x11659,
- 0x116c0, 0x116c9,
- 0x118e0, 0x118e9,
- 0x16a60, 0x16a69,
- 0x16b50, 0x16b59,
- 0x1d7ce, 0x1d7ff
-};
-UCP_FN(Nd)
-
-static const unichar ucp_Nl_def[] = {
- 0x16ee, 0x16f0,
- 0x2160, 0x2182,
- 0x2185, 0x2188,
- 0x3007, 0x3007,
- 0x3021, 0x3029,
- 0x3038, 0x303a,
- 0xa6e6, 0xa6ef,
- 0x10140, 0x10174,
- 0x10341, 0x10341,
- 0x1034a, 0x1034a,
- 0x103d1, 0x103d5,
- 0x12400, 0x1246e
-};
-UCP_FN(Nl)
-
-static const unichar ucp_No_def[] = {
- 0xb2, 0xb3,
- 0xb9, 0xb9,
- 0xbc, 0xbe,
- 0x9f4, 0x9f9,
- 0xb72, 0xb77,
- 0xbf0, 0xbf2,
- 0xc78, 0xc7e,
- 0xd70, 0xd75,
- 0xf2a, 0xf33,
- 0x1369, 0x137c,
- 0x17f0, 0x17f9,
- 0x19da, 0x19da,
- 0x2070, 0x2070,
- 0x2074, 0x2079,
- 0x2080, 0x2089,
- 0x2150, 0x215f,
- 0x2189, 0x2189,
- 0x2460, 0x249b,
- 0x24ea, 0x24ff,
- 0x2776, 0x2793,
- 0x2cfd, 0x2cfd,
- 0x3192, 0x3195,
- 0x3220, 0x3229,
- 0x3248, 0x324f,
- 0x3251, 0x325f,
- 0x3280, 0x3289,
- 0x32b1, 0x32bf,
- 0xa830, 0xa835,
- 0x10107, 0x10133,
- 0x10175, 0x10178,
- 0x1018a, 0x1018b,
- 0x102e1, 0x102fb,
- 0x10320, 0x10323,
- 0x10858, 0x1085f,
- 0x10879, 0x1087f,
- 0x108a7, 0x108af,
- 0x10916, 0x1091b,
- 0x10a40, 0x10a47,
- 0x10a7d, 0x10a7e,
- 0x10a9d, 0x10a9f,
- 0x10aeb, 0x10aef,
- 0x10b58, 0x10b5f,
- 0x10b78, 0x10b7f,
- 0x10ba9, 0x10baf,
- 0x10e60, 0x10e7e,
- 0x11052, 0x11065,
- 0x111e1, 0x111f4,
- 0x118ea, 0x118f2,
- 0x16b5b, 0x16b61,
- 0x1d360, 0x1d371,
- 0x1e8c7, 0x1e8cf,
- 0x1f100, 0x1f10c
-};
-UCP_FN(No)
-
-static const unichar ucp_P_def[] = {
- 0x21, 0x23,
- 0x25, 0x2a,
- 0x2c, 0x2f,
- 0x3a, 0x3b,
- 0x3f, 0x40,
- 0x5b, 0x5d,
- 0x5f, 0x5f,
- 0x7b, 0x7b,
- 0x7d, 0x7d,
- 0xa1, 0xa1,
- 0xa7, 0xa7,
- 0xab, 0xab,
- 0xb6, 0xb7,
- 0xbb, 0xbb,
- 0xbf, 0xbf,
- 0x37e, 0x37e,
- 0x387, 0x387,
- 0x55a, 0x55f,
- 0x589, 0x58a,
- 0x5be, 0x5be,
- 0x5c0, 0x5c0,
- 0x5c3, 0x5c3,
- 0x5c6, 0x5c6,
- 0x5f3, 0x5f4,
- 0x609, 0x60a,
- 0x60c, 0x60d,
- 0x61b, 0x61b,
- 0x61e, 0x61f,
- 0x66a, 0x66d,
- 0x6d4, 0x6d4,
- 0x700, 0x70d,
- 0x7f7, 0x7f9,
- 0x830, 0x83e,
- 0x85e, 0x85e,
- 0x964, 0x965,
- 0x970, 0x970,
- 0xaf0, 0xaf0,
- 0xdf4, 0xdf4,
- 0xe4f, 0xe4f,
- 0xe5a, 0xe5b,
- 0xf04, 0xf12,
- 0xf14, 0xf14,
- 0xf3a, 0xf3d,
- 0xf85, 0xf85,
- 0xfd0, 0xfd4,
- 0xfd9, 0xfda,
- 0x104a, 0x104f,
- 0x10fb, 0x10fb,
- 0x1360, 0x1368,
- 0x1400, 0x1400,
- 0x166d, 0x166e,
- 0x169b, 0x169c,
- 0x16eb, 0x16ed,
- 0x1735, 0x1736,
- 0x17d4, 0x17d6,
- 0x17d8, 0x17da,
- 0x1800, 0x180a,
- 0x1944, 0x1945,
- 0x1a1e, 0x1a1f,
- 0x1aa0, 0x1aa6,
- 0x1aa8, 0x1aad,
- 0x1b5a, 0x1b60,
- 0x1bfc, 0x1bff,
- 0x1c3b, 0x1c3f,
- 0x1c7e, 0x1c7f,
- 0x1cc0, 0x1cc7,
- 0x1cd3, 0x1cd3,
- 0x2010, 0x2027,
- 0x2030, 0x2043,
- 0x2045, 0x2051,
- 0x2053, 0x205e,
- 0x207d, 0x207e,
- 0x208d, 0x208e,
- 0x2308, 0x230b,
- 0x2329, 0x232a,
- 0x2768, 0x2775,
- 0x27c5, 0x27c6,
- 0x27e6, 0x27ef,
- 0x2983, 0x2998,
- 0x29d8, 0x29db,
- 0x29fc, 0x29fd,
- 0x2cf9, 0x2cfc,
- 0x2cfe, 0x2cff,
- 0x2d70, 0x2d70,
- 0x2e00, 0x2e2e,
- 0x2e30, 0x2e42,
- 0x3001, 0x3003,
- 0x3008, 0x3011,
- 0x3014, 0x301f,
- 0x3030, 0x3030,
- 0x303d, 0x303d,
- 0x30a0, 0x30a0,
- 0x30fb, 0x30fb,
- 0xa4fe, 0xa4ff,
- 0xa60d, 0xa60f,
- 0xa673, 0xa673,
- 0xa67e, 0xa67e,
- 0xa6f2, 0xa6f7,
- 0xa874, 0xa877,
- 0xa8ce, 0xa8cf,
- 0xa8f8, 0xa8fa,
- 0xa92e, 0xa92f,
- 0xa95f, 0xa95f,
- 0xa9c1, 0xa9cd,
- 0xa9de, 0xa9df,
- 0xaa5c, 0xaa5f,
- 0xaade, 0xaadf,
- 0xaaf0, 0xaaf1,
- 0xabeb, 0xabeb,
- 0xfd3e, 0xfd3f,
- 0xfe10, 0xfe19,
- 0xfe30, 0xfe52,
- 0xfe54, 0xfe61,
- 0xfe63, 0xfe63,
- 0xfe68, 0xfe68,
- 0xfe6a, 0xfe6b,
- 0xff01, 0xff03,
- 0xff05, 0xff0a,
- 0xff0c, 0xff0f,
- 0xff1a, 0xff1b,
- 0xff1f, 0xff20,
- 0xff3b, 0xff3d,
- 0xff3f, 0xff3f,
- 0xff5b, 0xff5b,
- 0xff5d, 0xff5d,
- 0xff5f, 0xff65,
- 0x10100, 0x10102,
- 0x1039f, 0x1039f,
- 0x103d0, 0x103d0,
- 0x1056f, 0x1056f,
- 0x10857, 0x10857,
- 0x1091f, 0x1091f,
- 0x1093f, 0x1093f,
- 0x10a50, 0x10a58,
- 0x10a7f, 0x10a7f,
- 0x10af0, 0x10af6,
- 0x10b39, 0x10b3f,
- 0x10b99, 0x10b9c,
- 0x11047, 0x1104d,
- 0x110bb, 0x110bc,
- 0x110be, 0x110c1,
- 0x11140, 0x11143,
- 0x11174, 0x11175,
- 0x111c5, 0x111c8,
- 0x111cd, 0x111cd,
- 0x11238, 0x1123d,
- 0x114c6, 0x114c6,
- 0x115c1, 0x115c9,
- 0x11641, 0x11643,
- 0x12470, 0x12474,
- 0x16a6e, 0x16a6f,
- 0x16af5, 0x16af5,
- 0x16b37, 0x16b3b,
- 0x16b44, 0x16b44,
- 0x1bc9f, 0x1bc9f
-};
-UCP_FN(P)
-
-static const unichar ucp_Pc_def[] = {
- 0x5f, 0x5f,
- 0x203f, 0x2040,
- 0x2054, 0x2054,
- 0xfe33, 0xfe34,
- 0xfe4d, 0xfe4f,
- 0xff3f, 0xff3f
-};
-UCP_FN(Pc)
-
-static const unichar ucp_Pd_def[] = {
- 0x2d, 0x2d,
- 0x58a, 0x58a,
- 0x5be, 0x5be,
- 0x1400, 0x1400,
- 0x1806, 0x1806,
- 0x2010, 0x2015,
- 0x2e17, 0x2e17,
- 0x2e1a, 0x2e1a,
- 0x2e3a, 0x2e3b,
- 0x2e40, 0x2e40,
- 0x301c, 0x301c,
- 0x3030, 0x3030,
- 0x30a0, 0x30a0,
- 0xfe31, 0xfe32,
- 0xfe58, 0xfe58,
- 0xfe63, 0xfe63,
- 0xff0d, 0xff0d
-};
-UCP_FN(Pd)
-
-static const unichar ucp_Pe_def[] = {
- 0x29, 0x29,
- 0x5d, 0x5d,
- 0x7d, 0x7d,
- 0xf3b, 0xf3b,
- 0xf3d, 0xf3d,
- 0x169c, 0x169c,
- 0x2046, 0x2046,
- 0x207e, 0x207e,
- 0x208e, 0x208e,
- 0x2309, 0x2309,
- 0x230b, 0x230b,
- 0x232a, 0x232a,
- 0x2769, 0x2769,
- 0x276b, 0x276b,
- 0x276d, 0x276d,
- 0x276f, 0x276f,
- 0x2771, 0x2771,
- 0x2773, 0x2773,
- 0x2775, 0x2775,
- 0x27c6, 0x27c6,
- 0x27e7, 0x27e7,
- 0x27e9, 0x27e9,
- 0x27eb, 0x27eb,
- 0x27ed, 0x27ed,
- 0x27ef, 0x27ef,
- 0x2984, 0x2984,
- 0x2986, 0x2986,
- 0x2988, 0x2988,
- 0x298a, 0x298a,
- 0x298c, 0x298c,
- 0x298e, 0x298e,
- 0x2990, 0x2990,
- 0x2992, 0x2992,
- 0x2994, 0x2994,
- 0x2996, 0x2996,
- 0x2998, 0x2998,
- 0x29d9, 0x29d9,
- 0x29db, 0x29db,
- 0x29fd, 0x29fd,
- 0x2e23, 0x2e23,
- 0x2e25, 0x2e25,
- 0x2e27, 0x2e27,
- 0x2e29, 0x2e29,
- 0x3009, 0x3009,
- 0x300b, 0x300b,
- 0x300d, 0x300d,
- 0x300f, 0x300f,
- 0x3011, 0x3011,
- 0x3015, 0x3015,
- 0x3017, 0x3017,
- 0x3019, 0x3019,
- 0x301b, 0x301b,
- 0x301e, 0x301f,
- 0xfd3e, 0xfd3e,
- 0xfe18, 0xfe18,
- 0xfe36, 0xfe36,
- 0xfe38, 0xfe38,
- 0xfe3a, 0xfe3a,
- 0xfe3c, 0xfe3c,
- 0xfe3e, 0xfe3e,
- 0xfe40, 0xfe40,
- 0xfe42, 0xfe42,
- 0xfe44, 0xfe44,
- 0xfe48, 0xfe48,
- 0xfe5a, 0xfe5a,
- 0xfe5c, 0xfe5c,
- 0xfe5e, 0xfe5e,
- 0xff09, 0xff09,
- 0xff3d, 0xff3d,
- 0xff5d, 0xff5d,
- 0xff60, 0xff60,
- 0xff63, 0xff63
-};
-UCP_FN(Pe)
-
-static const unichar ucp_Pf_def[] = {
- 0xbb, 0xbb,
- 0x2019, 0x2019,
- 0x201d, 0x201d,
- 0x203a, 0x203a,
- 0x2e03, 0x2e03,
- 0x2e05, 0x2e05,
- 0x2e0a, 0x2e0a,
- 0x2e0d, 0x2e0d,
- 0x2e1d, 0x2e1d,
- 0x2e21, 0x2e21
-};
-UCP_FN(Pf)
-
-static const unichar ucp_Pi_def[] = {
- 0xab, 0xab,
- 0x2018, 0x2018,
- 0x201b, 0x201c,
- 0x201f, 0x201f,
- 0x2039, 0x2039,
- 0x2e02, 0x2e02,
- 0x2e04, 0x2e04,
- 0x2e09, 0x2e09,
- 0x2e0c, 0x2e0c,
- 0x2e1c, 0x2e1c,
- 0x2e20, 0x2e20
-};
-UCP_FN(Pi)
-
-static const unichar ucp_Po_def[] = {
- 0x21, 0x23,
- 0x25, 0x27,
- 0x2a, 0x2a,
- 0x2c, 0x2c,
- 0x2e, 0x2f,
- 0x3a, 0x3b,
- 0x3f, 0x40,
- 0x5c, 0x5c,
- 0xa1, 0xa1,
- 0xa7, 0xa7,
- 0xb6, 0xb7,
- 0xbf, 0xbf,
- 0x37e, 0x37e,
- 0x387, 0x387,
- 0x55a, 0x55f,
- 0x589, 0x589,
- 0x5c0, 0x5c0,
- 0x5c3, 0x5c3,
- 0x5c6, 0x5c6,
- 0x5f3, 0x5f4,
- 0x609, 0x60a,
- 0x60c, 0x60d,
- 0x61b, 0x61b,
- 0x61e, 0x61f,
- 0x66a, 0x66d,
- 0x6d4, 0x6d4,
- 0x700, 0x70d,
- 0x7f7, 0x7f9,
- 0x830, 0x83e,
- 0x85e, 0x85e,
- 0x964, 0x965,
- 0x970, 0x970,
- 0xaf0, 0xaf0,
- 0xdf4, 0xdf4,
- 0xe4f, 0xe4f,
- 0xe5a, 0xe5b,
- 0xf04, 0xf12,
- 0xf14, 0xf14,
- 0xf85, 0xf85,
- 0xfd0, 0xfd4,
- 0xfd9, 0xfda,
- 0x104a, 0x104f,
- 0x10fb, 0x10fb,
- 0x1360, 0x1368,
- 0x166d, 0x166e,
- 0x16eb, 0x16ed,
- 0x1735, 0x1736,
- 0x17d4, 0x17d6,
- 0x17d8, 0x17da,
- 0x1800, 0x1805,
- 0x1807, 0x180a,
- 0x1944, 0x1945,
- 0x1a1e, 0x1a1f,
- 0x1aa0, 0x1aa6,
- 0x1aa8, 0x1aad,
- 0x1b5a, 0x1b60,
- 0x1bfc, 0x1bff,
- 0x1c3b, 0x1c3f,
- 0x1c7e, 0x1c7f,
- 0x1cc0, 0x1cc7,
- 0x1cd3, 0x1cd3,
- 0x2016, 0x2017,
- 0x2020, 0x2027,
- 0x2030, 0x2038,
- 0x203b, 0x203e,
- 0x2041, 0x2043,
- 0x2047, 0x2051,
- 0x2053, 0x2053,
- 0x2055, 0x205e,
- 0x2cf9, 0x2cfc,
- 0x2cfe, 0x2cff,
- 0x2d70, 0x2d70,
- 0x2e00, 0x2e01,
- 0x2e06, 0x2e08,
- 0x2e0b, 0x2e0b,
- 0x2e0e, 0x2e16,
- 0x2e18, 0x2e19,
- 0x2e1b, 0x2e1b,
- 0x2e1e, 0x2e1f,
- 0x2e2a, 0x2e2e,
- 0x2e30, 0x2e39,
- 0x2e3c, 0x2e3f,
- 0x2e41, 0x2e41,
- 0x3001, 0x3003,
- 0x303d, 0x303d,
- 0x30fb, 0x30fb,
- 0xa4fe, 0xa4ff,
- 0xa60d, 0xa60f,
- 0xa673, 0xa673,
- 0xa67e, 0xa67e,
- 0xa6f2, 0xa6f7,
- 0xa874, 0xa877,
- 0xa8ce, 0xa8cf,
- 0xa8f8, 0xa8fa,
- 0xa92e, 0xa92f,
- 0xa95f, 0xa95f,
- 0xa9c1, 0xa9cd,
- 0xa9de, 0xa9df,
- 0xaa5c, 0xaa5f,
- 0xaade, 0xaadf,
- 0xaaf0, 0xaaf1,
- 0xabeb, 0xabeb,
- 0xfe10, 0xfe16,
- 0xfe19, 0xfe19,
- 0xfe30, 0xfe30,
- 0xfe45, 0xfe46,
- 0xfe49, 0xfe4c,
- 0xfe50, 0xfe52,
- 0xfe54, 0xfe57,
- 0xfe5f, 0xfe61,
- 0xfe68, 0xfe68,
- 0xfe6a, 0xfe6b,
- 0xff01, 0xff03,
- 0xff05, 0xff07,
- 0xff0a, 0xff0a,
- 0xff0c, 0xff0c,
- 0xff0e, 0xff0f,
- 0xff1a, 0xff1b,
- 0xff1f, 0xff20,
- 0xff3c, 0xff3c,
- 0xff61, 0xff61,
- 0xff64, 0xff65,
- 0x10100, 0x10102,
- 0x1039f, 0x1039f,
- 0x103d0, 0x103d0,
- 0x1056f, 0x1056f,
- 0x10857, 0x10857,
- 0x1091f, 0x1091f,
- 0x1093f, 0x1093f,
- 0x10a50, 0x10a58,
- 0x10a7f, 0x10a7f,
- 0x10af0, 0x10af6,
- 0x10b39, 0x10b3f,
- 0x10b99, 0x10b9c,
- 0x11047, 0x1104d,
- 0x110bb, 0x110bc,
- 0x110be, 0x110c1,
- 0x11140, 0x11143,
- 0x11174, 0x11175,
- 0x111c5, 0x111c8,
- 0x111cd, 0x111cd,
- 0x11238, 0x1123d,
- 0x114c6, 0x114c6,
- 0x115c1, 0x115c9,
- 0x11641, 0x11643,
- 0x12470, 0x12474,
- 0x16a6e, 0x16a6f,
- 0x16af5, 0x16af5,
- 0x16b37, 0x16b3b,
- 0x16b44, 0x16b44,
- 0x1bc9f, 0x1bc9f
-};
-UCP_FN(Po)
-
-static const unichar ucp_Ps_def[] = {
- 0x28, 0x28,
- 0x5b, 0x5b,
- 0x7b, 0x7b,
- 0xf3a, 0xf3a,
- 0xf3c, 0xf3c,
- 0x169b, 0x169b,
- 0x201a, 0x201a,
- 0x201e, 0x201e,
- 0x2045, 0x2045,
- 0x207d, 0x207d,
- 0x208d, 0x208d,
- 0x2308, 0x2308,
- 0x230a, 0x230a,
- 0x2329, 0x2329,
- 0x2768, 0x2768,
- 0x276a, 0x276a,
- 0x276c, 0x276c,
- 0x276e, 0x276e,
- 0x2770, 0x2770,
- 0x2772, 0x2772,
- 0x2774, 0x2774,
- 0x27c5, 0x27c5,
- 0x27e6, 0x27e6,
- 0x27e8, 0x27e8,
- 0x27ea, 0x27ea,
- 0x27ec, 0x27ec,
- 0x27ee, 0x27ee,
- 0x2983, 0x2983,
- 0x2985, 0x2985,
- 0x2987, 0x2987,
- 0x2989, 0x2989,
- 0x298b, 0x298b,
- 0x298d, 0x298d,
- 0x298f, 0x298f,
- 0x2991, 0x2991,
- 0x2993, 0x2993,
- 0x2995, 0x2995,
- 0x2997, 0x2997,
- 0x29d8, 0x29d8,
- 0x29da, 0x29da,
- 0x29fc, 0x29fc,
- 0x2e22, 0x2e22,
- 0x2e24, 0x2e24,
- 0x2e26, 0x2e26,
- 0x2e28, 0x2e28,
- 0x2e42, 0x2e42,
- 0x3008, 0x3008,
- 0x300a, 0x300a,
- 0x300c, 0x300c,
- 0x300e, 0x300e,
- 0x3010, 0x3010,
- 0x3014, 0x3014,
- 0x3016, 0x3016,
- 0x3018, 0x3018,
- 0x301a, 0x301a,
- 0x301d, 0x301d,
- 0xfd3f, 0xfd3f,
- 0xfe17, 0xfe17,
- 0xfe35, 0xfe35,
- 0xfe37, 0xfe37,
- 0xfe39, 0xfe39,
- 0xfe3b, 0xfe3b,
- 0xfe3d, 0xfe3d,
- 0xfe3f, 0xfe3f,
- 0xfe41, 0xfe41,
- 0xfe43, 0xfe43,
- 0xfe47, 0xfe47,
- 0xfe59, 0xfe59,
- 0xfe5b, 0xfe5b,
- 0xfe5d, 0xfe5d,
- 0xff08, 0xff08,
- 0xff3b, 0xff3b,
- 0xff5b, 0xff5b,
- 0xff5f, 0xff5f,
- 0xff62, 0xff62
-};
-UCP_FN(Ps)
-
-static const unichar ucp_S_def[] = {
- 0x24, 0x24,
- 0x2b, 0x2b,
- 0x3c, 0x3e,
- 0x5e, 0x5e,
- 0x60, 0x60,
- 0x7c, 0x7c,
- 0x7e, 0x7e,
- 0xa2, 0xa6,
- 0xa8, 0xa9,
- 0xac, 0xac,
- 0xae, 0xb1,
- 0xb4, 0xb4,
- 0xb8, 0xb8,
- 0xd7, 0xd7,
- 0xf7, 0xf7,
- 0x2c2, 0x2c5,
- 0x2d2, 0x2df,
- 0x2e5, 0x2eb,
- 0x2ed, 0x2ed,
- 0x2ef, 0x2ff,
- 0x375, 0x375,
- 0x384, 0x385,
- 0x3f6, 0x3f6,
- 0x482, 0x482,
- 0x58d, 0x58f,
- 0x606, 0x608,
- 0x60b, 0x60b,
- 0x60e, 0x60f,
- 0x6de, 0x6de,
- 0x6e9, 0x6e9,
- 0x6fd, 0x6fe,
- 0x7f6, 0x7f6,
- 0x9f2, 0x9f3,
- 0x9fa, 0x9fb,
- 0xaf1, 0xaf1,
- 0xb70, 0xb70,
- 0xbf3, 0xbfa,
- 0xc7f, 0xc7f,
- 0xd79, 0xd79,
- 0xe3f, 0xe3f,
- 0xf01, 0xf03,
- 0xf13, 0xf13,
- 0xf15, 0xf17,
- 0xf1a, 0xf1f,
- 0xf34, 0xf34,
- 0xf36, 0xf36,
- 0xf38, 0xf38,
- 0xfbe, 0xfc5,
- 0xfc7, 0xfcc,
- 0xfce, 0xfcf,
- 0xfd5, 0xfd8,
- 0x109e, 0x109f,
- 0x1390, 0x1399,
- 0x17db, 0x17db,
- 0x1940, 0x1940,
- 0x19de, 0x19ff,
- 0x1b61, 0x1b6a,
- 0x1b74, 0x1b7c,
- 0x1fbd, 0x1fbd,
- 0x1fbf, 0x1fc1,
- 0x1fcd, 0x1fcf,
- 0x1fdd, 0x1fdf,
- 0x1fed, 0x1fef,
- 0x1ffd, 0x1ffe,
- 0x2044, 0x2044,
- 0x2052, 0x2052,
- 0x207a, 0x207c,
- 0x208a, 0x208c,
- 0x20a0, 0x20bd,
- 0x2100, 0x2101,
- 0x2103, 0x2106,
- 0x2108, 0x2109,
- 0x2114, 0x2114,
- 0x2116, 0x2118,
- 0x211e, 0x2123,
- 0x2125, 0x2125,
- 0x2127, 0x2127,
- 0x2129, 0x2129,
- 0x212e, 0x212e,
- 0x213a, 0x213b,
- 0x2140, 0x2144,
- 0x214a, 0x214d,
- 0x214f, 0x214f,
- 0x2190, 0x2307,
- 0x230c, 0x2328,
- 0x232b, 0x23fa,
- 0x2400, 0x2426,
- 0x2440, 0x244a,
- 0x249c, 0x24e9,
- 0x2500, 0x2767,
- 0x2794, 0x27c4,
- 0x27c7, 0x27e5,
- 0x27f0, 0x2982,
- 0x2999, 0x29d7,
- 0x29dc, 0x29fb,
- 0x29fe, 0x2b73,
- 0x2b76, 0x2b95,
- 0x2b98, 0x2bb9,
- 0x2bbd, 0x2bc8,
- 0x2bca, 0x2bd1,
- 0x2ce5, 0x2cea,
- 0x2e80, 0x2e99,
- 0x2e9b, 0x2ef3,
- 0x2f00, 0x2fd5,
- 0x2ff0, 0x2ffb,
- 0x3004, 0x3004,
- 0x3012, 0x3013,
- 0x3020, 0x3020,
- 0x3036, 0x3037,
- 0x303e, 0x303f,
- 0x309b, 0x309c,
- 0x3190, 0x3191,
- 0x3196, 0x319f,
- 0x31c0, 0x31e3,
- 0x3200, 0x321e,
- 0x322a, 0x3247,
- 0x3250, 0x3250,
- 0x3260, 0x327f,
- 0x328a, 0x32b0,
- 0x32c0, 0x32fe,
- 0x3300, 0x33ff,
- 0x4dc0, 0x4dff,
- 0xa490, 0xa4c6,
- 0xa700, 0xa716,
- 0xa720, 0xa721,
- 0xa789, 0xa78a,
- 0xa828, 0xa82b,
- 0xa836, 0xa839,
- 0xaa77, 0xaa79,
- 0xab5b, 0xab5b,
- 0xfb29, 0xfb29,
- 0xfbb2, 0xfbc1,
- 0xfdfc, 0xfdfd,
- 0xfe62, 0xfe62,
- 0xfe64, 0xfe66,
- 0xfe69, 0xfe69,
- 0xff04, 0xff04,
- 0xff0b, 0xff0b,
- 0xff1c, 0xff1e,
- 0xff3e, 0xff3e,
- 0xff40, 0xff40,
- 0xff5c, 0xff5c,
- 0xff5e, 0xff5e,
- 0xffe0, 0xffe6,
- 0xffe8, 0xffee,
- 0xfffc, 0xfffd,
- 0x10137, 0x1013f,
- 0x10179, 0x10189,
- 0x1018c, 0x1018c,
- 0x10190, 0x1019b,
- 0x101a0, 0x101a0,
- 0x101d0, 0x101fc,
- 0x10877, 0x10878,
- 0x10ac8, 0x10ac8,
- 0x16b3c, 0x16b3f,
- 0x16b45, 0x16b45,
- 0x1bc9c, 0x1bc9c,
- 0x1d000, 0x1d0f5,
- 0x1d100, 0x1d126,
- 0x1d129, 0x1d164,
- 0x1d16a, 0x1d16c,
- 0x1d183, 0x1d184,
- 0x1d18c, 0x1d1a9,
- 0x1d1ae, 0x1d1dd,
- 0x1d200, 0x1d241,
- 0x1d245, 0x1d245,
- 0x1d300, 0x1d356,
- 0x1d6c1, 0x1d6c1,
- 0x1d6db, 0x1d6db,
- 0x1d6fb, 0x1d6fb,
- 0x1d715, 0x1d715,
- 0x1d735, 0x1d735,
- 0x1d74f, 0x1d74f,
- 0x1d76f, 0x1d76f,
- 0x1d789, 0x1d789,
- 0x1d7a9, 0x1d7a9,
- 0x1d7c3, 0x1d7c3,
- 0x1eef0, 0x1eef1,
- 0x1f000, 0x1f02b,
- 0x1f030, 0x1f093,
- 0x1f0a0, 0x1f0ae,
- 0x1f0b1, 0x1f0bf,
- 0x1f0c1, 0x1f0cf,
- 0x1f0d1, 0x1f0f5,
- 0x1f110, 0x1f12e,
- 0x1f130, 0x1f16b,
- 0x1f170, 0x1f19a,
- 0x1f1e6, 0x1f202,
- 0x1f210, 0x1f23a,
- 0x1f240, 0x1f248,
- 0x1f250, 0x1f251,
- 0x1f300, 0x1f32c,
- 0x1f330, 0x1f37d,
- 0x1f380, 0x1f3ce,
- 0x1f3d4, 0x1f3f7,
- 0x1f400, 0x1f4fe,
- 0x1f500, 0x1f54a,
- 0x1f550, 0x1f579,
- 0x1f57b, 0x1f5a3,
- 0x1f5a5, 0x1f642,
- 0x1f645, 0x1f6cf,
- 0x1f6e0, 0x1f6ec,
- 0x1f6f0, 0x1f6f3,
- 0x1f700, 0x1f773,
- 0x1f780, 0x1f7d4,
- 0x1f800, 0x1f80b,
- 0x1f810, 0x1f847,
- 0x1f850, 0x1f859,
- 0x1f860, 0x1f887,
- 0x1f890, 0x1f8ad
-};
-UCP_FN(S)
-
-static const unichar ucp_Sc_def[] = {
- 0x24, 0x24,
- 0xa2, 0xa5,
- 0x58f, 0x58f,
- 0x60b, 0x60b,
- 0x9f2, 0x9f3,
- 0x9fb, 0x9fb,
- 0xaf1, 0xaf1,
- 0xbf9, 0xbf9,
- 0xe3f, 0xe3f,
- 0x17db, 0x17db,
- 0x20a0, 0x20bd,
- 0xa838, 0xa838,
- 0xfdfc, 0xfdfc,
- 0xfe69, 0xfe69,
- 0xff04, 0xff04,
- 0xffe0, 0xffe1,
- 0xffe5, 0xffe6
-};
-UCP_FN(Sc)
-
-static const unichar ucp_Sk_def[] = {
- 0x5e, 0x5e,
- 0x60, 0x60,
- 0xa8, 0xa8,
- 0xaf, 0xaf,
- 0xb4, 0xb4,
- 0xb8, 0xb8,
- 0x2c2, 0x2c5,
- 0x2d2, 0x2df,
- 0x2e5, 0x2eb,
- 0x2ed, 0x2ed,
- 0x2ef, 0x2ff,
- 0x375, 0x375,
- 0x384, 0x385,
- 0x1fbd, 0x1fbd,
- 0x1fbf, 0x1fc1,
- 0x1fcd, 0x1fcf,
- 0x1fdd, 0x1fdf,
- 0x1fed, 0x1fef,
- 0x1ffd, 0x1ffe,
- 0x309b, 0x309c,
- 0xa700, 0xa716,
- 0xa720, 0xa721,
- 0xa789, 0xa78a,
- 0xab5b, 0xab5b,
- 0xfbb2, 0xfbc1,
- 0xff3e, 0xff3e,
- 0xff40, 0xff40,
- 0xffe3, 0xffe3
-};
-UCP_FN(Sk)
-
-static const unichar ucp_Sm_def[] = {
- 0x2b, 0x2b,
- 0x3c, 0x3e,
- 0x7c, 0x7c,
- 0x7e, 0x7e,
- 0xac, 0xac,
- 0xb1, 0xb1,
- 0xd7, 0xd7,
- 0xf7, 0xf7,
- 0x3f6, 0x3f6,
- 0x606, 0x608,
- 0x2044, 0x2044,
- 0x2052, 0x2052,
- 0x207a, 0x207c,
- 0x208a, 0x208c,
- 0x2118, 0x2118,
- 0x2140, 0x2144,
- 0x214b, 0x214b,
- 0x2190, 0x2194,
- 0x219a, 0x219b,
- 0x21a0, 0x21a0,
- 0x21a3, 0x21a3,
- 0x21a6, 0x21a6,
- 0x21ae, 0x21ae,
- 0x21ce, 0x21cf,
- 0x21d2, 0x21d2,
- 0x21d4, 0x21d4,
- 0x21f4, 0x22ff,
- 0x2320, 0x2321,
- 0x237c, 0x237c,
- 0x239b, 0x23b3,
- 0x23dc, 0x23e1,
- 0x25b7, 0x25b7,
- 0x25c1, 0x25c1,
- 0x25f8, 0x25ff,
- 0x266f, 0x266f,
- 0x27c0, 0x27c4,
- 0x27c7, 0x27e5,
- 0x27f0, 0x27ff,
- 0x2900, 0x2982,
- 0x2999, 0x29d7,
- 0x29dc, 0x29fb,
- 0x29fe, 0x2aff,
- 0x2b30, 0x2b44,
- 0x2b47, 0x2b4c,
- 0xfb29, 0xfb29,
- 0xfe62, 0xfe62,
- 0xfe64, 0xfe66,
- 0xff0b, 0xff0b,
- 0xff1c, 0xff1e,
- 0xff5c, 0xff5c,
- 0xff5e, 0xff5e,
- 0xffe2, 0xffe2,
- 0xffe9, 0xffec,
- 0x1d6c1, 0x1d6c1,
- 0x1d6db, 0x1d6db,
- 0x1d6fb, 0x1d6fb,
- 0x1d715, 0x1d715,
- 0x1d735, 0x1d735,
- 0x1d74f, 0x1d74f,
- 0x1d76f, 0x1d76f,
- 0x1d789, 0x1d789,
- 0x1d7a9, 0x1d7a9,
- 0x1d7c3, 0x1d7c3,
- 0x1eef0, 0x1eef1
-};
-UCP_FN(Sm)
-
-static const unichar ucp_So_def[] = {
- 0xa6, 0xa6,
- 0xa9, 0xa9,
- 0xae, 0xae,
- 0xb0, 0xb0,
- 0x482, 0x482,
- 0x58d, 0x58e,
- 0x60e, 0x60f,
- 0x6de, 0x6de,
- 0x6e9, 0x6e9,
- 0x6fd, 0x6fe,
- 0x7f6, 0x7f6,
- 0x9fa, 0x9fa,
- 0xb70, 0xb70,
- 0xbf3, 0xbf8,
- 0xbfa, 0xbfa,
- 0xc7f, 0xc7f,
- 0xd79, 0xd79,
- 0xf01, 0xf03,
- 0xf13, 0xf13,
- 0xf15, 0xf17,
- 0xf1a, 0xf1f,
- 0xf34, 0xf34,
- 0xf36, 0xf36,
- 0xf38, 0xf38,
- 0xfbe, 0xfc5,
- 0xfc7, 0xfcc,
- 0xfce, 0xfcf,
- 0xfd5, 0xfd8,
- 0x109e, 0x109f,
- 0x1390, 0x1399,
- 0x1940, 0x1940,
- 0x19de, 0x19ff,
- 0x1b61, 0x1b6a,
- 0x1b74, 0x1b7c,
- 0x2100, 0x2101,
- 0x2103, 0x2106,
- 0x2108, 0x2109,
- 0x2114, 0x2114,
- 0x2116, 0x2117,
- 0x211e, 0x2123,
- 0x2125, 0x2125,
- 0x2127, 0x2127,
- 0x2129, 0x2129,
- 0x212e, 0x212e,
- 0x213a, 0x213b,
- 0x214a, 0x214a,
- 0x214c, 0x214d,
- 0x214f, 0x214f,
- 0x2195, 0x2199,
- 0x219c, 0x219f,
- 0x21a1, 0x21a2,
- 0x21a4, 0x21a5,
- 0x21a7, 0x21ad,
- 0x21af, 0x21cd,
- 0x21d0, 0x21d1,
- 0x21d3, 0x21d3,
- 0x21d5, 0x21f3,
- 0x2300, 0x2307,
- 0x230c, 0x231f,
- 0x2322, 0x2328,
- 0x232b, 0x237b,
- 0x237d, 0x239a,
- 0x23b4, 0x23db,
- 0x23e2, 0x23fa,
- 0x2400, 0x2426,
- 0x2440, 0x244a,
- 0x249c, 0x24e9,
- 0x2500, 0x25b6,
- 0x25b8, 0x25c0,
- 0x25c2, 0x25f7,
- 0x2600, 0x266e,
- 0x2670, 0x2767,
- 0x2794, 0x27bf,
- 0x2800, 0x28ff,
- 0x2b00, 0x2b2f,
- 0x2b45, 0x2b46,
- 0x2b4d, 0x2b73,
- 0x2b76, 0x2b95,
- 0x2b98, 0x2bb9,
- 0x2bbd, 0x2bc8,
- 0x2bca, 0x2bd1,
- 0x2ce5, 0x2cea,
- 0x2e80, 0x2e99,
- 0x2e9b, 0x2ef3,
- 0x2f00, 0x2fd5,
- 0x2ff0, 0x2ffb,
- 0x3004, 0x3004,
- 0x3012, 0x3013,
- 0x3020, 0x3020,
- 0x3036, 0x3037,
- 0x303e, 0x303f,
- 0x3190, 0x3191,
- 0x3196, 0x319f,
- 0x31c0, 0x31e3,
- 0x3200, 0x321e,
- 0x322a, 0x3247,
- 0x3250, 0x3250,
- 0x3260, 0x327f,
- 0x328a, 0x32b0,
- 0x32c0, 0x32fe,
- 0x3300, 0x33ff,
- 0x4dc0, 0x4dff,
- 0xa490, 0xa4c6,
- 0xa828, 0xa82b,
- 0xa836, 0xa837,
- 0xa839, 0xa839,
- 0xaa77, 0xaa79,
- 0xfdfd, 0xfdfd,
- 0xffe4, 0xffe4,
- 0xffe8, 0xffe8,
- 0xffed, 0xffee,
- 0xfffc, 0xfffd,
- 0x10137, 0x1013f,
- 0x10179, 0x10189,
- 0x1018c, 0x1018c,
- 0x10190, 0x1019b,
- 0x101a0, 0x101a0,
- 0x101d0, 0x101fc,
- 0x10877, 0x10878,
- 0x10ac8, 0x10ac8,
- 0x16b3c, 0x16b3f,
- 0x16b45, 0x16b45,
- 0x1bc9c, 0x1bc9c,
- 0x1d000, 0x1d0f5,
- 0x1d100, 0x1d126,
- 0x1d129, 0x1d164,
- 0x1d16a, 0x1d16c,
- 0x1d183, 0x1d184,
- 0x1d18c, 0x1d1a9,
- 0x1d1ae, 0x1d1dd,
- 0x1d200, 0x1d241,
- 0x1d245, 0x1d245,
- 0x1d300, 0x1d356,
- 0x1f000, 0x1f02b,
- 0x1f030, 0x1f093,
- 0x1f0a0, 0x1f0ae,
- 0x1f0b1, 0x1f0bf,
- 0x1f0c1, 0x1f0cf,
- 0x1f0d1, 0x1f0f5,
- 0x1f110, 0x1f12e,
- 0x1f130, 0x1f16b,
- 0x1f170, 0x1f19a,
- 0x1f1e6, 0x1f202,
- 0x1f210, 0x1f23a,
- 0x1f240, 0x1f248,
- 0x1f250, 0x1f251,
- 0x1f300, 0x1f32c,
- 0x1f330, 0x1f37d,
- 0x1f380, 0x1f3ce,
- 0x1f3d4, 0x1f3f7,
- 0x1f400, 0x1f4fe,
- 0x1f500, 0x1f54a,
- 0x1f550, 0x1f579,
- 0x1f57b, 0x1f5a3,
- 0x1f5a5, 0x1f642,
- 0x1f645, 0x1f6cf,
- 0x1f6e0, 0x1f6ec,
- 0x1f6f0, 0x1f6f3,
- 0x1f700, 0x1f773,
- 0x1f780, 0x1f7d4,
- 0x1f800, 0x1f80b,
- 0x1f810, 0x1f847,
- 0x1f850, 0x1f859,
- 0x1f860, 0x1f887,
- 0x1f890, 0x1f8ad
-};
-UCP_FN(So)
-
-static const unichar ucp_Xan_def[] = {
- 0x30, 0x39,
- 0x41, 0x5a,
- 0x61, 0x7a,
- 0xaa, 0xaa,
- 0xb2, 0xb3,
- 0xb5, 0xb5,
- 0xb9, 0xba,
- 0xbc, 0xbe,
- 0xc0, 0xd6,
- 0xd8, 0xf6,
- 0xf8, 0x2c1,
- 0x2c6, 0x2d1,
- 0x2e0, 0x2e4,
- 0x2ec, 0x2ec,
- 0x2ee, 0x2ee,
- 0x370, 0x374,
- 0x376, 0x377,
- 0x37a, 0x37d,
- 0x37f, 0x37f,
- 0x386, 0x386,
- 0x388, 0x38a,
- 0x38c, 0x38c,
- 0x38e, 0x3a1,
- 0x3a3, 0x3f5,
- 0x3f7, 0x481,
- 0x48a, 0x52f,
- 0x531, 0x556,
- 0x559, 0x559,
- 0x561, 0x587,
- 0x5d0, 0x5ea,
- 0x5f0, 0x5f2,
- 0x620, 0x64a,
- 0x660, 0x669,
- 0x66e, 0x66f,
- 0x671, 0x6d3,
- 0x6d5, 0x6d5,
- 0x6e5, 0x6e6,
- 0x6ee, 0x6fc,
- 0x6ff, 0x6ff,
- 0x710, 0x710,
- 0x712, 0x72f,
- 0x74d, 0x7a5,
- 0x7b1, 0x7b1,
- 0x7c0, 0x7ea,
- 0x7f4, 0x7f5,
- 0x7fa, 0x7fa,
- 0x800, 0x815,
- 0x81a, 0x81a,
- 0x824, 0x824,
- 0x828, 0x828,
- 0x840, 0x858,
- 0x8a0, 0x8b2,
- 0x904, 0x939,
- 0x93d, 0x93d,
- 0x950, 0x950,
- 0x958, 0x961,
- 0x966, 0x96f,
- 0x971, 0x980,
- 0x985, 0x98c,
- 0x98f, 0x990,
- 0x993, 0x9a8,
- 0x9aa, 0x9b0,
- 0x9b2, 0x9b2,
- 0x9b6, 0x9b9,
- 0x9bd, 0x9bd,
- 0x9ce, 0x9ce,
- 0x9dc, 0x9dd,
- 0x9df, 0x9e1,
- 0x9e6, 0x9f1,
- 0x9f4, 0x9f9,
- 0xa05, 0xa0a,
- 0xa0f, 0xa10,
- 0xa13, 0xa28,
- 0xa2a, 0xa30,
- 0xa32, 0xa33,
- 0xa35, 0xa36,
- 0xa38, 0xa39,
- 0xa59, 0xa5c,
- 0xa5e, 0xa5e,
- 0xa66, 0xa6f,
- 0xa72, 0xa74,
- 0xa85, 0xa8d,
- 0xa8f, 0xa91,
- 0xa93, 0xaa8,
- 0xaaa, 0xab0,
- 0xab2, 0xab3,
- 0xab5, 0xab9,
- 0xabd, 0xabd,
- 0xad0, 0xad0,
- 0xae0, 0xae1,
- 0xae6, 0xaef,
- 0xb05, 0xb0c,
- 0xb0f, 0xb10,
- 0xb13, 0xb28,
- 0xb2a, 0xb30,
- 0xb32, 0xb33,
- 0xb35, 0xb39,
- 0xb3d, 0xb3d,
- 0xb5c, 0xb5d,
- 0xb5f, 0xb61,
- 0xb66, 0xb6f,
- 0xb71, 0xb77,
- 0xb83, 0xb83,
- 0xb85, 0xb8a,
- 0xb8e, 0xb90,
- 0xb92, 0xb95,
- 0xb99, 0xb9a,
- 0xb9c, 0xb9c,
- 0xb9e, 0xb9f,
- 0xba3, 0xba4,
- 0xba8, 0xbaa,
- 0xbae, 0xbb9,
- 0xbd0, 0xbd0,
- 0xbe6, 0xbf2,
- 0xc05, 0xc0c,
- 0xc0e, 0xc10,
- 0xc12, 0xc28,
- 0xc2a, 0xc39,
- 0xc3d, 0xc3d,
- 0xc58, 0xc59,
- 0xc60, 0xc61,
- 0xc66, 0xc6f,
- 0xc78, 0xc7e,
- 0xc85, 0xc8c,
- 0xc8e, 0xc90,
- 0xc92, 0xca8,
- 0xcaa, 0xcb3,
- 0xcb5, 0xcb9,
- 0xcbd, 0xcbd,
- 0xcde, 0xcde,
- 0xce0, 0xce1,
- 0xce6, 0xcef,
- 0xcf1, 0xcf2,
- 0xd05, 0xd0c,
- 0xd0e, 0xd10,
- 0xd12, 0xd3a,
- 0xd3d, 0xd3d,
- 0xd4e, 0xd4e,
- 0xd60, 0xd61,
- 0xd66, 0xd75,
- 0xd7a, 0xd7f,
- 0xd85, 0xd96,
- 0xd9a, 0xdb1,
- 0xdb3, 0xdbb,
- 0xdbd, 0xdbd,
- 0xdc0, 0xdc6,
- 0xde6, 0xdef,
- 0xe01, 0xe30,
- 0xe32, 0xe33,
- 0xe40, 0xe46,
- 0xe50, 0xe59,
- 0xe81, 0xe82,
- 0xe84, 0xe84,
- 0xe87, 0xe88,
- 0xe8a, 0xe8a,
- 0xe8d, 0xe8d,
- 0xe94, 0xe97,
- 0xe99, 0xe9f,
- 0xea1, 0xea3,
- 0xea5, 0xea5,
- 0xea7, 0xea7,
- 0xeaa, 0xeab,
- 0xead, 0xeb0,
- 0xeb2, 0xeb3,
- 0xebd, 0xebd,
- 0xec0, 0xec4,
- 0xec6, 0xec6,
- 0xed0, 0xed9,
- 0xedc, 0xedf,
- 0xf00, 0xf00,
- 0xf20, 0xf33,
- 0xf40, 0xf47,
- 0xf49, 0xf6c,
- 0xf88, 0xf8c,
- 0x1000, 0x102a,
- 0x103f, 0x1049,
- 0x1050, 0x1055,
- 0x105a, 0x105d,
- 0x1061, 0x1061,
- 0x1065, 0x1066,
- 0x106e, 0x1070,
- 0x1075, 0x1081,
- 0x108e, 0x108e,
- 0x1090, 0x1099,
- 0x10a0, 0x10c5,
- 0x10c7, 0x10c7,
- 0x10cd, 0x10cd,
- 0x10d0, 0x10fa,
- 0x10fc, 0x1248,
- 0x124a, 0x124d,
- 0x1250, 0x1256,
- 0x1258, 0x1258,
- 0x125a, 0x125d,
- 0x1260, 0x1288,
- 0x128a, 0x128d,
- 0x1290, 0x12b0,
- 0x12b2, 0x12b5,
- 0x12b8, 0x12be,
- 0x12c0, 0x12c0,
- 0x12c2, 0x12c5,
- 0x12c8, 0x12d6,
- 0x12d8, 0x1310,
- 0x1312, 0x1315,
- 0x1318, 0x135a,
- 0x1369, 0x137c,
- 0x1380, 0x138f,
- 0x13a0, 0x13f4,
- 0x1401, 0x166c,
- 0x166f, 0x167f,
- 0x1681, 0x169a,
- 0x16a0, 0x16ea,
- 0x16ee, 0x16f8,
- 0x1700, 0x170c,
- 0x170e, 0x1711,
- 0x1720, 0x1731,
- 0x1740, 0x1751,
- 0x1760, 0x176c,
- 0x176e, 0x1770,
- 0x1780, 0x17b3,
- 0x17d7, 0x17d7,
- 0x17dc, 0x17dc,
- 0x17e0, 0x17e9,
- 0x17f0, 0x17f9,
- 0x1810, 0x1819,
- 0x1820, 0x1877,
- 0x1880, 0x18a8,
- 0x18aa, 0x18aa,
- 0x18b0, 0x18f5,
- 0x1900, 0x191e,
- 0x1946, 0x196d,
- 0x1970, 0x1974,
- 0x1980, 0x19ab,
- 0x19c1, 0x19c7,
- 0x19d0, 0x19da,
- 0x1a00, 0x1a16,
- 0x1a20, 0x1a54,
- 0x1a80, 0x1a89,
- 0x1a90, 0x1a99,
- 0x1aa7, 0x1aa7,
- 0x1b05, 0x1b33,
- 0x1b45, 0x1b4b,
- 0x1b50, 0x1b59,
- 0x1b83, 0x1ba0,
- 0x1bae, 0x1be5,
- 0x1c00, 0x1c23,
- 0x1c40, 0x1c49,
- 0x1c4d, 0x1c7d,
- 0x1ce9, 0x1cec,
- 0x1cee, 0x1cf1,
- 0x1cf5, 0x1cf6,
- 0x1d00, 0x1dbf,
- 0x1e00, 0x1f15,
- 0x1f18, 0x1f1d,
- 0x1f20, 0x1f45,
- 0x1f48, 0x1f4d,
- 0x1f50, 0x1f57,
- 0x1f59, 0x1f59,
- 0x1f5b, 0x1f5b,
- 0x1f5d, 0x1f5d,
- 0x1f5f, 0x1f7d,
- 0x1f80, 0x1fb4,
- 0x1fb6, 0x1fbc,
- 0x1fbe, 0x1fbe,
- 0x1fc2, 0x1fc4,
- 0x1fc6, 0x1fcc,
- 0x1fd0, 0x1fd3,
- 0x1fd6, 0x1fdb,
- 0x1fe0, 0x1fec,
- 0x1ff2, 0x1ff4,
- 0x1ff6, 0x1ffc,
- 0x2070, 0x2071,
- 0x2074, 0x2079,
- 0x207f, 0x2089,
- 0x2090, 0x209c,
- 0x2102, 0x2102,
- 0x2107, 0x2107,
- 0x210a, 0x2113,
- 0x2115, 0x2115,
- 0x2119, 0x211d,
- 0x2124, 0x2124,
- 0x2126, 0x2126,
- 0x2128, 0x2128,
- 0x212a, 0x212d,
- 0x212f, 0x2139,
- 0x213c, 0x213f,
- 0x2145, 0x2149,
- 0x214e, 0x214e,
- 0x2150, 0x2189,
- 0x2460, 0x249b,
- 0x24ea, 0x24ff,
- 0x2776, 0x2793,
- 0x2c00, 0x2c2e,
- 0x2c30, 0x2c5e,
- 0x2c60, 0x2ce4,
- 0x2ceb, 0x2cee,
- 0x2cf2, 0x2cf3,
- 0x2cfd, 0x2cfd,
- 0x2d00, 0x2d25,
- 0x2d27, 0x2d27,
- 0x2d2d, 0x2d2d,
- 0x2d30, 0x2d67,
- 0x2d6f, 0x2d6f,
- 0x2d80, 0x2d96,
- 0x2da0, 0x2da6,
- 0x2da8, 0x2dae,
- 0x2db0, 0x2db6,
- 0x2db8, 0x2dbe,
- 0x2dc0, 0x2dc6,
- 0x2dc8, 0x2dce,
- 0x2dd0, 0x2dd6,
- 0x2dd8, 0x2dde,
- 0x2e2f, 0x2e2f,
- 0x3005, 0x3007,
- 0x3021, 0x3029,
- 0x3031, 0x3035,
- 0x3038, 0x303c,
- 0x3041, 0x3096,
- 0x309d, 0x309f,
- 0x30a1, 0x30fa,
- 0x30fc, 0x30ff,
- 0x3105, 0x312d,
- 0x3131, 0x318e,
- 0x3192, 0x3195,
- 0x31a0, 0x31ba,
- 0x31f0, 0x31ff,
- 0x3220, 0x3229,
- 0x3248, 0x324f,
- 0x3251, 0x325f,
- 0x3280, 0x3289,
- 0x32b1, 0x32bf,
- 0x3400, 0x4db5,
- 0x4e00, 0x9fcc,
- 0xa000, 0xa48c,
- 0xa4d0, 0xa4fd,
- 0xa500, 0xa60c,
- 0xa610, 0xa62b,
- 0xa640, 0xa66e,
- 0xa67f, 0xa69d,
- 0xa6a0, 0xa6ef,
- 0xa717, 0xa71f,
- 0xa722, 0xa788,
- 0xa78b, 0xa78e,
- 0xa790, 0xa7ad,
- 0xa7b0, 0xa7b1,
- 0xa7f7, 0xa801,
- 0xa803, 0xa805,
- 0xa807, 0xa80a,
- 0xa80c, 0xa822,
- 0xa830, 0xa835,
- 0xa840, 0xa873,
- 0xa882, 0xa8b3,
- 0xa8d0, 0xa8d9,
- 0xa8f2, 0xa8f7,
- 0xa8fb, 0xa8fb,
- 0xa900, 0xa925,
- 0xa930, 0xa946,
- 0xa960, 0xa97c,
- 0xa984, 0xa9b2,
- 0xa9cf, 0xa9d9,
- 0xa9e0, 0xa9e4,
- 0xa9e6, 0xa9fe,
- 0xaa00, 0xaa28,
- 0xaa40, 0xaa42,
- 0xaa44, 0xaa4b,
- 0xaa50, 0xaa59,
- 0xaa60, 0xaa76,
- 0xaa7a, 0xaa7a,
- 0xaa7e, 0xaaaf,
- 0xaab1, 0xaab1,
- 0xaab5, 0xaab6,
- 0xaab9, 0xaabd,
- 0xaac0, 0xaac0,
- 0xaac2, 0xaac2,
- 0xaadb, 0xaadd,
- 0xaae0, 0xaaea,
- 0xaaf2, 0xaaf4,
- 0xab01, 0xab06,
- 0xab09, 0xab0e,
- 0xab11, 0xab16,
- 0xab20, 0xab26,
- 0xab28, 0xab2e,
- 0xab30, 0xab5a,
- 0xab5c, 0xab5f,
- 0xab64, 0xab65,
- 0xabc0, 0xabe2,
- 0xabf0, 0xabf9,
- 0xac00, 0xd7a3,
- 0xd7b0, 0xd7c6,
- 0xd7cb, 0xd7fb,
- 0xf900, 0xfa6d,
- 0xfa70, 0xfad9,
- 0xfb00, 0xfb06,
- 0xfb13, 0xfb17,
- 0xfb1d, 0xfb1d,
- 0xfb1f, 0xfb28,
- 0xfb2a, 0xfb36,
- 0xfb38, 0xfb3c,
- 0xfb3e, 0xfb3e,
- 0xfb40, 0xfb41,
- 0xfb43, 0xfb44,
- 0xfb46, 0xfbb1,
- 0xfbd3, 0xfd3d,
- 0xfd50, 0xfd8f,
- 0xfd92, 0xfdc7,
- 0xfdf0, 0xfdfb,
- 0xfe70, 0xfe74,
- 0xfe76, 0xfefc,
- 0xff10, 0xff19,
- 0xff21, 0xff3a,
- 0xff41, 0xff5a,
- 0xff66, 0xffbe,
- 0xffc2, 0xffc7,
- 0xffca, 0xffcf,
- 0xffd2, 0xffd7,
- 0xffda, 0xffdc,
- 0x10000, 0x1000b,
- 0x1000d, 0x10026,
- 0x10028, 0x1003a,
- 0x1003c, 0x1003d,
- 0x1003f, 0x1004d,
- 0x10050, 0x1005d,
- 0x10080, 0x100fa,
- 0x10107, 0x10133,
- 0x10140, 0x10178,
- 0x1018a, 0x1018b,
- 0x10280, 0x1029c,
- 0x102a0, 0x102d0,
- 0x102e1, 0x102fb,
- 0x10300, 0x10323,
- 0x10330, 0x1034a,
- 0x10350, 0x10375,
- 0x10380, 0x1039d,
- 0x103a0, 0x103c3,
- 0x103c8, 0x103cf,
- 0x103d1, 0x103d5,
- 0x10400, 0x1049d,
- 0x104a0, 0x104a9,
- 0x10500, 0x10527,
- 0x10530, 0x10563,
- 0x10600, 0x10736,
- 0x10740, 0x10755,
- 0x10760, 0x10767,
- 0x10800, 0x10805,
- 0x10808, 0x10808,
- 0x1080a, 0x10835,
- 0x10837, 0x10838,
- 0x1083c, 0x1083c,
- 0x1083f, 0x10855,
- 0x10858, 0x10876,
- 0x10879, 0x1089e,
- 0x108a7, 0x108af,
- 0x10900, 0x1091b,
- 0x10920, 0x10939,
- 0x10980, 0x109b7,
- 0x109be, 0x109bf,
- 0x10a00, 0x10a00,
- 0x10a10, 0x10a13,
- 0x10a15, 0x10a17,
- 0x10a19, 0x10a33,
- 0x10a40, 0x10a47,
- 0x10a60, 0x10a7e,
- 0x10a80, 0x10a9f,
- 0x10ac0, 0x10ac7,
- 0x10ac9, 0x10ae4,
- 0x10aeb, 0x10aef,
- 0x10b00, 0x10b35,
- 0x10b40, 0x10b55,
- 0x10b58, 0x10b72,
- 0x10b78, 0x10b91,
- 0x10ba9, 0x10baf,
- 0x10c00, 0x10c48,
- 0x10e60, 0x10e7e,
- 0x11003, 0x11037,
- 0x11052, 0x1106f,
- 0x11083, 0x110af,
- 0x110d0, 0x110e8,
- 0x110f0, 0x110f9,
- 0x11103, 0x11126,
- 0x11136, 0x1113f,
- 0x11150, 0x11172,
- 0x11176, 0x11176,
- 0x11183, 0x111b2,
- 0x111c1, 0x111c4,
- 0x111d0, 0x111da,
- 0x111e1, 0x111f4,
- 0x11200, 0x11211,
- 0x11213, 0x1122b,
- 0x112b0, 0x112de,
- 0x112f0, 0x112f9,
- 0x11305, 0x1130c,
- 0x1130f, 0x11310,
- 0x11313, 0x11328,
- 0x1132a, 0x11330,
- 0x11332, 0x11333,
- 0x11335, 0x11339,
- 0x1133d, 0x1133d,
- 0x1135d, 0x11361,
- 0x11480, 0x114af,
- 0x114c4, 0x114c5,
- 0x114c7, 0x114c7,
- 0x114d0, 0x114d9,
- 0x11580, 0x115ae,
- 0x11600, 0x1162f,
- 0x11644, 0x11644,
- 0x11650, 0x11659,
- 0x11680, 0x116aa,
- 0x116c0, 0x116c9,
- 0x118a0, 0x118f2,
- 0x118ff, 0x118ff,
- 0x11ac0, 0x11af8,
- 0x12000, 0x12398,
- 0x12400, 0x1246e,
- 0x13000, 0x1342e,
- 0x16800, 0x16a38,
- 0x16a40, 0x16a5e,
- 0x16a60, 0x16a69,
- 0x16ad0, 0x16aed,
- 0x16b00, 0x16b2f,
- 0x16b40, 0x16b43,
- 0x16b50, 0x16b59,
- 0x16b5b, 0x16b61,
- 0x16b63, 0x16b77,
- 0x16b7d, 0x16b8f,
- 0x16f00, 0x16f44,
- 0x16f50, 0x16f50,
- 0x16f93, 0x16f9f,
- 0x1b000, 0x1b001,
- 0x1bc00, 0x1bc6a,
- 0x1bc70, 0x1bc7c,
- 0x1bc80, 0x1bc88,
- 0x1bc90, 0x1bc99,
- 0x1d360, 0x1d371,
- 0x1d400, 0x1d454,
- 0x1d456, 0x1d49c,
- 0x1d49e, 0x1d49f,
- 0x1d4a2, 0x1d4a2,
- 0x1d4a5, 0x1d4a6,
- 0x1d4a9, 0x1d4ac,
- 0x1d4ae, 0x1d4b9,
- 0x1d4bb, 0x1d4bb,
- 0x1d4bd, 0x1d4c3,
- 0x1d4c5, 0x1d505,
- 0x1d507, 0x1d50a,
- 0x1d50d, 0x1d514,
- 0x1d516, 0x1d51c,
- 0x1d51e, 0x1d539,
- 0x1d53b, 0x1d53e,
- 0x1d540, 0x1d544,
- 0x1d546, 0x1d546,
- 0x1d54a, 0x1d550,
- 0x1d552, 0x1d6a5,
- 0x1d6a8, 0x1d6c0,
- 0x1d6c2, 0x1d6da,
- 0x1d6dc, 0x1d6fa,
- 0x1d6fc, 0x1d714,
- 0x1d716, 0x1d734,
- 0x1d736, 0x1d74e,
- 0x1d750, 0x1d76e,
- 0x1d770, 0x1d788,
- 0x1d78a, 0x1d7a8,
- 0x1d7aa, 0x1d7c2,
- 0x1d7c4, 0x1d7cb,
- 0x1d7ce, 0x1d7ff,
- 0x1e800, 0x1e8c4,
- 0x1e8c7, 0x1e8cf,
- 0x1ee00, 0x1ee03,
- 0x1ee05, 0x1ee1f,
- 0x1ee21, 0x1ee22,
- 0x1ee24, 0x1ee24,
- 0x1ee27, 0x1ee27,
- 0x1ee29, 0x1ee32,
- 0x1ee34, 0x1ee37,
- 0x1ee39, 0x1ee39,
- 0x1ee3b, 0x1ee3b,
- 0x1ee42, 0x1ee42,
- 0x1ee47, 0x1ee47,
- 0x1ee49, 0x1ee49,
- 0x1ee4b, 0x1ee4b,
- 0x1ee4d, 0x1ee4f,
- 0x1ee51, 0x1ee52,
- 0x1ee54, 0x1ee54,
- 0x1ee57, 0x1ee57,
- 0x1ee59, 0x1ee59,
- 0x1ee5b, 0x1ee5b,
- 0x1ee5d, 0x1ee5d,
- 0x1ee5f, 0x1ee5f,
- 0x1ee61, 0x1ee62,
- 0x1ee64, 0x1ee64,
- 0x1ee67, 0x1ee6a,
- 0x1ee6c, 0x1ee72,
- 0x1ee74, 0x1ee77,
- 0x1ee79, 0x1ee7c,
- 0x1ee7e, 0x1ee7e,
- 0x1ee80, 0x1ee89,
- 0x1ee8b, 0x1ee9b,
- 0x1eea1, 0x1eea3,
- 0x1eea5, 0x1eea9,
- 0x1eeab, 0x1eebb,
- 0x1f100, 0x1f10c,
- 0x20000, 0x2a6d6,
- 0x2a700, 0x2b734,
- 0x2b740, 0x2b81d,
- 0x2f800, 0x2fa1d
-};
-UCP_FN(Xan)
-
-static const unichar ucp_Xps_def[] = {
- 0x9, 0xd,
- 0x20, 0x20,
- 0xa0, 0xa0,
- 0x1680, 0x1680,
- 0x2000, 0x200a,
- 0x2028, 0x2029,
- 0x202f, 0x202f,
- 0x205f, 0x205f,
- 0x3000, 0x3000
-};
-UCP_FN(Xps)
-
-static const unichar ucp_Xsp_def[] = {
- 0x9, 0xa,
- 0xc, 0xd,
- 0x20, 0x20,
- 0xa0, 0xa0,
- 0x1680, 0x1680,
- 0x2000, 0x200a,
- 0x2028, 0x2029,
- 0x202f, 0x202f,
- 0x205f, 0x205f,
- 0x3000, 0x3000
-};
-UCP_FN(Xsp)
-
-static const unichar ucp_Xwd_def[] = {
- 0x30, 0x39,
- 0x41, 0x5a,
- 0x5f, 0x5f,
- 0x61, 0x7a,
- 0xaa, 0xaa,
- 0xb2, 0xb3,
- 0xb5, 0xb5,
- 0xb9, 0xba,
- 0xbc, 0xbe,
- 0xc0, 0xd6,
- 0xd8, 0xf6,
- 0xf8, 0x2c1,
- 0x2c6, 0x2d1,
- 0x2e0, 0x2e4,
- 0x2ec, 0x2ec,
- 0x2ee, 0x2ee,
- 0x370, 0x374,
- 0x376, 0x377,
- 0x37a, 0x37d,
- 0x37f, 0x37f,
- 0x386, 0x386,
- 0x388, 0x38a,
- 0x38c, 0x38c,
- 0x38e, 0x3a1,
- 0x3a3, 0x3f5,
- 0x3f7, 0x481,
- 0x48a, 0x52f,
- 0x531, 0x556,
- 0x559, 0x559,
- 0x561, 0x587,
- 0x5d0, 0x5ea,
- 0x5f0, 0x5f2,
- 0x620, 0x64a,
- 0x660, 0x669,
- 0x66e, 0x66f,
- 0x671, 0x6d3,
- 0x6d5, 0x6d5,
- 0x6e5, 0x6e6,
- 0x6ee, 0x6fc,
- 0x6ff, 0x6ff,
- 0x710, 0x710,
- 0x712, 0x72f,
- 0x74d, 0x7a5,
- 0x7b1, 0x7b1,
- 0x7c0, 0x7ea,
- 0x7f4, 0x7f5,
- 0x7fa, 0x7fa,
- 0x800, 0x815,
- 0x81a, 0x81a,
- 0x824, 0x824,
- 0x828, 0x828,
- 0x840, 0x858,
- 0x8a0, 0x8b2,
- 0x904, 0x939,
- 0x93d, 0x93d,
- 0x950, 0x950,
- 0x958, 0x961,
- 0x966, 0x96f,
- 0x971, 0x980,
- 0x985, 0x98c,
- 0x98f, 0x990,
- 0x993, 0x9a8,
- 0x9aa, 0x9b0,
- 0x9b2, 0x9b2,
- 0x9b6, 0x9b9,
- 0x9bd, 0x9bd,
- 0x9ce, 0x9ce,
- 0x9dc, 0x9dd,
- 0x9df, 0x9e1,
- 0x9e6, 0x9f1,
- 0x9f4, 0x9f9,
- 0xa05, 0xa0a,
- 0xa0f, 0xa10,
- 0xa13, 0xa28,
- 0xa2a, 0xa30,
- 0xa32, 0xa33,
- 0xa35, 0xa36,
- 0xa38, 0xa39,
- 0xa59, 0xa5c,
- 0xa5e, 0xa5e,
- 0xa66, 0xa6f,
- 0xa72, 0xa74,
- 0xa85, 0xa8d,
- 0xa8f, 0xa91,
- 0xa93, 0xaa8,
- 0xaaa, 0xab0,
- 0xab2, 0xab3,
- 0xab5, 0xab9,
- 0xabd, 0xabd,
- 0xad0, 0xad0,
- 0xae0, 0xae1,
- 0xae6, 0xaef,
- 0xb05, 0xb0c,
- 0xb0f, 0xb10,
- 0xb13, 0xb28,
- 0xb2a, 0xb30,
- 0xb32, 0xb33,
- 0xb35, 0xb39,
- 0xb3d, 0xb3d,
- 0xb5c, 0xb5d,
- 0xb5f, 0xb61,
- 0xb66, 0xb6f,
- 0xb71, 0xb77,
- 0xb83, 0xb83,
- 0xb85, 0xb8a,
- 0xb8e, 0xb90,
- 0xb92, 0xb95,
- 0xb99, 0xb9a,
- 0xb9c, 0xb9c,
- 0xb9e, 0xb9f,
- 0xba3, 0xba4,
- 0xba8, 0xbaa,
- 0xbae, 0xbb9,
- 0xbd0, 0xbd0,
- 0xbe6, 0xbf2,
- 0xc05, 0xc0c,
- 0xc0e, 0xc10,
- 0xc12, 0xc28,
- 0xc2a, 0xc39,
- 0xc3d, 0xc3d,
- 0xc58, 0xc59,
- 0xc60, 0xc61,
- 0xc66, 0xc6f,
- 0xc78, 0xc7e,
- 0xc85, 0xc8c,
- 0xc8e, 0xc90,
- 0xc92, 0xca8,
- 0xcaa, 0xcb3,
- 0xcb5, 0xcb9,
- 0xcbd, 0xcbd,
- 0xcde, 0xcde,
- 0xce0, 0xce1,
- 0xce6, 0xcef,
- 0xcf1, 0xcf2,
- 0xd05, 0xd0c,
- 0xd0e, 0xd10,
- 0xd12, 0xd3a,
- 0xd3d, 0xd3d,
- 0xd4e, 0xd4e,
- 0xd60, 0xd61,
- 0xd66, 0xd75,
- 0xd7a, 0xd7f,
- 0xd85, 0xd96,
- 0xd9a, 0xdb1,
- 0xdb3, 0xdbb,
- 0xdbd, 0xdbd,
- 0xdc0, 0xdc6,
- 0xde6, 0xdef,
- 0xe01, 0xe30,
- 0xe32, 0xe33,
- 0xe40, 0xe46,
- 0xe50, 0xe59,
- 0xe81, 0xe82,
- 0xe84, 0xe84,
- 0xe87, 0xe88,
- 0xe8a, 0xe8a,
- 0xe8d, 0xe8d,
- 0xe94, 0xe97,
- 0xe99, 0xe9f,
- 0xea1, 0xea3,
- 0xea5, 0xea5,
- 0xea7, 0xea7,
- 0xeaa, 0xeab,
- 0xead, 0xeb0,
- 0xeb2, 0xeb3,
- 0xebd, 0xebd,
- 0xec0, 0xec4,
- 0xec6, 0xec6,
- 0xed0, 0xed9,
- 0xedc, 0xedf,
- 0xf00, 0xf00,
- 0xf20, 0xf33,
- 0xf40, 0xf47,
- 0xf49, 0xf6c,
- 0xf88, 0xf8c,
- 0x1000, 0x102a,
- 0x103f, 0x1049,
- 0x1050, 0x1055,
- 0x105a, 0x105d,
- 0x1061, 0x1061,
- 0x1065, 0x1066,
- 0x106e, 0x1070,
- 0x1075, 0x1081,
- 0x108e, 0x108e,
- 0x1090, 0x1099,
- 0x10a0, 0x10c5,
- 0x10c7, 0x10c7,
- 0x10cd, 0x10cd,
- 0x10d0, 0x10fa,
- 0x10fc, 0x1248,
- 0x124a, 0x124d,
- 0x1250, 0x1256,
- 0x1258, 0x1258,
- 0x125a, 0x125d,
- 0x1260, 0x1288,
- 0x128a, 0x128d,
- 0x1290, 0x12b0,
- 0x12b2, 0x12b5,
- 0x12b8, 0x12be,
- 0x12c0, 0x12c0,
- 0x12c2, 0x12c5,
- 0x12c8, 0x12d6,
- 0x12d8, 0x1310,
- 0x1312, 0x1315,
- 0x1318, 0x135a,
- 0x1369, 0x137c,
- 0x1380, 0x138f,
- 0x13a0, 0x13f4,
- 0x1401, 0x166c,
- 0x166f, 0x167f,
- 0x1681, 0x169a,
- 0x16a0, 0x16ea,
- 0x16ee, 0x16f8,
- 0x1700, 0x170c,
- 0x170e, 0x1711,
- 0x1720, 0x1731,
- 0x1740, 0x1751,
- 0x1760, 0x176c,
- 0x176e, 0x1770,
- 0x1780, 0x17b3,
- 0x17d7, 0x17d7,
- 0x17dc, 0x17dc,
- 0x17e0, 0x17e9,
- 0x17f0, 0x17f9,
- 0x1810, 0x1819,
- 0x1820, 0x1877,
- 0x1880, 0x18a8,
- 0x18aa, 0x18aa,
- 0x18b0, 0x18f5,
- 0x1900, 0x191e,
- 0x1946, 0x196d,
- 0x1970, 0x1974,
- 0x1980, 0x19ab,
- 0x19c1, 0x19c7,
- 0x19d0, 0x19da,
- 0x1a00, 0x1a16,
- 0x1a20, 0x1a54,
- 0x1a80, 0x1a89,
- 0x1a90, 0x1a99,
- 0x1aa7, 0x1aa7,
- 0x1b05, 0x1b33,
- 0x1b45, 0x1b4b,
- 0x1b50, 0x1b59,
- 0x1b83, 0x1ba0,
- 0x1bae, 0x1be5,
- 0x1c00, 0x1c23,
- 0x1c40, 0x1c49,
- 0x1c4d, 0x1c7d,
- 0x1ce9, 0x1cec,
- 0x1cee, 0x1cf1,
- 0x1cf5, 0x1cf6,
- 0x1d00, 0x1dbf,
- 0x1e00, 0x1f15,
- 0x1f18, 0x1f1d,
- 0x1f20, 0x1f45,
- 0x1f48, 0x1f4d,
- 0x1f50, 0x1f57,
- 0x1f59, 0x1f59,
- 0x1f5b, 0x1f5b,
- 0x1f5d, 0x1f5d,
- 0x1f5f, 0x1f7d,
- 0x1f80, 0x1fb4,
- 0x1fb6, 0x1fbc,
- 0x1fbe, 0x1fbe,
- 0x1fc2, 0x1fc4,
- 0x1fc6, 0x1fcc,
- 0x1fd0, 0x1fd3,
- 0x1fd6, 0x1fdb,
- 0x1fe0, 0x1fec,
- 0x1ff2, 0x1ff4,
- 0x1ff6, 0x1ffc,
- 0x2070, 0x2071,
- 0x2074, 0x2079,
- 0x207f, 0x2089,
- 0x2090, 0x209c,
- 0x2102, 0x2102,
- 0x2107, 0x2107,
- 0x210a, 0x2113,
- 0x2115, 0x2115,
- 0x2119, 0x211d,
- 0x2124, 0x2124,
- 0x2126, 0x2126,
- 0x2128, 0x2128,
- 0x212a, 0x212d,
- 0x212f, 0x2139,
- 0x213c, 0x213f,
- 0x2145, 0x2149,
- 0x214e, 0x214e,
- 0x2150, 0x2189,
- 0x2460, 0x249b,
- 0x24ea, 0x24ff,
- 0x2776, 0x2793,
- 0x2c00, 0x2c2e,
- 0x2c30, 0x2c5e,
- 0x2c60, 0x2ce4,
- 0x2ceb, 0x2cee,
- 0x2cf2, 0x2cf3,
- 0x2cfd, 0x2cfd,
- 0x2d00, 0x2d25,
- 0x2d27, 0x2d27,
- 0x2d2d, 0x2d2d,
- 0x2d30, 0x2d67,
- 0x2d6f, 0x2d6f,
- 0x2d80, 0x2d96,
- 0x2da0, 0x2da6,
- 0x2da8, 0x2dae,
- 0x2db0, 0x2db6,
- 0x2db8, 0x2dbe,
- 0x2dc0, 0x2dc6,
- 0x2dc8, 0x2dce,
- 0x2dd0, 0x2dd6,
- 0x2dd8, 0x2dde,
- 0x2e2f, 0x2e2f,
- 0x3005, 0x3007,
- 0x3021, 0x3029,
- 0x3031, 0x3035,
- 0x3038, 0x303c,
- 0x3041, 0x3096,
- 0x309d, 0x309f,
- 0x30a1, 0x30fa,
- 0x30fc, 0x30ff,
- 0x3105, 0x312d,
- 0x3131, 0x318e,
- 0x3192, 0x3195,
- 0x31a0, 0x31ba,
- 0x31f0, 0x31ff,
- 0x3220, 0x3229,
- 0x3248, 0x324f,
- 0x3251, 0x325f,
- 0x3280, 0x3289,
- 0x32b1, 0x32bf,
- 0x3400, 0x4db5,
- 0x4e00, 0x9fcc,
- 0xa000, 0xa48c,
- 0xa4d0, 0xa4fd,
- 0xa500, 0xa60c,
- 0xa610, 0xa62b,
- 0xa640, 0xa66e,
- 0xa67f, 0xa69d,
- 0xa6a0, 0xa6ef,
- 0xa717, 0xa71f,
- 0xa722, 0xa788,
- 0xa78b, 0xa78e,
- 0xa790, 0xa7ad,
- 0xa7b0, 0xa7b1,
- 0xa7f7, 0xa801,
- 0xa803, 0xa805,
- 0xa807, 0xa80a,
- 0xa80c, 0xa822,
- 0xa830, 0xa835,
- 0xa840, 0xa873,
- 0xa882, 0xa8b3,
- 0xa8d0, 0xa8d9,
- 0xa8f2, 0xa8f7,
- 0xa8fb, 0xa8fb,
- 0xa900, 0xa925,
- 0xa930, 0xa946,
- 0xa960, 0xa97c,
- 0xa984, 0xa9b2,
- 0xa9cf, 0xa9d9,
- 0xa9e0, 0xa9e4,
- 0xa9e6, 0xa9fe,
- 0xaa00, 0xaa28,
- 0xaa40, 0xaa42,
- 0xaa44, 0xaa4b,
- 0xaa50, 0xaa59,
- 0xaa60, 0xaa76,
- 0xaa7a, 0xaa7a,
- 0xaa7e, 0xaaaf,
- 0xaab1, 0xaab1,
- 0xaab5, 0xaab6,
- 0xaab9, 0xaabd,
- 0xaac0, 0xaac0,
- 0xaac2, 0xaac2,
- 0xaadb, 0xaadd,
- 0xaae0, 0xaaea,
- 0xaaf2, 0xaaf4,
- 0xab01, 0xab06,
- 0xab09, 0xab0e,
- 0xab11, 0xab16,
- 0xab20, 0xab26,
- 0xab28, 0xab2e,
- 0xab30, 0xab5a,
- 0xab5c, 0xab5f,
- 0xab64, 0xab65,
- 0xabc0, 0xabe2,
- 0xabf0, 0xabf9,
- 0xac00, 0xd7a3,
- 0xd7b0, 0xd7c6,
- 0xd7cb, 0xd7fb,
- 0xf900, 0xfa6d,
- 0xfa70, 0xfad9,
- 0xfb00, 0xfb06,
- 0xfb13, 0xfb17,
- 0xfb1d, 0xfb1d,
- 0xfb1f, 0xfb28,
- 0xfb2a, 0xfb36,
- 0xfb38, 0xfb3c,
- 0xfb3e, 0xfb3e,
- 0xfb40, 0xfb41,
- 0xfb43, 0xfb44,
- 0xfb46, 0xfbb1,
- 0xfbd3, 0xfd3d,
- 0xfd50, 0xfd8f,
- 0xfd92, 0xfdc7,
- 0xfdf0, 0xfdfb,
- 0xfe70, 0xfe74,
- 0xfe76, 0xfefc,
- 0xff10, 0xff19,
- 0xff21, 0xff3a,
- 0xff41, 0xff5a,
- 0xff66, 0xffbe,
- 0xffc2, 0xffc7,
- 0xffca, 0xffcf,
- 0xffd2, 0xffd7,
- 0xffda, 0xffdc,
- 0x10000, 0x1000b,
- 0x1000d, 0x10026,
- 0x10028, 0x1003a,
- 0x1003c, 0x1003d,
- 0x1003f, 0x1004d,
- 0x10050, 0x1005d,
- 0x10080, 0x100fa,
- 0x10107, 0x10133,
- 0x10140, 0x10178,
- 0x1018a, 0x1018b,
- 0x10280, 0x1029c,
- 0x102a0, 0x102d0,
- 0x102e1, 0x102fb,
- 0x10300, 0x10323,
- 0x10330, 0x1034a,
- 0x10350, 0x10375,
- 0x10380, 0x1039d,
- 0x103a0, 0x103c3,
- 0x103c8, 0x103cf,
- 0x103d1, 0x103d5,
- 0x10400, 0x1049d,
- 0x104a0, 0x104a9,
- 0x10500, 0x10527,
- 0x10530, 0x10563,
- 0x10600, 0x10736,
- 0x10740, 0x10755,
- 0x10760, 0x10767,
- 0x10800, 0x10805,
- 0x10808, 0x10808,
- 0x1080a, 0x10835,
- 0x10837, 0x10838,
- 0x1083c, 0x1083c,
- 0x1083f, 0x10855,
- 0x10858, 0x10876,
- 0x10879, 0x1089e,
- 0x108a7, 0x108af,
- 0x10900, 0x1091b,
- 0x10920, 0x10939,
- 0x10980, 0x109b7,
- 0x109be, 0x109bf,
- 0x10a00, 0x10a00,
- 0x10a10, 0x10a13,
- 0x10a15, 0x10a17,
- 0x10a19, 0x10a33,
- 0x10a40, 0x10a47,
- 0x10a60, 0x10a7e,
- 0x10a80, 0x10a9f,
- 0x10ac0, 0x10ac7,
- 0x10ac9, 0x10ae4,
- 0x10aeb, 0x10aef,
- 0x10b00, 0x10b35,
- 0x10b40, 0x10b55,
- 0x10b58, 0x10b72,
- 0x10b78, 0x10b91,
- 0x10ba9, 0x10baf,
- 0x10c00, 0x10c48,
- 0x10e60, 0x10e7e,
- 0x11003, 0x11037,
- 0x11052, 0x1106f,
- 0x11083, 0x110af,
- 0x110d0, 0x110e8,
- 0x110f0, 0x110f9,
- 0x11103, 0x11126,
- 0x11136, 0x1113f,
- 0x11150, 0x11172,
- 0x11176, 0x11176,
- 0x11183, 0x111b2,
- 0x111c1, 0x111c4,
- 0x111d0, 0x111da,
- 0x111e1, 0x111f4,
- 0x11200, 0x11211,
- 0x11213, 0x1122b,
- 0x112b0, 0x112de,
- 0x112f0, 0x112f9,
- 0x11305, 0x1130c,
- 0x1130f, 0x11310,
- 0x11313, 0x11328,
- 0x1132a, 0x11330,
- 0x11332, 0x11333,
- 0x11335, 0x11339,
- 0x1133d, 0x1133d,
- 0x1135d, 0x11361,
- 0x11480, 0x114af,
- 0x114c4, 0x114c5,
- 0x114c7, 0x114c7,
- 0x114d0, 0x114d9,
- 0x11580, 0x115ae,
- 0x11600, 0x1162f,
- 0x11644, 0x11644,
- 0x11650, 0x11659,
- 0x11680, 0x116aa,
- 0x116c0, 0x116c9,
- 0x118a0, 0x118f2,
- 0x118ff, 0x118ff,
- 0x11ac0, 0x11af8,
- 0x12000, 0x12398,
- 0x12400, 0x1246e,
- 0x13000, 0x1342e,
- 0x16800, 0x16a38,
- 0x16a40, 0x16a5e,
- 0x16a60, 0x16a69,
- 0x16ad0, 0x16aed,
- 0x16b00, 0x16b2f,
- 0x16b40, 0x16b43,
- 0x16b50, 0x16b59,
- 0x16b5b, 0x16b61,
- 0x16b63, 0x16b77,
- 0x16b7d, 0x16b8f,
- 0x16f00, 0x16f44,
- 0x16f50, 0x16f50,
- 0x16f93, 0x16f9f,
- 0x1b000, 0x1b001,
- 0x1bc00, 0x1bc6a,
- 0x1bc70, 0x1bc7c,
- 0x1bc80, 0x1bc88,
- 0x1bc90, 0x1bc99,
- 0x1d360, 0x1d371,
- 0x1d400, 0x1d454,
- 0x1d456, 0x1d49c,
- 0x1d49e, 0x1d49f,
- 0x1d4a2, 0x1d4a2,
- 0x1d4a5, 0x1d4a6,
- 0x1d4a9, 0x1d4ac,
- 0x1d4ae, 0x1d4b9,
- 0x1d4bb, 0x1d4bb,
- 0x1d4bd, 0x1d4c3,
- 0x1d4c5, 0x1d505,
- 0x1d507, 0x1d50a,
- 0x1d50d, 0x1d514,
- 0x1d516, 0x1d51c,
- 0x1d51e, 0x1d539,
- 0x1d53b, 0x1d53e,
- 0x1d540, 0x1d544,
- 0x1d546, 0x1d546,
- 0x1d54a, 0x1d550,
- 0x1d552, 0x1d6a5,
- 0x1d6a8, 0x1d6c0,
- 0x1d6c2, 0x1d6da,
- 0x1d6dc, 0x1d6fa,
- 0x1d6fc, 0x1d714,
- 0x1d716, 0x1d734,
- 0x1d736, 0x1d74e,
- 0x1d750, 0x1d76e,
- 0x1d770, 0x1d788,
- 0x1d78a, 0x1d7a8,
- 0x1d7aa, 0x1d7c2,
- 0x1d7c4, 0x1d7cb,
- 0x1d7ce, 0x1d7ff,
- 0x1e800, 0x1e8c4,
- 0x1e8c7, 0x1e8cf,
- 0x1ee00, 0x1ee03,
- 0x1ee05, 0x1ee1f,
- 0x1ee21, 0x1ee22,
- 0x1ee24, 0x1ee24,
- 0x1ee27, 0x1ee27,
- 0x1ee29, 0x1ee32,
- 0x1ee34, 0x1ee37,
- 0x1ee39, 0x1ee39,
- 0x1ee3b, 0x1ee3b,
- 0x1ee42, 0x1ee42,
- 0x1ee47, 0x1ee47,
- 0x1ee49, 0x1ee49,
- 0x1ee4b, 0x1ee4b,
- 0x1ee4d, 0x1ee4f,
- 0x1ee51, 0x1ee52,
- 0x1ee54, 0x1ee54,
- 0x1ee57, 0x1ee57,
- 0x1ee59, 0x1ee59,
- 0x1ee5b, 0x1ee5b,
- 0x1ee5d, 0x1ee5d,
- 0x1ee5f, 0x1ee5f,
- 0x1ee61, 0x1ee62,
- 0x1ee64, 0x1ee64,
- 0x1ee67, 0x1ee6a,
- 0x1ee6c, 0x1ee72,
- 0x1ee74, 0x1ee77,
- 0x1ee79, 0x1ee7c,
- 0x1ee7e, 0x1ee7e,
- 0x1ee80, 0x1ee89,
- 0x1ee8b, 0x1ee9b,
- 0x1eea1, 0x1eea3,
- 0x1eea5, 0x1eea9,
- 0x1eeab, 0x1eebb,
- 0x1f100, 0x1f10c,
- 0x20000, 0x2a6d6,
- 0x2a700, 0x2b734,
- 0x2b740, 0x2b81d,
- 0x2f800, 0x2fa1d
-};
-UCP_FN(Xwd)
-
-static const unichar ucp_Z_def[] = {
- 0x20, 0x20,
- 0xa0, 0xa0,
- 0x1680, 0x1680,
- 0x2000, 0x200a,
- 0x2028, 0x2029,
- 0x202f, 0x202f,
- 0x205f, 0x205f,
- 0x3000, 0x3000
-};
-UCP_FN(Z)
-
-static const unichar ucp_Zl_def[] = {
- 0x2028, 0x2028
-};
-UCP_FN(Zl)
-
-static const unichar ucp_Zp_def[] = {
- 0x2029, 0x2029
-};
-UCP_FN(Zp)
-
-static const unichar ucp_Zs_def[] = {
- 0x20, 0x20,
- 0xa0, 0xa0,
- 0x1680, 0x1680,
- 0x2000, 0x200a,
- 0x202f, 0x202f,
- 0x205f, 0x205f,
- 0x3000, 0x3000
-};
-UCP_FN(Zs)
-
-static const unichar ucp_Arabic_def[] = {
- 0x600, 0x604,
- 0x606, 0x60b,
- 0x60d, 0x61a,
- 0x61e, 0x61e,
- 0x620, 0x63f,
- 0x641, 0x64a,
- 0x656, 0x65f,
- 0x66a, 0x66f,
- 0x671, 0x6dc,
- 0x6de, 0x6ff,
- 0x750, 0x77f,
- 0x8a0, 0x8b2,
- 0x8e4, 0x8ff,
- 0xfb50, 0xfbc1,
- 0xfbd3, 0xfd3d,
- 0xfd50, 0xfd8f,
- 0xfd92, 0xfdc7,
- 0xfdf0, 0xfdfd,
- 0xfe70, 0xfe74,
- 0xfe76, 0xfefc,
- 0x10e60, 0x10e7e,
- 0x1ee00, 0x1ee03,
- 0x1ee05, 0x1ee1f,
- 0x1ee21, 0x1ee22,
- 0x1ee24, 0x1ee24,
- 0x1ee27, 0x1ee27,
- 0x1ee29, 0x1ee32,
- 0x1ee34, 0x1ee37,
- 0x1ee39, 0x1ee39,
- 0x1ee3b, 0x1ee3b,
- 0x1ee42, 0x1ee42,
- 0x1ee47, 0x1ee47,
- 0x1ee49, 0x1ee49,
- 0x1ee4b, 0x1ee4b,
- 0x1ee4d, 0x1ee4f,
- 0x1ee51, 0x1ee52,
- 0x1ee54, 0x1ee54,
- 0x1ee57, 0x1ee57,
- 0x1ee59, 0x1ee59,
- 0x1ee5b, 0x1ee5b,
- 0x1ee5d, 0x1ee5d,
- 0x1ee5f, 0x1ee5f,
- 0x1ee61, 0x1ee62,
- 0x1ee64, 0x1ee64,
- 0x1ee67, 0x1ee6a,
- 0x1ee6c, 0x1ee72,
- 0x1ee74, 0x1ee77,
- 0x1ee79, 0x1ee7c,
- 0x1ee7e, 0x1ee7e,
- 0x1ee80, 0x1ee89,
- 0x1ee8b, 0x1ee9b,
- 0x1eea1, 0x1eea3,
- 0x1eea5, 0x1eea9,
- 0x1eeab, 0x1eebb,
- 0x1eef0, 0x1eef1
-};
-UCP_FN(Arabic)
-
-static const unichar ucp_Armenian_def[] = {
- 0x531, 0x556,
- 0x559, 0x55f,
- 0x561, 0x587,
- 0x58a, 0x58a,
- 0x58d, 0x58f,
- 0xfb13, 0xfb17
-};
-UCP_FN(Armenian)
-
-static const unichar ucp_Avestan_def[] = {
- 0x10b00, 0x10b35,
- 0x10b39, 0x10b3f
-};
-UCP_FN(Avestan)
-
-static const unichar ucp_Balinese_def[] = {
- 0x1b00, 0x1b4b,
- 0x1b50, 0x1b7c
-};
-UCP_FN(Balinese)
-
-static const unichar ucp_Bamum_def[] = {
- 0xa6a0, 0xa6f7,
- 0x16800, 0x16a38
-};
-UCP_FN(Bamum)
-
-static const unichar ucp_Bassa_Vah_def[] = {
- 0x16ad0, 0x16aed,
- 0x16af0, 0x16af5
-};
-UCP_FN(Bassa_Vah)
-
-static const unichar ucp_Batak_def[] = {
- 0x1bc0, 0x1bf3,
- 0x1bfc, 0x1bff
-};
-UCP_FN(Batak)
-
-static const unichar ucp_Bengali_def[] = {
- 0x980, 0x983,
- 0x985, 0x98c,
- 0x98f, 0x990,
- 0x993, 0x9a8,
- 0x9aa, 0x9b0,
- 0x9b2, 0x9b2,
- 0x9b6, 0x9b9,
- 0x9bc, 0x9c4,
- 0x9c7, 0x9c8,
- 0x9cb, 0x9ce,
- 0x9d7, 0x9d7,
- 0x9dc, 0x9dd,
- 0x9df, 0x9e3,
- 0x9e6, 0x9fb
-};
-UCP_FN(Bengali)
-
-static const unichar ucp_Bopomofo_def[] = {
- 0x2ea, 0x2eb,
- 0x3105, 0x312d,
- 0x31a0, 0x31ba
-};
-UCP_FN(Bopomofo)
-
-static const unichar ucp_Brahmi_def[] = {
- 0x11000, 0x1104d,
- 0x11052, 0x1106f,
- 0x1107f, 0x1107f
-};
-UCP_FN(Brahmi)
-
-static const unichar ucp_Braille_def[] = {
- 0x2800, 0x28ff
-};
-UCP_FN(Braille)
-
-static const unichar ucp_Buginese_def[] = {
- 0x1a00, 0x1a1b,
- 0x1a1e, 0x1a1f
-};
-UCP_FN(Buginese)
-
-static const unichar ucp_Buhid_def[] = {
- 0x1740, 0x1753
-};
-UCP_FN(Buhid)
-
-static const unichar ucp_Canadian_Aboriginal_def[] = {
- 0x1400, 0x167f,
- 0x18b0, 0x18f5
-};
-UCP_FN(Canadian_Aboriginal)
-
-static const unichar ucp_Carian_def[] = {
- 0x102a0, 0x102d0
-};
-UCP_FN(Carian)
-
-static const unichar ucp_Caucasian_Albanian_def[] = {
- 0x10530, 0x10563,
- 0x1056f, 0x1056f
-};
-UCP_FN(Caucasian_Albanian)
-
-static const unichar ucp_Chakma_def[] = {
- 0x11100, 0x11134,
- 0x11136, 0x11143
-};
-UCP_FN(Chakma)
-
-static const unichar ucp_Cham_def[] = {
- 0xaa00, 0xaa36,
- 0xaa40, 0xaa4d,
- 0xaa50, 0xaa59,
- 0xaa5c, 0xaa5f
-};
-UCP_FN(Cham)
-
-static const unichar ucp_Cherokee_def[] = {
- 0x13a0, 0x13f4
-};
-UCP_FN(Cherokee)
-
-static const unichar ucp_Common_def[] = {
- 0x0, 0x40,
- 0x5b, 0x60,
- 0x7b, 0xa9,
- 0xab, 0xb9,
- 0xbb, 0xbf,
- 0xd7, 0xd7,
- 0xf7, 0xf7,
- 0x2b9, 0x2df,
- 0x2e5, 0x2e9,
- 0x2ec, 0x2ff,
- 0x374, 0x374,
- 0x378, 0x379,
- 0x37e, 0x37e,
- 0x380, 0x383,
- 0x385, 0x385,
- 0x387, 0x387,
- 0x38b, 0x38b,
- 0x38d, 0x38d,
- 0x3a2, 0x3a2,
- 0x530, 0x530,
- 0x557, 0x558,
- 0x560, 0x560,
- 0x588, 0x589,
- 0x58b, 0x58c,
- 0x590, 0x590,
- 0x5c8, 0x5cf,
- 0x5eb, 0x5ef,
- 0x5f5, 0x5ff,
- 0x605, 0x605,
- 0x60c, 0x60c,
- 0x61b, 0x61d,
- 0x61f, 0x61f,
- 0x640, 0x640,
- 0x660, 0x669,
- 0x6dd, 0x6dd,
- 0x70e, 0x70e,
- 0x74b, 0x74c,
- 0x7b2, 0x7bf,
- 0x7fb, 0x7ff,
- 0x82e, 0x82f,
- 0x83f, 0x83f,
- 0x85c, 0x85d,
- 0x85f, 0x89f,
- 0x8b3, 0x8e3,
- 0x964, 0x965,
- 0x984, 0x984,
- 0x98d, 0x98e,
- 0x991, 0x992,
- 0x9a9, 0x9a9,
- 0x9b1, 0x9b1,
- 0x9b3, 0x9b5,
- 0x9ba, 0x9bb,
- 0x9c5, 0x9c6,
- 0x9c9, 0x9ca,
- 0x9cf, 0x9d6,
- 0x9d8, 0x9db,
- 0x9de, 0x9de,
- 0x9e4, 0x9e5,
- 0x9fc, 0xa00,
- 0xa04, 0xa04,
- 0xa0b, 0xa0e,
- 0xa11, 0xa12,
- 0xa29, 0xa29,
- 0xa31, 0xa31,
- 0xa34, 0xa34,
- 0xa37, 0xa37,
- 0xa3a, 0xa3b,
- 0xa3d, 0xa3d,
- 0xa43, 0xa46,
- 0xa49, 0xa4a,
- 0xa4e, 0xa50,
- 0xa52, 0xa58,
- 0xa5d, 0xa5d,
- 0xa5f, 0xa65,
- 0xa76, 0xa80,
- 0xa84, 0xa84,
- 0xa8e, 0xa8e,
- 0xa92, 0xa92,
- 0xaa9, 0xaa9,
- 0xab1, 0xab1,
- 0xab4, 0xab4,
- 0xaba, 0xabb,
- 0xac6, 0xac6,
- 0xaca, 0xaca,
- 0xace, 0xacf,
- 0xad1, 0xadf,
- 0xae4, 0xae5,
- 0xaf2, 0xb00,
- 0xb04, 0xb04,
- 0xb0d, 0xb0e,
- 0xb11, 0xb12,
- 0xb29, 0xb29,
- 0xb31, 0xb31,
- 0xb34, 0xb34,
- 0xb3a, 0xb3b,
- 0xb45, 0xb46,
- 0xb49, 0xb4a,
- 0xb4e, 0xb55,
- 0xb58, 0xb5b,
- 0xb5e, 0xb5e,
- 0xb64, 0xb65,
- 0xb78, 0xb81,
- 0xb84, 0xb84,
- 0xb8b, 0xb8d,
- 0xb91, 0xb91,
- 0xb96, 0xb98,
- 0xb9b, 0xb9b,
- 0xb9d, 0xb9d,
- 0xba0, 0xba2,
- 0xba5, 0xba7,
- 0xbab, 0xbad,
- 0xbba, 0xbbd,
- 0xbc3, 0xbc5,
- 0xbc9, 0xbc9,
- 0xbce, 0xbcf,
- 0xbd1, 0xbd6,
- 0xbd8, 0xbe5,
- 0xbfb, 0xbff,
- 0xc04, 0xc04,
- 0xc0d, 0xc0d,
- 0xc11, 0xc11,
- 0xc29, 0xc29,
- 0xc3a, 0xc3c,
- 0xc45, 0xc45,
- 0xc49, 0xc49,
- 0xc4e, 0xc54,
- 0xc57, 0xc57,
- 0xc5a, 0xc5f,
- 0xc64, 0xc65,
- 0xc70, 0xc77,
- 0xc80, 0xc80,
- 0xc84, 0xc84,
- 0xc8d, 0xc8d,
- 0xc91, 0xc91,
- 0xca9, 0xca9,
- 0xcb4, 0xcb4,
- 0xcba, 0xcbb,
- 0xcc5, 0xcc5,
- 0xcc9, 0xcc9,
- 0xcce, 0xcd4,
- 0xcd7, 0xcdd,
- 0xcdf, 0xcdf,
- 0xce4, 0xce5,
- 0xcf0, 0xcf0,
- 0xcf3, 0xd00,
- 0xd04, 0xd04,
- 0xd0d, 0xd0d,
- 0xd11, 0xd11,
- 0xd3b, 0xd3c,
- 0xd45, 0xd45,
- 0xd49, 0xd49,
- 0xd4f, 0xd56,
- 0xd58, 0xd5f,
- 0xd64, 0xd65,
- 0xd76, 0xd78,
- 0xd80, 0xd81,
- 0xd84, 0xd84,
- 0xd97, 0xd99,
- 0xdb2, 0xdb2,
- 0xdbc, 0xdbc,
- 0xdbe, 0xdbf,
- 0xdc7, 0xdc9,
- 0xdcb, 0xdce,
- 0xdd5, 0xdd5,
- 0xdd7, 0xdd7,
- 0xde0, 0xde5,
- 0xdf0, 0xdf1,
- 0xdf5, 0xe00,
- 0xe3b, 0xe3f,
- 0xe5c, 0xe80,
- 0xe83, 0xe83,
- 0xe85, 0xe86,
- 0xe89, 0xe89,
- 0xe8b, 0xe8c,
- 0xe8e, 0xe93,
- 0xe98, 0xe98,
- 0xea0, 0xea0,
- 0xea4, 0xea4,
- 0xea6, 0xea6,
- 0xea8, 0xea9,
- 0xeac, 0xeac,
- 0xeba, 0xeba,
- 0xebe, 0xebf,
- 0xec5, 0xec5,
- 0xec7, 0xec7,
- 0xece, 0xecf,
- 0xeda, 0xedb,
- 0xee0, 0xeff,
- 0xf48, 0xf48,
- 0xf6d, 0xf70,
- 0xf98, 0xf98,
- 0xfbd, 0xfbd,
- 0xfcd, 0xfcd,
- 0xfd5, 0xfd8,
- 0xfdb, 0xfff,
- 0x10c6, 0x10c6,
- 0x10c8, 0x10cc,
- 0x10ce, 0x10cf,
- 0x10fb, 0x10fb,
- 0x1249, 0x1249,
- 0x124e, 0x124f,
- 0x1257, 0x1257,
- 0x1259, 0x1259,
- 0x125e, 0x125f,
- 0x1289, 0x1289,
- 0x128e, 0x128f,
- 0x12b1, 0x12b1,
- 0x12b6, 0x12b7,
- 0x12bf, 0x12bf,
- 0x12c1, 0x12c1,
- 0x12c6, 0x12c7,
- 0x12d7, 0x12d7,
- 0x1311, 0x1311,
- 0x1316, 0x1317,
- 0x135b, 0x135c,
- 0x137d, 0x137f,
- 0x139a, 0x139f,
- 0x13f5, 0x13ff,
- 0x169d, 0x169f,
- 0x16eb, 0x16ed,
- 0x16f9, 0x16ff,
- 0x170d, 0x170d,
- 0x1715, 0x171f,
- 0x1735, 0x173f,
- 0x1754, 0x175f,
- 0x176d, 0x176d,
- 0x1771, 0x1771,
- 0x1774, 0x177f,
- 0x17de, 0x17df,
- 0x17ea, 0x17ef,
- 0x17fa, 0x17ff,
- 0x1802, 0x1803,
- 0x1805, 0x1805,
- 0x180f, 0x180f,
- 0x181a, 0x181f,
- 0x1878, 0x187f,
- 0x18ab, 0x18af,
- 0x18f6, 0x18ff,
- 0x191f, 0x191f,
- 0x192c, 0x192f,
- 0x193c, 0x193f,
- 0x1941, 0x1943,
- 0x196e, 0x196f,
- 0x1975, 0x197f,
- 0x19ac, 0x19af,
- 0x19ca, 0x19cf,
- 0x19db, 0x19dd,
- 0x1a1c, 0x1a1d,
- 0x1a5f, 0x1a5f,
- 0x1a7d, 0x1a7e,
- 0x1a8a, 0x1a8f,
- 0x1a9a, 0x1a9f,
- 0x1aae, 0x1aaf,
- 0x1abf, 0x1aff,
- 0x1b4c, 0x1b4f,
- 0x1b7d, 0x1b7f,
- 0x1bf4, 0x1bfb,
- 0x1c38, 0x1c3a,
- 0x1c4a, 0x1c4c,
- 0x1c80, 0x1cbf,
- 0x1cc8, 0x1ccf,
- 0x1cd3, 0x1cd3,
- 0x1ce1, 0x1ce1,
- 0x1ce9, 0x1cec,
- 0x1cee, 0x1cf3,
- 0x1cf5, 0x1cf7,
- 0x1cfa, 0x1cff,
- 0x1df6, 0x1dfb,
- 0x1f16, 0x1f17,
- 0x1f1e, 0x1f1f,
- 0x1f46, 0x1f47,
- 0x1f4e, 0x1f4f,
- 0x1f58, 0x1f58,
- 0x1f5a, 0x1f5a,
- 0x1f5c, 0x1f5c,
- 0x1f5e, 0x1f5e,
- 0x1f7e, 0x1f7f,
- 0x1fb5, 0x1fb5,
- 0x1fc5, 0x1fc5,
- 0x1fd4, 0x1fd5,
- 0x1fdc, 0x1fdc,
- 0x1ff0, 0x1ff1,
- 0x1ff5, 0x1ff5,
- 0x1fff, 0x200b,
- 0x200e, 0x2070,
- 0x2072, 0x207e,
- 0x2080, 0x208f,
- 0x209d, 0x20cf,
- 0x20f1, 0x2125,
- 0x2127, 0x2129,
- 0x212c, 0x2131,
- 0x2133, 0x214d,
- 0x214f, 0x215f,
- 0x2189, 0x27ff,
- 0x2900, 0x2bff,
- 0x2c2f, 0x2c2f,
- 0x2c5f, 0x2c5f,
- 0x2cf4, 0x2cf8,
- 0x2d26, 0x2d26,
- 0x2d28, 0x2d2c,
- 0x2d2e, 0x2d2f,
- 0x2d68, 0x2d6e,
- 0x2d71, 0x2d7e,
- 0x2d97, 0x2d9f,
- 0x2da7, 0x2da7,
- 0x2daf, 0x2daf,
- 0x2db7, 0x2db7,
- 0x2dbf, 0x2dbf,
- 0x2dc7, 0x2dc7,
- 0x2dcf, 0x2dcf,
- 0x2dd7, 0x2dd7,
- 0x2ddf, 0x2ddf,
- 0x2e00, 0x2e7f,
- 0x2e9a, 0x2e9a,
- 0x2ef4, 0x2eff,
- 0x2fd6, 0x3004,
- 0x3006, 0x3006,
- 0x3008, 0x3020,
- 0x3030, 0x3037,
- 0x303c, 0x3040,
- 0x3097, 0x3098,
- 0x309b, 0x309c,
- 0x30a0, 0x30a0,
- 0x30fb, 0x30fc,
- 0x3100, 0x3104,
- 0x312e, 0x3130,
- 0x318f, 0x319f,
- 0x31bb, 0x31ef,
- 0x321f, 0x325f,
- 0x327f, 0x32cf,
- 0x32ff, 0x32ff,
- 0x3358, 0x33ff,
- 0x4db6, 0x4dff,
- 0x9fcd, 0x9fff,
- 0xa48d, 0xa48f,
- 0xa4c7, 0xa4cf,
- 0xa62c, 0xa63f,
- 0xa69e, 0xa69e,
- 0xa6f8, 0xa721,
- 0xa788, 0xa78a,
- 0xa78f, 0xa78f,
- 0xa7ae, 0xa7af,
- 0xa7b2, 0xa7f6,
- 0xa82c, 0xa83f,
- 0xa878, 0xa87f,
- 0xa8c5, 0xa8cd,
- 0xa8da, 0xa8df,
- 0xa8fc, 0xa8ff,
- 0xa92e, 0xa92e,
- 0xa954, 0xa95e,
- 0xa97d, 0xa97f,
- 0xa9ce, 0xa9cf,
- 0xa9da, 0xa9dd,
- 0xa9ff, 0xa9ff,
- 0xaa37, 0xaa3f,
- 0xaa4e, 0xaa4f,
- 0xaa5a, 0xaa5b,
- 0xaac3, 0xaada,
- 0xaaf7, 0xab00,
- 0xab07, 0xab08,
- 0xab0f, 0xab10,
- 0xab17, 0xab1f,
- 0xab27, 0xab27,
- 0xab2f, 0xab2f,
- 0xab5b, 0xab5b,
- 0xab60, 0xab63,
- 0xab66, 0xabbf,
- 0xabee, 0xabef,
- 0xabfa, 0xabff,
- 0xd7a4, 0xd7af,
- 0xd7c7, 0xd7ca,
- 0xd7fc, 0xf8ff,
- 0xfa6e, 0xfa6f,
- 0xfada, 0xfaff,
- 0xfb07, 0xfb12,
- 0xfb18, 0xfb1c,
- 0xfb37, 0xfb37,
- 0xfb3d, 0xfb3d,
- 0xfb3f, 0xfb3f,
- 0xfb42, 0xfb42,
- 0xfb45, 0xfb45,
- 0xfbc2, 0xfbd2,
- 0xfd3e, 0xfd4f,
- 0xfd90, 0xfd91,
- 0xfdc8, 0xfdef,
- 0xfdfe, 0xfdff,
- 0xfe10, 0xfe1f,
- 0xfe2e, 0xfe6f,
- 0xfe75, 0xfe75,
- 0xfefd, 0xff20,
- 0xff3b, 0xff40,
- 0xff5b, 0xff65,
- 0xff70, 0xff70,
- 0xff9e, 0xff9f,
- 0xffbf, 0xffc1,
- 0xffc8, 0xffc9,
- 0xffd0, 0xffd1,
- 0xffd8, 0xffd9,
- 0xffdd, 0xffff,
- 0x1000c, 0x1000c,
- 0x10027, 0x10027,
- 0x1003b, 0x1003b,
- 0x1003e, 0x1003e,
- 0x1004e, 0x1004f,
- 0x1005e, 0x1007f,
- 0x100fb, 0x1013f,
- 0x1018d, 0x1019f,
- 0x101a1, 0x101fc,
- 0x101fe, 0x1027f,
- 0x1029d, 0x1029f,
- 0x102d1, 0x102df,
- 0x102e1, 0x102ff,
- 0x10324, 0x1032f,
- 0x1034b, 0x1034f,
- 0x1037b, 0x1037f,
- 0x1039e, 0x1039e,
- 0x103c4, 0x103c7,
- 0x103d6, 0x103ff,
- 0x1049e, 0x1049f,
- 0x104aa, 0x104ff,
- 0x10528, 0x1052f,
- 0x10564, 0x1056e,
- 0x10570, 0x105ff,
- 0x10737, 0x1073f,
- 0x10756, 0x1075f,
- 0x10768, 0x107ff,
- 0x10806, 0x10807,
- 0x10809, 0x10809,
- 0x10836, 0x10836,
- 0x10839, 0x1083b,
- 0x1083d, 0x1083e,
- 0x10856, 0x10856,
- 0x1089f, 0x108a6,
- 0x108b0, 0x108ff,
- 0x1091c, 0x1091e,
- 0x1093a, 0x1093e,
- 0x10940, 0x1097f,
- 0x109b8, 0x109bd,
- 0x109c0, 0x109ff,
- 0x10a04, 0x10a04,
- 0x10a07, 0x10a0b,
- 0x10a14, 0x10a14,
- 0x10a18, 0x10a18,
- 0x10a34, 0x10a37,
- 0x10a3b, 0x10a3e,
- 0x10a48, 0x10a4f,
- 0x10a59, 0x10a5f,
- 0x10aa0, 0x10abf,
- 0x10ae7, 0x10aea,
- 0x10af7, 0x10aff,
- 0x10b36, 0x10b38,
- 0x10b56, 0x10b57,
- 0x10b73, 0x10b77,
- 0x10b92, 0x10b98,
- 0x10b9d, 0x10ba8,
- 0x10bb0, 0x10bff,
- 0x10c49, 0x10e5f,
- 0x10e7f, 0x10fff,
- 0x1104e, 0x11051,
- 0x11070, 0x1107e,
- 0x110c2, 0x110cf,
- 0x110e9, 0x110ef,
- 0x110fa, 0x110ff,
- 0x11135, 0x11135,
- 0x11144, 0x1114f,
- 0x11177, 0x1117f,
- 0x111c9, 0x111cc,
- 0x111ce, 0x111cf,
- 0x111db, 0x111e0,
- 0x111f5, 0x111ff,
- 0x11212, 0x11212,
- 0x1123e, 0x112af,
- 0x112eb, 0x112ef,
- 0x112fa, 0x11300,
- 0x11304, 0x11304,
- 0x1130d, 0x1130e,
- 0x11311, 0x11312,
- 0x11329, 0x11329,
- 0x11331, 0x11331,
- 0x11334, 0x11334,
- 0x1133a, 0x1133b,
- 0x11345, 0x11346,
- 0x11349, 0x1134a,
- 0x1134e, 0x11356,
- 0x11358, 0x1135c,
- 0x11364, 0x11365,
- 0x1136d, 0x1136f,
- 0x11375, 0x1147f,
- 0x114c8, 0x114cf,
- 0x114da, 0x1157f,
- 0x115b6, 0x115b7,
- 0x115ca, 0x115ff,
- 0x11645, 0x1164f,
- 0x1165a, 0x1167f,
- 0x116b8, 0x116bf,
- 0x116ca, 0x1189f,
- 0x118f3, 0x118fe,
- 0x11900, 0x11abf,
- 0x11af9, 0x11fff,
- 0x12399, 0x123ff,
- 0x1246f, 0x1246f,
- 0x12475, 0x12fff,
- 0x1342f, 0x167ff,
- 0x16a39, 0x16a3f,
- 0x16a5f, 0x16a5f,
- 0x16a6a, 0x16a6d,
- 0x16a70, 0x16acf,
- 0x16aee, 0x16aef,
- 0x16af6, 0x16aff,
- 0x16b46, 0x16b4f,
- 0x16b5a, 0x16b5a,
- 0x16b62, 0x16b62,
- 0x16b78, 0x16b7c,
- 0x16b90, 0x16eff,
- 0x16f45, 0x16f4f,
- 0x16f7f, 0x16f8e,
- 0x16fa0, 0x1afff,
- 0x1b002, 0x1bbff,
- 0x1bc6b, 0x1bc6f,
- 0x1bc7d, 0x1bc7f,
- 0x1bc89, 0x1bc8f,
- 0x1bc9a, 0x1bc9b,
- 0x1bca0, 0x1d166,
- 0x1d16a, 0x1d17a,
- 0x1d183, 0x1d184,
- 0x1d18c, 0x1d1a9,
- 0x1d1ae, 0x1d1ff,
- 0x1d246, 0x1e7ff,
- 0x1e8c5, 0x1e8c6,
- 0x1e8d7, 0x1edff,
- 0x1ee04, 0x1ee04,
- 0x1ee20, 0x1ee20,
- 0x1ee23, 0x1ee23,
- 0x1ee25, 0x1ee26,
- 0x1ee28, 0x1ee28,
- 0x1ee33, 0x1ee33,
- 0x1ee38, 0x1ee38,
- 0x1ee3a, 0x1ee3a,
- 0x1ee3c, 0x1ee41,
- 0x1ee43, 0x1ee46,
- 0x1ee48, 0x1ee48,
- 0x1ee4a, 0x1ee4a,
- 0x1ee4c, 0x1ee4c,
- 0x1ee50, 0x1ee50,
- 0x1ee53, 0x1ee53,
- 0x1ee55, 0x1ee56,
- 0x1ee58, 0x1ee58,
- 0x1ee5a, 0x1ee5a,
- 0x1ee5c, 0x1ee5c,
- 0x1ee5e, 0x1ee5e,
- 0x1ee60, 0x1ee60,
- 0x1ee63, 0x1ee63,
- 0x1ee65, 0x1ee66,
- 0x1ee6b, 0x1ee6b,
- 0x1ee73, 0x1ee73,
- 0x1ee78, 0x1ee78,
- 0x1ee7d, 0x1ee7d,
- 0x1ee7f, 0x1ee7f,
- 0x1ee8a, 0x1ee8a,
- 0x1ee9c, 0x1eea0,
- 0x1eea4, 0x1eea4,
- 0x1eeaa, 0x1eeaa,
- 0x1eebc, 0x1eeef,
- 0x1eef2, 0x1f1ff,
- 0x1f201, 0x1ffff,
- 0x2a6d7, 0x2a6ff,
- 0x2b735, 0x2b73f,
- 0x2b81e, 0x2f7ff,
- 0x2fa1e, 0xe00ff,
- 0xe01f0, 0x10ffff
-};
-UCP_FN(Common)
-
-static const unichar ucp_Coptic_def[] = {
- 0x3e2, 0x3ef,
- 0x2c80, 0x2cf3,
- 0x2cf9, 0x2cff
-};
-UCP_FN(Coptic)
-
-static const unichar ucp_Cuneiform_def[] = {
- 0x12000, 0x12398,
- 0x12400, 0x1246e,
- 0x12470, 0x12474
-};
-UCP_FN(Cuneiform)
-
-static const unichar ucp_Cypriot_def[] = {
- 0x10800, 0x10805,
- 0x10808, 0x10808,
- 0x1080a, 0x10835,
- 0x10837, 0x10838,
- 0x1083c, 0x1083c,
- 0x1083f, 0x1083f
-};
-UCP_FN(Cypriot)
-
-static const unichar ucp_Cyrillic_def[] = {
- 0x400, 0x484,
- 0x487, 0x52f,
- 0x1d2b, 0x1d2b,
- 0x1d78, 0x1d78,
- 0x2de0, 0x2dff,
- 0xa640, 0xa69d,
- 0xa69f, 0xa69f
-};
-UCP_FN(Cyrillic)
-
-static const unichar ucp_Deseret_def[] = {
- 0x10400, 0x1044f
-};
-UCP_FN(Deseret)
-
-static const unichar ucp_Devanagari_def[] = {
- 0x900, 0x950,
- 0x953, 0x963,
- 0x966, 0x97f,
- 0xa8e0, 0xa8fb
-};
-UCP_FN(Devanagari)
-
-static const unichar ucp_Duployan_def[] = {
- 0x1bc00, 0x1bc6a,
- 0x1bc70, 0x1bc7c,
- 0x1bc80, 0x1bc88,
- 0x1bc90, 0x1bc99,
- 0x1bc9c, 0x1bc9f
-};
-UCP_FN(Duployan)
-
-static const unichar ucp_Egyptian_Hieroglyphs_def[] = {
- 0x13000, 0x1342e
-};
-UCP_FN(Egyptian_Hieroglyphs)
-
-static const unichar ucp_Elbasan_def[] = {
- 0x10500, 0x10527
-};
-UCP_FN(Elbasan)
-
-static const unichar ucp_Ethiopic_def[] = {
- 0x1200, 0x1248,
- 0x124a, 0x124d,
- 0x1250, 0x1256,
- 0x1258, 0x1258,
- 0x125a, 0x125d,
- 0x1260, 0x1288,
- 0x128a, 0x128d,
- 0x1290, 0x12b0,
- 0x12b2, 0x12b5,
- 0x12b8, 0x12be,
- 0x12c0, 0x12c0,
- 0x12c2, 0x12c5,
- 0x12c8, 0x12d6,
- 0x12d8, 0x1310,
- 0x1312, 0x1315,
- 0x1318, 0x135a,
- 0x135d, 0x137c,
- 0x1380, 0x1399,
- 0x2d80, 0x2d96,
- 0x2da0, 0x2da6,
- 0x2da8, 0x2dae,
- 0x2db0, 0x2db6,
- 0x2db8, 0x2dbe,
- 0x2dc0, 0x2dc6,
- 0x2dc8, 0x2dce,
- 0x2dd0, 0x2dd6,
- 0x2dd8, 0x2dde,
- 0xab01, 0xab06,
- 0xab09, 0xab0e,
- 0xab11, 0xab16,
- 0xab20, 0xab26,
- 0xab28, 0xab2e
-};
-UCP_FN(Ethiopic)
-
-static const unichar ucp_Georgian_def[] = {
- 0x10a0, 0x10c5,
- 0x10c7, 0x10c7,
- 0x10cd, 0x10cd,
- 0x10d0, 0x10fa,
- 0x10fc, 0x10ff,
- 0x2d00, 0x2d25,
- 0x2d27, 0x2d27,
- 0x2d2d, 0x2d2d
-};
-UCP_FN(Georgian)
-
-static const unichar ucp_Glagolitic_def[] = {
- 0x2c00, 0x2c2e,
- 0x2c30, 0x2c5e
-};
-UCP_FN(Glagolitic)
-
-static const unichar ucp_Gothic_def[] = {
- 0x10330, 0x1034a
-};
-UCP_FN(Gothic)
-
-static const unichar ucp_Grantha_def[] = {
- 0x11301, 0x11303,
- 0x11305, 0x1130c,
- 0x1130f, 0x11310,
- 0x11313, 0x11328,
- 0x1132a, 0x11330,
- 0x11332, 0x11333,
- 0x11335, 0x11339,
- 0x1133c, 0x11344,
- 0x11347, 0x11348,
- 0x1134b, 0x1134d,
- 0x11357, 0x11357,
- 0x1135d, 0x11363,
- 0x11366, 0x1136c,
- 0x11370, 0x11374
-};
-UCP_FN(Grantha)
-
-static const unichar ucp_Greek_def[] = {
- 0x370, 0x373,
- 0x375, 0x377,
- 0x37a, 0x37d,
- 0x37f, 0x37f,
- 0x384, 0x384,
- 0x386, 0x386,
- 0x388, 0x38a,
- 0x38c, 0x38c,
- 0x38e, 0x3a1,
- 0x3a3, 0x3e1,
- 0x3f0, 0x3ff,
- 0x1d26, 0x1d2a,
- 0x1d5d, 0x1d61,
- 0x1d66, 0x1d6a,
- 0x1dbf, 0x1dbf,
- 0x1f00, 0x1f15,
- 0x1f18, 0x1f1d,
- 0x1f20, 0x1f45,
- 0x1f48, 0x1f4d,
- 0x1f50, 0x1f57,
- 0x1f59, 0x1f59,
- 0x1f5b, 0x1f5b,
- 0x1f5d, 0x1f5d,
- 0x1f5f, 0x1f7d,
- 0x1f80, 0x1fb4,
- 0x1fb6, 0x1fc4,
- 0x1fc6, 0x1fd3,
- 0x1fd6, 0x1fdb,
- 0x1fdd, 0x1fef,
- 0x1ff2, 0x1ff4,
- 0x1ff6, 0x1ffe,
- 0x2126, 0x2126,
- 0xab65, 0xab65,
- 0x10140, 0x1018c,
- 0x101a0, 0x101a0,
- 0x1d200, 0x1d245
-};
-UCP_FN(Greek)
-
-static const unichar ucp_Gujarati_def[] = {
- 0xa81, 0xa83,
- 0xa85, 0xa8d,
- 0xa8f, 0xa91,
- 0xa93, 0xaa8,
- 0xaaa, 0xab0,
- 0xab2, 0xab3,
- 0xab5, 0xab9,
- 0xabc, 0xac5,
- 0xac7, 0xac9,
- 0xacb, 0xacd,
- 0xad0, 0xad0,
- 0xae0, 0xae3,
- 0xae6, 0xaf1
-};
-UCP_FN(Gujarati)
-
-static const unichar ucp_Gurmukhi_def[] = {
- 0xa01, 0xa03,
- 0xa05, 0xa0a,
- 0xa0f, 0xa10,
- 0xa13, 0xa28,
- 0xa2a, 0xa30,
- 0xa32, 0xa33,
- 0xa35, 0xa36,
- 0xa38, 0xa39,
- 0xa3c, 0xa3c,
- 0xa3e, 0xa42,
- 0xa47, 0xa48,
- 0xa4b, 0xa4d,
- 0xa51, 0xa51,
- 0xa59, 0xa5c,
- 0xa5e, 0xa5e,
- 0xa66, 0xa75
-};
-UCP_FN(Gurmukhi)
-
-static const unichar ucp_Han_def[] = {
- 0x2e80, 0x2e99,
- 0x2e9b, 0x2ef3,
- 0x2f00, 0x2fd5,
- 0x3005, 0x3005,
- 0x3007, 0x3007,
- 0x3021, 0x3029,
- 0x3038, 0x303b,
- 0x3400, 0x4db5,
- 0x4e00, 0x9fcc,
- 0xf900, 0xfa6d,
- 0xfa70, 0xfad9,
- 0x20000, 0x2a6d6,
- 0x2a700, 0x2b734,
- 0x2b740, 0x2b81d,
- 0x2f800, 0x2fa1d
-};
-UCP_FN(Han)
-
-static const unichar ucp_Hangul_def[] = {
- 0x1100, 0x11ff,
- 0x302e, 0x302f,
- 0x3131, 0x318e,
- 0x3200, 0x321e,
- 0x3260, 0x327e,
- 0xa960, 0xa97c,
- 0xac00, 0xd7a3,
- 0xd7b0, 0xd7c6,
- 0xd7cb, 0xd7fb,
- 0xffa0, 0xffbe,
- 0xffc2, 0xffc7,
- 0xffca, 0xffcf,
- 0xffd2, 0xffd7,
- 0xffda, 0xffdc
-};
-UCP_FN(Hangul)
-
-static const unichar ucp_Hanunoo_def[] = {
- 0x1720, 0x1734
-};
-UCP_FN(Hanunoo)
-
-static const unichar ucp_Hebrew_def[] = {
- 0x591, 0x5c7,
- 0x5d0, 0x5ea,
- 0x5f0, 0x5f4,
- 0xfb1d, 0xfb36,
- 0xfb38, 0xfb3c,
- 0xfb3e, 0xfb3e,
- 0xfb40, 0xfb41,
- 0xfb43, 0xfb44,
- 0xfb46, 0xfb4f
-};
-UCP_FN(Hebrew)
-
-static const unichar ucp_Hiragana_def[] = {
- 0x3041, 0x3096,
- 0x309d, 0x309f,
- 0x1b001, 0x1b001,
- 0x1f200, 0x1f200
-};
-UCP_FN(Hiragana)
-
-static const unichar ucp_Imperial_Aramaic_def[] = {
- 0x10840, 0x10855,
- 0x10857, 0x1085f
-};
-UCP_FN(Imperial_Aramaic)
-
-static const unichar ucp_Inherited_def[] = {
- 0x300, 0x36f,
- 0x485, 0x486,
- 0x64b, 0x655,
- 0x670, 0x670,
- 0x951, 0x952,
- 0x1ab0, 0x1abe,
- 0x1cd0, 0x1cd2,
- 0x1cd4, 0x1ce0,
- 0x1ce2, 0x1ce8,
- 0x1ced, 0x1ced,
- 0x1cf4, 0x1cf4,
- 0x1cf8, 0x1cf9,
- 0x1dc0, 0x1df5,
- 0x1dfc, 0x1dff,
- 0x200c, 0x200d,
- 0x20d0, 0x20f0,
- 0x302a, 0x302d,
- 0x3099, 0x309a,
- 0xfe00, 0xfe0f,
- 0xfe20, 0xfe2d,
- 0x101fd, 0x101fd,
- 0x102e0, 0x102e0,
- 0x1d167, 0x1d169,
- 0x1d17b, 0x1d182,
- 0x1d185, 0x1d18b,
- 0x1d1aa, 0x1d1ad,
- 0xe0100, 0xe01ef
-};
-UCP_FN(Inherited)
-
-static const unichar ucp_Inscriptional_Pahlavi_def[] = {
- 0x10b60, 0x10b72,
- 0x10b78, 0x10b7f
-};
-UCP_FN(Inscriptional_Pahlavi)
-
-static const unichar ucp_Inscriptional_Parthian_def[] = {
- 0x10b40, 0x10b55,
- 0x10b58, 0x10b5f
-};
-UCP_FN(Inscriptional_Parthian)
-
-static const unichar ucp_Javanese_def[] = {
- 0xa980, 0xa9cd,
- 0xa9d0, 0xa9d9,
- 0xa9de, 0xa9df
-};
-UCP_FN(Javanese)
-
-static const unichar ucp_Kaithi_def[] = {
- 0x11080, 0x110c1
-};
-UCP_FN(Kaithi)
-
-static const unichar ucp_Kannada_def[] = {
- 0xc81, 0xc83,
- 0xc85, 0xc8c,
- 0xc8e, 0xc90,
- 0xc92, 0xca8,
- 0xcaa, 0xcb3,
- 0xcb5, 0xcb9,
- 0xcbc, 0xcc4,
- 0xcc6, 0xcc8,
- 0xcca, 0xccd,
- 0xcd5, 0xcd6,
- 0xcde, 0xcde,
- 0xce0, 0xce3,
- 0xce6, 0xcef,
- 0xcf1, 0xcf2
-};
-UCP_FN(Kannada)
-
-static const unichar ucp_Katakana_def[] = {
- 0x30a1, 0x30fa,
- 0x30fd, 0x30ff,
- 0x31f0, 0x31ff,
- 0x32d0, 0x32fe,
- 0x3300, 0x3357,
- 0xff66, 0xff6f,
- 0xff71, 0xff9d,
- 0x1b000, 0x1b000
-};
-UCP_FN(Katakana)
-
-static const unichar ucp_Kayah_Li_def[] = {
- 0xa900, 0xa92d,
- 0xa92f, 0xa92f
-};
-UCP_FN(Kayah_Li)
-
-static const unichar ucp_Kharoshthi_def[] = {
- 0x10a00, 0x10a03,
- 0x10a05, 0x10a06,
- 0x10a0c, 0x10a13,
- 0x10a15, 0x10a17,
- 0x10a19, 0x10a33,
- 0x10a38, 0x10a3a,
- 0x10a3f, 0x10a47,
- 0x10a50, 0x10a58
-};
-UCP_FN(Kharoshthi)
-
-static const unichar ucp_Khmer_def[] = {
- 0x1780, 0x17dd,
- 0x17e0, 0x17e9,
- 0x17f0, 0x17f9,
- 0x19e0, 0x19ff
-};
-UCP_FN(Khmer)
-
-static const unichar ucp_Khojki_def[] = {
- 0x11200, 0x11211,
- 0x11213, 0x1123d
-};
-UCP_FN(Khojki)
-
-static const unichar ucp_Khudawadi_def[] = {
- 0x112b0, 0x112ea,
- 0x112f0, 0x112f9
-};
-UCP_FN(Khudawadi)
-
-static const unichar ucp_Lao_def[] = {
- 0xe81, 0xe82,
- 0xe84, 0xe84,
- 0xe87, 0xe88,
- 0xe8a, 0xe8a,
- 0xe8d, 0xe8d,
- 0xe94, 0xe97,
- 0xe99, 0xe9f,
- 0xea1, 0xea3,
- 0xea5, 0xea5,
- 0xea7, 0xea7,
- 0xeaa, 0xeab,
- 0xead, 0xeb9,
- 0xebb, 0xebd,
- 0xec0, 0xec4,
- 0xec6, 0xec6,
- 0xec8, 0xecd,
- 0xed0, 0xed9,
- 0xedc, 0xedf
-};
-UCP_FN(Lao)
-
-static const unichar ucp_Latin_def[] = {
- 0x41, 0x5a,
- 0x61, 0x7a,
- 0xaa, 0xaa,
- 0xba, 0xba,
- 0xc0, 0xd6,
- 0xd8, 0xf6,
- 0xf8, 0x2b8,
- 0x2e0, 0x2e4,
- 0x1d00, 0x1d25,
- 0x1d2c, 0x1d5c,
- 0x1d62, 0x1d65,
- 0x1d6b, 0x1d77,
- 0x1d79, 0x1dbe,
- 0x1e00, 0x1eff,
- 0x2071, 0x2071,
- 0x207f, 0x207f,
- 0x2090, 0x209c,
- 0x212a, 0x212b,
- 0x2132, 0x2132,
- 0x214e, 0x214e,
- 0x2160, 0x2188,
- 0x2c60, 0x2c7f,
- 0xa722, 0xa787,
- 0xa78b, 0xa78e,
- 0xa790, 0xa7ad,
- 0xa7b0, 0xa7b1,
- 0xa7f7, 0xa7ff,
- 0xab30, 0xab5a,
- 0xab5c, 0xab5f,
- 0xab64, 0xab64,
- 0xfb00, 0xfb06,
- 0xff21, 0xff3a,
- 0xff41, 0xff5a
-};
-UCP_FN(Latin)
-
-static const unichar ucp_Lepcha_def[] = {
- 0x1c00, 0x1c37,
- 0x1c3b, 0x1c49,
- 0x1c4d, 0x1c4f
-};
-UCP_FN(Lepcha)
-
-static const unichar ucp_Limbu_def[] = {
- 0x1900, 0x191e,
- 0x1920, 0x192b,
- 0x1930, 0x193b,
- 0x1940, 0x1940,
- 0x1944, 0x194f
-};
-UCP_FN(Limbu)
-
-static const unichar ucp_Linear_A_def[] = {
- 0x10600, 0x10736,
- 0x10740, 0x10755,
- 0x10760, 0x10767
-};
-UCP_FN(Linear_A)
-
-static const unichar ucp_Linear_B_def[] = {
- 0x10000, 0x1000b,
- 0x1000d, 0x10026,
- 0x10028, 0x1003a,
- 0x1003c, 0x1003d,
- 0x1003f, 0x1004d,
- 0x10050, 0x1005d,
- 0x10080, 0x100fa
-};
-UCP_FN(Linear_B)
-
-static const unichar ucp_Lisu_def[] = {
- 0xa4d0, 0xa4ff
-};
-UCP_FN(Lisu)
-
-static const unichar ucp_Lycian_def[] = {
- 0x10280, 0x1029c
-};
-UCP_FN(Lycian)
-
-static const unichar ucp_Lydian_def[] = {
- 0x10920, 0x10939,
- 0x1093f, 0x1093f
-};
-UCP_FN(Lydian)
-
-static const unichar ucp_Mahajani_def[] = {
- 0x11150, 0x11176
-};
-UCP_FN(Mahajani)
-
-static const unichar ucp_Malayalam_def[] = {
- 0xd01, 0xd03,
- 0xd05, 0xd0c,
- 0xd0e, 0xd10,
- 0xd12, 0xd3a,
- 0xd3d, 0xd44,
- 0xd46, 0xd48,
- 0xd4a, 0xd4e,
- 0xd57, 0xd57,
- 0xd60, 0xd63,
- 0xd66, 0xd75,
- 0xd79, 0xd7f
-};
-UCP_FN(Malayalam)
-
-static const unichar ucp_Mandaic_def[] = {
- 0x840, 0x85b,
- 0x85e, 0x85e
-};
-UCP_FN(Mandaic)
-
-static const unichar ucp_Manichaean_def[] = {
- 0x10ac0, 0x10ae6,
- 0x10aeb, 0x10af6
-};
-UCP_FN(Manichaean)
-
-static const unichar ucp_Meetei_Mayek_def[] = {
- 0xaae0, 0xaaf6,
- 0xabc0, 0xabed,
- 0xabf0, 0xabf9
-};
-UCP_FN(Meetei_Mayek)
-
-static const unichar ucp_Mende_Kikakui_def[] = {
- 0x1e800, 0x1e8c4,
- 0x1e8c7, 0x1e8d6
-};
-UCP_FN(Mende_Kikakui)
-
-static const unichar ucp_Meroitic_Cursive_def[] = {
- 0x109a0, 0x109b7,
- 0x109be, 0x109bf
-};
-UCP_FN(Meroitic_Cursive)
-
-static const unichar ucp_Meroitic_Hieroglyphs_def[] = {
- 0x10980, 0x1099f
-};
-UCP_FN(Meroitic_Hieroglyphs)
-
-static const unichar ucp_Miao_def[] = {
- 0x16f00, 0x16f44,
- 0x16f50, 0x16f7e,
- 0x16f8f, 0x16f9f
-};
-UCP_FN(Miao)
-
-static const unichar ucp_Modi_def[] = {
- 0x11600, 0x11644,
- 0x11650, 0x11659
-};
-UCP_FN(Modi)
-
-static const unichar ucp_Mongolian_def[] = {
- 0x1800, 0x1801,
- 0x1804, 0x1804,
- 0x1806, 0x180e,
- 0x1810, 0x1819,
- 0x1820, 0x1877,
- 0x1880, 0x18aa
-};
-UCP_FN(Mongolian)
-
-static const unichar ucp_Mro_def[] = {
- 0x16a40, 0x16a5e,
- 0x16a60, 0x16a69,
- 0x16a6e, 0x16a6f
-};
-UCP_FN(Mro)
-
-static const unichar ucp_Myanmar_def[] = {
- 0x1000, 0x109f,
- 0xa9e0, 0xa9fe,
- 0xaa60, 0xaa7f
-};
-UCP_FN(Myanmar)
-
-static const unichar ucp_Nabataean_def[] = {
- 0x10880, 0x1089e,
- 0x108a7, 0x108af
-};
-UCP_FN(Nabataean)
-
-static const unichar ucp_New_Tai_Lue_def[] = {
- 0x1980, 0x19ab,
- 0x19b0, 0x19c9,
- 0x19d0, 0x19da,
- 0x19de, 0x19df
-};
-UCP_FN(New_Tai_Lue)
-
-static const unichar ucp_Nko_def[] = {
- 0x7c0, 0x7fa
-};
-UCP_FN(Nko)
-
-static const unichar ucp_Ogham_def[] = {
- 0x1680, 0x169c
-};
-UCP_FN(Ogham)
-
-static const unichar ucp_Ol_Chiki_def[] = {
- 0x1c50, 0x1c7f
-};
-UCP_FN(Ol_Chiki)
-
-static const unichar ucp_Old_Italic_def[] = {
- 0x10300, 0x10323
-};
-UCP_FN(Old_Italic)
-
-static const unichar ucp_Old_North_Arabian_def[] = {
- 0x10a80, 0x10a9f
-};
-UCP_FN(Old_North_Arabian)
-
-static const unichar ucp_Old_Permic_def[] = {
- 0x10350, 0x1037a
-};
-UCP_FN(Old_Permic)
-
-static const unichar ucp_Old_Persian_def[] = {
- 0x103a0, 0x103c3,
- 0x103c8, 0x103d5
-};
-UCP_FN(Old_Persian)
-
-static const unichar ucp_Old_South_Arabian_def[] = {
- 0x10a60, 0x10a7f
-};
-UCP_FN(Old_South_Arabian)
-
-static const unichar ucp_Old_Turkic_def[] = {
- 0x10c00, 0x10c48
-};
-UCP_FN(Old_Turkic)
-
-static const unichar ucp_Oriya_def[] = {
- 0xb01, 0xb03,
- 0xb05, 0xb0c,
- 0xb0f, 0xb10,
- 0xb13, 0xb28,
- 0xb2a, 0xb30,
- 0xb32, 0xb33,
- 0xb35, 0xb39,
- 0xb3c, 0xb44,
- 0xb47, 0xb48,
- 0xb4b, 0xb4d,
- 0xb56, 0xb57,
- 0xb5c, 0xb5d,
- 0xb5f, 0xb63,
- 0xb66, 0xb77
-};
-UCP_FN(Oriya)
-
-static const unichar ucp_Osmanya_def[] = {
- 0x10480, 0x1049d,
- 0x104a0, 0x104a9
-};
-UCP_FN(Osmanya)
-
-static const unichar ucp_Pahawh_Hmong_def[] = {
- 0x16b00, 0x16b45,
- 0x16b50, 0x16b59,
- 0x16b5b, 0x16b61,
- 0x16b63, 0x16b77,
- 0x16b7d, 0x16b8f
-};
-UCP_FN(Pahawh_Hmong)
-
-static const unichar ucp_Palmyrene_def[] = {
- 0x10860, 0x1087f
-};
-UCP_FN(Palmyrene)
-
-static const unichar ucp_Pau_Cin_Hau_def[] = {
- 0x11ac0, 0x11af8
-};
-UCP_FN(Pau_Cin_Hau)
-
-static const unichar ucp_Phags_Pa_def[] = {
- 0xa840, 0xa877
-};
-UCP_FN(Phags_Pa)
-
-static const unichar ucp_Phoenician_def[] = {
- 0x10900, 0x1091b,
- 0x1091f, 0x1091f
-};
-UCP_FN(Phoenician)
-
-static const unichar ucp_Psalter_Pahlavi_def[] = {
- 0x10b80, 0x10b91,
- 0x10b99, 0x10b9c,
- 0x10ba9, 0x10baf
-};
-UCP_FN(Psalter_Pahlavi)
-
-static const unichar ucp_Rejang_def[] = {
- 0xa930, 0xa953,
- 0xa95f, 0xa95f
-};
-UCP_FN(Rejang)
-
-static const unichar ucp_Runic_def[] = {
- 0x16a0, 0x16ea,
- 0x16ee, 0x16f8
-};
-UCP_FN(Runic)
-
-static const unichar ucp_Samaritan_def[] = {
- 0x800, 0x82d,
- 0x830, 0x83e
-};
-UCP_FN(Samaritan)
-
-static const unichar ucp_Saurashtra_def[] = {
- 0xa880, 0xa8c4,
- 0xa8ce, 0xa8d9
-};
-UCP_FN(Saurashtra)
-
-static const unichar ucp_Sharada_def[] = {
- 0x11180, 0x111c8,
- 0x111cd, 0x111cd,
- 0x111d0, 0x111da
-};
-UCP_FN(Sharada)
-
-static const unichar ucp_Shavian_def[] = {
- 0x10450, 0x1047f
-};
-UCP_FN(Shavian)
-
-static const unichar ucp_Siddham_def[] = {
- 0x11580, 0x115b5,
- 0x115b8, 0x115c9
-};
-UCP_FN(Siddham)
-
-static const unichar ucp_Sinhala_def[] = {
- 0xd82, 0xd83,
- 0xd85, 0xd96,
- 0xd9a, 0xdb1,
- 0xdb3, 0xdbb,
- 0xdbd, 0xdbd,
- 0xdc0, 0xdc6,
- 0xdca, 0xdca,
- 0xdcf, 0xdd4,
- 0xdd6, 0xdd6,
- 0xdd8, 0xddf,
- 0xde6, 0xdef,
- 0xdf2, 0xdf4,
- 0x111e1, 0x111f4
-};
-UCP_FN(Sinhala)
-
-static const unichar ucp_Sora_Sompeng_def[] = {
- 0x110d0, 0x110e8,
- 0x110f0, 0x110f9
-};
-UCP_FN(Sora_Sompeng)
-
-static const unichar ucp_Sundanese_def[] = {
- 0x1b80, 0x1bbf,
- 0x1cc0, 0x1cc7
-};
-UCP_FN(Sundanese)
-
-static const unichar ucp_Syloti_Nagri_def[] = {
- 0xa800, 0xa82b
-};
-UCP_FN(Syloti_Nagri)
-
-static const unichar ucp_Syriac_def[] = {
- 0x700, 0x70d,
- 0x70f, 0x74a,
- 0x74d, 0x74f
-};
-UCP_FN(Syriac)
-
-static const unichar ucp_Tagalog_def[] = {
- 0x1700, 0x170c,
- 0x170e, 0x1714
-};
-UCP_FN(Tagalog)
-
-static const unichar ucp_Tagbanwa_def[] = {
- 0x1760, 0x176c,
- 0x176e, 0x1770,
- 0x1772, 0x1773
-};
-UCP_FN(Tagbanwa)
-
-static const unichar ucp_Tai_Le_def[] = {
- 0x1950, 0x196d,
- 0x1970, 0x1974
-};
-UCP_FN(Tai_Le)
-
-static const unichar ucp_Tai_Tham_def[] = {
- 0x1a20, 0x1a5e,
- 0x1a60, 0x1a7c,
- 0x1a7f, 0x1a89,
- 0x1a90, 0x1a99,
- 0x1aa0, 0x1aad
-};
-UCP_FN(Tai_Tham)
-
-static const unichar ucp_Tai_Viet_def[] = {
- 0xaa80, 0xaac2,
- 0xaadb, 0xaadf
-};
-UCP_FN(Tai_Viet)
-
-static const unichar ucp_Takri_def[] = {
- 0x11680, 0x116b7,
- 0x116c0, 0x116c9
-};
-UCP_FN(Takri)
-
-static const unichar ucp_Tamil_def[] = {
- 0xb82, 0xb83,
- 0xb85, 0xb8a,
- 0xb8e, 0xb90,
- 0xb92, 0xb95,
- 0xb99, 0xb9a,
- 0xb9c, 0xb9c,
- 0xb9e, 0xb9f,
- 0xba3, 0xba4,
- 0xba8, 0xbaa,
- 0xbae, 0xbb9,
- 0xbbe, 0xbc2,
- 0xbc6, 0xbc8,
- 0xbca, 0xbcd,
- 0xbd0, 0xbd0,
- 0xbd7, 0xbd7,
- 0xbe6, 0xbfa
-};
-UCP_FN(Tamil)
-
-static const unichar ucp_Telugu_def[] = {
- 0xc00, 0xc03,
- 0xc05, 0xc0c,
- 0xc0e, 0xc10,
- 0xc12, 0xc28,
- 0xc2a, 0xc39,
- 0xc3d, 0xc44,
- 0xc46, 0xc48,
- 0xc4a, 0xc4d,
- 0xc55, 0xc56,
- 0xc58, 0xc59,
- 0xc60, 0xc63,
- 0xc66, 0xc6f,
- 0xc78, 0xc7f
-};
-UCP_FN(Telugu)
-
-static const unichar ucp_Thaana_def[] = {
- 0x780, 0x7b1
-};
-UCP_FN(Thaana)
-
-static const unichar ucp_Thai_def[] = {
- 0xe01, 0xe3a,
- 0xe40, 0xe5b
-};
-UCP_FN(Thai)
-
-static const unichar ucp_Tibetan_def[] = {
- 0xf00, 0xf47,
- 0xf49, 0xf6c,
- 0xf71, 0xf97,
- 0xf99, 0xfbc,
- 0xfbe, 0xfcc,
- 0xfce, 0xfd4,
- 0xfd9, 0xfda
-};
-UCP_FN(Tibetan)
-
-static const unichar ucp_Tifinagh_def[] = {
- 0x2d30, 0x2d67,
- 0x2d6f, 0x2d70,
- 0x2d7f, 0x2d7f
-};
-UCP_FN(Tifinagh)
-
-static const unichar ucp_Tirhuta_def[] = {
- 0x11480, 0x114c7,
- 0x114d0, 0x114d9
-};
-UCP_FN(Tirhuta)
-
-static const unichar ucp_Ugaritic_def[] = {
- 0x10380, 0x1039d,
- 0x1039f, 0x1039f
-};
-UCP_FN(Ugaritic)
-
-static const unichar ucp_Vai_def[] = {
- 0xa500, 0xa62b
-};
-UCP_FN(Vai)
-
-static const unichar ucp_Warang_Citi_def[] = {
- 0x118a0, 0x118f2,
- 0x118ff, 0x118ff
-};
-UCP_FN(Warang_Citi)
-
-static const unichar ucp_Yi_def[] = {
- 0xa000, 0xa48c,
- 0xa490, 0xa4c6
-};
-UCP_FN(Yi)
-
-static const unicase ucp_caseless_def[] = {
- {0x41, 0x61},
- {0x42, 0x62},
- {0x43, 0x63},
- {0x44, 0x64},
- {0x45, 0x65},
- {0x46, 0x66},
- {0x47, 0x67},
- {0x48, 0x68},
- {0x49, 0x69},
- {0x4a, 0x6a},
- {0x4b, 0x6b},
- {0x4b, 0x212a},
- {0x4c, 0x6c},
- {0x4d, 0x6d},
- {0x4e, 0x6e},
- {0x4f, 0x6f},
- {0x50, 0x70},
- {0x51, 0x71},
- {0x52, 0x72},
- {0x53, 0x73},
- {0x53, 0x17f},
- {0x54, 0x74},
- {0x55, 0x75},
- {0x56, 0x76},
- {0x57, 0x77},
- {0x58, 0x78},
- {0x59, 0x79},
- {0x5a, 0x7a},
- {0x61, 0x41},
- {0x62, 0x42},
- {0x63, 0x43},
- {0x64, 0x44},
- {0x65, 0x45},
- {0x66, 0x46},
- {0x67, 0x47},
- {0x68, 0x48},
- {0x69, 0x49},
- {0x6a, 0x4a},
- {0x6b, 0x4b},
- {0x6b, 0x212a},
- {0x6c, 0x4c},
- {0x6d, 0x4d},
- {0x6e, 0x4e},
- {0x6f, 0x4f},
- {0x70, 0x50},
- {0x71, 0x51},
- {0x72, 0x52},
- {0x73, 0x53},
- {0x73, 0x17f},
- {0x74, 0x54},
- {0x75, 0x55},
- {0x76, 0x56},
- {0x77, 0x57},
- {0x78, 0x58},
- {0x79, 0x59},
- {0x7a, 0x5a},
- {0xb5, 0x39c},
- {0xb5, 0x3bc},
- {0xc0, 0xe0},
- {0xc1, 0xe1},
- {0xc2, 0xe2},
- {0xc3, 0xe3},
- {0xc4, 0xe4},
- {0xc5, 0xe5},
- {0xc5, 0x212b},
- {0xc6, 0xe6},
- {0xc7, 0xe7},
- {0xc8, 0xe8},
- {0xc9, 0xe9},
- {0xca, 0xea},
- {0xcb, 0xeb},
- {0xcc, 0xec},
- {0xcd, 0xed},
- {0xce, 0xee},
- {0xcf, 0xef},
- {0xd0, 0xf0},
- {0xd1, 0xf1},
- {0xd2, 0xf2},
- {0xd3, 0xf3},
- {0xd4, 0xf4},
- {0xd5, 0xf5},
- {0xd6, 0xf6},
- {0xd8, 0xf8},
- {0xd9, 0xf9},
- {0xda, 0xfa},
- {0xdb, 0xfb},
- {0xdc, 0xfc},
- {0xdd, 0xfd},
- {0xde, 0xfe},
- {0xdf, 0x1e9e},
- {0xe0, 0xc0},
- {0xe1, 0xc1},
- {0xe2, 0xc2},
- {0xe3, 0xc3},
- {0xe4, 0xc4},
- {0xe5, 0xc5},
- {0xe5, 0x212b},
- {0xe6, 0xc6},
- {0xe7, 0xc7},
- {0xe8, 0xc8},
- {0xe9, 0xc9},
- {0xea, 0xca},
- {0xeb, 0xcb},
- {0xec, 0xcc},
- {0xed, 0xcd},
- {0xee, 0xce},
- {0xef, 0xcf},
- {0xf0, 0xd0},
- {0xf1, 0xd1},
- {0xf2, 0xd2},
- {0xf3, 0xd3},
- {0xf4, 0xd4},
- {0xf5, 0xd5},
- {0xf6, 0xd6},
- {0xf8, 0xd8},
- {0xf9, 0xd9},
- {0xfa, 0xda},
- {0xfb, 0xdb},
- {0xfc, 0xdc},
- {0xfd, 0xdd},
- {0xfe, 0xde},
- {0xff, 0x178},
- {0x100, 0x101},
- {0x101, 0x100},
- {0x102, 0x103},
- {0x103, 0x102},
- {0x104, 0x105},
- {0x105, 0x104},
- {0x106, 0x107},
- {0x107, 0x106},
- {0x108, 0x109},
- {0x109, 0x108},
- {0x10a, 0x10b},
- {0x10b, 0x10a},
- {0x10c, 0x10d},
- {0x10d, 0x10c},
- {0x10e, 0x10f},
- {0x10f, 0x10e},
- {0x110, 0x111},
- {0x111, 0x110},
- {0x112, 0x113},
- {0x113, 0x112},
- {0x114, 0x115},
- {0x115, 0x114},
- {0x116, 0x117},
- {0x117, 0x116},
- {0x118, 0x119},
- {0x119, 0x118},
- {0x11a, 0x11b},
- {0x11b, 0x11a},
- {0x11c, 0x11d},
- {0x11d, 0x11c},
- {0x11e, 0x11f},
- {0x11f, 0x11e},
- {0x120, 0x121},
- {0x121, 0x120},
- {0x122, 0x123},
- {0x123, 0x122},
- {0x124, 0x125},
- {0x125, 0x124},
- {0x126, 0x127},
- {0x127, 0x126},
- {0x128, 0x129},
- {0x129, 0x128},
- {0x12a, 0x12b},
- {0x12b, 0x12a},
- {0x12c, 0x12d},
- {0x12d, 0x12c},
- {0x12e, 0x12f},
- {0x12f, 0x12e},
- {0x132, 0x133},
- {0x133, 0x132},
- {0x134, 0x135},
- {0x135, 0x134},
- {0x136, 0x137},
- {0x137, 0x136},
- {0x139, 0x13a},
- {0x13a, 0x139},
- {0x13b, 0x13c},
- {0x13c, 0x13b},
- {0x13d, 0x13e},
- {0x13e, 0x13d},
- {0x13f, 0x140},
- {0x140, 0x13f},
- {0x141, 0x142},
- {0x142, 0x141},
- {0x143, 0x144},
- {0x144, 0x143},
- {0x145, 0x146},
- {0x146, 0x145},
- {0x147, 0x148},
- {0x148, 0x147},
- {0x14a, 0x14b},
- {0x14b, 0x14a},
- {0x14c, 0x14d},
- {0x14d, 0x14c},
- {0x14e, 0x14f},
- {0x14f, 0x14e},
- {0x150, 0x151},
- {0x151, 0x150},
- {0x152, 0x153},
- {0x153, 0x152},
- {0x154, 0x155},
- {0x155, 0x154},
- {0x156, 0x157},
- {0x157, 0x156},
- {0x158, 0x159},
- {0x159, 0x158},
- {0x15a, 0x15b},
- {0x15b, 0x15a},
- {0x15c, 0x15d},
- {0x15d, 0x15c},
- {0x15e, 0x15f},
- {0x15f, 0x15e},
- {0x160, 0x161},
- {0x161, 0x160},
- {0x162, 0x163},
- {0x163, 0x162},
- {0x164, 0x165},
- {0x165, 0x164},
- {0x166, 0x167},
- {0x167, 0x166},
- {0x168, 0x169},
- {0x169, 0x168},
- {0x16a, 0x16b},
- {0x16b, 0x16a},
- {0x16c, 0x16d},
- {0x16d, 0x16c},
- {0x16e, 0x16f},
- {0x16f, 0x16e},
- {0x170, 0x171},
- {0x171, 0x170},
- {0x172, 0x173},
- {0x173, 0x172},
- {0x174, 0x175},
- {0x175, 0x174},
- {0x176, 0x177},
- {0x177, 0x176},
- {0x178, 0xff},
- {0x179, 0x17a},
- {0x17a, 0x179},
- {0x17b, 0x17c},
- {0x17c, 0x17b},
- {0x17d, 0x17e},
- {0x17e, 0x17d},
- {0x17f, 0x53},
- {0x17f, 0x73},
- {0x180, 0x243},
- {0x181, 0x253},
- {0x182, 0x183},
- {0x183, 0x182},
- {0x184, 0x185},
- {0x185, 0x184},
- {0x186, 0x254},
- {0x187, 0x188},
- {0x188, 0x187},
- {0x189, 0x256},
- {0x18a, 0x257},
- {0x18b, 0x18c},
- {0x18c, 0x18b},
- {0x18e, 0x1dd},
- {0x18f, 0x259},
- {0x190, 0x25b},
- {0x191, 0x192},
- {0x192, 0x191},
- {0x193, 0x260},
- {0x194, 0x263},
- {0x195, 0x1f6},
- {0x196, 0x269},
- {0x197, 0x268},
- {0x198, 0x199},
- {0x199, 0x198},
- {0x19a, 0x23d},
- {0x19c, 0x26f},
- {0x19d, 0x272},
- {0x19e, 0x220},
- {0x19f, 0x275},
- {0x1a0, 0x1a1},
- {0x1a1, 0x1a0},
- {0x1a2, 0x1a3},
- {0x1a3, 0x1a2},
- {0x1a4, 0x1a5},
- {0x1a5, 0x1a4},
- {0x1a6, 0x280},
- {0x1a7, 0x1a8},
- {0x1a8, 0x1a7},
- {0x1a9, 0x283},
- {0x1ac, 0x1ad},
- {0x1ad, 0x1ac},
- {0x1ae, 0x288},
- {0x1af, 0x1b0},
- {0x1b0, 0x1af},
- {0x1b1, 0x28a},
- {0x1b2, 0x28b},
- {0x1b3, 0x1b4},
- {0x1b4, 0x1b3},
- {0x1b5, 0x1b6},
- {0x1b6, 0x1b5},
- {0x1b7, 0x292},
- {0x1b8, 0x1b9},
- {0x1b9, 0x1b8},
- {0x1bc, 0x1bd},
- {0x1bd, 0x1bc},
- {0x1bf, 0x1f7},
- {0x1c4, 0x1c5},
- {0x1c4, 0x1c6},
- {0x1c5, 0x1c4},
- {0x1c5, 0x1c6},
- {0x1c6, 0x1c4},
- {0x1c6, 0x1c5},
- {0x1c7, 0x1c8},
- {0x1c7, 0x1c9},
- {0x1c8, 0x1c7},
- {0x1c8, 0x1c9},
- {0x1c9, 0x1c7},
- {0x1c9, 0x1c8},
- {0x1ca, 0x1cb},
- {0x1ca, 0x1cc},
- {0x1cb, 0x1ca},
- {0x1cb, 0x1cc},
- {0x1cc, 0x1ca},
- {0x1cc, 0x1cb},
- {0x1cd, 0x1ce},
- {0x1ce, 0x1cd},
- {0x1cf, 0x1d0},
- {0x1d0, 0x1cf},
- {0x1d1, 0x1d2},
- {0x1d2, 0x1d1},
- {0x1d3, 0x1d4},
- {0x1d4, 0x1d3},
- {0x1d5, 0x1d6},
- {0x1d6, 0x1d5},
- {0x1d7, 0x1d8},
- {0x1d8, 0x1d7},
- {0x1d9, 0x1da},
- {0x1da, 0x1d9},
- {0x1db, 0x1dc},
- {0x1dc, 0x1db},
- {0x1dd, 0x18e},
- {0x1de, 0x1df},
- {0x1df, 0x1de},
- {0x1e0, 0x1e1},
- {0x1e1, 0x1e0},
- {0x1e2, 0x1e3},
- {0x1e3, 0x1e2},
- {0x1e4, 0x1e5},
- {0x1e5, 0x1e4},
- {0x1e6, 0x1e7},
- {0x1e7, 0x1e6},
- {0x1e8, 0x1e9},
- {0x1e9, 0x1e8},
- {0x1ea, 0x1eb},
- {0x1eb, 0x1ea},
- {0x1ec, 0x1ed},
- {0x1ed, 0x1ec},
- {0x1ee, 0x1ef},
- {0x1ef, 0x1ee},
- {0x1f1, 0x1f2},
- {0x1f1, 0x1f3},
- {0x1f2, 0x1f1},
- {0x1f2, 0x1f3},
- {0x1f3, 0x1f1},
- {0x1f3, 0x1f2},
- {0x1f4, 0x1f5},
- {0x1f5, 0x1f4},
- {0x1f6, 0x195},
- {0x1f7, 0x1bf},
- {0x1f8, 0x1f9},
- {0x1f9, 0x1f8},
- {0x1fa, 0x1fb},
- {0x1fb, 0x1fa},
- {0x1fc, 0x1fd},
- {0x1fd, 0x1fc},
- {0x1fe, 0x1ff},
- {0x1ff, 0x1fe},
- {0x200, 0x201},
- {0x201, 0x200},
- {0x202, 0x203},
- {0x203, 0x202},
- {0x204, 0x205},
- {0x205, 0x204},
- {0x206, 0x207},
- {0x207, 0x206},
- {0x208, 0x209},
- {0x209, 0x208},
- {0x20a, 0x20b},
- {0x20b, 0x20a},
- {0x20c, 0x20d},
- {0x20d, 0x20c},
- {0x20e, 0x20f},
- {0x20f, 0x20e},
- {0x210, 0x211},
- {0x211, 0x210},
- {0x212, 0x213},
- {0x213, 0x212},
- {0x214, 0x215},
- {0x215, 0x214},
- {0x216, 0x217},
- {0x217, 0x216},
- {0x218, 0x219},
- {0x219, 0x218},
- {0x21a, 0x21b},
- {0x21b, 0x21a},
- {0x21c, 0x21d},
- {0x21d, 0x21c},
- {0x21e, 0x21f},
- {0x21f, 0x21e},
- {0x220, 0x19e},
- {0x222, 0x223},
- {0x223, 0x222},
- {0x224, 0x225},
- {0x225, 0x224},
- {0x226, 0x227},
- {0x227, 0x226},
- {0x228, 0x229},
- {0x229, 0x228},
- {0x22a, 0x22b},
- {0x22b, 0x22a},
- {0x22c, 0x22d},
- {0x22d, 0x22c},
- {0x22e, 0x22f},
- {0x22f, 0x22e},
- {0x230, 0x231},
- {0x231, 0x230},
- {0x232, 0x233},
- {0x233, 0x232},
- {0x23a, 0x2c65},
- {0x23b, 0x23c},
- {0x23c, 0x23b},
- {0x23d, 0x19a},
- {0x23e, 0x2c66},
- {0x23f, 0x2c7e},
- {0x240, 0x2c7f},
- {0x241, 0x242},
- {0x242, 0x241},
- {0x243, 0x180},
- {0x244, 0x289},
- {0x245, 0x28c},
- {0x246, 0x247},
- {0x247, 0x246},
- {0x248, 0x249},
- {0x249, 0x248},
- {0x24a, 0x24b},
- {0x24b, 0x24a},
- {0x24c, 0x24d},
- {0x24d, 0x24c},
- {0x24e, 0x24f},
- {0x24f, 0x24e},
- {0x250, 0x2c6f},
- {0x251, 0x2c6d},
- {0x252, 0x2c70},
- {0x253, 0x181},
- {0x254, 0x186},
- {0x256, 0x189},
- {0x257, 0x18a},
- {0x259, 0x18f},
- {0x25b, 0x190},
- {0x25c, 0xa7ab},
- {0x260, 0x193},
- {0x261, 0xa7ac},
- {0x263, 0x194},
- {0x265, 0xa78d},
- {0x266, 0xa7aa},
- {0x268, 0x197},
- {0x269, 0x196},
- {0x26b, 0x2c62},
- {0x26c, 0xa7ad},
- {0x26f, 0x19c},
- {0x271, 0x2c6e},
- {0x272, 0x19d},
- {0x275, 0x19f},
- {0x27d, 0x2c64},
- {0x280, 0x1a6},
- {0x283, 0x1a9},
- {0x287, 0xa7b1},
- {0x288, 0x1ae},
- {0x289, 0x244},
- {0x28a, 0x1b1},
- {0x28b, 0x1b2},
- {0x28c, 0x245},
- {0x292, 0x1b7},
- {0x29e, 0xa7b0},
- {0x345, 0x399},
- {0x345, 0x3b9},
- {0x345, 0x1fbe},
- {0x370, 0x371},
- {0x371, 0x370},
- {0x372, 0x373},
- {0x373, 0x372},
- {0x376, 0x377},
- {0x377, 0x376},
- {0x37b, 0x3fd},
- {0x37c, 0x3fe},
- {0x37d, 0x3ff},
- {0x37f, 0x3f3},
- {0x386, 0x3ac},
- {0x388, 0x3ad},
- {0x389, 0x3ae},
- {0x38a, 0x3af},
- {0x38c, 0x3cc},
- {0x38e, 0x3cd},
- {0x38f, 0x3ce},
- {0x391, 0x3b1},
- {0x392, 0x3b2},
- {0x392, 0x3d0},
- {0x393, 0x3b3},
- {0x394, 0x3b4},
- {0x395, 0x3b5},
- {0x395, 0x3f5},
- {0x396, 0x3b6},
- {0x397, 0x3b7},
- {0x398, 0x3b8},
- {0x398, 0x3d1},
- {0x398, 0x3f4},
- {0x399, 0x345},
- {0x399, 0x3b9},
- {0x399, 0x1fbe},
- {0x39a, 0x3ba},
- {0x39a, 0x3f0},
- {0x39b, 0x3bb},
- {0x39c, 0xb5},
- {0x39c, 0x3bc},
- {0x39d, 0x3bd},
- {0x39e, 0x3be},
- {0x39f, 0x3bf},
- {0x3a0, 0x3c0},
- {0x3a0, 0x3d6},
- {0x3a1, 0x3c1},
- {0x3a1, 0x3f1},
- {0x3a3, 0x3c2},
- {0x3a3, 0x3c3},
- {0x3a4, 0x3c4},
- {0x3a5, 0x3c5},
- {0x3a6, 0x3c6},
- {0x3a6, 0x3d5},
- {0x3a7, 0x3c7},
- {0x3a8, 0x3c8},
- {0x3a9, 0x3c9},
- {0x3a9, 0x2126},
- {0x3aa, 0x3ca},
- {0x3ab, 0x3cb},
- {0x3ac, 0x386},
- {0x3ad, 0x388},
- {0x3ae, 0x389},
- {0x3af, 0x38a},
- {0x3b1, 0x391},
- {0x3b2, 0x392},
- {0x3b2, 0x3d0},
- {0x3b3, 0x393},
- {0x3b4, 0x394},
- {0x3b5, 0x395},
- {0x3b5, 0x3f5},
- {0x3b6, 0x396},
- {0x3b7, 0x397},
- {0x3b8, 0x398},
- {0x3b8, 0x3d1},
- {0x3b8, 0x3f4},
- {0x3b9, 0x345},
- {0x3b9, 0x399},
- {0x3b9, 0x1fbe},
- {0x3ba, 0x39a},
- {0x3ba, 0x3f0},
- {0x3bb, 0x39b},
- {0x3bc, 0xb5},
- {0x3bc, 0x39c},
- {0x3bd, 0x39d},
- {0x3be, 0x39e},
- {0x3bf, 0x39f},
- {0x3c0, 0x3a0},
- {0x3c0, 0x3d6},
- {0x3c1, 0x3a1},
- {0x3c1, 0x3f1},
- {0x3c2, 0x3a3},
- {0x3c2, 0x3c3},
- {0x3c3, 0x3a3},
- {0x3c3, 0x3c2},
- {0x3c4, 0x3a4},
- {0x3c5, 0x3a5},
- {0x3c6, 0x3a6},
- {0x3c6, 0x3d5},
- {0x3c7, 0x3a7},
- {0x3c8, 0x3a8},
- {0x3c9, 0x3a9},
- {0x3c9, 0x2126},
- {0x3ca, 0x3aa},
- {0x3cb, 0x3ab},
- {0x3cc, 0x38c},
- {0x3cd, 0x38e},
- {0x3ce, 0x38f},
- {0x3cf, 0x3d7},
- {0x3d0, 0x392},
- {0x3d0, 0x3b2},
- {0x3d1, 0x398},
- {0x3d1, 0x3b8},
- {0x3d1, 0x3f4},
- {0x3d5, 0x3a6},
- {0x3d5, 0x3c6},
- {0x3d6, 0x3a0},
- {0x3d6, 0x3c0},
- {0x3d7, 0x3cf},
- {0x3d8, 0x3d9},
- {0x3d9, 0x3d8},
- {0x3da, 0x3db},
- {0x3db, 0x3da},
- {0x3dc, 0x3dd},
- {0x3dd, 0x3dc},
- {0x3de, 0x3df},
- {0x3df, 0x3de},
- {0x3e0, 0x3e1},
- {0x3e1, 0x3e0},
- {0x3e2, 0x3e3},
- {0x3e3, 0x3e2},
- {0x3e4, 0x3e5},
- {0x3e5, 0x3e4},
- {0x3e6, 0x3e7},
- {0x3e7, 0x3e6},
- {0x3e8, 0x3e9},
- {0x3e9, 0x3e8},
- {0x3ea, 0x3eb},
- {0x3eb, 0x3ea},
- {0x3ec, 0x3ed},
- {0x3ed, 0x3ec},
- {0x3ee, 0x3ef},
- {0x3ef, 0x3ee},
- {0x3f0, 0x39a},
- {0x3f0, 0x3ba},
- {0x3f1, 0x3a1},
- {0x3f1, 0x3c1},
- {0x3f2, 0x3f9},
- {0x3f3, 0x37f},
- {0x3f4, 0x398},
- {0x3f4, 0x3b8},
- {0x3f4, 0x3d1},
- {0x3f5, 0x395},
- {0x3f5, 0x3b5},
- {0x3f7, 0x3f8},
- {0x3f8, 0x3f7},
- {0x3f9, 0x3f2},
- {0x3fa, 0x3fb},
- {0x3fb, 0x3fa},
- {0x3fd, 0x37b},
- {0x3fe, 0x37c},
- {0x3ff, 0x37d},
- {0x400, 0x450},
- {0x401, 0x451},
- {0x402, 0x452},
- {0x403, 0x453},
- {0x404, 0x454},
- {0x405, 0x455},
- {0x406, 0x456},
- {0x407, 0x457},
- {0x408, 0x458},
- {0x409, 0x459},
- {0x40a, 0x45a},
- {0x40b, 0x45b},
- {0x40c, 0x45c},
- {0x40d, 0x45d},
- {0x40e, 0x45e},
- {0x40f, 0x45f},
- {0x410, 0x430},
- {0x411, 0x431},
- {0x412, 0x432},
- {0x413, 0x433},
- {0x414, 0x434},
- {0x415, 0x435},
- {0x416, 0x436},
- {0x417, 0x437},
- {0x418, 0x438},
- {0x419, 0x439},
- {0x41a, 0x43a},
- {0x41b, 0x43b},
- {0x41c, 0x43c},
- {0x41d, 0x43d},
- {0x41e, 0x43e},
- {0x41f, 0x43f},
- {0x420, 0x440},
- {0x421, 0x441},
- {0x422, 0x442},
- {0x423, 0x443},
- {0x424, 0x444},
- {0x425, 0x445},
- {0x426, 0x446},
- {0x427, 0x447},
- {0x428, 0x448},
- {0x429, 0x449},
- {0x42a, 0x44a},
- {0x42b, 0x44b},
- {0x42c, 0x44c},
- {0x42d, 0x44d},
- {0x42e, 0x44e},
- {0x42f, 0x44f},
- {0x430, 0x410},
- {0x431, 0x411},
- {0x432, 0x412},
- {0x433, 0x413},
- {0x434, 0x414},
- {0x435, 0x415},
- {0x436, 0x416},
- {0x437, 0x417},
- {0x438, 0x418},
- {0x439, 0x419},
- {0x43a, 0x41a},
- {0x43b, 0x41b},
- {0x43c, 0x41c},
- {0x43d, 0x41d},
- {0x43e, 0x41e},
- {0x43f, 0x41f},
- {0x440, 0x420},
- {0x441, 0x421},
- {0x442, 0x422},
- {0x443, 0x423},
- {0x444, 0x424},
- {0x445, 0x425},
- {0x446, 0x426},
- {0x447, 0x427},
- {0x448, 0x428},
- {0x449, 0x429},
- {0x44a, 0x42a},
- {0x44b, 0x42b},
- {0x44c, 0x42c},
- {0x44d, 0x42d},
- {0x44e, 0x42e},
- {0x44f, 0x42f},
- {0x450, 0x400},
- {0x451, 0x401},
- {0x452, 0x402},
- {0x453, 0x403},
- {0x454, 0x404},
- {0x455, 0x405},
- {0x456, 0x406},
- {0x457, 0x407},
- {0x458, 0x408},
- {0x459, 0x409},
- {0x45a, 0x40a},
- {0x45b, 0x40b},
- {0x45c, 0x40c},
- {0x45d, 0x40d},
- {0x45e, 0x40e},
- {0x45f, 0x40f},
- {0x460, 0x461},
- {0x461, 0x460},
- {0x462, 0x463},
- {0x463, 0x462},
- {0x464, 0x465},
- {0x465, 0x464},
- {0x466, 0x467},
- {0x467, 0x466},
- {0x468, 0x469},
- {0x469, 0x468},
- {0x46a, 0x46b},
- {0x46b, 0x46a},
- {0x46c, 0x46d},
- {0x46d, 0x46c},
- {0x46e, 0x46f},
- {0x46f, 0x46e},
- {0x470, 0x471},
- {0x471, 0x470},
- {0x472, 0x473},
- {0x473, 0x472},
- {0x474, 0x475},
- {0x475, 0x474},
- {0x476, 0x477},
- {0x477, 0x476},
- {0x478, 0x479},
- {0x479, 0x478},
- {0x47a, 0x47b},
- {0x47b, 0x47a},
- {0x47c, 0x47d},
- {0x47d, 0x47c},
- {0x47e, 0x47f},
- {0x47f, 0x47e},
- {0x480, 0x481},
- {0x481, 0x480},
- {0x48a, 0x48b},
- {0x48b, 0x48a},
- {0x48c, 0x48d},
- {0x48d, 0x48c},
- {0x48e, 0x48f},
- {0x48f, 0x48e},
- {0x490, 0x491},
- {0x491, 0x490},
- {0x492, 0x493},
- {0x493, 0x492},
- {0x494, 0x495},
- {0x495, 0x494},
- {0x496, 0x497},
- {0x497, 0x496},
- {0x498, 0x499},
- {0x499, 0x498},
- {0x49a, 0x49b},
- {0x49b, 0x49a},
- {0x49c, 0x49d},
- {0x49d, 0x49c},
- {0x49e, 0x49f},
- {0x49f, 0x49e},
- {0x4a0, 0x4a1},
- {0x4a1, 0x4a0},
- {0x4a2, 0x4a3},
- {0x4a3, 0x4a2},
- {0x4a4, 0x4a5},
- {0x4a5, 0x4a4},
- {0x4a6, 0x4a7},
- {0x4a7, 0x4a6},
- {0x4a8, 0x4a9},
- {0x4a9, 0x4a8},
- {0x4aa, 0x4ab},
- {0x4ab, 0x4aa},
- {0x4ac, 0x4ad},
- {0x4ad, 0x4ac},
- {0x4ae, 0x4af},
- {0x4af, 0x4ae},
- {0x4b0, 0x4b1},
- {0x4b1, 0x4b0},
- {0x4b2, 0x4b3},
- {0x4b3, 0x4b2},
- {0x4b4, 0x4b5},
- {0x4b5, 0x4b4},
- {0x4b6, 0x4b7},
- {0x4b7, 0x4b6},
- {0x4b8, 0x4b9},
- {0x4b9, 0x4b8},
- {0x4ba, 0x4bb},
- {0x4bb, 0x4ba},
- {0x4bc, 0x4bd},
- {0x4bd, 0x4bc},
- {0x4be, 0x4bf},
- {0x4bf, 0x4be},
- {0x4c0, 0x4cf},
- {0x4c1, 0x4c2},
- {0x4c2, 0x4c1},
- {0x4c3, 0x4c4},
- {0x4c4, 0x4c3},
- {0x4c5, 0x4c6},
- {0x4c6, 0x4c5},
- {0x4c7, 0x4c8},
- {0x4c8, 0x4c7},
- {0x4c9, 0x4ca},
- {0x4ca, 0x4c9},
- {0x4cb, 0x4cc},
- {0x4cc, 0x4cb},
- {0x4cd, 0x4ce},
- {0x4ce, 0x4cd},
- {0x4cf, 0x4c0},
- {0x4d0, 0x4d1},
- {0x4d1, 0x4d0},
- {0x4d2, 0x4d3},
- {0x4d3, 0x4d2},
- {0x4d4, 0x4d5},
- {0x4d5, 0x4d4},
- {0x4d6, 0x4d7},
- {0x4d7, 0x4d6},
- {0x4d8, 0x4d9},
- {0x4d9, 0x4d8},
- {0x4da, 0x4db},
- {0x4db, 0x4da},
- {0x4dc, 0x4dd},
- {0x4dd, 0x4dc},
- {0x4de, 0x4df},
- {0x4df, 0x4de},
- {0x4e0, 0x4e1},
- {0x4e1, 0x4e0},
- {0x4e2, 0x4e3},
- {0x4e3, 0x4e2},
- {0x4e4, 0x4e5},
- {0x4e5, 0x4e4},
- {0x4e6, 0x4e7},
- {0x4e7, 0x4e6},
- {0x4e8, 0x4e9},
- {0x4e9, 0x4e8},
- {0x4ea, 0x4eb},
- {0x4eb, 0x4ea},
- {0x4ec, 0x4ed},
- {0x4ed, 0x4ec},
- {0x4ee, 0x4ef},
- {0x4ef, 0x4ee},
- {0x4f0, 0x4f1},
- {0x4f1, 0x4f0},
- {0x4f2, 0x4f3},
- {0x4f3, 0x4f2},
- {0x4f4, 0x4f5},
- {0x4f5, 0x4f4},
- {0x4f6, 0x4f7},
- {0x4f7, 0x4f6},
- {0x4f8, 0x4f9},
- {0x4f9, 0x4f8},
- {0x4fa, 0x4fb},
- {0x4fb, 0x4fa},
- {0x4fc, 0x4fd},
- {0x4fd, 0x4fc},
- {0x4fe, 0x4ff},
- {0x4ff, 0x4fe},
- {0x500, 0x501},
- {0x501, 0x500},
- {0x502, 0x503},
- {0x503, 0x502},
- {0x504, 0x505},
- {0x505, 0x504},
- {0x506, 0x507},
- {0x507, 0x506},
- {0x508, 0x509},
- {0x509, 0x508},
- {0x50a, 0x50b},
- {0x50b, 0x50a},
- {0x50c, 0x50d},
- {0x50d, 0x50c},
- {0x50e, 0x50f},
- {0x50f, 0x50e},
- {0x510, 0x511},
- {0x511, 0x510},
- {0x512, 0x513},
- {0x513, 0x512},
- {0x514, 0x515},
- {0x515, 0x514},
- {0x516, 0x517},
- {0x517, 0x516},
- {0x518, 0x519},
- {0x519, 0x518},
- {0x51a, 0x51b},
- {0x51b, 0x51a},
- {0x51c, 0x51d},
- {0x51d, 0x51c},
- {0x51e, 0x51f},
- {0x51f, 0x51e},
- {0x520, 0x521},
- {0x521, 0x520},
- {0x522, 0x523},
- {0x523, 0x522},
- {0x524, 0x525},
- {0x525, 0x524},
- {0x526, 0x527},
- {0x527, 0x526},
- {0x528, 0x529},
- {0x529, 0x528},
- {0x52a, 0x52b},
- {0x52b, 0x52a},
- {0x52c, 0x52d},
- {0x52d, 0x52c},
- {0x52e, 0x52f},
- {0x52f, 0x52e},
- {0x531, 0x561},
- {0x532, 0x562},
- {0x533, 0x563},
- {0x534, 0x564},
- {0x535, 0x565},
- {0x536, 0x566},
- {0x537, 0x567},
- {0x538, 0x568},
- {0x539, 0x569},
- {0x53a, 0x56a},
- {0x53b, 0x56b},
- {0x53c, 0x56c},
- {0x53d, 0x56d},
- {0x53e, 0x56e},
- {0x53f, 0x56f},
- {0x540, 0x570},
- {0x541, 0x571},
- {0x542, 0x572},
- {0x543, 0x573},
- {0x544, 0x574},
- {0x545, 0x575},
- {0x546, 0x576},
- {0x547, 0x577},
- {0x548, 0x578},
- {0x549, 0x579},
- {0x54a, 0x57a},
- {0x54b, 0x57b},
- {0x54c, 0x57c},
- {0x54d, 0x57d},
- {0x54e, 0x57e},
- {0x54f, 0x57f},
- {0x550, 0x580},
- {0x551, 0x581},
- {0x552, 0x582},
- {0x553, 0x583},
- {0x554, 0x584},
- {0x555, 0x585},
- {0x556, 0x586},
- {0x561, 0x531},
- {0x562, 0x532},
- {0x563, 0x533},
- {0x564, 0x534},
- {0x565, 0x535},
- {0x566, 0x536},
- {0x567, 0x537},
- {0x568, 0x538},
- {0x569, 0x539},
- {0x56a, 0x53a},
- {0x56b, 0x53b},
- {0x56c, 0x53c},
- {0x56d, 0x53d},
- {0x56e, 0x53e},
- {0x56f, 0x53f},
- {0x570, 0x540},
- {0x571, 0x541},
- {0x572, 0x542},
- {0x573, 0x543},
- {0x574, 0x544},
- {0x575, 0x545},
- {0x576, 0x546},
- {0x577, 0x547},
- {0x578, 0x548},
- {0x579, 0x549},
- {0x57a, 0x54a},
- {0x57b, 0x54b},
- {0x57c, 0x54c},
- {0x57d, 0x54d},
- {0x57e, 0x54e},
- {0x57f, 0x54f},
- {0x580, 0x550},
- {0x581, 0x551},
- {0x582, 0x552},
- {0x583, 0x553},
- {0x584, 0x554},
- {0x585, 0x555},
- {0x586, 0x556},
- {0x10a0, 0x2d00},
- {0x10a1, 0x2d01},
- {0x10a2, 0x2d02},
- {0x10a3, 0x2d03},
- {0x10a4, 0x2d04},
- {0x10a5, 0x2d05},
- {0x10a6, 0x2d06},
- {0x10a7, 0x2d07},
- {0x10a8, 0x2d08},
- {0x10a9, 0x2d09},
- {0x10aa, 0x2d0a},
- {0x10ab, 0x2d0b},
- {0x10ac, 0x2d0c},
- {0x10ad, 0x2d0d},
- {0x10ae, 0x2d0e},
- {0x10af, 0x2d0f},
- {0x10b0, 0x2d10},
- {0x10b1, 0x2d11},
- {0x10b2, 0x2d12},
- {0x10b3, 0x2d13},
- {0x10b4, 0x2d14},
- {0x10b5, 0x2d15},
- {0x10b6, 0x2d16},
- {0x10b7, 0x2d17},
- {0x10b8, 0x2d18},
- {0x10b9, 0x2d19},
- {0x10ba, 0x2d1a},
- {0x10bb, 0x2d1b},
- {0x10bc, 0x2d1c},
- {0x10bd, 0x2d1d},
- {0x10be, 0x2d1e},
- {0x10bf, 0x2d1f},
- {0x10c0, 0x2d20},
- {0x10c1, 0x2d21},
- {0x10c2, 0x2d22},
- {0x10c3, 0x2d23},
- {0x10c4, 0x2d24},
- {0x10c5, 0x2d25},
- {0x10c7, 0x2d27},
- {0x10cd, 0x2d2d},
- {0x1d79, 0xa77d},
- {0x1d7d, 0x2c63},
- {0x1e00, 0x1e01},
- {0x1e01, 0x1e00},
- {0x1e02, 0x1e03},
- {0x1e03, 0x1e02},
- {0x1e04, 0x1e05},
- {0x1e05, 0x1e04},
- {0x1e06, 0x1e07},
- {0x1e07, 0x1e06},
- {0x1e08, 0x1e09},
- {0x1e09, 0x1e08},
- {0x1e0a, 0x1e0b},
- {0x1e0b, 0x1e0a},
- {0x1e0c, 0x1e0d},
- {0x1e0d, 0x1e0c},
- {0x1e0e, 0x1e0f},
- {0x1e0f, 0x1e0e},
- {0x1e10, 0x1e11},
- {0x1e11, 0x1e10},
- {0x1e12, 0x1e13},
- {0x1e13, 0x1e12},
- {0x1e14, 0x1e15},
- {0x1e15, 0x1e14},
- {0x1e16, 0x1e17},
- {0x1e17, 0x1e16},
- {0x1e18, 0x1e19},
- {0x1e19, 0x1e18},
- {0x1e1a, 0x1e1b},
- {0x1e1b, 0x1e1a},
- {0x1e1c, 0x1e1d},
- {0x1e1d, 0x1e1c},
- {0x1e1e, 0x1e1f},
- {0x1e1f, 0x1e1e},
- {0x1e20, 0x1e21},
- {0x1e21, 0x1e20},
- {0x1e22, 0x1e23},
- {0x1e23, 0x1e22},
- {0x1e24, 0x1e25},
- {0x1e25, 0x1e24},
- {0x1e26, 0x1e27},
- {0x1e27, 0x1e26},
- {0x1e28, 0x1e29},
- {0x1e29, 0x1e28},
- {0x1e2a, 0x1e2b},
- {0x1e2b, 0x1e2a},
- {0x1e2c, 0x1e2d},
- {0x1e2d, 0x1e2c},
- {0x1e2e, 0x1e2f},
- {0x1e2f, 0x1e2e},
- {0x1e30, 0x1e31},
- {0x1e31, 0x1e30},
- {0x1e32, 0x1e33},
- {0x1e33, 0x1e32},
- {0x1e34, 0x1e35},
- {0x1e35, 0x1e34},
- {0x1e36, 0x1e37},
- {0x1e37, 0x1e36},
- {0x1e38, 0x1e39},
- {0x1e39, 0x1e38},
- {0x1e3a, 0x1e3b},
- {0x1e3b, 0x1e3a},
- {0x1e3c, 0x1e3d},
- {0x1e3d, 0x1e3c},
- {0x1e3e, 0x1e3f},
- {0x1e3f, 0x1e3e},
- {0x1e40, 0x1e41},
- {0x1e41, 0x1e40},
- {0x1e42, 0x1e43},
- {0x1e43, 0x1e42},
- {0x1e44, 0x1e45},
- {0x1e45, 0x1e44},
- {0x1e46, 0x1e47},
- {0x1e47, 0x1e46},
- {0x1e48, 0x1e49},
- {0x1e49, 0x1e48},
- {0x1e4a, 0x1e4b},
- {0x1e4b, 0x1e4a},
- {0x1e4c, 0x1e4d},
- {0x1e4d, 0x1e4c},
- {0x1e4e, 0x1e4f},
- {0x1e4f, 0x1e4e},
- {0x1e50, 0x1e51},
- {0x1e51, 0x1e50},
- {0x1e52, 0x1e53},
- {0x1e53, 0x1e52},
- {0x1e54, 0x1e55},
- {0x1e55, 0x1e54},
- {0x1e56, 0x1e57},
- {0x1e57, 0x1e56},
- {0x1e58, 0x1e59},
- {0x1e59, 0x1e58},
- {0x1e5a, 0x1e5b},
- {0x1e5b, 0x1e5a},
- {0x1e5c, 0x1e5d},
- {0x1e5d, 0x1e5c},
- {0x1e5e, 0x1e5f},
- {0x1e5f, 0x1e5e},
- {0x1e60, 0x1e61},
- {0x1e60, 0x1e9b},
- {0x1e61, 0x1e60},
- {0x1e61, 0x1e9b},
- {0x1e62, 0x1e63},
- {0x1e63, 0x1e62},
- {0x1e64, 0x1e65},
- {0x1e65, 0x1e64},
- {0x1e66, 0x1e67},
- {0x1e67, 0x1e66},
- {0x1e68, 0x1e69},
- {0x1e69, 0x1e68},
- {0x1e6a, 0x1e6b},
- {0x1e6b, 0x1e6a},
- {0x1e6c, 0x1e6d},
- {0x1e6d, 0x1e6c},
- {0x1e6e, 0x1e6f},
- {0x1e6f, 0x1e6e},
- {0x1e70, 0x1e71},
- {0x1e71, 0x1e70},
- {0x1e72, 0x1e73},
- {0x1e73, 0x1e72},
- {0x1e74, 0x1e75},
- {0x1e75, 0x1e74},
- {0x1e76, 0x1e77},
- {0x1e77, 0x1e76},
- {0x1e78, 0x1e79},
- {0x1e79, 0x1e78},
- {0x1e7a, 0x1e7b},
- {0x1e7b, 0x1e7a},
- {0x1e7c, 0x1e7d},
- {0x1e7d, 0x1e7c},
- {0x1e7e, 0x1e7f},
- {0x1e7f, 0x1e7e},
- {0x1e80, 0x1e81},
- {0x1e81, 0x1e80},
- {0x1e82, 0x1e83},
- {0x1e83, 0x1e82},
- {0x1e84, 0x1e85},
- {0x1e85, 0x1e84},
- {0x1e86, 0x1e87},
- {0x1e87, 0x1e86},
- {0x1e88, 0x1e89},
- {0x1e89, 0x1e88},
- {0x1e8a, 0x1e8b},
- {0x1e8b, 0x1e8a},
- {0x1e8c, 0x1e8d},
- {0x1e8d, 0x1e8c},
- {0x1e8e, 0x1e8f},
- {0x1e8f, 0x1e8e},
- {0x1e90, 0x1e91},
- {0x1e91, 0x1e90},
- {0x1e92, 0x1e93},
- {0x1e93, 0x1e92},
- {0x1e94, 0x1e95},
- {0x1e95, 0x1e94},
- {0x1e9b, 0x1e60},
- {0x1e9b, 0x1e61},
- {0x1e9e, 0xdf},
- {0x1ea0, 0x1ea1},
- {0x1ea1, 0x1ea0},
- {0x1ea2, 0x1ea3},
- {0x1ea3, 0x1ea2},
- {0x1ea4, 0x1ea5},
- {0x1ea5, 0x1ea4},
- {0x1ea6, 0x1ea7},
- {0x1ea7, 0x1ea6},
- {0x1ea8, 0x1ea9},
- {0x1ea9, 0x1ea8},
- {0x1eaa, 0x1eab},
- {0x1eab, 0x1eaa},
- {0x1eac, 0x1ead},
- {0x1ead, 0x1eac},
- {0x1eae, 0x1eaf},
- {0x1eaf, 0x1eae},
- {0x1eb0, 0x1eb1},
- {0x1eb1, 0x1eb0},
- {0x1eb2, 0x1eb3},
- {0x1eb3, 0x1eb2},
- {0x1eb4, 0x1eb5},
- {0x1eb5, 0x1eb4},
- {0x1eb6, 0x1eb7},
- {0x1eb7, 0x1eb6},
- {0x1eb8, 0x1eb9},
- {0x1eb9, 0x1eb8},
- {0x1eba, 0x1ebb},
- {0x1ebb, 0x1eba},
- {0x1ebc, 0x1ebd},
- {0x1ebd, 0x1ebc},
- {0x1ebe, 0x1ebf},
- {0x1ebf, 0x1ebe},
- {0x1ec0, 0x1ec1},
- {0x1ec1, 0x1ec0},
- {0x1ec2, 0x1ec3},
- {0x1ec3, 0x1ec2},
- {0x1ec4, 0x1ec5},
- {0x1ec5, 0x1ec4},
- {0x1ec6, 0x1ec7},
- {0x1ec7, 0x1ec6},
- {0x1ec8, 0x1ec9},
- {0x1ec9, 0x1ec8},
- {0x1eca, 0x1ecb},
- {0x1ecb, 0x1eca},
- {0x1ecc, 0x1ecd},
- {0x1ecd, 0x1ecc},
- {0x1ece, 0x1ecf},
- {0x1ecf, 0x1ece},
- {0x1ed0, 0x1ed1},
- {0x1ed1, 0x1ed0},
- {0x1ed2, 0x1ed3},
- {0x1ed3, 0x1ed2},
- {0x1ed4, 0x1ed5},
- {0x1ed5, 0x1ed4},
- {0x1ed6, 0x1ed7},
- {0x1ed7, 0x1ed6},
- {0x1ed8, 0x1ed9},
- {0x1ed9, 0x1ed8},
- {0x1eda, 0x1edb},
- {0x1edb, 0x1eda},
- {0x1edc, 0x1edd},
- {0x1edd, 0x1edc},
- {0x1ede, 0x1edf},
- {0x1edf, 0x1ede},
- {0x1ee0, 0x1ee1},
- {0x1ee1, 0x1ee0},
- {0x1ee2, 0x1ee3},
- {0x1ee3, 0x1ee2},
- {0x1ee4, 0x1ee5},
- {0x1ee5, 0x1ee4},
- {0x1ee6, 0x1ee7},
- {0x1ee7, 0x1ee6},
- {0x1ee8, 0x1ee9},
- {0x1ee9, 0x1ee8},
- {0x1eea, 0x1eeb},
- {0x1eeb, 0x1eea},
- {0x1eec, 0x1eed},
- {0x1eed, 0x1eec},
- {0x1eee, 0x1eef},
- {0x1eef, 0x1eee},
- {0x1ef0, 0x1ef1},
- {0x1ef1, 0x1ef0},
- {0x1ef2, 0x1ef3},
- {0x1ef3, 0x1ef2},
- {0x1ef4, 0x1ef5},
- {0x1ef5, 0x1ef4},
- {0x1ef6, 0x1ef7},
- {0x1ef7, 0x1ef6},
- {0x1ef8, 0x1ef9},
- {0x1ef9, 0x1ef8},
- {0x1efa, 0x1efb},
- {0x1efb, 0x1efa},
- {0x1efc, 0x1efd},
- {0x1efd, 0x1efc},
- {0x1efe, 0x1eff},
- {0x1eff, 0x1efe},
- {0x1f00, 0x1f08},
- {0x1f01, 0x1f09},
- {0x1f02, 0x1f0a},
- {0x1f03, 0x1f0b},
- {0x1f04, 0x1f0c},
- {0x1f05, 0x1f0d},
- {0x1f06, 0x1f0e},
- {0x1f07, 0x1f0f},
- {0x1f08, 0x1f00},
- {0x1f09, 0x1f01},
- {0x1f0a, 0x1f02},
- {0x1f0b, 0x1f03},
- {0x1f0c, 0x1f04},
- {0x1f0d, 0x1f05},
- {0x1f0e, 0x1f06},
- {0x1f0f, 0x1f07},
- {0x1f10, 0x1f18},
- {0x1f11, 0x1f19},
- {0x1f12, 0x1f1a},
- {0x1f13, 0x1f1b},
- {0x1f14, 0x1f1c},
- {0x1f15, 0x1f1d},
- {0x1f18, 0x1f10},
- {0x1f19, 0x1f11},
- {0x1f1a, 0x1f12},
- {0x1f1b, 0x1f13},
- {0x1f1c, 0x1f14},
- {0x1f1d, 0x1f15},
- {0x1f20, 0x1f28},
- {0x1f21, 0x1f29},
- {0x1f22, 0x1f2a},
- {0x1f23, 0x1f2b},
- {0x1f24, 0x1f2c},
- {0x1f25, 0x1f2d},
- {0x1f26, 0x1f2e},
- {0x1f27, 0x1f2f},
- {0x1f28, 0x1f20},
- {0x1f29, 0x1f21},
- {0x1f2a, 0x1f22},
- {0x1f2b, 0x1f23},
- {0x1f2c, 0x1f24},
- {0x1f2d, 0x1f25},
- {0x1f2e, 0x1f26},
- {0x1f2f, 0x1f27},
- {0x1f30, 0x1f38},
- {0x1f31, 0x1f39},
- {0x1f32, 0x1f3a},
- {0x1f33, 0x1f3b},
- {0x1f34, 0x1f3c},
- {0x1f35, 0x1f3d},
- {0x1f36, 0x1f3e},
- {0x1f37, 0x1f3f},
- {0x1f38, 0x1f30},
- {0x1f39, 0x1f31},
- {0x1f3a, 0x1f32},
- {0x1f3b, 0x1f33},
- {0x1f3c, 0x1f34},
- {0x1f3d, 0x1f35},
- {0x1f3e, 0x1f36},
- {0x1f3f, 0x1f37},
- {0x1f40, 0x1f48},
- {0x1f41, 0x1f49},
- {0x1f42, 0x1f4a},
- {0x1f43, 0x1f4b},
- {0x1f44, 0x1f4c},
- {0x1f45, 0x1f4d},
- {0x1f48, 0x1f40},
- {0x1f49, 0x1f41},
- {0x1f4a, 0x1f42},
- {0x1f4b, 0x1f43},
- {0x1f4c, 0x1f44},
- {0x1f4d, 0x1f45},
- {0x1f51, 0x1f59},
- {0x1f53, 0x1f5b},
- {0x1f55, 0x1f5d},
- {0x1f57, 0x1f5f},
- {0x1f59, 0x1f51},
- {0x1f5b, 0x1f53},
- {0x1f5d, 0x1f55},
- {0x1f5f, 0x1f57},
- {0x1f60, 0x1f68},
- {0x1f61, 0x1f69},
- {0x1f62, 0x1f6a},
- {0x1f63, 0x1f6b},
- {0x1f64, 0x1f6c},
- {0x1f65, 0x1f6d},
- {0x1f66, 0x1f6e},
- {0x1f67, 0x1f6f},
- {0x1f68, 0x1f60},
- {0x1f69, 0x1f61},
- {0x1f6a, 0x1f62},
- {0x1f6b, 0x1f63},
- {0x1f6c, 0x1f64},
- {0x1f6d, 0x1f65},
- {0x1f6e, 0x1f66},
- {0x1f6f, 0x1f67},
- {0x1f70, 0x1fba},
- {0x1f71, 0x1fbb},
- {0x1f72, 0x1fc8},
- {0x1f73, 0x1fc9},
- {0x1f74, 0x1fca},
- {0x1f75, 0x1fcb},
- {0x1f76, 0x1fda},
- {0x1f77, 0x1fdb},
- {0x1f78, 0x1ff8},
- {0x1f79, 0x1ff9},
- {0x1f7a, 0x1fea},
- {0x1f7b, 0x1feb},
- {0x1f7c, 0x1ffa},
- {0x1f7d, 0x1ffb},
- {0x1f80, 0x1f88},
- {0x1f81, 0x1f89},
- {0x1f82, 0x1f8a},
- {0x1f83, 0x1f8b},
- {0x1f84, 0x1f8c},
- {0x1f85, 0x1f8d},
- {0x1f86, 0x1f8e},
- {0x1f87, 0x1f8f},
- {0x1f88, 0x1f80},
- {0x1f89, 0x1f81},
- {0x1f8a, 0x1f82},
- {0x1f8b, 0x1f83},
- {0x1f8c, 0x1f84},
- {0x1f8d, 0x1f85},
- {0x1f8e, 0x1f86},
- {0x1f8f, 0x1f87},
- {0x1f90, 0x1f98},
- {0x1f91, 0x1f99},
- {0x1f92, 0x1f9a},
- {0x1f93, 0x1f9b},
- {0x1f94, 0x1f9c},
- {0x1f95, 0x1f9d},
- {0x1f96, 0x1f9e},
- {0x1f97, 0x1f9f},
- {0x1f98, 0x1f90},
- {0x1f99, 0x1f91},
- {0x1f9a, 0x1f92},
- {0x1f9b, 0x1f93},
- {0x1f9c, 0x1f94},
- {0x1f9d, 0x1f95},
- {0x1f9e, 0x1f96},
- {0x1f9f, 0x1f97},
- {0x1fa0, 0x1fa8},
- {0x1fa1, 0x1fa9},
- {0x1fa2, 0x1faa},
- {0x1fa3, 0x1fab},
- {0x1fa4, 0x1fac},
- {0x1fa5, 0x1fad},
- {0x1fa6, 0x1fae},
- {0x1fa7, 0x1faf},
- {0x1fa8, 0x1fa0},
- {0x1fa9, 0x1fa1},
- {0x1faa, 0x1fa2},
- {0x1fab, 0x1fa3},
- {0x1fac, 0x1fa4},
- {0x1fad, 0x1fa5},
- {0x1fae, 0x1fa6},
- {0x1faf, 0x1fa7},
- {0x1fb0, 0x1fb8},
- {0x1fb1, 0x1fb9},
- {0x1fb3, 0x1fbc},
- {0x1fb8, 0x1fb0},
- {0x1fb9, 0x1fb1},
- {0x1fba, 0x1f70},
- {0x1fbb, 0x1f71},
- {0x1fbc, 0x1fb3},
- {0x1fbe, 0x345},
- {0x1fbe, 0x399},
- {0x1fbe, 0x3b9},
- {0x1fc3, 0x1fcc},
- {0x1fc8, 0x1f72},
- {0x1fc9, 0x1f73},
- {0x1fca, 0x1f74},
- {0x1fcb, 0x1f75},
- {0x1fcc, 0x1fc3},
- {0x1fd0, 0x1fd8},
- {0x1fd1, 0x1fd9},
- {0x1fd8, 0x1fd0},
- {0x1fd9, 0x1fd1},
- {0x1fda, 0x1f76},
- {0x1fdb, 0x1f77},
- {0x1fe0, 0x1fe8},
- {0x1fe1, 0x1fe9},
- {0x1fe5, 0x1fec},
- {0x1fe8, 0x1fe0},
- {0x1fe9, 0x1fe1},
- {0x1fea, 0x1f7a},
- {0x1feb, 0x1f7b},
- {0x1fec, 0x1fe5},
- {0x1ff3, 0x1ffc},
- {0x1ff8, 0x1f78},
- {0x1ff9, 0x1f79},
- {0x1ffa, 0x1f7c},
- {0x1ffb, 0x1f7d},
- {0x1ffc, 0x1ff3},
- {0x2126, 0x3a9},
- {0x2126, 0x3c9},
- {0x212a, 0x4b},
- {0x212a, 0x6b},
- {0x212b, 0xc5},
- {0x212b, 0xe5},
- {0x2132, 0x214e},
- {0x214e, 0x2132},
- {0x2160, 0x2170},
- {0x2161, 0x2171},
- {0x2162, 0x2172},
- {0x2163, 0x2173},
- {0x2164, 0x2174},
- {0x2165, 0x2175},
- {0x2166, 0x2176},
- {0x2167, 0x2177},
- {0x2168, 0x2178},
- {0x2169, 0x2179},
- {0x216a, 0x217a},
- {0x216b, 0x217b},
- {0x216c, 0x217c},
- {0x216d, 0x217d},
- {0x216e, 0x217e},
- {0x216f, 0x217f},
- {0x2170, 0x2160},
- {0x2171, 0x2161},
- {0x2172, 0x2162},
- {0x2173, 0x2163},
- {0x2174, 0x2164},
- {0x2175, 0x2165},
- {0x2176, 0x2166},
- {0x2177, 0x2167},
- {0x2178, 0x2168},
- {0x2179, 0x2169},
- {0x217a, 0x216a},
- {0x217b, 0x216b},
- {0x217c, 0x216c},
- {0x217d, 0x216d},
- {0x217e, 0x216e},
- {0x217f, 0x216f},
- {0x2183, 0x2184},
- {0x2184, 0x2183},
- {0x24b6, 0x24d0},
- {0x24b7, 0x24d1},
- {0x24b8, 0x24d2},
- {0x24b9, 0x24d3},
- {0x24ba, 0x24d4},
- {0x24bb, 0x24d5},
- {0x24bc, 0x24d6},
- {0x24bd, 0x24d7},
- {0x24be, 0x24d8},
- {0x24bf, 0x24d9},
- {0x24c0, 0x24da},
- {0x24c1, 0x24db},
- {0x24c2, 0x24dc},
- {0x24c3, 0x24dd},
- {0x24c4, 0x24de},
- {0x24c5, 0x24df},
- {0x24c6, 0x24e0},
- {0x24c7, 0x24e1},
- {0x24c8, 0x24e2},
- {0x24c9, 0x24e3},
- {0x24ca, 0x24e4},
- {0x24cb, 0x24e5},
- {0x24cc, 0x24e6},
- {0x24cd, 0x24e7},
- {0x24ce, 0x24e8},
- {0x24cf, 0x24e9},
- {0x24d0, 0x24b6},
- {0x24d1, 0x24b7},
- {0x24d2, 0x24b8},
- {0x24d3, 0x24b9},
- {0x24d4, 0x24ba},
- {0x24d5, 0x24bb},
- {0x24d6, 0x24bc},
- {0x24d7, 0x24bd},
- {0x24d8, 0x24be},
- {0x24d9, 0x24bf},
- {0x24da, 0x24c0},
- {0x24db, 0x24c1},
- {0x24dc, 0x24c2},
- {0x24dd, 0x24c3},
- {0x24de, 0x24c4},
- {0x24df, 0x24c5},
- {0x24e0, 0x24c6},
- {0x24e1, 0x24c7},
- {0x24e2, 0x24c8},
- {0x24e3, 0x24c9},
- {0x24e4, 0x24ca},
- {0x24e5, 0x24cb},
- {0x24e6, 0x24cc},
- {0x24e7, 0x24cd},
- {0x24e8, 0x24ce},
- {0x24e9, 0x24cf},
- {0x2c00, 0x2c30},
- {0x2c01, 0x2c31},
- {0x2c02, 0x2c32},
- {0x2c03, 0x2c33},
- {0x2c04, 0x2c34},
- {0x2c05, 0x2c35},
- {0x2c06, 0x2c36},
- {0x2c07, 0x2c37},
- {0x2c08, 0x2c38},
- {0x2c09, 0x2c39},
- {0x2c0a, 0x2c3a},
- {0x2c0b, 0x2c3b},
- {0x2c0c, 0x2c3c},
- {0x2c0d, 0x2c3d},
- {0x2c0e, 0x2c3e},
- {0x2c0f, 0x2c3f},
- {0x2c10, 0x2c40},
- {0x2c11, 0x2c41},
- {0x2c12, 0x2c42},
- {0x2c13, 0x2c43},
- {0x2c14, 0x2c44},
- {0x2c15, 0x2c45},
- {0x2c16, 0x2c46},
- {0x2c17, 0x2c47},
- {0x2c18, 0x2c48},
- {0x2c19, 0x2c49},
- {0x2c1a, 0x2c4a},
- {0x2c1b, 0x2c4b},
- {0x2c1c, 0x2c4c},
- {0x2c1d, 0x2c4d},
- {0x2c1e, 0x2c4e},
- {0x2c1f, 0x2c4f},
- {0x2c20, 0x2c50},
- {0x2c21, 0x2c51},
- {0x2c22, 0x2c52},
- {0x2c23, 0x2c53},
- {0x2c24, 0x2c54},
- {0x2c25, 0x2c55},
- {0x2c26, 0x2c56},
- {0x2c27, 0x2c57},
- {0x2c28, 0x2c58},
- {0x2c29, 0x2c59},
- {0x2c2a, 0x2c5a},
- {0x2c2b, 0x2c5b},
- {0x2c2c, 0x2c5c},
- {0x2c2d, 0x2c5d},
- {0x2c2e, 0x2c5e},
- {0x2c30, 0x2c00},
- {0x2c31, 0x2c01},
- {0x2c32, 0x2c02},
- {0x2c33, 0x2c03},
- {0x2c34, 0x2c04},
- {0x2c35, 0x2c05},
- {0x2c36, 0x2c06},
- {0x2c37, 0x2c07},
- {0x2c38, 0x2c08},
- {0x2c39, 0x2c09},
- {0x2c3a, 0x2c0a},
- {0x2c3b, 0x2c0b},
- {0x2c3c, 0x2c0c},
- {0x2c3d, 0x2c0d},
- {0x2c3e, 0x2c0e},
- {0x2c3f, 0x2c0f},
- {0x2c40, 0x2c10},
- {0x2c41, 0x2c11},
- {0x2c42, 0x2c12},
- {0x2c43, 0x2c13},
- {0x2c44, 0x2c14},
- {0x2c45, 0x2c15},
- {0x2c46, 0x2c16},
- {0x2c47, 0x2c17},
- {0x2c48, 0x2c18},
- {0x2c49, 0x2c19},
- {0x2c4a, 0x2c1a},
- {0x2c4b, 0x2c1b},
- {0x2c4c, 0x2c1c},
- {0x2c4d, 0x2c1d},
- {0x2c4e, 0x2c1e},
- {0x2c4f, 0x2c1f},
- {0x2c50, 0x2c20},
- {0x2c51, 0x2c21},
- {0x2c52, 0x2c22},
- {0x2c53, 0x2c23},
- {0x2c54, 0x2c24},
- {0x2c55, 0x2c25},
- {0x2c56, 0x2c26},
- {0x2c57, 0x2c27},
- {0x2c58, 0x2c28},
- {0x2c59, 0x2c29},
- {0x2c5a, 0x2c2a},
- {0x2c5b, 0x2c2b},
- {0x2c5c, 0x2c2c},
- {0x2c5d, 0x2c2d},
- {0x2c5e, 0x2c2e},
- {0x2c60, 0x2c61},
- {0x2c61, 0x2c60},
- {0x2c62, 0x26b},
- {0x2c63, 0x1d7d},
- {0x2c64, 0x27d},
- {0x2c65, 0x23a},
- {0x2c66, 0x23e},
- {0x2c67, 0x2c68},
- {0x2c68, 0x2c67},
- {0x2c69, 0x2c6a},
- {0x2c6a, 0x2c69},
- {0x2c6b, 0x2c6c},
- {0x2c6c, 0x2c6b},
- {0x2c6d, 0x251},
- {0x2c6e, 0x271},
- {0x2c6f, 0x250},
- {0x2c70, 0x252},
- {0x2c72, 0x2c73},
- {0x2c73, 0x2c72},
- {0x2c75, 0x2c76},
- {0x2c76, 0x2c75},
- {0x2c7e, 0x23f},
- {0x2c7f, 0x240},
- {0x2c80, 0x2c81},
- {0x2c81, 0x2c80},
- {0x2c82, 0x2c83},
- {0x2c83, 0x2c82},
- {0x2c84, 0x2c85},
- {0x2c85, 0x2c84},
- {0x2c86, 0x2c87},
- {0x2c87, 0x2c86},
- {0x2c88, 0x2c89},
- {0x2c89, 0x2c88},
- {0x2c8a, 0x2c8b},
- {0x2c8b, 0x2c8a},
- {0x2c8c, 0x2c8d},
- {0x2c8d, 0x2c8c},
- {0x2c8e, 0x2c8f},
- {0x2c8f, 0x2c8e},
- {0x2c90, 0x2c91},
- {0x2c91, 0x2c90},
- {0x2c92, 0x2c93},
- {0x2c93, 0x2c92},
- {0x2c94, 0x2c95},
- {0x2c95, 0x2c94},
- {0x2c96, 0x2c97},
- {0x2c97, 0x2c96},
- {0x2c98, 0x2c99},
- {0x2c99, 0x2c98},
- {0x2c9a, 0x2c9b},
- {0x2c9b, 0x2c9a},
- {0x2c9c, 0x2c9d},
- {0x2c9d, 0x2c9c},
- {0x2c9e, 0x2c9f},
- {0x2c9f, 0x2c9e},
- {0x2ca0, 0x2ca1},
- {0x2ca1, 0x2ca0},
- {0x2ca2, 0x2ca3},
- {0x2ca3, 0x2ca2},
- {0x2ca4, 0x2ca5},
- {0x2ca5, 0x2ca4},
- {0x2ca6, 0x2ca7},
- {0x2ca7, 0x2ca6},
- {0x2ca8, 0x2ca9},
- {0x2ca9, 0x2ca8},
- {0x2caa, 0x2cab},
- {0x2cab, 0x2caa},
- {0x2cac, 0x2cad},
- {0x2cad, 0x2cac},
- {0x2cae, 0x2caf},
- {0x2caf, 0x2cae},
- {0x2cb0, 0x2cb1},
- {0x2cb1, 0x2cb0},
- {0x2cb2, 0x2cb3},
- {0x2cb3, 0x2cb2},
- {0x2cb4, 0x2cb5},
- {0x2cb5, 0x2cb4},
- {0x2cb6, 0x2cb7},
- {0x2cb7, 0x2cb6},
- {0x2cb8, 0x2cb9},
- {0x2cb9, 0x2cb8},
- {0x2cba, 0x2cbb},
- {0x2cbb, 0x2cba},
- {0x2cbc, 0x2cbd},
- {0x2cbd, 0x2cbc},
- {0x2cbe, 0x2cbf},
- {0x2cbf, 0x2cbe},
- {0x2cc0, 0x2cc1},
- {0x2cc1, 0x2cc0},
- {0x2cc2, 0x2cc3},
- {0x2cc3, 0x2cc2},
- {0x2cc4, 0x2cc5},
- {0x2cc5, 0x2cc4},
- {0x2cc6, 0x2cc7},
- {0x2cc7, 0x2cc6},
- {0x2cc8, 0x2cc9},
- {0x2cc9, 0x2cc8},
- {0x2cca, 0x2ccb},
- {0x2ccb, 0x2cca},
- {0x2ccc, 0x2ccd},
- {0x2ccd, 0x2ccc},
- {0x2cce, 0x2ccf},
- {0x2ccf, 0x2cce},
- {0x2cd0, 0x2cd1},
- {0x2cd1, 0x2cd0},
- {0x2cd2, 0x2cd3},
- {0x2cd3, 0x2cd2},
- {0x2cd4, 0x2cd5},
- {0x2cd5, 0x2cd4},
- {0x2cd6, 0x2cd7},
- {0x2cd7, 0x2cd6},
- {0x2cd8, 0x2cd9},
- {0x2cd9, 0x2cd8},
- {0x2cda, 0x2cdb},
- {0x2cdb, 0x2cda},
- {0x2cdc, 0x2cdd},
- {0x2cdd, 0x2cdc},
- {0x2cde, 0x2cdf},
- {0x2cdf, 0x2cde},
- {0x2ce0, 0x2ce1},
- {0x2ce1, 0x2ce0},
- {0x2ce2, 0x2ce3},
- {0x2ce3, 0x2ce2},
- {0x2ceb, 0x2cec},
- {0x2cec, 0x2ceb},
- {0x2ced, 0x2cee},
- {0x2cee, 0x2ced},
- {0x2cf2, 0x2cf3},
- {0x2cf3, 0x2cf2},
- {0x2d00, 0x10a0},
- {0x2d01, 0x10a1},
- {0x2d02, 0x10a2},
- {0x2d03, 0x10a3},
- {0x2d04, 0x10a4},
- {0x2d05, 0x10a5},
- {0x2d06, 0x10a6},
- {0x2d07, 0x10a7},
- {0x2d08, 0x10a8},
- {0x2d09, 0x10a9},
- {0x2d0a, 0x10aa},
- {0x2d0b, 0x10ab},
- {0x2d0c, 0x10ac},
- {0x2d0d, 0x10ad},
- {0x2d0e, 0x10ae},
- {0x2d0f, 0x10af},
- {0x2d10, 0x10b0},
- {0x2d11, 0x10b1},
- {0x2d12, 0x10b2},
- {0x2d13, 0x10b3},
- {0x2d14, 0x10b4},
- {0x2d15, 0x10b5},
- {0x2d16, 0x10b6},
- {0x2d17, 0x10b7},
- {0x2d18, 0x10b8},
- {0x2d19, 0x10b9},
- {0x2d1a, 0x10ba},
- {0x2d1b, 0x10bb},
- {0x2d1c, 0x10bc},
- {0x2d1d, 0x10bd},
- {0x2d1e, 0x10be},
- {0x2d1f, 0x10bf},
- {0x2d20, 0x10c0},
- {0x2d21, 0x10c1},
- {0x2d22, 0x10c2},
- {0x2d23, 0x10c3},
- {0x2d24, 0x10c4},
- {0x2d25, 0x10c5},
- {0x2d27, 0x10c7},
- {0x2d2d, 0x10cd},
- {0xa640, 0xa641},
- {0xa641, 0xa640},
- {0xa642, 0xa643},
- {0xa643, 0xa642},
- {0xa644, 0xa645},
- {0xa645, 0xa644},
- {0xa646, 0xa647},
- {0xa647, 0xa646},
- {0xa648, 0xa649},
- {0xa649, 0xa648},
- {0xa64a, 0xa64b},
- {0xa64b, 0xa64a},
- {0xa64c, 0xa64d},
- {0xa64d, 0xa64c},
- {0xa64e, 0xa64f},
- {0xa64f, 0xa64e},
- {0xa650, 0xa651},
- {0xa651, 0xa650},
- {0xa652, 0xa653},
- {0xa653, 0xa652},
- {0xa654, 0xa655},
- {0xa655, 0xa654},
- {0xa656, 0xa657},
- {0xa657, 0xa656},
- {0xa658, 0xa659},
- {0xa659, 0xa658},
- {0xa65a, 0xa65b},
- {0xa65b, 0xa65a},
- {0xa65c, 0xa65d},
- {0xa65d, 0xa65c},
- {0xa65e, 0xa65f},
- {0xa65f, 0xa65e},
- {0xa660, 0xa661},
- {0xa661, 0xa660},
- {0xa662, 0xa663},
- {0xa663, 0xa662},
- {0xa664, 0xa665},
- {0xa665, 0xa664},
- {0xa666, 0xa667},
- {0xa667, 0xa666},
- {0xa668, 0xa669},
- {0xa669, 0xa668},
- {0xa66a, 0xa66b},
- {0xa66b, 0xa66a},
- {0xa66c, 0xa66d},
- {0xa66d, 0xa66c},
- {0xa680, 0xa681},
- {0xa681, 0xa680},
- {0xa682, 0xa683},
- {0xa683, 0xa682},
- {0xa684, 0xa685},
- {0xa685, 0xa684},
- {0xa686, 0xa687},
- {0xa687, 0xa686},
- {0xa688, 0xa689},
- {0xa689, 0xa688},
- {0xa68a, 0xa68b},
- {0xa68b, 0xa68a},
- {0xa68c, 0xa68d},
- {0xa68d, 0xa68c},
- {0xa68e, 0xa68f},
- {0xa68f, 0xa68e},
- {0xa690, 0xa691},
- {0xa691, 0xa690},
- {0xa692, 0xa693},
- {0xa693, 0xa692},
- {0xa694, 0xa695},
- {0xa695, 0xa694},
- {0xa696, 0xa697},
- {0xa697, 0xa696},
- {0xa698, 0xa699},
- {0xa699, 0xa698},
- {0xa69a, 0xa69b},
- {0xa69b, 0xa69a},
- {0xa722, 0xa723},
- {0xa723, 0xa722},
- {0xa724, 0xa725},
- {0xa725, 0xa724},
- {0xa726, 0xa727},
- {0xa727, 0xa726},
- {0xa728, 0xa729},
- {0xa729, 0xa728},
- {0xa72a, 0xa72b},
- {0xa72b, 0xa72a},
- {0xa72c, 0xa72d},
- {0xa72d, 0xa72c},
- {0xa72e, 0xa72f},
- {0xa72f, 0xa72e},
- {0xa732, 0xa733},
- {0xa733, 0xa732},
- {0xa734, 0xa735},
- {0xa735, 0xa734},
- {0xa736, 0xa737},
- {0xa737, 0xa736},
- {0xa738, 0xa739},
- {0xa739, 0xa738},
- {0xa73a, 0xa73b},
- {0xa73b, 0xa73a},
- {0xa73c, 0xa73d},
- {0xa73d, 0xa73c},
- {0xa73e, 0xa73f},
- {0xa73f, 0xa73e},
- {0xa740, 0xa741},
- {0xa741, 0xa740},
- {0xa742, 0xa743},
- {0xa743, 0xa742},
- {0xa744, 0xa745},
- {0xa745, 0xa744},
- {0xa746, 0xa747},
- {0xa747, 0xa746},
- {0xa748, 0xa749},
- {0xa749, 0xa748},
- {0xa74a, 0xa74b},
- {0xa74b, 0xa74a},
- {0xa74c, 0xa74d},
- {0xa74d, 0xa74c},
- {0xa74e, 0xa74f},
- {0xa74f, 0xa74e},
- {0xa750, 0xa751},
- {0xa751, 0xa750},
- {0xa752, 0xa753},
- {0xa753, 0xa752},
- {0xa754, 0xa755},
- {0xa755, 0xa754},
- {0xa756, 0xa757},
- {0xa757, 0xa756},
- {0xa758, 0xa759},
- {0xa759, 0xa758},
- {0xa75a, 0xa75b},
- {0xa75b, 0xa75a},
- {0xa75c, 0xa75d},
- {0xa75d, 0xa75c},
- {0xa75e, 0xa75f},
- {0xa75f, 0xa75e},
- {0xa760, 0xa761},
- {0xa761, 0xa760},
- {0xa762, 0xa763},
- {0xa763, 0xa762},
- {0xa764, 0xa765},
- {0xa765, 0xa764},
- {0xa766, 0xa767},
- {0xa767, 0xa766},
- {0xa768, 0xa769},
- {0xa769, 0xa768},
- {0xa76a, 0xa76b},
- {0xa76b, 0xa76a},
- {0xa76c, 0xa76d},
- {0xa76d, 0xa76c},
- {0xa76e, 0xa76f},
- {0xa76f, 0xa76e},
- {0xa779, 0xa77a},
- {0xa77a, 0xa779},
- {0xa77b, 0xa77c},
- {0xa77c, 0xa77b},
- {0xa77d, 0x1d79},
- {0xa77e, 0xa77f},
- {0xa77f, 0xa77e},
- {0xa780, 0xa781},
- {0xa781, 0xa780},
- {0xa782, 0xa783},
- {0xa783, 0xa782},
- {0xa784, 0xa785},
- {0xa785, 0xa784},
- {0xa786, 0xa787},
- {0xa787, 0xa786},
- {0xa78b, 0xa78c},
- {0xa78c, 0xa78b},
- {0xa78d, 0x265},
- {0xa790, 0xa791},
- {0xa791, 0xa790},
- {0xa792, 0xa793},
- {0xa793, 0xa792},
- {0xa796, 0xa797},
- {0xa797, 0xa796},
- {0xa798, 0xa799},
- {0xa799, 0xa798},
- {0xa79a, 0xa79b},
- {0xa79b, 0xa79a},
- {0xa79c, 0xa79d},
- {0xa79d, 0xa79c},
- {0xa79e, 0xa79f},
- {0xa79f, 0xa79e},
- {0xa7a0, 0xa7a1},
- {0xa7a1, 0xa7a0},
- {0xa7a2, 0xa7a3},
- {0xa7a3, 0xa7a2},
- {0xa7a4, 0xa7a5},
- {0xa7a5, 0xa7a4},
- {0xa7a6, 0xa7a7},
- {0xa7a7, 0xa7a6},
- {0xa7a8, 0xa7a9},
- {0xa7a9, 0xa7a8},
- {0xa7aa, 0x266},
- {0xa7ab, 0x25c},
- {0xa7ac, 0x261},
- {0xa7ad, 0x26c},
- {0xa7b0, 0x29e},
- {0xa7b1, 0x287},
- {0xff21, 0xff41},
- {0xff22, 0xff42},
- {0xff23, 0xff43},
- {0xff24, 0xff44},
- {0xff25, 0xff45},
- {0xff26, 0xff46},
- {0xff27, 0xff47},
- {0xff28, 0xff48},
- {0xff29, 0xff49},
- {0xff2a, 0xff4a},
- {0xff2b, 0xff4b},
- {0xff2c, 0xff4c},
- {0xff2d, 0xff4d},
- {0xff2e, 0xff4e},
- {0xff2f, 0xff4f},
- {0xff30, 0xff50},
- {0xff31, 0xff51},
- {0xff32, 0xff52},
- {0xff33, 0xff53},
- {0xff34, 0xff54},
- {0xff35, 0xff55},
- {0xff36, 0xff56},
- {0xff37, 0xff57},
- {0xff38, 0xff58},
- {0xff39, 0xff59},
- {0xff3a, 0xff5a},
- {0xff41, 0xff21},
- {0xff42, 0xff22},
- {0xff43, 0xff23},
- {0xff44, 0xff24},
- {0xff45, 0xff25},
- {0xff46, 0xff26},
- {0xff47, 0xff27},
- {0xff48, 0xff28},
- {0xff49, 0xff29},
- {0xff4a, 0xff2a},
- {0xff4b, 0xff2b},
- {0xff4c, 0xff2c},
- {0xff4d, 0xff2d},
- {0xff4e, 0xff2e},
- {0xff4f, 0xff2f},
- {0xff50, 0xff30},
- {0xff51, 0xff31},
- {0xff52, 0xff32},
- {0xff53, 0xff33},
- {0xff54, 0xff34},
- {0xff55, 0xff35},
- {0xff56, 0xff36},
- {0xff57, 0xff37},
- {0xff58, 0xff38},
- {0xff59, 0xff39},
- {0xff5a, 0xff3a},
- {0x10400, 0x10428},
- {0x10401, 0x10429},
- {0x10402, 0x1042a},
- {0x10403, 0x1042b},
- {0x10404, 0x1042c},
- {0x10405, 0x1042d},
- {0x10406, 0x1042e},
- {0x10407, 0x1042f},
- {0x10408, 0x10430},
- {0x10409, 0x10431},
- {0x1040a, 0x10432},
- {0x1040b, 0x10433},
- {0x1040c, 0x10434},
- {0x1040d, 0x10435},
- {0x1040e, 0x10436},
- {0x1040f, 0x10437},
- {0x10410, 0x10438},
- {0x10411, 0x10439},
- {0x10412, 0x1043a},
- {0x10413, 0x1043b},
- {0x10414, 0x1043c},
- {0x10415, 0x1043d},
- {0x10416, 0x1043e},
- {0x10417, 0x1043f},
- {0x10418, 0x10440},
- {0x10419, 0x10441},
- {0x1041a, 0x10442},
- {0x1041b, 0x10443},
- {0x1041c, 0x10444},
- {0x1041d, 0x10445},
- {0x1041e, 0x10446},
- {0x1041f, 0x10447},
- {0x10420, 0x10448},
- {0x10421, 0x10449},
- {0x10422, 0x1044a},
- {0x10423, 0x1044b},
- {0x10424, 0x1044c},
- {0x10425, 0x1044d},
- {0x10426, 0x1044e},
- {0x10427, 0x1044f},
- {0x10428, 0x10400},
- {0x10429, 0x10401},
- {0x1042a, 0x10402},
- {0x1042b, 0x10403},
- {0x1042c, 0x10404},
- {0x1042d, 0x10405},
- {0x1042e, 0x10406},
- {0x1042f, 0x10407},
- {0x10430, 0x10408},
- {0x10431, 0x10409},
- {0x10432, 0x1040a},
- {0x10433, 0x1040b},
- {0x10434, 0x1040c},
- {0x10435, 0x1040d},
- {0x10436, 0x1040e},
- {0x10437, 0x1040f},
- {0x10438, 0x10410},
- {0x10439, 0x10411},
- {0x1043a, 0x10412},
- {0x1043b, 0x10413},
- {0x1043c, 0x10414},
- {0x1043d, 0x10415},
- {0x1043e, 0x10416},
- {0x1043f, 0x10417},
- {0x10440, 0x10418},
- {0x10441, 0x10419},
- {0x10442, 0x1041a},
- {0x10443, 0x1041b},
- {0x10444, 0x1041c},
- {0x10445, 0x1041d},
- {0x10446, 0x1041e},
- {0x10447, 0x1041f},
- {0x10448, 0x10420},
- {0x10449, 0x10421},
- {0x1044a, 0x10422},
- {0x1044b, 0x10423},
- {0x1044c, 0x10424},
- {0x1044d, 0x10425},
- {0x1044e, 0x10426},
- {0x1044f, 0x10427},
- {0x118a0, 0x118c0},
- {0x118a1, 0x118c1},
- {0x118a2, 0x118c2},
- {0x118a3, 0x118c3},
- {0x118a4, 0x118c4},
- {0x118a5, 0x118c5},
- {0x118a6, 0x118c6},
- {0x118a7, 0x118c7},
- {0x118a8, 0x118c8},
- {0x118a9, 0x118c9},
- {0x118aa, 0x118ca},
- {0x118ab, 0x118cb},
- {0x118ac, 0x118cc},
- {0x118ad, 0x118cd},
- {0x118ae, 0x118ce},
- {0x118af, 0x118cf},
- {0x118b0, 0x118d0},
- {0x118b1, 0x118d1},
- {0x118b2, 0x118d2},
- {0x118b3, 0x118d3},
- {0x118b4, 0x118d4},
- {0x118b5, 0x118d5},
- {0x118b6, 0x118d6},
- {0x118b7, 0x118d7},
- {0x118b8, 0x118d8},
- {0x118b9, 0x118d9},
- {0x118ba, 0x118da},
- {0x118bb, 0x118db},
- {0x118bc, 0x118dc},
- {0x118bd, 0x118dd},
- {0x118be, 0x118de},
- {0x118bf, 0x118df},
- {0x118c0, 0x118a0},
- {0x118c1, 0x118a1},
- {0x118c2, 0x118a2},
- {0x118c3, 0x118a3},
- {0x118c4, 0x118a4},
- {0x118c5, 0x118a5},
- {0x118c6, 0x118a6},
- {0x118c7, 0x118a7},
- {0x118c8, 0x118a8},
- {0x118c9, 0x118a9},
- {0x118ca, 0x118aa},
- {0x118cb, 0x118ab},
- {0x118cc, 0x118ac},
- {0x118cd, 0x118ad},
- {0x118ce, 0x118ae},
- {0x118cf, 0x118af},
- {0x118d0, 0x118b0},
- {0x118d1, 0x118b1},
- {0x118d2, 0x118b2},
- {0x118d3, 0x118b3},
- {0x118d4, 0x118b4},
- {0x118d5, 0x118b5},
- {0x118d6, 0x118b6},
- {0x118d7, 0x118b7},
- {0x118d8, 0x118b8},
- {0x118d9, 0x118b9},
- {0x118da, 0x118ba},
- {0x118db, 0x118bb},
- {0x118dc, 0x118bc},
- {0x118dd, 0x118bd},
- {0x118de, 0x118be},
- {0x118df, 0x118bf},
-};
-
-#endif // UCP_TABLE_DEFINE_FN
-
-} // namespace ue2
-
-#endif
-
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef UCP_TABLE_H
+#define UCP_TABLE_H
+
+/* Generated by tools/scripts/ucp.py based on unicode database
+ *
+ * Do not hand edit
+ */
+
+namespace ue2 {
+
+class CodePointSet;
+void make_caseless(CodePointSet *cps);
+bool flip_case(unichar *c);
+
+CodePointSet getUcpC(void);
+CodePointSet getUcpCc(void);
+CodePointSet getUcpCf(void);
+CodePointSet getUcpCn(void);
+CodePointSet getUcpCo(void);
+CodePointSet getUcpCs(void);
+CodePointSet getUcpL(void);
+CodePointSet getUcpL_and(void);
+CodePointSet getUcpLl(void);
+CodePointSet getUcpLm(void);
+CodePointSet getUcpLo(void);
+CodePointSet getUcpLt(void);
+CodePointSet getUcpLu(void);
+CodePointSet getUcpM(void);
+CodePointSet getUcpMc(void);
+CodePointSet getUcpMe(void);
+CodePointSet getUcpMn(void);
+CodePointSet getUcpN(void);
+CodePointSet getUcpNd(void);
+CodePointSet getUcpNl(void);
+CodePointSet getUcpNo(void);
+CodePointSet getUcpP(void);
+CodePointSet getUcpPc(void);
+CodePointSet getUcpPd(void);
+CodePointSet getUcpPe(void);
+CodePointSet getUcpPf(void);
+CodePointSet getUcpPi(void);
+CodePointSet getUcpPo(void);
+CodePointSet getUcpPs(void);
+CodePointSet getUcpS(void);
+CodePointSet getUcpSc(void);
+CodePointSet getUcpSk(void);
+CodePointSet getUcpSm(void);
+CodePointSet getUcpSo(void);
+CodePointSet getUcpXan(void);
+CodePointSet getUcpXps(void);
+CodePointSet getUcpXsp(void);
+CodePointSet getUcpXwd(void);
+CodePointSet getUcpZ(void);
+CodePointSet getUcpZl(void);
+CodePointSet getUcpZp(void);
+CodePointSet getUcpZs(void);
+CodePointSet getUcpArabic(void);
+CodePointSet getUcpArmenian(void);
+CodePointSet getUcpAvestan(void);
+CodePointSet getUcpBalinese(void);
+CodePointSet getUcpBamum(void);
+CodePointSet getUcpBassa_Vah(void);
+CodePointSet getUcpBatak(void);
+CodePointSet getUcpBengali(void);
+CodePointSet getUcpBopomofo(void);
+CodePointSet getUcpBrahmi(void);
+CodePointSet getUcpBraille(void);
+CodePointSet getUcpBuginese(void);
+CodePointSet getUcpBuhid(void);
+CodePointSet getUcpCanadian_Aboriginal(void);
+CodePointSet getUcpCarian(void);
+CodePointSet getUcpCaucasian_Albanian(void);
+CodePointSet getUcpChakma(void);
+CodePointSet getUcpCham(void);
+CodePointSet getUcpCherokee(void);
+CodePointSet getUcpCommon(void);
+CodePointSet getUcpCoptic(void);
+CodePointSet getUcpCuneiform(void);
+CodePointSet getUcpCypriot(void);
+CodePointSet getUcpCyrillic(void);
+CodePointSet getUcpDeseret(void);
+CodePointSet getUcpDevanagari(void);
+CodePointSet getUcpDuployan(void);
+CodePointSet getUcpEgyptian_Hieroglyphs(void);
+CodePointSet getUcpElbasan(void);
+CodePointSet getUcpEthiopic(void);
+CodePointSet getUcpGeorgian(void);
+CodePointSet getUcpGlagolitic(void);
+CodePointSet getUcpGothic(void);
+CodePointSet getUcpGrantha(void);
+CodePointSet getUcpGreek(void);
+CodePointSet getUcpGujarati(void);
+CodePointSet getUcpGurmukhi(void);
+CodePointSet getUcpHan(void);
+CodePointSet getUcpHangul(void);
+CodePointSet getUcpHanunoo(void);
+CodePointSet getUcpHebrew(void);
+CodePointSet getUcpHiragana(void);
+CodePointSet getUcpImperial_Aramaic(void);
+CodePointSet getUcpInherited(void);
+CodePointSet getUcpInscriptional_Pahlavi(void);
+CodePointSet getUcpInscriptional_Parthian(void);
+CodePointSet getUcpJavanese(void);
+CodePointSet getUcpKaithi(void);
+CodePointSet getUcpKannada(void);
+CodePointSet getUcpKatakana(void);
+CodePointSet getUcpKayah_Li(void);
+CodePointSet getUcpKharoshthi(void);
+CodePointSet getUcpKhmer(void);
+CodePointSet getUcpKhojki(void);
+CodePointSet getUcpKhudawadi(void);
+CodePointSet getUcpLao(void);
+CodePointSet getUcpLatin(void);
+CodePointSet getUcpLepcha(void);
+CodePointSet getUcpLimbu(void);
+CodePointSet getUcpLinear_A(void);
+CodePointSet getUcpLinear_B(void);
+CodePointSet getUcpLisu(void);
+CodePointSet getUcpLycian(void);
+CodePointSet getUcpLydian(void);
+CodePointSet getUcpMahajani(void);
+CodePointSet getUcpMalayalam(void);
+CodePointSet getUcpMandaic(void);
+CodePointSet getUcpManichaean(void);
+CodePointSet getUcpMeetei_Mayek(void);
+CodePointSet getUcpMende_Kikakui(void);
+CodePointSet getUcpMeroitic_Cursive(void);
+CodePointSet getUcpMeroitic_Hieroglyphs(void);
+CodePointSet getUcpMiao(void);
+CodePointSet getUcpModi(void);
+CodePointSet getUcpMongolian(void);
+CodePointSet getUcpMro(void);
+CodePointSet getUcpMyanmar(void);
+CodePointSet getUcpNabataean(void);
+CodePointSet getUcpNew_Tai_Lue(void);
+CodePointSet getUcpNko(void);
+CodePointSet getUcpOgham(void);
+CodePointSet getUcpOl_Chiki(void);
+CodePointSet getUcpOld_Italic(void);
+CodePointSet getUcpOld_North_Arabian(void);
+CodePointSet getUcpOld_Permic(void);
+CodePointSet getUcpOld_Persian(void);
+CodePointSet getUcpOld_South_Arabian(void);
+CodePointSet getUcpOld_Turkic(void);
+CodePointSet getUcpOriya(void);
+CodePointSet getUcpOsmanya(void);
+CodePointSet getUcpPahawh_Hmong(void);
+CodePointSet getUcpPalmyrene(void);
+CodePointSet getUcpPau_Cin_Hau(void);
+CodePointSet getUcpPhags_Pa(void);
+CodePointSet getUcpPhoenician(void);
+CodePointSet getUcpPsalter_Pahlavi(void);
+CodePointSet getUcpRejang(void);
+CodePointSet getUcpRunic(void);
+CodePointSet getUcpSamaritan(void);
+CodePointSet getUcpSaurashtra(void);
+CodePointSet getUcpSharada(void);
+CodePointSet getUcpShavian(void);
+CodePointSet getUcpSiddham(void);
+CodePointSet getUcpSinhala(void);
+CodePointSet getUcpSora_Sompeng(void);
+CodePointSet getUcpSundanese(void);
+CodePointSet getUcpSyloti_Nagri(void);
+CodePointSet getUcpSyriac(void);
+CodePointSet getUcpTagalog(void);
+CodePointSet getUcpTagbanwa(void);
+CodePointSet getUcpTai_Le(void);
+CodePointSet getUcpTai_Tham(void);
+CodePointSet getUcpTai_Viet(void);
+CodePointSet getUcpTakri(void);
+CodePointSet getUcpTamil(void);
+CodePointSet getUcpTelugu(void);
+CodePointSet getUcpThaana(void);
+CodePointSet getUcpThai(void);
+CodePointSet getUcpTibetan(void);
+CodePointSet getUcpTifinagh(void);
+CodePointSet getUcpTirhuta(void);
+CodePointSet getUcpUgaritic(void);
+CodePointSet getUcpVai(void);
+CodePointSet getUcpWarang_Citi(void);
+CodePointSet getUcpYi(void);
+
+#ifdef UCP_TABLE_DEFINE_FN
+
+static const unichar ucp_C_def[] = {
+ 0x0, 0x1f,
+ 0x7f, 0x9f,
+ 0xad, 0xad,
+ 0x378, 0x379,
+ 0x380, 0x383,
+ 0x38b, 0x38b,
+ 0x38d, 0x38d,
+ 0x3a2, 0x3a2,
+ 0x530, 0x530,
+ 0x557, 0x558,
+ 0x560, 0x560,
+ 0x588, 0x588,
+ 0x58b, 0x58c,
+ 0x590, 0x590,
+ 0x5c8, 0x5cf,
+ 0x5eb, 0x5ef,
+ 0x5f5, 0x605,
+ 0x61c, 0x61d,
+ 0x6dd, 0x6dd,
+ 0x70e, 0x70f,
+ 0x74b, 0x74c,
+ 0x7b2, 0x7bf,
+ 0x7fb, 0x7ff,
+ 0x82e, 0x82f,
+ 0x83f, 0x83f,
+ 0x85c, 0x85d,
+ 0x85f, 0x89f,
+ 0x8b3, 0x8e3,
+ 0x984, 0x984,
+ 0x98d, 0x98e,
+ 0x991, 0x992,
+ 0x9a9, 0x9a9,
+ 0x9b1, 0x9b1,
+ 0x9b3, 0x9b5,
+ 0x9ba, 0x9bb,
+ 0x9c5, 0x9c6,
+ 0x9c9, 0x9ca,
+ 0x9cf, 0x9d6,
+ 0x9d8, 0x9db,
+ 0x9de, 0x9de,
+ 0x9e4, 0x9e5,
+ 0x9fc, 0xa00,
+ 0xa04, 0xa04,
+ 0xa0b, 0xa0e,
+ 0xa11, 0xa12,
+ 0xa29, 0xa29,
+ 0xa31, 0xa31,
+ 0xa34, 0xa34,
+ 0xa37, 0xa37,
+ 0xa3a, 0xa3b,
+ 0xa3d, 0xa3d,
+ 0xa43, 0xa46,
+ 0xa49, 0xa4a,
+ 0xa4e, 0xa50,
+ 0xa52, 0xa58,
+ 0xa5d, 0xa5d,
+ 0xa5f, 0xa65,
+ 0xa76, 0xa80,
+ 0xa84, 0xa84,
+ 0xa8e, 0xa8e,
+ 0xa92, 0xa92,
+ 0xaa9, 0xaa9,
+ 0xab1, 0xab1,
+ 0xab4, 0xab4,
+ 0xaba, 0xabb,
+ 0xac6, 0xac6,
+ 0xaca, 0xaca,
+ 0xace, 0xacf,
+ 0xad1, 0xadf,
+ 0xae4, 0xae5,
+ 0xaf2, 0xb00,
+ 0xb04, 0xb04,
+ 0xb0d, 0xb0e,
+ 0xb11, 0xb12,
+ 0xb29, 0xb29,
+ 0xb31, 0xb31,
+ 0xb34, 0xb34,
+ 0xb3a, 0xb3b,
+ 0xb45, 0xb46,
+ 0xb49, 0xb4a,
+ 0xb4e, 0xb55,
+ 0xb58, 0xb5b,
+ 0xb5e, 0xb5e,
+ 0xb64, 0xb65,
+ 0xb78, 0xb81,
+ 0xb84, 0xb84,
+ 0xb8b, 0xb8d,
+ 0xb91, 0xb91,
+ 0xb96, 0xb98,
+ 0xb9b, 0xb9b,
+ 0xb9d, 0xb9d,
+ 0xba0, 0xba2,
+ 0xba5, 0xba7,
+ 0xbab, 0xbad,
+ 0xbba, 0xbbd,
+ 0xbc3, 0xbc5,
+ 0xbc9, 0xbc9,
+ 0xbce, 0xbcf,
+ 0xbd1, 0xbd6,
+ 0xbd8, 0xbe5,
+ 0xbfb, 0xbff,
+ 0xc04, 0xc04,
+ 0xc0d, 0xc0d,
+ 0xc11, 0xc11,
+ 0xc29, 0xc29,
+ 0xc3a, 0xc3c,
+ 0xc45, 0xc45,
+ 0xc49, 0xc49,
+ 0xc4e, 0xc54,
+ 0xc57, 0xc57,
+ 0xc5a, 0xc5f,
+ 0xc64, 0xc65,
+ 0xc70, 0xc77,
+ 0xc80, 0xc80,
+ 0xc84, 0xc84,
+ 0xc8d, 0xc8d,
+ 0xc91, 0xc91,
+ 0xca9, 0xca9,
+ 0xcb4, 0xcb4,
+ 0xcba, 0xcbb,
+ 0xcc5, 0xcc5,
+ 0xcc9, 0xcc9,
+ 0xcce, 0xcd4,
+ 0xcd7, 0xcdd,
+ 0xcdf, 0xcdf,
+ 0xce4, 0xce5,
+ 0xcf0, 0xcf0,
+ 0xcf3, 0xd00,
+ 0xd04, 0xd04,
+ 0xd0d, 0xd0d,
+ 0xd11, 0xd11,
+ 0xd3b, 0xd3c,
+ 0xd45, 0xd45,
+ 0xd49, 0xd49,
+ 0xd4f, 0xd56,
+ 0xd58, 0xd5f,
+ 0xd64, 0xd65,
+ 0xd76, 0xd78,
+ 0xd80, 0xd81,
+ 0xd84, 0xd84,
+ 0xd97, 0xd99,
+ 0xdb2, 0xdb2,
+ 0xdbc, 0xdbc,
+ 0xdbe, 0xdbf,
+ 0xdc7, 0xdc9,
+ 0xdcb, 0xdce,
+ 0xdd5, 0xdd5,
+ 0xdd7, 0xdd7,
+ 0xde0, 0xde5,
+ 0xdf0, 0xdf1,
+ 0xdf5, 0xe00,
+ 0xe3b, 0xe3e,
+ 0xe5c, 0xe80,
+ 0xe83, 0xe83,
+ 0xe85, 0xe86,
+ 0xe89, 0xe89,
+ 0xe8b, 0xe8c,
+ 0xe8e, 0xe93,
+ 0xe98, 0xe98,
+ 0xea0, 0xea0,
+ 0xea4, 0xea4,
+ 0xea6, 0xea6,
+ 0xea8, 0xea9,
+ 0xeac, 0xeac,
+ 0xeba, 0xeba,
+ 0xebe, 0xebf,
+ 0xec5, 0xec5,
+ 0xec7, 0xec7,
+ 0xece, 0xecf,
+ 0xeda, 0xedb,
+ 0xee0, 0xeff,
+ 0xf48, 0xf48,
+ 0xf6d, 0xf70,
+ 0xf98, 0xf98,
+ 0xfbd, 0xfbd,
+ 0xfcd, 0xfcd,
+ 0xfdb, 0xfff,
+ 0x10c6, 0x10c6,
+ 0x10c8, 0x10cc,
+ 0x10ce, 0x10cf,
+ 0x1249, 0x1249,
+ 0x124e, 0x124f,
+ 0x1257, 0x1257,
+ 0x1259, 0x1259,
+ 0x125e, 0x125f,
+ 0x1289, 0x1289,
+ 0x128e, 0x128f,
+ 0x12b1, 0x12b1,
+ 0x12b6, 0x12b7,
+ 0x12bf, 0x12bf,
+ 0x12c1, 0x12c1,
+ 0x12c6, 0x12c7,
+ 0x12d7, 0x12d7,
+ 0x1311, 0x1311,
+ 0x1316, 0x1317,
+ 0x135b, 0x135c,
+ 0x137d, 0x137f,
+ 0x139a, 0x139f,
+ 0x13f5, 0x13ff,
+ 0x169d, 0x169f,
+ 0x16f9, 0x16ff,
+ 0x170d, 0x170d,
+ 0x1715, 0x171f,
+ 0x1737, 0x173f,
+ 0x1754, 0x175f,
+ 0x176d, 0x176d,
+ 0x1771, 0x1771,
+ 0x1774, 0x177f,
+ 0x17de, 0x17df,
+ 0x17ea, 0x17ef,
+ 0x17fa, 0x17ff,
+ 0x180e, 0x180f,
+ 0x181a, 0x181f,
+ 0x1878, 0x187f,
+ 0x18ab, 0x18af,
+ 0x18f6, 0x18ff,
+ 0x191f, 0x191f,
+ 0x192c, 0x192f,
+ 0x193c, 0x193f,
+ 0x1941, 0x1943,
+ 0x196e, 0x196f,
+ 0x1975, 0x197f,
+ 0x19ac, 0x19af,
+ 0x19ca, 0x19cf,
+ 0x19db, 0x19dd,
+ 0x1a1c, 0x1a1d,
+ 0x1a5f, 0x1a5f,
+ 0x1a7d, 0x1a7e,
+ 0x1a8a, 0x1a8f,
+ 0x1a9a, 0x1a9f,
+ 0x1aae, 0x1aaf,
+ 0x1abf, 0x1aff,
+ 0x1b4c, 0x1b4f,
+ 0x1b7d, 0x1b7f,
+ 0x1bf4, 0x1bfb,
+ 0x1c38, 0x1c3a,
+ 0x1c4a, 0x1c4c,
+ 0x1c80, 0x1cbf,
+ 0x1cc8, 0x1ccf,
+ 0x1cf7, 0x1cf7,
+ 0x1cfa, 0x1cff,
+ 0x1df6, 0x1dfb,
+ 0x1f16, 0x1f17,
+ 0x1f1e, 0x1f1f,
+ 0x1f46, 0x1f47,
+ 0x1f4e, 0x1f4f,
+ 0x1f58, 0x1f58,
+ 0x1f5a, 0x1f5a,
+ 0x1f5c, 0x1f5c,
+ 0x1f5e, 0x1f5e,
+ 0x1f7e, 0x1f7f,
+ 0x1fb5, 0x1fb5,
+ 0x1fc5, 0x1fc5,
+ 0x1fd4, 0x1fd5,
+ 0x1fdc, 0x1fdc,
+ 0x1ff0, 0x1ff1,
+ 0x1ff5, 0x1ff5,
+ 0x1fff, 0x1fff,
+ 0x200b, 0x200f,
+ 0x202a, 0x202e,
+ 0x2060, 0x206f,
+ 0x2072, 0x2073,
+ 0x208f, 0x208f,
+ 0x209d, 0x209f,
+ 0x20be, 0x20cf,
+ 0x20f1, 0x20ff,
+ 0x218a, 0x218f,
+ 0x23fb, 0x23ff,
+ 0x2427, 0x243f,
+ 0x244b, 0x245f,
+ 0x2b74, 0x2b75,
+ 0x2b96, 0x2b97,
+ 0x2bba, 0x2bbc,
+ 0x2bc9, 0x2bc9,
+ 0x2bd2, 0x2bff,
+ 0x2c2f, 0x2c2f,
+ 0x2c5f, 0x2c5f,
+ 0x2cf4, 0x2cf8,
+ 0x2d26, 0x2d26,
+ 0x2d28, 0x2d2c,
+ 0x2d2e, 0x2d2f,
+ 0x2d68, 0x2d6e,
+ 0x2d71, 0x2d7e,
+ 0x2d97, 0x2d9f,
+ 0x2da7, 0x2da7,
+ 0x2daf, 0x2daf,
+ 0x2db7, 0x2db7,
+ 0x2dbf, 0x2dbf,
+ 0x2dc7, 0x2dc7,
+ 0x2dcf, 0x2dcf,
+ 0x2dd7, 0x2dd7,
+ 0x2ddf, 0x2ddf,
+ 0x2e43, 0x2e7f,
+ 0x2e9a, 0x2e9a,
+ 0x2ef4, 0x2eff,
+ 0x2fd6, 0x2fef,
+ 0x2ffc, 0x2fff,
+ 0x3040, 0x3040,
+ 0x3097, 0x3098,
+ 0x3100, 0x3104,
+ 0x312e, 0x3130,
+ 0x318f, 0x318f,
+ 0x31bb, 0x31bf,
+ 0x31e4, 0x31ef,
+ 0x321f, 0x321f,
+ 0x32ff, 0x32ff,
+ 0x4db6, 0x4dbf,
+ 0x9fcd, 0x9fff,
+ 0xa48d, 0xa48f,
+ 0xa4c7, 0xa4cf,
+ 0xa62c, 0xa63f,
+ 0xa69e, 0xa69e,
+ 0xa6f8, 0xa6ff,
+ 0xa78f, 0xa78f,
+ 0xa7ae, 0xa7af,
+ 0xa7b2, 0xa7f6,
+ 0xa82c, 0xa82f,
+ 0xa83a, 0xa83f,
+ 0xa878, 0xa87f,
+ 0xa8c5, 0xa8cd,
+ 0xa8da, 0xa8df,
+ 0xa8fc, 0xa8ff,
+ 0xa954, 0xa95e,
+ 0xa97d, 0xa97f,
+ 0xa9ce, 0xa9ce,
+ 0xa9da, 0xa9dd,
+ 0xa9ff, 0xa9ff,
+ 0xaa37, 0xaa3f,
+ 0xaa4e, 0xaa4f,
+ 0xaa5a, 0xaa5b,
+ 0xaac3, 0xaada,
+ 0xaaf7, 0xab00,
+ 0xab07, 0xab08,
+ 0xab0f, 0xab10,
+ 0xab17, 0xab1f,
+ 0xab27, 0xab27,
+ 0xab2f, 0xab2f,
+ 0xab60, 0xab63,
+ 0xab66, 0xabbf,
+ 0xabee, 0xabef,
+ 0xabfa, 0xabff,
+ 0xd7a4, 0xd7af,
+ 0xd7c7, 0xd7ca,
+ 0xd7fc, 0xf8ff,
+ 0xfa6e, 0xfa6f,
+ 0xfada, 0xfaff,
+ 0xfb07, 0xfb12,
+ 0xfb18, 0xfb1c,
+ 0xfb37, 0xfb37,
+ 0xfb3d, 0xfb3d,
+ 0xfb3f, 0xfb3f,
+ 0xfb42, 0xfb42,
+ 0xfb45, 0xfb45,
+ 0xfbc2, 0xfbd2,
+ 0xfd40, 0xfd4f,
+ 0xfd90, 0xfd91,
+ 0xfdc8, 0xfdef,
+ 0xfdfe, 0xfdff,
+ 0xfe1a, 0xfe1f,
+ 0xfe2e, 0xfe2f,
+ 0xfe53, 0xfe53,
+ 0xfe67, 0xfe67,
+ 0xfe6c, 0xfe6f,
+ 0xfe75, 0xfe75,
+ 0xfefd, 0xff00,
+ 0xffbf, 0xffc1,
+ 0xffc8, 0xffc9,
+ 0xffd0, 0xffd1,
+ 0xffd8, 0xffd9,
+ 0xffdd, 0xffdf,
+ 0xffe7, 0xffe7,
+ 0xffef, 0xfffb,
+ 0xfffe, 0xffff,
+ 0x1000c, 0x1000c,
+ 0x10027, 0x10027,
+ 0x1003b, 0x1003b,
+ 0x1003e, 0x1003e,
+ 0x1004e, 0x1004f,
+ 0x1005e, 0x1007f,
+ 0x100fb, 0x100ff,
+ 0x10103, 0x10106,
+ 0x10134, 0x10136,
+ 0x1018d, 0x1018f,
+ 0x1019c, 0x1019f,
+ 0x101a1, 0x101cf,
+ 0x101fe, 0x1027f,
+ 0x1029d, 0x1029f,
+ 0x102d1, 0x102df,
+ 0x102fc, 0x102ff,
+ 0x10324, 0x1032f,
+ 0x1034b, 0x1034f,
+ 0x1037b, 0x1037f,
+ 0x1039e, 0x1039e,
+ 0x103c4, 0x103c7,
+ 0x103d6, 0x103ff,
+ 0x1049e, 0x1049f,
+ 0x104aa, 0x104ff,
+ 0x10528, 0x1052f,
+ 0x10564, 0x1056e,
+ 0x10570, 0x105ff,
+ 0x10737, 0x1073f,
+ 0x10756, 0x1075f,
+ 0x10768, 0x107ff,
+ 0x10806, 0x10807,
+ 0x10809, 0x10809,
+ 0x10836, 0x10836,
+ 0x10839, 0x1083b,
+ 0x1083d, 0x1083e,
+ 0x10856, 0x10856,
+ 0x1089f, 0x108a6,
+ 0x108b0, 0x108ff,
+ 0x1091c, 0x1091e,
+ 0x1093a, 0x1093e,
+ 0x10940, 0x1097f,
+ 0x109b8, 0x109bd,
+ 0x109c0, 0x109ff,
+ 0x10a04, 0x10a04,
+ 0x10a07, 0x10a0b,
+ 0x10a14, 0x10a14,
+ 0x10a18, 0x10a18,
+ 0x10a34, 0x10a37,
+ 0x10a3b, 0x10a3e,
+ 0x10a48, 0x10a4f,
+ 0x10a59, 0x10a5f,
+ 0x10aa0, 0x10abf,
+ 0x10ae7, 0x10aea,
+ 0x10af7, 0x10aff,
+ 0x10b36, 0x10b38,
+ 0x10b56, 0x10b57,
+ 0x10b73, 0x10b77,
+ 0x10b92, 0x10b98,
+ 0x10b9d, 0x10ba8,
+ 0x10bb0, 0x10bff,
+ 0x10c49, 0x10e5f,
+ 0x10e7f, 0x10fff,
+ 0x1104e, 0x11051,
+ 0x11070, 0x1107e,
+ 0x110bd, 0x110bd,
+ 0x110c2, 0x110cf,
+ 0x110e9, 0x110ef,
+ 0x110fa, 0x110ff,
+ 0x11135, 0x11135,
+ 0x11144, 0x1114f,
+ 0x11177, 0x1117f,
+ 0x111c9, 0x111cc,
+ 0x111ce, 0x111cf,
+ 0x111db, 0x111e0,
+ 0x111f5, 0x111ff,
+ 0x11212, 0x11212,
+ 0x1123e, 0x112af,
+ 0x112eb, 0x112ef,
+ 0x112fa, 0x11300,
+ 0x11304, 0x11304,
+ 0x1130d, 0x1130e,
+ 0x11311, 0x11312,
+ 0x11329, 0x11329,
+ 0x11331, 0x11331,
+ 0x11334, 0x11334,
+ 0x1133a, 0x1133b,
+ 0x11345, 0x11346,
+ 0x11349, 0x1134a,
+ 0x1134e, 0x11356,
+ 0x11358, 0x1135c,
+ 0x11364, 0x11365,
+ 0x1136d, 0x1136f,
+ 0x11375, 0x1147f,
+ 0x114c8, 0x114cf,
+ 0x114da, 0x1157f,
+ 0x115b6, 0x115b7,
+ 0x115ca, 0x115ff,
+ 0x11645, 0x1164f,
+ 0x1165a, 0x1167f,
+ 0x116b8, 0x116bf,
+ 0x116ca, 0x1189f,
+ 0x118f3, 0x118fe,
+ 0x11900, 0x11abf,
+ 0x11af9, 0x11fff,
+ 0x12399, 0x123ff,
+ 0x1246f, 0x1246f,
+ 0x12475, 0x12fff,
+ 0x1342f, 0x167ff,
+ 0x16a39, 0x16a3f,
+ 0x16a5f, 0x16a5f,
+ 0x16a6a, 0x16a6d,
+ 0x16a70, 0x16acf,
+ 0x16aee, 0x16aef,
+ 0x16af6, 0x16aff,
+ 0x16b46, 0x16b4f,
+ 0x16b5a, 0x16b5a,
+ 0x16b62, 0x16b62,
+ 0x16b78, 0x16b7c,
+ 0x16b90, 0x16eff,
+ 0x16f45, 0x16f4f,
+ 0x16f7f, 0x16f8e,
+ 0x16fa0, 0x1afff,
+ 0x1b002, 0x1bbff,
+ 0x1bc6b, 0x1bc6f,
+ 0x1bc7d, 0x1bc7f,
+ 0x1bc89, 0x1bc8f,
+ 0x1bc9a, 0x1bc9b,
+ 0x1bca0, 0x1cfff,
+ 0x1d0f6, 0x1d0ff,
+ 0x1d127, 0x1d128,
+ 0x1d173, 0x1d17a,
+ 0x1d1de, 0x1d1ff,
+ 0x1d246, 0x1d2ff,
+ 0x1d357, 0x1d35f,
+ 0x1d372, 0x1d3ff,
+ 0x1d455, 0x1d455,
+ 0x1d49d, 0x1d49d,
+ 0x1d4a0, 0x1d4a1,
+ 0x1d4a3, 0x1d4a4,
+ 0x1d4a7, 0x1d4a8,
+ 0x1d4ad, 0x1d4ad,
+ 0x1d4ba, 0x1d4ba,
+ 0x1d4bc, 0x1d4bc,
+ 0x1d4c4, 0x1d4c4,
+ 0x1d506, 0x1d506,
+ 0x1d50b, 0x1d50c,
+ 0x1d515, 0x1d515,
+ 0x1d51d, 0x1d51d,
+ 0x1d53a, 0x1d53a,
+ 0x1d53f, 0x1d53f,
+ 0x1d545, 0x1d545,
+ 0x1d547, 0x1d549,
+ 0x1d551, 0x1d551,
+ 0x1d6a6, 0x1d6a7,
+ 0x1d7cc, 0x1d7cd,
+ 0x1d800, 0x1e7ff,
+ 0x1e8c5, 0x1e8c6,
+ 0x1e8d7, 0x1edff,
+ 0x1ee04, 0x1ee04,
+ 0x1ee20, 0x1ee20,
+ 0x1ee23, 0x1ee23,
+ 0x1ee25, 0x1ee26,
+ 0x1ee28, 0x1ee28,
+ 0x1ee33, 0x1ee33,
+ 0x1ee38, 0x1ee38,
+ 0x1ee3a, 0x1ee3a,
+ 0x1ee3c, 0x1ee41,
+ 0x1ee43, 0x1ee46,
+ 0x1ee48, 0x1ee48,
+ 0x1ee4a, 0x1ee4a,
+ 0x1ee4c, 0x1ee4c,
+ 0x1ee50, 0x1ee50,
+ 0x1ee53, 0x1ee53,
+ 0x1ee55, 0x1ee56,
+ 0x1ee58, 0x1ee58,
+ 0x1ee5a, 0x1ee5a,
+ 0x1ee5c, 0x1ee5c,
+ 0x1ee5e, 0x1ee5e,
+ 0x1ee60, 0x1ee60,
+ 0x1ee63, 0x1ee63,
+ 0x1ee65, 0x1ee66,
+ 0x1ee6b, 0x1ee6b,
+ 0x1ee73, 0x1ee73,
+ 0x1ee78, 0x1ee78,
+ 0x1ee7d, 0x1ee7d,
+ 0x1ee7f, 0x1ee7f,
+ 0x1ee8a, 0x1ee8a,
+ 0x1ee9c, 0x1eea0,
+ 0x1eea4, 0x1eea4,
+ 0x1eeaa, 0x1eeaa,
+ 0x1eebc, 0x1eeef,
+ 0x1eef2, 0x1efff,
+ 0x1f02c, 0x1f02f,
+ 0x1f094, 0x1f09f,
+ 0x1f0af, 0x1f0b0,
+ 0x1f0c0, 0x1f0c0,
+ 0x1f0d0, 0x1f0d0,
+ 0x1f0f6, 0x1f0ff,
+ 0x1f10d, 0x1f10f,
+ 0x1f12f, 0x1f12f,
+ 0x1f16c, 0x1f16f,
+ 0x1f19b, 0x1f1e5,
+ 0x1f203, 0x1f20f,
+ 0x1f23b, 0x1f23f,
+ 0x1f249, 0x1f24f,
+ 0x1f252, 0x1f2ff,
+ 0x1f32d, 0x1f32f,
+ 0x1f37e, 0x1f37f,
+ 0x1f3cf, 0x1f3d3,
+ 0x1f3f8, 0x1f3ff,
+ 0x1f4ff, 0x1f4ff,
+ 0x1f54b, 0x1f54f,
+ 0x1f57a, 0x1f57a,
+ 0x1f5a4, 0x1f5a4,
+ 0x1f643, 0x1f644,
+ 0x1f6d0, 0x1f6df,
+ 0x1f6ed, 0x1f6ef,
+ 0x1f6f4, 0x1f6ff,
+ 0x1f774, 0x1f77f,
+ 0x1f7d5, 0x1f7ff,
+ 0x1f80c, 0x1f80f,
+ 0x1f848, 0x1f84f,
+ 0x1f85a, 0x1f85f,
+ 0x1f888, 0x1f88f,
+ 0x1f8ae, 0x1ffff,
+ 0x2a6d7, 0x2a6ff,
+ 0x2b735, 0x2b73f,
+ 0x2b81e, 0x2f7ff,
+ 0x2fa1e, 0xe00ff,
+ 0xe01f0, 0x10ffff
+};
+UCP_FN(C)
+
+static const unichar ucp_Cc_def[] = {
+ 0x0, 0x1f,
+ 0x7f, 0x9f
+};
+UCP_FN(Cc)
+
+static const unichar ucp_Cf_def[] = {
+ 0xad, 0xad,
+ 0x600, 0x605,
+ 0x61c, 0x61c,
+ 0x6dd, 0x6dd,
+ 0x70f, 0x70f,
+ 0x180e, 0x180e,
+ 0x200b, 0x200f,
+ 0x202a, 0x202e,
+ 0x2060, 0x2064,
+ 0x2066, 0x206f,
+ 0xfeff, 0xfeff,
+ 0xfff9, 0xfffb,
+ 0x110bd, 0x110bd,
+ 0x1bca0, 0x1bca3,
+ 0x1d173, 0x1d17a,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f
+};
+UCP_FN(Cf)
+
+static const unichar ucp_Cn_def[] = {
+ 0x378, 0x379,
+ 0x380, 0x383,
+ 0x38b, 0x38b,
+ 0x38d, 0x38d,
+ 0x3a2, 0x3a2,
+ 0x530, 0x530,
+ 0x557, 0x558,
+ 0x560, 0x560,
+ 0x588, 0x588,
+ 0x58b, 0x58c,
+ 0x590, 0x590,
+ 0x5c8, 0x5cf,
+ 0x5eb, 0x5ef,
+ 0x5f5, 0x5ff,
+ 0x61d, 0x61d,
+ 0x70e, 0x70e,
+ 0x74b, 0x74c,
+ 0x7b2, 0x7bf,
+ 0x7fb, 0x7ff,
+ 0x82e, 0x82f,
+ 0x83f, 0x83f,
+ 0x85c, 0x85d,
+ 0x85f, 0x89f,
+ 0x8b3, 0x8e3,
+ 0x984, 0x984,
+ 0x98d, 0x98e,
+ 0x991, 0x992,
+ 0x9a9, 0x9a9,
+ 0x9b1, 0x9b1,
+ 0x9b3, 0x9b5,
+ 0x9ba, 0x9bb,
+ 0x9c5, 0x9c6,
+ 0x9c9, 0x9ca,
+ 0x9cf, 0x9d6,
+ 0x9d8, 0x9db,
+ 0x9de, 0x9de,
+ 0x9e4, 0x9e5,
+ 0x9fc, 0xa00,
+ 0xa04, 0xa04,
+ 0xa0b, 0xa0e,
+ 0xa11, 0xa12,
+ 0xa29, 0xa29,
+ 0xa31, 0xa31,
+ 0xa34, 0xa34,
+ 0xa37, 0xa37,
+ 0xa3a, 0xa3b,
+ 0xa3d, 0xa3d,
+ 0xa43, 0xa46,
+ 0xa49, 0xa4a,
+ 0xa4e, 0xa50,
+ 0xa52, 0xa58,
+ 0xa5d, 0xa5d,
+ 0xa5f, 0xa65,
+ 0xa76, 0xa80,
+ 0xa84, 0xa84,
+ 0xa8e, 0xa8e,
+ 0xa92, 0xa92,
+ 0xaa9, 0xaa9,
+ 0xab1, 0xab1,
+ 0xab4, 0xab4,
+ 0xaba, 0xabb,
+ 0xac6, 0xac6,
+ 0xaca, 0xaca,
+ 0xace, 0xacf,
+ 0xad1, 0xadf,
+ 0xae4, 0xae5,
+ 0xaf2, 0xb00,
+ 0xb04, 0xb04,
+ 0xb0d, 0xb0e,
+ 0xb11, 0xb12,
+ 0xb29, 0xb29,
+ 0xb31, 0xb31,
+ 0xb34, 0xb34,
+ 0xb3a, 0xb3b,
+ 0xb45, 0xb46,
+ 0xb49, 0xb4a,
+ 0xb4e, 0xb55,
+ 0xb58, 0xb5b,
+ 0xb5e, 0xb5e,
+ 0xb64, 0xb65,
+ 0xb78, 0xb81,
+ 0xb84, 0xb84,
+ 0xb8b, 0xb8d,
+ 0xb91, 0xb91,
+ 0xb96, 0xb98,
+ 0xb9b, 0xb9b,
+ 0xb9d, 0xb9d,
+ 0xba0, 0xba2,
+ 0xba5, 0xba7,
+ 0xbab, 0xbad,
+ 0xbba, 0xbbd,
+ 0xbc3, 0xbc5,
+ 0xbc9, 0xbc9,
+ 0xbce, 0xbcf,
+ 0xbd1, 0xbd6,
+ 0xbd8, 0xbe5,
+ 0xbfb, 0xbff,
+ 0xc04, 0xc04,
+ 0xc0d, 0xc0d,
+ 0xc11, 0xc11,
+ 0xc29, 0xc29,
+ 0xc3a, 0xc3c,
+ 0xc45, 0xc45,
+ 0xc49, 0xc49,
+ 0xc4e, 0xc54,
+ 0xc57, 0xc57,
+ 0xc5a, 0xc5f,
+ 0xc64, 0xc65,
+ 0xc70, 0xc77,
+ 0xc80, 0xc80,
+ 0xc84, 0xc84,
+ 0xc8d, 0xc8d,
+ 0xc91, 0xc91,
+ 0xca9, 0xca9,
+ 0xcb4, 0xcb4,
+ 0xcba, 0xcbb,
+ 0xcc5, 0xcc5,
+ 0xcc9, 0xcc9,
+ 0xcce, 0xcd4,
+ 0xcd7, 0xcdd,
+ 0xcdf, 0xcdf,
+ 0xce4, 0xce5,
+ 0xcf0, 0xcf0,
+ 0xcf3, 0xd00,
+ 0xd04, 0xd04,
+ 0xd0d, 0xd0d,
+ 0xd11, 0xd11,
+ 0xd3b, 0xd3c,
+ 0xd45, 0xd45,
+ 0xd49, 0xd49,
+ 0xd4f, 0xd56,
+ 0xd58, 0xd5f,
+ 0xd64, 0xd65,
+ 0xd76, 0xd78,
+ 0xd80, 0xd81,
+ 0xd84, 0xd84,
+ 0xd97, 0xd99,
+ 0xdb2, 0xdb2,
+ 0xdbc, 0xdbc,
+ 0xdbe, 0xdbf,
+ 0xdc7, 0xdc9,
+ 0xdcb, 0xdce,
+ 0xdd5, 0xdd5,
+ 0xdd7, 0xdd7,
+ 0xde0, 0xde5,
+ 0xdf0, 0xdf1,
+ 0xdf5, 0xe00,
+ 0xe3b, 0xe3e,
+ 0xe5c, 0xe80,
+ 0xe83, 0xe83,
+ 0xe85, 0xe86,
+ 0xe89, 0xe89,
+ 0xe8b, 0xe8c,
+ 0xe8e, 0xe93,
+ 0xe98, 0xe98,
+ 0xea0, 0xea0,
+ 0xea4, 0xea4,
+ 0xea6, 0xea6,
+ 0xea8, 0xea9,
+ 0xeac, 0xeac,
+ 0xeba, 0xeba,
+ 0xebe, 0xebf,
+ 0xec5, 0xec5,
+ 0xec7, 0xec7,
+ 0xece, 0xecf,
+ 0xeda, 0xedb,
+ 0xee0, 0xeff,
+ 0xf48, 0xf48,
+ 0xf6d, 0xf70,
+ 0xf98, 0xf98,
+ 0xfbd, 0xfbd,
+ 0xfcd, 0xfcd,
+ 0xfdb, 0xfff,
+ 0x10c6, 0x10c6,
+ 0x10c8, 0x10cc,
+ 0x10ce, 0x10cf,
+ 0x1249, 0x1249,
+ 0x124e, 0x124f,
+ 0x1257, 0x1257,
+ 0x1259, 0x1259,
+ 0x125e, 0x125f,
+ 0x1289, 0x1289,
+ 0x128e, 0x128f,
+ 0x12b1, 0x12b1,
+ 0x12b6, 0x12b7,
+ 0x12bf, 0x12bf,
+ 0x12c1, 0x12c1,
+ 0x12c6, 0x12c7,
+ 0x12d7, 0x12d7,
+ 0x1311, 0x1311,
+ 0x1316, 0x1317,
+ 0x135b, 0x135c,
+ 0x137d, 0x137f,
+ 0x139a, 0x139f,
+ 0x13f5, 0x13ff,
+ 0x169d, 0x169f,
+ 0x16f9, 0x16ff,
+ 0x170d, 0x170d,
+ 0x1715, 0x171f,
+ 0x1737, 0x173f,
+ 0x1754, 0x175f,
+ 0x176d, 0x176d,
+ 0x1771, 0x1771,
+ 0x1774, 0x177f,
+ 0x17de, 0x17df,
+ 0x17ea, 0x17ef,
+ 0x17fa, 0x17ff,
+ 0x180f, 0x180f,
+ 0x181a, 0x181f,
+ 0x1878, 0x187f,
+ 0x18ab, 0x18af,
+ 0x18f6, 0x18ff,
+ 0x191f, 0x191f,
+ 0x192c, 0x192f,
+ 0x193c, 0x193f,
+ 0x1941, 0x1943,
+ 0x196e, 0x196f,
+ 0x1975, 0x197f,
+ 0x19ac, 0x19af,
+ 0x19ca, 0x19cf,
+ 0x19db, 0x19dd,
+ 0x1a1c, 0x1a1d,
+ 0x1a5f, 0x1a5f,
+ 0x1a7d, 0x1a7e,
+ 0x1a8a, 0x1a8f,
+ 0x1a9a, 0x1a9f,
+ 0x1aae, 0x1aaf,
+ 0x1abf, 0x1aff,
+ 0x1b4c, 0x1b4f,
+ 0x1b7d, 0x1b7f,
+ 0x1bf4, 0x1bfb,
+ 0x1c38, 0x1c3a,
+ 0x1c4a, 0x1c4c,
+ 0x1c80, 0x1cbf,
+ 0x1cc8, 0x1ccf,
+ 0x1cf7, 0x1cf7,
+ 0x1cfa, 0x1cff,
+ 0x1df6, 0x1dfb,
+ 0x1f16, 0x1f17,
+ 0x1f1e, 0x1f1f,
+ 0x1f46, 0x1f47,
+ 0x1f4e, 0x1f4f,
+ 0x1f58, 0x1f58,
+ 0x1f5a, 0x1f5a,
+ 0x1f5c, 0x1f5c,
+ 0x1f5e, 0x1f5e,
+ 0x1f7e, 0x1f7f,
+ 0x1fb5, 0x1fb5,
+ 0x1fc5, 0x1fc5,
+ 0x1fd4, 0x1fd5,
+ 0x1fdc, 0x1fdc,
+ 0x1ff0, 0x1ff1,
+ 0x1ff5, 0x1ff5,
+ 0x1fff, 0x1fff,
+ 0x2065, 0x2065,
+ 0x2072, 0x2073,
+ 0x208f, 0x208f,
+ 0x209d, 0x209f,
+ 0x20be, 0x20cf,
+ 0x20f1, 0x20ff,
+ 0x218a, 0x218f,
+ 0x23fb, 0x23ff,
+ 0x2427, 0x243f,
+ 0x244b, 0x245f,
+ 0x2b74, 0x2b75,
+ 0x2b96, 0x2b97,
+ 0x2bba, 0x2bbc,
+ 0x2bc9, 0x2bc9,
+ 0x2bd2, 0x2bff,
+ 0x2c2f, 0x2c2f,
+ 0x2c5f, 0x2c5f,
+ 0x2cf4, 0x2cf8,
+ 0x2d26, 0x2d26,
+ 0x2d28, 0x2d2c,
+ 0x2d2e, 0x2d2f,
+ 0x2d68, 0x2d6e,
+ 0x2d71, 0x2d7e,
+ 0x2d97, 0x2d9f,
+ 0x2da7, 0x2da7,
+ 0x2daf, 0x2daf,
+ 0x2db7, 0x2db7,
+ 0x2dbf, 0x2dbf,
+ 0x2dc7, 0x2dc7,
+ 0x2dcf, 0x2dcf,
+ 0x2dd7, 0x2dd7,
+ 0x2ddf, 0x2ddf,
+ 0x2e43, 0x2e7f,
+ 0x2e9a, 0x2e9a,
+ 0x2ef4, 0x2eff,
+ 0x2fd6, 0x2fef,
+ 0x2ffc, 0x2fff,
+ 0x3040, 0x3040,
+ 0x3097, 0x3098,
+ 0x3100, 0x3104,
+ 0x312e, 0x3130,
+ 0x318f, 0x318f,
+ 0x31bb, 0x31bf,
+ 0x31e4, 0x31ef,
+ 0x321f, 0x321f,
+ 0x32ff, 0x32ff,
+ 0x4db6, 0x4dbf,
+ 0x9fcd, 0x9fff,
+ 0xa48d, 0xa48f,
+ 0xa4c7, 0xa4cf,
+ 0xa62c, 0xa63f,
+ 0xa69e, 0xa69e,
+ 0xa6f8, 0xa6ff,
+ 0xa78f, 0xa78f,
+ 0xa7ae, 0xa7af,
+ 0xa7b2, 0xa7f6,
+ 0xa82c, 0xa82f,
+ 0xa83a, 0xa83f,
+ 0xa878, 0xa87f,
+ 0xa8c5, 0xa8cd,
+ 0xa8da, 0xa8df,
+ 0xa8fc, 0xa8ff,
+ 0xa954, 0xa95e,
+ 0xa97d, 0xa97f,
+ 0xa9ce, 0xa9ce,
+ 0xa9da, 0xa9dd,
+ 0xa9ff, 0xa9ff,
+ 0xaa37, 0xaa3f,
+ 0xaa4e, 0xaa4f,
+ 0xaa5a, 0xaa5b,
+ 0xaac3, 0xaada,
+ 0xaaf7, 0xab00,
+ 0xab07, 0xab08,
+ 0xab0f, 0xab10,
+ 0xab17, 0xab1f,
+ 0xab27, 0xab27,
+ 0xab2f, 0xab2f,
+ 0xab60, 0xab63,
+ 0xab66, 0xabbf,
+ 0xabee, 0xabef,
+ 0xabfa, 0xabff,
+ 0xd7a4, 0xd7af,
+ 0xd7c7, 0xd7ca,
+ 0xd7fc, 0xd7ff,
+ 0xfa6e, 0xfa6f,
+ 0xfada, 0xfaff,
+ 0xfb07, 0xfb12,
+ 0xfb18, 0xfb1c,
+ 0xfb37, 0xfb37,
+ 0xfb3d, 0xfb3d,
+ 0xfb3f, 0xfb3f,
+ 0xfb42, 0xfb42,
+ 0xfb45, 0xfb45,
+ 0xfbc2, 0xfbd2,
+ 0xfd40, 0xfd4f,
+ 0xfd90, 0xfd91,
+ 0xfdc8, 0xfdef,
+ 0xfdfe, 0xfdff,
+ 0xfe1a, 0xfe1f,
+ 0xfe2e, 0xfe2f,
+ 0xfe53, 0xfe53,
+ 0xfe67, 0xfe67,
+ 0xfe6c, 0xfe6f,
+ 0xfe75, 0xfe75,
+ 0xfefd, 0xfefe,
+ 0xff00, 0xff00,
+ 0xffbf, 0xffc1,
+ 0xffc8, 0xffc9,
+ 0xffd0, 0xffd1,
+ 0xffd8, 0xffd9,
+ 0xffdd, 0xffdf,
+ 0xffe7, 0xffe7,
+ 0xffef, 0xfff8,
+ 0xfffe, 0xffff,
+ 0x1000c, 0x1000c,
+ 0x10027, 0x10027,
+ 0x1003b, 0x1003b,
+ 0x1003e, 0x1003e,
+ 0x1004e, 0x1004f,
+ 0x1005e, 0x1007f,
+ 0x100fb, 0x100ff,
+ 0x10103, 0x10106,
+ 0x10134, 0x10136,
+ 0x1018d, 0x1018f,
+ 0x1019c, 0x1019f,
+ 0x101a1, 0x101cf,
+ 0x101fe, 0x1027f,
+ 0x1029d, 0x1029f,
+ 0x102d1, 0x102df,
+ 0x102fc, 0x102ff,
+ 0x10324, 0x1032f,
+ 0x1034b, 0x1034f,
+ 0x1037b, 0x1037f,
+ 0x1039e, 0x1039e,
+ 0x103c4, 0x103c7,
+ 0x103d6, 0x103ff,
+ 0x1049e, 0x1049f,
+ 0x104aa, 0x104ff,
+ 0x10528, 0x1052f,
+ 0x10564, 0x1056e,
+ 0x10570, 0x105ff,
+ 0x10737, 0x1073f,
+ 0x10756, 0x1075f,
+ 0x10768, 0x107ff,
+ 0x10806, 0x10807,
+ 0x10809, 0x10809,
+ 0x10836, 0x10836,
+ 0x10839, 0x1083b,
+ 0x1083d, 0x1083e,
+ 0x10856, 0x10856,
+ 0x1089f, 0x108a6,
+ 0x108b0, 0x108ff,
+ 0x1091c, 0x1091e,
+ 0x1093a, 0x1093e,
+ 0x10940, 0x1097f,
+ 0x109b8, 0x109bd,
+ 0x109c0, 0x109ff,
+ 0x10a04, 0x10a04,
+ 0x10a07, 0x10a0b,
+ 0x10a14, 0x10a14,
+ 0x10a18, 0x10a18,
+ 0x10a34, 0x10a37,
+ 0x10a3b, 0x10a3e,
+ 0x10a48, 0x10a4f,
+ 0x10a59, 0x10a5f,
+ 0x10aa0, 0x10abf,
+ 0x10ae7, 0x10aea,
+ 0x10af7, 0x10aff,
+ 0x10b36, 0x10b38,
+ 0x10b56, 0x10b57,
+ 0x10b73, 0x10b77,
+ 0x10b92, 0x10b98,
+ 0x10b9d, 0x10ba8,
+ 0x10bb0, 0x10bff,
+ 0x10c49, 0x10e5f,
+ 0x10e7f, 0x10fff,
+ 0x1104e, 0x11051,
+ 0x11070, 0x1107e,
+ 0x110c2, 0x110cf,
+ 0x110e9, 0x110ef,
+ 0x110fa, 0x110ff,
+ 0x11135, 0x11135,
+ 0x11144, 0x1114f,
+ 0x11177, 0x1117f,
+ 0x111c9, 0x111cc,
+ 0x111ce, 0x111cf,
+ 0x111db, 0x111e0,
+ 0x111f5, 0x111ff,
+ 0x11212, 0x11212,
+ 0x1123e, 0x112af,
+ 0x112eb, 0x112ef,
+ 0x112fa, 0x11300,
+ 0x11304, 0x11304,
+ 0x1130d, 0x1130e,
+ 0x11311, 0x11312,
+ 0x11329, 0x11329,
+ 0x11331, 0x11331,
+ 0x11334, 0x11334,
+ 0x1133a, 0x1133b,
+ 0x11345, 0x11346,
+ 0x11349, 0x1134a,
+ 0x1134e, 0x11356,
+ 0x11358, 0x1135c,
+ 0x11364, 0x11365,
+ 0x1136d, 0x1136f,
+ 0x11375, 0x1147f,
+ 0x114c8, 0x114cf,
+ 0x114da, 0x1157f,
+ 0x115b6, 0x115b7,
+ 0x115ca, 0x115ff,
+ 0x11645, 0x1164f,
+ 0x1165a, 0x1167f,
+ 0x116b8, 0x116bf,
+ 0x116ca, 0x1189f,
+ 0x118f3, 0x118fe,
+ 0x11900, 0x11abf,
+ 0x11af9, 0x11fff,
+ 0x12399, 0x123ff,
+ 0x1246f, 0x1246f,
+ 0x12475, 0x12fff,
+ 0x1342f, 0x167ff,
+ 0x16a39, 0x16a3f,
+ 0x16a5f, 0x16a5f,
+ 0x16a6a, 0x16a6d,
+ 0x16a70, 0x16acf,
+ 0x16aee, 0x16aef,
+ 0x16af6, 0x16aff,
+ 0x16b46, 0x16b4f,
+ 0x16b5a, 0x16b5a,
+ 0x16b62, 0x16b62,
+ 0x16b78, 0x16b7c,
+ 0x16b90, 0x16eff,
+ 0x16f45, 0x16f4f,
+ 0x16f7f, 0x16f8e,
+ 0x16fa0, 0x1afff,
+ 0x1b002, 0x1bbff,
+ 0x1bc6b, 0x1bc6f,
+ 0x1bc7d, 0x1bc7f,
+ 0x1bc89, 0x1bc8f,
+ 0x1bc9a, 0x1bc9b,
+ 0x1bca4, 0x1cfff,
+ 0x1d0f6, 0x1d0ff,
+ 0x1d127, 0x1d128,
+ 0x1d1de, 0x1d1ff,
+ 0x1d246, 0x1d2ff,
+ 0x1d357, 0x1d35f,
+ 0x1d372, 0x1d3ff,
+ 0x1d455, 0x1d455,
+ 0x1d49d, 0x1d49d,
+ 0x1d4a0, 0x1d4a1,
+ 0x1d4a3, 0x1d4a4,
+ 0x1d4a7, 0x1d4a8,
+ 0x1d4ad, 0x1d4ad,
+ 0x1d4ba, 0x1d4ba,
+ 0x1d4bc, 0x1d4bc,
+ 0x1d4c4, 0x1d4c4,
+ 0x1d506, 0x1d506,
+ 0x1d50b, 0x1d50c,
+ 0x1d515, 0x1d515,
+ 0x1d51d, 0x1d51d,
+ 0x1d53a, 0x1d53a,
+ 0x1d53f, 0x1d53f,
+ 0x1d545, 0x1d545,
+ 0x1d547, 0x1d549,
+ 0x1d551, 0x1d551,
+ 0x1d6a6, 0x1d6a7,
+ 0x1d7cc, 0x1d7cd,
+ 0x1d800, 0x1e7ff,
+ 0x1e8c5, 0x1e8c6,
+ 0x1e8d7, 0x1edff,
+ 0x1ee04, 0x1ee04,
+ 0x1ee20, 0x1ee20,
+ 0x1ee23, 0x1ee23,
+ 0x1ee25, 0x1ee26,
+ 0x1ee28, 0x1ee28,
+ 0x1ee33, 0x1ee33,
+ 0x1ee38, 0x1ee38,
+ 0x1ee3a, 0x1ee3a,
+ 0x1ee3c, 0x1ee41,
+ 0x1ee43, 0x1ee46,
+ 0x1ee48, 0x1ee48,
+ 0x1ee4a, 0x1ee4a,
+ 0x1ee4c, 0x1ee4c,
+ 0x1ee50, 0x1ee50,
+ 0x1ee53, 0x1ee53,
+ 0x1ee55, 0x1ee56,
+ 0x1ee58, 0x1ee58,
+ 0x1ee5a, 0x1ee5a,
+ 0x1ee5c, 0x1ee5c,
+ 0x1ee5e, 0x1ee5e,
+ 0x1ee60, 0x1ee60,
+ 0x1ee63, 0x1ee63,
+ 0x1ee65, 0x1ee66,
+ 0x1ee6b, 0x1ee6b,
+ 0x1ee73, 0x1ee73,
+ 0x1ee78, 0x1ee78,
+ 0x1ee7d, 0x1ee7d,
+ 0x1ee7f, 0x1ee7f,
+ 0x1ee8a, 0x1ee8a,
+ 0x1ee9c, 0x1eea0,
+ 0x1eea4, 0x1eea4,
+ 0x1eeaa, 0x1eeaa,
+ 0x1eebc, 0x1eeef,
+ 0x1eef2, 0x1efff,
+ 0x1f02c, 0x1f02f,
+ 0x1f094, 0x1f09f,
+ 0x1f0af, 0x1f0b0,
+ 0x1f0c0, 0x1f0c0,
+ 0x1f0d0, 0x1f0d0,
+ 0x1f0f6, 0x1f0ff,
+ 0x1f10d, 0x1f10f,
+ 0x1f12f, 0x1f12f,
+ 0x1f16c, 0x1f16f,
+ 0x1f19b, 0x1f1e5,
+ 0x1f203, 0x1f20f,
+ 0x1f23b, 0x1f23f,
+ 0x1f249, 0x1f24f,
+ 0x1f252, 0x1f2ff,
+ 0x1f32d, 0x1f32f,
+ 0x1f37e, 0x1f37f,
+ 0x1f3cf, 0x1f3d3,
+ 0x1f3f8, 0x1f3ff,
+ 0x1f4ff, 0x1f4ff,
+ 0x1f54b, 0x1f54f,
+ 0x1f57a, 0x1f57a,
+ 0x1f5a4, 0x1f5a4,
+ 0x1f643, 0x1f644,
+ 0x1f6d0, 0x1f6df,
+ 0x1f6ed, 0x1f6ef,
+ 0x1f6f4, 0x1f6ff,
+ 0x1f774, 0x1f77f,
+ 0x1f7d5, 0x1f7ff,
+ 0x1f80c, 0x1f80f,
+ 0x1f848, 0x1f84f,
+ 0x1f85a, 0x1f85f,
+ 0x1f888, 0x1f88f,
+ 0x1f8ae, 0x1ffff,
+ 0x2a6d7, 0x2a6ff,
+ 0x2b735, 0x2b73f,
+ 0x2b81e, 0x2f7ff,
+ 0x2fa1e, 0xe0000,
+ 0xe0002, 0xe001f,
+ 0xe0080, 0xe00ff,
+ 0xe01f0, 0xeffff,
+ 0xffffe, 0xfffff,
+ 0x10fffe, 0x10ffff
+};
+UCP_FN(Cn)
+
+static const unichar ucp_Co_def[] = {
+ 0xe000, 0xf8ff,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+};
+UCP_FN(Co)
+
+static const unichar ucp_Cs_def[] = {
+ 0xd800, 0xdfff
+};
+UCP_FN(Cs)
+
+static const unichar ucp_L_def[] = {
+ 0x41, 0x5a,
+ 0x61, 0x7a,
+ 0xaa, 0xaa,
+ 0xb5, 0xb5,
+ 0xba, 0xba,
+ 0xc0, 0xd6,
+ 0xd8, 0xf6,
+ 0xf8, 0x2c1,
+ 0x2c6, 0x2d1,
+ 0x2e0, 0x2e4,
+ 0x2ec, 0x2ec,
+ 0x2ee, 0x2ee,
+ 0x370, 0x374,
+ 0x376, 0x377,
+ 0x37a, 0x37d,
+ 0x37f, 0x37f,
+ 0x386, 0x386,
+ 0x388, 0x38a,
+ 0x38c, 0x38c,
+ 0x38e, 0x3a1,
+ 0x3a3, 0x3f5,
+ 0x3f7, 0x481,
+ 0x48a, 0x52f,
+ 0x531, 0x556,
+ 0x559, 0x559,
+ 0x561, 0x587,
+ 0x5d0, 0x5ea,
+ 0x5f0, 0x5f2,
+ 0x620, 0x64a,
+ 0x66e, 0x66f,
+ 0x671, 0x6d3,
+ 0x6d5, 0x6d5,
+ 0x6e5, 0x6e6,
+ 0x6ee, 0x6ef,
+ 0x6fa, 0x6fc,
+ 0x6ff, 0x6ff,
+ 0x710, 0x710,
+ 0x712, 0x72f,
+ 0x74d, 0x7a5,
+ 0x7b1, 0x7b1,
+ 0x7ca, 0x7ea,
+ 0x7f4, 0x7f5,
+ 0x7fa, 0x7fa,
+ 0x800, 0x815,
+ 0x81a, 0x81a,
+ 0x824, 0x824,
+ 0x828, 0x828,
+ 0x840, 0x858,
+ 0x8a0, 0x8b2,
+ 0x904, 0x939,
+ 0x93d, 0x93d,
+ 0x950, 0x950,
+ 0x958, 0x961,
+ 0x971, 0x980,
+ 0x985, 0x98c,
+ 0x98f, 0x990,
+ 0x993, 0x9a8,
+ 0x9aa, 0x9b0,
+ 0x9b2, 0x9b2,
+ 0x9b6, 0x9b9,
+ 0x9bd, 0x9bd,
+ 0x9ce, 0x9ce,
+ 0x9dc, 0x9dd,
+ 0x9df, 0x9e1,
+ 0x9f0, 0x9f1,
+ 0xa05, 0xa0a,
+ 0xa0f, 0xa10,
+ 0xa13, 0xa28,
+ 0xa2a, 0xa30,
+ 0xa32, 0xa33,
+ 0xa35, 0xa36,
+ 0xa38, 0xa39,
+ 0xa59, 0xa5c,
+ 0xa5e, 0xa5e,
+ 0xa72, 0xa74,
+ 0xa85, 0xa8d,
+ 0xa8f, 0xa91,
+ 0xa93, 0xaa8,
+ 0xaaa, 0xab0,
+ 0xab2, 0xab3,
+ 0xab5, 0xab9,
+ 0xabd, 0xabd,
+ 0xad0, 0xad0,
+ 0xae0, 0xae1,
+ 0xb05, 0xb0c,
+ 0xb0f, 0xb10,
+ 0xb13, 0xb28,
+ 0xb2a, 0xb30,
+ 0xb32, 0xb33,
+ 0xb35, 0xb39,
+ 0xb3d, 0xb3d,
+ 0xb5c, 0xb5d,
+ 0xb5f, 0xb61,
+ 0xb71, 0xb71,
+ 0xb83, 0xb83,
+ 0xb85, 0xb8a,
+ 0xb8e, 0xb90,
+ 0xb92, 0xb95,
+ 0xb99, 0xb9a,
+ 0xb9c, 0xb9c,
+ 0xb9e, 0xb9f,
+ 0xba3, 0xba4,
+ 0xba8, 0xbaa,
+ 0xbae, 0xbb9,
+ 0xbd0, 0xbd0,
+ 0xc05, 0xc0c,
+ 0xc0e, 0xc10,
+ 0xc12, 0xc28,
+ 0xc2a, 0xc39,
+ 0xc3d, 0xc3d,
+ 0xc58, 0xc59,
+ 0xc60, 0xc61,
+ 0xc85, 0xc8c,
+ 0xc8e, 0xc90,
+ 0xc92, 0xca8,
+ 0xcaa, 0xcb3,
+ 0xcb5, 0xcb9,
+ 0xcbd, 0xcbd,
+ 0xcde, 0xcde,
+ 0xce0, 0xce1,
+ 0xcf1, 0xcf2,
+ 0xd05, 0xd0c,
+ 0xd0e, 0xd10,
+ 0xd12, 0xd3a,
+ 0xd3d, 0xd3d,
+ 0xd4e, 0xd4e,
+ 0xd60, 0xd61,
+ 0xd7a, 0xd7f,
+ 0xd85, 0xd96,
+ 0xd9a, 0xdb1,
+ 0xdb3, 0xdbb,
+ 0xdbd, 0xdbd,
+ 0xdc0, 0xdc6,
+ 0xe01, 0xe30,
+ 0xe32, 0xe33,
+ 0xe40, 0xe46,
+ 0xe81, 0xe82,
+ 0xe84, 0xe84,
+ 0xe87, 0xe88,
+ 0xe8a, 0xe8a,
+ 0xe8d, 0xe8d,
+ 0xe94, 0xe97,
+ 0xe99, 0xe9f,
+ 0xea1, 0xea3,
+ 0xea5, 0xea5,
+ 0xea7, 0xea7,
+ 0xeaa, 0xeab,
+ 0xead, 0xeb0,
+ 0xeb2, 0xeb3,
+ 0xebd, 0xebd,
+ 0xec0, 0xec4,
+ 0xec6, 0xec6,
+ 0xedc, 0xedf,
+ 0xf00, 0xf00,
+ 0xf40, 0xf47,
+ 0xf49, 0xf6c,
+ 0xf88, 0xf8c,
+ 0x1000, 0x102a,
+ 0x103f, 0x103f,
+ 0x1050, 0x1055,
+ 0x105a, 0x105d,
+ 0x1061, 0x1061,
+ 0x1065, 0x1066,
+ 0x106e, 0x1070,
+ 0x1075, 0x1081,
+ 0x108e, 0x108e,
+ 0x10a0, 0x10c5,
+ 0x10c7, 0x10c7,
+ 0x10cd, 0x10cd,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x167f,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16f1, 0x16f8,
+ 0x1700, 0x170c,
+ 0x170e, 0x1711,
+ 0x1720, 0x1731,
+ 0x1740, 0x1751,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1780, 0x17b3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dc,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a8,
+ 0x18aa, 0x18aa,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191e,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19c1, 0x19c7,
+ 0x1a00, 0x1a16,
+ 0x1a20, 0x1a54,
+ 0x1aa7, 0x1aa7,
+ 0x1b05, 0x1b33,
+ 0x1b45, 0x1b4b,
+ 0x1b83, 0x1ba0,
+ 0x1bae, 0x1baf,
+ 0x1bba, 0x1be5,
+ 0x1c00, 0x1c23,
+ 0x1c4d, 0x1c4f,
+ 0x1c5a, 0x1c7d,
+ 0x1ce9, 0x1cec,
+ 0x1cee, 0x1cf1,
+ 0x1cf5, 0x1cf6,
+ 0x1d00, 0x1dbf,
+ 0x1e00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x209c,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x214e, 0x214e,
+ 0x2183, 0x2184,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2ce4,
+ 0x2ceb, 0x2cee,
+ 0x2cf2, 0x2cf3,
+ 0x2d00, 0x2d25,
+ 0x2d27, 0x2d27,
+ 0x2d2d, 0x2d2d,
+ 0x2d30, 0x2d67,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2e2f, 0x2e2f,
+ 0x3005, 0x3006,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31ba,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcc,
+ 0xa000, 0xa48c,
+ 0xa4d0, 0xa4fd,
+ 0xa500, 0xa60c,
+ 0xa610, 0xa61f,
+ 0xa62a, 0xa62b,
+ 0xa640, 0xa66e,
+ 0xa67f, 0xa69d,
+ 0xa6a0, 0xa6e5,
+ 0xa717, 0xa71f,
+ 0xa722, 0xa788,
+ 0xa78b, 0xa78e,
+ 0xa790, 0xa7ad,
+ 0xa7b0, 0xa7b1,
+ 0xa7f7, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa822,
+ 0xa840, 0xa873,
+ 0xa882, 0xa8b3,
+ 0xa8f2, 0xa8f7,
+ 0xa8fb, 0xa8fb,
+ 0xa90a, 0xa925,
+ 0xa930, 0xa946,
+ 0xa960, 0xa97c,
+ 0xa984, 0xa9b2,
+ 0xa9cf, 0xa9cf,
+ 0xa9e0, 0xa9e4,
+ 0xa9e6, 0xa9ef,
+ 0xa9fa, 0xa9fe,
+ 0xaa00, 0xaa28,
+ 0xaa40, 0xaa42,
+ 0xaa44, 0xaa4b,
+ 0xaa60, 0xaa76,
+ 0xaa7a, 0xaa7a,
+ 0xaa7e, 0xaaaf,
+ 0xaab1, 0xaab1,
+ 0xaab5, 0xaab6,
+ 0xaab9, 0xaabd,
+ 0xaac0, 0xaac0,
+ 0xaac2, 0xaac2,
+ 0xaadb, 0xaadd,
+ 0xaae0, 0xaaea,
+ 0xaaf2, 0xaaf4,
+ 0xab01, 0xab06,
+ 0xab09, 0xab0e,
+ 0xab11, 0xab16,
+ 0xab20, 0xab26,
+ 0xab28, 0xab2e,
+ 0xab30, 0xab5a,
+ 0xab5c, 0xab5f,
+ 0xab64, 0xab65,
+ 0xabc0, 0xabe2,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xf900, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb1d,
+ 0xfb1f, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x10300, 0x1031f,
+ 0x10330, 0x10340,
+ 0x10342, 0x10349,
+ 0x10350, 0x10375,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x10400, 0x1049d,
+ 0x10500, 0x10527,
+ 0x10530, 0x10563,
+ 0x10600, 0x10736,
+ 0x10740, 0x10755,
+ 0x10760, 0x10767,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x10855,
+ 0x10860, 0x10876,
+ 0x10880, 0x1089e,
+ 0x10900, 0x10915,
+ 0x10920, 0x10939,
+ 0x10980, 0x109b7,
+ 0x109be, 0x109bf,
+ 0x10a00, 0x10a00,
+ 0x10a10, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a60, 0x10a7c,
+ 0x10a80, 0x10a9c,
+ 0x10ac0, 0x10ac7,
+ 0x10ac9, 0x10ae4,
+ 0x10b00, 0x10b35,
+ 0x10b40, 0x10b55,
+ 0x10b60, 0x10b72,
+ 0x10b80, 0x10b91,
+ 0x10c00, 0x10c48,
+ 0x11003, 0x11037,
+ 0x11083, 0x110af,
+ 0x110d0, 0x110e8,
+ 0x11103, 0x11126,
+ 0x11150, 0x11172,
+ 0x11176, 0x11176,
+ 0x11183, 0x111b2,
+ 0x111c1, 0x111c4,
+ 0x111da, 0x111da,
+ 0x11200, 0x11211,
+ 0x11213, 0x1122b,
+ 0x112b0, 0x112de,
+ 0x11305, 0x1130c,
+ 0x1130f, 0x11310,
+ 0x11313, 0x11328,
+ 0x1132a, 0x11330,
+ 0x11332, 0x11333,
+ 0x11335, 0x11339,
+ 0x1133d, 0x1133d,
+ 0x1135d, 0x11361,
+ 0x11480, 0x114af,
+ 0x114c4, 0x114c5,
+ 0x114c7, 0x114c7,
+ 0x11580, 0x115ae,
+ 0x11600, 0x1162f,
+ 0x11644, 0x11644,
+ 0x11680, 0x116aa,
+ 0x118a0, 0x118df,
+ 0x118ff, 0x118ff,
+ 0x11ac0, 0x11af8,
+ 0x12000, 0x12398,
+ 0x13000, 0x1342e,
+ 0x16800, 0x16a38,
+ 0x16a40, 0x16a5e,
+ 0x16ad0, 0x16aed,
+ 0x16b00, 0x16b2f,
+ 0x16b40, 0x16b43,
+ 0x16b63, 0x16b77,
+ 0x16b7d, 0x16b8f,
+ 0x16f00, 0x16f44,
+ 0x16f50, 0x16f50,
+ 0x16f93, 0x16f9f,
+ 0x1b000, 0x1b001,
+ 0x1bc00, 0x1bc6a,
+ 0x1bc70, 0x1bc7c,
+ 0x1bc80, 0x1bc88,
+ 0x1bc90, 0x1bc99,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7cb,
+ 0x1e800, 0x1e8c4,
+ 0x1ee00, 0x1ee03,
+ 0x1ee05, 0x1ee1f,
+ 0x1ee21, 0x1ee22,
+ 0x1ee24, 0x1ee24,
+ 0x1ee27, 0x1ee27,
+ 0x1ee29, 0x1ee32,
+ 0x1ee34, 0x1ee37,
+ 0x1ee39, 0x1ee39,
+ 0x1ee3b, 0x1ee3b,
+ 0x1ee42, 0x1ee42,
+ 0x1ee47, 0x1ee47,
+ 0x1ee49, 0x1ee49,
+ 0x1ee4b, 0x1ee4b,
+ 0x1ee4d, 0x1ee4f,
+ 0x1ee51, 0x1ee52,
+ 0x1ee54, 0x1ee54,
+ 0x1ee57, 0x1ee57,
+ 0x1ee59, 0x1ee59,
+ 0x1ee5b, 0x1ee5b,
+ 0x1ee5d, 0x1ee5d,
+ 0x1ee5f, 0x1ee5f,
+ 0x1ee61, 0x1ee62,
+ 0x1ee64, 0x1ee64,
+ 0x1ee67, 0x1ee6a,
+ 0x1ee6c, 0x1ee72,
+ 0x1ee74, 0x1ee77,
+ 0x1ee79, 0x1ee7c,
+ 0x1ee7e, 0x1ee7e,
+ 0x1ee80, 0x1ee89,
+ 0x1ee8b, 0x1ee9b,
+ 0x1eea1, 0x1eea3,
+ 0x1eea5, 0x1eea9,
+ 0x1eeab, 0x1eebb,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2b740, 0x2b81d,
+ 0x2f800, 0x2fa1d
+};
+UCP_FN(L)
+
+static const unichar ucp_L_and_def[] = {
+ 0x41, 0x5a,
+ 0x61, 0x7a,
+ 0xb5, 0xb5,
+ 0xc0, 0xd6,
+ 0xd8, 0xf6,
+ 0xf8, 0x1ba,
+ 0x1bc, 0x1bf,
+ 0x1c4, 0x293,
+ 0x295, 0x2af,
+ 0x370, 0x373,
+ 0x376, 0x377,
+ 0x37b, 0x37d,
+ 0x37f, 0x37f,
+ 0x386, 0x386,
+ 0x388, 0x38a,
+ 0x38c, 0x38c,
+ 0x38e, 0x3a1,
+ 0x3a3, 0x3f5,
+ 0x3f7, 0x481,
+ 0x48a, 0x52f,
+ 0x531, 0x556,
+ 0x561, 0x587,
+ 0x10a0, 0x10c5,
+ 0x10c7, 0x10c7,
+ 0x10cd, 0x10cd,
+ 0x1d00, 0x1d2b,
+ 0x1d6b, 0x1d77,
+ 0x1d79, 0x1d9a,
+ 0x1e00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2134,
+ 0x2139, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x214e, 0x214e,
+ 0x2183, 0x2184,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2c7b,
+ 0x2c7e, 0x2ce4,
+ 0x2ceb, 0x2cee,
+ 0x2cf2, 0x2cf3,
+ 0x2d00, 0x2d25,
+ 0x2d27, 0x2d27,
+ 0x2d2d, 0x2d2d,
+ 0xa640, 0xa66d,
+ 0xa680, 0xa69b,
+ 0xa722, 0xa76f,
+ 0xa771, 0xa787,
+ 0xa78b, 0xa78e,
+ 0xa790, 0xa7ad,
+ 0xa7b0, 0xa7b1,
+ 0xa7fa, 0xa7fa,
+ 0xab30, 0xab5a,
+ 0xab64, 0xab65,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0x10400, 0x1044f,
+ 0x118a0, 0x118df,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7cb
+};
+UCP_FN(L_and)
+
+static const unichar ucp_Ll_def[] = {
+ 0x61, 0x7a,
+ 0xb5, 0xb5,
+ 0xdf, 0xf6,
+ 0xf8, 0xff,
+ 0x101, 0x101,
+ 0x103, 0x103,
+ 0x105, 0x105,
+ 0x107, 0x107,
+ 0x109, 0x109,
+ 0x10b, 0x10b,
+ 0x10d, 0x10d,
+ 0x10f, 0x10f,
+ 0x111, 0x111,
+ 0x113, 0x113,
+ 0x115, 0x115,
+ 0x117, 0x117,
+ 0x119, 0x119,
+ 0x11b, 0x11b,
+ 0x11d, 0x11d,
+ 0x11f, 0x11f,
+ 0x121, 0x121,
+ 0x123, 0x123,
+ 0x125, 0x125,
+ 0x127, 0x127,
+ 0x129, 0x129,
+ 0x12b, 0x12b,
+ 0x12d, 0x12d,
+ 0x12f, 0x12f,
+ 0x131, 0x131,
+ 0x133, 0x133,
+ 0x135, 0x135,
+ 0x137, 0x138,
+ 0x13a, 0x13a,
+ 0x13c, 0x13c,
+ 0x13e, 0x13e,
+ 0x140, 0x140,
+ 0x142, 0x142,
+ 0x144, 0x144,
+ 0x146, 0x146,
+ 0x148, 0x149,
+ 0x14b, 0x14b,
+ 0x14d, 0x14d,
+ 0x14f, 0x14f,
+ 0x151, 0x151,
+ 0x153, 0x153,
+ 0x155, 0x155,
+ 0x157, 0x157,
+ 0x159, 0x159,
+ 0x15b, 0x15b,
+ 0x15d, 0x15d,
+ 0x15f, 0x15f,
+ 0x161, 0x161,
+ 0x163, 0x163,
+ 0x165, 0x165,
+ 0x167, 0x167,
+ 0x169, 0x169,
+ 0x16b, 0x16b,
+ 0x16d, 0x16d,
+ 0x16f, 0x16f,
+ 0x171, 0x171,
+ 0x173, 0x173,
+ 0x175, 0x175,
+ 0x177, 0x177,
+ 0x17a, 0x17a,
+ 0x17c, 0x17c,
+ 0x17e, 0x180,
+ 0x183, 0x183,
+ 0x185, 0x185,
+ 0x188, 0x188,
+ 0x18c, 0x18d,
+ 0x192, 0x192,
+ 0x195, 0x195,
+ 0x199, 0x19b,
+ 0x19e, 0x19e,
+ 0x1a1, 0x1a1,
+ 0x1a3, 0x1a3,
+ 0x1a5, 0x1a5,
+ 0x1a8, 0x1a8,
+ 0x1aa, 0x1ab,
+ 0x1ad, 0x1ad,
+ 0x1b0, 0x1b0,
+ 0x1b4, 0x1b4,
+ 0x1b6, 0x1b6,
+ 0x1b9, 0x1ba,
+ 0x1bd, 0x1bf,
+ 0x1c6, 0x1c6,
+ 0x1c9, 0x1c9,
+ 0x1cc, 0x1cc,
+ 0x1ce, 0x1ce,
+ 0x1d0, 0x1d0,
+ 0x1d2, 0x1d2,
+ 0x1d4, 0x1d4,
+ 0x1d6, 0x1d6,
+ 0x1d8, 0x1d8,
+ 0x1da, 0x1da,
+ 0x1dc, 0x1dd,
+ 0x1df, 0x1df,
+ 0x1e1, 0x1e1,
+ 0x1e3, 0x1e3,
+ 0x1e5, 0x1e5,
+ 0x1e7, 0x1e7,
+ 0x1e9, 0x1e9,
+ 0x1eb, 0x1eb,
+ 0x1ed, 0x1ed,
+ 0x1ef, 0x1f0,
+ 0x1f3, 0x1f3,
+ 0x1f5, 0x1f5,
+ 0x1f9, 0x1f9,
+ 0x1fb, 0x1fb,
+ 0x1fd, 0x1fd,
+ 0x1ff, 0x1ff,
+ 0x201, 0x201,
+ 0x203, 0x203,
+ 0x205, 0x205,
+ 0x207, 0x207,
+ 0x209, 0x209,
+ 0x20b, 0x20b,
+ 0x20d, 0x20d,
+ 0x20f, 0x20f,
+ 0x211, 0x211,
+ 0x213, 0x213,
+ 0x215, 0x215,
+ 0x217, 0x217,
+ 0x219, 0x219,
+ 0x21b, 0x21b,
+ 0x21d, 0x21d,
+ 0x21f, 0x21f,
+ 0x221, 0x221,
+ 0x223, 0x223,
+ 0x225, 0x225,
+ 0x227, 0x227,
+ 0x229, 0x229,
+ 0x22b, 0x22b,
+ 0x22d, 0x22d,
+ 0x22f, 0x22f,
+ 0x231, 0x231,
+ 0x233, 0x239,
+ 0x23c, 0x23c,
+ 0x23f, 0x240,
+ 0x242, 0x242,
+ 0x247, 0x247,
+ 0x249, 0x249,
+ 0x24b, 0x24b,
+ 0x24d, 0x24d,
+ 0x24f, 0x293,
+ 0x295, 0x2af,
+ 0x371, 0x371,
+ 0x373, 0x373,
+ 0x377, 0x377,
+ 0x37b, 0x37d,
+ 0x390, 0x390,
+ 0x3ac, 0x3ce,
+ 0x3d0, 0x3d1,
+ 0x3d5, 0x3d7,
+ 0x3d9, 0x3d9,
+ 0x3db, 0x3db,
+ 0x3dd, 0x3dd,
+ 0x3df, 0x3df,
+ 0x3e1, 0x3e1,
+ 0x3e3, 0x3e3,
+ 0x3e5, 0x3e5,
+ 0x3e7, 0x3e7,
+ 0x3e9, 0x3e9,
+ 0x3eb, 0x3eb,
+ 0x3ed, 0x3ed,
+ 0x3ef, 0x3f3,
+ 0x3f5, 0x3f5,
+ 0x3f8, 0x3f8,
+ 0x3fb, 0x3fc,
+ 0x430, 0x45f,
+ 0x461, 0x461,
+ 0x463, 0x463,
+ 0x465, 0x465,
+ 0x467, 0x467,
+ 0x469, 0x469,
+ 0x46b, 0x46b,
+ 0x46d, 0x46d,
+ 0x46f, 0x46f,
+ 0x471, 0x471,
+ 0x473, 0x473,
+ 0x475, 0x475,
+ 0x477, 0x477,
+ 0x479, 0x479,
+ 0x47b, 0x47b,
+ 0x47d, 0x47d,
+ 0x47f, 0x47f,
+ 0x481, 0x481,
+ 0x48b, 0x48b,
+ 0x48d, 0x48d,
+ 0x48f, 0x48f,
+ 0x491, 0x491,
+ 0x493, 0x493,
+ 0x495, 0x495,
+ 0x497, 0x497,
+ 0x499, 0x499,
+ 0x49b, 0x49b,
+ 0x49d, 0x49d,
+ 0x49f, 0x49f,
+ 0x4a1, 0x4a1,
+ 0x4a3, 0x4a3,
+ 0x4a5, 0x4a5,
+ 0x4a7, 0x4a7,
+ 0x4a9, 0x4a9,
+ 0x4ab, 0x4ab,
+ 0x4ad, 0x4ad,
+ 0x4af, 0x4af,
+ 0x4b1, 0x4b1,
+ 0x4b3, 0x4b3,
+ 0x4b5, 0x4b5,
+ 0x4b7, 0x4b7,
+ 0x4b9, 0x4b9,
+ 0x4bb, 0x4bb,
+ 0x4bd, 0x4bd,
+ 0x4bf, 0x4bf,
+ 0x4c2, 0x4c2,
+ 0x4c4, 0x4c4,
+ 0x4c6, 0x4c6,
+ 0x4c8, 0x4c8,
+ 0x4ca, 0x4ca,
+ 0x4cc, 0x4cc,
+ 0x4ce, 0x4cf,
+ 0x4d1, 0x4d1,
+ 0x4d3, 0x4d3,
+ 0x4d5, 0x4d5,
+ 0x4d7, 0x4d7,
+ 0x4d9, 0x4d9,
+ 0x4db, 0x4db,
+ 0x4dd, 0x4dd,
+ 0x4df, 0x4df,
+ 0x4e1, 0x4e1,
+ 0x4e3, 0x4e3,
+ 0x4e5, 0x4e5,
+ 0x4e7, 0x4e7,
+ 0x4e9, 0x4e9,
+ 0x4eb, 0x4eb,
+ 0x4ed, 0x4ed,
+ 0x4ef, 0x4ef,
+ 0x4f1, 0x4f1,
+ 0x4f3, 0x4f3,
+ 0x4f5, 0x4f5,
+ 0x4f7, 0x4f7,
+ 0x4f9, 0x4f9,
+ 0x4fb, 0x4fb,
+ 0x4fd, 0x4fd,
+ 0x4ff, 0x4ff,
+ 0x501, 0x501,
+ 0x503, 0x503,
+ 0x505, 0x505,
+ 0x507, 0x507,
+ 0x509, 0x509,
+ 0x50b, 0x50b,
+ 0x50d, 0x50d,
+ 0x50f, 0x50f,
+ 0x511, 0x511,
+ 0x513, 0x513,
+ 0x515, 0x515,
+ 0x517, 0x517,
+ 0x519, 0x519,
+ 0x51b, 0x51b,
+ 0x51d, 0x51d,
+ 0x51f, 0x51f,
+ 0x521, 0x521,
+ 0x523, 0x523,
+ 0x525, 0x525,
+ 0x527, 0x527,
+ 0x529, 0x529,
+ 0x52b, 0x52b,
+ 0x52d, 0x52d,
+ 0x52f, 0x52f,
+ 0x561, 0x587,
+ 0x1d00, 0x1d2b,
+ 0x1d6b, 0x1d77,
+ 0x1d79, 0x1d9a,
+ 0x1e01, 0x1e01,
+ 0x1e03, 0x1e03,
+ 0x1e05, 0x1e05,
+ 0x1e07, 0x1e07,
+ 0x1e09, 0x1e09,
+ 0x1e0b, 0x1e0b,
+ 0x1e0d, 0x1e0d,
+ 0x1e0f, 0x1e0f,
+ 0x1e11, 0x1e11,
+ 0x1e13, 0x1e13,
+ 0x1e15, 0x1e15,
+ 0x1e17, 0x1e17,
+ 0x1e19, 0x1e19,
+ 0x1e1b, 0x1e1b,
+ 0x1e1d, 0x1e1d,
+ 0x1e1f, 0x1e1f,
+ 0x1e21, 0x1e21,
+ 0x1e23, 0x1e23,
+ 0x1e25, 0x1e25,
+ 0x1e27, 0x1e27,
+ 0x1e29, 0x1e29,
+ 0x1e2b, 0x1e2b,
+ 0x1e2d, 0x1e2d,
+ 0x1e2f, 0x1e2f,
+ 0x1e31, 0x1e31,
+ 0x1e33, 0x1e33,
+ 0x1e35, 0x1e35,
+ 0x1e37, 0x1e37,
+ 0x1e39, 0x1e39,
+ 0x1e3b, 0x1e3b,
+ 0x1e3d, 0x1e3d,
+ 0x1e3f, 0x1e3f,
+ 0x1e41, 0x1e41,
+ 0x1e43, 0x1e43,
+ 0x1e45, 0x1e45,
+ 0x1e47, 0x1e47,
+ 0x1e49, 0x1e49,
+ 0x1e4b, 0x1e4b,
+ 0x1e4d, 0x1e4d,
+ 0x1e4f, 0x1e4f,
+ 0x1e51, 0x1e51,
+ 0x1e53, 0x1e53,
+ 0x1e55, 0x1e55,
+ 0x1e57, 0x1e57,
+ 0x1e59, 0x1e59,
+ 0x1e5b, 0x1e5b,
+ 0x1e5d, 0x1e5d,
+ 0x1e5f, 0x1e5f,
+ 0x1e61, 0x1e61,
+ 0x1e63, 0x1e63,
+ 0x1e65, 0x1e65,
+ 0x1e67, 0x1e67,
+ 0x1e69, 0x1e69,
+ 0x1e6b, 0x1e6b,
+ 0x1e6d, 0x1e6d,
+ 0x1e6f, 0x1e6f,
+ 0x1e71, 0x1e71,
+ 0x1e73, 0x1e73,
+ 0x1e75, 0x1e75,
+ 0x1e77, 0x1e77,
+ 0x1e79, 0x1e79,
+ 0x1e7b, 0x1e7b,
+ 0x1e7d, 0x1e7d,
+ 0x1e7f, 0x1e7f,
+ 0x1e81, 0x1e81,
+ 0x1e83, 0x1e83,
+ 0x1e85, 0x1e85,
+ 0x1e87, 0x1e87,
+ 0x1e89, 0x1e89,
+ 0x1e8b, 0x1e8b,
+ 0x1e8d, 0x1e8d,
+ 0x1e8f, 0x1e8f,
+ 0x1e91, 0x1e91,
+ 0x1e93, 0x1e93,
+ 0x1e95, 0x1e9d,
+ 0x1e9f, 0x1e9f,
+ 0x1ea1, 0x1ea1,
+ 0x1ea3, 0x1ea3,
+ 0x1ea5, 0x1ea5,
+ 0x1ea7, 0x1ea7,
+ 0x1ea9, 0x1ea9,
+ 0x1eab, 0x1eab,
+ 0x1ead, 0x1ead,
+ 0x1eaf, 0x1eaf,
+ 0x1eb1, 0x1eb1,
+ 0x1eb3, 0x1eb3,
+ 0x1eb5, 0x1eb5,
+ 0x1eb7, 0x1eb7,
+ 0x1eb9, 0x1eb9,
+ 0x1ebb, 0x1ebb,
+ 0x1ebd, 0x1ebd,
+ 0x1ebf, 0x1ebf,
+ 0x1ec1, 0x1ec1,
+ 0x1ec3, 0x1ec3,
+ 0x1ec5, 0x1ec5,
+ 0x1ec7, 0x1ec7,
+ 0x1ec9, 0x1ec9,
+ 0x1ecb, 0x1ecb,
+ 0x1ecd, 0x1ecd,
+ 0x1ecf, 0x1ecf,
+ 0x1ed1, 0x1ed1,
+ 0x1ed3, 0x1ed3,
+ 0x1ed5, 0x1ed5,
+ 0x1ed7, 0x1ed7,
+ 0x1ed9, 0x1ed9,
+ 0x1edb, 0x1edb,
+ 0x1edd, 0x1edd,
+ 0x1edf, 0x1edf,
+ 0x1ee1, 0x1ee1,
+ 0x1ee3, 0x1ee3,
+ 0x1ee5, 0x1ee5,
+ 0x1ee7, 0x1ee7,
+ 0x1ee9, 0x1ee9,
+ 0x1eeb, 0x1eeb,
+ 0x1eed, 0x1eed,
+ 0x1eef, 0x1eef,
+ 0x1ef1, 0x1ef1,
+ 0x1ef3, 0x1ef3,
+ 0x1ef5, 0x1ef5,
+ 0x1ef7, 0x1ef7,
+ 0x1ef9, 0x1ef9,
+ 0x1efb, 0x1efb,
+ 0x1efd, 0x1efd,
+ 0x1eff, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x210a, 0x210a,
+ 0x210e, 0x210f,
+ 0x2113, 0x2113,
+ 0x212f, 0x212f,
+ 0x2134, 0x2134,
+ 0x2139, 0x2139,
+ 0x213c, 0x213d,
+ 0x2146, 0x2149,
+ 0x214e, 0x214e,
+ 0x2184, 0x2184,
+ 0x2c30, 0x2c5e,
+ 0x2c61, 0x2c61,
+ 0x2c65, 0x2c66,
+ 0x2c68, 0x2c68,
+ 0x2c6a, 0x2c6a,
+ 0x2c6c, 0x2c6c,
+ 0x2c71, 0x2c71,
+ 0x2c73, 0x2c74,
+ 0x2c76, 0x2c7b,
+ 0x2c81, 0x2c81,
+ 0x2c83, 0x2c83,
+ 0x2c85, 0x2c85,
+ 0x2c87, 0x2c87,
+ 0x2c89, 0x2c89,
+ 0x2c8b, 0x2c8b,
+ 0x2c8d, 0x2c8d,
+ 0x2c8f, 0x2c8f,
+ 0x2c91, 0x2c91,
+ 0x2c93, 0x2c93,
+ 0x2c95, 0x2c95,
+ 0x2c97, 0x2c97,
+ 0x2c99, 0x2c99,
+ 0x2c9b, 0x2c9b,
+ 0x2c9d, 0x2c9d,
+ 0x2c9f, 0x2c9f,
+ 0x2ca1, 0x2ca1,
+ 0x2ca3, 0x2ca3,
+ 0x2ca5, 0x2ca5,
+ 0x2ca7, 0x2ca7,
+ 0x2ca9, 0x2ca9,
+ 0x2cab, 0x2cab,
+ 0x2cad, 0x2cad,
+ 0x2caf, 0x2caf,
+ 0x2cb1, 0x2cb1,
+ 0x2cb3, 0x2cb3,
+ 0x2cb5, 0x2cb5,
+ 0x2cb7, 0x2cb7,
+ 0x2cb9, 0x2cb9,
+ 0x2cbb, 0x2cbb,
+ 0x2cbd, 0x2cbd,
+ 0x2cbf, 0x2cbf,
+ 0x2cc1, 0x2cc1,
+ 0x2cc3, 0x2cc3,
+ 0x2cc5, 0x2cc5,
+ 0x2cc7, 0x2cc7,
+ 0x2cc9, 0x2cc9,
+ 0x2ccb, 0x2ccb,
+ 0x2ccd, 0x2ccd,
+ 0x2ccf, 0x2ccf,
+ 0x2cd1, 0x2cd1,
+ 0x2cd3, 0x2cd3,
+ 0x2cd5, 0x2cd5,
+ 0x2cd7, 0x2cd7,
+ 0x2cd9, 0x2cd9,
+ 0x2cdb, 0x2cdb,
+ 0x2cdd, 0x2cdd,
+ 0x2cdf, 0x2cdf,
+ 0x2ce1, 0x2ce1,
+ 0x2ce3, 0x2ce4,
+ 0x2cec, 0x2cec,
+ 0x2cee, 0x2cee,
+ 0x2cf3, 0x2cf3,
+ 0x2d00, 0x2d25,
+ 0x2d27, 0x2d27,
+ 0x2d2d, 0x2d2d,
+ 0xa641, 0xa641,
+ 0xa643, 0xa643,
+ 0xa645, 0xa645,
+ 0xa647, 0xa647,
+ 0xa649, 0xa649,
+ 0xa64b, 0xa64b,
+ 0xa64d, 0xa64d,
+ 0xa64f, 0xa64f,
+ 0xa651, 0xa651,
+ 0xa653, 0xa653,
+ 0xa655, 0xa655,
+ 0xa657, 0xa657,
+ 0xa659, 0xa659,
+ 0xa65b, 0xa65b,
+ 0xa65d, 0xa65d,
+ 0xa65f, 0xa65f,
+ 0xa661, 0xa661,
+ 0xa663, 0xa663,
+ 0xa665, 0xa665,
+ 0xa667, 0xa667,
+ 0xa669, 0xa669,
+ 0xa66b, 0xa66b,
+ 0xa66d, 0xa66d,
+ 0xa681, 0xa681,
+ 0xa683, 0xa683,
+ 0xa685, 0xa685,
+ 0xa687, 0xa687,
+ 0xa689, 0xa689,
+ 0xa68b, 0xa68b,
+ 0xa68d, 0xa68d,
+ 0xa68f, 0xa68f,
+ 0xa691, 0xa691,
+ 0xa693, 0xa693,
+ 0xa695, 0xa695,
+ 0xa697, 0xa697,
+ 0xa699, 0xa699,
+ 0xa69b, 0xa69b,
+ 0xa723, 0xa723,
+ 0xa725, 0xa725,
+ 0xa727, 0xa727,
+ 0xa729, 0xa729,
+ 0xa72b, 0xa72b,
+ 0xa72d, 0xa72d,
+ 0xa72f, 0xa731,
+ 0xa733, 0xa733,
+ 0xa735, 0xa735,
+ 0xa737, 0xa737,
+ 0xa739, 0xa739,
+ 0xa73b, 0xa73b,
+ 0xa73d, 0xa73d,
+ 0xa73f, 0xa73f,
+ 0xa741, 0xa741,
+ 0xa743, 0xa743,
+ 0xa745, 0xa745,
+ 0xa747, 0xa747,
+ 0xa749, 0xa749,
+ 0xa74b, 0xa74b,
+ 0xa74d, 0xa74d,
+ 0xa74f, 0xa74f,
+ 0xa751, 0xa751,
+ 0xa753, 0xa753,
+ 0xa755, 0xa755,
+ 0xa757, 0xa757,
+ 0xa759, 0xa759,
+ 0xa75b, 0xa75b,
+ 0xa75d, 0xa75d,
+ 0xa75f, 0xa75f,
+ 0xa761, 0xa761,
+ 0xa763, 0xa763,
+ 0xa765, 0xa765,
+ 0xa767, 0xa767,
+ 0xa769, 0xa769,
+ 0xa76b, 0xa76b,
+ 0xa76d, 0xa76d,
+ 0xa76f, 0xa76f,
+ 0xa771, 0xa778,
+ 0xa77a, 0xa77a,
+ 0xa77c, 0xa77c,
+ 0xa77f, 0xa77f,
+ 0xa781, 0xa781,
+ 0xa783, 0xa783,
+ 0xa785, 0xa785,
+ 0xa787, 0xa787,
+ 0xa78c, 0xa78c,
+ 0xa78e, 0xa78e,
+ 0xa791, 0xa791,
+ 0xa793, 0xa795,
+ 0xa797, 0xa797,
+ 0xa799, 0xa799,
+ 0xa79b, 0xa79b,
+ 0xa79d, 0xa79d,
+ 0xa79f, 0xa79f,
+ 0xa7a1, 0xa7a1,
+ 0xa7a3, 0xa7a3,
+ 0xa7a5, 0xa7a5,
+ 0xa7a7, 0xa7a7,
+ 0xa7a9, 0xa7a9,
+ 0xa7fa, 0xa7fa,
+ 0xab30, 0xab5a,
+ 0xab64, 0xab65,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+ 0x118c0, 0x118df,
+ 0x1d41a, 0x1d433,
+ 0x1d44e, 0x1d454,
+ 0x1d456, 0x1d467,
+ 0x1d482, 0x1d49b,
+ 0x1d4b6, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d4cf,
+ 0x1d4ea, 0x1d503,
+ 0x1d51e, 0x1d537,
+ 0x1d552, 0x1d56b,
+ 0x1d586, 0x1d59f,
+ 0x1d5ba, 0x1d5d3,
+ 0x1d5ee, 0x1d607,
+ 0x1d622, 0x1d63b,
+ 0x1d656, 0x1d66f,
+ 0x1d68a, 0x1d6a5,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6e1,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d71b,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d755,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d78f,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7cb, 0x1d7cb
+};
+UCP_FN(Ll)
+
+static const unichar ucp_Lm_def[] = {
+ 0x2b0, 0x2c1,
+ 0x2c6, 0x2d1,
+ 0x2e0, 0x2e4,
+ 0x2ec, 0x2ec,
+ 0x2ee, 0x2ee,
+ 0x374, 0x374,
+ 0x37a, 0x37a,
+ 0x559, 0x559,
+ 0x640, 0x640,
+ 0x6e5, 0x6e6,
+ 0x7f4, 0x7f5,
+ 0x7fa, 0x7fa,
+ 0x81a, 0x81a,
+ 0x824, 0x824,
+ 0x828, 0x828,
+ 0x971, 0x971,
+ 0xe46, 0xe46,
+ 0xec6, 0xec6,
+ 0x10fc, 0x10fc,
+ 0x17d7, 0x17d7,
+ 0x1843, 0x1843,
+ 0x1aa7, 0x1aa7,
+ 0x1c78, 0x1c7d,
+ 0x1d2c, 0x1d6a,
+ 0x1d78, 0x1d78,
+ 0x1d9b, 0x1dbf,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x209c,
+ 0x2c7c, 0x2c7d,
+ 0x2d6f, 0x2d6f,
+ 0x2e2f, 0x2e2f,
+ 0x3005, 0x3005,
+ 0x3031, 0x3035,
+ 0x303b, 0x303b,
+ 0x309d, 0x309e,
+ 0x30fc, 0x30fe,
+ 0xa015, 0xa015,
+ 0xa4f8, 0xa4fd,
+ 0xa60c, 0xa60c,
+ 0xa67f, 0xa67f,
+ 0xa69c, 0xa69d,
+ 0xa717, 0xa71f,
+ 0xa770, 0xa770,
+ 0xa788, 0xa788,
+ 0xa7f8, 0xa7f9,
+ 0xa9cf, 0xa9cf,
+ 0xa9e6, 0xa9e6,
+ 0xaa70, 0xaa70,
+ 0xaadd, 0xaadd,
+ 0xaaf3, 0xaaf4,
+ 0xab5c, 0xab5f,
+ 0xff70, 0xff70,
+ 0xff9e, 0xff9f,
+ 0x16b40, 0x16b43,
+ 0x16f93, 0x16f9f
+};
+UCP_FN(Lm)
+
+static const unichar ucp_Lo_def[] = {
+ 0xaa, 0xaa,
+ 0xba, 0xba,
+ 0x1bb, 0x1bb,
+ 0x1c0, 0x1c3,
+ 0x294, 0x294,
+ 0x5d0, 0x5ea,
+ 0x5f0, 0x5f2,
+ 0x620, 0x63f,
+ 0x641, 0x64a,
+ 0x66e, 0x66f,
+ 0x671, 0x6d3,
+ 0x6d5, 0x6d5,
+ 0x6ee, 0x6ef,
+ 0x6fa, 0x6fc,
+ 0x6ff, 0x6ff,
+ 0x710, 0x710,
+ 0x712, 0x72f,
+ 0x74d, 0x7a5,
+ 0x7b1, 0x7b1,
+ 0x7ca, 0x7ea,
+ 0x800, 0x815,
+ 0x840, 0x858,
+ 0x8a0, 0x8b2,
+ 0x904, 0x939,
+ 0x93d, 0x93d,
+ 0x950, 0x950,
+ 0x958, 0x961,
+ 0x972, 0x980,
+ 0x985, 0x98c,
+ 0x98f, 0x990,
+ 0x993, 0x9a8,
+ 0x9aa, 0x9b0,
+ 0x9b2, 0x9b2,
+ 0x9b6, 0x9b9,
+ 0x9bd, 0x9bd,
+ 0x9ce, 0x9ce,
+ 0x9dc, 0x9dd,
+ 0x9df, 0x9e1,
+ 0x9f0, 0x9f1,
+ 0xa05, 0xa0a,
+ 0xa0f, 0xa10,
+ 0xa13, 0xa28,
+ 0xa2a, 0xa30,
+ 0xa32, 0xa33,
+ 0xa35, 0xa36,
+ 0xa38, 0xa39,
+ 0xa59, 0xa5c,
+ 0xa5e, 0xa5e,
+ 0xa72, 0xa74,
+ 0xa85, 0xa8d,
+ 0xa8f, 0xa91,
+ 0xa93, 0xaa8,
+ 0xaaa, 0xab0,
+ 0xab2, 0xab3,
+ 0xab5, 0xab9,
+ 0xabd, 0xabd,
+ 0xad0, 0xad0,
+ 0xae0, 0xae1,
+ 0xb05, 0xb0c,
+ 0xb0f, 0xb10,
+ 0xb13, 0xb28,
+ 0xb2a, 0xb30,
+ 0xb32, 0xb33,
+ 0xb35, 0xb39,
+ 0xb3d, 0xb3d,
+ 0xb5c, 0xb5d,
+ 0xb5f, 0xb61,
+ 0xb71, 0xb71,
+ 0xb83, 0xb83,
+ 0xb85, 0xb8a,
+ 0xb8e, 0xb90,
+ 0xb92, 0xb95,
+ 0xb99, 0xb9a,
+ 0xb9c, 0xb9c,
+ 0xb9e, 0xb9f,
+ 0xba3, 0xba4,
+ 0xba8, 0xbaa,
+ 0xbae, 0xbb9,
+ 0xbd0, 0xbd0,
+ 0xc05, 0xc0c,
+ 0xc0e, 0xc10,
+ 0xc12, 0xc28,
+ 0xc2a, 0xc39,
+ 0xc3d, 0xc3d,
+ 0xc58, 0xc59,
+ 0xc60, 0xc61,
+ 0xc85, 0xc8c,
+ 0xc8e, 0xc90,
+ 0xc92, 0xca8,
+ 0xcaa, 0xcb3,
+ 0xcb5, 0xcb9,
+ 0xcbd, 0xcbd,
+ 0xcde, 0xcde,
+ 0xce0, 0xce1,
+ 0xcf1, 0xcf2,
+ 0xd05, 0xd0c,
+ 0xd0e, 0xd10,
+ 0xd12, 0xd3a,
+ 0xd3d, 0xd3d,
+ 0xd4e, 0xd4e,
+ 0xd60, 0xd61,
+ 0xd7a, 0xd7f,
+ 0xd85, 0xd96,
+ 0xd9a, 0xdb1,
+ 0xdb3, 0xdbb,
+ 0xdbd, 0xdbd,
+ 0xdc0, 0xdc6,
+ 0xe01, 0xe30,
+ 0xe32, 0xe33,
+ 0xe40, 0xe45,
+ 0xe81, 0xe82,
+ 0xe84, 0xe84,
+ 0xe87, 0xe88,
+ 0xe8a, 0xe8a,
+ 0xe8d, 0xe8d,
+ 0xe94, 0xe97,
+ 0xe99, 0xe9f,
+ 0xea1, 0xea3,
+ 0xea5, 0xea5,
+ 0xea7, 0xea7,
+ 0xeaa, 0xeab,
+ 0xead, 0xeb0,
+ 0xeb2, 0xeb3,
+ 0xebd, 0xebd,
+ 0xec0, 0xec4,
+ 0xedc, 0xedf,
+ 0xf00, 0xf00,
+ 0xf40, 0xf47,
+ 0xf49, 0xf6c,
+ 0xf88, 0xf8c,
+ 0x1000, 0x102a,
+ 0x103f, 0x103f,
+ 0x1050, 0x1055,
+ 0x105a, 0x105d,
+ 0x1061, 0x1061,
+ 0x1065, 0x1066,
+ 0x106e, 0x1070,
+ 0x1075, 0x1081,
+ 0x108e, 0x108e,
+ 0x10d0, 0x10fa,
+ 0x10fd, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x167f,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16f1, 0x16f8,
+ 0x1700, 0x170c,
+ 0x170e, 0x1711,
+ 0x1720, 0x1731,
+ 0x1740, 0x1751,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1780, 0x17b3,
+ 0x17dc, 0x17dc,
+ 0x1820, 0x1842,
+ 0x1844, 0x1877,
+ 0x1880, 0x18a8,
+ 0x18aa, 0x18aa,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191e,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19c1, 0x19c7,
+ 0x1a00, 0x1a16,
+ 0x1a20, 0x1a54,
+ 0x1b05, 0x1b33,
+ 0x1b45, 0x1b4b,
+ 0x1b83, 0x1ba0,
+ 0x1bae, 0x1baf,
+ 0x1bba, 0x1be5,
+ 0x1c00, 0x1c23,
+ 0x1c4d, 0x1c4f,
+ 0x1c5a, 0x1c77,
+ 0x1ce9, 0x1cec,
+ 0x1cee, 0x1cf1,
+ 0x1cf5, 0x1cf6,
+ 0x2135, 0x2138,
+ 0x2d30, 0x2d67,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x3006, 0x3006,
+ 0x303c, 0x303c,
+ 0x3041, 0x3096,
+ 0x309f, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30ff, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31ba,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcc,
+ 0xa000, 0xa014,
+ 0xa016, 0xa48c,
+ 0xa4d0, 0xa4f7,
+ 0xa500, 0xa60b,
+ 0xa610, 0xa61f,
+ 0xa62a, 0xa62b,
+ 0xa66e, 0xa66e,
+ 0xa6a0, 0xa6e5,
+ 0xa7f7, 0xa7f7,
+ 0xa7fb, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa822,
+ 0xa840, 0xa873,
+ 0xa882, 0xa8b3,
+ 0xa8f2, 0xa8f7,
+ 0xa8fb, 0xa8fb,
+ 0xa90a, 0xa925,
+ 0xa930, 0xa946,
+ 0xa960, 0xa97c,
+ 0xa984, 0xa9b2,
+ 0xa9e0, 0xa9e4,
+ 0xa9e7, 0xa9ef,
+ 0xa9fa, 0xa9fe,
+ 0xaa00, 0xaa28,
+ 0xaa40, 0xaa42,
+ 0xaa44, 0xaa4b,
+ 0xaa60, 0xaa6f,
+ 0xaa71, 0xaa76,
+ 0xaa7a, 0xaa7a,
+ 0xaa7e, 0xaaaf,
+ 0xaab1, 0xaab1,
+ 0xaab5, 0xaab6,
+ 0xaab9, 0xaabd,
+ 0xaac0, 0xaac0,
+ 0xaac2, 0xaac2,
+ 0xaadb, 0xaadc,
+ 0xaae0, 0xaaea,
+ 0xaaf2, 0xaaf2,
+ 0xab01, 0xab06,
+ 0xab09, 0xab0e,
+ 0xab11, 0xab16,
+ 0xab20, 0xab26,
+ 0xab28, 0xab2e,
+ 0xabc0, 0xabe2,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xf900, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb1d, 0xfb1d,
+ 0xfb1f, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff66, 0xff6f,
+ 0xff71, 0xff9d,
+ 0xffa0, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x10300, 0x1031f,
+ 0x10330, 0x10340,
+ 0x10342, 0x10349,
+ 0x10350, 0x10375,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x10450, 0x1049d,
+ 0x10500, 0x10527,
+ 0x10530, 0x10563,
+ 0x10600, 0x10736,
+ 0x10740, 0x10755,
+ 0x10760, 0x10767,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x10855,
+ 0x10860, 0x10876,
+ 0x10880, 0x1089e,
+ 0x10900, 0x10915,
+ 0x10920, 0x10939,
+ 0x10980, 0x109b7,
+ 0x109be, 0x109bf,
+ 0x10a00, 0x10a00,
+ 0x10a10, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a60, 0x10a7c,
+ 0x10a80, 0x10a9c,
+ 0x10ac0, 0x10ac7,
+ 0x10ac9, 0x10ae4,
+ 0x10b00, 0x10b35,
+ 0x10b40, 0x10b55,
+ 0x10b60, 0x10b72,
+ 0x10b80, 0x10b91,
+ 0x10c00, 0x10c48,
+ 0x11003, 0x11037,
+ 0x11083, 0x110af,
+ 0x110d0, 0x110e8,
+ 0x11103, 0x11126,
+ 0x11150, 0x11172,
+ 0x11176, 0x11176,
+ 0x11183, 0x111b2,
+ 0x111c1, 0x111c4,
+ 0x111da, 0x111da,
+ 0x11200, 0x11211,
+ 0x11213, 0x1122b,
+ 0x112b0, 0x112de,
+ 0x11305, 0x1130c,
+ 0x1130f, 0x11310,
+ 0x11313, 0x11328,
+ 0x1132a, 0x11330,
+ 0x11332, 0x11333,
+ 0x11335, 0x11339,
+ 0x1133d, 0x1133d,
+ 0x1135d, 0x11361,
+ 0x11480, 0x114af,
+ 0x114c4, 0x114c5,
+ 0x114c7, 0x114c7,
+ 0x11580, 0x115ae,
+ 0x11600, 0x1162f,
+ 0x11644, 0x11644,
+ 0x11680, 0x116aa,
+ 0x118ff, 0x118ff,
+ 0x11ac0, 0x11af8,
+ 0x12000, 0x12398,
+ 0x13000, 0x1342e,
+ 0x16800, 0x16a38,
+ 0x16a40, 0x16a5e,
+ 0x16ad0, 0x16aed,
+ 0x16b00, 0x16b2f,
+ 0x16b63, 0x16b77,
+ 0x16b7d, 0x16b8f,
+ 0x16f00, 0x16f44,
+ 0x16f50, 0x16f50,
+ 0x1b000, 0x1b001,
+ 0x1bc00, 0x1bc6a,
+ 0x1bc70, 0x1bc7c,
+ 0x1bc80, 0x1bc88,
+ 0x1bc90, 0x1bc99,
+ 0x1e800, 0x1e8c4,
+ 0x1ee00, 0x1ee03,
+ 0x1ee05, 0x1ee1f,
+ 0x1ee21, 0x1ee22,
+ 0x1ee24, 0x1ee24,
+ 0x1ee27, 0x1ee27,
+ 0x1ee29, 0x1ee32,
+ 0x1ee34, 0x1ee37,
+ 0x1ee39, 0x1ee39,
+ 0x1ee3b, 0x1ee3b,
+ 0x1ee42, 0x1ee42,
+ 0x1ee47, 0x1ee47,
+ 0x1ee49, 0x1ee49,
+ 0x1ee4b, 0x1ee4b,
+ 0x1ee4d, 0x1ee4f,
+ 0x1ee51, 0x1ee52,
+ 0x1ee54, 0x1ee54,
+ 0x1ee57, 0x1ee57,
+ 0x1ee59, 0x1ee59,
+ 0x1ee5b, 0x1ee5b,
+ 0x1ee5d, 0x1ee5d,
+ 0x1ee5f, 0x1ee5f,
+ 0x1ee61, 0x1ee62,
+ 0x1ee64, 0x1ee64,
+ 0x1ee67, 0x1ee6a,
+ 0x1ee6c, 0x1ee72,
+ 0x1ee74, 0x1ee77,
+ 0x1ee79, 0x1ee7c,
+ 0x1ee7e, 0x1ee7e,
+ 0x1ee80, 0x1ee89,
+ 0x1ee8b, 0x1ee9b,
+ 0x1eea1, 0x1eea3,
+ 0x1eea5, 0x1eea9,
+ 0x1eeab, 0x1eebb,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2b740, 0x2b81d,
+ 0x2f800, 0x2fa1d
+};
+UCP_FN(Lo)
+
+static const unichar ucp_Lt_def[] = {
+ 0x1c5, 0x1c5,
+ 0x1c8, 0x1c8,
+ 0x1cb, 0x1cb,
+ 0x1f2, 0x1f2,
+ 0x1f88, 0x1f8f,
+ 0x1f98, 0x1f9f,
+ 0x1fa8, 0x1faf,
+ 0x1fbc, 0x1fbc,
+ 0x1fcc, 0x1fcc,
+ 0x1ffc, 0x1ffc
+};
+UCP_FN(Lt)
+
+static const unichar ucp_Lu_def[] = {
+ 0x41, 0x5a,
+ 0xc0, 0xd6,
+ 0xd8, 0xde,
+ 0x100, 0x100,
+ 0x102, 0x102,
+ 0x104, 0x104,
+ 0x106, 0x106,
+ 0x108, 0x108,
+ 0x10a, 0x10a,
+ 0x10c, 0x10c,
+ 0x10e, 0x10e,
+ 0x110, 0x110,
+ 0x112, 0x112,
+ 0x114, 0x114,
+ 0x116, 0x116,
+ 0x118, 0x118,
+ 0x11a, 0x11a,
+ 0x11c, 0x11c,
+ 0x11e, 0x11e,
+ 0x120, 0x120,
+ 0x122, 0x122,
+ 0x124, 0x124,
+ 0x126, 0x126,
+ 0x128, 0x128,
+ 0x12a, 0x12a,
+ 0x12c, 0x12c,
+ 0x12e, 0x12e,
+ 0x130, 0x130,
+ 0x132, 0x132,
+ 0x134, 0x134,
+ 0x136, 0x136,
+ 0x139, 0x139,
+ 0x13b, 0x13b,
+ 0x13d, 0x13d,
+ 0x13f, 0x13f,
+ 0x141, 0x141,
+ 0x143, 0x143,
+ 0x145, 0x145,
+ 0x147, 0x147,
+ 0x14a, 0x14a,
+ 0x14c, 0x14c,
+ 0x14e, 0x14e,
+ 0x150, 0x150,
+ 0x152, 0x152,
+ 0x154, 0x154,
+ 0x156, 0x156,
+ 0x158, 0x158,
+ 0x15a, 0x15a,
+ 0x15c, 0x15c,
+ 0x15e, 0x15e,
+ 0x160, 0x160,
+ 0x162, 0x162,
+ 0x164, 0x164,
+ 0x166, 0x166,
+ 0x168, 0x168,
+ 0x16a, 0x16a,
+ 0x16c, 0x16c,
+ 0x16e, 0x16e,
+ 0x170, 0x170,
+ 0x172, 0x172,
+ 0x174, 0x174,
+ 0x176, 0x176,
+ 0x178, 0x179,
+ 0x17b, 0x17b,
+ 0x17d, 0x17d,
+ 0x181, 0x182,
+ 0x184, 0x184,
+ 0x186, 0x187,
+ 0x189, 0x18b,
+ 0x18e, 0x191,
+ 0x193, 0x194,
+ 0x196, 0x198,
+ 0x19c, 0x19d,
+ 0x19f, 0x1a0,
+ 0x1a2, 0x1a2,
+ 0x1a4, 0x1a4,
+ 0x1a6, 0x1a7,
+ 0x1a9, 0x1a9,
+ 0x1ac, 0x1ac,
+ 0x1ae, 0x1af,
+ 0x1b1, 0x1b3,
+ 0x1b5, 0x1b5,
+ 0x1b7, 0x1b8,
+ 0x1bc, 0x1bc,
+ 0x1c4, 0x1c4,
+ 0x1c7, 0x1c7,
+ 0x1ca, 0x1ca,
+ 0x1cd, 0x1cd,
+ 0x1cf, 0x1cf,
+ 0x1d1, 0x1d1,
+ 0x1d3, 0x1d3,
+ 0x1d5, 0x1d5,
+ 0x1d7, 0x1d7,
+ 0x1d9, 0x1d9,
+ 0x1db, 0x1db,
+ 0x1de, 0x1de,
+ 0x1e0, 0x1e0,
+ 0x1e2, 0x1e2,
+ 0x1e4, 0x1e4,
+ 0x1e6, 0x1e6,
+ 0x1e8, 0x1e8,
+ 0x1ea, 0x1ea,
+ 0x1ec, 0x1ec,
+ 0x1ee, 0x1ee,
+ 0x1f1, 0x1f1,
+ 0x1f4, 0x1f4,
+ 0x1f6, 0x1f8,
+ 0x1fa, 0x1fa,
+ 0x1fc, 0x1fc,
+ 0x1fe, 0x1fe,
+ 0x200, 0x200,
+ 0x202, 0x202,
+ 0x204, 0x204,
+ 0x206, 0x206,
+ 0x208, 0x208,
+ 0x20a, 0x20a,
+ 0x20c, 0x20c,
+ 0x20e, 0x20e,
+ 0x210, 0x210,
+ 0x212, 0x212,
+ 0x214, 0x214,
+ 0x216, 0x216,
+ 0x218, 0x218,
+ 0x21a, 0x21a,
+ 0x21c, 0x21c,
+ 0x21e, 0x21e,
+ 0x220, 0x220,
+ 0x222, 0x222,
+ 0x224, 0x224,
+ 0x226, 0x226,
+ 0x228, 0x228,
+ 0x22a, 0x22a,
+ 0x22c, 0x22c,
+ 0x22e, 0x22e,
+ 0x230, 0x230,
+ 0x232, 0x232,
+ 0x23a, 0x23b,
+ 0x23d, 0x23e,
+ 0x241, 0x241,
+ 0x243, 0x246,
+ 0x248, 0x248,
+ 0x24a, 0x24a,
+ 0x24c, 0x24c,
+ 0x24e, 0x24e,
+ 0x370, 0x370,
+ 0x372, 0x372,
+ 0x376, 0x376,
+ 0x37f, 0x37f,
+ 0x386, 0x386,
+ 0x388, 0x38a,
+ 0x38c, 0x38c,
+ 0x38e, 0x38f,
+ 0x391, 0x3a1,
+ 0x3a3, 0x3ab,
+ 0x3cf, 0x3cf,
+ 0x3d2, 0x3d4,
+ 0x3d8, 0x3d8,
+ 0x3da, 0x3da,
+ 0x3dc, 0x3dc,
+ 0x3de, 0x3de,
+ 0x3e0, 0x3e0,
+ 0x3e2, 0x3e2,
+ 0x3e4, 0x3e4,
+ 0x3e6, 0x3e6,
+ 0x3e8, 0x3e8,
+ 0x3ea, 0x3ea,
+ 0x3ec, 0x3ec,
+ 0x3ee, 0x3ee,
+ 0x3f4, 0x3f4,
+ 0x3f7, 0x3f7,
+ 0x3f9, 0x3fa,
+ 0x3fd, 0x42f,
+ 0x460, 0x460,
+ 0x462, 0x462,
+ 0x464, 0x464,
+ 0x466, 0x466,
+ 0x468, 0x468,
+ 0x46a, 0x46a,
+ 0x46c, 0x46c,
+ 0x46e, 0x46e,
+ 0x470, 0x470,
+ 0x472, 0x472,
+ 0x474, 0x474,
+ 0x476, 0x476,
+ 0x478, 0x478,
+ 0x47a, 0x47a,
+ 0x47c, 0x47c,
+ 0x47e, 0x47e,
+ 0x480, 0x480,
+ 0x48a, 0x48a,
+ 0x48c, 0x48c,
+ 0x48e, 0x48e,
+ 0x490, 0x490,
+ 0x492, 0x492,
+ 0x494, 0x494,
+ 0x496, 0x496,
+ 0x498, 0x498,
+ 0x49a, 0x49a,
+ 0x49c, 0x49c,
+ 0x49e, 0x49e,
+ 0x4a0, 0x4a0,
+ 0x4a2, 0x4a2,
+ 0x4a4, 0x4a4,
+ 0x4a6, 0x4a6,
+ 0x4a8, 0x4a8,
+ 0x4aa, 0x4aa,
+ 0x4ac, 0x4ac,
+ 0x4ae, 0x4ae,
+ 0x4b0, 0x4b0,
+ 0x4b2, 0x4b2,
+ 0x4b4, 0x4b4,
+ 0x4b6, 0x4b6,
+ 0x4b8, 0x4b8,
+ 0x4ba, 0x4ba,
+ 0x4bc, 0x4bc,
+ 0x4be, 0x4be,
+ 0x4c0, 0x4c1,
+ 0x4c3, 0x4c3,
+ 0x4c5, 0x4c5,
+ 0x4c7, 0x4c7,
+ 0x4c9, 0x4c9,
+ 0x4cb, 0x4cb,
+ 0x4cd, 0x4cd,
+ 0x4d0, 0x4d0,
+ 0x4d2, 0x4d2,
+ 0x4d4, 0x4d4,
+ 0x4d6, 0x4d6,
+ 0x4d8, 0x4d8,
+ 0x4da, 0x4da,
+ 0x4dc, 0x4dc,
+ 0x4de, 0x4de,
+ 0x4e0, 0x4e0,
+ 0x4e2, 0x4e2,
+ 0x4e4, 0x4e4,
+ 0x4e6, 0x4e6,
+ 0x4e8, 0x4e8,
+ 0x4ea, 0x4ea,
+ 0x4ec, 0x4ec,
+ 0x4ee, 0x4ee,
+ 0x4f0, 0x4f0,
+ 0x4f2, 0x4f2,
+ 0x4f4, 0x4f4,
+ 0x4f6, 0x4f6,
+ 0x4f8, 0x4f8,
+ 0x4fa, 0x4fa,
+ 0x4fc, 0x4fc,
+ 0x4fe, 0x4fe,
+ 0x500, 0x500,
+ 0x502, 0x502,
+ 0x504, 0x504,
+ 0x506, 0x506,
+ 0x508, 0x508,
+ 0x50a, 0x50a,
+ 0x50c, 0x50c,
+ 0x50e, 0x50e,
+ 0x510, 0x510,
+ 0x512, 0x512,
+ 0x514, 0x514,
+ 0x516, 0x516,
+ 0x518, 0x518,
+ 0x51a, 0x51a,
+ 0x51c, 0x51c,
+ 0x51e, 0x51e,
+ 0x520, 0x520,
+ 0x522, 0x522,
+ 0x524, 0x524,
+ 0x526, 0x526,
+ 0x528, 0x528,
+ 0x52a, 0x52a,
+ 0x52c, 0x52c,
+ 0x52e, 0x52e,
+ 0x531, 0x556,
+ 0x10a0, 0x10c5,
+ 0x10c7, 0x10c7,
+ 0x10cd, 0x10cd,
+ 0x1e00, 0x1e00,
+ 0x1e02, 0x1e02,
+ 0x1e04, 0x1e04,
+ 0x1e06, 0x1e06,
+ 0x1e08, 0x1e08,
+ 0x1e0a, 0x1e0a,
+ 0x1e0c, 0x1e0c,
+ 0x1e0e, 0x1e0e,
+ 0x1e10, 0x1e10,
+ 0x1e12, 0x1e12,
+ 0x1e14, 0x1e14,
+ 0x1e16, 0x1e16,
+ 0x1e18, 0x1e18,
+ 0x1e1a, 0x1e1a,
+ 0x1e1c, 0x1e1c,
+ 0x1e1e, 0x1e1e,
+ 0x1e20, 0x1e20,
+ 0x1e22, 0x1e22,
+ 0x1e24, 0x1e24,
+ 0x1e26, 0x1e26,
+ 0x1e28, 0x1e28,
+ 0x1e2a, 0x1e2a,
+ 0x1e2c, 0x1e2c,
+ 0x1e2e, 0x1e2e,
+ 0x1e30, 0x1e30,
+ 0x1e32, 0x1e32,
+ 0x1e34, 0x1e34,
+ 0x1e36, 0x1e36,
+ 0x1e38, 0x1e38,
+ 0x1e3a, 0x1e3a,
+ 0x1e3c, 0x1e3c,
+ 0x1e3e, 0x1e3e,
+ 0x1e40, 0x1e40,
+ 0x1e42, 0x1e42,
+ 0x1e44, 0x1e44,
+ 0x1e46, 0x1e46,
+ 0x1e48, 0x1e48,
+ 0x1e4a, 0x1e4a,
+ 0x1e4c, 0x1e4c,
+ 0x1e4e, 0x1e4e,
+ 0x1e50, 0x1e50,
+ 0x1e52, 0x1e52,
+ 0x1e54, 0x1e54,
+ 0x1e56, 0x1e56,
+ 0x1e58, 0x1e58,
+ 0x1e5a, 0x1e5a,
+ 0x1e5c, 0x1e5c,
+ 0x1e5e, 0x1e5e,
+ 0x1e60, 0x1e60,
+ 0x1e62, 0x1e62,
+ 0x1e64, 0x1e64,
+ 0x1e66, 0x1e66,
+ 0x1e68, 0x1e68,
+ 0x1e6a, 0x1e6a,
+ 0x1e6c, 0x1e6c,
+ 0x1e6e, 0x1e6e,
+ 0x1e70, 0x1e70,
+ 0x1e72, 0x1e72,
+ 0x1e74, 0x1e74,
+ 0x1e76, 0x1e76,
+ 0x1e78, 0x1e78,
+ 0x1e7a, 0x1e7a,
+ 0x1e7c, 0x1e7c,
+ 0x1e7e, 0x1e7e,
+ 0x1e80, 0x1e80,
+ 0x1e82, 0x1e82,
+ 0x1e84, 0x1e84,
+ 0x1e86, 0x1e86,
+ 0x1e88, 0x1e88,
+ 0x1e8a, 0x1e8a,
+ 0x1e8c, 0x1e8c,
+ 0x1e8e, 0x1e8e,
+ 0x1e90, 0x1e90,
+ 0x1e92, 0x1e92,
+ 0x1e94, 0x1e94,
+ 0x1e9e, 0x1e9e,
+ 0x1ea0, 0x1ea0,
+ 0x1ea2, 0x1ea2,
+ 0x1ea4, 0x1ea4,
+ 0x1ea6, 0x1ea6,
+ 0x1ea8, 0x1ea8,
+ 0x1eaa, 0x1eaa,
+ 0x1eac, 0x1eac,
+ 0x1eae, 0x1eae,
+ 0x1eb0, 0x1eb0,
+ 0x1eb2, 0x1eb2,
+ 0x1eb4, 0x1eb4,
+ 0x1eb6, 0x1eb6,
+ 0x1eb8, 0x1eb8,
+ 0x1eba, 0x1eba,
+ 0x1ebc, 0x1ebc,
+ 0x1ebe, 0x1ebe,
+ 0x1ec0, 0x1ec0,
+ 0x1ec2, 0x1ec2,
+ 0x1ec4, 0x1ec4,
+ 0x1ec6, 0x1ec6,
+ 0x1ec8, 0x1ec8,
+ 0x1eca, 0x1eca,
+ 0x1ecc, 0x1ecc,
+ 0x1ece, 0x1ece,
+ 0x1ed0, 0x1ed0,
+ 0x1ed2, 0x1ed2,
+ 0x1ed4, 0x1ed4,
+ 0x1ed6, 0x1ed6,
+ 0x1ed8, 0x1ed8,
+ 0x1eda, 0x1eda,
+ 0x1edc, 0x1edc,
+ 0x1ede, 0x1ede,
+ 0x1ee0, 0x1ee0,
+ 0x1ee2, 0x1ee2,
+ 0x1ee4, 0x1ee4,
+ 0x1ee6, 0x1ee6,
+ 0x1ee8, 0x1ee8,
+ 0x1eea, 0x1eea,
+ 0x1eec, 0x1eec,
+ 0x1eee, 0x1eee,
+ 0x1ef0, 0x1ef0,
+ 0x1ef2, 0x1ef2,
+ 0x1ef4, 0x1ef4,
+ 0x1ef6, 0x1ef6,
+ 0x1ef8, 0x1ef8,
+ 0x1efa, 0x1efa,
+ 0x1efc, 0x1efc,
+ 0x1efe, 0x1efe,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f5f,
+ 0x1f68, 0x1f6f,
+ 0x1fb8, 0x1fbb,
+ 0x1fc8, 0x1fcb,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210b, 0x210d,
+ 0x2110, 0x2112,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x2130, 0x2133,
+ 0x213e, 0x213f,
+ 0x2145, 0x2145,
+ 0x2183, 0x2183,
+ 0x2c00, 0x2c2e,
+ 0x2c60, 0x2c60,
+ 0x2c62, 0x2c64,
+ 0x2c67, 0x2c67,
+ 0x2c69, 0x2c69,
+ 0x2c6b, 0x2c6b,
+ 0x2c6d, 0x2c70,
+ 0x2c72, 0x2c72,
+ 0x2c75, 0x2c75,
+ 0x2c7e, 0x2c80,
+ 0x2c82, 0x2c82,
+ 0x2c84, 0x2c84,
+ 0x2c86, 0x2c86,
+ 0x2c88, 0x2c88,
+ 0x2c8a, 0x2c8a,
+ 0x2c8c, 0x2c8c,
+ 0x2c8e, 0x2c8e,
+ 0x2c90, 0x2c90,
+ 0x2c92, 0x2c92,
+ 0x2c94, 0x2c94,
+ 0x2c96, 0x2c96,
+ 0x2c98, 0x2c98,
+ 0x2c9a, 0x2c9a,
+ 0x2c9c, 0x2c9c,
+ 0x2c9e, 0x2c9e,
+ 0x2ca0, 0x2ca0,
+ 0x2ca2, 0x2ca2,
+ 0x2ca4, 0x2ca4,
+ 0x2ca6, 0x2ca6,
+ 0x2ca8, 0x2ca8,
+ 0x2caa, 0x2caa,
+ 0x2cac, 0x2cac,
+ 0x2cae, 0x2cae,
+ 0x2cb0, 0x2cb0,
+ 0x2cb2, 0x2cb2,
+ 0x2cb4, 0x2cb4,
+ 0x2cb6, 0x2cb6,
+ 0x2cb8, 0x2cb8,
+ 0x2cba, 0x2cba,
+ 0x2cbc, 0x2cbc,
+ 0x2cbe, 0x2cbe,
+ 0x2cc0, 0x2cc0,
+ 0x2cc2, 0x2cc2,
+ 0x2cc4, 0x2cc4,
+ 0x2cc6, 0x2cc6,
+ 0x2cc8, 0x2cc8,
+ 0x2cca, 0x2cca,
+ 0x2ccc, 0x2ccc,
+ 0x2cce, 0x2cce,
+ 0x2cd0, 0x2cd0,
+ 0x2cd2, 0x2cd2,
+ 0x2cd4, 0x2cd4,
+ 0x2cd6, 0x2cd6,
+ 0x2cd8, 0x2cd8,
+ 0x2cda, 0x2cda,
+ 0x2cdc, 0x2cdc,
+ 0x2cde, 0x2cde,
+ 0x2ce0, 0x2ce0,
+ 0x2ce2, 0x2ce2,
+ 0x2ceb, 0x2ceb,
+ 0x2ced, 0x2ced,
+ 0x2cf2, 0x2cf2,
+ 0xa640, 0xa640,
+ 0xa642, 0xa642,
+ 0xa644, 0xa644,
+ 0xa646, 0xa646,
+ 0xa648, 0xa648,
+ 0xa64a, 0xa64a,
+ 0xa64c, 0xa64c,
+ 0xa64e, 0xa64e,
+ 0xa650, 0xa650,
+ 0xa652, 0xa652,
+ 0xa654, 0xa654,
+ 0xa656, 0xa656,
+ 0xa658, 0xa658,
+ 0xa65a, 0xa65a,
+ 0xa65c, 0xa65c,
+ 0xa65e, 0xa65e,
+ 0xa660, 0xa660,
+ 0xa662, 0xa662,
+ 0xa664, 0xa664,
+ 0xa666, 0xa666,
+ 0xa668, 0xa668,
+ 0xa66a, 0xa66a,
+ 0xa66c, 0xa66c,
+ 0xa680, 0xa680,
+ 0xa682, 0xa682,
+ 0xa684, 0xa684,
+ 0xa686, 0xa686,
+ 0xa688, 0xa688,
+ 0xa68a, 0xa68a,
+ 0xa68c, 0xa68c,
+ 0xa68e, 0xa68e,
+ 0xa690, 0xa690,
+ 0xa692, 0xa692,
+ 0xa694, 0xa694,
+ 0xa696, 0xa696,
+ 0xa698, 0xa698,
+ 0xa69a, 0xa69a,
+ 0xa722, 0xa722,
+ 0xa724, 0xa724,
+ 0xa726, 0xa726,
+ 0xa728, 0xa728,
+ 0xa72a, 0xa72a,
+ 0xa72c, 0xa72c,
+ 0xa72e, 0xa72e,
+ 0xa732, 0xa732,
+ 0xa734, 0xa734,
+ 0xa736, 0xa736,
+ 0xa738, 0xa738,
+ 0xa73a, 0xa73a,
+ 0xa73c, 0xa73c,
+ 0xa73e, 0xa73e,
+ 0xa740, 0xa740,
+ 0xa742, 0xa742,
+ 0xa744, 0xa744,
+ 0xa746, 0xa746,
+ 0xa748, 0xa748,
+ 0xa74a, 0xa74a,
+ 0xa74c, 0xa74c,
+ 0xa74e, 0xa74e,
+ 0xa750, 0xa750,
+ 0xa752, 0xa752,
+ 0xa754, 0xa754,
+ 0xa756, 0xa756,
+ 0xa758, 0xa758,
+ 0xa75a, 0xa75a,
+ 0xa75c, 0xa75c,
+ 0xa75e, 0xa75e,
+ 0xa760, 0xa760,
+ 0xa762, 0xa762,
+ 0xa764, 0xa764,
+ 0xa766, 0xa766,
+ 0xa768, 0xa768,
+ 0xa76a, 0xa76a,
+ 0xa76c, 0xa76c,
+ 0xa76e, 0xa76e,
+ 0xa779, 0xa779,
+ 0xa77b, 0xa77b,
+ 0xa77d, 0xa77e,
+ 0xa780, 0xa780,
+ 0xa782, 0xa782,
+ 0xa784, 0xa784,
+ 0xa786, 0xa786,
+ 0xa78b, 0xa78b,
+ 0xa78d, 0xa78d,
+ 0xa790, 0xa790,
+ 0xa792, 0xa792,
+ 0xa796, 0xa796,
+ 0xa798, 0xa798,
+ 0xa79a, 0xa79a,
+ 0xa79c, 0xa79c,
+ 0xa79e, 0xa79e,
+ 0xa7a0, 0xa7a0,
+ 0xa7a2, 0xa7a2,
+ 0xa7a4, 0xa7a4,
+ 0xa7a6, 0xa7a6,
+ 0xa7a8, 0xa7a8,
+ 0xa7aa, 0xa7ad,
+ 0xa7b0, 0xa7b1,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+ 0x118a0, 0x118bf,
+ 0x1d400, 0x1d419,
+ 0x1d434, 0x1d44d,
+ 0x1d468, 0x1d481,
+ 0x1d49c, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b5,
+ 0x1d4d0, 0x1d4e9,
+ 0x1d504, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d538, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d56c, 0x1d585,
+ 0x1d5a0, 0x1d5b9,
+ 0x1d5d4, 0x1d5ed,
+ 0x1d608, 0x1d621,
+ 0x1d63c, 0x1d655,
+ 0x1d670, 0x1d689,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6e2, 0x1d6fa,
+ 0x1d71c, 0x1d734,
+ 0x1d756, 0x1d76e,
+ 0x1d790, 0x1d7a8,
+ 0x1d7ca, 0x1d7ca
+};
+UCP_FN(Lu)
+
+static const unichar ucp_M_def[] = {
+ 0x300, 0x36f,
+ 0x483, 0x489,
+ 0x591, 0x5bd,
+ 0x5bf, 0x5bf,
+ 0x5c1, 0x5c2,
+ 0x5c4, 0x5c5,
+ 0x5c7, 0x5c7,
+ 0x610, 0x61a,
+ 0x64b, 0x65f,
+ 0x670, 0x670,
+ 0x6d6, 0x6dc,
+ 0x6df, 0x6e4,
+ 0x6e7, 0x6e8,
+ 0x6ea, 0x6ed,
+ 0x711, 0x711,
+ 0x730, 0x74a,
+ 0x7a6, 0x7b0,
+ 0x7eb, 0x7f3,
+ 0x816, 0x819,
+ 0x81b, 0x823,
+ 0x825, 0x827,
+ 0x829, 0x82d,
+ 0x859, 0x85b,
+ 0x8e4, 0x903,
+ 0x93a, 0x93c,
+ 0x93e, 0x94f,
+ 0x951, 0x957,
+ 0x962, 0x963,
+ 0x981, 0x983,
+ 0x9bc, 0x9bc,
+ 0x9be, 0x9c4,
+ 0x9c7, 0x9c8,
+ 0x9cb, 0x9cd,
+ 0x9d7, 0x9d7,
+ 0x9e2, 0x9e3,
+ 0xa01, 0xa03,
+ 0xa3c, 0xa3c,
+ 0xa3e, 0xa42,
+ 0xa47, 0xa48,
+ 0xa4b, 0xa4d,
+ 0xa51, 0xa51,
+ 0xa70, 0xa71,
+ 0xa75, 0xa75,
+ 0xa81, 0xa83,
+ 0xabc, 0xabc,
+ 0xabe, 0xac5,
+ 0xac7, 0xac9,
+ 0xacb, 0xacd,
+ 0xae2, 0xae3,
+ 0xb01, 0xb03,
+ 0xb3c, 0xb3c,
+ 0xb3e, 0xb44,
+ 0xb47, 0xb48,
+ 0xb4b, 0xb4d,
+ 0xb56, 0xb57,
+ 0xb62, 0xb63,
+ 0xb82, 0xb82,
+ 0xbbe, 0xbc2,
+ 0xbc6, 0xbc8,
+ 0xbca, 0xbcd,
+ 0xbd7, 0xbd7,
+ 0xc00, 0xc03,
+ 0xc3e, 0xc44,
+ 0xc46, 0xc48,
+ 0xc4a, 0xc4d,
+ 0xc55, 0xc56,
+ 0xc62, 0xc63,
+ 0xc81, 0xc83,
+ 0xcbc, 0xcbc,
+ 0xcbe, 0xcc4,
+ 0xcc6, 0xcc8,
+ 0xcca, 0xccd,
+ 0xcd5, 0xcd6,
+ 0xce2, 0xce3,
+ 0xd01, 0xd03,
+ 0xd3e, 0xd44,
+ 0xd46, 0xd48,
+ 0xd4a, 0xd4d,
+ 0xd57, 0xd57,
+ 0xd62, 0xd63,
+ 0xd82, 0xd83,
+ 0xdca, 0xdca,
+ 0xdcf, 0xdd4,
+ 0xdd6, 0xdd6,
+ 0xdd8, 0xddf,
+ 0xdf2, 0xdf3,
+ 0xe31, 0xe31,
+ 0xe34, 0xe3a,
+ 0xe47, 0xe4e,
+ 0xeb1, 0xeb1,
+ 0xeb4, 0xeb9,
+ 0xebb, 0xebc,
+ 0xec8, 0xecd,
+ 0xf18, 0xf19,
+ 0xf35, 0xf35,
+ 0xf37, 0xf37,
+ 0xf39, 0xf39,
+ 0xf3e, 0xf3f,
+ 0xf71, 0xf84,
+ 0xf86, 0xf87,
+ 0xf8d, 0xf97,
+ 0xf99, 0xfbc,
+ 0xfc6, 0xfc6,
+ 0x102b, 0x103e,
+ 0x1056, 0x1059,
+ 0x105e, 0x1060,
+ 0x1062, 0x1064,
+ 0x1067, 0x106d,
+ 0x1071, 0x1074,
+ 0x1082, 0x108d,
+ 0x108f, 0x108f,
+ 0x109a, 0x109d,
+ 0x135d, 0x135f,
+ 0x1712, 0x1714,
+ 0x1732, 0x1734,
+ 0x1752, 0x1753,
+ 0x1772, 0x1773,
+ 0x17b4, 0x17d3,
+ 0x17dd, 0x17dd,
+ 0x180b, 0x180d,
+ 0x18a9, 0x18a9,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x19b0, 0x19c0,
+ 0x19c8, 0x19c9,
+ 0x1a17, 0x1a1b,
+ 0x1a55, 0x1a5e,
+ 0x1a60, 0x1a7c,
+ 0x1a7f, 0x1a7f,
+ 0x1ab0, 0x1abe,
+ 0x1b00, 0x1b04,
+ 0x1b34, 0x1b44,
+ 0x1b6b, 0x1b73,
+ 0x1b80, 0x1b82,
+ 0x1ba1, 0x1bad,
+ 0x1be6, 0x1bf3,
+ 0x1c24, 0x1c37,
+ 0x1cd0, 0x1cd2,
+ 0x1cd4, 0x1ce8,
+ 0x1ced, 0x1ced,
+ 0x1cf2, 0x1cf4,
+ 0x1cf8, 0x1cf9,
+ 0x1dc0, 0x1df5,
+ 0x1dfc, 0x1dff,
+ 0x20d0, 0x20f0,
+ 0x2cef, 0x2cf1,
+ 0x2d7f, 0x2d7f,
+ 0x2de0, 0x2dff,
+ 0x302a, 0x302f,
+ 0x3099, 0x309a,
+ 0xa66f, 0xa672,
+ 0xa674, 0xa67d,
+ 0xa69f, 0xa69f,
+ 0xa6f0, 0xa6f1,
+ 0xa802, 0xa802,
+ 0xa806, 0xa806,
+ 0xa80b, 0xa80b,
+ 0xa823, 0xa827,
+ 0xa880, 0xa881,
+ 0xa8b4, 0xa8c4,
+ 0xa8e0, 0xa8f1,
+ 0xa926, 0xa92d,
+ 0xa947, 0xa953,
+ 0xa980, 0xa983,
+ 0xa9b3, 0xa9c0,
+ 0xa9e5, 0xa9e5,
+ 0xaa29, 0xaa36,
+ 0xaa43, 0xaa43,
+ 0xaa4c, 0xaa4d,
+ 0xaa7b, 0xaa7d,
+ 0xaab0, 0xaab0,
+ 0xaab2, 0xaab4,
+ 0xaab7, 0xaab8,
+ 0xaabe, 0xaabf,
+ 0xaac1, 0xaac1,
+ 0xaaeb, 0xaaef,
+ 0xaaf5, 0xaaf6,
+ 0xabe3, 0xabea,
+ 0xabec, 0xabed,
+ 0xfb1e, 0xfb1e,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe2d,
+ 0x101fd, 0x101fd,
+ 0x102e0, 0x102e0,
+ 0x10376, 0x1037a,
+ 0x10a01, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a0f,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a3f,
+ 0x10ae5, 0x10ae6,
+ 0x11000, 0x11002,
+ 0x11038, 0x11046,
+ 0x1107f, 0x11082,
+ 0x110b0, 0x110ba,
+ 0x11100, 0x11102,
+ 0x11127, 0x11134,
+ 0x11173, 0x11173,
+ 0x11180, 0x11182,
+ 0x111b3, 0x111c0,
+ 0x1122c, 0x11237,
+ 0x112df, 0x112ea,
+ 0x11301, 0x11303,
+ 0x1133c, 0x1133c,
+ 0x1133e, 0x11344,
+ 0x11347, 0x11348,
+ 0x1134b, 0x1134d,
+ 0x11357, 0x11357,
+ 0x11362, 0x11363,
+ 0x11366, 0x1136c,
+ 0x11370, 0x11374,
+ 0x114b0, 0x114c3,
+ 0x115af, 0x115b5,
+ 0x115b8, 0x115c0,
+ 0x11630, 0x11640,
+ 0x116ab, 0x116b7,
+ 0x16af0, 0x16af4,
+ 0x16b30, 0x16b36,
+ 0x16f51, 0x16f7e,
+ 0x16f8f, 0x16f92,
+ 0x1bc9d, 0x1bc9e,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0x1e8d0, 0x1e8d6,
+ 0xe0100, 0xe01ef
+};
+UCP_FN(M)
+
+static const unichar ucp_Mc_def[] = {
+ 0x903, 0x903,
+ 0x93b, 0x93b,
+ 0x93e, 0x940,
+ 0x949, 0x94c,
+ 0x94e, 0x94f,
+ 0x982, 0x983,
+ 0x9be, 0x9c0,
+ 0x9c7, 0x9c8,
+ 0x9cb, 0x9cc,
+ 0x9d7, 0x9d7,
+ 0xa03, 0xa03,
+ 0xa3e, 0xa40,
+ 0xa83, 0xa83,
+ 0xabe, 0xac0,
+ 0xac9, 0xac9,
+ 0xacb, 0xacc,
+ 0xb02, 0xb03,
+ 0xb3e, 0xb3e,
+ 0xb40, 0xb40,
+ 0xb47, 0xb48,
+ 0xb4b, 0xb4c,
+ 0xb57, 0xb57,
+ 0xbbe, 0xbbf,
+ 0xbc1, 0xbc2,
+ 0xbc6, 0xbc8,
+ 0xbca, 0xbcc,
+ 0xbd7, 0xbd7,
+ 0xc01, 0xc03,
+ 0xc41, 0xc44,
+ 0xc82, 0xc83,
+ 0xcbe, 0xcbe,
+ 0xcc0, 0xcc4,
+ 0xcc7, 0xcc8,
+ 0xcca, 0xccb,
+ 0xcd5, 0xcd6,
+ 0xd02, 0xd03,
+ 0xd3e, 0xd40,
+ 0xd46, 0xd48,
+ 0xd4a, 0xd4c,
+ 0xd57, 0xd57,
+ 0xd82, 0xd83,
+ 0xdcf, 0xdd1,
+ 0xdd8, 0xddf,
+ 0xdf2, 0xdf3,
+ 0xf3e, 0xf3f,
+ 0xf7f, 0xf7f,
+ 0x102b, 0x102c,
+ 0x1031, 0x1031,
+ 0x1038, 0x1038,
+ 0x103b, 0x103c,
+ 0x1056, 0x1057,
+ 0x1062, 0x1064,
+ 0x1067, 0x106d,
+ 0x1083, 0x1084,
+ 0x1087, 0x108c,
+ 0x108f, 0x108f,
+ 0x109a, 0x109c,
+ 0x17b6, 0x17b6,
+ 0x17be, 0x17c5,
+ 0x17c7, 0x17c8,
+ 0x1923, 0x1926,
+ 0x1929, 0x192b,
+ 0x1930, 0x1931,
+ 0x1933, 0x1938,
+ 0x19b0, 0x19c0,
+ 0x19c8, 0x19c9,
+ 0x1a19, 0x1a1a,
+ 0x1a55, 0x1a55,
+ 0x1a57, 0x1a57,
+ 0x1a61, 0x1a61,
+ 0x1a63, 0x1a64,
+ 0x1a6d, 0x1a72,
+ 0x1b04, 0x1b04,
+ 0x1b35, 0x1b35,
+ 0x1b3b, 0x1b3b,
+ 0x1b3d, 0x1b41,
+ 0x1b43, 0x1b44,
+ 0x1b82, 0x1b82,
+ 0x1ba1, 0x1ba1,
+ 0x1ba6, 0x1ba7,
+ 0x1baa, 0x1baa,
+ 0x1be7, 0x1be7,
+ 0x1bea, 0x1bec,
+ 0x1bee, 0x1bee,
+ 0x1bf2, 0x1bf3,
+ 0x1c24, 0x1c2b,
+ 0x1c34, 0x1c35,
+ 0x1ce1, 0x1ce1,
+ 0x1cf2, 0x1cf3,
+ 0x302e, 0x302f,
+ 0xa823, 0xa824,
+ 0xa827, 0xa827,
+ 0xa880, 0xa881,
+ 0xa8b4, 0xa8c3,
+ 0xa952, 0xa953,
+ 0xa983, 0xa983,
+ 0xa9b4, 0xa9b5,
+ 0xa9ba, 0xa9bb,
+ 0xa9bd, 0xa9c0,
+ 0xaa2f, 0xaa30,
+ 0xaa33, 0xaa34,
+ 0xaa4d, 0xaa4d,
+ 0xaa7b, 0xaa7b,
+ 0xaa7d, 0xaa7d,
+ 0xaaeb, 0xaaeb,
+ 0xaaee, 0xaaef,
+ 0xaaf5, 0xaaf5,
+ 0xabe3, 0xabe4,
+ 0xabe6, 0xabe7,
+ 0xabe9, 0xabea,
+ 0xabec, 0xabec,
+ 0x11000, 0x11000,
+ 0x11002, 0x11002,
+ 0x11082, 0x11082,
+ 0x110b0, 0x110b2,
+ 0x110b7, 0x110b8,
+ 0x1112c, 0x1112c,
+ 0x11182, 0x11182,
+ 0x111b3, 0x111b5,
+ 0x111bf, 0x111c0,
+ 0x1122c, 0x1122e,
+ 0x11232, 0x11233,
+ 0x11235, 0x11235,
+ 0x112e0, 0x112e2,
+ 0x11302, 0x11303,
+ 0x1133e, 0x1133f,
+ 0x11341, 0x11344,
+ 0x11347, 0x11348,
+ 0x1134b, 0x1134d,
+ 0x11357, 0x11357,
+ 0x11362, 0x11363,
+ 0x114b0, 0x114b2,
+ 0x114b9, 0x114b9,
+ 0x114bb, 0x114be,
+ 0x114c1, 0x114c1,
+ 0x115af, 0x115b1,
+ 0x115b8, 0x115bb,
+ 0x115be, 0x115be,
+ 0x11630, 0x11632,
+ 0x1163b, 0x1163c,
+ 0x1163e, 0x1163e,
+ 0x116ac, 0x116ac,
+ 0x116ae, 0x116af,
+ 0x116b6, 0x116b6,
+ 0x16f51, 0x16f7e,
+ 0x1d165, 0x1d166,
+ 0x1d16d, 0x1d172
+};
+UCP_FN(Mc)
+
+static const unichar ucp_Me_def[] = {
+ 0x488, 0x489,
+ 0x1abe, 0x1abe,
+ 0x20dd, 0x20e0,
+ 0x20e2, 0x20e4,
+ 0xa670, 0xa672
+};
+UCP_FN(Me)
+
+static const unichar ucp_Mn_def[] = {
+ 0x300, 0x36f,
+ 0x483, 0x487,
+ 0x591, 0x5bd,
+ 0x5bf, 0x5bf,
+ 0x5c1, 0x5c2,
+ 0x5c4, 0x5c5,
+ 0x5c7, 0x5c7,
+ 0x610, 0x61a,
+ 0x64b, 0x65f,
+ 0x670, 0x670,
+ 0x6d6, 0x6dc,
+ 0x6df, 0x6e4,
+ 0x6e7, 0x6e8,
+ 0x6ea, 0x6ed,
+ 0x711, 0x711,
+ 0x730, 0x74a,
+ 0x7a6, 0x7b0,
+ 0x7eb, 0x7f3,
+ 0x816, 0x819,
+ 0x81b, 0x823,
+ 0x825, 0x827,
+ 0x829, 0x82d,
+ 0x859, 0x85b,
+ 0x8e4, 0x902,
+ 0x93a, 0x93a,
+ 0x93c, 0x93c,
+ 0x941, 0x948,
+ 0x94d, 0x94d,
+ 0x951, 0x957,
+ 0x962, 0x963,
+ 0x981, 0x981,
+ 0x9bc, 0x9bc,
+ 0x9c1, 0x9c4,
+ 0x9cd, 0x9cd,
+ 0x9e2, 0x9e3,
+ 0xa01, 0xa02,
+ 0xa3c, 0xa3c,
+ 0xa41, 0xa42,
+ 0xa47, 0xa48,
+ 0xa4b, 0xa4d,
+ 0xa51, 0xa51,
+ 0xa70, 0xa71,
+ 0xa75, 0xa75,
+ 0xa81, 0xa82,
+ 0xabc, 0xabc,
+ 0xac1, 0xac5,
+ 0xac7, 0xac8,
+ 0xacd, 0xacd,
+ 0xae2, 0xae3,
+ 0xb01, 0xb01,
+ 0xb3c, 0xb3c,
+ 0xb3f, 0xb3f,
+ 0xb41, 0xb44,
+ 0xb4d, 0xb4d,
+ 0xb56, 0xb56,
+ 0xb62, 0xb63,
+ 0xb82, 0xb82,
+ 0xbc0, 0xbc0,
+ 0xbcd, 0xbcd,
+ 0xc00, 0xc00,
+ 0xc3e, 0xc40,
+ 0xc46, 0xc48,
+ 0xc4a, 0xc4d,
+ 0xc55, 0xc56,
+ 0xc62, 0xc63,
+ 0xc81, 0xc81,
+ 0xcbc, 0xcbc,
+ 0xcbf, 0xcbf,
+ 0xcc6, 0xcc6,
+ 0xccc, 0xccd,
+ 0xce2, 0xce3,
+ 0xd01, 0xd01,
+ 0xd41, 0xd44,
+ 0xd4d, 0xd4d,
+ 0xd62, 0xd63,
+ 0xdca, 0xdca,
+ 0xdd2, 0xdd4,
+ 0xdd6, 0xdd6,
+ 0xe31, 0xe31,
+ 0xe34, 0xe3a,
+ 0xe47, 0xe4e,
+ 0xeb1, 0xeb1,
+ 0xeb4, 0xeb9,
+ 0xebb, 0xebc,
+ 0xec8, 0xecd,
+ 0xf18, 0xf19,
+ 0xf35, 0xf35,
+ 0xf37, 0xf37,
+ 0xf39, 0xf39,
+ 0xf71, 0xf7e,
+ 0xf80, 0xf84,
+ 0xf86, 0xf87,
+ 0xf8d, 0xf97,
+ 0xf99, 0xfbc,
+ 0xfc6, 0xfc6,
+ 0x102d, 0x1030,
+ 0x1032, 0x1037,
+ 0x1039, 0x103a,
+ 0x103d, 0x103e,
+ 0x1058, 0x1059,
+ 0x105e, 0x1060,
+ 0x1071, 0x1074,
+ 0x1082, 0x1082,
+ 0x1085, 0x1086,
+ 0x108d, 0x108d,
+ 0x109d, 0x109d,
+ 0x135d, 0x135f,
+ 0x1712, 0x1714,
+ 0x1732, 0x1734,
+ 0x1752, 0x1753,
+ 0x1772, 0x1773,
+ 0x17b4, 0x17b5,
+ 0x17b7, 0x17bd,
+ 0x17c6, 0x17c6,
+ 0x17c9, 0x17d3,
+ 0x17dd, 0x17dd,
+ 0x180b, 0x180d,
+ 0x18a9, 0x18a9,
+ 0x1920, 0x1922,
+ 0x1927, 0x1928,
+ 0x1932, 0x1932,
+ 0x1939, 0x193b,
+ 0x1a17, 0x1a18,
+ 0x1a1b, 0x1a1b,
+ 0x1a56, 0x1a56,
+ 0x1a58, 0x1a5e,
+ 0x1a60, 0x1a60,
+ 0x1a62, 0x1a62,
+ 0x1a65, 0x1a6c,
+ 0x1a73, 0x1a7c,
+ 0x1a7f, 0x1a7f,
+ 0x1ab0, 0x1abd,
+ 0x1b00, 0x1b03,
+ 0x1b34, 0x1b34,
+ 0x1b36, 0x1b3a,
+ 0x1b3c, 0x1b3c,
+ 0x1b42, 0x1b42,
+ 0x1b6b, 0x1b73,
+ 0x1b80, 0x1b81,
+ 0x1ba2, 0x1ba5,
+ 0x1ba8, 0x1ba9,
+ 0x1bab, 0x1bad,
+ 0x1be6, 0x1be6,
+ 0x1be8, 0x1be9,
+ 0x1bed, 0x1bed,
+ 0x1bef, 0x1bf1,
+ 0x1c2c, 0x1c33,
+ 0x1c36, 0x1c37,
+ 0x1cd0, 0x1cd2,
+ 0x1cd4, 0x1ce0,
+ 0x1ce2, 0x1ce8,
+ 0x1ced, 0x1ced,
+ 0x1cf4, 0x1cf4,
+ 0x1cf8, 0x1cf9,
+ 0x1dc0, 0x1df5,
+ 0x1dfc, 0x1dff,
+ 0x20d0, 0x20dc,
+ 0x20e1, 0x20e1,
+ 0x20e5, 0x20f0,
+ 0x2cef, 0x2cf1,
+ 0x2d7f, 0x2d7f,
+ 0x2de0, 0x2dff,
+ 0x302a, 0x302d,
+ 0x3099, 0x309a,
+ 0xa66f, 0xa66f,
+ 0xa674, 0xa67d,
+ 0xa69f, 0xa69f,
+ 0xa6f0, 0xa6f1,
+ 0xa802, 0xa802,
+ 0xa806, 0xa806,
+ 0xa80b, 0xa80b,
+ 0xa825, 0xa826,
+ 0xa8c4, 0xa8c4,
+ 0xa8e0, 0xa8f1,
+ 0xa926, 0xa92d,
+ 0xa947, 0xa951,
+ 0xa980, 0xa982,
+ 0xa9b3, 0xa9b3,
+ 0xa9b6, 0xa9b9,
+ 0xa9bc, 0xa9bc,
+ 0xa9e5, 0xa9e5,
+ 0xaa29, 0xaa2e,
+ 0xaa31, 0xaa32,
+ 0xaa35, 0xaa36,
+ 0xaa43, 0xaa43,
+ 0xaa4c, 0xaa4c,
+ 0xaa7c, 0xaa7c,
+ 0xaab0, 0xaab0,
+ 0xaab2, 0xaab4,
+ 0xaab7, 0xaab8,
+ 0xaabe, 0xaabf,
+ 0xaac1, 0xaac1,
+ 0xaaec, 0xaaed,
+ 0xaaf6, 0xaaf6,
+ 0xabe5, 0xabe5,
+ 0xabe8, 0xabe8,
+ 0xabed, 0xabed,
+ 0xfb1e, 0xfb1e,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe2d,
+ 0x101fd, 0x101fd,
+ 0x102e0, 0x102e0,
+ 0x10376, 0x1037a,
+ 0x10a01, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a0f,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a3f,
+ 0x10ae5, 0x10ae6,
+ 0x11001, 0x11001,
+ 0x11038, 0x11046,
+ 0x1107f, 0x11081,
+ 0x110b3, 0x110b6,
+ 0x110b9, 0x110ba,
+ 0x11100, 0x11102,
+ 0x11127, 0x1112b,
+ 0x1112d, 0x11134,
+ 0x11173, 0x11173,
+ 0x11180, 0x11181,
+ 0x111b6, 0x111be,
+ 0x1122f, 0x11231,
+ 0x11234, 0x11234,
+ 0x11236, 0x11237,
+ 0x112df, 0x112df,
+ 0x112e3, 0x112ea,
+ 0x11301, 0x11301,
+ 0x1133c, 0x1133c,
+ 0x11340, 0x11340,
+ 0x11366, 0x1136c,
+ 0x11370, 0x11374,
+ 0x114b3, 0x114b8,
+ 0x114ba, 0x114ba,
+ 0x114bf, 0x114c0,
+ 0x114c2, 0x114c3,
+ 0x115b2, 0x115b5,
+ 0x115bc, 0x115bd,
+ 0x115bf, 0x115c0,
+ 0x11633, 0x1163a,
+ 0x1163d, 0x1163d,
+ 0x1163f, 0x11640,
+ 0x116ab, 0x116ab,
+ 0x116ad, 0x116ad,
+ 0x116b0, 0x116b5,
+ 0x116b7, 0x116b7,
+ 0x16af0, 0x16af4,
+ 0x16b30, 0x16b36,
+ 0x16f8f, 0x16f92,
+ 0x1bc9d, 0x1bc9e,
+ 0x1d167, 0x1d169,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0x1e8d0, 0x1e8d6,
+ 0xe0100, 0xe01ef
+};
+UCP_FN(Mn)
+
+static const unichar ucp_N_def[] = {
+ 0x30, 0x39,
+ 0xb2, 0xb3,
+ 0xb9, 0xb9,
+ 0xbc, 0xbe,
+ 0x660, 0x669,
+ 0x6f0, 0x6f9,
+ 0x7c0, 0x7c9,
+ 0x966, 0x96f,
+ 0x9e6, 0x9ef,
+ 0x9f4, 0x9f9,
+ 0xa66, 0xa6f,
+ 0xae6, 0xaef,
+ 0xb66, 0xb6f,
+ 0xb72, 0xb77,
+ 0xbe6, 0xbf2,
+ 0xc66, 0xc6f,
+ 0xc78, 0xc7e,
+ 0xce6, 0xcef,
+ 0xd66, 0xd75,
+ 0xde6, 0xdef,
+ 0xe50, 0xe59,
+ 0xed0, 0xed9,
+ 0xf20, 0xf33,
+ 0x1040, 0x1049,
+ 0x1090, 0x1099,
+ 0x1369, 0x137c,
+ 0x16ee, 0x16f0,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0x19d0, 0x19da,
+ 0x1a80, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1b50, 0x1b59,
+ 0x1bb0, 0x1bb9,
+ 0x1c40, 0x1c49,
+ 0x1c50, 0x1c59,
+ 0x2070, 0x2070,
+ 0x2074, 0x2079,
+ 0x2080, 0x2089,
+ 0x2150, 0x2182,
+ 0x2185, 0x2189,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x2cfd, 0x2cfd,
+ 0x3007, 0x3007,
+ 0x3021, 0x3029,
+ 0x3038, 0x303a,
+ 0x3192, 0x3195,
+ 0x3220, 0x3229,
+ 0x3248, 0x324f,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0xa620, 0xa629,
+ 0xa6e6, 0xa6ef,
+ 0xa830, 0xa835,
+ 0xa8d0, 0xa8d9,
+ 0xa900, 0xa909,
+ 0xa9d0, 0xa9d9,
+ 0xa9f0, 0xa9f9,
+ 0xaa50, 0xaa59,
+ 0xabf0, 0xabf9,
+ 0xff10, 0xff19,
+ 0x10107, 0x10133,
+ 0x10140, 0x10178,
+ 0x1018a, 0x1018b,
+ 0x102e1, 0x102fb,
+ 0x10320, 0x10323,
+ 0x10341, 0x10341,
+ 0x1034a, 0x1034a,
+ 0x103d1, 0x103d5,
+ 0x104a0, 0x104a9,
+ 0x10858, 0x1085f,
+ 0x10879, 0x1087f,
+ 0x108a7, 0x108af,
+ 0x10916, 0x1091b,
+ 0x10a40, 0x10a47,
+ 0x10a7d, 0x10a7e,
+ 0x10a9d, 0x10a9f,
+ 0x10aeb, 0x10aef,
+ 0x10b58, 0x10b5f,
+ 0x10b78, 0x10b7f,
+ 0x10ba9, 0x10baf,
+ 0x10e60, 0x10e7e,
+ 0x11052, 0x1106f,
+ 0x110f0, 0x110f9,
+ 0x11136, 0x1113f,
+ 0x111d0, 0x111d9,
+ 0x111e1, 0x111f4,
+ 0x112f0, 0x112f9,
+ 0x114d0, 0x114d9,
+ 0x11650, 0x11659,
+ 0x116c0, 0x116c9,
+ 0x118e0, 0x118f2,
+ 0x12400, 0x1246e,
+ 0x16a60, 0x16a69,
+ 0x16b50, 0x16b59,
+ 0x16b5b, 0x16b61,
+ 0x1d360, 0x1d371,
+ 0x1d7ce, 0x1d7ff,
+ 0x1e8c7, 0x1e8cf,
+ 0x1f100, 0x1f10c
+};
+UCP_FN(N)
+
+static const unichar ucp_Nd_def[] = {
+ 0x30, 0x39,
+ 0x660, 0x669,
+ 0x6f0, 0x6f9,
+ 0x7c0, 0x7c9,
+ 0x966, 0x96f,
+ 0x9e6, 0x9ef,
+ 0xa66, 0xa6f,
+ 0xae6, 0xaef,
+ 0xb66, 0xb6f,
+ 0xbe6, 0xbef,
+ 0xc66, 0xc6f,
+ 0xce6, 0xcef,
+ 0xd66, 0xd6f,
+ 0xde6, 0xdef,
+ 0xe50, 0xe59,
+ 0xed0, 0xed9,
+ 0xf20, 0xf29,
+ 0x1040, 0x1049,
+ 0x1090, 0x1099,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0x19d0, 0x19d9,
+ 0x1a80, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1b50, 0x1b59,
+ 0x1bb0, 0x1bb9,
+ 0x1c40, 0x1c49,
+ 0x1c50, 0x1c59,
+ 0xa620, 0xa629,
+ 0xa8d0, 0xa8d9,
+ 0xa900, 0xa909,
+ 0xa9d0, 0xa9d9,
+ 0xa9f0, 0xa9f9,
+ 0xaa50, 0xaa59,
+ 0xabf0, 0xabf9,
+ 0xff10, 0xff19,
+ 0x104a0, 0x104a9,
+ 0x11066, 0x1106f,
+ 0x110f0, 0x110f9,
+ 0x11136, 0x1113f,
+ 0x111d0, 0x111d9,
+ 0x112f0, 0x112f9,
+ 0x114d0, 0x114d9,
+ 0x11650, 0x11659,
+ 0x116c0, 0x116c9,
+ 0x118e0, 0x118e9,
+ 0x16a60, 0x16a69,
+ 0x16b50, 0x16b59,
+ 0x1d7ce, 0x1d7ff
+};
+UCP_FN(Nd)
+
+static const unichar ucp_Nl_def[] = {
+ 0x16ee, 0x16f0,
+ 0x2160, 0x2182,
+ 0x2185, 0x2188,
+ 0x3007, 0x3007,
+ 0x3021, 0x3029,
+ 0x3038, 0x303a,
+ 0xa6e6, 0xa6ef,
+ 0x10140, 0x10174,
+ 0x10341, 0x10341,
+ 0x1034a, 0x1034a,
+ 0x103d1, 0x103d5,
+ 0x12400, 0x1246e
+};
+UCP_FN(Nl)
+
+static const unichar ucp_No_def[] = {
+ 0xb2, 0xb3,
+ 0xb9, 0xb9,
+ 0xbc, 0xbe,
+ 0x9f4, 0x9f9,
+ 0xb72, 0xb77,
+ 0xbf0, 0xbf2,
+ 0xc78, 0xc7e,
+ 0xd70, 0xd75,
+ 0xf2a, 0xf33,
+ 0x1369, 0x137c,
+ 0x17f0, 0x17f9,
+ 0x19da, 0x19da,
+ 0x2070, 0x2070,
+ 0x2074, 0x2079,
+ 0x2080, 0x2089,
+ 0x2150, 0x215f,
+ 0x2189, 0x2189,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x2cfd, 0x2cfd,
+ 0x3192, 0x3195,
+ 0x3220, 0x3229,
+ 0x3248, 0x324f,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0xa830, 0xa835,
+ 0x10107, 0x10133,
+ 0x10175, 0x10178,
+ 0x1018a, 0x1018b,
+ 0x102e1, 0x102fb,
+ 0x10320, 0x10323,
+ 0x10858, 0x1085f,
+ 0x10879, 0x1087f,
+ 0x108a7, 0x108af,
+ 0x10916, 0x1091b,
+ 0x10a40, 0x10a47,
+ 0x10a7d, 0x10a7e,
+ 0x10a9d, 0x10a9f,
+ 0x10aeb, 0x10aef,
+ 0x10b58, 0x10b5f,
+ 0x10b78, 0x10b7f,
+ 0x10ba9, 0x10baf,
+ 0x10e60, 0x10e7e,
+ 0x11052, 0x11065,
+ 0x111e1, 0x111f4,
+ 0x118ea, 0x118f2,
+ 0x16b5b, 0x16b61,
+ 0x1d360, 0x1d371,
+ 0x1e8c7, 0x1e8cf,
+ 0x1f100, 0x1f10c
+};
+UCP_FN(No)
+
+static const unichar ucp_P_def[] = {
+ 0x21, 0x23,
+ 0x25, 0x2a,
+ 0x2c, 0x2f,
+ 0x3a, 0x3b,
+ 0x3f, 0x40,
+ 0x5b, 0x5d,
+ 0x5f, 0x5f,
+ 0x7b, 0x7b,
+ 0x7d, 0x7d,
+ 0xa1, 0xa1,
+ 0xa7, 0xa7,
+ 0xab, 0xab,
+ 0xb6, 0xb7,
+ 0xbb, 0xbb,
+ 0xbf, 0xbf,
+ 0x37e, 0x37e,
+ 0x387, 0x387,
+ 0x55a, 0x55f,
+ 0x589, 0x58a,
+ 0x5be, 0x5be,
+ 0x5c0, 0x5c0,
+ 0x5c3, 0x5c3,
+ 0x5c6, 0x5c6,
+ 0x5f3, 0x5f4,
+ 0x609, 0x60a,
+ 0x60c, 0x60d,
+ 0x61b, 0x61b,
+ 0x61e, 0x61f,
+ 0x66a, 0x66d,
+ 0x6d4, 0x6d4,
+ 0x700, 0x70d,
+ 0x7f7, 0x7f9,
+ 0x830, 0x83e,
+ 0x85e, 0x85e,
+ 0x964, 0x965,
+ 0x970, 0x970,
+ 0xaf0, 0xaf0,
+ 0xdf4, 0xdf4,
+ 0xe4f, 0xe4f,
+ 0xe5a, 0xe5b,
+ 0xf04, 0xf12,
+ 0xf14, 0xf14,
+ 0xf3a, 0xf3d,
+ 0xf85, 0xf85,
+ 0xfd0, 0xfd4,
+ 0xfd9, 0xfda,
+ 0x104a, 0x104f,
+ 0x10fb, 0x10fb,
+ 0x1360, 0x1368,
+ 0x1400, 0x1400,
+ 0x166d, 0x166e,
+ 0x169b, 0x169c,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x17d4, 0x17d6,
+ 0x17d8, 0x17da,
+ 0x1800, 0x180a,
+ 0x1944, 0x1945,
+ 0x1a1e, 0x1a1f,
+ 0x1aa0, 0x1aa6,
+ 0x1aa8, 0x1aad,
+ 0x1b5a, 0x1b60,
+ 0x1bfc, 0x1bff,
+ 0x1c3b, 0x1c3f,
+ 0x1c7e, 0x1c7f,
+ 0x1cc0, 0x1cc7,
+ 0x1cd3, 0x1cd3,
+ 0x2010, 0x2027,
+ 0x2030, 0x2043,
+ 0x2045, 0x2051,
+ 0x2053, 0x205e,
+ 0x207d, 0x207e,
+ 0x208d, 0x208e,
+ 0x2308, 0x230b,
+ 0x2329, 0x232a,
+ 0x2768, 0x2775,
+ 0x27c5, 0x27c6,
+ 0x27e6, 0x27ef,
+ 0x2983, 0x2998,
+ 0x29d8, 0x29db,
+ 0x29fc, 0x29fd,
+ 0x2cf9, 0x2cfc,
+ 0x2cfe, 0x2cff,
+ 0x2d70, 0x2d70,
+ 0x2e00, 0x2e2e,
+ 0x2e30, 0x2e42,
+ 0x3001, 0x3003,
+ 0x3008, 0x3011,
+ 0x3014, 0x301f,
+ 0x3030, 0x3030,
+ 0x303d, 0x303d,
+ 0x30a0, 0x30a0,
+ 0x30fb, 0x30fb,
+ 0xa4fe, 0xa4ff,
+ 0xa60d, 0xa60f,
+ 0xa673, 0xa673,
+ 0xa67e, 0xa67e,
+ 0xa6f2, 0xa6f7,
+ 0xa874, 0xa877,
+ 0xa8ce, 0xa8cf,
+ 0xa8f8, 0xa8fa,
+ 0xa92e, 0xa92f,
+ 0xa95f, 0xa95f,
+ 0xa9c1, 0xa9cd,
+ 0xa9de, 0xa9df,
+ 0xaa5c, 0xaa5f,
+ 0xaade, 0xaadf,
+ 0xaaf0, 0xaaf1,
+ 0xabeb, 0xabeb,
+ 0xfd3e, 0xfd3f,
+ 0xfe10, 0xfe19,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe61,
+ 0xfe63, 0xfe63,
+ 0xfe68, 0xfe68,
+ 0xfe6a, 0xfe6b,
+ 0xff01, 0xff03,
+ 0xff05, 0xff0a,
+ 0xff0c, 0xff0f,
+ 0xff1a, 0xff1b,
+ 0xff1f, 0xff20,
+ 0xff3b, 0xff3d,
+ 0xff3f, 0xff3f,
+ 0xff5b, 0xff5b,
+ 0xff5d, 0xff5d,
+ 0xff5f, 0xff65,
+ 0x10100, 0x10102,
+ 0x1039f, 0x1039f,
+ 0x103d0, 0x103d0,
+ 0x1056f, 0x1056f,
+ 0x10857, 0x10857,
+ 0x1091f, 0x1091f,
+ 0x1093f, 0x1093f,
+ 0x10a50, 0x10a58,
+ 0x10a7f, 0x10a7f,
+ 0x10af0, 0x10af6,
+ 0x10b39, 0x10b3f,
+ 0x10b99, 0x10b9c,
+ 0x11047, 0x1104d,
+ 0x110bb, 0x110bc,
+ 0x110be, 0x110c1,
+ 0x11140, 0x11143,
+ 0x11174, 0x11175,
+ 0x111c5, 0x111c8,
+ 0x111cd, 0x111cd,
+ 0x11238, 0x1123d,
+ 0x114c6, 0x114c6,
+ 0x115c1, 0x115c9,
+ 0x11641, 0x11643,
+ 0x12470, 0x12474,
+ 0x16a6e, 0x16a6f,
+ 0x16af5, 0x16af5,
+ 0x16b37, 0x16b3b,
+ 0x16b44, 0x16b44,
+ 0x1bc9f, 0x1bc9f
+};
+UCP_FN(P)
+
+static const unichar ucp_Pc_def[] = {
+ 0x5f, 0x5f,
+ 0x203f, 0x2040,
+ 0x2054, 0x2054,
+ 0xfe33, 0xfe34,
+ 0xfe4d, 0xfe4f,
+ 0xff3f, 0xff3f
+};
+UCP_FN(Pc)
+
+static const unichar ucp_Pd_def[] = {
+ 0x2d, 0x2d,
+ 0x58a, 0x58a,
+ 0x5be, 0x5be,
+ 0x1400, 0x1400,
+ 0x1806, 0x1806,
+ 0x2010, 0x2015,
+ 0x2e17, 0x2e17,
+ 0x2e1a, 0x2e1a,
+ 0x2e3a, 0x2e3b,
+ 0x2e40, 0x2e40,
+ 0x301c, 0x301c,
+ 0x3030, 0x3030,
+ 0x30a0, 0x30a0,
+ 0xfe31, 0xfe32,
+ 0xfe58, 0xfe58,
+ 0xfe63, 0xfe63,
+ 0xff0d, 0xff0d
+};
+UCP_FN(Pd)
+
+static const unichar ucp_Pe_def[] = {
+ 0x29, 0x29,
+ 0x5d, 0x5d,
+ 0x7d, 0x7d,
+ 0xf3b, 0xf3b,
+ 0xf3d, 0xf3d,
+ 0x169c, 0x169c,
+ 0x2046, 0x2046,
+ 0x207e, 0x207e,
+ 0x208e, 0x208e,
+ 0x2309, 0x2309,
+ 0x230b, 0x230b,
+ 0x232a, 0x232a,
+ 0x2769, 0x2769,
+ 0x276b, 0x276b,
+ 0x276d, 0x276d,
+ 0x276f, 0x276f,
+ 0x2771, 0x2771,
+ 0x2773, 0x2773,
+ 0x2775, 0x2775,
+ 0x27c6, 0x27c6,
+ 0x27e7, 0x27e7,
+ 0x27e9, 0x27e9,
+ 0x27eb, 0x27eb,
+ 0x27ed, 0x27ed,
+ 0x27ef, 0x27ef,
+ 0x2984, 0x2984,
+ 0x2986, 0x2986,
+ 0x2988, 0x2988,
+ 0x298a, 0x298a,
+ 0x298c, 0x298c,
+ 0x298e, 0x298e,
+ 0x2990, 0x2990,
+ 0x2992, 0x2992,
+ 0x2994, 0x2994,
+ 0x2996, 0x2996,
+ 0x2998, 0x2998,
+ 0x29d9, 0x29d9,
+ 0x29db, 0x29db,
+ 0x29fd, 0x29fd,
+ 0x2e23, 0x2e23,
+ 0x2e25, 0x2e25,
+ 0x2e27, 0x2e27,
+ 0x2e29, 0x2e29,
+ 0x3009, 0x3009,
+ 0x300b, 0x300b,
+ 0x300d, 0x300d,
+ 0x300f, 0x300f,
+ 0x3011, 0x3011,
+ 0x3015, 0x3015,
+ 0x3017, 0x3017,
+ 0x3019, 0x3019,
+ 0x301b, 0x301b,
+ 0x301e, 0x301f,
+ 0xfd3e, 0xfd3e,
+ 0xfe18, 0xfe18,
+ 0xfe36, 0xfe36,
+ 0xfe38, 0xfe38,
+ 0xfe3a, 0xfe3a,
+ 0xfe3c, 0xfe3c,
+ 0xfe3e, 0xfe3e,
+ 0xfe40, 0xfe40,
+ 0xfe42, 0xfe42,
+ 0xfe44, 0xfe44,
+ 0xfe48, 0xfe48,
+ 0xfe5a, 0xfe5a,
+ 0xfe5c, 0xfe5c,
+ 0xfe5e, 0xfe5e,
+ 0xff09, 0xff09,
+ 0xff3d, 0xff3d,
+ 0xff5d, 0xff5d,
+ 0xff60, 0xff60,
+ 0xff63, 0xff63
+};
+UCP_FN(Pe)
+
+static const unichar ucp_Pf_def[] = {
+ 0xbb, 0xbb,
+ 0x2019, 0x2019,
+ 0x201d, 0x201d,
+ 0x203a, 0x203a,
+ 0x2e03, 0x2e03,
+ 0x2e05, 0x2e05,
+ 0x2e0a, 0x2e0a,
+ 0x2e0d, 0x2e0d,
+ 0x2e1d, 0x2e1d,
+ 0x2e21, 0x2e21
+};
+UCP_FN(Pf)
+
+static const unichar ucp_Pi_def[] = {
+ 0xab, 0xab,
+ 0x2018, 0x2018,
+ 0x201b, 0x201c,
+ 0x201f, 0x201f,
+ 0x2039, 0x2039,
+ 0x2e02, 0x2e02,
+ 0x2e04, 0x2e04,
+ 0x2e09, 0x2e09,
+ 0x2e0c, 0x2e0c,
+ 0x2e1c, 0x2e1c,
+ 0x2e20, 0x2e20
+};
+UCP_FN(Pi)
+
+static const unichar ucp_Po_def[] = {
+ 0x21, 0x23,
+ 0x25, 0x27,
+ 0x2a, 0x2a,
+ 0x2c, 0x2c,
+ 0x2e, 0x2f,
+ 0x3a, 0x3b,
+ 0x3f, 0x40,
+ 0x5c, 0x5c,
+ 0xa1, 0xa1,
+ 0xa7, 0xa7,
+ 0xb6, 0xb7,
+ 0xbf, 0xbf,
+ 0x37e, 0x37e,
+ 0x387, 0x387,
+ 0x55a, 0x55f,
+ 0x589, 0x589,
+ 0x5c0, 0x5c0,
+ 0x5c3, 0x5c3,
+ 0x5c6, 0x5c6,
+ 0x5f3, 0x5f4,
+ 0x609, 0x60a,
+ 0x60c, 0x60d,
+ 0x61b, 0x61b,
+ 0x61e, 0x61f,
+ 0x66a, 0x66d,
+ 0x6d4, 0x6d4,
+ 0x700, 0x70d,
+ 0x7f7, 0x7f9,
+ 0x830, 0x83e,
+ 0x85e, 0x85e,
+ 0x964, 0x965,
+ 0x970, 0x970,
+ 0xaf0, 0xaf0,
+ 0xdf4, 0xdf4,
+ 0xe4f, 0xe4f,
+ 0xe5a, 0xe5b,
+ 0xf04, 0xf12,
+ 0xf14, 0xf14,
+ 0xf85, 0xf85,
+ 0xfd0, 0xfd4,
+ 0xfd9, 0xfda,
+ 0x104a, 0x104f,
+ 0x10fb, 0x10fb,
+ 0x1360, 0x1368,
+ 0x166d, 0x166e,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x17d4, 0x17d6,
+ 0x17d8, 0x17da,
+ 0x1800, 0x1805,
+ 0x1807, 0x180a,
+ 0x1944, 0x1945,
+ 0x1a1e, 0x1a1f,
+ 0x1aa0, 0x1aa6,
+ 0x1aa8, 0x1aad,
+ 0x1b5a, 0x1b60,
+ 0x1bfc, 0x1bff,
+ 0x1c3b, 0x1c3f,
+ 0x1c7e, 0x1c7f,
+ 0x1cc0, 0x1cc7,
+ 0x1cd3, 0x1cd3,
+ 0x2016, 0x2017,
+ 0x2020, 0x2027,
+ 0x2030, 0x2038,
+ 0x203b, 0x203e,
+ 0x2041, 0x2043,
+ 0x2047, 0x2051,
+ 0x2053, 0x2053,
+ 0x2055, 0x205e,
+ 0x2cf9, 0x2cfc,
+ 0x2cfe, 0x2cff,
+ 0x2d70, 0x2d70,
+ 0x2e00, 0x2e01,
+ 0x2e06, 0x2e08,
+ 0x2e0b, 0x2e0b,
+ 0x2e0e, 0x2e16,
+ 0x2e18, 0x2e19,
+ 0x2e1b, 0x2e1b,
+ 0x2e1e, 0x2e1f,
+ 0x2e2a, 0x2e2e,
+ 0x2e30, 0x2e39,
+ 0x2e3c, 0x2e3f,
+ 0x2e41, 0x2e41,
+ 0x3001, 0x3003,
+ 0x303d, 0x303d,
+ 0x30fb, 0x30fb,
+ 0xa4fe, 0xa4ff,
+ 0xa60d, 0xa60f,
+ 0xa673, 0xa673,
+ 0xa67e, 0xa67e,
+ 0xa6f2, 0xa6f7,
+ 0xa874, 0xa877,
+ 0xa8ce, 0xa8cf,
+ 0xa8f8, 0xa8fa,
+ 0xa92e, 0xa92f,
+ 0xa95f, 0xa95f,
+ 0xa9c1, 0xa9cd,
+ 0xa9de, 0xa9df,
+ 0xaa5c, 0xaa5f,
+ 0xaade, 0xaadf,
+ 0xaaf0, 0xaaf1,
+ 0xabeb, 0xabeb,
+ 0xfe10, 0xfe16,
+ 0xfe19, 0xfe19,
+ 0xfe30, 0xfe30,
+ 0xfe45, 0xfe46,
+ 0xfe49, 0xfe4c,
+ 0xfe50, 0xfe52,
+ 0xfe54, 0xfe57,
+ 0xfe5f, 0xfe61,
+ 0xfe68, 0xfe68,
+ 0xfe6a, 0xfe6b,
+ 0xff01, 0xff03,
+ 0xff05, 0xff07,
+ 0xff0a, 0xff0a,
+ 0xff0c, 0xff0c,
+ 0xff0e, 0xff0f,
+ 0xff1a, 0xff1b,
+ 0xff1f, 0xff20,
+ 0xff3c, 0xff3c,
+ 0xff61, 0xff61,
+ 0xff64, 0xff65,
+ 0x10100, 0x10102,
+ 0x1039f, 0x1039f,
+ 0x103d0, 0x103d0,
+ 0x1056f, 0x1056f,
+ 0x10857, 0x10857,
+ 0x1091f, 0x1091f,
+ 0x1093f, 0x1093f,
+ 0x10a50, 0x10a58,
+ 0x10a7f, 0x10a7f,
+ 0x10af0, 0x10af6,
+ 0x10b39, 0x10b3f,
+ 0x10b99, 0x10b9c,
+ 0x11047, 0x1104d,
+ 0x110bb, 0x110bc,
+ 0x110be, 0x110c1,
+ 0x11140, 0x11143,
+ 0x11174, 0x11175,
+ 0x111c5, 0x111c8,
+ 0x111cd, 0x111cd,
+ 0x11238, 0x1123d,
+ 0x114c6, 0x114c6,
+ 0x115c1, 0x115c9,
+ 0x11641, 0x11643,
+ 0x12470, 0x12474,
+ 0x16a6e, 0x16a6f,
+ 0x16af5, 0x16af5,
+ 0x16b37, 0x16b3b,
+ 0x16b44, 0x16b44,
+ 0x1bc9f, 0x1bc9f
+};
+UCP_FN(Po)
+
+static const unichar ucp_Ps_def[] = {
+ 0x28, 0x28,
+ 0x5b, 0x5b,
+ 0x7b, 0x7b,
+ 0xf3a, 0xf3a,
+ 0xf3c, 0xf3c,
+ 0x169b, 0x169b,
+ 0x201a, 0x201a,
+ 0x201e, 0x201e,
+ 0x2045, 0x2045,
+ 0x207d, 0x207d,
+ 0x208d, 0x208d,
+ 0x2308, 0x2308,
+ 0x230a, 0x230a,
+ 0x2329, 0x2329,
+ 0x2768, 0x2768,
+ 0x276a, 0x276a,
+ 0x276c, 0x276c,
+ 0x276e, 0x276e,
+ 0x2770, 0x2770,
+ 0x2772, 0x2772,
+ 0x2774, 0x2774,
+ 0x27c5, 0x27c5,
+ 0x27e6, 0x27e6,
+ 0x27e8, 0x27e8,
+ 0x27ea, 0x27ea,
+ 0x27ec, 0x27ec,
+ 0x27ee, 0x27ee,
+ 0x2983, 0x2983,
+ 0x2985, 0x2985,
+ 0x2987, 0x2987,
+ 0x2989, 0x2989,
+ 0x298b, 0x298b,
+ 0x298d, 0x298d,
+ 0x298f, 0x298f,
+ 0x2991, 0x2991,
+ 0x2993, 0x2993,
+ 0x2995, 0x2995,
+ 0x2997, 0x2997,
+ 0x29d8, 0x29d8,
+ 0x29da, 0x29da,
+ 0x29fc, 0x29fc,
+ 0x2e22, 0x2e22,
+ 0x2e24, 0x2e24,
+ 0x2e26, 0x2e26,
+ 0x2e28, 0x2e28,
+ 0x2e42, 0x2e42,
+ 0x3008, 0x3008,
+ 0x300a, 0x300a,
+ 0x300c, 0x300c,
+ 0x300e, 0x300e,
+ 0x3010, 0x3010,
+ 0x3014, 0x3014,
+ 0x3016, 0x3016,
+ 0x3018, 0x3018,
+ 0x301a, 0x301a,
+ 0x301d, 0x301d,
+ 0xfd3f, 0xfd3f,
+ 0xfe17, 0xfe17,
+ 0xfe35, 0xfe35,
+ 0xfe37, 0xfe37,
+ 0xfe39, 0xfe39,
+ 0xfe3b, 0xfe3b,
+ 0xfe3d, 0xfe3d,
+ 0xfe3f, 0xfe3f,
+ 0xfe41, 0xfe41,
+ 0xfe43, 0xfe43,
+ 0xfe47, 0xfe47,
+ 0xfe59, 0xfe59,
+ 0xfe5b, 0xfe5b,
+ 0xfe5d, 0xfe5d,
+ 0xff08, 0xff08,
+ 0xff3b, 0xff3b,
+ 0xff5b, 0xff5b,
+ 0xff5f, 0xff5f,
+ 0xff62, 0xff62
+};
+UCP_FN(Ps)
+
+static const unichar ucp_S_def[] = {
+ 0x24, 0x24,
+ 0x2b, 0x2b,
+ 0x3c, 0x3e,
+ 0x5e, 0x5e,
+ 0x60, 0x60,
+ 0x7c, 0x7c,
+ 0x7e, 0x7e,
+ 0xa2, 0xa6,
+ 0xa8, 0xa9,
+ 0xac, 0xac,
+ 0xae, 0xb1,
+ 0xb4, 0xb4,
+ 0xb8, 0xb8,
+ 0xd7, 0xd7,
+ 0xf7, 0xf7,
+ 0x2c2, 0x2c5,
+ 0x2d2, 0x2df,
+ 0x2e5, 0x2eb,
+ 0x2ed, 0x2ed,
+ 0x2ef, 0x2ff,
+ 0x375, 0x375,
+ 0x384, 0x385,
+ 0x3f6, 0x3f6,
+ 0x482, 0x482,
+ 0x58d, 0x58f,
+ 0x606, 0x608,
+ 0x60b, 0x60b,
+ 0x60e, 0x60f,
+ 0x6de, 0x6de,
+ 0x6e9, 0x6e9,
+ 0x6fd, 0x6fe,
+ 0x7f6, 0x7f6,
+ 0x9f2, 0x9f3,
+ 0x9fa, 0x9fb,
+ 0xaf1, 0xaf1,
+ 0xb70, 0xb70,
+ 0xbf3, 0xbfa,
+ 0xc7f, 0xc7f,
+ 0xd79, 0xd79,
+ 0xe3f, 0xe3f,
+ 0xf01, 0xf03,
+ 0xf13, 0xf13,
+ 0xf15, 0xf17,
+ 0xf1a, 0xf1f,
+ 0xf34, 0xf34,
+ 0xf36, 0xf36,
+ 0xf38, 0xf38,
+ 0xfbe, 0xfc5,
+ 0xfc7, 0xfcc,
+ 0xfce, 0xfcf,
+ 0xfd5, 0xfd8,
+ 0x109e, 0x109f,
+ 0x1390, 0x1399,
+ 0x17db, 0x17db,
+ 0x1940, 0x1940,
+ 0x19de, 0x19ff,
+ 0x1b61, 0x1b6a,
+ 0x1b74, 0x1b7c,
+ 0x1fbd, 0x1fbd,
+ 0x1fbf, 0x1fc1,
+ 0x1fcd, 0x1fcf,
+ 0x1fdd, 0x1fdf,
+ 0x1fed, 0x1fef,
+ 0x1ffd, 0x1ffe,
+ 0x2044, 0x2044,
+ 0x2052, 0x2052,
+ 0x207a, 0x207c,
+ 0x208a, 0x208c,
+ 0x20a0, 0x20bd,
+ 0x2100, 0x2101,
+ 0x2103, 0x2106,
+ 0x2108, 0x2109,
+ 0x2114, 0x2114,
+ 0x2116, 0x2118,
+ 0x211e, 0x2123,
+ 0x2125, 0x2125,
+ 0x2127, 0x2127,
+ 0x2129, 0x2129,
+ 0x212e, 0x212e,
+ 0x213a, 0x213b,
+ 0x2140, 0x2144,
+ 0x214a, 0x214d,
+ 0x214f, 0x214f,
+ 0x2190, 0x2307,
+ 0x230c, 0x2328,
+ 0x232b, 0x23fa,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x249c, 0x24e9,
+ 0x2500, 0x2767,
+ 0x2794, 0x27c4,
+ 0x27c7, 0x27e5,
+ 0x27f0, 0x2982,
+ 0x2999, 0x29d7,
+ 0x29dc, 0x29fb,
+ 0x29fe, 0x2b73,
+ 0x2b76, 0x2b95,
+ 0x2b98, 0x2bb9,
+ 0x2bbd, 0x2bc8,
+ 0x2bca, 0x2bd1,
+ 0x2ce5, 0x2cea,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3004, 0x3004,
+ 0x3012, 0x3013,
+ 0x3020, 0x3020,
+ 0x3036, 0x3037,
+ 0x303e, 0x303f,
+ 0x309b, 0x309c,
+ 0x3190, 0x3191,
+ 0x3196, 0x319f,
+ 0x31c0, 0x31e3,
+ 0x3200, 0x321e,
+ 0x322a, 0x3247,
+ 0x3250, 0x3250,
+ 0x3260, 0x327f,
+ 0x328a, 0x32b0,
+ 0x32c0, 0x32fe,
+ 0x3300, 0x33ff,
+ 0x4dc0, 0x4dff,
+ 0xa490, 0xa4c6,
+ 0xa700, 0xa716,
+ 0xa720, 0xa721,
+ 0xa789, 0xa78a,
+ 0xa828, 0xa82b,
+ 0xa836, 0xa839,
+ 0xaa77, 0xaa79,
+ 0xab5b, 0xab5b,
+ 0xfb29, 0xfb29,
+ 0xfbb2, 0xfbc1,
+ 0xfdfc, 0xfdfd,
+ 0xfe62, 0xfe62,
+ 0xfe64, 0xfe66,
+ 0xfe69, 0xfe69,
+ 0xff04, 0xff04,
+ 0xff0b, 0xff0b,
+ 0xff1c, 0xff1e,
+ 0xff3e, 0xff3e,
+ 0xff40, 0xff40,
+ 0xff5c, 0xff5c,
+ 0xff5e, 0xff5e,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfffc, 0xfffd,
+ 0x10137, 0x1013f,
+ 0x10179, 0x10189,
+ 0x1018c, 0x1018c,
+ 0x10190, 0x1019b,
+ 0x101a0, 0x101a0,
+ 0x101d0, 0x101fc,
+ 0x10877, 0x10878,
+ 0x10ac8, 0x10ac8,
+ 0x16b3c, 0x16b3f,
+ 0x16b45, 0x16b45,
+ 0x1bc9c, 0x1bc9c,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d129, 0x1d164,
+ 0x1d16a, 0x1d16c,
+ 0x1d183, 0x1d184,
+ 0x1d18c, 0x1d1a9,
+ 0x1d1ae, 0x1d1dd,
+ 0x1d200, 0x1d241,
+ 0x1d245, 0x1d245,
+ 0x1d300, 0x1d356,
+ 0x1d6c1, 0x1d6c1,
+ 0x1d6db, 0x1d6db,
+ 0x1d6fb, 0x1d6fb,
+ 0x1d715, 0x1d715,
+ 0x1d735, 0x1d735,
+ 0x1d74f, 0x1d74f,
+ 0x1d76f, 0x1d76f,
+ 0x1d789, 0x1d789,
+ 0x1d7a9, 0x1d7a9,
+ 0x1d7c3, 0x1d7c3,
+ 0x1eef0, 0x1eef1,
+ 0x1f000, 0x1f02b,
+ 0x1f030, 0x1f093,
+ 0x1f0a0, 0x1f0ae,
+ 0x1f0b1, 0x1f0bf,
+ 0x1f0c1, 0x1f0cf,
+ 0x1f0d1, 0x1f0f5,
+ 0x1f110, 0x1f12e,
+ 0x1f130, 0x1f16b,
+ 0x1f170, 0x1f19a,
+ 0x1f1e6, 0x1f202,
+ 0x1f210, 0x1f23a,
+ 0x1f240, 0x1f248,
+ 0x1f250, 0x1f251,
+ 0x1f300, 0x1f32c,
+ 0x1f330, 0x1f37d,
+ 0x1f380, 0x1f3ce,
+ 0x1f3d4, 0x1f3f7,
+ 0x1f400, 0x1f4fe,
+ 0x1f500, 0x1f54a,
+ 0x1f550, 0x1f579,
+ 0x1f57b, 0x1f5a3,
+ 0x1f5a5, 0x1f642,
+ 0x1f645, 0x1f6cf,
+ 0x1f6e0, 0x1f6ec,
+ 0x1f6f0, 0x1f6f3,
+ 0x1f700, 0x1f773,
+ 0x1f780, 0x1f7d4,
+ 0x1f800, 0x1f80b,
+ 0x1f810, 0x1f847,
+ 0x1f850, 0x1f859,
+ 0x1f860, 0x1f887,
+ 0x1f890, 0x1f8ad
+};
+UCP_FN(S)
+
+static const unichar ucp_Sc_def[] = {
+ 0x24, 0x24,
+ 0xa2, 0xa5,
+ 0x58f, 0x58f,
+ 0x60b, 0x60b,
+ 0x9f2, 0x9f3,
+ 0x9fb, 0x9fb,
+ 0xaf1, 0xaf1,
+ 0xbf9, 0xbf9,
+ 0xe3f, 0xe3f,
+ 0x17db, 0x17db,
+ 0x20a0, 0x20bd,
+ 0xa838, 0xa838,
+ 0xfdfc, 0xfdfc,
+ 0xfe69, 0xfe69,
+ 0xff04, 0xff04,
+ 0xffe0, 0xffe1,
+ 0xffe5, 0xffe6
+};
+UCP_FN(Sc)
+
+static const unichar ucp_Sk_def[] = {
+ 0x5e, 0x5e,
+ 0x60, 0x60,
+ 0xa8, 0xa8,
+ 0xaf, 0xaf,
+ 0xb4, 0xb4,
+ 0xb8, 0xb8,
+ 0x2c2, 0x2c5,
+ 0x2d2, 0x2df,
+ 0x2e5, 0x2eb,
+ 0x2ed, 0x2ed,
+ 0x2ef, 0x2ff,
+ 0x375, 0x375,
+ 0x384, 0x385,
+ 0x1fbd, 0x1fbd,
+ 0x1fbf, 0x1fc1,
+ 0x1fcd, 0x1fcf,
+ 0x1fdd, 0x1fdf,
+ 0x1fed, 0x1fef,
+ 0x1ffd, 0x1ffe,
+ 0x309b, 0x309c,
+ 0xa700, 0xa716,
+ 0xa720, 0xa721,
+ 0xa789, 0xa78a,
+ 0xab5b, 0xab5b,
+ 0xfbb2, 0xfbc1,
+ 0xff3e, 0xff3e,
+ 0xff40, 0xff40,
+ 0xffe3, 0xffe3
+};
+UCP_FN(Sk)
+
+static const unichar ucp_Sm_def[] = {
+ 0x2b, 0x2b,
+ 0x3c, 0x3e,
+ 0x7c, 0x7c,
+ 0x7e, 0x7e,
+ 0xac, 0xac,
+ 0xb1, 0xb1,
+ 0xd7, 0xd7,
+ 0xf7, 0xf7,
+ 0x3f6, 0x3f6,
+ 0x606, 0x608,
+ 0x2044, 0x2044,
+ 0x2052, 0x2052,
+ 0x207a, 0x207c,
+ 0x208a, 0x208c,
+ 0x2118, 0x2118,
+ 0x2140, 0x2144,
+ 0x214b, 0x214b,
+ 0x2190, 0x2194,
+ 0x219a, 0x219b,
+ 0x21a0, 0x21a0,
+ 0x21a3, 0x21a3,
+ 0x21a6, 0x21a6,
+ 0x21ae, 0x21ae,
+ 0x21ce, 0x21cf,
+ 0x21d2, 0x21d2,
+ 0x21d4, 0x21d4,
+ 0x21f4, 0x22ff,
+ 0x2320, 0x2321,
+ 0x237c, 0x237c,
+ 0x239b, 0x23b3,
+ 0x23dc, 0x23e1,
+ 0x25b7, 0x25b7,
+ 0x25c1, 0x25c1,
+ 0x25f8, 0x25ff,
+ 0x266f, 0x266f,
+ 0x27c0, 0x27c4,
+ 0x27c7, 0x27e5,
+ 0x27f0, 0x27ff,
+ 0x2900, 0x2982,
+ 0x2999, 0x29d7,
+ 0x29dc, 0x29fb,
+ 0x29fe, 0x2aff,
+ 0x2b30, 0x2b44,
+ 0x2b47, 0x2b4c,
+ 0xfb29, 0xfb29,
+ 0xfe62, 0xfe62,
+ 0xfe64, 0xfe66,
+ 0xff0b, 0xff0b,
+ 0xff1c, 0xff1e,
+ 0xff5c, 0xff5c,
+ 0xff5e, 0xff5e,
+ 0xffe2, 0xffe2,
+ 0xffe9, 0xffec,
+ 0x1d6c1, 0x1d6c1,
+ 0x1d6db, 0x1d6db,
+ 0x1d6fb, 0x1d6fb,
+ 0x1d715, 0x1d715,
+ 0x1d735, 0x1d735,
+ 0x1d74f, 0x1d74f,
+ 0x1d76f, 0x1d76f,
+ 0x1d789, 0x1d789,
+ 0x1d7a9, 0x1d7a9,
+ 0x1d7c3, 0x1d7c3,
+ 0x1eef0, 0x1eef1
+};
+UCP_FN(Sm)
+
+static const unichar ucp_So_def[] = {
+ 0xa6, 0xa6,
+ 0xa9, 0xa9,
+ 0xae, 0xae,
+ 0xb0, 0xb0,
+ 0x482, 0x482,
+ 0x58d, 0x58e,
+ 0x60e, 0x60f,
+ 0x6de, 0x6de,
+ 0x6e9, 0x6e9,
+ 0x6fd, 0x6fe,
+ 0x7f6, 0x7f6,
+ 0x9fa, 0x9fa,
+ 0xb70, 0xb70,
+ 0xbf3, 0xbf8,
+ 0xbfa, 0xbfa,
+ 0xc7f, 0xc7f,
+ 0xd79, 0xd79,
+ 0xf01, 0xf03,
+ 0xf13, 0xf13,
+ 0xf15, 0xf17,
+ 0xf1a, 0xf1f,
+ 0xf34, 0xf34,
+ 0xf36, 0xf36,
+ 0xf38, 0xf38,
+ 0xfbe, 0xfc5,
+ 0xfc7, 0xfcc,
+ 0xfce, 0xfcf,
+ 0xfd5, 0xfd8,
+ 0x109e, 0x109f,
+ 0x1390, 0x1399,
+ 0x1940, 0x1940,
+ 0x19de, 0x19ff,
+ 0x1b61, 0x1b6a,
+ 0x1b74, 0x1b7c,
+ 0x2100, 0x2101,
+ 0x2103, 0x2106,
+ 0x2108, 0x2109,
+ 0x2114, 0x2114,
+ 0x2116, 0x2117,
+ 0x211e, 0x2123,
+ 0x2125, 0x2125,
+ 0x2127, 0x2127,
+ 0x2129, 0x2129,
+ 0x212e, 0x212e,
+ 0x213a, 0x213b,
+ 0x214a, 0x214a,
+ 0x214c, 0x214d,
+ 0x214f, 0x214f,
+ 0x2195, 0x2199,
+ 0x219c, 0x219f,
+ 0x21a1, 0x21a2,
+ 0x21a4, 0x21a5,
+ 0x21a7, 0x21ad,
+ 0x21af, 0x21cd,
+ 0x21d0, 0x21d1,
+ 0x21d3, 0x21d3,
+ 0x21d5, 0x21f3,
+ 0x2300, 0x2307,
+ 0x230c, 0x231f,
+ 0x2322, 0x2328,
+ 0x232b, 0x237b,
+ 0x237d, 0x239a,
+ 0x23b4, 0x23db,
+ 0x23e2, 0x23fa,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x249c, 0x24e9,
+ 0x2500, 0x25b6,
+ 0x25b8, 0x25c0,
+ 0x25c2, 0x25f7,
+ 0x2600, 0x266e,
+ 0x2670, 0x2767,
+ 0x2794, 0x27bf,
+ 0x2800, 0x28ff,
+ 0x2b00, 0x2b2f,
+ 0x2b45, 0x2b46,
+ 0x2b4d, 0x2b73,
+ 0x2b76, 0x2b95,
+ 0x2b98, 0x2bb9,
+ 0x2bbd, 0x2bc8,
+ 0x2bca, 0x2bd1,
+ 0x2ce5, 0x2cea,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3004, 0x3004,
+ 0x3012, 0x3013,
+ 0x3020, 0x3020,
+ 0x3036, 0x3037,
+ 0x303e, 0x303f,
+ 0x3190, 0x3191,
+ 0x3196, 0x319f,
+ 0x31c0, 0x31e3,
+ 0x3200, 0x321e,
+ 0x322a, 0x3247,
+ 0x3250, 0x3250,
+ 0x3260, 0x327f,
+ 0x328a, 0x32b0,
+ 0x32c0, 0x32fe,
+ 0x3300, 0x33ff,
+ 0x4dc0, 0x4dff,
+ 0xa490, 0xa4c6,
+ 0xa828, 0xa82b,
+ 0xa836, 0xa837,
+ 0xa839, 0xa839,
+ 0xaa77, 0xaa79,
+ 0xfdfd, 0xfdfd,
+ 0xffe4, 0xffe4,
+ 0xffe8, 0xffe8,
+ 0xffed, 0xffee,
+ 0xfffc, 0xfffd,
+ 0x10137, 0x1013f,
+ 0x10179, 0x10189,
+ 0x1018c, 0x1018c,
+ 0x10190, 0x1019b,
+ 0x101a0, 0x101a0,
+ 0x101d0, 0x101fc,
+ 0x10877, 0x10878,
+ 0x10ac8, 0x10ac8,
+ 0x16b3c, 0x16b3f,
+ 0x16b45, 0x16b45,
+ 0x1bc9c, 0x1bc9c,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d129, 0x1d164,
+ 0x1d16a, 0x1d16c,
+ 0x1d183, 0x1d184,
+ 0x1d18c, 0x1d1a9,
+ 0x1d1ae, 0x1d1dd,
+ 0x1d200, 0x1d241,
+ 0x1d245, 0x1d245,
+ 0x1d300, 0x1d356,
+ 0x1f000, 0x1f02b,
+ 0x1f030, 0x1f093,
+ 0x1f0a0, 0x1f0ae,
+ 0x1f0b1, 0x1f0bf,
+ 0x1f0c1, 0x1f0cf,
+ 0x1f0d1, 0x1f0f5,
+ 0x1f110, 0x1f12e,
+ 0x1f130, 0x1f16b,
+ 0x1f170, 0x1f19a,
+ 0x1f1e6, 0x1f202,
+ 0x1f210, 0x1f23a,
+ 0x1f240, 0x1f248,
+ 0x1f250, 0x1f251,
+ 0x1f300, 0x1f32c,
+ 0x1f330, 0x1f37d,
+ 0x1f380, 0x1f3ce,
+ 0x1f3d4, 0x1f3f7,
+ 0x1f400, 0x1f4fe,
+ 0x1f500, 0x1f54a,
+ 0x1f550, 0x1f579,
+ 0x1f57b, 0x1f5a3,
+ 0x1f5a5, 0x1f642,
+ 0x1f645, 0x1f6cf,
+ 0x1f6e0, 0x1f6ec,
+ 0x1f6f0, 0x1f6f3,
+ 0x1f700, 0x1f773,
+ 0x1f780, 0x1f7d4,
+ 0x1f800, 0x1f80b,
+ 0x1f810, 0x1f847,
+ 0x1f850, 0x1f859,
+ 0x1f860, 0x1f887,
+ 0x1f890, 0x1f8ad
+};
+UCP_FN(So)
+
+static const unichar ucp_Xan_def[] = {
+ 0x30, 0x39,
+ 0x41, 0x5a,
+ 0x61, 0x7a,
+ 0xaa, 0xaa,
+ 0xb2, 0xb3,
+ 0xb5, 0xb5,
+ 0xb9, 0xba,
+ 0xbc, 0xbe,
+ 0xc0, 0xd6,
+ 0xd8, 0xf6,
+ 0xf8, 0x2c1,
+ 0x2c6, 0x2d1,
+ 0x2e0, 0x2e4,
+ 0x2ec, 0x2ec,
+ 0x2ee, 0x2ee,
+ 0x370, 0x374,
+ 0x376, 0x377,
+ 0x37a, 0x37d,
+ 0x37f, 0x37f,
+ 0x386, 0x386,
+ 0x388, 0x38a,
+ 0x38c, 0x38c,
+ 0x38e, 0x3a1,
+ 0x3a3, 0x3f5,
+ 0x3f7, 0x481,
+ 0x48a, 0x52f,
+ 0x531, 0x556,
+ 0x559, 0x559,
+ 0x561, 0x587,
+ 0x5d0, 0x5ea,
+ 0x5f0, 0x5f2,
+ 0x620, 0x64a,
+ 0x660, 0x669,
+ 0x66e, 0x66f,
+ 0x671, 0x6d3,
+ 0x6d5, 0x6d5,
+ 0x6e5, 0x6e6,
+ 0x6ee, 0x6fc,
+ 0x6ff, 0x6ff,
+ 0x710, 0x710,
+ 0x712, 0x72f,
+ 0x74d, 0x7a5,
+ 0x7b1, 0x7b1,
+ 0x7c0, 0x7ea,
+ 0x7f4, 0x7f5,
+ 0x7fa, 0x7fa,
+ 0x800, 0x815,
+ 0x81a, 0x81a,
+ 0x824, 0x824,
+ 0x828, 0x828,
+ 0x840, 0x858,
+ 0x8a0, 0x8b2,
+ 0x904, 0x939,
+ 0x93d, 0x93d,
+ 0x950, 0x950,
+ 0x958, 0x961,
+ 0x966, 0x96f,
+ 0x971, 0x980,
+ 0x985, 0x98c,
+ 0x98f, 0x990,
+ 0x993, 0x9a8,
+ 0x9aa, 0x9b0,
+ 0x9b2, 0x9b2,
+ 0x9b6, 0x9b9,
+ 0x9bd, 0x9bd,
+ 0x9ce, 0x9ce,
+ 0x9dc, 0x9dd,
+ 0x9df, 0x9e1,
+ 0x9e6, 0x9f1,
+ 0x9f4, 0x9f9,
+ 0xa05, 0xa0a,
+ 0xa0f, 0xa10,
+ 0xa13, 0xa28,
+ 0xa2a, 0xa30,
+ 0xa32, 0xa33,
+ 0xa35, 0xa36,
+ 0xa38, 0xa39,
+ 0xa59, 0xa5c,
+ 0xa5e, 0xa5e,
+ 0xa66, 0xa6f,
+ 0xa72, 0xa74,
+ 0xa85, 0xa8d,
+ 0xa8f, 0xa91,
+ 0xa93, 0xaa8,
+ 0xaaa, 0xab0,
+ 0xab2, 0xab3,
+ 0xab5, 0xab9,
+ 0xabd, 0xabd,
+ 0xad0, 0xad0,
+ 0xae0, 0xae1,
+ 0xae6, 0xaef,
+ 0xb05, 0xb0c,
+ 0xb0f, 0xb10,
+ 0xb13, 0xb28,
+ 0xb2a, 0xb30,
+ 0xb32, 0xb33,
+ 0xb35, 0xb39,
+ 0xb3d, 0xb3d,
+ 0xb5c, 0xb5d,
+ 0xb5f, 0xb61,
+ 0xb66, 0xb6f,
+ 0xb71, 0xb77,
+ 0xb83, 0xb83,
+ 0xb85, 0xb8a,
+ 0xb8e, 0xb90,
+ 0xb92, 0xb95,
+ 0xb99, 0xb9a,
+ 0xb9c, 0xb9c,
+ 0xb9e, 0xb9f,
+ 0xba3, 0xba4,
+ 0xba8, 0xbaa,
+ 0xbae, 0xbb9,
+ 0xbd0, 0xbd0,
+ 0xbe6, 0xbf2,
+ 0xc05, 0xc0c,
+ 0xc0e, 0xc10,
+ 0xc12, 0xc28,
+ 0xc2a, 0xc39,
+ 0xc3d, 0xc3d,
+ 0xc58, 0xc59,
+ 0xc60, 0xc61,
+ 0xc66, 0xc6f,
+ 0xc78, 0xc7e,
+ 0xc85, 0xc8c,
+ 0xc8e, 0xc90,
+ 0xc92, 0xca8,
+ 0xcaa, 0xcb3,
+ 0xcb5, 0xcb9,
+ 0xcbd, 0xcbd,
+ 0xcde, 0xcde,
+ 0xce0, 0xce1,
+ 0xce6, 0xcef,
+ 0xcf1, 0xcf2,
+ 0xd05, 0xd0c,
+ 0xd0e, 0xd10,
+ 0xd12, 0xd3a,
+ 0xd3d, 0xd3d,
+ 0xd4e, 0xd4e,
+ 0xd60, 0xd61,
+ 0xd66, 0xd75,
+ 0xd7a, 0xd7f,
+ 0xd85, 0xd96,
+ 0xd9a, 0xdb1,
+ 0xdb3, 0xdbb,
+ 0xdbd, 0xdbd,
+ 0xdc0, 0xdc6,
+ 0xde6, 0xdef,
+ 0xe01, 0xe30,
+ 0xe32, 0xe33,
+ 0xe40, 0xe46,
+ 0xe50, 0xe59,
+ 0xe81, 0xe82,
+ 0xe84, 0xe84,
+ 0xe87, 0xe88,
+ 0xe8a, 0xe8a,
+ 0xe8d, 0xe8d,
+ 0xe94, 0xe97,
+ 0xe99, 0xe9f,
+ 0xea1, 0xea3,
+ 0xea5, 0xea5,
+ 0xea7, 0xea7,
+ 0xeaa, 0xeab,
+ 0xead, 0xeb0,
+ 0xeb2, 0xeb3,
+ 0xebd, 0xebd,
+ 0xec0, 0xec4,
+ 0xec6, 0xec6,
+ 0xed0, 0xed9,
+ 0xedc, 0xedf,
+ 0xf00, 0xf00,
+ 0xf20, 0xf33,
+ 0xf40, 0xf47,
+ 0xf49, 0xf6c,
+ 0xf88, 0xf8c,
+ 0x1000, 0x102a,
+ 0x103f, 0x1049,
+ 0x1050, 0x1055,
+ 0x105a, 0x105d,
+ 0x1061, 0x1061,
+ 0x1065, 0x1066,
+ 0x106e, 0x1070,
+ 0x1075, 0x1081,
+ 0x108e, 0x108e,
+ 0x1090, 0x1099,
+ 0x10a0, 0x10c5,
+ 0x10c7, 0x10c7,
+ 0x10cd, 0x10cd,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x1369, 0x137c,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x167f,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f8,
+ 0x1700, 0x170c,
+ 0x170e, 0x1711,
+ 0x1720, 0x1731,
+ 0x1740, 0x1751,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1780, 0x17b3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dc,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a8,
+ 0x18aa, 0x18aa,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191e,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19c1, 0x19c7,
+ 0x19d0, 0x19da,
+ 0x1a00, 0x1a16,
+ 0x1a20, 0x1a54,
+ 0x1a80, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1aa7, 0x1aa7,
+ 0x1b05, 0x1b33,
+ 0x1b45, 0x1b4b,
+ 0x1b50, 0x1b59,
+ 0x1b83, 0x1ba0,
+ 0x1bae, 0x1be5,
+ 0x1c00, 0x1c23,
+ 0x1c40, 0x1c49,
+ 0x1c4d, 0x1c7d,
+ 0x1ce9, 0x1cec,
+ 0x1cee, 0x1cf1,
+ 0x1cf5, 0x1cf6,
+ 0x1d00, 0x1dbf,
+ 0x1e00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2070, 0x2071,
+ 0x2074, 0x2079,
+ 0x207f, 0x2089,
+ 0x2090, 0x209c,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x214e, 0x214e,
+ 0x2150, 0x2189,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2ce4,
+ 0x2ceb, 0x2cee,
+ 0x2cf2, 0x2cf3,
+ 0x2cfd, 0x2cfd,
+ 0x2d00, 0x2d25,
+ 0x2d27, 0x2d27,
+ 0x2d2d, 0x2d2d,
+ 0x2d30, 0x2d67,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2e2f, 0x2e2f,
+ 0x3005, 0x3007,
+ 0x3021, 0x3029,
+ 0x3031, 0x3035,
+ 0x3038, 0x303c,
+ 0x3041, 0x3096,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x3192, 0x3195,
+ 0x31a0, 0x31ba,
+ 0x31f0, 0x31ff,
+ 0x3220, 0x3229,
+ 0x3248, 0x324f,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcc,
+ 0xa000, 0xa48c,
+ 0xa4d0, 0xa4fd,
+ 0xa500, 0xa60c,
+ 0xa610, 0xa62b,
+ 0xa640, 0xa66e,
+ 0xa67f, 0xa69d,
+ 0xa6a0, 0xa6ef,
+ 0xa717, 0xa71f,
+ 0xa722, 0xa788,
+ 0xa78b, 0xa78e,
+ 0xa790, 0xa7ad,
+ 0xa7b0, 0xa7b1,
+ 0xa7f7, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa822,
+ 0xa830, 0xa835,
+ 0xa840, 0xa873,
+ 0xa882, 0xa8b3,
+ 0xa8d0, 0xa8d9,
+ 0xa8f2, 0xa8f7,
+ 0xa8fb, 0xa8fb,
+ 0xa900, 0xa925,
+ 0xa930, 0xa946,
+ 0xa960, 0xa97c,
+ 0xa984, 0xa9b2,
+ 0xa9cf, 0xa9d9,
+ 0xa9e0, 0xa9e4,
+ 0xa9e6, 0xa9fe,
+ 0xaa00, 0xaa28,
+ 0xaa40, 0xaa42,
+ 0xaa44, 0xaa4b,
+ 0xaa50, 0xaa59,
+ 0xaa60, 0xaa76,
+ 0xaa7a, 0xaa7a,
+ 0xaa7e, 0xaaaf,
+ 0xaab1, 0xaab1,
+ 0xaab5, 0xaab6,
+ 0xaab9, 0xaabd,
+ 0xaac0, 0xaac0,
+ 0xaac2, 0xaac2,
+ 0xaadb, 0xaadd,
+ 0xaae0, 0xaaea,
+ 0xaaf2, 0xaaf4,
+ 0xab01, 0xab06,
+ 0xab09, 0xab0e,
+ 0xab11, 0xab16,
+ 0xab20, 0xab26,
+ 0xab28, 0xab2e,
+ 0xab30, 0xab5a,
+ 0xab5c, 0xab5f,
+ 0xab64, 0xab65,
+ 0xabc0, 0xabe2,
+ 0xabf0, 0xabf9,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xf900, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb1d,
+ 0xfb1f, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10107, 0x10133,
+ 0x10140, 0x10178,
+ 0x1018a, 0x1018b,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x102e1, 0x102fb,
+ 0x10300, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10350, 0x10375,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x103d1, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10500, 0x10527,
+ 0x10530, 0x10563,
+ 0x10600, 0x10736,
+ 0x10740, 0x10755,
+ 0x10760, 0x10767,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x10855,
+ 0x10858, 0x10876,
+ 0x10879, 0x1089e,
+ 0x108a7, 0x108af,
+ 0x10900, 0x1091b,
+ 0x10920, 0x10939,
+ 0x10980, 0x109b7,
+ 0x109be, 0x109bf,
+ 0x10a00, 0x10a00,
+ 0x10a10, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a40, 0x10a47,
+ 0x10a60, 0x10a7e,
+ 0x10a80, 0x10a9f,
+ 0x10ac0, 0x10ac7,
+ 0x10ac9, 0x10ae4,
+ 0x10aeb, 0x10aef,
+ 0x10b00, 0x10b35,
+ 0x10b40, 0x10b55,
+ 0x10b58, 0x10b72,
+ 0x10b78, 0x10b91,
+ 0x10ba9, 0x10baf,
+ 0x10c00, 0x10c48,
+ 0x10e60, 0x10e7e,
+ 0x11003, 0x11037,
+ 0x11052, 0x1106f,
+ 0x11083, 0x110af,
+ 0x110d0, 0x110e8,
+ 0x110f0, 0x110f9,
+ 0x11103, 0x11126,
+ 0x11136, 0x1113f,
+ 0x11150, 0x11172,
+ 0x11176, 0x11176,
+ 0x11183, 0x111b2,
+ 0x111c1, 0x111c4,
+ 0x111d0, 0x111da,
+ 0x111e1, 0x111f4,
+ 0x11200, 0x11211,
+ 0x11213, 0x1122b,
+ 0x112b0, 0x112de,
+ 0x112f0, 0x112f9,
+ 0x11305, 0x1130c,
+ 0x1130f, 0x11310,
+ 0x11313, 0x11328,
+ 0x1132a, 0x11330,
+ 0x11332, 0x11333,
+ 0x11335, 0x11339,
+ 0x1133d, 0x1133d,
+ 0x1135d, 0x11361,
+ 0x11480, 0x114af,
+ 0x114c4, 0x114c5,
+ 0x114c7, 0x114c7,
+ 0x114d0, 0x114d9,
+ 0x11580, 0x115ae,
+ 0x11600, 0x1162f,
+ 0x11644, 0x11644,
+ 0x11650, 0x11659,
+ 0x11680, 0x116aa,
+ 0x116c0, 0x116c9,
+ 0x118a0, 0x118f2,
+ 0x118ff, 0x118ff,
+ 0x11ac0, 0x11af8,
+ 0x12000, 0x12398,
+ 0x12400, 0x1246e,
+ 0x13000, 0x1342e,
+ 0x16800, 0x16a38,
+ 0x16a40, 0x16a5e,
+ 0x16a60, 0x16a69,
+ 0x16ad0, 0x16aed,
+ 0x16b00, 0x16b2f,
+ 0x16b40, 0x16b43,
+ 0x16b50, 0x16b59,
+ 0x16b5b, 0x16b61,
+ 0x16b63, 0x16b77,
+ 0x16b7d, 0x16b8f,
+ 0x16f00, 0x16f44,
+ 0x16f50, 0x16f50,
+ 0x16f93, 0x16f9f,
+ 0x1b000, 0x1b001,
+ 0x1bc00, 0x1bc6a,
+ 0x1bc70, 0x1bc7c,
+ 0x1bc80, 0x1bc88,
+ 0x1bc90, 0x1bc99,
+ 0x1d360, 0x1d371,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7cb,
+ 0x1d7ce, 0x1d7ff,
+ 0x1e800, 0x1e8c4,
+ 0x1e8c7, 0x1e8cf,
+ 0x1ee00, 0x1ee03,
+ 0x1ee05, 0x1ee1f,
+ 0x1ee21, 0x1ee22,
+ 0x1ee24, 0x1ee24,
+ 0x1ee27, 0x1ee27,
+ 0x1ee29, 0x1ee32,
+ 0x1ee34, 0x1ee37,
+ 0x1ee39, 0x1ee39,
+ 0x1ee3b, 0x1ee3b,
+ 0x1ee42, 0x1ee42,
+ 0x1ee47, 0x1ee47,
+ 0x1ee49, 0x1ee49,
+ 0x1ee4b, 0x1ee4b,
+ 0x1ee4d, 0x1ee4f,
+ 0x1ee51, 0x1ee52,
+ 0x1ee54, 0x1ee54,
+ 0x1ee57, 0x1ee57,
+ 0x1ee59, 0x1ee59,
+ 0x1ee5b, 0x1ee5b,
+ 0x1ee5d, 0x1ee5d,
+ 0x1ee5f, 0x1ee5f,
+ 0x1ee61, 0x1ee62,
+ 0x1ee64, 0x1ee64,
+ 0x1ee67, 0x1ee6a,
+ 0x1ee6c, 0x1ee72,
+ 0x1ee74, 0x1ee77,
+ 0x1ee79, 0x1ee7c,
+ 0x1ee7e, 0x1ee7e,
+ 0x1ee80, 0x1ee89,
+ 0x1ee8b, 0x1ee9b,
+ 0x1eea1, 0x1eea3,
+ 0x1eea5, 0x1eea9,
+ 0x1eeab, 0x1eebb,
+ 0x1f100, 0x1f10c,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2b740, 0x2b81d,
+ 0x2f800, 0x2fa1d
+};
+UCP_FN(Xan)
+
+static const unichar ucp_Xps_def[] = {
+ 0x9, 0xd,
+ 0x20, 0x20,
+ 0xa0, 0xa0,
+ 0x1680, 0x1680,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+};
+UCP_FN(Xps)
+
+static const unichar ucp_Xsp_def[] = {
+ 0x9, 0xa,
+ 0xc, 0xd,
+ 0x20, 0x20,
+ 0xa0, 0xa0,
+ 0x1680, 0x1680,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+};
+UCP_FN(Xsp)
+
+static const unichar ucp_Xwd_def[] = {
+ 0x30, 0x39,
+ 0x41, 0x5a,
+ 0x5f, 0x5f,
+ 0x61, 0x7a,
+ 0xaa, 0xaa,
+ 0xb2, 0xb3,
+ 0xb5, 0xb5,
+ 0xb9, 0xba,
+ 0xbc, 0xbe,
+ 0xc0, 0xd6,
+ 0xd8, 0xf6,
+ 0xf8, 0x2c1,
+ 0x2c6, 0x2d1,
+ 0x2e0, 0x2e4,
+ 0x2ec, 0x2ec,
+ 0x2ee, 0x2ee,
+ 0x370, 0x374,
+ 0x376, 0x377,
+ 0x37a, 0x37d,
+ 0x37f, 0x37f,
+ 0x386, 0x386,
+ 0x388, 0x38a,
+ 0x38c, 0x38c,
+ 0x38e, 0x3a1,
+ 0x3a3, 0x3f5,
+ 0x3f7, 0x481,
+ 0x48a, 0x52f,
+ 0x531, 0x556,
+ 0x559, 0x559,
+ 0x561, 0x587,
+ 0x5d0, 0x5ea,
+ 0x5f0, 0x5f2,
+ 0x620, 0x64a,
+ 0x660, 0x669,
+ 0x66e, 0x66f,
+ 0x671, 0x6d3,
+ 0x6d5, 0x6d5,
+ 0x6e5, 0x6e6,
+ 0x6ee, 0x6fc,
+ 0x6ff, 0x6ff,
+ 0x710, 0x710,
+ 0x712, 0x72f,
+ 0x74d, 0x7a5,
+ 0x7b1, 0x7b1,
+ 0x7c0, 0x7ea,
+ 0x7f4, 0x7f5,
+ 0x7fa, 0x7fa,
+ 0x800, 0x815,
+ 0x81a, 0x81a,
+ 0x824, 0x824,
+ 0x828, 0x828,
+ 0x840, 0x858,
+ 0x8a0, 0x8b2,
+ 0x904, 0x939,
+ 0x93d, 0x93d,
+ 0x950, 0x950,
+ 0x958, 0x961,
+ 0x966, 0x96f,
+ 0x971, 0x980,
+ 0x985, 0x98c,
+ 0x98f, 0x990,
+ 0x993, 0x9a8,
+ 0x9aa, 0x9b0,
+ 0x9b2, 0x9b2,
+ 0x9b6, 0x9b9,
+ 0x9bd, 0x9bd,
+ 0x9ce, 0x9ce,
+ 0x9dc, 0x9dd,
+ 0x9df, 0x9e1,
+ 0x9e6, 0x9f1,
+ 0x9f4, 0x9f9,
+ 0xa05, 0xa0a,
+ 0xa0f, 0xa10,
+ 0xa13, 0xa28,
+ 0xa2a, 0xa30,
+ 0xa32, 0xa33,
+ 0xa35, 0xa36,
+ 0xa38, 0xa39,
+ 0xa59, 0xa5c,
+ 0xa5e, 0xa5e,
+ 0xa66, 0xa6f,
+ 0xa72, 0xa74,
+ 0xa85, 0xa8d,
+ 0xa8f, 0xa91,
+ 0xa93, 0xaa8,
+ 0xaaa, 0xab0,
+ 0xab2, 0xab3,
+ 0xab5, 0xab9,
+ 0xabd, 0xabd,
+ 0xad0, 0xad0,
+ 0xae0, 0xae1,
+ 0xae6, 0xaef,
+ 0xb05, 0xb0c,
+ 0xb0f, 0xb10,
+ 0xb13, 0xb28,
+ 0xb2a, 0xb30,
+ 0xb32, 0xb33,
+ 0xb35, 0xb39,
+ 0xb3d, 0xb3d,
+ 0xb5c, 0xb5d,
+ 0xb5f, 0xb61,
+ 0xb66, 0xb6f,
+ 0xb71, 0xb77,
+ 0xb83, 0xb83,
+ 0xb85, 0xb8a,
+ 0xb8e, 0xb90,
+ 0xb92, 0xb95,
+ 0xb99, 0xb9a,
+ 0xb9c, 0xb9c,
+ 0xb9e, 0xb9f,
+ 0xba3, 0xba4,
+ 0xba8, 0xbaa,
+ 0xbae, 0xbb9,
+ 0xbd0, 0xbd0,
+ 0xbe6, 0xbf2,
+ 0xc05, 0xc0c,
+ 0xc0e, 0xc10,
+ 0xc12, 0xc28,
+ 0xc2a, 0xc39,
+ 0xc3d, 0xc3d,
+ 0xc58, 0xc59,
+ 0xc60, 0xc61,
+ 0xc66, 0xc6f,
+ 0xc78, 0xc7e,
+ 0xc85, 0xc8c,
+ 0xc8e, 0xc90,
+ 0xc92, 0xca8,
+ 0xcaa, 0xcb3,
+ 0xcb5, 0xcb9,
+ 0xcbd, 0xcbd,
+ 0xcde, 0xcde,
+ 0xce0, 0xce1,
+ 0xce6, 0xcef,
+ 0xcf1, 0xcf2,
+ 0xd05, 0xd0c,
+ 0xd0e, 0xd10,
+ 0xd12, 0xd3a,
+ 0xd3d, 0xd3d,
+ 0xd4e, 0xd4e,
+ 0xd60, 0xd61,
+ 0xd66, 0xd75,
+ 0xd7a, 0xd7f,
+ 0xd85, 0xd96,
+ 0xd9a, 0xdb1,
+ 0xdb3, 0xdbb,
+ 0xdbd, 0xdbd,
+ 0xdc0, 0xdc6,
+ 0xde6, 0xdef,
+ 0xe01, 0xe30,
+ 0xe32, 0xe33,
+ 0xe40, 0xe46,
+ 0xe50, 0xe59,
+ 0xe81, 0xe82,
+ 0xe84, 0xe84,
+ 0xe87, 0xe88,
+ 0xe8a, 0xe8a,
+ 0xe8d, 0xe8d,
+ 0xe94, 0xe97,
+ 0xe99, 0xe9f,
+ 0xea1, 0xea3,
+ 0xea5, 0xea5,
+ 0xea7, 0xea7,
+ 0xeaa, 0xeab,
+ 0xead, 0xeb0,
+ 0xeb2, 0xeb3,
+ 0xebd, 0xebd,
+ 0xec0, 0xec4,
+ 0xec6, 0xec6,
+ 0xed0, 0xed9,
+ 0xedc, 0xedf,
+ 0xf00, 0xf00,
+ 0xf20, 0xf33,
+ 0xf40, 0xf47,
+ 0xf49, 0xf6c,
+ 0xf88, 0xf8c,
+ 0x1000, 0x102a,
+ 0x103f, 0x1049,
+ 0x1050, 0x1055,
+ 0x105a, 0x105d,
+ 0x1061, 0x1061,
+ 0x1065, 0x1066,
+ 0x106e, 0x1070,
+ 0x1075, 0x1081,
+ 0x108e, 0x108e,
+ 0x1090, 0x1099,
+ 0x10a0, 0x10c5,
+ 0x10c7, 0x10c7,
+ 0x10cd, 0x10cd,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x1369, 0x137c,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x167f,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f8,
+ 0x1700, 0x170c,
+ 0x170e, 0x1711,
+ 0x1720, 0x1731,
+ 0x1740, 0x1751,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1780, 0x17b3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dc,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a8,
+ 0x18aa, 0x18aa,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191e,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19c1, 0x19c7,
+ 0x19d0, 0x19da,
+ 0x1a00, 0x1a16,
+ 0x1a20, 0x1a54,
+ 0x1a80, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1aa7, 0x1aa7,
+ 0x1b05, 0x1b33,
+ 0x1b45, 0x1b4b,
+ 0x1b50, 0x1b59,
+ 0x1b83, 0x1ba0,
+ 0x1bae, 0x1be5,
+ 0x1c00, 0x1c23,
+ 0x1c40, 0x1c49,
+ 0x1c4d, 0x1c7d,
+ 0x1ce9, 0x1cec,
+ 0x1cee, 0x1cf1,
+ 0x1cf5, 0x1cf6,
+ 0x1d00, 0x1dbf,
+ 0x1e00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2070, 0x2071,
+ 0x2074, 0x2079,
+ 0x207f, 0x2089,
+ 0x2090, 0x209c,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x214e, 0x214e,
+ 0x2150, 0x2189,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2ce4,
+ 0x2ceb, 0x2cee,
+ 0x2cf2, 0x2cf3,
+ 0x2cfd, 0x2cfd,
+ 0x2d00, 0x2d25,
+ 0x2d27, 0x2d27,
+ 0x2d2d, 0x2d2d,
+ 0x2d30, 0x2d67,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2e2f, 0x2e2f,
+ 0x3005, 0x3007,
+ 0x3021, 0x3029,
+ 0x3031, 0x3035,
+ 0x3038, 0x303c,
+ 0x3041, 0x3096,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x3192, 0x3195,
+ 0x31a0, 0x31ba,
+ 0x31f0, 0x31ff,
+ 0x3220, 0x3229,
+ 0x3248, 0x324f,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcc,
+ 0xa000, 0xa48c,
+ 0xa4d0, 0xa4fd,
+ 0xa500, 0xa60c,
+ 0xa610, 0xa62b,
+ 0xa640, 0xa66e,
+ 0xa67f, 0xa69d,
+ 0xa6a0, 0xa6ef,
+ 0xa717, 0xa71f,
+ 0xa722, 0xa788,
+ 0xa78b, 0xa78e,
+ 0xa790, 0xa7ad,
+ 0xa7b0, 0xa7b1,
+ 0xa7f7, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa822,
+ 0xa830, 0xa835,
+ 0xa840, 0xa873,
+ 0xa882, 0xa8b3,
+ 0xa8d0, 0xa8d9,
+ 0xa8f2, 0xa8f7,
+ 0xa8fb, 0xa8fb,
+ 0xa900, 0xa925,
+ 0xa930, 0xa946,
+ 0xa960, 0xa97c,
+ 0xa984, 0xa9b2,
+ 0xa9cf, 0xa9d9,
+ 0xa9e0, 0xa9e4,
+ 0xa9e6, 0xa9fe,
+ 0xaa00, 0xaa28,
+ 0xaa40, 0xaa42,
+ 0xaa44, 0xaa4b,
+ 0xaa50, 0xaa59,
+ 0xaa60, 0xaa76,
+ 0xaa7a, 0xaa7a,
+ 0xaa7e, 0xaaaf,
+ 0xaab1, 0xaab1,
+ 0xaab5, 0xaab6,
+ 0xaab9, 0xaabd,
+ 0xaac0, 0xaac0,
+ 0xaac2, 0xaac2,
+ 0xaadb, 0xaadd,
+ 0xaae0, 0xaaea,
+ 0xaaf2, 0xaaf4,
+ 0xab01, 0xab06,
+ 0xab09, 0xab0e,
+ 0xab11, 0xab16,
+ 0xab20, 0xab26,
+ 0xab28, 0xab2e,
+ 0xab30, 0xab5a,
+ 0xab5c, 0xab5f,
+ 0xab64, 0xab65,
+ 0xabc0, 0xabe2,
+ 0xabf0, 0xabf9,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xf900, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb1d,
+ 0xfb1f, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10107, 0x10133,
+ 0x10140, 0x10178,
+ 0x1018a, 0x1018b,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x102e1, 0x102fb,
+ 0x10300, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10350, 0x10375,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x103d1, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10500, 0x10527,
+ 0x10530, 0x10563,
+ 0x10600, 0x10736,
+ 0x10740, 0x10755,
+ 0x10760, 0x10767,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x10855,
+ 0x10858, 0x10876,
+ 0x10879, 0x1089e,
+ 0x108a7, 0x108af,
+ 0x10900, 0x1091b,
+ 0x10920, 0x10939,
+ 0x10980, 0x109b7,
+ 0x109be, 0x109bf,
+ 0x10a00, 0x10a00,
+ 0x10a10, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a40, 0x10a47,
+ 0x10a60, 0x10a7e,
+ 0x10a80, 0x10a9f,
+ 0x10ac0, 0x10ac7,
+ 0x10ac9, 0x10ae4,
+ 0x10aeb, 0x10aef,
+ 0x10b00, 0x10b35,
+ 0x10b40, 0x10b55,
+ 0x10b58, 0x10b72,
+ 0x10b78, 0x10b91,
+ 0x10ba9, 0x10baf,
+ 0x10c00, 0x10c48,
+ 0x10e60, 0x10e7e,
+ 0x11003, 0x11037,
+ 0x11052, 0x1106f,
+ 0x11083, 0x110af,
+ 0x110d0, 0x110e8,
+ 0x110f0, 0x110f9,
+ 0x11103, 0x11126,
+ 0x11136, 0x1113f,
+ 0x11150, 0x11172,
+ 0x11176, 0x11176,
+ 0x11183, 0x111b2,
+ 0x111c1, 0x111c4,
+ 0x111d0, 0x111da,
+ 0x111e1, 0x111f4,
+ 0x11200, 0x11211,
+ 0x11213, 0x1122b,
+ 0x112b0, 0x112de,
+ 0x112f0, 0x112f9,
+ 0x11305, 0x1130c,
+ 0x1130f, 0x11310,
+ 0x11313, 0x11328,
+ 0x1132a, 0x11330,
+ 0x11332, 0x11333,
+ 0x11335, 0x11339,
+ 0x1133d, 0x1133d,
+ 0x1135d, 0x11361,
+ 0x11480, 0x114af,
+ 0x114c4, 0x114c5,
+ 0x114c7, 0x114c7,
+ 0x114d0, 0x114d9,
+ 0x11580, 0x115ae,
+ 0x11600, 0x1162f,
+ 0x11644, 0x11644,
+ 0x11650, 0x11659,
+ 0x11680, 0x116aa,
+ 0x116c0, 0x116c9,
+ 0x118a0, 0x118f2,
+ 0x118ff, 0x118ff,
+ 0x11ac0, 0x11af8,
+ 0x12000, 0x12398,
+ 0x12400, 0x1246e,
+ 0x13000, 0x1342e,
+ 0x16800, 0x16a38,
+ 0x16a40, 0x16a5e,
+ 0x16a60, 0x16a69,
+ 0x16ad0, 0x16aed,
+ 0x16b00, 0x16b2f,
+ 0x16b40, 0x16b43,
+ 0x16b50, 0x16b59,
+ 0x16b5b, 0x16b61,
+ 0x16b63, 0x16b77,
+ 0x16b7d, 0x16b8f,
+ 0x16f00, 0x16f44,
+ 0x16f50, 0x16f50,
+ 0x16f93, 0x16f9f,
+ 0x1b000, 0x1b001,
+ 0x1bc00, 0x1bc6a,
+ 0x1bc70, 0x1bc7c,
+ 0x1bc80, 0x1bc88,
+ 0x1bc90, 0x1bc99,
+ 0x1d360, 0x1d371,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7cb,
+ 0x1d7ce, 0x1d7ff,
+ 0x1e800, 0x1e8c4,
+ 0x1e8c7, 0x1e8cf,
+ 0x1ee00, 0x1ee03,
+ 0x1ee05, 0x1ee1f,
+ 0x1ee21, 0x1ee22,
+ 0x1ee24, 0x1ee24,
+ 0x1ee27, 0x1ee27,
+ 0x1ee29, 0x1ee32,
+ 0x1ee34, 0x1ee37,
+ 0x1ee39, 0x1ee39,
+ 0x1ee3b, 0x1ee3b,
+ 0x1ee42, 0x1ee42,
+ 0x1ee47, 0x1ee47,
+ 0x1ee49, 0x1ee49,
+ 0x1ee4b, 0x1ee4b,
+ 0x1ee4d, 0x1ee4f,
+ 0x1ee51, 0x1ee52,
+ 0x1ee54, 0x1ee54,
+ 0x1ee57, 0x1ee57,
+ 0x1ee59, 0x1ee59,
+ 0x1ee5b, 0x1ee5b,
+ 0x1ee5d, 0x1ee5d,
+ 0x1ee5f, 0x1ee5f,
+ 0x1ee61, 0x1ee62,
+ 0x1ee64, 0x1ee64,
+ 0x1ee67, 0x1ee6a,
+ 0x1ee6c, 0x1ee72,
+ 0x1ee74, 0x1ee77,
+ 0x1ee79, 0x1ee7c,
+ 0x1ee7e, 0x1ee7e,
+ 0x1ee80, 0x1ee89,
+ 0x1ee8b, 0x1ee9b,
+ 0x1eea1, 0x1eea3,
+ 0x1eea5, 0x1eea9,
+ 0x1eeab, 0x1eebb,
+ 0x1f100, 0x1f10c,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2b740, 0x2b81d,
+ 0x2f800, 0x2fa1d
+};
+UCP_FN(Xwd)
+
+static const unichar ucp_Z_def[] = {
+ 0x20, 0x20,
+ 0xa0, 0xa0,
+ 0x1680, 0x1680,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+};
+UCP_FN(Z)
+
+static const unichar ucp_Zl_def[] = {
+ 0x2028, 0x2028
+};
+UCP_FN(Zl)
+
+static const unichar ucp_Zp_def[] = {
+ 0x2029, 0x2029
+};
+UCP_FN(Zp)
+
+static const unichar ucp_Zs_def[] = {
+ 0x20, 0x20,
+ 0xa0, 0xa0,
+ 0x1680, 0x1680,
+ 0x2000, 0x200a,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+};
+UCP_FN(Zs)
+
+static const unichar ucp_Arabic_def[] = {
+ 0x600, 0x604,
+ 0x606, 0x60b,
+ 0x60d, 0x61a,
+ 0x61e, 0x61e,
+ 0x620, 0x63f,
+ 0x641, 0x64a,
+ 0x656, 0x65f,
+ 0x66a, 0x66f,
+ 0x671, 0x6dc,
+ 0x6de, 0x6ff,
+ 0x750, 0x77f,
+ 0x8a0, 0x8b2,
+ 0x8e4, 0x8ff,
+ 0xfb50, 0xfbc1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0x10e60, 0x10e7e,
+ 0x1ee00, 0x1ee03,
+ 0x1ee05, 0x1ee1f,
+ 0x1ee21, 0x1ee22,
+ 0x1ee24, 0x1ee24,
+ 0x1ee27, 0x1ee27,
+ 0x1ee29, 0x1ee32,
+ 0x1ee34, 0x1ee37,
+ 0x1ee39, 0x1ee39,
+ 0x1ee3b, 0x1ee3b,
+ 0x1ee42, 0x1ee42,
+ 0x1ee47, 0x1ee47,
+ 0x1ee49, 0x1ee49,
+ 0x1ee4b, 0x1ee4b,
+ 0x1ee4d, 0x1ee4f,
+ 0x1ee51, 0x1ee52,
+ 0x1ee54, 0x1ee54,
+ 0x1ee57, 0x1ee57,
+ 0x1ee59, 0x1ee59,
+ 0x1ee5b, 0x1ee5b,
+ 0x1ee5d, 0x1ee5d,
+ 0x1ee5f, 0x1ee5f,
+ 0x1ee61, 0x1ee62,
+ 0x1ee64, 0x1ee64,
+ 0x1ee67, 0x1ee6a,
+ 0x1ee6c, 0x1ee72,
+ 0x1ee74, 0x1ee77,
+ 0x1ee79, 0x1ee7c,
+ 0x1ee7e, 0x1ee7e,
+ 0x1ee80, 0x1ee89,
+ 0x1ee8b, 0x1ee9b,
+ 0x1eea1, 0x1eea3,
+ 0x1eea5, 0x1eea9,
+ 0x1eeab, 0x1eebb,
+ 0x1eef0, 0x1eef1
+};
+UCP_FN(Arabic)
+
+static const unichar ucp_Armenian_def[] = {
+ 0x531, 0x556,
+ 0x559, 0x55f,
+ 0x561, 0x587,
+ 0x58a, 0x58a,
+ 0x58d, 0x58f,
+ 0xfb13, 0xfb17
+};
+UCP_FN(Armenian)
+
+static const unichar ucp_Avestan_def[] = {
+ 0x10b00, 0x10b35,
+ 0x10b39, 0x10b3f
+};
+UCP_FN(Avestan)
+
+static const unichar ucp_Balinese_def[] = {
+ 0x1b00, 0x1b4b,
+ 0x1b50, 0x1b7c
+};
+UCP_FN(Balinese)
+
+static const unichar ucp_Bamum_def[] = {
+ 0xa6a0, 0xa6f7,
+ 0x16800, 0x16a38
+};
+UCP_FN(Bamum)
+
+static const unichar ucp_Bassa_Vah_def[] = {
+ 0x16ad0, 0x16aed,
+ 0x16af0, 0x16af5
+};
+UCP_FN(Bassa_Vah)
+
+static const unichar ucp_Batak_def[] = {
+ 0x1bc0, 0x1bf3,
+ 0x1bfc, 0x1bff
+};
+UCP_FN(Batak)
+
+static const unichar ucp_Bengali_def[] = {
+ 0x980, 0x983,
+ 0x985, 0x98c,
+ 0x98f, 0x990,
+ 0x993, 0x9a8,
+ 0x9aa, 0x9b0,
+ 0x9b2, 0x9b2,
+ 0x9b6, 0x9b9,
+ 0x9bc, 0x9c4,
+ 0x9c7, 0x9c8,
+ 0x9cb, 0x9ce,
+ 0x9d7, 0x9d7,
+ 0x9dc, 0x9dd,
+ 0x9df, 0x9e3,
+ 0x9e6, 0x9fb
+};
+UCP_FN(Bengali)
+
+static const unichar ucp_Bopomofo_def[] = {
+ 0x2ea, 0x2eb,
+ 0x3105, 0x312d,
+ 0x31a0, 0x31ba
+};
+UCP_FN(Bopomofo)
+
+static const unichar ucp_Brahmi_def[] = {
+ 0x11000, 0x1104d,
+ 0x11052, 0x1106f,
+ 0x1107f, 0x1107f
+};
+UCP_FN(Brahmi)
+
+static const unichar ucp_Braille_def[] = {
+ 0x2800, 0x28ff
+};
+UCP_FN(Braille)
+
+static const unichar ucp_Buginese_def[] = {
+ 0x1a00, 0x1a1b,
+ 0x1a1e, 0x1a1f
+};
+UCP_FN(Buginese)
+
+static const unichar ucp_Buhid_def[] = {
+ 0x1740, 0x1753
+};
+UCP_FN(Buhid)
+
+static const unichar ucp_Canadian_Aboriginal_def[] = {
+ 0x1400, 0x167f,
+ 0x18b0, 0x18f5
+};
+UCP_FN(Canadian_Aboriginal)
+
+static const unichar ucp_Carian_def[] = {
+ 0x102a0, 0x102d0
+};
+UCP_FN(Carian)
+
+static const unichar ucp_Caucasian_Albanian_def[] = {
+ 0x10530, 0x10563,
+ 0x1056f, 0x1056f
+};
+UCP_FN(Caucasian_Albanian)
+
+static const unichar ucp_Chakma_def[] = {
+ 0x11100, 0x11134,
+ 0x11136, 0x11143
+};
+UCP_FN(Chakma)
+
+static const unichar ucp_Cham_def[] = {
+ 0xaa00, 0xaa36,
+ 0xaa40, 0xaa4d,
+ 0xaa50, 0xaa59,
+ 0xaa5c, 0xaa5f
+};
+UCP_FN(Cham)
+
+static const unichar ucp_Cherokee_def[] = {
+ 0x13a0, 0x13f4
+};
+UCP_FN(Cherokee)
+
+static const unichar ucp_Common_def[] = {
+ 0x0, 0x40,
+ 0x5b, 0x60,
+ 0x7b, 0xa9,
+ 0xab, 0xb9,
+ 0xbb, 0xbf,
+ 0xd7, 0xd7,
+ 0xf7, 0xf7,
+ 0x2b9, 0x2df,
+ 0x2e5, 0x2e9,
+ 0x2ec, 0x2ff,
+ 0x374, 0x374,
+ 0x378, 0x379,
+ 0x37e, 0x37e,
+ 0x380, 0x383,
+ 0x385, 0x385,
+ 0x387, 0x387,
+ 0x38b, 0x38b,
+ 0x38d, 0x38d,
+ 0x3a2, 0x3a2,
+ 0x530, 0x530,
+ 0x557, 0x558,
+ 0x560, 0x560,
+ 0x588, 0x589,
+ 0x58b, 0x58c,
+ 0x590, 0x590,
+ 0x5c8, 0x5cf,
+ 0x5eb, 0x5ef,
+ 0x5f5, 0x5ff,
+ 0x605, 0x605,
+ 0x60c, 0x60c,
+ 0x61b, 0x61d,
+ 0x61f, 0x61f,
+ 0x640, 0x640,
+ 0x660, 0x669,
+ 0x6dd, 0x6dd,
+ 0x70e, 0x70e,
+ 0x74b, 0x74c,
+ 0x7b2, 0x7bf,
+ 0x7fb, 0x7ff,
+ 0x82e, 0x82f,
+ 0x83f, 0x83f,
+ 0x85c, 0x85d,
+ 0x85f, 0x89f,
+ 0x8b3, 0x8e3,
+ 0x964, 0x965,
+ 0x984, 0x984,
+ 0x98d, 0x98e,
+ 0x991, 0x992,
+ 0x9a9, 0x9a9,
+ 0x9b1, 0x9b1,
+ 0x9b3, 0x9b5,
+ 0x9ba, 0x9bb,
+ 0x9c5, 0x9c6,
+ 0x9c9, 0x9ca,
+ 0x9cf, 0x9d6,
+ 0x9d8, 0x9db,
+ 0x9de, 0x9de,
+ 0x9e4, 0x9e5,
+ 0x9fc, 0xa00,
+ 0xa04, 0xa04,
+ 0xa0b, 0xa0e,
+ 0xa11, 0xa12,
+ 0xa29, 0xa29,
+ 0xa31, 0xa31,
+ 0xa34, 0xa34,
+ 0xa37, 0xa37,
+ 0xa3a, 0xa3b,
+ 0xa3d, 0xa3d,
+ 0xa43, 0xa46,
+ 0xa49, 0xa4a,
+ 0xa4e, 0xa50,
+ 0xa52, 0xa58,
+ 0xa5d, 0xa5d,
+ 0xa5f, 0xa65,
+ 0xa76, 0xa80,
+ 0xa84, 0xa84,
+ 0xa8e, 0xa8e,
+ 0xa92, 0xa92,
+ 0xaa9, 0xaa9,
+ 0xab1, 0xab1,
+ 0xab4, 0xab4,
+ 0xaba, 0xabb,
+ 0xac6, 0xac6,
+ 0xaca, 0xaca,
+ 0xace, 0xacf,
+ 0xad1, 0xadf,
+ 0xae4, 0xae5,
+ 0xaf2, 0xb00,
+ 0xb04, 0xb04,
+ 0xb0d, 0xb0e,
+ 0xb11, 0xb12,
+ 0xb29, 0xb29,
+ 0xb31, 0xb31,
+ 0xb34, 0xb34,
+ 0xb3a, 0xb3b,
+ 0xb45, 0xb46,
+ 0xb49, 0xb4a,
+ 0xb4e, 0xb55,
+ 0xb58, 0xb5b,
+ 0xb5e, 0xb5e,
+ 0xb64, 0xb65,
+ 0xb78, 0xb81,
+ 0xb84, 0xb84,
+ 0xb8b, 0xb8d,
+ 0xb91, 0xb91,
+ 0xb96, 0xb98,
+ 0xb9b, 0xb9b,
+ 0xb9d, 0xb9d,
+ 0xba0, 0xba2,
+ 0xba5, 0xba7,
+ 0xbab, 0xbad,
+ 0xbba, 0xbbd,
+ 0xbc3, 0xbc5,
+ 0xbc9, 0xbc9,
+ 0xbce, 0xbcf,
+ 0xbd1, 0xbd6,
+ 0xbd8, 0xbe5,
+ 0xbfb, 0xbff,
+ 0xc04, 0xc04,
+ 0xc0d, 0xc0d,
+ 0xc11, 0xc11,
+ 0xc29, 0xc29,
+ 0xc3a, 0xc3c,
+ 0xc45, 0xc45,
+ 0xc49, 0xc49,
+ 0xc4e, 0xc54,
+ 0xc57, 0xc57,
+ 0xc5a, 0xc5f,
+ 0xc64, 0xc65,
+ 0xc70, 0xc77,
+ 0xc80, 0xc80,
+ 0xc84, 0xc84,
+ 0xc8d, 0xc8d,
+ 0xc91, 0xc91,
+ 0xca9, 0xca9,
+ 0xcb4, 0xcb4,
+ 0xcba, 0xcbb,
+ 0xcc5, 0xcc5,
+ 0xcc9, 0xcc9,
+ 0xcce, 0xcd4,
+ 0xcd7, 0xcdd,
+ 0xcdf, 0xcdf,
+ 0xce4, 0xce5,
+ 0xcf0, 0xcf0,
+ 0xcf3, 0xd00,
+ 0xd04, 0xd04,
+ 0xd0d, 0xd0d,
+ 0xd11, 0xd11,
+ 0xd3b, 0xd3c,
+ 0xd45, 0xd45,
+ 0xd49, 0xd49,
+ 0xd4f, 0xd56,
+ 0xd58, 0xd5f,
+ 0xd64, 0xd65,
+ 0xd76, 0xd78,
+ 0xd80, 0xd81,
+ 0xd84, 0xd84,
+ 0xd97, 0xd99,
+ 0xdb2, 0xdb2,
+ 0xdbc, 0xdbc,
+ 0xdbe, 0xdbf,
+ 0xdc7, 0xdc9,
+ 0xdcb, 0xdce,
+ 0xdd5, 0xdd5,
+ 0xdd7, 0xdd7,
+ 0xde0, 0xde5,
+ 0xdf0, 0xdf1,
+ 0xdf5, 0xe00,
+ 0xe3b, 0xe3f,
+ 0xe5c, 0xe80,
+ 0xe83, 0xe83,
+ 0xe85, 0xe86,
+ 0xe89, 0xe89,
+ 0xe8b, 0xe8c,
+ 0xe8e, 0xe93,
+ 0xe98, 0xe98,
+ 0xea0, 0xea0,
+ 0xea4, 0xea4,
+ 0xea6, 0xea6,
+ 0xea8, 0xea9,
+ 0xeac, 0xeac,
+ 0xeba, 0xeba,
+ 0xebe, 0xebf,
+ 0xec5, 0xec5,
+ 0xec7, 0xec7,
+ 0xece, 0xecf,
+ 0xeda, 0xedb,
+ 0xee0, 0xeff,
+ 0xf48, 0xf48,
+ 0xf6d, 0xf70,
+ 0xf98, 0xf98,
+ 0xfbd, 0xfbd,
+ 0xfcd, 0xfcd,
+ 0xfd5, 0xfd8,
+ 0xfdb, 0xfff,
+ 0x10c6, 0x10c6,
+ 0x10c8, 0x10cc,
+ 0x10ce, 0x10cf,
+ 0x10fb, 0x10fb,
+ 0x1249, 0x1249,
+ 0x124e, 0x124f,
+ 0x1257, 0x1257,
+ 0x1259, 0x1259,
+ 0x125e, 0x125f,
+ 0x1289, 0x1289,
+ 0x128e, 0x128f,
+ 0x12b1, 0x12b1,
+ 0x12b6, 0x12b7,
+ 0x12bf, 0x12bf,
+ 0x12c1, 0x12c1,
+ 0x12c6, 0x12c7,
+ 0x12d7, 0x12d7,
+ 0x1311, 0x1311,
+ 0x1316, 0x1317,
+ 0x135b, 0x135c,
+ 0x137d, 0x137f,
+ 0x139a, 0x139f,
+ 0x13f5, 0x13ff,
+ 0x169d, 0x169f,
+ 0x16eb, 0x16ed,
+ 0x16f9, 0x16ff,
+ 0x170d, 0x170d,
+ 0x1715, 0x171f,
+ 0x1735, 0x173f,
+ 0x1754, 0x175f,
+ 0x176d, 0x176d,
+ 0x1771, 0x1771,
+ 0x1774, 0x177f,
+ 0x17de, 0x17df,
+ 0x17ea, 0x17ef,
+ 0x17fa, 0x17ff,
+ 0x1802, 0x1803,
+ 0x1805, 0x1805,
+ 0x180f, 0x180f,
+ 0x181a, 0x181f,
+ 0x1878, 0x187f,
+ 0x18ab, 0x18af,
+ 0x18f6, 0x18ff,
+ 0x191f, 0x191f,
+ 0x192c, 0x192f,
+ 0x193c, 0x193f,
+ 0x1941, 0x1943,
+ 0x196e, 0x196f,
+ 0x1975, 0x197f,
+ 0x19ac, 0x19af,
+ 0x19ca, 0x19cf,
+ 0x19db, 0x19dd,
+ 0x1a1c, 0x1a1d,
+ 0x1a5f, 0x1a5f,
+ 0x1a7d, 0x1a7e,
+ 0x1a8a, 0x1a8f,
+ 0x1a9a, 0x1a9f,
+ 0x1aae, 0x1aaf,
+ 0x1abf, 0x1aff,
+ 0x1b4c, 0x1b4f,
+ 0x1b7d, 0x1b7f,
+ 0x1bf4, 0x1bfb,
+ 0x1c38, 0x1c3a,
+ 0x1c4a, 0x1c4c,
+ 0x1c80, 0x1cbf,
+ 0x1cc8, 0x1ccf,
+ 0x1cd3, 0x1cd3,
+ 0x1ce1, 0x1ce1,
+ 0x1ce9, 0x1cec,
+ 0x1cee, 0x1cf3,
+ 0x1cf5, 0x1cf7,
+ 0x1cfa, 0x1cff,
+ 0x1df6, 0x1dfb,
+ 0x1f16, 0x1f17,
+ 0x1f1e, 0x1f1f,
+ 0x1f46, 0x1f47,
+ 0x1f4e, 0x1f4f,
+ 0x1f58, 0x1f58,
+ 0x1f5a, 0x1f5a,
+ 0x1f5c, 0x1f5c,
+ 0x1f5e, 0x1f5e,
+ 0x1f7e, 0x1f7f,
+ 0x1fb5, 0x1fb5,
+ 0x1fc5, 0x1fc5,
+ 0x1fd4, 0x1fd5,
+ 0x1fdc, 0x1fdc,
+ 0x1ff0, 0x1ff1,
+ 0x1ff5, 0x1ff5,
+ 0x1fff, 0x200b,
+ 0x200e, 0x2070,
+ 0x2072, 0x207e,
+ 0x2080, 0x208f,
+ 0x209d, 0x20cf,
+ 0x20f1, 0x2125,
+ 0x2127, 0x2129,
+ 0x212c, 0x2131,
+ 0x2133, 0x214d,
+ 0x214f, 0x215f,
+ 0x2189, 0x27ff,
+ 0x2900, 0x2bff,
+ 0x2c2f, 0x2c2f,
+ 0x2c5f, 0x2c5f,
+ 0x2cf4, 0x2cf8,
+ 0x2d26, 0x2d26,
+ 0x2d28, 0x2d2c,
+ 0x2d2e, 0x2d2f,
+ 0x2d68, 0x2d6e,
+ 0x2d71, 0x2d7e,
+ 0x2d97, 0x2d9f,
+ 0x2da7, 0x2da7,
+ 0x2daf, 0x2daf,
+ 0x2db7, 0x2db7,
+ 0x2dbf, 0x2dbf,
+ 0x2dc7, 0x2dc7,
+ 0x2dcf, 0x2dcf,
+ 0x2dd7, 0x2dd7,
+ 0x2ddf, 0x2ddf,
+ 0x2e00, 0x2e7f,
+ 0x2e9a, 0x2e9a,
+ 0x2ef4, 0x2eff,
+ 0x2fd6, 0x3004,
+ 0x3006, 0x3006,
+ 0x3008, 0x3020,
+ 0x3030, 0x3037,
+ 0x303c, 0x3040,
+ 0x3097, 0x3098,
+ 0x309b, 0x309c,
+ 0x30a0, 0x30a0,
+ 0x30fb, 0x30fc,
+ 0x3100, 0x3104,
+ 0x312e, 0x3130,
+ 0x318f, 0x319f,
+ 0x31bb, 0x31ef,
+ 0x321f, 0x325f,
+ 0x327f, 0x32cf,
+ 0x32ff, 0x32ff,
+ 0x3358, 0x33ff,
+ 0x4db6, 0x4dff,
+ 0x9fcd, 0x9fff,
+ 0xa48d, 0xa48f,
+ 0xa4c7, 0xa4cf,
+ 0xa62c, 0xa63f,
+ 0xa69e, 0xa69e,
+ 0xa6f8, 0xa721,
+ 0xa788, 0xa78a,
+ 0xa78f, 0xa78f,
+ 0xa7ae, 0xa7af,
+ 0xa7b2, 0xa7f6,
+ 0xa82c, 0xa83f,
+ 0xa878, 0xa87f,
+ 0xa8c5, 0xa8cd,
+ 0xa8da, 0xa8df,
+ 0xa8fc, 0xa8ff,
+ 0xa92e, 0xa92e,
+ 0xa954, 0xa95e,
+ 0xa97d, 0xa97f,
+ 0xa9ce, 0xa9cf,
+ 0xa9da, 0xa9dd,
+ 0xa9ff, 0xa9ff,
+ 0xaa37, 0xaa3f,
+ 0xaa4e, 0xaa4f,
+ 0xaa5a, 0xaa5b,
+ 0xaac3, 0xaada,
+ 0xaaf7, 0xab00,
+ 0xab07, 0xab08,
+ 0xab0f, 0xab10,
+ 0xab17, 0xab1f,
+ 0xab27, 0xab27,
+ 0xab2f, 0xab2f,
+ 0xab5b, 0xab5b,
+ 0xab60, 0xab63,
+ 0xab66, 0xabbf,
+ 0xabee, 0xabef,
+ 0xabfa, 0xabff,
+ 0xd7a4, 0xd7af,
+ 0xd7c7, 0xd7ca,
+ 0xd7fc, 0xf8ff,
+ 0xfa6e, 0xfa6f,
+ 0xfada, 0xfaff,
+ 0xfb07, 0xfb12,
+ 0xfb18, 0xfb1c,
+ 0xfb37, 0xfb37,
+ 0xfb3d, 0xfb3d,
+ 0xfb3f, 0xfb3f,
+ 0xfb42, 0xfb42,
+ 0xfb45, 0xfb45,
+ 0xfbc2, 0xfbd2,
+ 0xfd3e, 0xfd4f,
+ 0xfd90, 0xfd91,
+ 0xfdc8, 0xfdef,
+ 0xfdfe, 0xfdff,
+ 0xfe10, 0xfe1f,
+ 0xfe2e, 0xfe6f,
+ 0xfe75, 0xfe75,
+ 0xfefd, 0xff20,
+ 0xff3b, 0xff40,
+ 0xff5b, 0xff65,
+ 0xff70, 0xff70,
+ 0xff9e, 0xff9f,
+ 0xffbf, 0xffc1,
+ 0xffc8, 0xffc9,
+ 0xffd0, 0xffd1,
+ 0xffd8, 0xffd9,
+ 0xffdd, 0xffff,
+ 0x1000c, 0x1000c,
+ 0x10027, 0x10027,
+ 0x1003b, 0x1003b,
+ 0x1003e, 0x1003e,
+ 0x1004e, 0x1004f,
+ 0x1005e, 0x1007f,
+ 0x100fb, 0x1013f,
+ 0x1018d, 0x1019f,
+ 0x101a1, 0x101fc,
+ 0x101fe, 0x1027f,
+ 0x1029d, 0x1029f,
+ 0x102d1, 0x102df,
+ 0x102e1, 0x102ff,
+ 0x10324, 0x1032f,
+ 0x1034b, 0x1034f,
+ 0x1037b, 0x1037f,
+ 0x1039e, 0x1039e,
+ 0x103c4, 0x103c7,
+ 0x103d6, 0x103ff,
+ 0x1049e, 0x1049f,
+ 0x104aa, 0x104ff,
+ 0x10528, 0x1052f,
+ 0x10564, 0x1056e,
+ 0x10570, 0x105ff,
+ 0x10737, 0x1073f,
+ 0x10756, 0x1075f,
+ 0x10768, 0x107ff,
+ 0x10806, 0x10807,
+ 0x10809, 0x10809,
+ 0x10836, 0x10836,
+ 0x10839, 0x1083b,
+ 0x1083d, 0x1083e,
+ 0x10856, 0x10856,
+ 0x1089f, 0x108a6,
+ 0x108b0, 0x108ff,
+ 0x1091c, 0x1091e,
+ 0x1093a, 0x1093e,
+ 0x10940, 0x1097f,
+ 0x109b8, 0x109bd,
+ 0x109c0, 0x109ff,
+ 0x10a04, 0x10a04,
+ 0x10a07, 0x10a0b,
+ 0x10a14, 0x10a14,
+ 0x10a18, 0x10a18,
+ 0x10a34, 0x10a37,
+ 0x10a3b, 0x10a3e,
+ 0x10a48, 0x10a4f,
+ 0x10a59, 0x10a5f,
+ 0x10aa0, 0x10abf,
+ 0x10ae7, 0x10aea,
+ 0x10af7, 0x10aff,
+ 0x10b36, 0x10b38,
+ 0x10b56, 0x10b57,
+ 0x10b73, 0x10b77,
+ 0x10b92, 0x10b98,
+ 0x10b9d, 0x10ba8,
+ 0x10bb0, 0x10bff,
+ 0x10c49, 0x10e5f,
+ 0x10e7f, 0x10fff,
+ 0x1104e, 0x11051,
+ 0x11070, 0x1107e,
+ 0x110c2, 0x110cf,
+ 0x110e9, 0x110ef,
+ 0x110fa, 0x110ff,
+ 0x11135, 0x11135,
+ 0x11144, 0x1114f,
+ 0x11177, 0x1117f,
+ 0x111c9, 0x111cc,
+ 0x111ce, 0x111cf,
+ 0x111db, 0x111e0,
+ 0x111f5, 0x111ff,
+ 0x11212, 0x11212,
+ 0x1123e, 0x112af,
+ 0x112eb, 0x112ef,
+ 0x112fa, 0x11300,
+ 0x11304, 0x11304,
+ 0x1130d, 0x1130e,
+ 0x11311, 0x11312,
+ 0x11329, 0x11329,
+ 0x11331, 0x11331,
+ 0x11334, 0x11334,
+ 0x1133a, 0x1133b,
+ 0x11345, 0x11346,
+ 0x11349, 0x1134a,
+ 0x1134e, 0x11356,
+ 0x11358, 0x1135c,
+ 0x11364, 0x11365,
+ 0x1136d, 0x1136f,
+ 0x11375, 0x1147f,
+ 0x114c8, 0x114cf,
+ 0x114da, 0x1157f,
+ 0x115b6, 0x115b7,
+ 0x115ca, 0x115ff,
+ 0x11645, 0x1164f,
+ 0x1165a, 0x1167f,
+ 0x116b8, 0x116bf,
+ 0x116ca, 0x1189f,
+ 0x118f3, 0x118fe,
+ 0x11900, 0x11abf,
+ 0x11af9, 0x11fff,
+ 0x12399, 0x123ff,
+ 0x1246f, 0x1246f,
+ 0x12475, 0x12fff,
+ 0x1342f, 0x167ff,
+ 0x16a39, 0x16a3f,
+ 0x16a5f, 0x16a5f,
+ 0x16a6a, 0x16a6d,
+ 0x16a70, 0x16acf,
+ 0x16aee, 0x16aef,
+ 0x16af6, 0x16aff,
+ 0x16b46, 0x16b4f,
+ 0x16b5a, 0x16b5a,
+ 0x16b62, 0x16b62,
+ 0x16b78, 0x16b7c,
+ 0x16b90, 0x16eff,
+ 0x16f45, 0x16f4f,
+ 0x16f7f, 0x16f8e,
+ 0x16fa0, 0x1afff,
+ 0x1b002, 0x1bbff,
+ 0x1bc6b, 0x1bc6f,
+ 0x1bc7d, 0x1bc7f,
+ 0x1bc89, 0x1bc8f,
+ 0x1bc9a, 0x1bc9b,
+ 0x1bca0, 0x1d166,
+ 0x1d16a, 0x1d17a,
+ 0x1d183, 0x1d184,
+ 0x1d18c, 0x1d1a9,
+ 0x1d1ae, 0x1d1ff,
+ 0x1d246, 0x1e7ff,
+ 0x1e8c5, 0x1e8c6,
+ 0x1e8d7, 0x1edff,
+ 0x1ee04, 0x1ee04,
+ 0x1ee20, 0x1ee20,
+ 0x1ee23, 0x1ee23,
+ 0x1ee25, 0x1ee26,
+ 0x1ee28, 0x1ee28,
+ 0x1ee33, 0x1ee33,
+ 0x1ee38, 0x1ee38,
+ 0x1ee3a, 0x1ee3a,
+ 0x1ee3c, 0x1ee41,
+ 0x1ee43, 0x1ee46,
+ 0x1ee48, 0x1ee48,
+ 0x1ee4a, 0x1ee4a,
+ 0x1ee4c, 0x1ee4c,
+ 0x1ee50, 0x1ee50,
+ 0x1ee53, 0x1ee53,
+ 0x1ee55, 0x1ee56,
+ 0x1ee58, 0x1ee58,
+ 0x1ee5a, 0x1ee5a,
+ 0x1ee5c, 0x1ee5c,
+ 0x1ee5e, 0x1ee5e,
+ 0x1ee60, 0x1ee60,
+ 0x1ee63, 0x1ee63,
+ 0x1ee65, 0x1ee66,
+ 0x1ee6b, 0x1ee6b,
+ 0x1ee73, 0x1ee73,
+ 0x1ee78, 0x1ee78,
+ 0x1ee7d, 0x1ee7d,
+ 0x1ee7f, 0x1ee7f,
+ 0x1ee8a, 0x1ee8a,
+ 0x1ee9c, 0x1eea0,
+ 0x1eea4, 0x1eea4,
+ 0x1eeaa, 0x1eeaa,
+ 0x1eebc, 0x1eeef,
+ 0x1eef2, 0x1f1ff,
+ 0x1f201, 0x1ffff,
+ 0x2a6d7, 0x2a6ff,
+ 0x2b735, 0x2b73f,
+ 0x2b81e, 0x2f7ff,
+ 0x2fa1e, 0xe00ff,
+ 0xe01f0, 0x10ffff
+};
+UCP_FN(Common)
+
+static const unichar ucp_Coptic_def[] = {
+ 0x3e2, 0x3ef,
+ 0x2c80, 0x2cf3,
+ 0x2cf9, 0x2cff
+};
+UCP_FN(Coptic)
+
+static const unichar ucp_Cuneiform_def[] = {
+ 0x12000, 0x12398,
+ 0x12400, 0x1246e,
+ 0x12470, 0x12474
+};
+UCP_FN(Cuneiform)
+
+static const unichar ucp_Cypriot_def[] = {
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f
+};
+UCP_FN(Cypriot)
+
+static const unichar ucp_Cyrillic_def[] = {
+ 0x400, 0x484,
+ 0x487, 0x52f,
+ 0x1d2b, 0x1d2b,
+ 0x1d78, 0x1d78,
+ 0x2de0, 0x2dff,
+ 0xa640, 0xa69d,
+ 0xa69f, 0xa69f
+};
+UCP_FN(Cyrillic)
+
+static const unichar ucp_Deseret_def[] = {
+ 0x10400, 0x1044f
+};
+UCP_FN(Deseret)
+
+static const unichar ucp_Devanagari_def[] = {
+ 0x900, 0x950,
+ 0x953, 0x963,
+ 0x966, 0x97f,
+ 0xa8e0, 0xa8fb
+};
+UCP_FN(Devanagari)
+
+static const unichar ucp_Duployan_def[] = {
+ 0x1bc00, 0x1bc6a,
+ 0x1bc70, 0x1bc7c,
+ 0x1bc80, 0x1bc88,
+ 0x1bc90, 0x1bc99,
+ 0x1bc9c, 0x1bc9f
+};
+UCP_FN(Duployan)
+
+static const unichar ucp_Egyptian_Hieroglyphs_def[] = {
+ 0x13000, 0x1342e
+};
+UCP_FN(Egyptian_Hieroglyphs)
+
+static const unichar ucp_Elbasan_def[] = {
+ 0x10500, 0x10527
+};
+UCP_FN(Elbasan)
+
+static const unichar ucp_Ethiopic_def[] = {
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135d, 0x137c,
+ 0x1380, 0x1399,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0xab01, 0xab06,
+ 0xab09, 0xab0e,
+ 0xab11, 0xab16,
+ 0xab20, 0xab26,
+ 0xab28, 0xab2e
+};
+UCP_FN(Ethiopic)
+
+static const unichar ucp_Georgian_def[] = {
+ 0x10a0, 0x10c5,
+ 0x10c7, 0x10c7,
+ 0x10cd, 0x10cd,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10ff,
+ 0x2d00, 0x2d25,
+ 0x2d27, 0x2d27,
+ 0x2d2d, 0x2d2d
+};
+UCP_FN(Georgian)
+
+static const unichar ucp_Glagolitic_def[] = {
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e
+};
+UCP_FN(Glagolitic)
+
+static const unichar ucp_Gothic_def[] = {
+ 0x10330, 0x1034a
+};
+UCP_FN(Gothic)
+
+static const unichar ucp_Grantha_def[] = {
+ 0x11301, 0x11303,
+ 0x11305, 0x1130c,
+ 0x1130f, 0x11310,
+ 0x11313, 0x11328,
+ 0x1132a, 0x11330,
+ 0x11332, 0x11333,
+ 0x11335, 0x11339,
+ 0x1133c, 0x11344,
+ 0x11347, 0x11348,
+ 0x1134b, 0x1134d,
+ 0x11357, 0x11357,
+ 0x1135d, 0x11363,
+ 0x11366, 0x1136c,
+ 0x11370, 0x11374
+};
+UCP_FN(Grantha)
+
+static const unichar ucp_Greek_def[] = {
+ 0x370, 0x373,
+ 0x375, 0x377,
+ 0x37a, 0x37d,
+ 0x37f, 0x37f,
+ 0x384, 0x384,
+ 0x386, 0x386,
+ 0x388, 0x38a,
+ 0x38c, 0x38c,
+ 0x38e, 0x3a1,
+ 0x3a3, 0x3e1,
+ 0x3f0, 0x3ff,
+ 0x1d26, 0x1d2a,
+ 0x1d5d, 0x1d61,
+ 0x1d66, 0x1d6a,
+ 0x1dbf, 0x1dbf,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x2126, 0x2126,
+ 0xab65, 0xab65,
+ 0x10140, 0x1018c,
+ 0x101a0, 0x101a0,
+ 0x1d200, 0x1d245
+};
+UCP_FN(Greek)
+
+static const unichar ucp_Gujarati_def[] = {
+ 0xa81, 0xa83,
+ 0xa85, 0xa8d,
+ 0xa8f, 0xa91,
+ 0xa93, 0xaa8,
+ 0xaaa, 0xab0,
+ 0xab2, 0xab3,
+ 0xab5, 0xab9,
+ 0xabc, 0xac5,
+ 0xac7, 0xac9,
+ 0xacb, 0xacd,
+ 0xad0, 0xad0,
+ 0xae0, 0xae3,
+ 0xae6, 0xaf1
+};
+UCP_FN(Gujarati)
+
+static const unichar ucp_Gurmukhi_def[] = {
+ 0xa01, 0xa03,
+ 0xa05, 0xa0a,
+ 0xa0f, 0xa10,
+ 0xa13, 0xa28,
+ 0xa2a, 0xa30,
+ 0xa32, 0xa33,
+ 0xa35, 0xa36,
+ 0xa38, 0xa39,
+ 0xa3c, 0xa3c,
+ 0xa3e, 0xa42,
+ 0xa47, 0xa48,
+ 0xa4b, 0xa4d,
+ 0xa51, 0xa51,
+ 0xa59, 0xa5c,
+ 0xa5e, 0xa5e,
+ 0xa66, 0xa75
+};
+UCP_FN(Gurmukhi)
+
+static const unichar ucp_Han_def[] = {
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x3005, 0x3005,
+ 0x3007, 0x3007,
+ 0x3021, 0x3029,
+ 0x3038, 0x303b,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcc,
+ 0xf900, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2b740, 0x2b81d,
+ 0x2f800, 0x2fa1d
+};
+UCP_FN(Han)
+
+static const unichar ucp_Hangul_def[] = {
+ 0x1100, 0x11ff,
+ 0x302e, 0x302f,
+ 0x3131, 0x318e,
+ 0x3200, 0x321e,
+ 0x3260, 0x327e,
+ 0xa960, 0xa97c,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xffa0, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc
+};
+UCP_FN(Hangul)
+
+static const unichar ucp_Hanunoo_def[] = {
+ 0x1720, 0x1734
+};
+UCP_FN(Hanunoo)
+
+static const unichar ucp_Hebrew_def[] = {
+ 0x591, 0x5c7,
+ 0x5d0, 0x5ea,
+ 0x5f0, 0x5f4,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfb4f
+};
+UCP_FN(Hebrew)
+
+static const unichar ucp_Hiragana_def[] = {
+ 0x3041, 0x3096,
+ 0x309d, 0x309f,
+ 0x1b001, 0x1b001,
+ 0x1f200, 0x1f200
+};
+UCP_FN(Hiragana)
+
+static const unichar ucp_Imperial_Aramaic_def[] = {
+ 0x10840, 0x10855,
+ 0x10857, 0x1085f
+};
+UCP_FN(Imperial_Aramaic)
+
+static const unichar ucp_Inherited_def[] = {
+ 0x300, 0x36f,
+ 0x485, 0x486,
+ 0x64b, 0x655,
+ 0x670, 0x670,
+ 0x951, 0x952,
+ 0x1ab0, 0x1abe,
+ 0x1cd0, 0x1cd2,
+ 0x1cd4, 0x1ce0,
+ 0x1ce2, 0x1ce8,
+ 0x1ced, 0x1ced,
+ 0x1cf4, 0x1cf4,
+ 0x1cf8, 0x1cf9,
+ 0x1dc0, 0x1df5,
+ 0x1dfc, 0x1dff,
+ 0x200c, 0x200d,
+ 0x20d0, 0x20f0,
+ 0x302a, 0x302d,
+ 0x3099, 0x309a,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe2d,
+ 0x101fd, 0x101fd,
+ 0x102e0, 0x102e0,
+ 0x1d167, 0x1d169,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0xe0100, 0xe01ef
+};
+UCP_FN(Inherited)
+
+static const unichar ucp_Inscriptional_Pahlavi_def[] = {
+ 0x10b60, 0x10b72,
+ 0x10b78, 0x10b7f
+};
+UCP_FN(Inscriptional_Pahlavi)
+
+static const unichar ucp_Inscriptional_Parthian_def[] = {
+ 0x10b40, 0x10b55,
+ 0x10b58, 0x10b5f
+};
+UCP_FN(Inscriptional_Parthian)
+
+static const unichar ucp_Javanese_def[] = {
+ 0xa980, 0xa9cd,
+ 0xa9d0, 0xa9d9,
+ 0xa9de, 0xa9df
+};
+UCP_FN(Javanese)
+
+static const unichar ucp_Kaithi_def[] = {
+ 0x11080, 0x110c1
+};
+UCP_FN(Kaithi)
+
+static const unichar ucp_Kannada_def[] = {
+ 0xc81, 0xc83,
+ 0xc85, 0xc8c,
+ 0xc8e, 0xc90,
+ 0xc92, 0xca8,
+ 0xcaa, 0xcb3,
+ 0xcb5, 0xcb9,
+ 0xcbc, 0xcc4,
+ 0xcc6, 0xcc8,
+ 0xcca, 0xccd,
+ 0xcd5, 0xcd6,
+ 0xcde, 0xcde,
+ 0xce0, 0xce3,
+ 0xce6, 0xcef,
+ 0xcf1, 0xcf2
+};
+UCP_FN(Kannada)
+
+static const unichar ucp_Katakana_def[] = {
+ 0x30a1, 0x30fa,
+ 0x30fd, 0x30ff,
+ 0x31f0, 0x31ff,
+ 0x32d0, 0x32fe,
+ 0x3300, 0x3357,
+ 0xff66, 0xff6f,
+ 0xff71, 0xff9d,
+ 0x1b000, 0x1b000
+};
+UCP_FN(Katakana)
+
+static const unichar ucp_Kayah_Li_def[] = {
+ 0xa900, 0xa92d,
+ 0xa92f, 0xa92f
+};
+UCP_FN(Kayah_Li)
+
+static const unichar ucp_Kharoshthi_def[] = {
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a47,
+ 0x10a50, 0x10a58
+};
+UCP_FN(Kharoshthi)
+
+static const unichar ucp_Khmer_def[] = {
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x19e0, 0x19ff
+};
+UCP_FN(Khmer)
+
+static const unichar ucp_Khojki_def[] = {
+ 0x11200, 0x11211,
+ 0x11213, 0x1123d
+};
+UCP_FN(Khojki)
+
+static const unichar ucp_Khudawadi_def[] = {
+ 0x112b0, 0x112ea,
+ 0x112f0, 0x112f9
+};
+UCP_FN(Khudawadi)
+
+static const unichar ucp_Lao_def[] = {
+ 0xe81, 0xe82,
+ 0xe84, 0xe84,
+ 0xe87, 0xe88,
+ 0xe8a, 0xe8a,
+ 0xe8d, 0xe8d,
+ 0xe94, 0xe97,
+ 0xe99, 0xe9f,
+ 0xea1, 0xea3,
+ 0xea5, 0xea5,
+ 0xea7, 0xea7,
+ 0xeaa, 0xeab,
+ 0xead, 0xeb9,
+ 0xebb, 0xebd,
+ 0xec0, 0xec4,
+ 0xec6, 0xec6,
+ 0xec8, 0xecd,
+ 0xed0, 0xed9,
+ 0xedc, 0xedf
+};
+UCP_FN(Lao)
+
+static const unichar ucp_Latin_def[] = {
+ 0x41, 0x5a,
+ 0x61, 0x7a,
+ 0xaa, 0xaa,
+ 0xba, 0xba,
+ 0xc0, 0xd6,
+ 0xd8, 0xf6,
+ 0xf8, 0x2b8,
+ 0x2e0, 0x2e4,
+ 0x1d00, 0x1d25,
+ 0x1d2c, 0x1d5c,
+ 0x1d62, 0x1d65,
+ 0x1d6b, 0x1d77,
+ 0x1d79, 0x1dbe,
+ 0x1e00, 0x1eff,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x209c,
+ 0x212a, 0x212b,
+ 0x2132, 0x2132,
+ 0x214e, 0x214e,
+ 0x2160, 0x2188,
+ 0x2c60, 0x2c7f,
+ 0xa722, 0xa787,
+ 0xa78b, 0xa78e,
+ 0xa790, 0xa7ad,
+ 0xa7b0, 0xa7b1,
+ 0xa7f7, 0xa7ff,
+ 0xab30, 0xab5a,
+ 0xab5c, 0xab5f,
+ 0xab64, 0xab64,
+ 0xfb00, 0xfb06,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a
+};
+UCP_FN(Latin)
+
+static const unichar ucp_Lepcha_def[] = {
+ 0x1c00, 0x1c37,
+ 0x1c3b, 0x1c49,
+ 0x1c4d, 0x1c4f
+};
+UCP_FN(Lepcha)
+
+static const unichar ucp_Limbu_def[] = {
+ 0x1900, 0x191e,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x194f
+};
+UCP_FN(Limbu)
+
+static const unichar ucp_Linear_A_def[] = {
+ 0x10600, 0x10736,
+ 0x10740, 0x10755,
+ 0x10760, 0x10767
+};
+UCP_FN(Linear_A)
+
+static const unichar ucp_Linear_B_def[] = {
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa
+};
+UCP_FN(Linear_B)
+
+static const unichar ucp_Lisu_def[] = {
+ 0xa4d0, 0xa4ff
+};
+UCP_FN(Lisu)
+
+static const unichar ucp_Lycian_def[] = {
+ 0x10280, 0x1029c
+};
+UCP_FN(Lycian)
+
+static const unichar ucp_Lydian_def[] = {
+ 0x10920, 0x10939,
+ 0x1093f, 0x1093f
+};
+UCP_FN(Lydian)
+
+static const unichar ucp_Mahajani_def[] = {
+ 0x11150, 0x11176
+};
+UCP_FN(Mahajani)
+
+static const unichar ucp_Malayalam_def[] = {
+ 0xd01, 0xd03,
+ 0xd05, 0xd0c,
+ 0xd0e, 0xd10,
+ 0xd12, 0xd3a,
+ 0xd3d, 0xd44,
+ 0xd46, 0xd48,
+ 0xd4a, 0xd4e,
+ 0xd57, 0xd57,
+ 0xd60, 0xd63,
+ 0xd66, 0xd75,
+ 0xd79, 0xd7f
+};
+UCP_FN(Malayalam)
+
+static const unichar ucp_Mandaic_def[] = {
+ 0x840, 0x85b,
+ 0x85e, 0x85e
+};
+UCP_FN(Mandaic)
+
+static const unichar ucp_Manichaean_def[] = {
+ 0x10ac0, 0x10ae6,
+ 0x10aeb, 0x10af6
+};
+UCP_FN(Manichaean)
+
+static const unichar ucp_Meetei_Mayek_def[] = {
+ 0xaae0, 0xaaf6,
+ 0xabc0, 0xabed,
+ 0xabf0, 0xabf9
+};
+UCP_FN(Meetei_Mayek)
+
+static const unichar ucp_Mende_Kikakui_def[] = {
+ 0x1e800, 0x1e8c4,
+ 0x1e8c7, 0x1e8d6
+};
+UCP_FN(Mende_Kikakui)
+
+static const unichar ucp_Meroitic_Cursive_def[] = {
+ 0x109a0, 0x109b7,
+ 0x109be, 0x109bf
+};
+UCP_FN(Meroitic_Cursive)
+
+static const unichar ucp_Meroitic_Hieroglyphs_def[] = {
+ 0x10980, 0x1099f
+};
+UCP_FN(Meroitic_Hieroglyphs)
+
+static const unichar ucp_Miao_def[] = {
+ 0x16f00, 0x16f44,
+ 0x16f50, 0x16f7e,
+ 0x16f8f, 0x16f9f
+};
+UCP_FN(Miao)
+
+static const unichar ucp_Modi_def[] = {
+ 0x11600, 0x11644,
+ 0x11650, 0x11659
+};
+UCP_FN(Modi)
+
+static const unichar ucp_Mongolian_def[] = {
+ 0x1800, 0x1801,
+ 0x1804, 0x1804,
+ 0x1806, 0x180e,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18aa
+};
+UCP_FN(Mongolian)
+
+static const unichar ucp_Mro_def[] = {
+ 0x16a40, 0x16a5e,
+ 0x16a60, 0x16a69,
+ 0x16a6e, 0x16a6f
+};
+UCP_FN(Mro)
+
+static const unichar ucp_Myanmar_def[] = {
+ 0x1000, 0x109f,
+ 0xa9e0, 0xa9fe,
+ 0xaa60, 0xaa7f
+};
+UCP_FN(Myanmar)
+
+static const unichar ucp_Nabataean_def[] = {
+ 0x10880, 0x1089e,
+ 0x108a7, 0x108af
+};
+UCP_FN(Nabataean)
+
+static const unichar ucp_New_Tai_Lue_def[] = {
+ 0x1980, 0x19ab,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19da,
+ 0x19de, 0x19df
+};
+UCP_FN(New_Tai_Lue)
+
+static const unichar ucp_Nko_def[] = {
+ 0x7c0, 0x7fa
+};
+UCP_FN(Nko)
+
+static const unichar ucp_Ogham_def[] = {
+ 0x1680, 0x169c
+};
+UCP_FN(Ogham)
+
+static const unichar ucp_Ol_Chiki_def[] = {
+ 0x1c50, 0x1c7f
+};
+UCP_FN(Ol_Chiki)
+
+static const unichar ucp_Old_Italic_def[] = {
+ 0x10300, 0x10323
+};
+UCP_FN(Old_Italic)
+
+static const unichar ucp_Old_North_Arabian_def[] = {
+ 0x10a80, 0x10a9f
+};
+UCP_FN(Old_North_Arabian)
+
+static const unichar ucp_Old_Permic_def[] = {
+ 0x10350, 0x1037a
+};
+UCP_FN(Old_Permic)
+
+static const unichar ucp_Old_Persian_def[] = {
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103d5
+};
+UCP_FN(Old_Persian)
+
+static const unichar ucp_Old_South_Arabian_def[] = {
+ 0x10a60, 0x10a7f
+};
+UCP_FN(Old_South_Arabian)
+
+static const unichar ucp_Old_Turkic_def[] = {
+ 0x10c00, 0x10c48
+};
+UCP_FN(Old_Turkic)
+
+static const unichar ucp_Oriya_def[] = {
+ 0xb01, 0xb03,
+ 0xb05, 0xb0c,
+ 0xb0f, 0xb10,
+ 0xb13, 0xb28,
+ 0xb2a, 0xb30,
+ 0xb32, 0xb33,
+ 0xb35, 0xb39,
+ 0xb3c, 0xb44,
+ 0xb47, 0xb48,
+ 0xb4b, 0xb4d,
+ 0xb56, 0xb57,
+ 0xb5c, 0xb5d,
+ 0xb5f, 0xb63,
+ 0xb66, 0xb77
+};
+UCP_FN(Oriya)
+
+static const unichar ucp_Osmanya_def[] = {
+ 0x10480, 0x1049d,
+ 0x104a0, 0x104a9
+};
+UCP_FN(Osmanya)
+
+static const unichar ucp_Pahawh_Hmong_def[] = {
+ 0x16b00, 0x16b45,
+ 0x16b50, 0x16b59,
+ 0x16b5b, 0x16b61,
+ 0x16b63, 0x16b77,
+ 0x16b7d, 0x16b8f
+};
+UCP_FN(Pahawh_Hmong)
+
+static const unichar ucp_Palmyrene_def[] = {
+ 0x10860, 0x1087f
+};
+UCP_FN(Palmyrene)
+
+static const unichar ucp_Pau_Cin_Hau_def[] = {
+ 0x11ac0, 0x11af8
+};
+UCP_FN(Pau_Cin_Hau)
+
+static const unichar ucp_Phags_Pa_def[] = {
+ 0xa840, 0xa877
+};
+UCP_FN(Phags_Pa)
+
+static const unichar ucp_Phoenician_def[] = {
+ 0x10900, 0x1091b,
+ 0x1091f, 0x1091f
+};
+UCP_FN(Phoenician)
+
+static const unichar ucp_Psalter_Pahlavi_def[] = {
+ 0x10b80, 0x10b91,
+ 0x10b99, 0x10b9c,
+ 0x10ba9, 0x10baf
+};
+UCP_FN(Psalter_Pahlavi)
+
+static const unichar ucp_Rejang_def[] = {
+ 0xa930, 0xa953,
+ 0xa95f, 0xa95f
+};
+UCP_FN(Rejang)
+
+static const unichar ucp_Runic_def[] = {
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f8
+};
+UCP_FN(Runic)
+
+static const unichar ucp_Samaritan_def[] = {
+ 0x800, 0x82d,
+ 0x830, 0x83e
+};
+UCP_FN(Samaritan)
+
+static const unichar ucp_Saurashtra_def[] = {
+ 0xa880, 0xa8c4,
+ 0xa8ce, 0xa8d9
+};
+UCP_FN(Saurashtra)
+
+static const unichar ucp_Sharada_def[] = {
+ 0x11180, 0x111c8,
+ 0x111cd, 0x111cd,
+ 0x111d0, 0x111da
+};
+UCP_FN(Sharada)
+
+static const unichar ucp_Shavian_def[] = {
+ 0x10450, 0x1047f
+};
+UCP_FN(Shavian)
+
+static const unichar ucp_Siddham_def[] = {
+ 0x11580, 0x115b5,
+ 0x115b8, 0x115c9
+};
+UCP_FN(Siddham)
+
+static const unichar ucp_Sinhala_def[] = {
+ 0xd82, 0xd83,
+ 0xd85, 0xd96,
+ 0xd9a, 0xdb1,
+ 0xdb3, 0xdbb,
+ 0xdbd, 0xdbd,
+ 0xdc0, 0xdc6,
+ 0xdca, 0xdca,
+ 0xdcf, 0xdd4,
+ 0xdd6, 0xdd6,
+ 0xdd8, 0xddf,
+ 0xde6, 0xdef,
+ 0xdf2, 0xdf4,
+ 0x111e1, 0x111f4
+};
+UCP_FN(Sinhala)
+
+static const unichar ucp_Sora_Sompeng_def[] = {
+ 0x110d0, 0x110e8,
+ 0x110f0, 0x110f9
+};
+UCP_FN(Sora_Sompeng)
+
+static const unichar ucp_Sundanese_def[] = {
+ 0x1b80, 0x1bbf,
+ 0x1cc0, 0x1cc7
+};
+UCP_FN(Sundanese)
+
+static const unichar ucp_Syloti_Nagri_def[] = {
+ 0xa800, 0xa82b
+};
+UCP_FN(Syloti_Nagri)
+
+static const unichar ucp_Syriac_def[] = {
+ 0x700, 0x70d,
+ 0x70f, 0x74a,
+ 0x74d, 0x74f
+};
+UCP_FN(Syriac)
+
+static const unichar ucp_Tagalog_def[] = {
+ 0x1700, 0x170c,
+ 0x170e, 0x1714
+};
+UCP_FN(Tagalog)
+
+static const unichar ucp_Tagbanwa_def[] = {
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773
+};
+UCP_FN(Tagbanwa)
+
+static const unichar ucp_Tai_Le_def[] = {
+ 0x1950, 0x196d,
+ 0x1970, 0x1974
+};
+UCP_FN(Tai_Le)
+
+static const unichar ucp_Tai_Tham_def[] = {
+ 0x1a20, 0x1a5e,
+ 0x1a60, 0x1a7c,
+ 0x1a7f, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1aa0, 0x1aad
+};
+UCP_FN(Tai_Tham)
+
+static const unichar ucp_Tai_Viet_def[] = {
+ 0xaa80, 0xaac2,
+ 0xaadb, 0xaadf
+};
+UCP_FN(Tai_Viet)
+
+static const unichar ucp_Takri_def[] = {
+ 0x11680, 0x116b7,
+ 0x116c0, 0x116c9
+};
+UCP_FN(Takri)
+
+static const unichar ucp_Tamil_def[] = {
+ 0xb82, 0xb83,
+ 0xb85, 0xb8a,
+ 0xb8e, 0xb90,
+ 0xb92, 0xb95,
+ 0xb99, 0xb9a,
+ 0xb9c, 0xb9c,
+ 0xb9e, 0xb9f,
+ 0xba3, 0xba4,
+ 0xba8, 0xbaa,
+ 0xbae, 0xbb9,
+ 0xbbe, 0xbc2,
+ 0xbc6, 0xbc8,
+ 0xbca, 0xbcd,
+ 0xbd0, 0xbd0,
+ 0xbd7, 0xbd7,
+ 0xbe6, 0xbfa
+};
+UCP_FN(Tamil)
+
+static const unichar ucp_Telugu_def[] = {
+ 0xc00, 0xc03,
+ 0xc05, 0xc0c,
+ 0xc0e, 0xc10,
+ 0xc12, 0xc28,
+ 0xc2a, 0xc39,
+ 0xc3d, 0xc44,
+ 0xc46, 0xc48,
+ 0xc4a, 0xc4d,
+ 0xc55, 0xc56,
+ 0xc58, 0xc59,
+ 0xc60, 0xc63,
+ 0xc66, 0xc6f,
+ 0xc78, 0xc7f
+};
+UCP_FN(Telugu)
+
+static const unichar ucp_Thaana_def[] = {
+ 0x780, 0x7b1
+};
+UCP_FN(Thaana)
+
+static const unichar ucp_Thai_def[] = {
+ 0xe01, 0xe3a,
+ 0xe40, 0xe5b
+};
+UCP_FN(Thai)
+
+static const unichar ucp_Tibetan_def[] = {
+ 0xf00, 0xf47,
+ 0xf49, 0xf6c,
+ 0xf71, 0xf97,
+ 0xf99, 0xfbc,
+ 0xfbe, 0xfcc,
+ 0xfce, 0xfd4,
+ 0xfd9, 0xfda
+};
+UCP_FN(Tibetan)
+
+static const unichar ucp_Tifinagh_def[] = {
+ 0x2d30, 0x2d67,
+ 0x2d6f, 0x2d70,
+ 0x2d7f, 0x2d7f
+};
+UCP_FN(Tifinagh)
+
+static const unichar ucp_Tirhuta_def[] = {
+ 0x11480, 0x114c7,
+ 0x114d0, 0x114d9
+};
+UCP_FN(Tirhuta)
+
+static const unichar ucp_Ugaritic_def[] = {
+ 0x10380, 0x1039d,
+ 0x1039f, 0x1039f
+};
+UCP_FN(Ugaritic)
+
+static const unichar ucp_Vai_def[] = {
+ 0xa500, 0xa62b
+};
+UCP_FN(Vai)
+
+static const unichar ucp_Warang_Citi_def[] = {
+ 0x118a0, 0x118f2,
+ 0x118ff, 0x118ff
+};
+UCP_FN(Warang_Citi)
+
+static const unichar ucp_Yi_def[] = {
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6
+};
+UCP_FN(Yi)
+
+static const unicase ucp_caseless_def[] = {
+ {0x41, 0x61},
+ {0x42, 0x62},
+ {0x43, 0x63},
+ {0x44, 0x64},
+ {0x45, 0x65},
+ {0x46, 0x66},
+ {0x47, 0x67},
+ {0x48, 0x68},
+ {0x49, 0x69},
+ {0x4a, 0x6a},
+ {0x4b, 0x6b},
+ {0x4b, 0x212a},
+ {0x4c, 0x6c},
+ {0x4d, 0x6d},
+ {0x4e, 0x6e},
+ {0x4f, 0x6f},
+ {0x50, 0x70},
+ {0x51, 0x71},
+ {0x52, 0x72},
+ {0x53, 0x73},
+ {0x53, 0x17f},
+ {0x54, 0x74},
+ {0x55, 0x75},
+ {0x56, 0x76},
+ {0x57, 0x77},
+ {0x58, 0x78},
+ {0x59, 0x79},
+ {0x5a, 0x7a},
+ {0x61, 0x41},
+ {0x62, 0x42},
+ {0x63, 0x43},
+ {0x64, 0x44},
+ {0x65, 0x45},
+ {0x66, 0x46},
+ {0x67, 0x47},
+ {0x68, 0x48},
+ {0x69, 0x49},
+ {0x6a, 0x4a},
+ {0x6b, 0x4b},
+ {0x6b, 0x212a},
+ {0x6c, 0x4c},
+ {0x6d, 0x4d},
+ {0x6e, 0x4e},
+ {0x6f, 0x4f},
+ {0x70, 0x50},
+ {0x71, 0x51},
+ {0x72, 0x52},
+ {0x73, 0x53},
+ {0x73, 0x17f},
+ {0x74, 0x54},
+ {0x75, 0x55},
+ {0x76, 0x56},
+ {0x77, 0x57},
+ {0x78, 0x58},
+ {0x79, 0x59},
+ {0x7a, 0x5a},
+ {0xb5, 0x39c},
+ {0xb5, 0x3bc},
+ {0xc0, 0xe0},
+ {0xc1, 0xe1},
+ {0xc2, 0xe2},
+ {0xc3, 0xe3},
+ {0xc4, 0xe4},
+ {0xc5, 0xe5},
+ {0xc5, 0x212b},
+ {0xc6, 0xe6},
+ {0xc7, 0xe7},
+ {0xc8, 0xe8},
+ {0xc9, 0xe9},
+ {0xca, 0xea},
+ {0xcb, 0xeb},
+ {0xcc, 0xec},
+ {0xcd, 0xed},
+ {0xce, 0xee},
+ {0xcf, 0xef},
+ {0xd0, 0xf0},
+ {0xd1, 0xf1},
+ {0xd2, 0xf2},
+ {0xd3, 0xf3},
+ {0xd4, 0xf4},
+ {0xd5, 0xf5},
+ {0xd6, 0xf6},
+ {0xd8, 0xf8},
+ {0xd9, 0xf9},
+ {0xda, 0xfa},
+ {0xdb, 0xfb},
+ {0xdc, 0xfc},
+ {0xdd, 0xfd},
+ {0xde, 0xfe},
+ {0xdf, 0x1e9e},
+ {0xe0, 0xc0},
+ {0xe1, 0xc1},
+ {0xe2, 0xc2},
+ {0xe3, 0xc3},
+ {0xe4, 0xc4},
+ {0xe5, 0xc5},
+ {0xe5, 0x212b},
+ {0xe6, 0xc6},
+ {0xe7, 0xc7},
+ {0xe8, 0xc8},
+ {0xe9, 0xc9},
+ {0xea, 0xca},
+ {0xeb, 0xcb},
+ {0xec, 0xcc},
+ {0xed, 0xcd},
+ {0xee, 0xce},
+ {0xef, 0xcf},
+ {0xf0, 0xd0},
+ {0xf1, 0xd1},
+ {0xf2, 0xd2},
+ {0xf3, 0xd3},
+ {0xf4, 0xd4},
+ {0xf5, 0xd5},
+ {0xf6, 0xd6},
+ {0xf8, 0xd8},
+ {0xf9, 0xd9},
+ {0xfa, 0xda},
+ {0xfb, 0xdb},
+ {0xfc, 0xdc},
+ {0xfd, 0xdd},
+ {0xfe, 0xde},
+ {0xff, 0x178},
+ {0x100, 0x101},
+ {0x101, 0x100},
+ {0x102, 0x103},
+ {0x103, 0x102},
+ {0x104, 0x105},
+ {0x105, 0x104},
+ {0x106, 0x107},
+ {0x107, 0x106},
+ {0x108, 0x109},
+ {0x109, 0x108},
+ {0x10a, 0x10b},
+ {0x10b, 0x10a},
+ {0x10c, 0x10d},
+ {0x10d, 0x10c},
+ {0x10e, 0x10f},
+ {0x10f, 0x10e},
+ {0x110, 0x111},
+ {0x111, 0x110},
+ {0x112, 0x113},
+ {0x113, 0x112},
+ {0x114, 0x115},
+ {0x115, 0x114},
+ {0x116, 0x117},
+ {0x117, 0x116},
+ {0x118, 0x119},
+ {0x119, 0x118},
+ {0x11a, 0x11b},
+ {0x11b, 0x11a},
+ {0x11c, 0x11d},
+ {0x11d, 0x11c},
+ {0x11e, 0x11f},
+ {0x11f, 0x11e},
+ {0x120, 0x121},
+ {0x121, 0x120},
+ {0x122, 0x123},
+ {0x123, 0x122},
+ {0x124, 0x125},
+ {0x125, 0x124},
+ {0x126, 0x127},
+ {0x127, 0x126},
+ {0x128, 0x129},
+ {0x129, 0x128},
+ {0x12a, 0x12b},
+ {0x12b, 0x12a},
+ {0x12c, 0x12d},
+ {0x12d, 0x12c},
+ {0x12e, 0x12f},
+ {0x12f, 0x12e},
+ {0x132, 0x133},
+ {0x133, 0x132},
+ {0x134, 0x135},
+ {0x135, 0x134},
+ {0x136, 0x137},
+ {0x137, 0x136},
+ {0x139, 0x13a},
+ {0x13a, 0x139},
+ {0x13b, 0x13c},
+ {0x13c, 0x13b},
+ {0x13d, 0x13e},
+ {0x13e, 0x13d},
+ {0x13f, 0x140},
+ {0x140, 0x13f},
+ {0x141, 0x142},
+ {0x142, 0x141},
+ {0x143, 0x144},
+ {0x144, 0x143},
+ {0x145, 0x146},
+ {0x146, 0x145},
+ {0x147, 0x148},
+ {0x148, 0x147},
+ {0x14a, 0x14b},
+ {0x14b, 0x14a},
+ {0x14c, 0x14d},
+ {0x14d, 0x14c},
+ {0x14e, 0x14f},
+ {0x14f, 0x14e},
+ {0x150, 0x151},
+ {0x151, 0x150},
+ {0x152, 0x153},
+ {0x153, 0x152},
+ {0x154, 0x155},
+ {0x155, 0x154},
+ {0x156, 0x157},
+ {0x157, 0x156},
+ {0x158, 0x159},
+ {0x159, 0x158},
+ {0x15a, 0x15b},
+ {0x15b, 0x15a},
+ {0x15c, 0x15d},
+ {0x15d, 0x15c},
+ {0x15e, 0x15f},
+ {0x15f, 0x15e},
+ {0x160, 0x161},
+ {0x161, 0x160},
+ {0x162, 0x163},
+ {0x163, 0x162},
+ {0x164, 0x165},
+ {0x165, 0x164},
+ {0x166, 0x167},
+ {0x167, 0x166},
+ {0x168, 0x169},
+ {0x169, 0x168},
+ {0x16a, 0x16b},
+ {0x16b, 0x16a},
+ {0x16c, 0x16d},
+ {0x16d, 0x16c},
+ {0x16e, 0x16f},
+ {0x16f, 0x16e},
+ {0x170, 0x171},
+ {0x171, 0x170},
+ {0x172, 0x173},
+ {0x173, 0x172},
+ {0x174, 0x175},
+ {0x175, 0x174},
+ {0x176, 0x177},
+ {0x177, 0x176},
+ {0x178, 0xff},
+ {0x179, 0x17a},
+ {0x17a, 0x179},
+ {0x17b, 0x17c},
+ {0x17c, 0x17b},
+ {0x17d, 0x17e},
+ {0x17e, 0x17d},
+ {0x17f, 0x53},
+ {0x17f, 0x73},
+ {0x180, 0x243},
+ {0x181, 0x253},
+ {0x182, 0x183},
+ {0x183, 0x182},
+ {0x184, 0x185},
+ {0x185, 0x184},
+ {0x186, 0x254},
+ {0x187, 0x188},
+ {0x188, 0x187},
+ {0x189, 0x256},
+ {0x18a, 0x257},
+ {0x18b, 0x18c},
+ {0x18c, 0x18b},
+ {0x18e, 0x1dd},
+ {0x18f, 0x259},
+ {0x190, 0x25b},
+ {0x191, 0x192},
+ {0x192, 0x191},
+ {0x193, 0x260},
+ {0x194, 0x263},
+ {0x195, 0x1f6},
+ {0x196, 0x269},
+ {0x197, 0x268},
+ {0x198, 0x199},
+ {0x199, 0x198},
+ {0x19a, 0x23d},
+ {0x19c, 0x26f},
+ {0x19d, 0x272},
+ {0x19e, 0x220},
+ {0x19f, 0x275},
+ {0x1a0, 0x1a1},
+ {0x1a1, 0x1a0},
+ {0x1a2, 0x1a3},
+ {0x1a3, 0x1a2},
+ {0x1a4, 0x1a5},
+ {0x1a5, 0x1a4},
+ {0x1a6, 0x280},
+ {0x1a7, 0x1a8},
+ {0x1a8, 0x1a7},
+ {0x1a9, 0x283},
+ {0x1ac, 0x1ad},
+ {0x1ad, 0x1ac},
+ {0x1ae, 0x288},
+ {0x1af, 0x1b0},
+ {0x1b0, 0x1af},
+ {0x1b1, 0x28a},
+ {0x1b2, 0x28b},
+ {0x1b3, 0x1b4},
+ {0x1b4, 0x1b3},
+ {0x1b5, 0x1b6},
+ {0x1b6, 0x1b5},
+ {0x1b7, 0x292},
+ {0x1b8, 0x1b9},
+ {0x1b9, 0x1b8},
+ {0x1bc, 0x1bd},
+ {0x1bd, 0x1bc},
+ {0x1bf, 0x1f7},
+ {0x1c4, 0x1c5},
+ {0x1c4, 0x1c6},
+ {0x1c5, 0x1c4},
+ {0x1c5, 0x1c6},
+ {0x1c6, 0x1c4},
+ {0x1c6, 0x1c5},
+ {0x1c7, 0x1c8},
+ {0x1c7, 0x1c9},
+ {0x1c8, 0x1c7},
+ {0x1c8, 0x1c9},
+ {0x1c9, 0x1c7},
+ {0x1c9, 0x1c8},
+ {0x1ca, 0x1cb},
+ {0x1ca, 0x1cc},
+ {0x1cb, 0x1ca},
+ {0x1cb, 0x1cc},
+ {0x1cc, 0x1ca},
+ {0x1cc, 0x1cb},
+ {0x1cd, 0x1ce},
+ {0x1ce, 0x1cd},
+ {0x1cf, 0x1d0},
+ {0x1d0, 0x1cf},
+ {0x1d1, 0x1d2},
+ {0x1d2, 0x1d1},
+ {0x1d3, 0x1d4},
+ {0x1d4, 0x1d3},
+ {0x1d5, 0x1d6},
+ {0x1d6, 0x1d5},
+ {0x1d7, 0x1d8},
+ {0x1d8, 0x1d7},
+ {0x1d9, 0x1da},
+ {0x1da, 0x1d9},
+ {0x1db, 0x1dc},
+ {0x1dc, 0x1db},
+ {0x1dd, 0x18e},
+ {0x1de, 0x1df},
+ {0x1df, 0x1de},
+ {0x1e0, 0x1e1},
+ {0x1e1, 0x1e0},
+ {0x1e2, 0x1e3},
+ {0x1e3, 0x1e2},
+ {0x1e4, 0x1e5},
+ {0x1e5, 0x1e4},
+ {0x1e6, 0x1e7},
+ {0x1e7, 0x1e6},
+ {0x1e8, 0x1e9},
+ {0x1e9, 0x1e8},
+ {0x1ea, 0x1eb},
+ {0x1eb, 0x1ea},
+ {0x1ec, 0x1ed},
+ {0x1ed, 0x1ec},
+ {0x1ee, 0x1ef},
+ {0x1ef, 0x1ee},
+ {0x1f1, 0x1f2},
+ {0x1f1, 0x1f3},
+ {0x1f2, 0x1f1},
+ {0x1f2, 0x1f3},
+ {0x1f3, 0x1f1},
+ {0x1f3, 0x1f2},
+ {0x1f4, 0x1f5},
+ {0x1f5, 0x1f4},
+ {0x1f6, 0x195},
+ {0x1f7, 0x1bf},
+ {0x1f8, 0x1f9},
+ {0x1f9, 0x1f8},
+ {0x1fa, 0x1fb},
+ {0x1fb, 0x1fa},
+ {0x1fc, 0x1fd},
+ {0x1fd, 0x1fc},
+ {0x1fe, 0x1ff},
+ {0x1ff, 0x1fe},
+ {0x200, 0x201},
+ {0x201, 0x200},
+ {0x202, 0x203},
+ {0x203, 0x202},
+ {0x204, 0x205},
+ {0x205, 0x204},
+ {0x206, 0x207},
+ {0x207, 0x206},
+ {0x208, 0x209},
+ {0x209, 0x208},
+ {0x20a, 0x20b},
+ {0x20b, 0x20a},
+ {0x20c, 0x20d},
+ {0x20d, 0x20c},
+ {0x20e, 0x20f},
+ {0x20f, 0x20e},
+ {0x210, 0x211},
+ {0x211, 0x210},
+ {0x212, 0x213},
+ {0x213, 0x212},
+ {0x214, 0x215},
+ {0x215, 0x214},
+ {0x216, 0x217},
+ {0x217, 0x216},
+ {0x218, 0x219},
+ {0x219, 0x218},
+ {0x21a, 0x21b},
+ {0x21b, 0x21a},
+ {0x21c, 0x21d},
+ {0x21d, 0x21c},
+ {0x21e, 0x21f},
+ {0x21f, 0x21e},
+ {0x220, 0x19e},
+ {0x222, 0x223},
+ {0x223, 0x222},
+ {0x224, 0x225},
+ {0x225, 0x224},
+ {0x226, 0x227},
+ {0x227, 0x226},
+ {0x228, 0x229},
+ {0x229, 0x228},
+ {0x22a, 0x22b},
+ {0x22b, 0x22a},
+ {0x22c, 0x22d},
+ {0x22d, 0x22c},
+ {0x22e, 0x22f},
+ {0x22f, 0x22e},
+ {0x230, 0x231},
+ {0x231, 0x230},
+ {0x232, 0x233},
+ {0x233, 0x232},
+ {0x23a, 0x2c65},
+ {0x23b, 0x23c},
+ {0x23c, 0x23b},
+ {0x23d, 0x19a},
+ {0x23e, 0x2c66},
+ {0x23f, 0x2c7e},
+ {0x240, 0x2c7f},
+ {0x241, 0x242},
+ {0x242, 0x241},
+ {0x243, 0x180},
+ {0x244, 0x289},
+ {0x245, 0x28c},
+ {0x246, 0x247},
+ {0x247, 0x246},
+ {0x248, 0x249},
+ {0x249, 0x248},
+ {0x24a, 0x24b},
+ {0x24b, 0x24a},
+ {0x24c, 0x24d},
+ {0x24d, 0x24c},
+ {0x24e, 0x24f},
+ {0x24f, 0x24e},
+ {0x250, 0x2c6f},
+ {0x251, 0x2c6d},
+ {0x252, 0x2c70},
+ {0x253, 0x181},
+ {0x254, 0x186},
+ {0x256, 0x189},
+ {0x257, 0x18a},
+ {0x259, 0x18f},
+ {0x25b, 0x190},
+ {0x25c, 0xa7ab},
+ {0x260, 0x193},
+ {0x261, 0xa7ac},
+ {0x263, 0x194},
+ {0x265, 0xa78d},
+ {0x266, 0xa7aa},
+ {0x268, 0x197},
+ {0x269, 0x196},
+ {0x26b, 0x2c62},
+ {0x26c, 0xa7ad},
+ {0x26f, 0x19c},
+ {0x271, 0x2c6e},
+ {0x272, 0x19d},
+ {0x275, 0x19f},
+ {0x27d, 0x2c64},
+ {0x280, 0x1a6},
+ {0x283, 0x1a9},
+ {0x287, 0xa7b1},
+ {0x288, 0x1ae},
+ {0x289, 0x244},
+ {0x28a, 0x1b1},
+ {0x28b, 0x1b2},
+ {0x28c, 0x245},
+ {0x292, 0x1b7},
+ {0x29e, 0xa7b0},
+ {0x345, 0x399},
+ {0x345, 0x3b9},
+ {0x345, 0x1fbe},
+ {0x370, 0x371},
+ {0x371, 0x370},
+ {0x372, 0x373},
+ {0x373, 0x372},
+ {0x376, 0x377},
+ {0x377, 0x376},
+ {0x37b, 0x3fd},
+ {0x37c, 0x3fe},
+ {0x37d, 0x3ff},
+ {0x37f, 0x3f3},
+ {0x386, 0x3ac},
+ {0x388, 0x3ad},
+ {0x389, 0x3ae},
+ {0x38a, 0x3af},
+ {0x38c, 0x3cc},
+ {0x38e, 0x3cd},
+ {0x38f, 0x3ce},
+ {0x391, 0x3b1},
+ {0x392, 0x3b2},
+ {0x392, 0x3d0},
+ {0x393, 0x3b3},
+ {0x394, 0x3b4},
+ {0x395, 0x3b5},
+ {0x395, 0x3f5},
+ {0x396, 0x3b6},
+ {0x397, 0x3b7},
+ {0x398, 0x3b8},
+ {0x398, 0x3d1},
+ {0x398, 0x3f4},
+ {0x399, 0x345},
+ {0x399, 0x3b9},
+ {0x399, 0x1fbe},
+ {0x39a, 0x3ba},
+ {0x39a, 0x3f0},
+ {0x39b, 0x3bb},
+ {0x39c, 0xb5},
+ {0x39c, 0x3bc},
+ {0x39d, 0x3bd},
+ {0x39e, 0x3be},
+ {0x39f, 0x3bf},
+ {0x3a0, 0x3c0},
+ {0x3a0, 0x3d6},
+ {0x3a1, 0x3c1},
+ {0x3a1, 0x3f1},
+ {0x3a3, 0x3c2},
+ {0x3a3, 0x3c3},
+ {0x3a4, 0x3c4},
+ {0x3a5, 0x3c5},
+ {0x3a6, 0x3c6},
+ {0x3a6, 0x3d5},
+ {0x3a7, 0x3c7},
+ {0x3a8, 0x3c8},
+ {0x3a9, 0x3c9},
+ {0x3a9, 0x2126},
+ {0x3aa, 0x3ca},
+ {0x3ab, 0x3cb},
+ {0x3ac, 0x386},
+ {0x3ad, 0x388},
+ {0x3ae, 0x389},
+ {0x3af, 0x38a},
+ {0x3b1, 0x391},
+ {0x3b2, 0x392},
+ {0x3b2, 0x3d0},
+ {0x3b3, 0x393},
+ {0x3b4, 0x394},
+ {0x3b5, 0x395},
+ {0x3b5, 0x3f5},
+ {0x3b6, 0x396},
+ {0x3b7, 0x397},
+ {0x3b8, 0x398},
+ {0x3b8, 0x3d1},
+ {0x3b8, 0x3f4},
+ {0x3b9, 0x345},
+ {0x3b9, 0x399},
+ {0x3b9, 0x1fbe},
+ {0x3ba, 0x39a},
+ {0x3ba, 0x3f0},
+ {0x3bb, 0x39b},
+ {0x3bc, 0xb5},
+ {0x3bc, 0x39c},
+ {0x3bd, 0x39d},
+ {0x3be, 0x39e},
+ {0x3bf, 0x39f},
+ {0x3c0, 0x3a0},
+ {0x3c0, 0x3d6},
+ {0x3c1, 0x3a1},
+ {0x3c1, 0x3f1},
+ {0x3c2, 0x3a3},
+ {0x3c2, 0x3c3},
+ {0x3c3, 0x3a3},
+ {0x3c3, 0x3c2},
+ {0x3c4, 0x3a4},
+ {0x3c5, 0x3a5},
+ {0x3c6, 0x3a6},
+ {0x3c6, 0x3d5},
+ {0x3c7, 0x3a7},
+ {0x3c8, 0x3a8},
+ {0x3c9, 0x3a9},
+ {0x3c9, 0x2126},
+ {0x3ca, 0x3aa},
+ {0x3cb, 0x3ab},
+ {0x3cc, 0x38c},
+ {0x3cd, 0x38e},
+ {0x3ce, 0x38f},
+ {0x3cf, 0x3d7},
+ {0x3d0, 0x392},
+ {0x3d0, 0x3b2},
+ {0x3d1, 0x398},
+ {0x3d1, 0x3b8},
+ {0x3d1, 0x3f4},
+ {0x3d5, 0x3a6},
+ {0x3d5, 0x3c6},
+ {0x3d6, 0x3a0},
+ {0x3d6, 0x3c0},
+ {0x3d7, 0x3cf},
+ {0x3d8, 0x3d9},
+ {0x3d9, 0x3d8},
+ {0x3da, 0x3db},
+ {0x3db, 0x3da},
+ {0x3dc, 0x3dd},
+ {0x3dd, 0x3dc},
+ {0x3de, 0x3df},
+ {0x3df, 0x3de},
+ {0x3e0, 0x3e1},
+ {0x3e1, 0x3e0},
+ {0x3e2, 0x3e3},
+ {0x3e3, 0x3e2},
+ {0x3e4, 0x3e5},
+ {0x3e5, 0x3e4},
+ {0x3e6, 0x3e7},
+ {0x3e7, 0x3e6},
+ {0x3e8, 0x3e9},
+ {0x3e9, 0x3e8},
+ {0x3ea, 0x3eb},
+ {0x3eb, 0x3ea},
+ {0x3ec, 0x3ed},
+ {0x3ed, 0x3ec},
+ {0x3ee, 0x3ef},
+ {0x3ef, 0x3ee},
+ {0x3f0, 0x39a},
+ {0x3f0, 0x3ba},
+ {0x3f1, 0x3a1},
+ {0x3f1, 0x3c1},
+ {0x3f2, 0x3f9},
+ {0x3f3, 0x37f},
+ {0x3f4, 0x398},
+ {0x3f4, 0x3b8},
+ {0x3f4, 0x3d1},
+ {0x3f5, 0x395},
+ {0x3f5, 0x3b5},
+ {0x3f7, 0x3f8},
+ {0x3f8, 0x3f7},
+ {0x3f9, 0x3f2},
+ {0x3fa, 0x3fb},
+ {0x3fb, 0x3fa},
+ {0x3fd, 0x37b},
+ {0x3fe, 0x37c},
+ {0x3ff, 0x37d},
+ {0x400, 0x450},
+ {0x401, 0x451},
+ {0x402, 0x452},
+ {0x403, 0x453},
+ {0x404, 0x454},
+ {0x405, 0x455},
+ {0x406, 0x456},
+ {0x407, 0x457},
+ {0x408, 0x458},
+ {0x409, 0x459},
+ {0x40a, 0x45a},
+ {0x40b, 0x45b},
+ {0x40c, 0x45c},
+ {0x40d, 0x45d},
+ {0x40e, 0x45e},
+ {0x40f, 0x45f},
+ {0x410, 0x430},
+ {0x411, 0x431},
+ {0x412, 0x432},
+ {0x413, 0x433},
+ {0x414, 0x434},
+ {0x415, 0x435},
+ {0x416, 0x436},
+ {0x417, 0x437},
+ {0x418, 0x438},
+ {0x419, 0x439},
+ {0x41a, 0x43a},
+ {0x41b, 0x43b},
+ {0x41c, 0x43c},
+ {0x41d, 0x43d},
+ {0x41e, 0x43e},
+ {0x41f, 0x43f},
+ {0x420, 0x440},
+ {0x421, 0x441},
+ {0x422, 0x442},
+ {0x423, 0x443},
+ {0x424, 0x444},
+ {0x425, 0x445},
+ {0x426, 0x446},
+ {0x427, 0x447},
+ {0x428, 0x448},
+ {0x429, 0x449},
+ {0x42a, 0x44a},
+ {0x42b, 0x44b},
+ {0x42c, 0x44c},
+ {0x42d, 0x44d},
+ {0x42e, 0x44e},
+ {0x42f, 0x44f},
+ {0x430, 0x410},
+ {0x431, 0x411},
+ {0x432, 0x412},
+ {0x433, 0x413},
+ {0x434, 0x414},
+ {0x435, 0x415},
+ {0x436, 0x416},
+ {0x437, 0x417},
+ {0x438, 0x418},
+ {0x439, 0x419},
+ {0x43a, 0x41a},
+ {0x43b, 0x41b},
+ {0x43c, 0x41c},
+ {0x43d, 0x41d},
+ {0x43e, 0x41e},
+ {0x43f, 0x41f},
+ {0x440, 0x420},
+ {0x441, 0x421},
+ {0x442, 0x422},
+ {0x443, 0x423},
+ {0x444, 0x424},
+ {0x445, 0x425},
+ {0x446, 0x426},
+ {0x447, 0x427},
+ {0x448, 0x428},
+ {0x449, 0x429},
+ {0x44a, 0x42a},
+ {0x44b, 0x42b},
+ {0x44c, 0x42c},
+ {0x44d, 0x42d},
+ {0x44e, 0x42e},
+ {0x44f, 0x42f},
+ {0x450, 0x400},
+ {0x451, 0x401},
+ {0x452, 0x402},
+ {0x453, 0x403},
+ {0x454, 0x404},
+ {0x455, 0x405},
+ {0x456, 0x406},
+ {0x457, 0x407},
+ {0x458, 0x408},
+ {0x459, 0x409},
+ {0x45a, 0x40a},
+ {0x45b, 0x40b},
+ {0x45c, 0x40c},
+ {0x45d, 0x40d},
+ {0x45e, 0x40e},
+ {0x45f, 0x40f},
+ {0x460, 0x461},
+ {0x461, 0x460},
+ {0x462, 0x463},
+ {0x463, 0x462},
+ {0x464, 0x465},
+ {0x465, 0x464},
+ {0x466, 0x467},
+ {0x467, 0x466},
+ {0x468, 0x469},
+ {0x469, 0x468},
+ {0x46a, 0x46b},
+ {0x46b, 0x46a},
+ {0x46c, 0x46d},
+ {0x46d, 0x46c},
+ {0x46e, 0x46f},
+ {0x46f, 0x46e},
+ {0x470, 0x471},
+ {0x471, 0x470},
+ {0x472, 0x473},
+ {0x473, 0x472},
+ {0x474, 0x475},
+ {0x475, 0x474},
+ {0x476, 0x477},
+ {0x477, 0x476},
+ {0x478, 0x479},
+ {0x479, 0x478},
+ {0x47a, 0x47b},
+ {0x47b, 0x47a},
+ {0x47c, 0x47d},
+ {0x47d, 0x47c},
+ {0x47e, 0x47f},
+ {0x47f, 0x47e},
+ {0x480, 0x481},
+ {0x481, 0x480},
+ {0x48a, 0x48b},
+ {0x48b, 0x48a},
+ {0x48c, 0x48d},
+ {0x48d, 0x48c},
+ {0x48e, 0x48f},
+ {0x48f, 0x48e},
+ {0x490, 0x491},
+ {0x491, 0x490},
+ {0x492, 0x493},
+ {0x493, 0x492},
+ {0x494, 0x495},
+ {0x495, 0x494},
+ {0x496, 0x497},
+ {0x497, 0x496},
+ {0x498, 0x499},
+ {0x499, 0x498},
+ {0x49a, 0x49b},
+ {0x49b, 0x49a},
+ {0x49c, 0x49d},
+ {0x49d, 0x49c},
+ {0x49e, 0x49f},
+ {0x49f, 0x49e},
+ {0x4a0, 0x4a1},
+ {0x4a1, 0x4a0},
+ {0x4a2, 0x4a3},
+ {0x4a3, 0x4a2},
+ {0x4a4, 0x4a5},
+ {0x4a5, 0x4a4},
+ {0x4a6, 0x4a7},
+ {0x4a7, 0x4a6},
+ {0x4a8, 0x4a9},
+ {0x4a9, 0x4a8},
+ {0x4aa, 0x4ab},
+ {0x4ab, 0x4aa},
+ {0x4ac, 0x4ad},
+ {0x4ad, 0x4ac},
+ {0x4ae, 0x4af},
+ {0x4af, 0x4ae},
+ {0x4b0, 0x4b1},
+ {0x4b1, 0x4b0},
+ {0x4b2, 0x4b3},
+ {0x4b3, 0x4b2},
+ {0x4b4, 0x4b5},
+ {0x4b5, 0x4b4},
+ {0x4b6, 0x4b7},
+ {0x4b7, 0x4b6},
+ {0x4b8, 0x4b9},
+ {0x4b9, 0x4b8},
+ {0x4ba, 0x4bb},
+ {0x4bb, 0x4ba},
+ {0x4bc, 0x4bd},
+ {0x4bd, 0x4bc},
+ {0x4be, 0x4bf},
+ {0x4bf, 0x4be},
+ {0x4c0, 0x4cf},
+ {0x4c1, 0x4c2},
+ {0x4c2, 0x4c1},
+ {0x4c3, 0x4c4},
+ {0x4c4, 0x4c3},
+ {0x4c5, 0x4c6},
+ {0x4c6, 0x4c5},
+ {0x4c7, 0x4c8},
+ {0x4c8, 0x4c7},
+ {0x4c9, 0x4ca},
+ {0x4ca, 0x4c9},
+ {0x4cb, 0x4cc},
+ {0x4cc, 0x4cb},
+ {0x4cd, 0x4ce},
+ {0x4ce, 0x4cd},
+ {0x4cf, 0x4c0},
+ {0x4d0, 0x4d1},
+ {0x4d1, 0x4d0},
+ {0x4d2, 0x4d3},
+ {0x4d3, 0x4d2},
+ {0x4d4, 0x4d5},
+ {0x4d5, 0x4d4},
+ {0x4d6, 0x4d7},
+ {0x4d7, 0x4d6},
+ {0x4d8, 0x4d9},
+ {0x4d9, 0x4d8},
+ {0x4da, 0x4db},
+ {0x4db, 0x4da},
+ {0x4dc, 0x4dd},
+ {0x4dd, 0x4dc},
+ {0x4de, 0x4df},
+ {0x4df, 0x4de},
+ {0x4e0, 0x4e1},
+ {0x4e1, 0x4e0},
+ {0x4e2, 0x4e3},
+ {0x4e3, 0x4e2},
+ {0x4e4, 0x4e5},
+ {0x4e5, 0x4e4},
+ {0x4e6, 0x4e7},
+ {0x4e7, 0x4e6},
+ {0x4e8, 0x4e9},
+ {0x4e9, 0x4e8},
+ {0x4ea, 0x4eb},
+ {0x4eb, 0x4ea},
+ {0x4ec, 0x4ed},
+ {0x4ed, 0x4ec},
+ {0x4ee, 0x4ef},
+ {0x4ef, 0x4ee},
+ {0x4f0, 0x4f1},
+ {0x4f1, 0x4f0},
+ {0x4f2, 0x4f3},
+ {0x4f3, 0x4f2},
+ {0x4f4, 0x4f5},
+ {0x4f5, 0x4f4},
+ {0x4f6, 0x4f7},
+ {0x4f7, 0x4f6},
+ {0x4f8, 0x4f9},
+ {0x4f9, 0x4f8},
+ {0x4fa, 0x4fb},
+ {0x4fb, 0x4fa},
+ {0x4fc, 0x4fd},
+ {0x4fd, 0x4fc},
+ {0x4fe, 0x4ff},
+ {0x4ff, 0x4fe},
+ {0x500, 0x501},
+ {0x501, 0x500},
+ {0x502, 0x503},
+ {0x503, 0x502},
+ {0x504, 0x505},
+ {0x505, 0x504},
+ {0x506, 0x507},
+ {0x507, 0x506},
+ {0x508, 0x509},
+ {0x509, 0x508},
+ {0x50a, 0x50b},
+ {0x50b, 0x50a},
+ {0x50c, 0x50d},
+ {0x50d, 0x50c},
+ {0x50e, 0x50f},
+ {0x50f, 0x50e},
+ {0x510, 0x511},
+ {0x511, 0x510},
+ {0x512, 0x513},
+ {0x513, 0x512},
+ {0x514, 0x515},
+ {0x515, 0x514},
+ {0x516, 0x517},
+ {0x517, 0x516},
+ {0x518, 0x519},
+ {0x519, 0x518},
+ {0x51a, 0x51b},
+ {0x51b, 0x51a},
+ {0x51c, 0x51d},
+ {0x51d, 0x51c},
+ {0x51e, 0x51f},
+ {0x51f, 0x51e},
+ {0x520, 0x521},
+ {0x521, 0x520},
+ {0x522, 0x523},
+ {0x523, 0x522},
+ {0x524, 0x525},
+ {0x525, 0x524},
+ {0x526, 0x527},
+ {0x527, 0x526},
+ {0x528, 0x529},
+ {0x529, 0x528},
+ {0x52a, 0x52b},
+ {0x52b, 0x52a},
+ {0x52c, 0x52d},
+ {0x52d, 0x52c},
+ {0x52e, 0x52f},
+ {0x52f, 0x52e},
+ {0x531, 0x561},
+ {0x532, 0x562},
+ {0x533, 0x563},
+ {0x534, 0x564},
+ {0x535, 0x565},
+ {0x536, 0x566},
+ {0x537, 0x567},
+ {0x538, 0x568},
+ {0x539, 0x569},
+ {0x53a, 0x56a},
+ {0x53b, 0x56b},
+ {0x53c, 0x56c},
+ {0x53d, 0x56d},
+ {0x53e, 0x56e},
+ {0x53f, 0x56f},
+ {0x540, 0x570},
+ {0x541, 0x571},
+ {0x542, 0x572},
+ {0x543, 0x573},
+ {0x544, 0x574},
+ {0x545, 0x575},
+ {0x546, 0x576},
+ {0x547, 0x577},
+ {0x548, 0x578},
+ {0x549, 0x579},
+ {0x54a, 0x57a},
+ {0x54b, 0x57b},
+ {0x54c, 0x57c},
+ {0x54d, 0x57d},
+ {0x54e, 0x57e},
+ {0x54f, 0x57f},
+ {0x550, 0x580},
+ {0x551, 0x581},
+ {0x552, 0x582},
+ {0x553, 0x583},
+ {0x554, 0x584},
+ {0x555, 0x585},
+ {0x556, 0x586},
+ {0x561, 0x531},
+ {0x562, 0x532},
+ {0x563, 0x533},
+ {0x564, 0x534},
+ {0x565, 0x535},
+ {0x566, 0x536},
+ {0x567, 0x537},
+ {0x568, 0x538},
+ {0x569, 0x539},
+ {0x56a, 0x53a},
+ {0x56b, 0x53b},
+ {0x56c, 0x53c},
+ {0x56d, 0x53d},
+ {0x56e, 0x53e},
+ {0x56f, 0x53f},
+ {0x570, 0x540},
+ {0x571, 0x541},
+ {0x572, 0x542},
+ {0x573, 0x543},
+ {0x574, 0x544},
+ {0x575, 0x545},
+ {0x576, 0x546},
+ {0x577, 0x547},
+ {0x578, 0x548},
+ {0x579, 0x549},
+ {0x57a, 0x54a},
+ {0x57b, 0x54b},
+ {0x57c, 0x54c},
+ {0x57d, 0x54d},
+ {0x57e, 0x54e},
+ {0x57f, 0x54f},
+ {0x580, 0x550},
+ {0x581, 0x551},
+ {0x582, 0x552},
+ {0x583, 0x553},
+ {0x584, 0x554},
+ {0x585, 0x555},
+ {0x586, 0x556},
+ {0x10a0, 0x2d00},
+ {0x10a1, 0x2d01},
+ {0x10a2, 0x2d02},
+ {0x10a3, 0x2d03},
+ {0x10a4, 0x2d04},
+ {0x10a5, 0x2d05},
+ {0x10a6, 0x2d06},
+ {0x10a7, 0x2d07},
+ {0x10a8, 0x2d08},
+ {0x10a9, 0x2d09},
+ {0x10aa, 0x2d0a},
+ {0x10ab, 0x2d0b},
+ {0x10ac, 0x2d0c},
+ {0x10ad, 0x2d0d},
+ {0x10ae, 0x2d0e},
+ {0x10af, 0x2d0f},
+ {0x10b0, 0x2d10},
+ {0x10b1, 0x2d11},
+ {0x10b2, 0x2d12},
+ {0x10b3, 0x2d13},
+ {0x10b4, 0x2d14},
+ {0x10b5, 0x2d15},
+ {0x10b6, 0x2d16},
+ {0x10b7, 0x2d17},
+ {0x10b8, 0x2d18},
+ {0x10b9, 0x2d19},
+ {0x10ba, 0x2d1a},
+ {0x10bb, 0x2d1b},
+ {0x10bc, 0x2d1c},
+ {0x10bd, 0x2d1d},
+ {0x10be, 0x2d1e},
+ {0x10bf, 0x2d1f},
+ {0x10c0, 0x2d20},
+ {0x10c1, 0x2d21},
+ {0x10c2, 0x2d22},
+ {0x10c3, 0x2d23},
+ {0x10c4, 0x2d24},
+ {0x10c5, 0x2d25},
+ {0x10c7, 0x2d27},
+ {0x10cd, 0x2d2d},
+ {0x1d79, 0xa77d},
+ {0x1d7d, 0x2c63},
+ {0x1e00, 0x1e01},
+ {0x1e01, 0x1e00},
+ {0x1e02, 0x1e03},
+ {0x1e03, 0x1e02},
+ {0x1e04, 0x1e05},
+ {0x1e05, 0x1e04},
+ {0x1e06, 0x1e07},
+ {0x1e07, 0x1e06},
+ {0x1e08, 0x1e09},
+ {0x1e09, 0x1e08},
+ {0x1e0a, 0x1e0b},
+ {0x1e0b, 0x1e0a},
+ {0x1e0c, 0x1e0d},
+ {0x1e0d, 0x1e0c},
+ {0x1e0e, 0x1e0f},
+ {0x1e0f, 0x1e0e},
+ {0x1e10, 0x1e11},
+ {0x1e11, 0x1e10},
+ {0x1e12, 0x1e13},
+ {0x1e13, 0x1e12},
+ {0x1e14, 0x1e15},
+ {0x1e15, 0x1e14},
+ {0x1e16, 0x1e17},
+ {0x1e17, 0x1e16},
+ {0x1e18, 0x1e19},
+ {0x1e19, 0x1e18},
+ {0x1e1a, 0x1e1b},
+ {0x1e1b, 0x1e1a},
+ {0x1e1c, 0x1e1d},
+ {0x1e1d, 0x1e1c},
+ {0x1e1e, 0x1e1f},
+ {0x1e1f, 0x1e1e},
+ {0x1e20, 0x1e21},
+ {0x1e21, 0x1e20},
+ {0x1e22, 0x1e23},
+ {0x1e23, 0x1e22},
+ {0x1e24, 0x1e25},
+ {0x1e25, 0x1e24},
+ {0x1e26, 0x1e27},
+ {0x1e27, 0x1e26},
+ {0x1e28, 0x1e29},
+ {0x1e29, 0x1e28},
+ {0x1e2a, 0x1e2b},
+ {0x1e2b, 0x1e2a},
+ {0x1e2c, 0x1e2d},
+ {0x1e2d, 0x1e2c},
+ {0x1e2e, 0x1e2f},
+ {0x1e2f, 0x1e2e},
+ {0x1e30, 0x1e31},
+ {0x1e31, 0x1e30},
+ {0x1e32, 0x1e33},
+ {0x1e33, 0x1e32},
+ {0x1e34, 0x1e35},
+ {0x1e35, 0x1e34},
+ {0x1e36, 0x1e37},
+ {0x1e37, 0x1e36},
+ {0x1e38, 0x1e39},
+ {0x1e39, 0x1e38},
+ {0x1e3a, 0x1e3b},
+ {0x1e3b, 0x1e3a},
+ {0x1e3c, 0x1e3d},
+ {0x1e3d, 0x1e3c},
+ {0x1e3e, 0x1e3f},
+ {0x1e3f, 0x1e3e},
+ {0x1e40, 0x1e41},
+ {0x1e41, 0x1e40},
+ {0x1e42, 0x1e43},
+ {0x1e43, 0x1e42},
+ {0x1e44, 0x1e45},
+ {0x1e45, 0x1e44},
+ {0x1e46, 0x1e47},
+ {0x1e47, 0x1e46},
+ {0x1e48, 0x1e49},
+ {0x1e49, 0x1e48},
+ {0x1e4a, 0x1e4b},
+ {0x1e4b, 0x1e4a},
+ {0x1e4c, 0x1e4d},
+ {0x1e4d, 0x1e4c},
+ {0x1e4e, 0x1e4f},
+ {0x1e4f, 0x1e4e},
+ {0x1e50, 0x1e51},
+ {0x1e51, 0x1e50},
+ {0x1e52, 0x1e53},
+ {0x1e53, 0x1e52},
+ {0x1e54, 0x1e55},
+ {0x1e55, 0x1e54},
+ {0x1e56, 0x1e57},
+ {0x1e57, 0x1e56},
+ {0x1e58, 0x1e59},
+ {0x1e59, 0x1e58},
+ {0x1e5a, 0x1e5b},
+ {0x1e5b, 0x1e5a},
+ {0x1e5c, 0x1e5d},
+ {0x1e5d, 0x1e5c},
+ {0x1e5e, 0x1e5f},
+ {0x1e5f, 0x1e5e},
+ {0x1e60, 0x1e61},
+ {0x1e60, 0x1e9b},
+ {0x1e61, 0x1e60},
+ {0x1e61, 0x1e9b},
+ {0x1e62, 0x1e63},
+ {0x1e63, 0x1e62},
+ {0x1e64, 0x1e65},
+ {0x1e65, 0x1e64},
+ {0x1e66, 0x1e67},
+ {0x1e67, 0x1e66},
+ {0x1e68, 0x1e69},
+ {0x1e69, 0x1e68},
+ {0x1e6a, 0x1e6b},
+ {0x1e6b, 0x1e6a},
+ {0x1e6c, 0x1e6d},
+ {0x1e6d, 0x1e6c},
+ {0x1e6e, 0x1e6f},
+ {0x1e6f, 0x1e6e},
+ {0x1e70, 0x1e71},
+ {0x1e71, 0x1e70},
+ {0x1e72, 0x1e73},
+ {0x1e73, 0x1e72},
+ {0x1e74, 0x1e75},
+ {0x1e75, 0x1e74},
+ {0x1e76, 0x1e77},
+ {0x1e77, 0x1e76},
+ {0x1e78, 0x1e79},
+ {0x1e79, 0x1e78},
+ {0x1e7a, 0x1e7b},
+ {0x1e7b, 0x1e7a},
+ {0x1e7c, 0x1e7d},
+ {0x1e7d, 0x1e7c},
+ {0x1e7e, 0x1e7f},
+ {0x1e7f, 0x1e7e},
+ {0x1e80, 0x1e81},
+ {0x1e81, 0x1e80},
+ {0x1e82, 0x1e83},
+ {0x1e83, 0x1e82},
+ {0x1e84, 0x1e85},
+ {0x1e85, 0x1e84},
+ {0x1e86, 0x1e87},
+ {0x1e87, 0x1e86},
+ {0x1e88, 0x1e89},
+ {0x1e89, 0x1e88},
+ {0x1e8a, 0x1e8b},
+ {0x1e8b, 0x1e8a},
+ {0x1e8c, 0x1e8d},
+ {0x1e8d, 0x1e8c},
+ {0x1e8e, 0x1e8f},
+ {0x1e8f, 0x1e8e},
+ {0x1e90, 0x1e91},
+ {0x1e91, 0x1e90},
+ {0x1e92, 0x1e93},
+ {0x1e93, 0x1e92},
+ {0x1e94, 0x1e95},
+ {0x1e95, 0x1e94},
+ {0x1e9b, 0x1e60},
+ {0x1e9b, 0x1e61},
+ {0x1e9e, 0xdf},
+ {0x1ea0, 0x1ea1},
+ {0x1ea1, 0x1ea0},
+ {0x1ea2, 0x1ea3},
+ {0x1ea3, 0x1ea2},
+ {0x1ea4, 0x1ea5},
+ {0x1ea5, 0x1ea4},
+ {0x1ea6, 0x1ea7},
+ {0x1ea7, 0x1ea6},
+ {0x1ea8, 0x1ea9},
+ {0x1ea9, 0x1ea8},
+ {0x1eaa, 0x1eab},
+ {0x1eab, 0x1eaa},
+ {0x1eac, 0x1ead},
+ {0x1ead, 0x1eac},
+ {0x1eae, 0x1eaf},
+ {0x1eaf, 0x1eae},
+ {0x1eb0, 0x1eb1},
+ {0x1eb1, 0x1eb0},
+ {0x1eb2, 0x1eb3},
+ {0x1eb3, 0x1eb2},
+ {0x1eb4, 0x1eb5},
+ {0x1eb5, 0x1eb4},
+ {0x1eb6, 0x1eb7},
+ {0x1eb7, 0x1eb6},
+ {0x1eb8, 0x1eb9},
+ {0x1eb9, 0x1eb8},
+ {0x1eba, 0x1ebb},
+ {0x1ebb, 0x1eba},
+ {0x1ebc, 0x1ebd},
+ {0x1ebd, 0x1ebc},
+ {0x1ebe, 0x1ebf},
+ {0x1ebf, 0x1ebe},
+ {0x1ec0, 0x1ec1},
+ {0x1ec1, 0x1ec0},
+ {0x1ec2, 0x1ec3},
+ {0x1ec3, 0x1ec2},
+ {0x1ec4, 0x1ec5},
+ {0x1ec5, 0x1ec4},
+ {0x1ec6, 0x1ec7},
+ {0x1ec7, 0x1ec6},
+ {0x1ec8, 0x1ec9},
+ {0x1ec9, 0x1ec8},
+ {0x1eca, 0x1ecb},
+ {0x1ecb, 0x1eca},
+ {0x1ecc, 0x1ecd},
+ {0x1ecd, 0x1ecc},
+ {0x1ece, 0x1ecf},
+ {0x1ecf, 0x1ece},
+ {0x1ed0, 0x1ed1},
+ {0x1ed1, 0x1ed0},
+ {0x1ed2, 0x1ed3},
+ {0x1ed3, 0x1ed2},
+ {0x1ed4, 0x1ed5},
+ {0x1ed5, 0x1ed4},
+ {0x1ed6, 0x1ed7},
+ {0x1ed7, 0x1ed6},
+ {0x1ed8, 0x1ed9},
+ {0x1ed9, 0x1ed8},
+ {0x1eda, 0x1edb},
+ {0x1edb, 0x1eda},
+ {0x1edc, 0x1edd},
+ {0x1edd, 0x1edc},
+ {0x1ede, 0x1edf},
+ {0x1edf, 0x1ede},
+ {0x1ee0, 0x1ee1},
+ {0x1ee1, 0x1ee0},
+ {0x1ee2, 0x1ee3},
+ {0x1ee3, 0x1ee2},
+ {0x1ee4, 0x1ee5},
+ {0x1ee5, 0x1ee4},
+ {0x1ee6, 0x1ee7},
+ {0x1ee7, 0x1ee6},
+ {0x1ee8, 0x1ee9},
+ {0x1ee9, 0x1ee8},
+ {0x1eea, 0x1eeb},
+ {0x1eeb, 0x1eea},
+ {0x1eec, 0x1eed},
+ {0x1eed, 0x1eec},
+ {0x1eee, 0x1eef},
+ {0x1eef, 0x1eee},
+ {0x1ef0, 0x1ef1},
+ {0x1ef1, 0x1ef0},
+ {0x1ef2, 0x1ef3},
+ {0x1ef3, 0x1ef2},
+ {0x1ef4, 0x1ef5},
+ {0x1ef5, 0x1ef4},
+ {0x1ef6, 0x1ef7},
+ {0x1ef7, 0x1ef6},
+ {0x1ef8, 0x1ef9},
+ {0x1ef9, 0x1ef8},
+ {0x1efa, 0x1efb},
+ {0x1efb, 0x1efa},
+ {0x1efc, 0x1efd},
+ {0x1efd, 0x1efc},
+ {0x1efe, 0x1eff},
+ {0x1eff, 0x1efe},
+ {0x1f00, 0x1f08},
+ {0x1f01, 0x1f09},
+ {0x1f02, 0x1f0a},
+ {0x1f03, 0x1f0b},
+ {0x1f04, 0x1f0c},
+ {0x1f05, 0x1f0d},
+ {0x1f06, 0x1f0e},
+ {0x1f07, 0x1f0f},
+ {0x1f08, 0x1f00},
+ {0x1f09, 0x1f01},
+ {0x1f0a, 0x1f02},
+ {0x1f0b, 0x1f03},
+ {0x1f0c, 0x1f04},
+ {0x1f0d, 0x1f05},
+ {0x1f0e, 0x1f06},
+ {0x1f0f, 0x1f07},
+ {0x1f10, 0x1f18},
+ {0x1f11, 0x1f19},
+ {0x1f12, 0x1f1a},
+ {0x1f13, 0x1f1b},
+ {0x1f14, 0x1f1c},
+ {0x1f15, 0x1f1d},
+ {0x1f18, 0x1f10},
+ {0x1f19, 0x1f11},
+ {0x1f1a, 0x1f12},
+ {0x1f1b, 0x1f13},
+ {0x1f1c, 0x1f14},
+ {0x1f1d, 0x1f15},
+ {0x1f20, 0x1f28},
+ {0x1f21, 0x1f29},
+ {0x1f22, 0x1f2a},
+ {0x1f23, 0x1f2b},
+ {0x1f24, 0x1f2c},
+ {0x1f25, 0x1f2d},
+ {0x1f26, 0x1f2e},
+ {0x1f27, 0x1f2f},
+ {0x1f28, 0x1f20},
+ {0x1f29, 0x1f21},
+ {0x1f2a, 0x1f22},
+ {0x1f2b, 0x1f23},
+ {0x1f2c, 0x1f24},
+ {0x1f2d, 0x1f25},
+ {0x1f2e, 0x1f26},
+ {0x1f2f, 0x1f27},
+ {0x1f30, 0x1f38},
+ {0x1f31, 0x1f39},
+ {0x1f32, 0x1f3a},
+ {0x1f33, 0x1f3b},
+ {0x1f34, 0x1f3c},
+ {0x1f35, 0x1f3d},
+ {0x1f36, 0x1f3e},
+ {0x1f37, 0x1f3f},
+ {0x1f38, 0x1f30},
+ {0x1f39, 0x1f31},
+ {0x1f3a, 0x1f32},
+ {0x1f3b, 0x1f33},
+ {0x1f3c, 0x1f34},
+ {0x1f3d, 0x1f35},
+ {0x1f3e, 0x1f36},
+ {0x1f3f, 0x1f37},
+ {0x1f40, 0x1f48},
+ {0x1f41, 0x1f49},
+ {0x1f42, 0x1f4a},
+ {0x1f43, 0x1f4b},
+ {0x1f44, 0x1f4c},
+ {0x1f45, 0x1f4d},
+ {0x1f48, 0x1f40},
+ {0x1f49, 0x1f41},
+ {0x1f4a, 0x1f42},
+ {0x1f4b, 0x1f43},
+ {0x1f4c, 0x1f44},
+ {0x1f4d, 0x1f45},
+ {0x1f51, 0x1f59},
+ {0x1f53, 0x1f5b},
+ {0x1f55, 0x1f5d},
+ {0x1f57, 0x1f5f},
+ {0x1f59, 0x1f51},
+ {0x1f5b, 0x1f53},
+ {0x1f5d, 0x1f55},
+ {0x1f5f, 0x1f57},
+ {0x1f60, 0x1f68},
+ {0x1f61, 0x1f69},
+ {0x1f62, 0x1f6a},
+ {0x1f63, 0x1f6b},
+ {0x1f64, 0x1f6c},
+ {0x1f65, 0x1f6d},
+ {0x1f66, 0x1f6e},
+ {0x1f67, 0x1f6f},
+ {0x1f68, 0x1f60},
+ {0x1f69, 0x1f61},
+ {0x1f6a, 0x1f62},
+ {0x1f6b, 0x1f63},
+ {0x1f6c, 0x1f64},
+ {0x1f6d, 0x1f65},
+ {0x1f6e, 0x1f66},
+ {0x1f6f, 0x1f67},
+ {0x1f70, 0x1fba},
+ {0x1f71, 0x1fbb},
+ {0x1f72, 0x1fc8},
+ {0x1f73, 0x1fc9},
+ {0x1f74, 0x1fca},
+ {0x1f75, 0x1fcb},
+ {0x1f76, 0x1fda},
+ {0x1f77, 0x1fdb},
+ {0x1f78, 0x1ff8},
+ {0x1f79, 0x1ff9},
+ {0x1f7a, 0x1fea},
+ {0x1f7b, 0x1feb},
+ {0x1f7c, 0x1ffa},
+ {0x1f7d, 0x1ffb},
+ {0x1f80, 0x1f88},
+ {0x1f81, 0x1f89},
+ {0x1f82, 0x1f8a},
+ {0x1f83, 0x1f8b},
+ {0x1f84, 0x1f8c},
+ {0x1f85, 0x1f8d},
+ {0x1f86, 0x1f8e},
+ {0x1f87, 0x1f8f},
+ {0x1f88, 0x1f80},
+ {0x1f89, 0x1f81},
+ {0x1f8a, 0x1f82},
+ {0x1f8b, 0x1f83},
+ {0x1f8c, 0x1f84},
+ {0x1f8d, 0x1f85},
+ {0x1f8e, 0x1f86},
+ {0x1f8f, 0x1f87},
+ {0x1f90, 0x1f98},
+ {0x1f91, 0x1f99},
+ {0x1f92, 0x1f9a},
+ {0x1f93, 0x1f9b},
+ {0x1f94, 0x1f9c},
+ {0x1f95, 0x1f9d},
+ {0x1f96, 0x1f9e},
+ {0x1f97, 0x1f9f},
+ {0x1f98, 0x1f90},
+ {0x1f99, 0x1f91},
+ {0x1f9a, 0x1f92},
+ {0x1f9b, 0x1f93},
+ {0x1f9c, 0x1f94},
+ {0x1f9d, 0x1f95},
+ {0x1f9e, 0x1f96},
+ {0x1f9f, 0x1f97},
+ {0x1fa0, 0x1fa8},
+ {0x1fa1, 0x1fa9},
+ {0x1fa2, 0x1faa},
+ {0x1fa3, 0x1fab},
+ {0x1fa4, 0x1fac},
+ {0x1fa5, 0x1fad},
+ {0x1fa6, 0x1fae},
+ {0x1fa7, 0x1faf},
+ {0x1fa8, 0x1fa0},
+ {0x1fa9, 0x1fa1},
+ {0x1faa, 0x1fa2},
+ {0x1fab, 0x1fa3},
+ {0x1fac, 0x1fa4},
+ {0x1fad, 0x1fa5},
+ {0x1fae, 0x1fa6},
+ {0x1faf, 0x1fa7},
+ {0x1fb0, 0x1fb8},
+ {0x1fb1, 0x1fb9},
+ {0x1fb3, 0x1fbc},
+ {0x1fb8, 0x1fb0},
+ {0x1fb9, 0x1fb1},
+ {0x1fba, 0x1f70},
+ {0x1fbb, 0x1f71},
+ {0x1fbc, 0x1fb3},
+ {0x1fbe, 0x345},
+ {0x1fbe, 0x399},
+ {0x1fbe, 0x3b9},
+ {0x1fc3, 0x1fcc},
+ {0x1fc8, 0x1f72},
+ {0x1fc9, 0x1f73},
+ {0x1fca, 0x1f74},
+ {0x1fcb, 0x1f75},
+ {0x1fcc, 0x1fc3},
+ {0x1fd0, 0x1fd8},
+ {0x1fd1, 0x1fd9},
+ {0x1fd8, 0x1fd0},
+ {0x1fd9, 0x1fd1},
+ {0x1fda, 0x1f76},
+ {0x1fdb, 0x1f77},
+ {0x1fe0, 0x1fe8},
+ {0x1fe1, 0x1fe9},
+ {0x1fe5, 0x1fec},
+ {0x1fe8, 0x1fe0},
+ {0x1fe9, 0x1fe1},
+ {0x1fea, 0x1f7a},
+ {0x1feb, 0x1f7b},
+ {0x1fec, 0x1fe5},
+ {0x1ff3, 0x1ffc},
+ {0x1ff8, 0x1f78},
+ {0x1ff9, 0x1f79},
+ {0x1ffa, 0x1f7c},
+ {0x1ffb, 0x1f7d},
+ {0x1ffc, 0x1ff3},
+ {0x2126, 0x3a9},
+ {0x2126, 0x3c9},
+ {0x212a, 0x4b},
+ {0x212a, 0x6b},
+ {0x212b, 0xc5},
+ {0x212b, 0xe5},
+ {0x2132, 0x214e},
+ {0x214e, 0x2132},
+ {0x2160, 0x2170},
+ {0x2161, 0x2171},
+ {0x2162, 0x2172},
+ {0x2163, 0x2173},
+ {0x2164, 0x2174},
+ {0x2165, 0x2175},
+ {0x2166, 0x2176},
+ {0x2167, 0x2177},
+ {0x2168, 0x2178},
+ {0x2169, 0x2179},
+ {0x216a, 0x217a},
+ {0x216b, 0x217b},
+ {0x216c, 0x217c},
+ {0x216d, 0x217d},
+ {0x216e, 0x217e},
+ {0x216f, 0x217f},
+ {0x2170, 0x2160},
+ {0x2171, 0x2161},
+ {0x2172, 0x2162},
+ {0x2173, 0x2163},
+ {0x2174, 0x2164},
+ {0x2175, 0x2165},
+ {0x2176, 0x2166},
+ {0x2177, 0x2167},
+ {0x2178, 0x2168},
+ {0x2179, 0x2169},
+ {0x217a, 0x216a},
+ {0x217b, 0x216b},
+ {0x217c, 0x216c},
+ {0x217d, 0x216d},
+ {0x217e, 0x216e},
+ {0x217f, 0x216f},
+ {0x2183, 0x2184},
+ {0x2184, 0x2183},
+ {0x24b6, 0x24d0},
+ {0x24b7, 0x24d1},
+ {0x24b8, 0x24d2},
+ {0x24b9, 0x24d3},
+ {0x24ba, 0x24d4},
+ {0x24bb, 0x24d5},
+ {0x24bc, 0x24d6},
+ {0x24bd, 0x24d7},
+ {0x24be, 0x24d8},
+ {0x24bf, 0x24d9},
+ {0x24c0, 0x24da},
+ {0x24c1, 0x24db},
+ {0x24c2, 0x24dc},
+ {0x24c3, 0x24dd},
+ {0x24c4, 0x24de},
+ {0x24c5, 0x24df},
+ {0x24c6, 0x24e0},
+ {0x24c7, 0x24e1},
+ {0x24c8, 0x24e2},
+ {0x24c9, 0x24e3},
+ {0x24ca, 0x24e4},
+ {0x24cb, 0x24e5},
+ {0x24cc, 0x24e6},
+ {0x24cd, 0x24e7},
+ {0x24ce, 0x24e8},
+ {0x24cf, 0x24e9},
+ {0x24d0, 0x24b6},
+ {0x24d1, 0x24b7},
+ {0x24d2, 0x24b8},
+ {0x24d3, 0x24b9},
+ {0x24d4, 0x24ba},
+ {0x24d5, 0x24bb},
+ {0x24d6, 0x24bc},
+ {0x24d7, 0x24bd},
+ {0x24d8, 0x24be},
+ {0x24d9, 0x24bf},
+ {0x24da, 0x24c0},
+ {0x24db, 0x24c1},
+ {0x24dc, 0x24c2},
+ {0x24dd, 0x24c3},
+ {0x24de, 0x24c4},
+ {0x24df, 0x24c5},
+ {0x24e0, 0x24c6},
+ {0x24e1, 0x24c7},
+ {0x24e2, 0x24c8},
+ {0x24e3, 0x24c9},
+ {0x24e4, 0x24ca},
+ {0x24e5, 0x24cb},
+ {0x24e6, 0x24cc},
+ {0x24e7, 0x24cd},
+ {0x24e8, 0x24ce},
+ {0x24e9, 0x24cf},
+ {0x2c00, 0x2c30},
+ {0x2c01, 0x2c31},
+ {0x2c02, 0x2c32},
+ {0x2c03, 0x2c33},
+ {0x2c04, 0x2c34},
+ {0x2c05, 0x2c35},
+ {0x2c06, 0x2c36},
+ {0x2c07, 0x2c37},
+ {0x2c08, 0x2c38},
+ {0x2c09, 0x2c39},
+ {0x2c0a, 0x2c3a},
+ {0x2c0b, 0x2c3b},
+ {0x2c0c, 0x2c3c},
+ {0x2c0d, 0x2c3d},
+ {0x2c0e, 0x2c3e},
+ {0x2c0f, 0x2c3f},
+ {0x2c10, 0x2c40},
+ {0x2c11, 0x2c41},
+ {0x2c12, 0x2c42},
+ {0x2c13, 0x2c43},
+ {0x2c14, 0x2c44},
+ {0x2c15, 0x2c45},
+ {0x2c16, 0x2c46},
+ {0x2c17, 0x2c47},
+ {0x2c18, 0x2c48},
+ {0x2c19, 0x2c49},
+ {0x2c1a, 0x2c4a},
+ {0x2c1b, 0x2c4b},
+ {0x2c1c, 0x2c4c},
+ {0x2c1d, 0x2c4d},
+ {0x2c1e, 0x2c4e},
+ {0x2c1f, 0x2c4f},
+ {0x2c20, 0x2c50},
+ {0x2c21, 0x2c51},
+ {0x2c22, 0x2c52},
+ {0x2c23, 0x2c53},
+ {0x2c24, 0x2c54},
+ {0x2c25, 0x2c55},
+ {0x2c26, 0x2c56},
+ {0x2c27, 0x2c57},
+ {0x2c28, 0x2c58},
+ {0x2c29, 0x2c59},
+ {0x2c2a, 0x2c5a},
+ {0x2c2b, 0x2c5b},
+ {0x2c2c, 0x2c5c},
+ {0x2c2d, 0x2c5d},
+ {0x2c2e, 0x2c5e},
+ {0x2c30, 0x2c00},
+ {0x2c31, 0x2c01},
+ {0x2c32, 0x2c02},
+ {0x2c33, 0x2c03},
+ {0x2c34, 0x2c04},
+ {0x2c35, 0x2c05},
+ {0x2c36, 0x2c06},
+ {0x2c37, 0x2c07},
+ {0x2c38, 0x2c08},
+ {0x2c39, 0x2c09},
+ {0x2c3a, 0x2c0a},
+ {0x2c3b, 0x2c0b},
+ {0x2c3c, 0x2c0c},
+ {0x2c3d, 0x2c0d},
+ {0x2c3e, 0x2c0e},
+ {0x2c3f, 0x2c0f},
+ {0x2c40, 0x2c10},
+ {0x2c41, 0x2c11},
+ {0x2c42, 0x2c12},
+ {0x2c43, 0x2c13},
+ {0x2c44, 0x2c14},
+ {0x2c45, 0x2c15},
+ {0x2c46, 0x2c16},
+ {0x2c47, 0x2c17},
+ {0x2c48, 0x2c18},
+ {0x2c49, 0x2c19},
+ {0x2c4a, 0x2c1a},
+ {0x2c4b, 0x2c1b},
+ {0x2c4c, 0x2c1c},
+ {0x2c4d, 0x2c1d},
+ {0x2c4e, 0x2c1e},
+ {0x2c4f, 0x2c1f},
+ {0x2c50, 0x2c20},
+ {0x2c51, 0x2c21},
+ {0x2c52, 0x2c22},
+ {0x2c53, 0x2c23},
+ {0x2c54, 0x2c24},
+ {0x2c55, 0x2c25},
+ {0x2c56, 0x2c26},
+ {0x2c57, 0x2c27},
+ {0x2c58, 0x2c28},
+ {0x2c59, 0x2c29},
+ {0x2c5a, 0x2c2a},
+ {0x2c5b, 0x2c2b},
+ {0x2c5c, 0x2c2c},
+ {0x2c5d, 0x2c2d},
+ {0x2c5e, 0x2c2e},
+ {0x2c60, 0x2c61},
+ {0x2c61, 0x2c60},
+ {0x2c62, 0x26b},
+ {0x2c63, 0x1d7d},
+ {0x2c64, 0x27d},
+ {0x2c65, 0x23a},
+ {0x2c66, 0x23e},
+ {0x2c67, 0x2c68},
+ {0x2c68, 0x2c67},
+ {0x2c69, 0x2c6a},
+ {0x2c6a, 0x2c69},
+ {0x2c6b, 0x2c6c},
+ {0x2c6c, 0x2c6b},
+ {0x2c6d, 0x251},
+ {0x2c6e, 0x271},
+ {0x2c6f, 0x250},
+ {0x2c70, 0x252},
+ {0x2c72, 0x2c73},
+ {0x2c73, 0x2c72},
+ {0x2c75, 0x2c76},
+ {0x2c76, 0x2c75},
+ {0x2c7e, 0x23f},
+ {0x2c7f, 0x240},
+ {0x2c80, 0x2c81},
+ {0x2c81, 0x2c80},
+ {0x2c82, 0x2c83},
+ {0x2c83, 0x2c82},
+ {0x2c84, 0x2c85},
+ {0x2c85, 0x2c84},
+ {0x2c86, 0x2c87},
+ {0x2c87, 0x2c86},
+ {0x2c88, 0x2c89},
+ {0x2c89, 0x2c88},
+ {0x2c8a, 0x2c8b},
+ {0x2c8b, 0x2c8a},
+ {0x2c8c, 0x2c8d},
+ {0x2c8d, 0x2c8c},
+ {0x2c8e, 0x2c8f},
+ {0x2c8f, 0x2c8e},
+ {0x2c90, 0x2c91},
+ {0x2c91, 0x2c90},
+ {0x2c92, 0x2c93},
+ {0x2c93, 0x2c92},
+ {0x2c94, 0x2c95},
+ {0x2c95, 0x2c94},
+ {0x2c96, 0x2c97},
+ {0x2c97, 0x2c96},
+ {0x2c98, 0x2c99},
+ {0x2c99, 0x2c98},
+ {0x2c9a, 0x2c9b},
+ {0x2c9b, 0x2c9a},
+ {0x2c9c, 0x2c9d},
+ {0x2c9d, 0x2c9c},
+ {0x2c9e, 0x2c9f},
+ {0x2c9f, 0x2c9e},
+ {0x2ca0, 0x2ca1},
+ {0x2ca1, 0x2ca0},
+ {0x2ca2, 0x2ca3},
+ {0x2ca3, 0x2ca2},
+ {0x2ca4, 0x2ca5},
+ {0x2ca5, 0x2ca4},
+ {0x2ca6, 0x2ca7},
+ {0x2ca7, 0x2ca6},
+ {0x2ca8, 0x2ca9},
+ {0x2ca9, 0x2ca8},
+ {0x2caa, 0x2cab},
+ {0x2cab, 0x2caa},
+ {0x2cac, 0x2cad},
+ {0x2cad, 0x2cac},
+ {0x2cae, 0x2caf},
+ {0x2caf, 0x2cae},
+ {0x2cb0, 0x2cb1},
+ {0x2cb1, 0x2cb0},
+ {0x2cb2, 0x2cb3},
+ {0x2cb3, 0x2cb2},
+ {0x2cb4, 0x2cb5},
+ {0x2cb5, 0x2cb4},
+ {0x2cb6, 0x2cb7},
+ {0x2cb7, 0x2cb6},
+ {0x2cb8, 0x2cb9},
+ {0x2cb9, 0x2cb8},
+ {0x2cba, 0x2cbb},
+ {0x2cbb, 0x2cba},
+ {0x2cbc, 0x2cbd},
+ {0x2cbd, 0x2cbc},
+ {0x2cbe, 0x2cbf},
+ {0x2cbf, 0x2cbe},
+ {0x2cc0, 0x2cc1},
+ {0x2cc1, 0x2cc0},
+ {0x2cc2, 0x2cc3},
+ {0x2cc3, 0x2cc2},
+ {0x2cc4, 0x2cc5},
+ {0x2cc5, 0x2cc4},
+ {0x2cc6, 0x2cc7},
+ {0x2cc7, 0x2cc6},
+ {0x2cc8, 0x2cc9},
+ {0x2cc9, 0x2cc8},
+ {0x2cca, 0x2ccb},
+ {0x2ccb, 0x2cca},
+ {0x2ccc, 0x2ccd},
+ {0x2ccd, 0x2ccc},
+ {0x2cce, 0x2ccf},
+ {0x2ccf, 0x2cce},
+ {0x2cd0, 0x2cd1},
+ {0x2cd1, 0x2cd0},
+ {0x2cd2, 0x2cd3},
+ {0x2cd3, 0x2cd2},
+ {0x2cd4, 0x2cd5},
+ {0x2cd5, 0x2cd4},
+ {0x2cd6, 0x2cd7},
+ {0x2cd7, 0x2cd6},
+ {0x2cd8, 0x2cd9},
+ {0x2cd9, 0x2cd8},
+ {0x2cda, 0x2cdb},
+ {0x2cdb, 0x2cda},
+ {0x2cdc, 0x2cdd},
+ {0x2cdd, 0x2cdc},
+ {0x2cde, 0x2cdf},
+ {0x2cdf, 0x2cde},
+ {0x2ce0, 0x2ce1},
+ {0x2ce1, 0x2ce0},
+ {0x2ce2, 0x2ce3},
+ {0x2ce3, 0x2ce2},
+ {0x2ceb, 0x2cec},
+ {0x2cec, 0x2ceb},
+ {0x2ced, 0x2cee},
+ {0x2cee, 0x2ced},
+ {0x2cf2, 0x2cf3},
+ {0x2cf3, 0x2cf2},
+ {0x2d00, 0x10a0},
+ {0x2d01, 0x10a1},
+ {0x2d02, 0x10a2},
+ {0x2d03, 0x10a3},
+ {0x2d04, 0x10a4},
+ {0x2d05, 0x10a5},
+ {0x2d06, 0x10a6},
+ {0x2d07, 0x10a7},
+ {0x2d08, 0x10a8},
+ {0x2d09, 0x10a9},
+ {0x2d0a, 0x10aa},
+ {0x2d0b, 0x10ab},
+ {0x2d0c, 0x10ac},
+ {0x2d0d, 0x10ad},
+ {0x2d0e, 0x10ae},
+ {0x2d0f, 0x10af},
+ {0x2d10, 0x10b0},
+ {0x2d11, 0x10b1},
+ {0x2d12, 0x10b2},
+ {0x2d13, 0x10b3},
+ {0x2d14, 0x10b4},
+ {0x2d15, 0x10b5},
+ {0x2d16, 0x10b6},
+ {0x2d17, 0x10b7},
+ {0x2d18, 0x10b8},
+ {0x2d19, 0x10b9},
+ {0x2d1a, 0x10ba},
+ {0x2d1b, 0x10bb},
+ {0x2d1c, 0x10bc},
+ {0x2d1d, 0x10bd},
+ {0x2d1e, 0x10be},
+ {0x2d1f, 0x10bf},
+ {0x2d20, 0x10c0},
+ {0x2d21, 0x10c1},
+ {0x2d22, 0x10c2},
+ {0x2d23, 0x10c3},
+ {0x2d24, 0x10c4},
+ {0x2d25, 0x10c5},
+ {0x2d27, 0x10c7},
+ {0x2d2d, 0x10cd},
+ {0xa640, 0xa641},
+ {0xa641, 0xa640},
+ {0xa642, 0xa643},
+ {0xa643, 0xa642},
+ {0xa644, 0xa645},
+ {0xa645, 0xa644},
+ {0xa646, 0xa647},
+ {0xa647, 0xa646},
+ {0xa648, 0xa649},
+ {0xa649, 0xa648},
+ {0xa64a, 0xa64b},
+ {0xa64b, 0xa64a},
+ {0xa64c, 0xa64d},
+ {0xa64d, 0xa64c},
+ {0xa64e, 0xa64f},
+ {0xa64f, 0xa64e},
+ {0xa650, 0xa651},
+ {0xa651, 0xa650},
+ {0xa652, 0xa653},
+ {0xa653, 0xa652},
+ {0xa654, 0xa655},
+ {0xa655, 0xa654},
+ {0xa656, 0xa657},
+ {0xa657, 0xa656},
+ {0xa658, 0xa659},
+ {0xa659, 0xa658},
+ {0xa65a, 0xa65b},
+ {0xa65b, 0xa65a},
+ {0xa65c, 0xa65d},
+ {0xa65d, 0xa65c},
+ {0xa65e, 0xa65f},
+ {0xa65f, 0xa65e},
+ {0xa660, 0xa661},
+ {0xa661, 0xa660},
+ {0xa662, 0xa663},
+ {0xa663, 0xa662},
+ {0xa664, 0xa665},
+ {0xa665, 0xa664},
+ {0xa666, 0xa667},
+ {0xa667, 0xa666},
+ {0xa668, 0xa669},
+ {0xa669, 0xa668},
+ {0xa66a, 0xa66b},
+ {0xa66b, 0xa66a},
+ {0xa66c, 0xa66d},
+ {0xa66d, 0xa66c},
+ {0xa680, 0xa681},
+ {0xa681, 0xa680},
+ {0xa682, 0xa683},
+ {0xa683, 0xa682},
+ {0xa684, 0xa685},
+ {0xa685, 0xa684},
+ {0xa686, 0xa687},
+ {0xa687, 0xa686},
+ {0xa688, 0xa689},
+ {0xa689, 0xa688},
+ {0xa68a, 0xa68b},
+ {0xa68b, 0xa68a},
+ {0xa68c, 0xa68d},
+ {0xa68d, 0xa68c},
+ {0xa68e, 0xa68f},
+ {0xa68f, 0xa68e},
+ {0xa690, 0xa691},
+ {0xa691, 0xa690},
+ {0xa692, 0xa693},
+ {0xa693, 0xa692},
+ {0xa694, 0xa695},
+ {0xa695, 0xa694},
+ {0xa696, 0xa697},
+ {0xa697, 0xa696},
+ {0xa698, 0xa699},
+ {0xa699, 0xa698},
+ {0xa69a, 0xa69b},
+ {0xa69b, 0xa69a},
+ {0xa722, 0xa723},
+ {0xa723, 0xa722},
+ {0xa724, 0xa725},
+ {0xa725, 0xa724},
+ {0xa726, 0xa727},
+ {0xa727, 0xa726},
+ {0xa728, 0xa729},
+ {0xa729, 0xa728},
+ {0xa72a, 0xa72b},
+ {0xa72b, 0xa72a},
+ {0xa72c, 0xa72d},
+ {0xa72d, 0xa72c},
+ {0xa72e, 0xa72f},
+ {0xa72f, 0xa72e},
+ {0xa732, 0xa733},
+ {0xa733, 0xa732},
+ {0xa734, 0xa735},
+ {0xa735, 0xa734},
+ {0xa736, 0xa737},
+ {0xa737, 0xa736},
+ {0xa738, 0xa739},
+ {0xa739, 0xa738},
+ {0xa73a, 0xa73b},
+ {0xa73b, 0xa73a},
+ {0xa73c, 0xa73d},
+ {0xa73d, 0xa73c},
+ {0xa73e, 0xa73f},
+ {0xa73f, 0xa73e},
+ {0xa740, 0xa741},
+ {0xa741, 0xa740},
+ {0xa742, 0xa743},
+ {0xa743, 0xa742},
+ {0xa744, 0xa745},
+ {0xa745, 0xa744},
+ {0xa746, 0xa747},
+ {0xa747, 0xa746},
+ {0xa748, 0xa749},
+ {0xa749, 0xa748},
+ {0xa74a, 0xa74b},
+ {0xa74b, 0xa74a},
+ {0xa74c, 0xa74d},
+ {0xa74d, 0xa74c},
+ {0xa74e, 0xa74f},
+ {0xa74f, 0xa74e},
+ {0xa750, 0xa751},
+ {0xa751, 0xa750},
+ {0xa752, 0xa753},
+ {0xa753, 0xa752},
+ {0xa754, 0xa755},
+ {0xa755, 0xa754},
+ {0xa756, 0xa757},
+ {0xa757, 0xa756},
+ {0xa758, 0xa759},
+ {0xa759, 0xa758},
+ {0xa75a, 0xa75b},
+ {0xa75b, 0xa75a},
+ {0xa75c, 0xa75d},
+ {0xa75d, 0xa75c},
+ {0xa75e, 0xa75f},
+ {0xa75f, 0xa75e},
+ {0xa760, 0xa761},
+ {0xa761, 0xa760},
+ {0xa762, 0xa763},
+ {0xa763, 0xa762},
+ {0xa764, 0xa765},
+ {0xa765, 0xa764},
+ {0xa766, 0xa767},
+ {0xa767, 0xa766},
+ {0xa768, 0xa769},
+ {0xa769, 0xa768},
+ {0xa76a, 0xa76b},
+ {0xa76b, 0xa76a},
+ {0xa76c, 0xa76d},
+ {0xa76d, 0xa76c},
+ {0xa76e, 0xa76f},
+ {0xa76f, 0xa76e},
+ {0xa779, 0xa77a},
+ {0xa77a, 0xa779},
+ {0xa77b, 0xa77c},
+ {0xa77c, 0xa77b},
+ {0xa77d, 0x1d79},
+ {0xa77e, 0xa77f},
+ {0xa77f, 0xa77e},
+ {0xa780, 0xa781},
+ {0xa781, 0xa780},
+ {0xa782, 0xa783},
+ {0xa783, 0xa782},
+ {0xa784, 0xa785},
+ {0xa785, 0xa784},
+ {0xa786, 0xa787},
+ {0xa787, 0xa786},
+ {0xa78b, 0xa78c},
+ {0xa78c, 0xa78b},
+ {0xa78d, 0x265},
+ {0xa790, 0xa791},
+ {0xa791, 0xa790},
+ {0xa792, 0xa793},
+ {0xa793, 0xa792},
+ {0xa796, 0xa797},
+ {0xa797, 0xa796},
+ {0xa798, 0xa799},
+ {0xa799, 0xa798},
+ {0xa79a, 0xa79b},
+ {0xa79b, 0xa79a},
+ {0xa79c, 0xa79d},
+ {0xa79d, 0xa79c},
+ {0xa79e, 0xa79f},
+ {0xa79f, 0xa79e},
+ {0xa7a0, 0xa7a1},
+ {0xa7a1, 0xa7a0},
+ {0xa7a2, 0xa7a3},
+ {0xa7a3, 0xa7a2},
+ {0xa7a4, 0xa7a5},
+ {0xa7a5, 0xa7a4},
+ {0xa7a6, 0xa7a7},
+ {0xa7a7, 0xa7a6},
+ {0xa7a8, 0xa7a9},
+ {0xa7a9, 0xa7a8},
+ {0xa7aa, 0x266},
+ {0xa7ab, 0x25c},
+ {0xa7ac, 0x261},
+ {0xa7ad, 0x26c},
+ {0xa7b0, 0x29e},
+ {0xa7b1, 0x287},
+ {0xff21, 0xff41},
+ {0xff22, 0xff42},
+ {0xff23, 0xff43},
+ {0xff24, 0xff44},
+ {0xff25, 0xff45},
+ {0xff26, 0xff46},
+ {0xff27, 0xff47},
+ {0xff28, 0xff48},
+ {0xff29, 0xff49},
+ {0xff2a, 0xff4a},
+ {0xff2b, 0xff4b},
+ {0xff2c, 0xff4c},
+ {0xff2d, 0xff4d},
+ {0xff2e, 0xff4e},
+ {0xff2f, 0xff4f},
+ {0xff30, 0xff50},
+ {0xff31, 0xff51},
+ {0xff32, 0xff52},
+ {0xff33, 0xff53},
+ {0xff34, 0xff54},
+ {0xff35, 0xff55},
+ {0xff36, 0xff56},
+ {0xff37, 0xff57},
+ {0xff38, 0xff58},
+ {0xff39, 0xff59},
+ {0xff3a, 0xff5a},
+ {0xff41, 0xff21},
+ {0xff42, 0xff22},
+ {0xff43, 0xff23},
+ {0xff44, 0xff24},
+ {0xff45, 0xff25},
+ {0xff46, 0xff26},
+ {0xff47, 0xff27},
+ {0xff48, 0xff28},
+ {0xff49, 0xff29},
+ {0xff4a, 0xff2a},
+ {0xff4b, 0xff2b},
+ {0xff4c, 0xff2c},
+ {0xff4d, 0xff2d},
+ {0xff4e, 0xff2e},
+ {0xff4f, 0xff2f},
+ {0xff50, 0xff30},
+ {0xff51, 0xff31},
+ {0xff52, 0xff32},
+ {0xff53, 0xff33},
+ {0xff54, 0xff34},
+ {0xff55, 0xff35},
+ {0xff56, 0xff36},
+ {0xff57, 0xff37},
+ {0xff58, 0xff38},
+ {0xff59, 0xff39},
+ {0xff5a, 0xff3a},
+ {0x10400, 0x10428},
+ {0x10401, 0x10429},
+ {0x10402, 0x1042a},
+ {0x10403, 0x1042b},
+ {0x10404, 0x1042c},
+ {0x10405, 0x1042d},
+ {0x10406, 0x1042e},
+ {0x10407, 0x1042f},
+ {0x10408, 0x10430},
+ {0x10409, 0x10431},
+ {0x1040a, 0x10432},
+ {0x1040b, 0x10433},
+ {0x1040c, 0x10434},
+ {0x1040d, 0x10435},
+ {0x1040e, 0x10436},
+ {0x1040f, 0x10437},
+ {0x10410, 0x10438},
+ {0x10411, 0x10439},
+ {0x10412, 0x1043a},
+ {0x10413, 0x1043b},
+ {0x10414, 0x1043c},
+ {0x10415, 0x1043d},
+ {0x10416, 0x1043e},
+ {0x10417, 0x1043f},
+ {0x10418, 0x10440},
+ {0x10419, 0x10441},
+ {0x1041a, 0x10442},
+ {0x1041b, 0x10443},
+ {0x1041c, 0x10444},
+ {0x1041d, 0x10445},
+ {0x1041e, 0x10446},
+ {0x1041f, 0x10447},
+ {0x10420, 0x10448},
+ {0x10421, 0x10449},
+ {0x10422, 0x1044a},
+ {0x10423, 0x1044b},
+ {0x10424, 0x1044c},
+ {0x10425, 0x1044d},
+ {0x10426, 0x1044e},
+ {0x10427, 0x1044f},
+ {0x10428, 0x10400},
+ {0x10429, 0x10401},
+ {0x1042a, 0x10402},
+ {0x1042b, 0x10403},
+ {0x1042c, 0x10404},
+ {0x1042d, 0x10405},
+ {0x1042e, 0x10406},
+ {0x1042f, 0x10407},
+ {0x10430, 0x10408},
+ {0x10431, 0x10409},
+ {0x10432, 0x1040a},
+ {0x10433, 0x1040b},
+ {0x10434, 0x1040c},
+ {0x10435, 0x1040d},
+ {0x10436, 0x1040e},
+ {0x10437, 0x1040f},
+ {0x10438, 0x10410},
+ {0x10439, 0x10411},
+ {0x1043a, 0x10412},
+ {0x1043b, 0x10413},
+ {0x1043c, 0x10414},
+ {0x1043d, 0x10415},
+ {0x1043e, 0x10416},
+ {0x1043f, 0x10417},
+ {0x10440, 0x10418},
+ {0x10441, 0x10419},
+ {0x10442, 0x1041a},
+ {0x10443, 0x1041b},
+ {0x10444, 0x1041c},
+ {0x10445, 0x1041d},
+ {0x10446, 0x1041e},
+ {0x10447, 0x1041f},
+ {0x10448, 0x10420},
+ {0x10449, 0x10421},
+ {0x1044a, 0x10422},
+ {0x1044b, 0x10423},
+ {0x1044c, 0x10424},
+ {0x1044d, 0x10425},
+ {0x1044e, 0x10426},
+ {0x1044f, 0x10427},
+ {0x118a0, 0x118c0},
+ {0x118a1, 0x118c1},
+ {0x118a2, 0x118c2},
+ {0x118a3, 0x118c3},
+ {0x118a4, 0x118c4},
+ {0x118a5, 0x118c5},
+ {0x118a6, 0x118c6},
+ {0x118a7, 0x118c7},
+ {0x118a8, 0x118c8},
+ {0x118a9, 0x118c9},
+ {0x118aa, 0x118ca},
+ {0x118ab, 0x118cb},
+ {0x118ac, 0x118cc},
+ {0x118ad, 0x118cd},
+ {0x118ae, 0x118ce},
+ {0x118af, 0x118cf},
+ {0x118b0, 0x118d0},
+ {0x118b1, 0x118d1},
+ {0x118b2, 0x118d2},
+ {0x118b3, 0x118d3},
+ {0x118b4, 0x118d4},
+ {0x118b5, 0x118d5},
+ {0x118b6, 0x118d6},
+ {0x118b7, 0x118d7},
+ {0x118b8, 0x118d8},
+ {0x118b9, 0x118d9},
+ {0x118ba, 0x118da},
+ {0x118bb, 0x118db},
+ {0x118bc, 0x118dc},
+ {0x118bd, 0x118dd},
+ {0x118be, 0x118de},
+ {0x118bf, 0x118df},
+ {0x118c0, 0x118a0},
+ {0x118c1, 0x118a1},
+ {0x118c2, 0x118a2},
+ {0x118c3, 0x118a3},
+ {0x118c4, 0x118a4},
+ {0x118c5, 0x118a5},
+ {0x118c6, 0x118a6},
+ {0x118c7, 0x118a7},
+ {0x118c8, 0x118a8},
+ {0x118c9, 0x118a9},
+ {0x118ca, 0x118aa},
+ {0x118cb, 0x118ab},
+ {0x118cc, 0x118ac},
+ {0x118cd, 0x118ad},
+ {0x118ce, 0x118ae},
+ {0x118cf, 0x118af},
+ {0x118d0, 0x118b0},
+ {0x118d1, 0x118b1},
+ {0x118d2, 0x118b2},
+ {0x118d3, 0x118b3},
+ {0x118d4, 0x118b4},
+ {0x118d5, 0x118b5},
+ {0x118d6, 0x118b6},
+ {0x118d7, 0x118b7},
+ {0x118d8, 0x118b8},
+ {0x118d9, 0x118b9},
+ {0x118da, 0x118ba},
+ {0x118db, 0x118bb},
+ {0x118dc, 0x118bc},
+ {0x118dd, 0x118bd},
+ {0x118de, 0x118be},
+ {0x118df, 0x118bf},
+};
+
+#endif // UCP_TABLE_DEFINE_FN
+
+} // namespace ue2
+
+#endif
+
diff --git a/contrib/libs/hyperscan/src/parser/unsupported.cpp b/contrib/libs/hyperscan/src/parser/unsupported.cpp
index ccb555d7ca..c4b18b6a30 100644
--- a/contrib/libs/hyperscan/src/parser/unsupported.cpp
+++ b/contrib/libs/hyperscan/src/parser/unsupported.cpp
@@ -1,88 +1,88 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Checks component trees for unsupported components.
- */
-#include "ConstComponentVisitor.h"
-#include "ComponentEUS.h"
-#include "ComponentRepeat.h"
-#include "ComponentWordBoundary.h"
-#include "parse_error.h"
-#include "unsupported.h"
-
-#include <sstream>
-
-namespace ue2 {
-
-/** \brief Visitor class that throws a ParseError exception when it encounters
- * an unsupported component. */
-class UnsupportedVisitor : public DefaultConstComponentVisitor {
-public:
- ~UnsupportedVisitor() override;
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Checks component trees for unsupported components.
+ */
+#include "ConstComponentVisitor.h"
+#include "ComponentEUS.h"
+#include "ComponentRepeat.h"
+#include "ComponentWordBoundary.h"
+#include "parse_error.h"
+#include "unsupported.h"
+
+#include <sstream>
+
+namespace ue2 {
+
+/** \brief Visitor class that throws a ParseError exception when it encounters
+ * an unsupported component. */
+class UnsupportedVisitor : public DefaultConstComponentVisitor {
+public:
+ ~UnsupportedVisitor() override;
using DefaultConstComponentVisitor::pre;
- void pre(const ComponentAssertion &) override {
- throw ParseError("Zero-width assertions are not supported.");
- }
- void pre(const ComponentAtomicGroup &) override {
- throw ParseError("Atomic groups are unsupported.");
- }
- void pre(const ComponentBackReference &) override {
- throw ParseError("Back-references are unsupported.");
- }
- void pre(const ComponentCondReference &) override {
- throw ParseError("Conditional references are not supported.");
- }
- void pre(const ComponentEUS &c) override {
- std::ostringstream str;
- str << "\\X unsupported at index " << c.loc << ".";
- throw ParseError(str.str());
- }
- void pre(const ComponentRepeat &c) override {
- if (c.type == ComponentRepeat::REPEAT_POSSESSIVE) {
- throw ParseError("Possessive quantifiers are not supported.");
- }
- }
- void pre(const ComponentWordBoundary &c) override {
- if (c.ucp && !c.prefilter) {
- std::ostringstream str;
- str << (!c.negated ? "\\b" : "\\B")
- << " unsupported in UCP mode at index " << c.loc << ".";
- throw ParseError(str.str());
- }
- }
-};
-
-UnsupportedVisitor::~UnsupportedVisitor() {}
-
-void checkUnsupported(const Component &root) {
- UnsupportedVisitor vis;
- root.accept(vis);
-}
-
-} // namespace ue2
+ void pre(const ComponentAssertion &) override {
+ throw ParseError("Zero-width assertions are not supported.");
+ }
+ void pre(const ComponentAtomicGroup &) override {
+ throw ParseError("Atomic groups are unsupported.");
+ }
+ void pre(const ComponentBackReference &) override {
+ throw ParseError("Back-references are unsupported.");
+ }
+ void pre(const ComponentCondReference &) override {
+ throw ParseError("Conditional references are not supported.");
+ }
+ void pre(const ComponentEUS &c) override {
+ std::ostringstream str;
+ str << "\\X unsupported at index " << c.loc << ".";
+ throw ParseError(str.str());
+ }
+ void pre(const ComponentRepeat &c) override {
+ if (c.type == ComponentRepeat::REPEAT_POSSESSIVE) {
+ throw ParseError("Possessive quantifiers are not supported.");
+ }
+ }
+ void pre(const ComponentWordBoundary &c) override {
+ if (c.ucp && !c.prefilter) {
+ std::ostringstream str;
+ str << (!c.negated ? "\\b" : "\\B")
+ << " unsupported in UCP mode at index " << c.loc << ".";
+ throw ParseError(str.str());
+ }
+ }
+};
+
+UnsupportedVisitor::~UnsupportedVisitor() {}
+
+void checkUnsupported(const Component &root) {
+ UnsupportedVisitor vis;
+ root.accept(vis);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/unsupported.h b/contrib/libs/hyperscan/src/parser/unsupported.h
index 41a75a1b13..f3905cdf06 100644
--- a/contrib/libs/hyperscan/src/parser/unsupported.h
+++ b/contrib/libs/hyperscan/src/parser/unsupported.h
@@ -1,47 +1,47 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Checks component trees for unsupported components.
- */
-#ifndef PARSER_UNSUPPORTED_H_
-#define PARSER_UNSUPPORTED_H_
-
-#include "parse_error.h"
-
-namespace ue2 {
-
-class Component;
-
-/** \brief Throws a ParseError if this component tree contains an unsupported
- * Component. */
-void checkUnsupported(const Component &root);
-
-} // namespace
-
-#endif // PARSER_UNSUPPORTED_H_
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Checks component trees for unsupported components.
+ */
+#ifndef PARSER_UNSUPPORTED_H_
+#define PARSER_UNSUPPORTED_H_
+
+#include "parse_error.h"
+
+namespace ue2 {
+
+class Component;
+
+/** \brief Throws a ParseError if this component tree contains an unsupported
+ * Component. */
+void checkUnsupported(const Component &root);
+
+} // namespace
+
+#endif // PARSER_UNSUPPORTED_H_
diff --git a/contrib/libs/hyperscan/src/parser/utf8_validate.cpp b/contrib/libs/hyperscan/src/parser/utf8_validate.cpp
index 77807d5d97..50aa06d8e7 100644
--- a/contrib/libs/hyperscan/src/parser/utf8_validate.cpp
+++ b/contrib/libs/hyperscan/src/parser/utf8_validate.cpp
@@ -1,162 +1,162 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#include "utf8_validate.h"
-
-#include "ue2common.h"
-#include "util/unicode_def.h"
-
-#include <cstring>
-
-namespace ue2 {
-
-static
-bool hasValidContBytes(const u8 *s, size_t num) {
- /* continuer bytes must all be of the form 10xx xxxx */
- for (size_t i = 0; i < num; i++) {
- if ((s[i] & 0xc0) != UTF_CONT_BYTE_HEADER) {
- return false;
- }
- }
- return true;
-}
-
-static
-bool isAllowedCodepoint(u32 val) {
- if (val >= 0xd800 && val <= 0xdfff) {
- return false; // High and low surrogate halves
- }
- if (val > 0x10ffff) {
- return false; // As per limit in RFC 3629
- }
-
- return true;
-}
-
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "utf8_validate.h"
+
+#include "ue2common.h"
+#include "util/unicode_def.h"
+
+#include <cstring>
+
+namespace ue2 {
+
+static
+bool hasValidContBytes(const u8 *s, size_t num) {
+ /* continuer bytes must all be of the form 10xx xxxx */
+ for (size_t i = 0; i < num; i++) {
+ if ((s[i] & 0xc0) != UTF_CONT_BYTE_HEADER) {
+ return false;
+ }
+ }
+ return true;
+}
+
+static
+bool isAllowedCodepoint(u32 val) {
+ if (val >= 0xd800 && val <= 0xdfff) {
+ return false; // High and low surrogate halves
+ }
+ if (val > 0x10ffff) {
+ return false; // As per limit in RFC 3629
+ }
+
+ return true;
+}
+
bool isValidUtf8(const char *expression, const size_t len) {
- if (!expression) {
- return true;
- }
-
- const u8 *s = (const u8 *)expression;
- u32 val;
-
- size_t i = 0;
- while (i < len) {
- DEBUG_PRINTF("byte %zu: 0x%02x\n", i, s[i]);
- // One octet.
- if (s[i] < 0x7f) {
- DEBUG_PRINTF("one octet\n");
- i++;
- continue;
- }
-
- // Two octets.
- if ((s[i] & 0xe0) == UTF_TWO_BYTE_HEADER) {
- DEBUG_PRINTF("two octets\n");
- if (i + 2 > len) {
- break;
- }
- if (!hasValidContBytes(&s[i] + 1, 1)) {
- break;
- }
- val = ((s[i] & 0x1f) << 6) | (s[i + 1] & UTF_CONT_BYTE_VALUE_MASK);
- DEBUG_PRINTF("val=0x%x\n", val);
- if (val < 1U << 7) {
- DEBUG_PRINTF("overlong encoding\n");
- break;
- }
- if (!isAllowedCodepoint(val)) {
- DEBUG_PRINTF("codepoint not allowed\n");
- break;
- }
- i += 2;
- continue;
- }
-
- // Three octets.
- if ((s[i] & 0xf0) == UTF_THREE_BYTE_HEADER) {
- DEBUG_PRINTF("three octets\n");
- if (i + 3 > len) {
- break;
- }
- if (!hasValidContBytes(&s[i] + 1, 2)) {
- break;
- }
- val = ((s[i] & 0xf) << 12) |
- ((s[i + 1] & UTF_CONT_BYTE_VALUE_MASK) << 6) |
- (s[i + 2] & UTF_CONT_BYTE_VALUE_MASK);
- if (val < 1U << 11) {
- DEBUG_PRINTF("overlong encoding\n");
- break;
- }
- if (!isAllowedCodepoint(val)) {
- DEBUG_PRINTF("codepoint not allowed\n");
- break;
- }
- i += 3;
- continue;
- }
-
- // Four octets.
- if ((s[i] & 0xf8) == UTF_FOUR_BYTE_HEADER) {
- DEBUG_PRINTF("four octets\n");
- if (i + 4 > len) {
- break;
- }
- if (!hasValidContBytes(&s[i] + 1, 3)) {
- break;
- }
- val = ((s[i] & 0xf) << 18) |
- ((s[i + 1] & UTF_CONT_BYTE_VALUE_MASK) << 12) |
- ((s[i + 2] & UTF_CONT_BYTE_VALUE_MASK) << 6) |
- (s[i + 3] & UTF_CONT_BYTE_VALUE_MASK);
- if (val < 1U << 16) {
- DEBUG_PRINTF("overlong encoding\n");
- break;
- }
- if (!isAllowedCodepoint(val)) {
- DEBUG_PRINTF("codepoint not allowed\n");
- break;
- }
- i += 4;
- continue;
- }
-
- // Something else?
- DEBUG_PRINTF("bad byte 0x%02x\n", s[i]);
- break;
- }
-
- DEBUG_PRINTF("i=%zu, len=%zu\n", i, len);
- return i == len;
-}
-
-} // namespace ue2
+ if (!expression) {
+ return true;
+ }
+
+ const u8 *s = (const u8 *)expression;
+ u32 val;
+
+ size_t i = 0;
+ while (i < len) {
+ DEBUG_PRINTF("byte %zu: 0x%02x\n", i, s[i]);
+ // One octet.
+ if (s[i] < 0x7f) {
+ DEBUG_PRINTF("one octet\n");
+ i++;
+ continue;
+ }
+
+ // Two octets.
+ if ((s[i] & 0xe0) == UTF_TWO_BYTE_HEADER) {
+ DEBUG_PRINTF("two octets\n");
+ if (i + 2 > len) {
+ break;
+ }
+ if (!hasValidContBytes(&s[i] + 1, 1)) {
+ break;
+ }
+ val = ((s[i] & 0x1f) << 6) | (s[i + 1] & UTF_CONT_BYTE_VALUE_MASK);
+ DEBUG_PRINTF("val=0x%x\n", val);
+ if (val < 1U << 7) {
+ DEBUG_PRINTF("overlong encoding\n");
+ break;
+ }
+ if (!isAllowedCodepoint(val)) {
+ DEBUG_PRINTF("codepoint not allowed\n");
+ break;
+ }
+ i += 2;
+ continue;
+ }
+
+ // Three octets.
+ if ((s[i] & 0xf0) == UTF_THREE_BYTE_HEADER) {
+ DEBUG_PRINTF("three octets\n");
+ if (i + 3 > len) {
+ break;
+ }
+ if (!hasValidContBytes(&s[i] + 1, 2)) {
+ break;
+ }
+ val = ((s[i] & 0xf) << 12) |
+ ((s[i + 1] & UTF_CONT_BYTE_VALUE_MASK) << 6) |
+ (s[i + 2] & UTF_CONT_BYTE_VALUE_MASK);
+ if (val < 1U << 11) {
+ DEBUG_PRINTF("overlong encoding\n");
+ break;
+ }
+ if (!isAllowedCodepoint(val)) {
+ DEBUG_PRINTF("codepoint not allowed\n");
+ break;
+ }
+ i += 3;
+ continue;
+ }
+
+ // Four octets.
+ if ((s[i] & 0xf8) == UTF_FOUR_BYTE_HEADER) {
+ DEBUG_PRINTF("four octets\n");
+ if (i + 4 > len) {
+ break;
+ }
+ if (!hasValidContBytes(&s[i] + 1, 3)) {
+ break;
+ }
+ val = ((s[i] & 0xf) << 18) |
+ ((s[i + 1] & UTF_CONT_BYTE_VALUE_MASK) << 12) |
+ ((s[i + 2] & UTF_CONT_BYTE_VALUE_MASK) << 6) |
+ (s[i + 3] & UTF_CONT_BYTE_VALUE_MASK);
+ if (val < 1U << 16) {
+ DEBUG_PRINTF("overlong encoding\n");
+ break;
+ }
+ if (!isAllowedCodepoint(val)) {
+ DEBUG_PRINTF("codepoint not allowed\n");
+ break;
+ }
+ i += 4;
+ continue;
+ }
+
+ // Something else?
+ DEBUG_PRINTF("bad byte 0x%02x\n", s[i]);
+ break;
+ }
+
+ DEBUG_PRINTF("i=%zu, len=%zu\n", i, len);
+ return i == len;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/parser/utf8_validate.h b/contrib/libs/hyperscan/src/parser/utf8_validate.h
index 938454c4a9..6389a0859f 100644
--- a/contrib/libs/hyperscan/src/parser/utf8_validate.h
+++ b/contrib/libs/hyperscan/src/parser/utf8_validate.h
@@ -1,41 +1,41 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef PARSER_UTF8_VALIDATE_H
-#define PARSER_UTF8_VALIDATE_H
-
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef PARSER_UTF8_VALIDATE_H
+#define PARSER_UTF8_VALIDATE_H
+
#include <cstddef> // size_t
-namespace ue2 {
-
-/** \brief Validate that the given expression is well-formed UTF-8. */
+namespace ue2 {
+
+/** \brief Validate that the given expression is well-formed UTF-8. */
bool isValidUtf8(const char *expression, const size_t len);
-
-} // namespace ue2
-
-#endif // PARSER_UTF8_VALIDATE_H
+
+} // namespace ue2
+
+#endif // PARSER_UTF8_VALIDATE_H
diff --git a/contrib/libs/hyperscan/src/rose/block.c b/contrib/libs/hyperscan/src/rose/block.c
index 7c8b43aed9..b3f424cb73 100644
--- a/contrib/libs/hyperscan/src/rose/block.c
+++ b/contrib/libs/hyperscan/src/rose/block.c
@@ -1,164 +1,164 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "catchup.h"
-#include "init.h"
-#include "match.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "catchup.h"
+#include "init.h"
+#include "match.h"
#include "program_runtime.h"
#include "rose.h"
#include "rose_common.h"
-#include "nfa/nfa_api.h"
-#include "nfa/nfa_internal.h"
-#include "nfa/nfa_rev_api.h"
-#include "nfa/mcclellan.h"
-#include "util/fatbit.h"
-
-static rose_inline
-void runAnchoredTableBlock(const struct RoseEngine *t, const void *atable,
- struct hs_scratch *scratch) {
- const u8 *buffer = scratch->core_info.buf;
- size_t length = scratch->core_info.len;
- size_t alen = MIN(length, t->anchoredDistance);
- const struct anchored_matcher_info *curr = atable;
-
- DEBUG_PRINTF("BEGIN ANCHORED (over %zu/%zu)\n", alen, length);
-
- do {
- const struct NFA *nfa
- = (const struct NFA *)((const char *)curr + sizeof(*curr));
-
- assert(t->anchoredDistance > curr->anchoredMinDistance);
- if (length >= curr->anchoredMinDistance) {
- size_t local_alen = alen - curr->anchoredMinDistance;
- const u8 *local_buffer = buffer + curr->anchoredMinDistance;
-
- DEBUG_PRINTF("--anchored nfa (+%u)\n", curr->anchoredMinDistance);
- assert(isMcClellanType(nfa->type));
- if (nfa->type == MCCLELLAN_NFA_8) {
- nfaExecMcClellan8_B(nfa, curr->anchoredMinDistance,
- local_buffer, local_alen,
+#include "nfa/nfa_api.h"
+#include "nfa/nfa_internal.h"
+#include "nfa/nfa_rev_api.h"
+#include "nfa/mcclellan.h"
+#include "util/fatbit.h"
+
+static rose_inline
+void runAnchoredTableBlock(const struct RoseEngine *t, const void *atable,
+ struct hs_scratch *scratch) {
+ const u8 *buffer = scratch->core_info.buf;
+ size_t length = scratch->core_info.len;
+ size_t alen = MIN(length, t->anchoredDistance);
+ const struct anchored_matcher_info *curr = atable;
+
+ DEBUG_PRINTF("BEGIN ANCHORED (over %zu/%zu)\n", alen, length);
+
+ do {
+ const struct NFA *nfa
+ = (const struct NFA *)((const char *)curr + sizeof(*curr));
+
+ assert(t->anchoredDistance > curr->anchoredMinDistance);
+ if (length >= curr->anchoredMinDistance) {
+ size_t local_alen = alen - curr->anchoredMinDistance;
+ const u8 *local_buffer = buffer + curr->anchoredMinDistance;
+
+ DEBUG_PRINTF("--anchored nfa (+%u)\n", curr->anchoredMinDistance);
+ assert(isMcClellanType(nfa->type));
+ if (nfa->type == MCCLELLAN_NFA_8) {
+ nfaExecMcClellan8_B(nfa, curr->anchoredMinDistance,
+ local_buffer, local_alen,
roseAnchoredCallback, scratch);
- } else {
- nfaExecMcClellan16_B(nfa, curr->anchoredMinDistance,
- local_buffer, local_alen,
+ } else {
+ nfaExecMcClellan16_B(nfa, curr->anchoredMinDistance,
+ local_buffer, local_alen,
roseAnchoredCallback, scratch);
- }
- }
-
- if (!curr->next_offset) {
- break;
- }
-
- curr = (const void *)((const char *)curr + curr->next_offset);
- } while (1);
-}
-
-static really_inline
+ }
+ }
+
+ if (!curr->next_offset) {
+ break;
+ }
+
+ curr = (const void *)((const char *)curr + curr->next_offset);
+ } while (1);
+}
+
+static really_inline
void init_state_for_block(const struct RoseEngine *t, char *state) {
- assert(t);
- assert(state);
-
+ assert(t);
+ assert(state);
+
DEBUG_PRINTF("init for Rose %p with %u state indices\n", t,
t->rolesWithStateCount);
-
- // Rose is guaranteed 8-aligned state
- assert(ISALIGNED_N(state, 8));
-
- init_state(t, state);
-}
-
-static really_inline
-void init_outfixes_for_block(const struct RoseEngine *t,
+
+ // Rose is guaranteed 8-aligned state
+ assert(ISALIGNED_N(state, 8));
+
+ init_state(t, state);
+}
+
+static really_inline
+void init_outfixes_for_block(const struct RoseEngine *t,
struct hs_scratch *scratch, char *state,
- char is_small_block) {
- /* active leaf array has been cleared by the init scatter */
-
- if (t->initMpvNfa != MO_INVALID_IDX) {
- assert(t->initMpvNfa == 0);
- const struct NFA *nfa = getNfaByQueue(t, 0);
- DEBUG_PRINTF("testing minwidth %u > len %zu\n", nfa->minWidth,
- scratch->core_info.len);
- size_t len = nfaRevAccelCheck(nfa, scratch->core_info.buf,
- scratch->core_info.len);
- if (len) {
- u8 *activeArray = getActiveLeafArray(t, state);
- const u32 activeArraySize = t->activeArrayCount;
- const u32 qCount = t->queueCount;
-
- mmbit_set(activeArray, activeArraySize, 0);
- fatbit_set(scratch->aqa, qCount, 0);
-
- struct mq *q = scratch->queues;
+ char is_small_block) {
+ /* active leaf array has been cleared by the init scatter */
+
+ if (t->initMpvNfa != MO_INVALID_IDX) {
+ assert(t->initMpvNfa == 0);
+ const struct NFA *nfa = getNfaByQueue(t, 0);
+ DEBUG_PRINTF("testing minwidth %u > len %zu\n", nfa->minWidth,
+ scratch->core_info.len);
+ size_t len = nfaRevAccelCheck(nfa, scratch->core_info.buf,
+ scratch->core_info.len);
+ if (len) {
+ u8 *activeArray = getActiveLeafArray(t, state);
+ const u32 activeArraySize = t->activeArrayCount;
+ const u32 qCount = t->queueCount;
+
+ mmbit_set(activeArray, activeArraySize, 0);
+ fatbit_set(scratch->aqa, qCount, 0);
+
+ struct mq *q = scratch->queues;
initQueue(q, 0, t, scratch);
- q->length = len; /* adjust for rev_accel */
- nfaQueueInitState(nfa, q);
- pushQueueAt(q, 0, MQE_START, 0);
- pushQueueAt(q, 1, MQE_TOP, 0);
- }
- }
-
- if (is_small_block && !t->hasOutfixesInSmallBlock) {
- DEBUG_PRINTF("all outfixes in small block table\n");
- return;
- }
-
- if (t->outfixBeginQueue != t->outfixEndQueue) {
- blockInitSufPQ(t, state, scratch, is_small_block);
- }
-}
-
-static really_inline
-void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch,
+ q->length = len; /* adjust for rev_accel */
+ nfaQueueInitState(nfa, q);
+ pushQueueAt(q, 0, MQE_START, 0);
+ pushQueueAt(q, 1, MQE_TOP, 0);
+ }
+ }
+
+ if (is_small_block && !t->hasOutfixesInSmallBlock) {
+ DEBUG_PRINTF("all outfixes in small block table\n");
+ return;
+ }
+
+ if (t->outfixBeginQueue != t->outfixEndQueue) {
+ blockInitSufPQ(t, state, scratch, is_small_block);
+ }
+}
+
+static really_inline
+void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch,
char *state, char is_small_block) {
- init_state_for_block(t, state);
-
- struct RoseContext *tctxt = &scratch->tctxt;
-
- tctxt->groups = t->initialGroups;
- tctxt->lit_offset_adjust = 1; // index after last byte
- tctxt->delayLastEndOffset = 0;
- tctxt->lastEndOffset = 0;
- tctxt->filledDelayedSlots = 0;
- tctxt->lastMatchOffset = 0;
+ init_state_for_block(t, state);
+
+ struct RoseContext *tctxt = &scratch->tctxt;
+
+ tctxt->groups = t->initialGroups;
+ tctxt->lit_offset_adjust = 1; // index after last byte
+ tctxt->delayLastEndOffset = 0;
+ tctxt->lastEndOffset = 0;
+ tctxt->filledDelayedSlots = 0;
+ tctxt->lastMatchOffset = 0;
tctxt->lastCombMatchOffset = 0;
- tctxt->minMatchOffset = 0;
- tctxt->minNonMpvMatchOffset = 0;
- tctxt->next_mpv_offset = 0;
-
- scratch->al_log_sum = 0;
-
- fatbit_clear(scratch->aqa);
-
- scratch->catchup_pq.qm_size = 0;
-
- init_outfixes_for_block(t, scratch, state, is_small_block);
-}
-
+ tctxt->minMatchOffset = 0;
+ tctxt->minNonMpvMatchOffset = 0;
+ tctxt->next_mpv_offset = 0;
+
+ scratch->al_log_sum = 0;
+
+ fatbit_clear(scratch->aqa);
+
+ scratch->catchup_pq.qm_size = 0;
+
+ init_outfixes_for_block(t, scratch, state, is_small_block);
+}
+
static rose_inline
void roseBlockEodExec(const struct RoseEngine *t, u64a offset,
struct hs_scratch *scratch) {
@@ -343,12 +343,12 @@ void runEagerPrefixesBlock(const struct RoseEngine *t,
}
void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
- assert(t);
- assert(scratch);
- assert(scratch->core_info.buf);
- assert(mmbit_sparse_iter_state_size(t->rolesWithStateCount)
- < MAX_SPARSE_ITER_STATES);
-
+ assert(t);
+ assert(scratch);
+ assert(scratch->core_info.buf);
+ assert(mmbit_sparse_iter_state_size(t->rolesWithStateCount)
+ < MAX_SPARSE_ITER_STATES);
+
// We should not have been called if we've already been told to terminate
// matching.
assert(!told_to_stop_matching(scratch));
@@ -364,59 +364,59 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF
|| scratch->core_info.len <= t->maxBiAnchoredWidth);
- const size_t length = scratch->core_info.len;
-
- // We have optimizations for small block scans: we run a single coalesced
- // HWLM scan instead of running the anchored and floating matchers. Some
- // outfixes are disabled as well (for SEP scans of single-byte literals,
- // which are also run in the HWLM scan).
- const char is_small_block =
- (length < ROSE_SMALL_BLOCK_LEN && t->sbmatcherOffset);
-
+ const size_t length = scratch->core_info.len;
+
+ // We have optimizations for small block scans: we run a single coalesced
+ // HWLM scan instead of running the anchored and floating matchers. Some
+ // outfixes are disabled as well (for SEP scans of single-byte literals,
+ // which are also run in the HWLM scan).
+ const char is_small_block =
+ (length < ROSE_SMALL_BLOCK_LEN && t->sbmatcherOffset);
+
char *state = scratch->core_info.state;
-
+
init_for_block(t, scratch, state, is_small_block);
-
- struct RoseContext *tctxt = &scratch->tctxt;
-
- if (is_small_block) {
- const void *sbtable = getSBLiteralMatcher(t);
- assert(sbtable);
-
- size_t sblen = MIN(length, t->smallBlockDistance);
-
- DEBUG_PRINTF("BEGIN SMALL BLOCK (over %zu/%zu)\n", sblen, length);
- DEBUG_PRINTF("-- %016llx\n", tctxt->groups);
- hwlmExec(sbtable, scratch->core_info.buf, sblen, 0, roseCallback,
+
+ struct RoseContext *tctxt = &scratch->tctxt;
+
+ if (is_small_block) {
+ const void *sbtable = getSBLiteralMatcher(t);
+ assert(sbtable);
+
+ size_t sblen = MIN(length, t->smallBlockDistance);
+
+ DEBUG_PRINTF("BEGIN SMALL BLOCK (over %zu/%zu)\n", sblen, length);
+ DEBUG_PRINTF("-- %016llx\n", tctxt->groups);
+ hwlmExec(sbtable, scratch->core_info.buf, sblen, 0, roseCallback,
scratch, tctxt->groups);
} else {
runEagerPrefixesBlock(t, scratch);
-
+
if (roseBlockAnchored(t, scratch)) {
return;
- }
+ }
if (roseBlockFloating(t, scratch)) {
return;
- }
+ }
}
-
+
if (cleanUpDelayed(t, scratch, length, 0) == HWLM_TERMINATE_MATCHING) {
return;
- }
-
+ }
+
assert(!can_stop_matching(scratch));
-
+
roseCatchUpTo(t, scratch, length);
-
+
if (!t->requiresEodCheck || !t->eodProgramOffset) {
DEBUG_PRINTF("no eod check required\n");
return;
- }
-
+ }
+
if (can_stop_matching(scratch)) {
DEBUG_PRINTF("bailing, already halted\n");
- return;
- }
-
+ return;
+ }
+
roseBlockEodExec(t, length, scratch);
-}
+}
diff --git a/contrib/libs/hyperscan/src/rose/catchup.c b/contrib/libs/hyperscan/src/rose/catchup.c
index 14e0094dff..7a6648da98 100644
--- a/contrib/libs/hyperscan/src/rose/catchup.c
+++ b/contrib/libs/hyperscan/src/rose/catchup.c
@@ -1,76 +1,76 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
* \brief Rose runtime: code for catching up output-exposed engines.
*/
-#include "catchup.h"
-#include "match.h"
+#include "catchup.h"
+#include "match.h"
#include "program_runtime.h"
-#include "rose.h"
-#include "nfa/nfa_rev_api.h"
-#include "nfa/mpv.h"
-#include "som/som_runtime.h"
-#include "util/fatbit.h"
+#include "rose.h"
+#include "nfa/nfa_rev_api.h"
+#include "nfa/mpv.h"
+#include "som/som_runtime.h"
+#include "util/fatbit.h"
#include "report.h"
-
-typedef struct queue_match PQ_T;
-#define PQ_COMP(pqc_items, a, b) ((pqc_items)[a].loc < (pqc_items)[b].loc)
-#define PQ_COMP_B(pqc_items, a, b_fixed) ((pqc_items)[a].loc < (b_fixed).loc)
-
-#include "util/pqueue.h"
-
-static really_inline
+
+typedef struct queue_match PQ_T;
+#define PQ_COMP(pqc_items, a, b) ((pqc_items)[a].loc < (pqc_items)[b].loc)
+#define PQ_COMP_B(pqc_items, a, b_fixed) ((pqc_items)[a].loc < (b_fixed).loc)
+
+#include "util/pqueue.h"
+
+static really_inline
int roseNfaRunProgram(const struct RoseEngine *rose, struct hs_scratch *scratch,
u64a som, u64a offset, ReportID id, const char from_mpv) {
const u32 program = id;
u8 flags = ROSE_PROG_FLAG_IN_CATCHUP;
if (from_mpv) {
flags |= ROSE_PROG_FLAG_FROM_MPV;
- }
-
+ }
+
roseRunProgram(rose, scratch, program, som, offset, flags);
return can_stop_matching(scratch) ? MO_HALT_MATCHING : MO_CONTINUE_MATCHING;
-}
-
+}
+
static rose_inline
char roseSuffixInfoIsExhausted(const struct RoseEngine *rose,
const struct NfaInfo *info,
const char *exhausted) {
if (!info->ekeyListOffset) {
- return 0;
- }
-
+ return 0;
+ }
+
DEBUG_PRINTF("check exhaustion -> start at %u\n", info->ekeyListOffset);
-
+
/* INVALID_EKEY terminated list */
const u32 *ekeys = getByOffset(rose, info->ekeyListOffset);
while (*ekeys != INVALID_EKEY) {
@@ -80,367 +80,367 @@ char roseSuffixInfoIsExhausted(const struct RoseEngine *rose,
return 0;
}
++ekeys;
- }
-
+ }
+
DEBUG_PRINTF("all ekeys exhausted -> dead\n");
return 1;
-}
-
+}
+
static really_inline
char roseSuffixIsExhausted(const struct RoseEngine *rose, u32 qi,
const char *exhausted) {
DEBUG_PRINTF("check queue %u\n", qi);
const struct NfaInfo *info = getNfaInfoByQueue(rose, qi);
return roseSuffixInfoIsExhausted(rose, info, exhausted);
-}
-
-static really_inline
+}
+
+static really_inline
void deactivateQueue(const struct RoseEngine *t, u8 *aa, u32 qi,
struct hs_scratch *scratch) {
u32 aaCount = t->activeArrayCount;
u32 qCount = t->queueCount;
-
- /* this is sailing close to the wind with regards to invalidating an
- * iteration. We are saved by the fact that unsetting does not clear the
- * summary bits -> the block under the gun remains valid
- */
- DEBUG_PRINTF("killing off zombie queue %u\n", qi);
- mmbit_unset(aa, aaCount, qi);
- fatbit_unset(scratch->aqa, qCount, qi);
-}
-
-static really_inline
-void ensureQueueActive(const struct RoseEngine *t, u32 qi, u32 qCount,
- struct mq *q, struct hs_scratch *scratch) {
- if (!fatbit_set(scratch->aqa, qCount, qi)) {
- DEBUG_PRINTF("initing %u\n", qi);
+
+ /* this is sailing close to the wind with regards to invalidating an
+ * iteration. We are saved by the fact that unsetting does not clear the
+ * summary bits -> the block under the gun remains valid
+ */
+ DEBUG_PRINTF("killing off zombie queue %u\n", qi);
+ mmbit_unset(aa, aaCount, qi);
+ fatbit_unset(scratch->aqa, qCount, qi);
+}
+
+static really_inline
+void ensureQueueActive(const struct RoseEngine *t, u32 qi, u32 qCount,
+ struct mq *q, struct hs_scratch *scratch) {
+ if (!fatbit_set(scratch->aqa, qCount, qi)) {
+ DEBUG_PRINTF("initing %u\n", qi);
initQueue(q, qi, t, scratch);
- loadStreamState(q->nfa, q, 0);
- pushQueueAt(q, 0, MQE_START, 0);
- }
-}
-
-static really_inline
-void pq_replace_top_with(struct catchup_pq *pq,
- UNUSED struct hs_scratch *scratch, u32 queue,
- s64a loc) {
- DEBUG_PRINTF("inserting q%u in pq at %lld\n", queue, loc);
- struct queue_match temp = {
- .queue = queue,
- .loc = (size_t)loc
- };
-
- assert(loc > 0);
- assert(pq->qm_size);
- assert(loc <= (s64a)scratch->core_info.len);
- pq_replace_top(pq->qm, pq->qm_size, temp);
-}
-
-static really_inline
-void pq_insert_with(struct catchup_pq *pq,
- UNUSED struct hs_scratch *scratch, u32 queue, s64a loc) {
- DEBUG_PRINTF("inserting q%u in pq at %lld\n", queue, loc);
- struct queue_match temp = {
- .queue = queue,
- .loc = (size_t)loc
- };
-
- assert(loc > 0);
- assert(loc <= (s64a)scratch->core_info.len);
- pq_insert(pq->qm, pq->qm_size, temp);
- ++pq->qm_size;
-}
-
-static really_inline
-void pq_pop_nice(struct catchup_pq *pq) {
- pq_pop(pq->qm, pq->qm_size);
- pq->qm_size--;
-}
-
-static really_inline
-s64a pq_top_loc(struct catchup_pq *pq) {
- assert(pq->qm_size);
- return (s64a)pq_top(pq->qm)->loc;
-}
-
-/* requires that we are the top item on the pq */
-static really_inline
+ loadStreamState(q->nfa, q, 0);
+ pushQueueAt(q, 0, MQE_START, 0);
+ }
+}
+
+static really_inline
+void pq_replace_top_with(struct catchup_pq *pq,
+ UNUSED struct hs_scratch *scratch, u32 queue,
+ s64a loc) {
+ DEBUG_PRINTF("inserting q%u in pq at %lld\n", queue, loc);
+ struct queue_match temp = {
+ .queue = queue,
+ .loc = (size_t)loc
+ };
+
+ assert(loc > 0);
+ assert(pq->qm_size);
+ assert(loc <= (s64a)scratch->core_info.len);
+ pq_replace_top(pq->qm, pq->qm_size, temp);
+}
+
+static really_inline
+void pq_insert_with(struct catchup_pq *pq,
+ UNUSED struct hs_scratch *scratch, u32 queue, s64a loc) {
+ DEBUG_PRINTF("inserting q%u in pq at %lld\n", queue, loc);
+ struct queue_match temp = {
+ .queue = queue,
+ .loc = (size_t)loc
+ };
+
+ assert(loc > 0);
+ assert(loc <= (s64a)scratch->core_info.len);
+ pq_insert(pq->qm, pq->qm_size, temp);
+ ++pq->qm_size;
+}
+
+static really_inline
+void pq_pop_nice(struct catchup_pq *pq) {
+ pq_pop(pq->qm, pq->qm_size);
+ pq->qm_size--;
+}
+
+static really_inline
+s64a pq_top_loc(struct catchup_pq *pq) {
+ assert(pq->qm_size);
+ return (s64a)pq_top(pq->qm)->loc;
+}
+
+/* requires that we are the top item on the pq */
+static really_inline
hwlmcb_rv_t runExistingNfaToNextMatch(const struct RoseEngine *t, u32 qi,
struct mq *q, s64a loc,
- struct hs_scratch *scratch, u8 *aa,
- char report_curr) {
- assert(pq_top(scratch->catchup_pq.qm)->queue == qi);
- assert(scratch->catchup_pq.qm_size);
- assert(!q->report_current);
- if (report_curr) {
- DEBUG_PRINTF("need to report matches\n");
- q->report_current = 1;
- }
-
- DEBUG_PRINTF("running queue from %u:%lld to %lld\n", q->cur, q_cur_loc(q),
- loc);
-
- assert(q_cur_loc(q) <= loc);
-
- char alive = nfaQueueExecToMatch(q->nfa, q, loc);
-
- /* exit via gift shop */
- if (alive == MO_MATCHES_PENDING) {
- /* we have pending matches */
- assert(q_cur_loc(q) + scratch->core_info.buf_offset
- >= scratch->tctxt.minMatchOffset);
- pq_replace_top_with(&scratch->catchup_pq, scratch, qi, q_cur_loc(q));
- return HWLM_CONTINUE_MATCHING;
- } else if (!alive) {
- if (report_curr && can_stop_matching(scratch)) {
- DEBUG_PRINTF("bailing\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
+ struct hs_scratch *scratch, u8 *aa,
+ char report_curr) {
+ assert(pq_top(scratch->catchup_pq.qm)->queue == qi);
+ assert(scratch->catchup_pq.qm_size);
+ assert(!q->report_current);
+ if (report_curr) {
+ DEBUG_PRINTF("need to report matches\n");
+ q->report_current = 1;
+ }
+
+ DEBUG_PRINTF("running queue from %u:%lld to %lld\n", q->cur, q_cur_loc(q),
+ loc);
+
+ assert(q_cur_loc(q) <= loc);
+
+ char alive = nfaQueueExecToMatch(q->nfa, q, loc);
+
+ /* exit via gift shop */
+ if (alive == MO_MATCHES_PENDING) {
+ /* we have pending matches */
+ assert(q_cur_loc(q) + scratch->core_info.buf_offset
+ >= scratch->tctxt.minMatchOffset);
+ pq_replace_top_with(&scratch->catchup_pq, scratch, qi, q_cur_loc(q));
+ return HWLM_CONTINUE_MATCHING;
+ } else if (!alive) {
+ if (report_curr && can_stop_matching(scratch)) {
+ DEBUG_PRINTF("bailing\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
deactivateQueue(t, aa, qi, scratch);
- } else if (q->cur == q->end) {
- DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, loc);
- } else {
- DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi);
- u32 i = 0;
- while (q->cur < q->end) {
- q->items[i] = q->items[q->cur++];
- DEBUG_PRINTF("q[%u] = %u:%lld\n", i, q->items[i].type,
- q->items[i].location);
- assert(q->items[i].type != MQE_END);
- i++;
- }
- q->cur = 0;
- q->end = i;
- }
-
- pq_pop_nice(&scratch->catchup_pq);
-
- return HWLM_CONTINUE_MATCHING;
-}
-
-static really_inline
+ } else if (q->cur == q->end) {
+ DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, loc);
+ } else {
+ DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi);
+ u32 i = 0;
+ while (q->cur < q->end) {
+ q->items[i] = q->items[q->cur++];
+ DEBUG_PRINTF("q[%u] = %u:%lld\n", i, q->items[i].type,
+ q->items[i].location);
+ assert(q->items[i].type != MQE_END);
+ i++;
+ }
+ q->cur = 0;
+ q->end = i;
+ }
+
+ pq_pop_nice(&scratch->catchup_pq);
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
+static really_inline
hwlmcb_rv_t runNewNfaToNextMatch(const struct RoseEngine *t, u32 qi,
struct mq *q, s64a loc,
- struct hs_scratch *scratch, u8 *aa,
- s64a report_ok_loc) {
- assert(!q->report_current);
- DEBUG_PRINTF("running queue from %u:%lld to %lld\n", q->cur, q_cur_loc(q),
- loc);
- DEBUG_PRINTF("min match offset %llu\n", scratch->tctxt.minMatchOffset);
-
- char alive = 1;
-
-restart:
- alive = nfaQueueExecToMatch(q->nfa, q, loc);
-
- if (alive == MO_MATCHES_PENDING) {
- DEBUG_PRINTF("we have pending matches at %lld\n", q_cur_loc(q));
- s64a qcl = q_cur_loc(q);
-
- if (qcl == report_ok_loc) {
- assert(q->cur != q->end); /* the queue shouldn't be empty if there
- * are pending matches. */
- q->report_current = 1;
- DEBUG_PRINTF("restarting...\n");
- goto restart;
- }
- assert(qcl + scratch->core_info.buf_offset
- >= scratch->tctxt.minMatchOffset);
- pq_insert_with(&scratch->catchup_pq, scratch, qi, qcl);
- } else if (!alive) {
- if (can_stop_matching(scratch)) {
- DEBUG_PRINTF("bailing\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
+ struct hs_scratch *scratch, u8 *aa,
+ s64a report_ok_loc) {
+ assert(!q->report_current);
+ DEBUG_PRINTF("running queue from %u:%lld to %lld\n", q->cur, q_cur_loc(q),
+ loc);
+ DEBUG_PRINTF("min match offset %llu\n", scratch->tctxt.minMatchOffset);
+
+ char alive = 1;
+
+restart:
+ alive = nfaQueueExecToMatch(q->nfa, q, loc);
+
+ if (alive == MO_MATCHES_PENDING) {
+ DEBUG_PRINTF("we have pending matches at %lld\n", q_cur_loc(q));
+ s64a qcl = q_cur_loc(q);
+
+ if (qcl == report_ok_loc) {
+ assert(q->cur != q->end); /* the queue shouldn't be empty if there
+ * are pending matches. */
+ q->report_current = 1;
+ DEBUG_PRINTF("restarting...\n");
+ goto restart;
+ }
+ assert(qcl + scratch->core_info.buf_offset
+ >= scratch->tctxt.minMatchOffset);
+ pq_insert_with(&scratch->catchup_pq, scratch, qi, qcl);
+ } else if (!alive) {
+ if (can_stop_matching(scratch)) {
+ DEBUG_PRINTF("bailing\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
deactivateQueue(t, aa, qi, scratch);
- } else if (q->cur == q->end) {
- DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, loc);
- } else {
- DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi);
- u32 i = 0;
- while (q->cur < q->end) {
- q->items[i] = q->items[q->cur++];
- DEBUG_PRINTF("q[%u] = %u:%lld\n", i, q->items[i].type,
- q->items[i].location);
- assert(q->items[i].type != MQE_END);
- i++;
- }
- q->cur = 0;
- q->end = i;
- }
-
- return HWLM_CONTINUE_MATCHING;
-}
-
-/* for use by mpv (chained) only */
+ } else if (q->cur == q->end) {
+ DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, loc);
+ } else {
+ DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi);
+ u32 i = 0;
+ while (q->cur < q->end) {
+ q->items[i] = q->items[q->cur++];
+ DEBUG_PRINTF("q[%u] = %u:%lld\n", i, q->items[i].type,
+ q->items[i].location);
+ assert(q->items[i].type != MQE_END);
+ i++;
+ }
+ q->cur = 0;
+ q->end = i;
+ }
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
+/* for use by mpv (chained) only */
static
int roseNfaFinalBlastAdaptor(u64a start, u64a end, ReportID id, void *context) {
struct hs_scratch *scratch = context;
assert(scratch && scratch->magic == SCRATCH_MAGIC);
const struct RoseEngine *t = scratch->core_info.rose;
-
+
DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end);
-
+
int cb_rv = roseNfaRunProgram(t, scratch, start, end, id, 1);
- if (cb_rv == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
- return MO_CONTINUE_MATCHING;
- } else {
- assert(cb_rv == MO_CONTINUE_MATCHING);
+ if (cb_rv == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
+ return MO_CONTINUE_MATCHING;
+ } else {
+ assert(cb_rv == MO_CONTINUE_MATCHING);
return !roseSuffixIsExhausted(t, 0,
scratch->core_info.exhaustionVector);
- }
-}
-
-static really_inline
-void ensureEnd(struct mq *q, UNUSED u32 qi, s64a final_loc) {
- DEBUG_PRINTF("ensure MQE_END %lld for queue %u\n", final_loc, qi);
- if (final_loc >= q_last_loc(q)) {
- /* TODO: ensure situation does not arise */
- assert(q_last_type(q) != MQE_END);
- pushQueueNoMerge(q, MQE_END, final_loc);
- }
-}
-
-static really_inline
-hwlmcb_rv_t add_to_queue(const struct RoseEngine *t, struct mq *queues,
- u32 qCount, u8 *aa, struct hs_scratch *scratch,
- s64a loc, u32 qi, s64a report_ok_loc) {
- struct mq *q = queues + qi;
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
-
- if (roseSuffixInfoIsExhausted(t, info,
- scratch->core_info.exhaustionVector)) {
+ }
+}
+
+static really_inline
+void ensureEnd(struct mq *q, UNUSED u32 qi, s64a final_loc) {
+ DEBUG_PRINTF("ensure MQE_END %lld for queue %u\n", final_loc, qi);
+ if (final_loc >= q_last_loc(q)) {
+ /* TODO: ensure situation does not arise */
+ assert(q_last_type(q) != MQE_END);
+ pushQueueNoMerge(q, MQE_END, final_loc);
+ }
+}
+
+static really_inline
+hwlmcb_rv_t add_to_queue(const struct RoseEngine *t, struct mq *queues,
+ u32 qCount, u8 *aa, struct hs_scratch *scratch,
+ s64a loc, u32 qi, s64a report_ok_loc) {
+ struct mq *q = queues + qi;
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+
+ if (roseSuffixInfoIsExhausted(t, info,
+ scratch->core_info.exhaustionVector)) {
deactivateQueue(t, aa, qi, scratch);
- return HWLM_CONTINUE_MATCHING;
- }
-
- ensureQueueActive(t, qi, qCount, q, scratch);
-
- if (unlikely(loc < q_cur_loc(q))) {
- DEBUG_PRINTF("err loc %lld < location %lld\n", loc, q_cur_loc(q));
- return HWLM_CONTINUE_MATCHING;
- }
-
- ensureEnd(q, qi, loc);
-
+ return HWLM_CONTINUE_MATCHING;
+ }
+
+ ensureQueueActive(t, qi, qCount, q, scratch);
+
+ if (unlikely(loc < q_cur_loc(q))) {
+ DEBUG_PRINTF("err loc %lld < location %lld\n", loc, q_cur_loc(q));
+ return HWLM_CONTINUE_MATCHING;
+ }
+
+ ensureEnd(q, qi, loc);
+
return runNewNfaToNextMatch(t, qi, q, loc, scratch, aa, report_ok_loc);
-}
-
-static really_inline
-s64a findSecondPlace(struct catchup_pq *pq, s64a loc_limit) {
- assert(pq->qm_size); /* we are still on the pq and we are first place */
-
- /* we know (*cough* encapsulation) that second place will either be in
- * pq->qm[1] or pq->qm[2] (we are pq->qm[0]) */
- switch (pq->qm_size) {
- case 0:
- case 1:
- return (s64a)loc_limit;
- case 2:
- return MIN((s64a)pq->qm[1].loc, loc_limit);
- default:;
- size_t best = MIN(pq->qm[1].loc, pq->qm[2].loc);
- return MIN((s64a)best, loc_limit);
- }
-}
-
+}
+
+static really_inline
+s64a findSecondPlace(struct catchup_pq *pq, s64a loc_limit) {
+ assert(pq->qm_size); /* we are still on the pq and we are first place */
+
+ /* we know (*cough* encapsulation) that second place will either be in
+ * pq->qm[1] or pq->qm[2] (we are pq->qm[0]) */
+ switch (pq->qm_size) {
+ case 0:
+ case 1:
+ return (s64a)loc_limit;
+ case 2:
+ return MIN((s64a)pq->qm[1].loc, loc_limit);
+ default:;
+ size_t best = MIN(pq->qm[1].loc, pq->qm[2].loc);
+ return MIN((s64a)best, loc_limit);
+ }
+}
+
hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc,
- struct hs_scratch *scratch) {
+ struct hs_scratch *scratch) {
char *state = scratch->core_info.state;
- struct mq *queues = scratch->queues;
- u8 *aa = getActiveLeafArray(t, state);
- UNUSED u32 aaCount = t->activeArrayCount;
- u32 qCount = t->queueCount;
-
- /* find first match of each pending nfa */
- DEBUG_PRINTF("aa=%p, aaCount=%u\n", aa, aaCount);
-
- assert(t->outfixBeginQueue == 1);
-
- u32 qi = 0;
- assert(mmbit_isset(aa, aaCount, 0)); /* caller should have already bailed */
-
- DEBUG_PRINTF("catching up qi=%u to loc %lld\n", qi, loc);
-
- struct mq *q = queues + qi;
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
- u64a mpv_exec_end = scratch->core_info.buf_offset + loc;
- u64a next_pos_match_loc = 0;
-
- if (roseSuffixInfoIsExhausted(t, info,
- scratch->core_info.exhaustionVector)) {
+ struct mq *queues = scratch->queues;
+ u8 *aa = getActiveLeafArray(t, state);
+ UNUSED u32 aaCount = t->activeArrayCount;
+ u32 qCount = t->queueCount;
+
+ /* find first match of each pending nfa */
+ DEBUG_PRINTF("aa=%p, aaCount=%u\n", aa, aaCount);
+
+ assert(t->outfixBeginQueue == 1);
+
+ u32 qi = 0;
+ assert(mmbit_isset(aa, aaCount, 0)); /* caller should have already bailed */
+
+ DEBUG_PRINTF("catching up qi=%u to loc %lld\n", qi, loc);
+
+ struct mq *q = queues + qi;
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+ u64a mpv_exec_end = scratch->core_info.buf_offset + loc;
+ u64a next_pos_match_loc = 0;
+
+ if (roseSuffixInfoIsExhausted(t, info,
+ scratch->core_info.exhaustionVector)) {
deactivateQueue(t, aa, qi, scratch);
- goto done;
- }
-
- ensureQueueActive(t, qi, qCount, q, scratch);
-
- if (unlikely(loc < q_cur_loc(q))) {
- DEBUG_PRINTF("err loc %lld < location %lld\n", loc, q_cur_loc(q));
- goto done;
- }
-
- ensureEnd(q, qi, loc);
-
- assert(!q->report_current);
-
+ goto done;
+ }
+
+ ensureQueueActive(t, qi, qCount, q, scratch);
+
+ if (unlikely(loc < q_cur_loc(q))) {
+ DEBUG_PRINTF("err loc %lld < location %lld\n", loc, q_cur_loc(q));
+ goto done;
+ }
+
+ ensureEnd(q, qi, loc);
+
+ assert(!q->report_current);
+
q->cb = roseNfaFinalBlastAdaptor;
-
- DEBUG_PRINTF("queue %u blasting, %u/%u [%lld/%lld]\n",
- qi, q->cur, q->end, q->items[q->cur].location, loc);
-
- scratch->tctxt.mpv_inactive = 0;
-
- /* we know it is going to be an mpv, skip the indirection */
+
+ DEBUG_PRINTF("queue %u blasting, %u/%u [%lld/%lld]\n",
+ qi, q->cur, q->end, q->items[q->cur].location, loc);
+
+ scratch->tctxt.mpv_inactive = 0;
+
+ /* we know it is going to be an mpv, skip the indirection */
next_pos_match_loc = nfaExecMpv_QueueExecRaw(q->nfa, q, loc);
- assert(!q->report_current);
-
- if (!next_pos_match_loc) { /* 0 means dead */
- DEBUG_PRINTF("mpv is pining for the fjords\n");
- if (can_stop_matching(scratch)) {
+ assert(!q->report_current);
+
+ if (!next_pos_match_loc) { /* 0 means dead */
+ DEBUG_PRINTF("mpv is pining for the fjords\n");
+ if (can_stop_matching(scratch)) {
deactivateQueue(t, aa, qi, scratch);
- return HWLM_TERMINATE_MATCHING;
- }
-
- next_pos_match_loc = scratch->core_info.len;
- scratch->tctxt.mpv_inactive = 1;
- }
-
- if (q->cur == q->end) {
- DEBUG_PRINTF("queue %u finished, nfa lives [%lld]\n", qi, loc);
- q->cur = 0;
- q->end = 0;
- pushQueueAt(q, 0, MQE_START, loc);
- } else {
- DEBUG_PRINTF("queue %u not finished, nfa lives [%lld]\n", qi, loc);
- }
-
-done:
+ return HWLM_TERMINATE_MATCHING;
+ }
+
+ next_pos_match_loc = scratch->core_info.len;
+ scratch->tctxt.mpv_inactive = 1;
+ }
+
+ if (q->cur == q->end) {
+ DEBUG_PRINTF("queue %u finished, nfa lives [%lld]\n", qi, loc);
+ q->cur = 0;
+ q->end = 0;
+ pushQueueAt(q, 0, MQE_START, loc);
+ } else {
+ DEBUG_PRINTF("queue %u not finished, nfa lives [%lld]\n", qi, loc);
+ }
+
+done:
if (t->flushCombProgramOffset) {
if (roseRunFlushCombProgram(t, scratch, mpv_exec_end)
== HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
}
- updateMinMatchOffsetFromMpv(&scratch->tctxt, mpv_exec_end);
- scratch->tctxt.next_mpv_offset
- = MAX(next_pos_match_loc + scratch->core_info.buf_offset,
- mpv_exec_end + 1);
-
- DEBUG_PRINTF("next match loc %lld (off %llu)\n", next_pos_match_loc,
- scratch->tctxt.next_mpv_offset);
- return can_stop_matching(scratch) ? HWLM_TERMINATE_MATCHING
- : HWLM_CONTINUE_MATCHING;
-}
-
+ updateMinMatchOffsetFromMpv(&scratch->tctxt, mpv_exec_end);
+ scratch->tctxt.next_mpv_offset
+ = MAX(next_pos_match_loc + scratch->core_info.buf_offset,
+ mpv_exec_end + 1);
+
+ DEBUG_PRINTF("next match loc %lld (off %llu)\n", next_pos_match_loc,
+ scratch->tctxt.next_mpv_offset);
+ return can_stop_matching(scratch) ? HWLM_TERMINATE_MATCHING
+ : HWLM_CONTINUE_MATCHING;
+}
+
static really_inline
char in_mpv(const struct RoseEngine *rose, const struct hs_scratch *scratch) {
const struct RoseContext *tctxt = &scratch->tctxt;
@@ -448,453 +448,453 @@ char in_mpv(const struct RoseEngine *rose, const struct hs_scratch *scratch) {
if (tctxt->curr_qi < rose->outfixBeginQueue) {
assert(getNfaByQueue(rose, tctxt->curr_qi)->type == MPV_NFA);
return 1;
- }
+ }
return 0;
-}
-
+}
+
static
int roseNfaBlastAdaptor(u64a start, u64a end, ReportID id, void *context) {
struct hs_scratch *scratch = context;
assert(scratch && scratch->magic == SCRATCH_MAGIC);
const struct RoseEngine *t = scratch->core_info.rose;
-
+
DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end);
-
+
const char from_mpv = in_mpv(t, scratch);
int cb_rv = roseNfaRunProgram(t, scratch, start, end, id, from_mpv);
- if (cb_rv == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
- return MO_CONTINUE_MATCHING;
- } else {
- assert(cb_rv == MO_CONTINUE_MATCHING);
+ if (cb_rv == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
+ return MO_CONTINUE_MATCHING;
+ } else {
+ assert(cb_rv == MO_CONTINUE_MATCHING);
return !roseSuffixIsExhausted(t, scratch->tctxt.curr_qi,
scratch->core_info.exhaustionVector);
- }
-}
-
+ }
+}
+
int roseNfaAdaptor(u64a start, u64a end, ReportID id, void *context) {
struct hs_scratch *scratch = context;
assert(scratch && scratch->magic == SCRATCH_MAGIC);
-
+
DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end);
-
- /* must be a external report as haig cannot directly participate in chain */
+
+ /* must be a external report as haig cannot directly participate in chain */
return roseNfaRunProgram(scratch->core_info.rose, scratch, start, end, id,
0);
-}
-
-static really_inline
+}
+
+static really_inline
char blast_queue(struct hs_scratch *scratch, struct mq *q, u32 qi, s64a to_loc,
char report_current) {
scratch->tctxt.curr_qi = qi;
q->cb = roseNfaBlastAdaptor;
- q->report_current = report_current;
- DEBUG_PRINTF("queue %u blasting, %u/%u [%lld/%lld]\n", qi, q->cur, q->end,
- q_cur_loc(q), to_loc);
- char alive = nfaQueueExec(q->nfa, q, to_loc);
+ q->report_current = report_current;
+ DEBUG_PRINTF("queue %u blasting, %u/%u [%lld/%lld]\n", qi, q->cur, q->end,
+ q_cur_loc(q), to_loc);
+ char alive = nfaQueueExec(q->nfa, q, to_loc);
q->cb = roseNfaAdaptor;
- assert(!q->report_current);
-
- return alive;
-}
-
-static really_inline
-hwlmcb_rv_t buildSufPQ_final(const struct RoseEngine *t, s64a report_ok_loc,
- s64a second_place_loc, s64a final_loc,
- struct hs_scratch *scratch, u8 *aa, u32 a_qi) {
- struct mq *q = scratch->queues + a_qi;
- const struct NfaInfo *info = getNfaInfoByQueue(t, a_qi);
- DEBUG_PRINTF("blasting qi=%u to %lld [final %lld]\n", a_qi, second_place_loc,
- final_loc);
-
- if (roseSuffixInfoIsExhausted(t, info,
- scratch->core_info.exhaustionVector)) {
+ assert(!q->report_current);
+
+ return alive;
+}
+
+static really_inline
+hwlmcb_rv_t buildSufPQ_final(const struct RoseEngine *t, s64a report_ok_loc,
+ s64a second_place_loc, s64a final_loc,
+ struct hs_scratch *scratch, u8 *aa, u32 a_qi) {
+ struct mq *q = scratch->queues + a_qi;
+ const struct NfaInfo *info = getNfaInfoByQueue(t, a_qi);
+ DEBUG_PRINTF("blasting qi=%u to %lld [final %lld]\n", a_qi, second_place_loc,
+ final_loc);
+
+ if (roseSuffixInfoIsExhausted(t, info,
+ scratch->core_info.exhaustionVector)) {
deactivateQueue(t, aa, a_qi, scratch);
- return HWLM_CONTINUE_MATCHING;
- }
-
- ensureQueueActive(t, a_qi, t->queueCount, q, scratch);
-
- if (unlikely(final_loc < q_cur_loc(q))) {
- DEBUG_PRINTF("err loc %lld < location %lld\n", final_loc, q_cur_loc(q));
- return HWLM_CONTINUE_MATCHING;
- }
-
- ensureEnd(q, a_qi, final_loc);
-
+ return HWLM_CONTINUE_MATCHING;
+ }
+
+ ensureQueueActive(t, a_qi, t->queueCount, q, scratch);
+
+ if (unlikely(final_loc < q_cur_loc(q))) {
+ DEBUG_PRINTF("err loc %lld < location %lld\n", final_loc, q_cur_loc(q));
+ return HWLM_CONTINUE_MATCHING;
+ }
+
+ ensureEnd(q, a_qi, final_loc);
+
char alive = blast_queue(scratch, q, a_qi, second_place_loc, 0);
-
+
/* We have three possible outcomes:
- * (1) the nfa died
- * (2) we completed the queue (implies that second_place_loc == final_loc)
- * (3) the queue ran to second_place_loc and stopped. In this case we need
- * to find the next match location.
- */
-
- if (!alive) {
- if (can_stop_matching(scratch)) {
- DEBUG_PRINTF("roseCatchUpNfas done as bailing\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
+ * (1) the nfa died
+ * (2) we completed the queue (implies that second_place_loc == final_loc)
+ * (3) the queue ran to second_place_loc and stopped. In this case we need
+ * to find the next match location.
+ */
+
+ if (!alive) {
+ if (can_stop_matching(scratch)) {
+ DEBUG_PRINTF("roseCatchUpNfas done as bailing\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
deactivateQueue(t, aa, a_qi, scratch);
- } else if (q->cur == q->end) {
- DEBUG_PRINTF("queue %u finished, nfa lives [%lld]\n", a_qi, final_loc);
-
- assert(second_place_loc == final_loc);
-
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, final_loc);
- } else {
- DEBUG_PRINTF("queue %u not finished, %u/%u [%lld/%lld]\n", a_qi, q->cur,
- q->end, q_cur_loc(q), final_loc);
- DEBUG_PRINTF("finding next match location\n");
-
- assert(second_place_loc < final_loc);
- assert(q_cur_loc(q) >= second_place_loc);
-
+ } else if (q->cur == q->end) {
+ DEBUG_PRINTF("queue %u finished, nfa lives [%lld]\n", a_qi, final_loc);
+
+ assert(second_place_loc == final_loc);
+
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, final_loc);
+ } else {
+ DEBUG_PRINTF("queue %u not finished, %u/%u [%lld/%lld]\n", a_qi, q->cur,
+ q->end, q_cur_loc(q), final_loc);
+ DEBUG_PRINTF("finding next match location\n");
+
+ assert(second_place_loc < final_loc);
+ assert(q_cur_loc(q) >= second_place_loc);
+
if (runNewNfaToNextMatch(t, a_qi, q, final_loc, scratch, aa,
report_ok_loc) == HWLM_TERMINATE_MATCHING) {
- DEBUG_PRINTF("roseCatchUpNfas done\n");
- return HWLM_TERMINATE_MATCHING;
- }
- }
-
- return HWLM_CONTINUE_MATCHING;
-}
-
+ DEBUG_PRINTF("roseCatchUpNfas done\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
void streamInitSufPQ(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch) {
- assert(scratch->catchup_pq.qm_size == 0);
- assert(t->outfixBeginQueue != t->outfixEndQueue);
-
- DEBUG_PRINTF("initSufPQ: outfixes [%u,%u)\n", t->outfixBeginQueue,
- t->outfixEndQueue);
-
- u32 qCount = t->queueCount;
- u8 *aa = getActiveLeafArray(t, state);
- u32 aaCount = t->activeArrayCount;
- struct mq *queues = scratch->queues;
- size_t length = scratch->core_info.len;
-
- u32 qi = mmbit_iterate_bounded(aa, aaCount, t->outfixBeginQueue,
- t->outfixEndQueue);
- for (; qi < t->outfixEndQueue;) {
- DEBUG_PRINTF("adding qi=%u\n", qi);
- struct mq *q = queues + qi;
-
- ensureQueueActive(t, qi, qCount, q, scratch);
- ensureEnd(q, qi, length);
-
- char alive = nfaQueueExecToMatch(q->nfa, q, length);
-
- if (alive == MO_MATCHES_PENDING) {
- DEBUG_PRINTF("we have pending matches at %lld\n", q_cur_loc(q));
- s64a qcl = q_cur_loc(q);
-
- pq_insert_with(&scratch->catchup_pq, scratch, qi, qcl);
- } else if (!alive) {
+ struct hs_scratch *scratch) {
+ assert(scratch->catchup_pq.qm_size == 0);
+ assert(t->outfixBeginQueue != t->outfixEndQueue);
+
+ DEBUG_PRINTF("initSufPQ: outfixes [%u,%u)\n", t->outfixBeginQueue,
+ t->outfixEndQueue);
+
+ u32 qCount = t->queueCount;
+ u8 *aa = getActiveLeafArray(t, state);
+ u32 aaCount = t->activeArrayCount;
+ struct mq *queues = scratch->queues;
+ size_t length = scratch->core_info.len;
+
+ u32 qi = mmbit_iterate_bounded(aa, aaCount, t->outfixBeginQueue,
+ t->outfixEndQueue);
+ for (; qi < t->outfixEndQueue;) {
+ DEBUG_PRINTF("adding qi=%u\n", qi);
+ struct mq *q = queues + qi;
+
+ ensureQueueActive(t, qi, qCount, q, scratch);
+ ensureEnd(q, qi, length);
+
+ char alive = nfaQueueExecToMatch(q->nfa, q, length);
+
+ if (alive == MO_MATCHES_PENDING) {
+ DEBUG_PRINTF("we have pending matches at %lld\n", q_cur_loc(q));
+ s64a qcl = q_cur_loc(q);
+
+ pq_insert_with(&scratch->catchup_pq, scratch, qi, qcl);
+ } else if (!alive) {
deactivateQueue(t, aa, qi, scratch);
- } else {
- assert(q->cur == q->end);
- /* TODO: can this be simplified? the nfa will never produce any
- * matches for this block. */
- DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, length);
- }
-
- qi = mmbit_iterate_bounded(aa, aaCount, qi + 1, t->outfixEndQueue);
- }
-}
-
+ } else {
+ assert(q->cur == q->end);
+ /* TODO: can this be simplified? the nfa will never produce any
+ * matches for this block. */
+ DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, length);
+ }
+
+ qi = mmbit_iterate_bounded(aa, aaCount, qi + 1, t->outfixEndQueue);
+ }
+}
+
void blockInitSufPQ(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch, char is_small_block) {
- DEBUG_PRINTF("initSufPQ: outfixes [%u,%u)\n", t->outfixBeginQueue,
- t->outfixEndQueue);
-
- assert(scratch->catchup_pq.qm_size == 0);
- assert(t->outfixBeginQueue != t->outfixEndQueue);
-
- struct mq *queues = scratch->queues;
- u8 *aa = getActiveLeafArray(t, state);
- struct fatbit *aqa = scratch->aqa;
- u32 aaCount = t->activeArrayCount;
- u32 qCount = t->queueCount;
- size_t length = scratch->core_info.len;
-
- for (u32 qi = t->outfixBeginQueue; qi < t->outfixEndQueue; qi++) {
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
-
- if (is_small_block && info->in_sbmatcher) {
- DEBUG_PRINTF("skip outfix %u as it's in the SB matcher\n", qi);
- continue;
- }
-
- const struct NFA *nfa = getNfaByInfo(t, info);
- DEBUG_PRINTF("testing minwidth %u > len %zu\n", nfa->minWidth,
- length);
- size_t len = nfaRevAccelCheck(nfa, scratch->core_info.buf, length);
- if (!len) {
- continue;
- }
- mmbit_set(aa, aaCount, qi);
- fatbit_set(aqa, qCount, qi);
- struct mq *q = queues + qi;
+ struct hs_scratch *scratch, char is_small_block) {
+ DEBUG_PRINTF("initSufPQ: outfixes [%u,%u)\n", t->outfixBeginQueue,
+ t->outfixEndQueue);
+
+ assert(scratch->catchup_pq.qm_size == 0);
+ assert(t->outfixBeginQueue != t->outfixEndQueue);
+
+ struct mq *queues = scratch->queues;
+ u8 *aa = getActiveLeafArray(t, state);
+ struct fatbit *aqa = scratch->aqa;
+ u32 aaCount = t->activeArrayCount;
+ u32 qCount = t->queueCount;
+ size_t length = scratch->core_info.len;
+
+ for (u32 qi = t->outfixBeginQueue; qi < t->outfixEndQueue; qi++) {
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+
+ if (is_small_block && info->in_sbmatcher) {
+ DEBUG_PRINTF("skip outfix %u as it's in the SB matcher\n", qi);
+ continue;
+ }
+
+ const struct NFA *nfa = getNfaByInfo(t, info);
+ DEBUG_PRINTF("testing minwidth %u > len %zu\n", nfa->minWidth,
+ length);
+ size_t len = nfaRevAccelCheck(nfa, scratch->core_info.buf, length);
+ if (!len) {
+ continue;
+ }
+ mmbit_set(aa, aaCount, qi);
+ fatbit_set(aqa, qCount, qi);
+ struct mq *q = queues + qi;
initQueue(q, qi, t, scratch);
- q->length = len; /* adjust for rev_accel */
- nfaQueueInitState(nfa, q);
- pushQueueAt(q, 0, MQE_START, 0);
- pushQueueAt(q, 1, MQE_TOP, 0);
- pushQueueAt(q, 2, MQE_END, length);
-
- DEBUG_PRINTF("adding qi=%u to pq\n", qi);
-
- char alive = nfaQueueExecToMatch(q->nfa, q, length);
-
- if (alive == MO_MATCHES_PENDING) {
- DEBUG_PRINTF("we have pending matches at %lld\n", q_cur_loc(q));
- s64a qcl = q_cur_loc(q);
-
- pq_insert_with(&scratch->catchup_pq, scratch, qi, qcl);
- } else if (!alive) {
+ q->length = len; /* adjust for rev_accel */
+ nfaQueueInitState(nfa, q);
+ pushQueueAt(q, 0, MQE_START, 0);
+ pushQueueAt(q, 1, MQE_TOP, 0);
+ pushQueueAt(q, 2, MQE_END, length);
+
+ DEBUG_PRINTF("adding qi=%u to pq\n", qi);
+
+ char alive = nfaQueueExecToMatch(q->nfa, q, length);
+
+ if (alive == MO_MATCHES_PENDING) {
+ DEBUG_PRINTF("we have pending matches at %lld\n", q_cur_loc(q));
+ s64a qcl = q_cur_loc(q);
+
+ pq_insert_with(&scratch->catchup_pq, scratch, qi, qcl);
+ } else if (!alive) {
deactivateQueue(t, aa, qi, scratch);
- } else {
- assert(q->cur == q->end);
- /* TODO: can this be simplified? the nfa will never produce any
- * matches for this block. */
- DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, length);
- }
- }
-}
-
-/**
- * safe_loc is ???
- */
-static rose_inline
+ } else {
+ assert(q->cur == q->end);
+ /* TODO: can this be simplified? the nfa will never produce any
+ * matches for this block. */
+ DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, length);
+ }
+ }
+}
+
+/**
+ * safe_loc is ???
+ */
+static rose_inline
hwlmcb_rv_t buildSufPQ(const struct RoseEngine *t, char *state, s64a safe_loc,
- s64a final_loc, struct hs_scratch *scratch) {
- assert(scratch->catchup_pq.qm_size <= t->outfixEndQueue);
-
- struct RoseContext *tctxt = &scratch->tctxt;
- assert(t->activeArrayCount);
-
- assert(scratch->core_info.buf_offset + final_loc
- > tctxt->minNonMpvMatchOffset);
- DEBUG_PRINTF("buildSufPQ final loc %lld (safe %lld)\n", final_loc,
- safe_loc);
- assert(safe_loc <= final_loc);
-
- u8 *aa = getActiveLeafArray(t, state);
- u32 aaCount = t->activeArrayCount;
-
- /* find first match of each pending nfa */
- DEBUG_PRINTF("aa=%p, aaCount=%u\n", aa, aaCount);
-
- /* Note: mpv MUST not participate in the main priority queue as
- * they may have events pushed on during this process which may be before
- * the catch up point. Outfixes are remain in the pq between catchup events
- * as they never have any incoming events to worry about.
- */
- if (aaCount == t->outfixEndQueue) {
- return HWLM_CONTINUE_MATCHING;
- }
-
- DEBUG_PRINTF("mib %u/%u\n", t->outfixBeginQueue, aaCount);
-
- u32 a_qi = mmbit_iterate_bounded(aa, aaCount, t->outfixEndQueue, aaCount);
-
- if (a_qi == MMB_INVALID) {
- return HWLM_CONTINUE_MATCHING;
- }
-
- s64a report_ok_loc = tctxt->minNonMpvMatchOffset + 1
- - scratch->core_info.buf_offset;
-
+ s64a final_loc, struct hs_scratch *scratch) {
+ assert(scratch->catchup_pq.qm_size <= t->outfixEndQueue);
+
+ struct RoseContext *tctxt = &scratch->tctxt;
+ assert(t->activeArrayCount);
+
+ assert(scratch->core_info.buf_offset + final_loc
+ > tctxt->minNonMpvMatchOffset);
+ DEBUG_PRINTF("buildSufPQ final loc %lld (safe %lld)\n", final_loc,
+ safe_loc);
+ assert(safe_loc <= final_loc);
+
+ u8 *aa = getActiveLeafArray(t, state);
+ u32 aaCount = t->activeArrayCount;
+
+ /* find first match of each pending nfa */
+ DEBUG_PRINTF("aa=%p, aaCount=%u\n", aa, aaCount);
+
+ /* Note: mpv MUST not participate in the main priority queue as
+ * they may have events pushed on during this process which may be before
+ * the catch up point. Outfixes are remain in the pq between catchup events
+ * as they never have any incoming events to worry about.
+ */
+ if (aaCount == t->outfixEndQueue) {
+ return HWLM_CONTINUE_MATCHING;
+ }
+
+ DEBUG_PRINTF("mib %u/%u\n", t->outfixBeginQueue, aaCount);
+
+ u32 a_qi = mmbit_iterate_bounded(aa, aaCount, t->outfixEndQueue, aaCount);
+
+ if (a_qi == MMB_INVALID) {
+ return HWLM_CONTINUE_MATCHING;
+ }
+
+ s64a report_ok_loc = tctxt->minNonMpvMatchOffset + 1
+ - scratch->core_info.buf_offset;
+
hwlmcb_rv_t rv = roseCatchUpMPV(t, report_ok_loc, scratch);
- if (rv != HWLM_CONTINUE_MATCHING) {
+ if (rv != HWLM_CONTINUE_MATCHING) {
DEBUG_PRINTF("terminating...\n");
- return rv;
- }
-
- while (a_qi != MMB_INVALID) {
- DEBUG_PRINTF("catching up qi=%u to %lld\n", a_qi, final_loc);
- u32 n_qi = mmbit_iterate(aa, aaCount, a_qi);
-
- s64a second_place_loc
- = scratch->catchup_pq.qm_size ? pq_top_loc(&scratch->catchup_pq)
- : safe_loc;
- second_place_loc = MIN(second_place_loc, safe_loc);
+ return rv;
+ }
+
+ while (a_qi != MMB_INVALID) {
+ DEBUG_PRINTF("catching up qi=%u to %lld\n", a_qi, final_loc);
+ u32 n_qi = mmbit_iterate(aa, aaCount, a_qi);
+
+ s64a second_place_loc
+ = scratch->catchup_pq.qm_size ? pq_top_loc(&scratch->catchup_pq)
+ : safe_loc;
+ second_place_loc = MIN(second_place_loc, safe_loc);
if (n_qi == MMB_INVALID && report_ok_loc <= second_place_loc) {
- if (buildSufPQ_final(t, report_ok_loc, second_place_loc, final_loc,
- scratch, aa, a_qi)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- break;
- }
-
- if (add_to_queue(t, scratch->queues, t->queueCount, aa, scratch,
- final_loc, a_qi, report_ok_loc)
- == HWLM_TERMINATE_MATCHING) {
- DEBUG_PRINTF("roseCatchUpNfas done\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
- a_qi = n_qi;
- }
-
- DEBUG_PRINTF("PQ BUILD %u items\n", scratch->catchup_pq.qm_size);
- return HWLM_CONTINUE_MATCHING;
-}
-
-static never_inline
+ if (buildSufPQ_final(t, report_ok_loc, second_place_loc, final_loc,
+ scratch, aa, a_qi)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ break;
+ }
+
+ if (add_to_queue(t, scratch->queues, t->queueCount, aa, scratch,
+ final_loc, a_qi, report_ok_loc)
+ == HWLM_TERMINATE_MATCHING) {
+ DEBUG_PRINTF("roseCatchUpNfas done\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
+ a_qi = n_qi;
+ }
+
+ DEBUG_PRINTF("PQ BUILD %u items\n", scratch->catchup_pq.qm_size);
+ return HWLM_CONTINUE_MATCHING;
+}
+
+static never_inline
hwlmcb_rv_t roseCatchUpNfas(const struct RoseEngine *t, s64a loc,
- s64a final_loc, struct hs_scratch *scratch) {
- assert(t->activeArrayCount);
-
+ s64a final_loc, struct hs_scratch *scratch) {
+ assert(t->activeArrayCount);
+
DEBUG_PRINTF("roseCatchUpNfas offset=%llu + %lld/%lld\n",
scratch->core_info.buf_offset, loc, final_loc);
- DEBUG_PRINTF("min non mpv match offset %llu\n",
- scratch->tctxt.minNonMpvMatchOffset);
-
+ DEBUG_PRINTF("min non mpv match offset %llu\n",
+ scratch->tctxt.minNonMpvMatchOffset);
+
struct RoseContext *tctxt = &scratch->tctxt;
assert(scratch->core_info.buf_offset + loc >= tctxt->minNonMpvMatchOffset);
char *state = scratch->core_info.state;
- struct mq *queues = scratch->queues;
- u8 *aa = getActiveLeafArray(t, state);
-
- /* fire off earliest nfa match and catchup anchored matches to that point */
- while (scratch->catchup_pq.qm_size) {
- s64a match_loc = pq_top_loc(&scratch->catchup_pq);
- u32 qi = pq_top(scratch->catchup_pq.qm)->queue;
-
- DEBUG_PRINTF("winrar q%u@%lld loc %lld\n", qi, match_loc, loc);
- assert(match_loc + scratch->core_info.buf_offset
- >= scratch->tctxt.minNonMpvMatchOffset);
-
- if (match_loc > loc) {
- /* we have processed all the matches at or before rose's current
- * location; only things remaining on the pq should be outfixes. */
- DEBUG_PRINTF("saving for later\n");
- goto exit;
- }
-
- /* catch up char matches to this point */
+ struct mq *queues = scratch->queues;
+ u8 *aa = getActiveLeafArray(t, state);
+
+ /* fire off earliest nfa match and catchup anchored matches to that point */
+ while (scratch->catchup_pq.qm_size) {
+ s64a match_loc = pq_top_loc(&scratch->catchup_pq);
+ u32 qi = pq_top(scratch->catchup_pq.qm)->queue;
+
+ DEBUG_PRINTF("winrar q%u@%lld loc %lld\n", qi, match_loc, loc);
+ assert(match_loc + scratch->core_info.buf_offset
+ >= scratch->tctxt.minNonMpvMatchOffset);
+
+ if (match_loc > loc) {
+ /* we have processed all the matches at or before rose's current
+ * location; only things remaining on the pq should be outfixes. */
+ DEBUG_PRINTF("saving for later\n");
+ goto exit;
+ }
+
+ /* catch up char matches to this point */
if (roseCatchUpMPV(t, match_loc, scratch)
- == HWLM_TERMINATE_MATCHING) {
- DEBUG_PRINTF("roseCatchUpNfas done\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
- assert(match_loc + scratch->core_info.buf_offset
- >= scratch->tctxt.minNonMpvMatchOffset);
-
- struct mq *q = queues + qi;
-
- /* outfixes must be advanced all the way as they persist in the pq
- * between catchup events */
- s64a q_final_loc = qi >= t->outfixEndQueue ? final_loc
- : (s64a)scratch->core_info.len;
-
- /* fire nfa matches, and find next place this nfa match */
- DEBUG_PRINTF("reporting matches %u@%llu [q->cur %u/%u]\n", qi,
- match_loc, q->cur, q->end);
-
- /* we then need to catch this nfa up to next earliest nfa match. These
- * matches can be fired directly from the callback. The callback needs
- * to ensure that the anchored matches remain in sync though */
- s64a second_place_loc = findSecondPlace(&scratch->catchup_pq, loc);
- DEBUG_PRINTF("second place %lld loc %lld\n", second_place_loc, loc);
-
- if (second_place_loc == q_cur_loc(q)) {
+ == HWLM_TERMINATE_MATCHING) {
+ DEBUG_PRINTF("roseCatchUpNfas done\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
+ assert(match_loc + scratch->core_info.buf_offset
+ >= scratch->tctxt.minNonMpvMatchOffset);
+
+ struct mq *q = queues + qi;
+
+ /* outfixes must be advanced all the way as they persist in the pq
+ * between catchup events */
+ s64a q_final_loc = qi >= t->outfixEndQueue ? final_loc
+ : (s64a)scratch->core_info.len;
+
+ /* fire nfa matches, and find next place this nfa match */
+ DEBUG_PRINTF("reporting matches %u@%llu [q->cur %u/%u]\n", qi,
+ match_loc, q->cur, q->end);
+
+ /* we then need to catch this nfa up to next earliest nfa match. These
+ * matches can be fired directly from the callback. The callback needs
+ * to ensure that the anchored matches remain in sync though */
+ s64a second_place_loc = findSecondPlace(&scratch->catchup_pq, loc);
+ DEBUG_PRINTF("second place %lld loc %lld\n", second_place_loc, loc);
+
+ if (second_place_loc == q_cur_loc(q)) {
if (runExistingNfaToNextMatch(t, qi, q, q_final_loc, scratch, aa, 1)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- continue;
- }
-
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ continue;
+ }
+
char alive = blast_queue(scratch, q, qi, second_place_loc, 1);
-
- if (!alive) {
- if (can_stop_matching(scratch)) {
- DEBUG_PRINTF("roseCatchUpNfas done as bailing\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
+
+ if (!alive) {
+ if (can_stop_matching(scratch)) {
+ DEBUG_PRINTF("roseCatchUpNfas done as bailing\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
deactivateQueue(t, aa, qi, scratch);
- pq_pop_nice(&scratch->catchup_pq);
- } else if (q->cur == q->end) {
- DEBUG_PRINTF("queue %u finished, nfa lives [%lld]\n", qi, loc);
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, loc);
- pq_pop_nice(&scratch->catchup_pq);
- } else if (second_place_loc == q_final_loc) {
- DEBUG_PRINTF("queue %u on hold\n", qi);
- pq_pop_nice(&scratch->catchup_pq);
- break;
- } else {
- DEBUG_PRINTF("queue %u not finished, %u/%u [%lld/%lld]\n",
- qi, q->cur, q->end, q->items[q->cur].location, loc);
+ pq_pop_nice(&scratch->catchup_pq);
+ } else if (q->cur == q->end) {
+ DEBUG_PRINTF("queue %u finished, nfa lives [%lld]\n", qi, loc);
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, loc);
+ pq_pop_nice(&scratch->catchup_pq);
+ } else if (second_place_loc == q_final_loc) {
+ DEBUG_PRINTF("queue %u on hold\n", qi);
+ pq_pop_nice(&scratch->catchup_pq);
+ break;
+ } else {
+ DEBUG_PRINTF("queue %u not finished, %u/%u [%lld/%lld]\n",
+ qi, q->cur, q->end, q->items[q->cur].location, loc);
runExistingNfaToNextMatch(t, qi, q, q_final_loc, scratch, aa, 0);
- }
- }
-exit:;
- tctxt->minNonMpvMatchOffset = scratch->core_info.buf_offset + loc;
- DEBUG_PRINTF("roseCatchUpNfas done\n");
- return HWLM_CONTINUE_MATCHING;
-}
-
-hwlmcb_rv_t roseCatchUpAll(s64a loc, struct hs_scratch *scratch) {
- /* just need suf/outfixes and mpv */
- DEBUG_PRINTF("loc %lld mnmmo %llu mmo %llu\n", loc,
- scratch->tctxt.minNonMpvMatchOffset,
- scratch->tctxt.minMatchOffset);
- assert(scratch->core_info.buf_offset + loc
- > scratch->tctxt.minNonMpvMatchOffset);
-
+ }
+ }
+exit:;
+ tctxt->minNonMpvMatchOffset = scratch->core_info.buf_offset + loc;
+ DEBUG_PRINTF("roseCatchUpNfas done\n");
+ return HWLM_CONTINUE_MATCHING;
+}
+
+hwlmcb_rv_t roseCatchUpAll(s64a loc, struct hs_scratch *scratch) {
+ /* just need suf/outfixes and mpv */
+ DEBUG_PRINTF("loc %lld mnmmo %llu mmo %llu\n", loc,
+ scratch->tctxt.minNonMpvMatchOffset,
+ scratch->tctxt.minMatchOffset);
+ assert(scratch->core_info.buf_offset + loc
+ > scratch->tctxt.minNonMpvMatchOffset);
+
const struct RoseEngine *t = scratch->core_info.rose;
char *state = scratch->core_info.state;
hwlmcb_rv_t rv = buildSufPQ(t, state, loc, loc, scratch);
- if (rv != HWLM_CONTINUE_MATCHING) {
- return rv;
- }
-
+ if (rv != HWLM_CONTINUE_MATCHING) {
+ return rv;
+ }
+
rv = roseCatchUpNfas(t, loc, loc, scratch);
- if (rv != HWLM_CONTINUE_MATCHING) {
- return rv;
- }
-
+ if (rv != HWLM_CONTINUE_MATCHING) {
+ return rv;
+ }
+
rv = roseCatchUpMPV(t, loc, scratch);
- assert(rv != HWLM_CONTINUE_MATCHING
+ assert(rv != HWLM_CONTINUE_MATCHING
|| scratch->catchup_pq.qm_size <= t->outfixEndQueue);
assert(!can_stop_matching(scratch) || rv == HWLM_TERMINATE_MATCHING);
- return rv;
-}
-
-hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch) {
- /* just need suf/outfixes. mpv will be caught up only to last reported
- * external match */
- assert(scratch->core_info.buf_offset + loc
- > scratch->tctxt.minNonMpvMatchOffset);
-
+ return rv;
+}
+
+hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch) {
+ /* just need suf/outfixes. mpv will be caught up only to last reported
+ * external match */
+ assert(scratch->core_info.buf_offset + loc
+ > scratch->tctxt.minNonMpvMatchOffset);
+
const struct RoseEngine *t = scratch->core_info.rose;
char *state = scratch->core_info.state;
hwlmcb_rv_t rv = buildSufPQ(t, state, loc, loc, scratch);
- if (rv != HWLM_CONTINUE_MATCHING) {
- return rv;
- }
-
+ if (rv != HWLM_CONTINUE_MATCHING) {
+ return rv;
+ }
+
rv = roseCatchUpNfas(t, loc, loc, scratch);
assert(rv != HWLM_CONTINUE_MATCHING ||
scratch->catchup_pq.qm_size <= t->outfixEndQueue);
-
- return rv;
-}
+
+ return rv;
+}
diff --git a/contrib/libs/hyperscan/src/rose/catchup.h b/contrib/libs/hyperscan/src/rose/catchup.h
index 8274bb839e..8188d5af01 100644
--- a/contrib/libs/hyperscan/src/rose/catchup.h
+++ b/contrib/libs/hyperscan/src/rose/catchup.h
@@ -1,31 +1,31 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
* \brief Rose runtime: code for catching up output-exposed engines.
@@ -45,58 +45,58 @@
* exists.
*/
-#ifndef ROSE_CATCHUP_H
-#define ROSE_CATCHUP_H
-
-#include "hwlm/hwlm.h"
-#include "runtime.h"
-#include "scratch.h"
+#ifndef ROSE_CATCHUP_H
+#define ROSE_CATCHUP_H
+
+#include "hwlm/hwlm.h"
+#include "runtime.h"
+#include "scratch.h"
#include "rose.h"
-#include "rose_common.h"
-#include "rose_internal.h"
-#include "ue2common.h"
-#include "util/multibit.h"
-
-hwlmcb_rv_t roseCatchUpAll(s64a loc, struct hs_scratch *scratch);
-
+#include "rose_common.h"
+#include "rose_internal.h"
+#include "ue2common.h"
+#include "util/multibit.h"
+
+hwlmcb_rv_t roseCatchUpAll(s64a loc, struct hs_scratch *scratch);
+
/* will only catch mpv up to last reported external match */
-hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch);
-
+hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch);
+
hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc,
- struct hs_scratch *scratch);
-
+ struct hs_scratch *scratch);
+
void blockInitSufPQ(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch, char is_small_block);
+ struct hs_scratch *scratch, char is_small_block);
void streamInitSufPQ(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch);
-
-static really_inline
+ struct hs_scratch *scratch);
+
+static really_inline
int canSkipCatchUpMPV(const struct RoseEngine *t, struct hs_scratch *scratch,
u64a cur_offset) {
- if (!has_chained_nfas(t)) {
+ if (!has_chained_nfas(t)) {
return 1;
- }
-
- /* note: we may have to run at less than tctxt.minMatchOffset as we may
- * have a full queue of postponed events that we need to flush */
- if (cur_offset < scratch->tctxt.next_mpv_offset) {
+ }
+
+ /* note: we may have to run at less than tctxt.minMatchOffset as we may
+ * have a full queue of postponed events that we need to flush */
+ if (cur_offset < scratch->tctxt.next_mpv_offset) {
DEBUG_PRINTF("skipping cur_offset %llu min %llu, mpv %llu\n",
- cur_offset, scratch->tctxt.minMatchOffset,
- scratch->tctxt.next_mpv_offset);
+ cur_offset, scratch->tctxt.minMatchOffset,
+ scratch->tctxt.next_mpv_offset);
return 1;
- }
-
- assert(t->activeArrayCount);
-
+ }
+
+ assert(t->activeArrayCount);
+
DEBUG_PRINTF("cur offset offset: %llu\n", cur_offset);
- DEBUG_PRINTF("min match offset %llu\n", scratch->tctxt.minMatchOffset);
-
- assert(t->outfixBeginQueue == 1); /* if it exists mpv is queue 0 */
-
+ DEBUG_PRINTF("min match offset %llu\n", scratch->tctxt.minMatchOffset);
+
+ assert(t->outfixBeginQueue == 1); /* if it exists mpv is queue 0 */
+
const u8 *aa = getActiveLeafArray(t, scratch->core_info.state);
return !mmbit_isset(aa, t->activeArrayCount, 0);
}
-
+
/** \brief Catches up the MPV. */
static really_inline
hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, s64a loc,
@@ -114,36 +114,36 @@ hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, s64a loc,
}
updateMinMatchOffsetFromMpv(&scratch->tctxt, cur_offset);
return HWLM_CONTINUE_MATCHING;
- }
-
- /* Note: chained tails MUST not participate in the priority queue as
- * they may have events pushed on during this process which may be before
- * the catch up point */
-
+ }
+
+ /* Note: chained tails MUST not participate in the priority queue as
+ * they may have events pushed on during this process which may be before
+ * the catch up point */
+
return roseCatchUpMPV_i(t, loc, scratch);
-}
-
+}
+
/** \brief Catches up NFAs and the MPV. */
-static rose_inline
+static rose_inline
hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t,
struct hs_scratch *scratch, u64a end) {
- /* no need to catch up if we are at the same offset as last time */
- if (end <= scratch->tctxt.minMatchOffset) {
- /* we must already be up to date */
- DEBUG_PRINTF("skip\n");
- return HWLM_CONTINUE_MATCHING;
- }
-
+ /* no need to catch up if we are at the same offset as last time */
+ if (end <= scratch->tctxt.minMatchOffset) {
+ /* we must already be up to date */
+ DEBUG_PRINTF("skip\n");
+ return HWLM_CONTINUE_MATCHING;
+ }
+
char *state = scratch->core_info.state;
- s64a loc = end - scratch->core_info.buf_offset;
-
- if (end <= scratch->tctxt.minNonMpvMatchOffset) {
- /* only need to catch up the mpv */
+ s64a loc = end - scratch->core_info.buf_offset;
+
+ if (end <= scratch->tctxt.minNonMpvMatchOffset) {
+ /* only need to catch up the mpv */
return roseCatchUpMPV(t, loc, scratch);
- }
-
- assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset);
- hwlmcb_rv_t rv;
+ }
+
+ assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset);
+ hwlmcb_rv_t rv;
if (!t->activeArrayCount
|| !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
if (t->flushCombProgramOffset) {
@@ -154,18 +154,18 @@ hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t,
}
updateMinMatchOffset(&scratch->tctxt, end);
rv = HWLM_CONTINUE_MATCHING;
- } else {
+ } else {
rv = roseCatchUpAll(loc, scratch);
- }
-
- assert(rv != HWLM_CONTINUE_MATCHING
- || scratch->tctxt.minMatchOffset == end);
- assert(rv != HWLM_CONTINUE_MATCHING
- || scratch->tctxt.minNonMpvMatchOffset == end);
+ }
+
+ assert(rv != HWLM_CONTINUE_MATCHING
+ || scratch->tctxt.minMatchOffset == end);
+ assert(rv != HWLM_CONTINUE_MATCHING
+ || scratch->tctxt.minNonMpvMatchOffset == end);
assert(!can_stop_matching(scratch) || rv == HWLM_TERMINATE_MATCHING);
- return rv;
-}
-
+ return rv;
+}
+
/**
* \brief Catches up anything which may add triggers on the MPV (suffixes and
* outfixes).
@@ -173,35 +173,35 @@ hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t,
* The MPV will be run only to intersperse matches in the output match stream
* if external matches are raised.
*/
-static rose_inline
+static rose_inline
hwlmcb_rv_t roseCatchUpMpvFeeders(const struct RoseEngine *t,
struct hs_scratch *scratch, u64a end) {
- /* no need to catch up if we are at the same offset as last time */
- if (end <= scratch->tctxt.minNonMpvMatchOffset) {
- /* we must already be up to date */
- DEBUG_PRINTF("skip\n");
- return HWLM_CONTINUE_MATCHING;
- }
-
- s64a loc = end - scratch->core_info.buf_offset;
-
- assert(t->activeArrayCount); /* mpv is in active array */
- assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset);
-
+ /* no need to catch up if we are at the same offset as last time */
+ if (end <= scratch->tctxt.minNonMpvMatchOffset) {
+ /* we must already be up to date */
+ DEBUG_PRINTF("skip\n");
+ return HWLM_CONTINUE_MATCHING;
+ }
+
+ s64a loc = end - scratch->core_info.buf_offset;
+
+ assert(t->activeArrayCount); /* mpv is in active array */
+ assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset);
+
if (!t->mpvTriggeredByLeaf) {
/* no need to check as they never put triggers onto the mpv */
return HWLM_CONTINUE_MATCHING;
}
-
+
/* sadly, this branch rarely gets taken as the mpv itself is usually
* alive. */
char *state = scratch->core_info.state;
if (!mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
scratch->tctxt.minNonMpvMatchOffset = end;
return HWLM_CONTINUE_MATCHING;
- }
+ }
return roseCatchUpSuf(loc, scratch);
-}
-
-#endif
+}
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/counting_miracle.h b/contrib/libs/hyperscan/src/rose/counting_miracle.h
index 17ab965eae..976208b738 100644
--- a/contrib/libs/hyperscan/src/rose/counting_miracle.h
+++ b/contrib/libs/hyperscan/src/rose/counting_miracle.h
@@ -1,263 +1,263 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_COUNTING_MIRACLE_H
-#define ROSE_COUNTING_MIRACLE_H
-
-#include "ue2common.h"
-#include "runtime.h"
-#include "rose_internal.h"
-#include "nfa/nfa_api_queue.h"
-#include "util/simd_utils.h"
-
-/** \brief Maximum number of bytes to scan when looking for a "counting miracle"
- * stop character. */
-#define COUNTING_MIRACLE_LEN_MAX 256
-
-static really_inline
-char roseCountingMiracleScan(u8 c, const u8 *d, const u8 *d_end,
- u32 target_count, u32 *count_inout,
- const u8 **d_out) {
- assert(d <= d_end);
-
- u32 count = *count_inout;
-
- m128 chars = set16x8(c);
-
- for (; d + 16 <= d_end; d_end -= 16) {
- m128 data = loadu128(d_end - 16);
- u32 z1 = movemask128(eq128(chars, data));
- count += popcount32(z1);
-
- if (count >= target_count) {
- *d_out = d_end - 16;
- *count_inout = count;
- return 1;
- }
- }
-
- if (d != d_end) {
- char temp[sizeof(m128)];
- assert(d + sizeof(temp) > d_end);
- memset(temp, c + 1, sizeof(temp));
- memcpy(temp, d, d_end - d);
- m128 data = loadu128(temp);
- u32 z1 = movemask128(eq128(chars, data));
- count += popcount32(z1);
-
- if (count >= target_count) {
- *d_out = d;
- *count_inout = count;
- return 1;
- }
- }
-
- *count_inout = count;
- return 0;
-}
-
-#define GET_LO_4(chars) and128(chars, low4bits)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_COUNTING_MIRACLE_H
+#define ROSE_COUNTING_MIRACLE_H
+
+#include "ue2common.h"
+#include "runtime.h"
+#include "rose_internal.h"
+#include "nfa/nfa_api_queue.h"
+#include "util/simd_utils.h"
+
+/** \brief Maximum number of bytes to scan when looking for a "counting miracle"
+ * stop character. */
+#define COUNTING_MIRACLE_LEN_MAX 256
+
+static really_inline
+char roseCountingMiracleScan(u8 c, const u8 *d, const u8 *d_end,
+ u32 target_count, u32 *count_inout,
+ const u8 **d_out) {
+ assert(d <= d_end);
+
+ u32 count = *count_inout;
+
+ m128 chars = set16x8(c);
+
+ for (; d + 16 <= d_end; d_end -= 16) {
+ m128 data = loadu128(d_end - 16);
+ u32 z1 = movemask128(eq128(chars, data));
+ count += popcount32(z1);
+
+ if (count >= target_count) {
+ *d_out = d_end - 16;
+ *count_inout = count;
+ return 1;
+ }
+ }
+
+ if (d != d_end) {
+ char temp[sizeof(m128)];
+ assert(d + sizeof(temp) > d_end);
+ memset(temp, c + 1, sizeof(temp));
+ memcpy(temp, d, d_end - d);
+ m128 data = loadu128(temp);
+ u32 z1 = movemask128(eq128(chars, data));
+ count += popcount32(z1);
+
+ if (count >= target_count) {
+ *d_out = d;
+ *count_inout = count;
+ return 1;
+ }
+ }
+
+ *count_inout = count;
+ return 0;
+}
+
+#define GET_LO_4(chars) and128(chars, low4bits)
#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4)
-
-static really_inline
-u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison,
- const u8 *d, const u8 *d_end,
- u32 target_count, u32 *count_inout,
- const u8 **d_out) {
- assert(d <= d_end);
-
- u32 count = *count_inout;
-
- const m128 zeroes = zeroes128();
- const m128 low4bits = _mm_set1_epi8(0xf);
-
- for (; d + 16 <= d_end; d_end -= 16) {
- m128 data = loadu128(d_end - 16);
+
+static really_inline
+u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison,
+ const u8 *d, const u8 *d_end,
+ u32 target_count, u32 *count_inout,
+ const u8 **d_out) {
+ assert(d <= d_end);
+
+ u32 count = *count_inout;
+
+ const m128 zeroes = zeroes128();
+ const m128 low4bits = _mm_set1_epi8(0xf);
+
+ for (; d + 16 <= d_end; d_end -= 16) {
+ m128 data = loadu128(d_end - 16);
m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(data));
m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(data));
- m128 t = and128(c_lo, c_hi);
- u32 z1 = movemask128(eq128(t, zeroes));
- count += popcount32(z1 ^ 0xffff);
-
- if (count >= target_count) {
- *d_out = d_end - 16;
- *count_inout = count;
- return 1;
- }
- }
-
- if (d != d_end) {
- char temp[sizeof(m128)];
- assert(d + sizeof(temp) > d_end);
- memset(temp, poison, sizeof(temp));
- memcpy(temp, d, d_end - d);
- m128 data = loadu128(temp);
+ m128 t = and128(c_lo, c_hi);
+ u32 z1 = movemask128(eq128(t, zeroes));
+ count += popcount32(z1 ^ 0xffff);
+
+ if (count >= target_count) {
+ *d_out = d_end - 16;
+ *count_inout = count;
+ return 1;
+ }
+ }
+
+ if (d != d_end) {
+ char temp[sizeof(m128)];
+ assert(d + sizeof(temp) > d_end);
+ memset(temp, poison, sizeof(temp));
+ memcpy(temp, d, d_end - d);
+ m128 data = loadu128(temp);
m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(data));
m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(data));
- m128 t = and128(c_lo, c_hi);
- u32 z1 = movemask128(eq128(t, zeroes));
- count += popcount32(z1 ^ 0xffff);
-
- if (count >= target_count) {
- *d_out = d;
- *count_inout = count;
- return 1;
- }
- }
-
- *count_inout = count;
- return 0;
-}
-
-/**
- * \brief "Counting Miracle" scan: If we see more than N instances of a
- * particular character class we know that the engine must be dead.
- *
- * Scans the buffer/history between relative locations \a begin_loc and \a
- * end_loc, and returns a miracle location (if any) that appears in the stream
- * after \a begin_loc.
- *
- * Returns 1 if some bytes can be skipped and sets \a miracle_loc
- * appropriately, 0 otherwise.
- */
-static never_inline
-int roseCountingMiracleOccurs(const struct RoseEngine *t,
- const struct LeftNfaInfo *left,
- const struct core_info *ci, s64a begin_loc,
- const s64a end_loc, s64a *miracle_loc) {
- if (!left->countingMiracleOffset) {
- return 0;
- }
-
- const struct RoseCountingMiracle *cm
- = (const void *)((const char *)t + left->countingMiracleOffset);
-
- assert(!left->transient);
- assert(cm->count > 1); /* should be a normal miracle then */
-
- DEBUG_PRINTF("looking for counting miracle over [%lld,%lld], maxLag=%u\n",
- begin_loc, end_loc, left->maxLag);
- DEBUG_PRINTF("ci->len=%zu, ci->hlen=%zu\n", ci->len, ci->hlen);
-
- assert(begin_loc <= end_loc);
- assert(begin_loc >= -(s64a)ci->hlen);
- assert(end_loc <= (s64a)ci->len);
-
- const s64a scan_end_loc = end_loc - left->maxLag;
- if (scan_end_loc <= begin_loc) {
- DEBUG_PRINTF("nothing to scan\n");
- return 0;
- }
-
- const s64a start = MAX(begin_loc, scan_end_loc - COUNTING_MIRACLE_LEN_MAX);
- DEBUG_PRINTF("scan [%lld..%lld]\n", start, scan_end_loc);
-
- u32 count = 0;
-
- s64a m_loc = start;
-
- if (!cm->shufti) {
- u8 c = cm->c;
-
- // Scan buffer.
- const s64a buf_scan_start = MAX(0, start);
- if (scan_end_loc > buf_scan_start) {
- const u8 *buf = ci->buf;
- const u8 *d = buf + scan_end_loc;
- const u8 *d_start = buf + buf_scan_start;
- const u8 *d_out;
- if (roseCountingMiracleScan(c, d_start, d, cm->count, &count,
- &d_out)) {
- assert(d_out >= d_start);
- m_loc = (d_out - d_start) + buf_scan_start;
- goto success;
- }
- }
-
- // Scan history.
- if (start < 0) {
- const u8 *hbuf_end = ci->hbuf + ci->hlen;
- const u8 *d = hbuf_end + MIN(0, scan_end_loc);
- const u8 *d_start = hbuf_end + start;
- const u8 *d_out;
- if (roseCountingMiracleScan(c, d_start, d, cm->count, &count,
- &d_out)) {
- assert(d_out >= d_start);
- m_loc = (d_out - d_start) + start;
- goto success;
- }
- }
- } else {
- m128 lo = cm->lo;
- m128 hi = cm->hi;
- u8 poison = cm->poison;
-
- // Scan buffer.
- const s64a buf_scan_start = MAX(0, start);
- if (scan_end_loc > buf_scan_start) {
- const u8 *buf = ci->buf;
- const u8 *d = buf + scan_end_loc;
- const u8 *d_start = buf + buf_scan_start;
- const u8 *d_out;
- if (roseCountingMiracleScanShufti(lo, hi, poison, d_start, d,
- cm->count, &count, &d_out)) {
- assert(d_out >= d_start);
- m_loc = (d_out - d_start) + buf_scan_start;
- goto success;
- }
- }
-
- // Scan history.
- if (start < 0) {
- const u8 *hbuf_end = ci->hbuf + ci->hlen;
- const u8 *d = hbuf_end + MIN(0, scan_end_loc);
- const u8 *d_start = hbuf_end + start;
- const u8 *d_out;
- if (roseCountingMiracleScanShufti(lo, hi, poison, d_start, d,
- cm->count, &count, &d_out)) {
- assert(d_out >= d_start);
- m_loc = (d_out - d_start) + start;
- goto success;
- }
- }
- }
-
- DEBUG_PRINTF("found %u/%u\n", count, cm->count);
- return 0;
-
-success:
- DEBUG_PRINTF("found %u/%u\n", count, cm->count);
- assert(count >= cm->count);
- assert(m_loc < scan_end_loc);
- assert(m_loc >= start);
-
- *miracle_loc = m_loc;
- return 1;
-}
-
-#endif
+ m128 t = and128(c_lo, c_hi);
+ u32 z1 = movemask128(eq128(t, zeroes));
+ count += popcount32(z1 ^ 0xffff);
+
+ if (count >= target_count) {
+ *d_out = d;
+ *count_inout = count;
+ return 1;
+ }
+ }
+
+ *count_inout = count;
+ return 0;
+}
+
+/**
+ * \brief "Counting Miracle" scan: If we see more than N instances of a
+ * particular character class we know that the engine must be dead.
+ *
+ * Scans the buffer/history between relative locations \a begin_loc and \a
+ * end_loc, and returns a miracle location (if any) that appears in the stream
+ * after \a begin_loc.
+ *
+ * Returns 1 if some bytes can be skipped and sets \a miracle_loc
+ * appropriately, 0 otherwise.
+ */
+static never_inline
+int roseCountingMiracleOccurs(const struct RoseEngine *t,
+ const struct LeftNfaInfo *left,
+ const struct core_info *ci, s64a begin_loc,
+ const s64a end_loc, s64a *miracle_loc) {
+ if (!left->countingMiracleOffset) {
+ return 0;
+ }
+
+ const struct RoseCountingMiracle *cm
+ = (const void *)((const char *)t + left->countingMiracleOffset);
+
+ assert(!left->transient);
+ assert(cm->count > 1); /* should be a normal miracle then */
+
+ DEBUG_PRINTF("looking for counting miracle over [%lld,%lld], maxLag=%u\n",
+ begin_loc, end_loc, left->maxLag);
+ DEBUG_PRINTF("ci->len=%zu, ci->hlen=%zu\n", ci->len, ci->hlen);
+
+ assert(begin_loc <= end_loc);
+ assert(begin_loc >= -(s64a)ci->hlen);
+ assert(end_loc <= (s64a)ci->len);
+
+ const s64a scan_end_loc = end_loc - left->maxLag;
+ if (scan_end_loc <= begin_loc) {
+ DEBUG_PRINTF("nothing to scan\n");
+ return 0;
+ }
+
+ const s64a start = MAX(begin_loc, scan_end_loc - COUNTING_MIRACLE_LEN_MAX);
+ DEBUG_PRINTF("scan [%lld..%lld]\n", start, scan_end_loc);
+
+ u32 count = 0;
+
+ s64a m_loc = start;
+
+ if (!cm->shufti) {
+ u8 c = cm->c;
+
+ // Scan buffer.
+ const s64a buf_scan_start = MAX(0, start);
+ if (scan_end_loc > buf_scan_start) {
+ const u8 *buf = ci->buf;
+ const u8 *d = buf + scan_end_loc;
+ const u8 *d_start = buf + buf_scan_start;
+ const u8 *d_out;
+ if (roseCountingMiracleScan(c, d_start, d, cm->count, &count,
+ &d_out)) {
+ assert(d_out >= d_start);
+ m_loc = (d_out - d_start) + buf_scan_start;
+ goto success;
+ }
+ }
+
+ // Scan history.
+ if (start < 0) {
+ const u8 *hbuf_end = ci->hbuf + ci->hlen;
+ const u8 *d = hbuf_end + MIN(0, scan_end_loc);
+ const u8 *d_start = hbuf_end + start;
+ const u8 *d_out;
+ if (roseCountingMiracleScan(c, d_start, d, cm->count, &count,
+ &d_out)) {
+ assert(d_out >= d_start);
+ m_loc = (d_out - d_start) + start;
+ goto success;
+ }
+ }
+ } else {
+ m128 lo = cm->lo;
+ m128 hi = cm->hi;
+ u8 poison = cm->poison;
+
+ // Scan buffer.
+ const s64a buf_scan_start = MAX(0, start);
+ if (scan_end_loc > buf_scan_start) {
+ const u8 *buf = ci->buf;
+ const u8 *d = buf + scan_end_loc;
+ const u8 *d_start = buf + buf_scan_start;
+ const u8 *d_out;
+ if (roseCountingMiracleScanShufti(lo, hi, poison, d_start, d,
+ cm->count, &count, &d_out)) {
+ assert(d_out >= d_start);
+ m_loc = (d_out - d_start) + buf_scan_start;
+ goto success;
+ }
+ }
+
+ // Scan history.
+ if (start < 0) {
+ const u8 *hbuf_end = ci->hbuf + ci->hlen;
+ const u8 *d = hbuf_end + MIN(0, scan_end_loc);
+ const u8 *d_start = hbuf_end + start;
+ const u8 *d_out;
+ if (roseCountingMiracleScanShufti(lo, hi, poison, d_start, d,
+ cm->count, &count, &d_out)) {
+ assert(d_out >= d_start);
+ m_loc = (d_out - d_start) + start;
+ goto success;
+ }
+ }
+ }
+
+ DEBUG_PRINTF("found %u/%u\n", count, cm->count);
+ return 0;
+
+success:
+ DEBUG_PRINTF("found %u/%u\n", count, cm->count);
+ assert(count >= cm->count);
+ assert(m_loc < scan_end_loc);
+ assert(m_loc >= start);
+
+ *miracle_loc = m_loc;
+ return 1;
+}
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/infix.h b/contrib/libs/hyperscan/src/rose/infix.h
index 480aa3a52c..9cf9c0ad74 100644
--- a/contrib/libs/hyperscan/src/rose/infix.h
+++ b/contrib/libs/hyperscan/src/rose/infix.h
@@ -1,54 +1,54 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef INFIX_H
-#define INFIX_H
-
-#include "ue2common.h"
-#include "nfa/nfa_api.h"
-#include "nfa/nfa_api_queue.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef INFIX_H
+#define INFIX_H
+
+#include "ue2common.h"
+#include "nfa/nfa_api.h"
+#include "nfa/nfa_api_queue.h"
#include "nfa/nfa_internal.h"
-
-static really_inline
-int infixTooOld(struct mq *q, s64a curr_loc) {
- u32 maxAge = q->nfa->maxWidth;
-
- if (!maxAge) {
- return 0;
- }
-
- return q_last_loc(q) + maxAge < curr_loc;
-}
-
+
+static really_inline
+int infixTooOld(struct mq *q, s64a curr_loc) {
+ u32 maxAge = q->nfa->maxWidth;
+
+ if (!maxAge) {
+ return 0;
+ }
+
+ return q_last_loc(q) + maxAge < curr_loc;
+}
+
static really_inline
int canReduceQueue(const struct mq *q, s64a curr_loc, u32 maxTops, u32 maxAge) {
u32 qlen = q->end - q->cur; /* includes MQE_START */
-
+
if (maxAge && q->items[q->cur].location + maxAge < curr_loc) {
return 1;
}
@@ -66,96 +66,96 @@ int canReduceQueue(const struct mq *q, s64a curr_loc, u32 maxTops, u32 maxAge) {
return 0;
}
-/**
- * Removes tops which are known not to affect the final state from the queue.
- * May also reinitialise the engine state if it is unneeded.
- *
- * maxAge is the maximum width of the infix. Any tops/state before this can be
- * ignored. 0 is used to indicate that there is no upper bound on the width of
- * the pattern.
- *
- * maxTops is the maximum number of locations of tops that can affect the top.
- * It is only possible for the last maxTops tops to affect the final state -
- * earlier ones can be safely removed. Also, any state before the max tops may
- * be ignored.
- *
- * This code assumes/requires that there are not multiple tops at the same
- * location in the queue. This code also assumes that it is not a multitop
- * engine.
- */
-static really_inline
+/**
+ * Removes tops which are known not to affect the final state from the queue.
+ * May also reinitialise the engine state if it is unneeded.
+ *
+ * maxAge is the maximum width of the infix. Any tops/state before this can be
+ * ignored. 0 is used to indicate that there is no upper bound on the width of
+ * the pattern.
+ *
+ * maxTops is the maximum number of locations of tops that can affect the top.
+ * It is only possible for the last maxTops tops to affect the final state -
+ * earlier ones can be safely removed. Also, any state before the max tops may
+ * be ignored.
+ *
+ * This code assumes/requires that there are not multiple tops at the same
+ * location in the queue. This code also assumes that it is not a multitop
+ * engine.
+ */
+static really_inline
void reduceInfixQueue(struct mq *q, s64a curr_loc, u32 maxTops, u32 maxAge) {
- assert(q->end > q->cur);
- assert(maxTops);
- u32 qlen = q->end - q->cur; /* includes MQE_START */
- DEBUG_PRINTF("q=%p, len=%u, maxTops=%u maxAge=%u\n", q, qlen, maxTops,
- maxAge);
-
+ assert(q->end > q->cur);
+ assert(maxTops);
+ u32 qlen = q->end - q->cur; /* includes MQE_START */
+ DEBUG_PRINTF("q=%p, len=%u, maxTops=%u maxAge=%u\n", q, qlen, maxTops,
+ maxAge);
+
if (!canReduceQueue(q, curr_loc, maxTops, maxAge)) {
- DEBUG_PRINTF("nothing to do\n");
- return;
- }
-
-#ifdef DEBUG
- debugQueue(q);
-#endif
-
- char drop_state = qlen - 1 >= maxTops
- || (maxAge && q->items[q->cur].location + maxAge < curr_loc);
-
- LIMIT_TO_AT_MOST(&maxTops, qlen - 1);
-
- // We leave our START where it is, at the front of the queue.
- assert(q->items[q->cur].type == MQE_START);
-
- // We want to shuffle maxQueueLen items from the end of the queue to just
- // after the start, effectively dequeuing old items. We could use memmove
- // for this, but it's probably not a good idea to take the cost of the
- // function call.
- const struct mq_item *src = &q->items[q->cur + qlen - maxTops];
-
- q->items[0] = q->items[q->cur]; /* shift start event to 0 slot */
- q->cur = 0;
- q->end = 1;
- struct mq_item *dst = &q->items[1];
- u32 i = 0;
- if (maxAge) {
- /* any event which is older than maxAge can be dropped */
- for (; i < maxTops; i++, src++) {
- if (src->location >= curr_loc - maxAge) {
- break;
- }
- }
- }
-
- for (; i < maxTops; i++) {
- *dst = *src;
- src++;
- dst++;
- q->end++;
- }
-
- if (drop_state) {
- /* clear state and shift start up to first top */
- s64a new_loc;
- if (q->end > 1) {
- new_loc = q->items[1].location;
- } else {
- DEBUG_PRINTF("no tops\n");
- new_loc = curr_loc;
- }
-
- DEBUG_PRINTF("advancing start from %lld to %lld\n",
- q->items[0].location, new_loc);
- assert(new_loc > q->items[0].location);
- q->items[0].location = new_loc;
- nfaQueueInitState(q->nfa, q);
- }
-
- DEBUG_PRINTF("reduced queue to len=%u\n", q->end - q->cur);
-#ifdef DEBUG
- debugQueue(q);
-#endif
-}
-
-#endif
+ DEBUG_PRINTF("nothing to do\n");
+ return;
+ }
+
+#ifdef DEBUG
+ debugQueue(q);
+#endif
+
+ char drop_state = qlen - 1 >= maxTops
+ || (maxAge && q->items[q->cur].location + maxAge < curr_loc);
+
+ LIMIT_TO_AT_MOST(&maxTops, qlen - 1);
+
+ // We leave our START where it is, at the front of the queue.
+ assert(q->items[q->cur].type == MQE_START);
+
+ // We want to shuffle maxQueueLen items from the end of the queue to just
+ // after the start, effectively dequeuing old items. We could use memmove
+ // for this, but it's probably not a good idea to take the cost of the
+ // function call.
+ const struct mq_item *src = &q->items[q->cur + qlen - maxTops];
+
+ q->items[0] = q->items[q->cur]; /* shift start event to 0 slot */
+ q->cur = 0;
+ q->end = 1;
+ struct mq_item *dst = &q->items[1];
+ u32 i = 0;
+ if (maxAge) {
+ /* any event which is older than maxAge can be dropped */
+ for (; i < maxTops; i++, src++) {
+ if (src->location >= curr_loc - maxAge) {
+ break;
+ }
+ }
+ }
+
+ for (; i < maxTops; i++) {
+ *dst = *src;
+ src++;
+ dst++;
+ q->end++;
+ }
+
+ if (drop_state) {
+ /* clear state and shift start up to first top */
+ s64a new_loc;
+ if (q->end > 1) {
+ new_loc = q->items[1].location;
+ } else {
+ DEBUG_PRINTF("no tops\n");
+ new_loc = curr_loc;
+ }
+
+ DEBUG_PRINTF("advancing start from %lld to %lld\n",
+ q->items[0].location, new_loc);
+ assert(new_loc > q->items[0].location);
+ q->items[0].location = new_loc;
+ nfaQueueInitState(q->nfa, q);
+ }
+
+ DEBUG_PRINTF("reduced queue to len=%u\n", q->end - q->cur);
+#ifdef DEBUG
+ debugQueue(q);
+#endif
+}
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/init.c b/contrib/libs/hyperscan/src/rose/init.c
index 33f8ff5e6c..025ecca0d6 100644
--- a/contrib/libs/hyperscan/src/rose/init.c
+++ b/contrib/libs/hyperscan/src/rose/init.c
@@ -1,88 +1,88 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "init.h"
-#include "match.h"
-#include "runtime.h"
-#include "scratch.h"
-#include "rose.h"
-#include "rose_common.h"
-#include "rose_internal.h"
-#include "ue2common.h"
-#include "nfa/mcclellan.h"
-#include "nfa/nfa_api_util.h"
-#include "nfa/nfa_internal.h"
-#include "util/multibit.h"
-
-#include <string.h>
-
-static really_inline
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "init.h"
+#include "match.h"
+#include "runtime.h"
+#include "scratch.h"
+#include "rose.h"
+#include "rose_common.h"
+#include "rose_internal.h"
+#include "ue2common.h"
+#include "nfa/mcclellan.h"
+#include "nfa/nfa_api_util.h"
+#include "nfa/nfa_internal.h"
+#include "util/multibit.h"
+
+#include <string.h>
+
+static really_inline
void init_rstate(const struct RoseEngine *t, char *state) {
// Set runtime state: we take our initial groups from the RoseEngine.
- DEBUG_PRINTF("setting initial groups to 0x%016llx\n", t->initialGroups);
- storeGroups(t, state, t->initialGroups);
-}
-
-static really_inline
+ DEBUG_PRINTF("setting initial groups to 0x%016llx\n", t->initialGroups);
+ storeGroups(t, state, t->initialGroups);
+}
+
+static really_inline
void init_outfixes(const struct RoseEngine *t, char *state) {
- /* The active leaf array has been init'ed by the scatter with outfix
- * bits set on */
-
- // Init the NFA state for each outfix.
- for (u32 qi = t->outfixBeginQueue; qi < t->outfixEndQueue; qi++) {
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
- const struct NFA *nfa = getNfaByInfo(t, info);
- nfaInitCompressedState(nfa, 0, state + info->stateOffset,
- 0 /* assume NUL at start */);
- }
-
- if (t->initMpvNfa != MO_INVALID_IDX) {
- const struct NfaInfo *info = getNfaInfoByQueue(t, t->initMpvNfa);
- const struct NFA *nfa = getNfaByInfo(t, info);
- nfaInitCompressedState(nfa, 0, state + info->stateOffset,
- 0 /* assume NUL at start */);
- mmbit_set(getActiveLeafArray(t, state), t->activeArrayCount,
- t->initMpvNfa);
- }
-}
-
+ /* The active leaf array has been init'ed by the scatter with outfix
+ * bits set on */
+
+ // Init the NFA state for each outfix.
+ for (u32 qi = t->outfixBeginQueue; qi < t->outfixEndQueue; qi++) {
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+ const struct NFA *nfa = getNfaByInfo(t, info);
+ nfaInitCompressedState(nfa, 0, state + info->stateOffset,
+ 0 /* assume NUL at start */);
+ }
+
+ if (t->initMpvNfa != MO_INVALID_IDX) {
+ const struct NfaInfo *info = getNfaInfoByQueue(t, t->initMpvNfa);
+ const struct NFA *nfa = getNfaByInfo(t, info);
+ nfaInitCompressedState(nfa, 0, state + info->stateOffset,
+ 0 /* assume NUL at start */);
+ mmbit_set(getActiveLeafArray(t, state), t->activeArrayCount,
+ t->initMpvNfa);
+ }
+}
+
void roseInitState(const struct RoseEngine *t, char *state) {
- assert(t);
- assert(state);
-
+ assert(t);
+ assert(state);
+
DEBUG_PRINTF("init for Rose %p with %u state indices)\n", t,
t->rolesWithStateCount);
-
- // Rose is guaranteed 8-aligned state
- assert(ISALIGNED_N(state, 8));
-
- init_rstate(t, state);
-
- init_state(t, state);
- init_outfixes(t, state);
-}
+
+ // Rose is guaranteed 8-aligned state
+ assert(ISALIGNED_N(state, 8));
+
+ init_rstate(t, state);
+
+ init_state(t, state);
+ init_outfixes(t, state);
+}
diff --git a/contrib/libs/hyperscan/src/rose/init.h b/contrib/libs/hyperscan/src/rose/init.h
index 9ae0db2a08..b37053b261 100644
--- a/contrib/libs/hyperscan/src/rose/init.h
+++ b/contrib/libs/hyperscan/src/rose/init.h
@@ -1,46 +1,46 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_INIT_H
-#define ROSE_INIT_H
-
-#include "rose_internal.h"
-#include "ue2common.h"
-#include "util/scatter_runtime.h"
-
-/*
- * Initialisation code common to streaming mode Rose (init.c) and block mode
- * Rose (block.c) code.
- */
-
-static really_inline
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_INIT_H
+#define ROSE_INIT_H
+
+#include "rose_internal.h"
+#include "ue2common.h"
+#include "util/scatter_runtime.h"
+
+/*
+ * Initialisation code common to streaming mode Rose (init.c) and block mode
+ * Rose (block.c) code.
+ */
+
+static really_inline
void init_state(const struct RoseEngine *t, char *state) {
- scatter(state, t, &t->state_init);
-}
-
-#endif // ROSE_INIT_H
+ scatter(state, t, &t->state_init);
+}
+
+#endif // ROSE_INIT_H
diff --git a/contrib/libs/hyperscan/src/rose/match.c b/contrib/libs/hyperscan/src/rose/match.c
index c7f8189cd2..84d3b1fdc2 100644
--- a/contrib/libs/hyperscan/src/rose/match.c
+++ b/contrib/libs/hyperscan/src/rose/match.c
@@ -1,240 +1,240 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "catchup.h"
-#include "match.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "catchup.h"
+#include "match.h"
#include "program_runtime.h"
-#include "rose.h"
-#include "util/bitutils.h"
-#include "util/fatbit.h"
-
-#if defined(DEBUG) || defined(DUMP_SUPPORT)
-#include "util/compare.h"
-/** A debugging crutch: print a hex-escaped version of the match for our
- * perusal. The start and end offsets are stream offsets. */
-static UNUSED
-void printMatch(const struct core_info *ci, u64a start, u64a end) {
- assert(start <= end);
- assert(end <= ci->buf_offset + ci->len);
-
- printf("'");
- u64a i = start;
- for (; i <= MIN(ci->buf_offset, end); i++) {
- u64a h_idx = ci->buf_offset - i;
- u8 c = h_idx >= ci->hlen ? '?' : ci->hbuf[ci->hlen - h_idx - 1];
- if (ourisprint(c) && c != '\'') {
- printf("%c", c);
- } else {
- printf("\\x%02x", c);
- }
- }
- for (; i <= end; i++) {
- u64a b_idx = i - ci->buf_offset - 1;
- u8 c = b_idx >= ci->len ? '?' : ci->buf[b_idx];
- if (ourisprint(c) && c != '\'') {
- printf("%c", c);
- } else {
- printf("\\x%02x", c);
- }
- }
- printf("'");
-}
-#endif
-
+#include "rose.h"
+#include "util/bitutils.h"
+#include "util/fatbit.h"
+
+#if defined(DEBUG) || defined(DUMP_SUPPORT)
+#include "util/compare.h"
+/** A debugging crutch: print a hex-escaped version of the match for our
+ * perusal. The start and end offsets are stream offsets. */
+static UNUSED
+void printMatch(const struct core_info *ci, u64a start, u64a end) {
+ assert(start <= end);
+ assert(end <= ci->buf_offset + ci->len);
+
+ printf("'");
+ u64a i = start;
+ for (; i <= MIN(ci->buf_offset, end); i++) {
+ u64a h_idx = ci->buf_offset - i;
+ u8 c = h_idx >= ci->hlen ? '?' : ci->hbuf[ci->hlen - h_idx - 1];
+ if (ourisprint(c) && c != '\'') {
+ printf("%c", c);
+ } else {
+ printf("\\x%02x", c);
+ }
+ }
+ for (; i <= end; i++) {
+ u64a b_idx = i - ci->buf_offset - 1;
+ u8 c = b_idx >= ci->len ? '?' : ci->buf[b_idx];
+ if (ourisprint(c) && c != '\'') {
+ printf("%c", c);
+ } else {
+ printf("\\x%02x", c);
+ }
+ }
+ printf("'");
+}
+#endif
+
hwlmcb_rv_t roseDelayRebuildCallback(size_t end, u32 id,
struct hs_scratch *scratch) {
- struct RoseContext *tctx = &scratch->tctxt;
- struct core_info *ci = &scratch->core_info;
+ struct RoseContext *tctx = &scratch->tctxt;
+ struct core_info *ci = &scratch->core_info;
const struct RoseEngine *t = ci->rose;
- size_t rb_len = MIN(ci->hlen, t->delayRebuildLength);
-
- u64a real_end = ci->buf_offset - rb_len + end + 1; // index after last byte
-
-#ifdef DEBUG
+ size_t rb_len = MIN(ci->hlen, t->delayRebuildLength);
+
+ u64a real_end = ci->buf_offset - rb_len + end + 1; // index after last byte
+
+#ifdef DEBUG
DEBUG_PRINTF("REBUILD MATCH id=%u end offset@%llu]: ", id, real_end);
u64a start = real_end < 8 ? 1 : real_end - 7;
printMatch(ci, start, real_end);
- printf("\n");
-#endif
-
+ printf("\n");
+#endif
+
DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups);
-
+
assert(id && id < t->size); // id is a program offset
const u64a som = 0;
const u8 flags = 0;
UNUSED hwlmcb_rv_t rv =
roseRunProgram(t, scratch, id, som, real_end, flags);
assert(rv != HWLM_TERMINATE_MATCHING);
-
+
/* we are just repopulating the delay queue, groups should be
- * already set from the original scan. */
-
- return tctx->groups;
-}
-
-static really_inline
-hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t,
- struct hs_scratch *scratch, u32 qi, s64a loc,
+ * already set from the original scan. */
+
+ return tctx->groups;
+}
+
+static really_inline
+hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t,
+ struct hs_scratch *scratch, u32 qi, s64a loc,
char in_chained) {
return ensureQueueFlushed_i(t, scratch, qi, loc, 1, in_chained);
-}
-
+}
+
hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t,
struct hs_scratch *scratch, u32 event,
u64a top_squash_distance, u64a end,
char in_catchup) {
assert(event == MQE_TOP || event >= MQE_TOP_FIRST);
- struct core_info *ci = &scratch->core_info;
-
+ struct core_info *ci = &scratch->core_info;
+
u8 *aa = getActiveLeafArray(t, scratch->core_info.state);
- u32 aaCount = t->activeArrayCount;
- struct fatbit *activeQueues = scratch->aqa;
- u32 qCount = t->queueCount;
-
+ u32 aaCount = t->activeArrayCount;
+ struct fatbit *activeQueues = scratch->aqa;
+ u32 qCount = t->queueCount;
+
const u32 qi = 0; /* MPV is always queue 0 if it exists */
- struct mq *q = &scratch->queues[qi];
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
-
- s64a loc = (s64a)end - ci->buf_offset;
- assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen);
-
- if (!mmbit_set(aa, aaCount, qi)) {
+ struct mq *q = &scratch->queues[qi];
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+
+ s64a loc = (s64a)end - ci->buf_offset;
+ assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen);
+
+ if (!mmbit_set(aa, aaCount, qi)) {
initQueue(q, qi, t, scratch);
- nfaQueueInitState(q->nfa, q);
- pushQueueAt(q, 0, MQE_START, loc);
- fatbit_set(activeQueues, qCount, qi);
- } else if (info->no_retrigger) {
- DEBUG_PRINTF("yawn\n");
- /* nfa only needs one top; we can go home now */
- return HWLM_CONTINUE_MATCHING;
- } else if (!fatbit_set(activeQueues, qCount, qi)) {
+ nfaQueueInitState(q->nfa, q);
+ pushQueueAt(q, 0, MQE_START, loc);
+ fatbit_set(activeQueues, qCount, qi);
+ } else if (info->no_retrigger) {
+ DEBUG_PRINTF("yawn\n");
+ /* nfa only needs one top; we can go home now */
+ return HWLM_CONTINUE_MATCHING;
+ } else if (!fatbit_set(activeQueues, qCount, qi)) {
initQueue(q, qi, t, scratch);
- loadStreamState(q->nfa, q, 0);
- pushQueueAt(q, 0, MQE_START, 0);
- } else if (isQueueFull(q)) {
- DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi);
- /* we know it is a chained nfa and the suffixes/outfixes must already
- * be known to be consistent */
+ loadStreamState(q->nfa, q, 0);
+ pushQueueAt(q, 0, MQE_START, 0);
+ } else if (isQueueFull(q)) {
+ DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi);
+ /* we know it is a chained nfa and the suffixes/outfixes must already
+ * be known to be consistent */
if (ensureMpvQueueFlushed(t, scratch, qi, loc, in_catchup)
- == HWLM_TERMINATE_MATCHING) {
+ == HWLM_TERMINATE_MATCHING) {
DEBUG_PRINTF("terminating...\n");
- return HWLM_TERMINATE_MATCHING;
- }
- }
-
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
+
if (top_squash_distance) {
assert(q->cur < q->end);
- struct mq_item *last = &q->items[q->end - 1];
- if (last->type == event
+ struct mq_item *last = &q->items[q->end - 1];
+ if (last->type == event
&& last->location >= loc - (s64a)top_squash_distance) {
- last->location = loc;
- goto event_enqueued;
- }
- }
-
- pushQueue(q, event, loc);
-
-event_enqueued:
- if (q_cur_loc(q) == (s64a)ci->len) {
- /* we may not run the nfa; need to ensure state is fine */
- DEBUG_PRINTF("empty run\n");
- pushQueueNoMerge(q, MQE_END, loc);
- char alive = nfaQueueExec(q->nfa, q, loc);
- if (alive) {
+ last->location = loc;
+ goto event_enqueued;
+ }
+ }
+
+ pushQueue(q, event, loc);
+
+event_enqueued:
+ if (q_cur_loc(q) == (s64a)ci->len) {
+ /* we may not run the nfa; need to ensure state is fine */
+ DEBUG_PRINTF("empty run\n");
+ pushQueueNoMerge(q, MQE_END, loc);
+ char alive = nfaQueueExec(q->nfa, q, loc);
+ if (alive) {
scratch->tctxt.mpv_inactive = 0;
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, loc);
- } else {
- mmbit_unset(aa, aaCount, qi);
- fatbit_unset(scratch->aqa, qCount, qi);
- }
- }
-
- DEBUG_PRINTF("added mpv event at %lld\n", loc);
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, loc);
+ } else {
+ mmbit_unset(aa, aaCount, qi);
+ fatbit_unset(scratch->aqa, qCount, qi);
+ }
+ }
+
+ DEBUG_PRINTF("added mpv event at %lld\n", loc);
scratch->tctxt.next_mpv_offset = 0; /* the top event may result in matches
* earlier than expected */
- return HWLM_CONTINUE_MATCHING;
-}
-
+ return HWLM_CONTINUE_MATCHING;
+}
+
int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) {
struct hs_scratch *scratch = ctx;
assert(scratch && scratch->magic == SCRATCH_MAGIC);
struct RoseContext *tctxt = &scratch->tctxt;
- struct core_info *ci = &scratch->core_info;
+ struct core_info *ci = &scratch->core_info;
const struct RoseEngine *t = ci->rose;
-
- u64a real_end = ci->buf_offset + end; // index after last byte
-
- DEBUG_PRINTF("MATCH id=%u offsets=[???,%llu]\n", id, real_end);
+
+ u64a real_end = ci->buf_offset + end; // index after last byte
+
+ DEBUG_PRINTF("MATCH id=%u offsets=[???,%llu]\n", id, real_end);
DEBUG_PRINTF("STATE groups=0x%016llx\n", tctxt->groups);
-
+
if (can_stop_matching(scratch)) {
- DEBUG_PRINTF("received a match when we're already dead!\n");
- return MO_HALT_MATCHING;
- }
-
- /* delayed literals need to be delivered before real literals; however
- * delayed literals only come from the floating table so if we are going
- * to deliver a literal here it must be too early for a delayed literal */
-
- /* no history checks from anchored region and we are before the flush
- * boundary */
-
- if (real_end <= t->floatingMinLiteralMatchOffset) {
+ DEBUG_PRINTF("received a match when we're already dead!\n");
+ return MO_HALT_MATCHING;
+ }
+
+ /* delayed literals need to be delivered before real literals; however
+ * delayed literals only come from the floating table so if we are going
+ * to deliver a literal here it must be too early for a delayed literal */
+
+ /* no history checks from anchored region and we are before the flush
+ * boundary */
+
+ if (real_end <= t->floatingMinLiteralMatchOffset) {
roseFlushLastByteHistory(t, scratch, real_end);
- tctxt->lastEndOffset = real_end;
- }
-
+ tctxt->lastEndOffset = real_end;
+ }
+
// Note that the "id" we have been handed is the program offset.
const u8 flags = ROSE_PROG_FLAG_IN_ANCHORED;
if (roseRunProgram(t, scratch, id, start, real_end, flags)
== HWLM_TERMINATE_MATCHING) {
assert(can_stop_matching(scratch));
- DEBUG_PRINTF("caller requested termination\n");
- return MO_HALT_MATCHING;
- }
-
+ DEBUG_PRINTF("caller requested termination\n");
+ return MO_HALT_MATCHING;
+ }
+
DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups);
-
- return MO_CONTINUE_MATCHING;
-}
-
+
+ return MO_CONTINUE_MATCHING;
+}
+
/**
* \brief Run the program for the given literal ID, with the interpreter
* inlined into this call.
*
* Assumes not in_anchored.
*/
-static really_inline
+static really_inline
hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t,
struct hs_scratch *scratch, u64a end,
u32 id) {
- DEBUG_PRINTF("id=%u\n", id);
+ DEBUG_PRINTF("id=%u\n", id);
assert(id && id < t->size); // id is an offset into bytecode
const u64a som = 0;
const u8 flags = 0;
@@ -243,296 +243,296 @@ hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t,
} else {
return roseRunProgram(t, scratch, id, som, end, flags);
}
-}
-
-static rose_inline
+}
+
+static rose_inline
hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t,
struct hs_scratch *scratch,
struct fatbit **delaySlots, u32 vicIndex,
u64a offset) {
- /* assert(!tctxt->in_anchored); */
- assert(vicIndex < DELAY_SLOT_COUNT);
+ /* assert(!tctxt->in_anchored); */
+ assert(vicIndex < DELAY_SLOT_COUNT);
const struct fatbit *vicSlot = delaySlots[vicIndex];
u32 delay_count = t->delay_count;
-
+
if (offset < t->floatingMinLiteralMatchOffset) {
- DEBUG_PRINTF("too soon\n");
- return HWLM_CONTINUE_MATCHING;
- }
-
+ DEBUG_PRINTF("too soon\n");
+ return HWLM_CONTINUE_MATCHING;
+ }
+
struct RoseContext *tctxt = &scratch->tctxt;
roseFlushLastByteHistory(t, scratch, offset);
- tctxt->lastEndOffset = offset;
-
+ tctxt->lastEndOffset = offset;
+
const u32 *programs = getByOffset(t, t->delayProgramOffset);
-
+
for (u32 it = fatbit_iterate(vicSlot, delay_count, MMB_INVALID);
it != MMB_INVALID; it = fatbit_iterate(vicSlot, delay_count, it)) {
- UNUSED rose_group old_groups = tctxt->groups;
-
+ UNUSED rose_group old_groups = tctxt->groups;
+
DEBUG_PRINTF("DELAYED MATCH id=%u offset=%llu\n", it, offset);
const u64a som = 0;
const u8 flags = 0;
hwlmcb_rv_t rv = roseRunProgram(t, scratch, programs[it], som, offset,
flags);
DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups);
-
+
/* delayed literals can't safely set groups.
- * However we may be setting groups that successors already have
- * worked out that we don't need to match the group */
- DEBUG_PRINTF("groups in %016llx out %016llx\n", old_groups,
- tctxt->groups);
-
- if (rv == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- }
-
- return HWLM_CONTINUE_MATCHING;
-}
-
-static really_inline
+ * However we may be setting groups that successors already have
+ * worked out that we don't need to match the group */
+ DEBUG_PRINTF("groups in %016llx out %016llx\n", old_groups,
+ tctxt->groups);
+
+ if (rv == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
+static really_inline
hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t,
struct hs_scratch *scratch,
u32 curr_loc) {
struct RoseContext *tctxt = &scratch->tctxt;
struct fatbit *curr_row = getAnchoredLiteralLog(scratch)[curr_loc - 1];
u32 region_width = t->anchored_count;
-
+
const u32 *programs = getByOffset(t, t->anchoredProgramOffset);
- DEBUG_PRINTF("report matches at curr loc\n");
+ DEBUG_PRINTF("report matches at curr loc\n");
for (u32 it = fatbit_iterate(curr_row, region_width, MMB_INVALID);
it != MMB_INVALID; it = fatbit_iterate(curr_row, region_width, it)) {
- DEBUG_PRINTF("it = %u/%u\n", it, region_width);
-
- rose_group old_groups = tctxt->groups;
+ DEBUG_PRINTF("it = %u/%u\n", it, region_width);
+
+ rose_group old_groups = tctxt->groups;
DEBUG_PRINTF("ANCH REPLAY MATCH id=%u offset=%u\n", it, curr_loc);
const u64a som = 0;
const u8 flags = 0;
hwlmcb_rv_t rv = roseRunProgram(t, scratch, programs[it], som, curr_loc,
flags);
DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups);
-
+
/* anchored literals can't safely set groups.
* However we may be setting groups that successors already
- * have worked out that we don't need to match the group */
- DEBUG_PRINTF("groups in %016llx out %016llx\n", old_groups,
- tctxt->groups);
- tctxt->groups &= old_groups;
-
- if (rv == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- }
-
- /* clear row; does not invalidate iteration */
- bf64_unset(&scratch->al_log_sum, curr_loc - 1);
-
- return HWLM_CONTINUE_MATCHING;
-}
-
-static really_inline
+ * have worked out that we don't need to match the group */
+ DEBUG_PRINTF("groups in %016llx out %016llx\n", old_groups,
+ tctxt->groups);
+ tctxt->groups &= old_groups;
+
+ if (rv == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
+
+ /* clear row; does not invalidate iteration */
+ bf64_unset(&scratch->al_log_sum, curr_loc - 1);
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
+static really_inline
u32 anchored_it_begin(struct hs_scratch *scratch) {
struct RoseContext *tctxt = &scratch->tctxt;
- if (tctxt->lastEndOffset >= scratch->anchored_literal_region_len) {
- return MMB_INVALID;
- }
- u32 begin = tctxt->lastEndOffset;
- begin--;
-
+ if (tctxt->lastEndOffset >= scratch->anchored_literal_region_len) {
+ return MMB_INVALID;
+ }
+ u32 begin = tctxt->lastEndOffset;
+ begin--;
+
return bf64_iterate(scratch->al_log_sum, begin);
-}
-
-static really_inline
+}
+
+static really_inline
hwlmcb_rv_t flushAnchoredLiterals(const struct RoseEngine *t,
struct hs_scratch *scratch,
- u32 *anchored_it_param, u64a to_off) {
+ u32 *anchored_it_param, u64a to_off) {
struct RoseContext *tctxt = &scratch->tctxt;
- u32 anchored_it = *anchored_it_param;
- /* catch up any remaining anchored matches */
- for (; anchored_it != MMB_INVALID && anchored_it < to_off;
- anchored_it = bf64_iterate(scratch->al_log_sum, anchored_it)) {
- assert(anchored_it < scratch->anchored_literal_region_len);
- DEBUG_PRINTF("loc_it = %u\n", anchored_it);
- u32 curr_off = anchored_it + 1;
+ u32 anchored_it = *anchored_it_param;
+ /* catch up any remaining anchored matches */
+ for (; anchored_it != MMB_INVALID && anchored_it < to_off;
+ anchored_it = bf64_iterate(scratch->al_log_sum, anchored_it)) {
+ assert(anchored_it < scratch->anchored_literal_region_len);
+ DEBUG_PRINTF("loc_it = %u\n", anchored_it);
+ u32 curr_off = anchored_it + 1;
roseFlushLastByteHistory(t, scratch, curr_off);
- tctxt->lastEndOffset = curr_off;
-
+ tctxt->lastEndOffset = curr_off;
+
if (flushAnchoredLiteralAtLoc(t, scratch, curr_off)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- }
-
- *anchored_it_param = anchored_it;
- return HWLM_CONTINUE_MATCHING;
-}
-
-static really_inline
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
+
+ *anchored_it_param = anchored_it;
+ return HWLM_CONTINUE_MATCHING;
+}
+
+static really_inline
hwlmcb_rv_t playVictims(const struct RoseEngine *t, struct hs_scratch *scratch,
u32 *anchored_it, u64a lastEnd, u64a victimDelaySlots,
struct fatbit **delaySlots) {
- while (victimDelaySlots) {
- u32 vic = findAndClearLSB_64(&victimDelaySlots);
- DEBUG_PRINTF("vic = %u\n", vic);
- u64a vicOffset = vic + (lastEnd & ~(u64a)DELAY_MASK);
-
+ while (victimDelaySlots) {
+ u32 vic = findAndClearLSB_64(&victimDelaySlots);
+ DEBUG_PRINTF("vic = %u\n", vic);
+ u64a vicOffset = vic + (lastEnd & ~(u64a)DELAY_MASK);
+
if (flushAnchoredLiterals(t, scratch, anchored_it, vicOffset)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
-
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+
if (playDelaySlot(t, scratch, delaySlots, vic % DELAY_SLOT_COUNT,
vicOffset) == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- }
-
- return HWLM_CONTINUE_MATCHING;
-}
-
-/* call flushQueuedLiterals instead */
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
+/* call flushQueuedLiterals instead */
hwlmcb_rv_t flushQueuedLiterals_i(const struct RoseEngine *t,
struct hs_scratch *scratch, u64a currEnd) {
struct RoseContext *tctxt = &scratch->tctxt;
- u64a lastEnd = tctxt->delayLastEndOffset;
- DEBUG_PRINTF("flushing backed up matches @%llu up from %llu\n", currEnd,
- lastEnd);
-
- assert(currEnd != lastEnd); /* checked in main entry point */
-
+ u64a lastEnd = tctxt->delayLastEndOffset;
+ DEBUG_PRINTF("flushing backed up matches @%llu up from %llu\n", currEnd,
+ lastEnd);
+
+ assert(currEnd != lastEnd); /* checked in main entry point */
+
u32 anchored_it = anchored_it_begin(scratch);
-
- if (!tctxt->filledDelayedSlots) {
- DEBUG_PRINTF("no delayed, no flush\n");
- goto anchored_leftovers;
- }
-
- {
+
+ if (!tctxt->filledDelayedSlots) {
+ DEBUG_PRINTF("no delayed, no flush\n");
+ goto anchored_leftovers;
+ }
+
+ {
struct fatbit **delaySlots = getDelaySlots(scratch);
-
- u32 lastIndex = lastEnd & DELAY_MASK;
- u32 currIndex = currEnd & DELAY_MASK;
-
- int wrapped = (lastEnd | DELAY_MASK) < currEnd;
-
- u64a victimDelaySlots; /* needs to be twice as wide as the number of
- * slots. */
-
- DEBUG_PRINTF("hello %08x\n", tctxt->filledDelayedSlots);
- if (!wrapped) {
- victimDelaySlots = tctxt->filledDelayedSlots;
-
- DEBUG_PRINTF("unwrapped %016llx %08x\n", victimDelaySlots,
- tctxt->filledDelayedSlots);
- /* index vars < 32 so 64bit shifts are safe */
-
- /* clear all slots at last index and below, */
- victimDelaySlots &= ~((1LLU << (lastIndex + 1)) - 1);
-
- /* clear all slots above curr index */
- victimDelaySlots &= (1LLU << (currIndex + 1)) - 1;
-
- tctxt->filledDelayedSlots &= ~victimDelaySlots;
-
- DEBUG_PRINTF("unwrapped %016llx %08x\n", victimDelaySlots,
- tctxt->filledDelayedSlots);
- } else {
- DEBUG_PRINTF("wrapped %08x\n", tctxt->filledDelayedSlots);
-
- /* 1st half: clear all slots at last index and below, */
- u64a first_half = tctxt->filledDelayedSlots;
- first_half &= ~((1ULL << (lastIndex + 1)) - 1);
- tctxt->filledDelayedSlots &= (1ULL << (lastIndex + 1)) - 1;
-
- u64a second_half = tctxt->filledDelayedSlots;
-
- if (currEnd > lastEnd + DELAY_SLOT_COUNT) {
- /* 2nd half: clear all slots above last index */
- second_half &= (1ULL << (lastIndex + 1)) - 1;
- } else {
- /* 2nd half: clear all slots above curr index */
- second_half &= (1ULL << (currIndex + 1)) - 1;
- }
- tctxt->filledDelayedSlots &= ~second_half;
-
- victimDelaySlots = first_half | (second_half << DELAY_SLOT_COUNT);
-
- DEBUG_PRINTF("-- %016llx %016llx = %016llx (li %u)\n", first_half,
- second_half, victimDelaySlots, lastIndex);
- }
-
+
+ u32 lastIndex = lastEnd & DELAY_MASK;
+ u32 currIndex = currEnd & DELAY_MASK;
+
+ int wrapped = (lastEnd | DELAY_MASK) < currEnd;
+
+ u64a victimDelaySlots; /* needs to be twice as wide as the number of
+ * slots. */
+
+ DEBUG_PRINTF("hello %08x\n", tctxt->filledDelayedSlots);
+ if (!wrapped) {
+ victimDelaySlots = tctxt->filledDelayedSlots;
+
+ DEBUG_PRINTF("unwrapped %016llx %08x\n", victimDelaySlots,
+ tctxt->filledDelayedSlots);
+ /* index vars < 32 so 64bit shifts are safe */
+
+ /* clear all slots at last index and below, */
+ victimDelaySlots &= ~((1LLU << (lastIndex + 1)) - 1);
+
+ /* clear all slots above curr index */
+ victimDelaySlots &= (1LLU << (currIndex + 1)) - 1;
+
+ tctxt->filledDelayedSlots &= ~victimDelaySlots;
+
+ DEBUG_PRINTF("unwrapped %016llx %08x\n", victimDelaySlots,
+ tctxt->filledDelayedSlots);
+ } else {
+ DEBUG_PRINTF("wrapped %08x\n", tctxt->filledDelayedSlots);
+
+ /* 1st half: clear all slots at last index and below, */
+ u64a first_half = tctxt->filledDelayedSlots;
+ first_half &= ~((1ULL << (lastIndex + 1)) - 1);
+ tctxt->filledDelayedSlots &= (1ULL << (lastIndex + 1)) - 1;
+
+ u64a second_half = tctxt->filledDelayedSlots;
+
+ if (currEnd > lastEnd + DELAY_SLOT_COUNT) {
+ /* 2nd half: clear all slots above last index */
+ second_half &= (1ULL << (lastIndex + 1)) - 1;
+ } else {
+ /* 2nd half: clear all slots above curr index */
+ second_half &= (1ULL << (currIndex + 1)) - 1;
+ }
+ tctxt->filledDelayedSlots &= ~second_half;
+
+ victimDelaySlots = first_half | (second_half << DELAY_SLOT_COUNT);
+
+ DEBUG_PRINTF("-- %016llx %016llx = %016llx (li %u)\n", first_half,
+ second_half, victimDelaySlots, lastIndex);
+ }
+
if (playVictims(t, scratch, &anchored_it, lastEnd, victimDelaySlots,
delaySlots) == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- }
-
-anchored_leftovers:;
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
+
+anchored_leftovers:;
hwlmcb_rv_t rv = flushAnchoredLiterals(t, scratch, &anchored_it, currEnd);
- tctxt->delayLastEndOffset = currEnd;
- return rv;
-}
-
+ tctxt->delayLastEndOffset = currEnd;
+ return rv;
+}
+
static really_inline
hwlmcb_rv_t roseCallback_i(size_t end, u32 id, struct hs_scratch *scratch) {
struct RoseContext *tctx = &scratch->tctxt;
const struct RoseEngine *t = scratch->core_info.rose;
- u64a real_end = end + tctx->lit_offset_adjust;
-
-#if defined(DEBUG)
+ u64a real_end = end + tctx->lit_offset_adjust;
+
+#if defined(DEBUG)
DEBUG_PRINTF("MATCH id=%u end offset@%llu: ", id, real_end);
u64a start = real_end < 8 ? 1 : real_end - 7;
printMatch(&scratch->core_info, start, real_end);
- printf("\n");
-#endif
- DEBUG_PRINTF("last end %llu\n", tctx->lastEndOffset);
-
+ printf("\n");
+#endif
+ DEBUG_PRINTF("last end %llu\n", tctx->lastEndOffset);
+
DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups);
-
+
if (can_stop_matching(scratch)) {
- DEBUG_PRINTF("received a match when we're already dead!\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
+ DEBUG_PRINTF("received a match when we're already dead!\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
hwlmcb_rv_t rv = flushQueuedLiterals(t, scratch, real_end);
- /* flushDelayed may have advanced tctx->lastEndOffset */
-
+ /* flushDelayed may have advanced tctx->lastEndOffset */
+
if (real_end >= t->floatingMinLiteralMatchOffset) {
roseFlushLastByteHistory(t, scratch, real_end);
- tctx->lastEndOffset = real_end;
- }
-
- if (rv == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
-
+ tctx->lastEndOffset = real_end;
+ }
+
+ if (rv == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+
rv = roseProcessMatchInline(t, scratch, real_end, id);
-
+
DEBUG_PRINTF("DONE groups=0x%016llx\n", tctx->groups);
-
- if (rv != HWLM_TERMINATE_MATCHING) {
- return tctx->groups;
- }
-
+
+ if (rv != HWLM_TERMINATE_MATCHING) {
+ return tctx->groups;
+ }
+
assert(can_stop_matching(scratch));
- DEBUG_PRINTF("user requested halt\n");
- return HWLM_TERMINATE_MATCHING;
-}
-
+ DEBUG_PRINTF("user requested halt\n");
+ return HWLM_TERMINATE_MATCHING;
+}
+
hwlmcb_rv_t roseCallback(size_t end, u32 id, struct hs_scratch *scratch) {
return roseCallback_i(end, id, scratch);
}
-
+
hwlmcb_rv_t roseFloatingCallback(size_t end, u32 id,
struct hs_scratch *scratch) {
const struct RoseEngine *t = scratch->core_info.rose;
-
+
return roseCallback_i(end, id, scratch) & t->floating_group_mask;
}
-
+
/**
* \brief Execute a boundary report program.
*
@@ -542,12 +542,12 @@ hwlmcb_rv_t roseFloatingCallback(size_t end, u32 id,
int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
u64a stream_offset, struct hs_scratch *scratch) {
DEBUG_PRINTF("running boundary program at offset %u\n", program);
-
+
if (can_stop_matching(scratch)) {
DEBUG_PRINTF("can stop matching\n");
return MO_HALT_MATCHING;
}
-
+
if (rose->hasSom && scratch->deduper.current_report_offset == ~0ULL) {
/* we cannot delay the initialization of the som deduper logs any longer
* as we are reporting matches. This is done explicitly as we are
@@ -557,13 +557,13 @@ int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
fatbit_clear(scratch->deduper.som_log[1]);
scratch->deduper.som_log_dirty = 0;
}
-
+
// Keep assertions in program report path happy. At offset zero, there can
// have been no earlier reports. At EOD, all earlier reports should have
// been handled and we will have been caught up to the stream offset by the
// time we are running boundary report programs.
scratch->tctxt.minMatchOffset = stream_offset;
-
+
const u64a som = 0;
const u8 flags = 0;
hwlmcb_rv_t rv = roseRunProgram(rose, scratch, program, som, stream_offset,
@@ -573,7 +573,7 @@ int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
}
return MO_CONTINUE_MATCHING;
-}
+}
/**
* \brief Execute a flush combination program.
diff --git a/contrib/libs/hyperscan/src/rose/match.h b/contrib/libs/hyperscan/src/rose/match.h
index b323213cde..c03b1ebbae 100644
--- a/contrib/libs/hyperscan/src/rose/match.h
+++ b/contrib/libs/hyperscan/src/rose/match.h
@@ -1,283 +1,283 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_MATCH_H
-#define ROSE_MATCH_H
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_MATCH_H
+#define ROSE_MATCH_H
+
#include "catchup.h"
-#include "runtime.h"
-#include "scratch.h"
+#include "runtime.h"
+#include "scratch.h"
#include "report.h"
-#include "rose_common.h"
-#include "rose_internal.h"
-#include "ue2common.h"
+#include "rose_common.h"
+#include "rose_internal.h"
+#include "ue2common.h"
#include "hwlm/hwlm.h"
-#include "nfa/nfa_api.h"
-#include "nfa/nfa_api_queue.h"
-#include "nfa/nfa_api_util.h"
-#include "som/som_runtime.h"
-#include "util/bitutils.h"
+#include "nfa/nfa_api.h"
+#include "nfa/nfa_api_queue.h"
+#include "nfa/nfa_api_util.h"
+#include "som/som_runtime.h"
+#include "util/bitutils.h"
#include "util/exhaust.h"
#include "util/fatbit.h"
-#include "util/multibit.h"
-
-/* Callbacks, defined in catchup.c */
-
+#include "util/multibit.h"
+
+/* Callbacks, defined in catchup.c */
+
int roseNfaAdaptor(u64a start, u64a end, ReportID id, void *context);
-
-/* Callbacks, defined in match.c */
-
+
+/* Callbacks, defined in match.c */
+
hwlmcb_rv_t roseCallback(size_t end, u32 id, struct hs_scratch *scratch);
hwlmcb_rv_t roseFloatingCallback(size_t end, u32 id,
struct hs_scratch *scratch);
hwlmcb_rv_t roseDelayRebuildCallback(size_t end, u32 id,
struct hs_scratch *scratch);
int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx);
-
-/* Common code, used all over Rose runtime */
-
+
+/* Common code, used all over Rose runtime */
+
hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t,
struct hs_scratch *scratch, u32 event,
u64a top_squash_distance, u64a end,
char in_catchup);
-
+
/** \brief Initialize the queue for a suffix/outfix engine. */
-static really_inline
-void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t,
+static really_inline
+void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t,
struct hs_scratch *scratch) {
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
- assert(scratch->fullState);
- q->nfa = getNfaByInfo(t, info);
- q->end = 0;
- q->cur = 0;
- q->state = scratch->fullState + info->fullStateOffset;
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+ assert(scratch->fullState);
+ q->nfa = getNfaByInfo(t, info);
+ q->end = 0;
+ q->cur = 0;
+ q->state = scratch->fullState + info->fullStateOffset;
q->streamState = scratch->core_info.state + info->stateOffset;
- q->offset = scratch->core_info.buf_offset;
- q->buffer = scratch->core_info.buf;
- q->length = scratch->core_info.len;
- q->history = scratch->core_info.hbuf;
- q->hlength = scratch->core_info.hlen;
+ q->offset = scratch->core_info.buf_offset;
+ q->buffer = scratch->core_info.buf;
+ q->length = scratch->core_info.len;
+ q->history = scratch->core_info.hbuf;
+ q->hlength = scratch->core_info.hlen;
q->cb = roseNfaAdaptor;
q->context = scratch;
- q->report_current = 0;
-
- DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
- "state=%u\n", qi, q->offset, info->fullStateOffset,
- info->stateOffset, *(u32 *)q->state);
-}
-
+ q->report_current = 0;
+
+ DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
+ "state=%u\n", qi, q->offset, info->fullStateOffset,
+ info->stateOffset, *(u32 *)q->state);
+}
+
/** \brief Initialize the queue for a leftfix (prefix/infix) engine. */
-static really_inline
-void initRoseQueue(const struct RoseEngine *t, u32 qi,
- const struct LeftNfaInfo *left,
+static really_inline
+void initRoseQueue(const struct RoseEngine *t, u32 qi,
+ const struct LeftNfaInfo *left,
struct hs_scratch *scratch) {
- struct mq *q = scratch->queues + qi;
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
- q->nfa = getNfaByInfo(t, info);
- q->end = 0;
- q->cur = 0;
- q->state = scratch->fullState + info->fullStateOffset;
-
- // Transient roses don't have stream state, we use tstate in scratch
- // instead. The only reason we need this at ALL is for LimEx extended
- // regions, which assume that they have access to q->streamState +
- // compressedStateSize.
- if (left->transient) {
- q->streamState = (char *)scratch->tstate + info->stateOffset;
- } else {
+ struct mq *q = scratch->queues + qi;
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+ q->nfa = getNfaByInfo(t, info);
+ q->end = 0;
+ q->cur = 0;
+ q->state = scratch->fullState + info->fullStateOffset;
+
+ // Transient roses don't have stream state, we use tstate in scratch
+ // instead. The only reason we need this at ALL is for LimEx extended
+ // regions, which assume that they have access to q->streamState +
+ // compressedStateSize.
+ if (left->transient) {
+ q->streamState = (char *)scratch->tstate + info->stateOffset;
+ } else {
q->streamState = scratch->core_info.state + info->stateOffset;
- }
-
- q->offset = scratch->core_info.buf_offset;
- q->buffer = scratch->core_info.buf;
- q->length = scratch->core_info.len;
- q->history = scratch->core_info.hbuf;
- q->hlength = scratch->core_info.hlen;
- q->cb = NULL;
- q->context = NULL;
- q->report_current = 0;
-
- DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
- "state=%u\n", qi, q->offset, info->fullStateOffset,
- info->stateOffset, *(u32 *)q->state);
-}
-
-/** returns 0 if space for two items (top and end) on the queue */
-static really_inline
-char isQueueFull(const struct mq *q) {
- return q->end + 2 > MAX_MQE_LEN;
-}
-
-static really_inline
-void loadStreamState(const struct NFA *nfa, struct mq *q, s64a loc) {
- DEBUG_PRINTF("offset=%llu, length=%zu, hlength=%zu, loc=%lld\n",
- q->offset, q->length, q->hlength, loc);
- nfaExpandState(nfa, q->state, q->streamState, q->offset + loc,
- queue_prev_byte(q, loc));
-}
-
-static really_inline
+ }
+
+ q->offset = scratch->core_info.buf_offset;
+ q->buffer = scratch->core_info.buf;
+ q->length = scratch->core_info.len;
+ q->history = scratch->core_info.hbuf;
+ q->hlength = scratch->core_info.hlen;
+ q->cb = NULL;
+ q->context = NULL;
+ q->report_current = 0;
+
+ DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
+ "state=%u\n", qi, q->offset, info->fullStateOffset,
+ info->stateOffset, *(u32 *)q->state);
+}
+
+/** returns 0 if space for two items (top and end) on the queue */
+static really_inline
+char isQueueFull(const struct mq *q) {
+ return q->end + 2 > MAX_MQE_LEN;
+}
+
+static really_inline
+void loadStreamState(const struct NFA *nfa, struct mq *q, s64a loc) {
+ DEBUG_PRINTF("offset=%llu, length=%zu, hlength=%zu, loc=%lld\n",
+ q->offset, q->length, q->hlength, loc);
+ nfaExpandState(nfa, q->state, q->streamState, q->offset + loc,
+ queue_prev_byte(q, loc));
+}
+
+static really_inline
void storeRoseDelay(const struct RoseEngine *t, char *state,
- const struct LeftNfaInfo *left, u32 loc) {
- u32 di = left->lagIndex;
- if (di == ROSE_OFFSET_INVALID) {
- return;
- }
-
- assert(loc < 256); // ONE WHOLE BYTE!
- DEBUG_PRINTF("storing rose delay %u in slot %u\n", loc, di);
- u8 *leftfixDelay = getLeftfixLagTable(t, state);
- assert(loc <= MAX_STORED_LEFTFIX_LAG);
- leftfixDelay[di] = loc;
-}
-
-static really_inline
+ const struct LeftNfaInfo *left, u32 loc) {
+ u32 di = left->lagIndex;
+ if (di == ROSE_OFFSET_INVALID) {
+ return;
+ }
+
+ assert(loc < 256); // ONE WHOLE BYTE!
+ DEBUG_PRINTF("storing rose delay %u in slot %u\n", loc, di);
+ u8 *leftfixDelay = getLeftfixLagTable(t, state);
+ assert(loc <= MAX_STORED_LEFTFIX_LAG);
+ leftfixDelay[di] = loc;
+}
+
+static really_inline
void setAsZombie(const struct RoseEngine *t, char *state,
- const struct LeftNfaInfo *left) {
- u32 di = left->lagIndex;
- assert(di != ROSE_OFFSET_INVALID);
- if (di == ROSE_OFFSET_INVALID) {
- return;
- }
-
- u8 *leftfixDelay = getLeftfixLagTable(t, state);
- leftfixDelay[di] = OWB_ZOMBIE_ALWAYS_YES;
-}
-
-/* loadRoseDelay MUST NOT be called on the first stream write as it is only
- * initialized for running nfas on stream boundaries */
-static really_inline
+ const struct LeftNfaInfo *left) {
+ u32 di = left->lagIndex;
+ assert(di != ROSE_OFFSET_INVALID);
+ if (di == ROSE_OFFSET_INVALID) {
+ return;
+ }
+
+ u8 *leftfixDelay = getLeftfixLagTable(t, state);
+ leftfixDelay[di] = OWB_ZOMBIE_ALWAYS_YES;
+}
+
+/* loadRoseDelay MUST NOT be called on the first stream write as it is only
+ * initialized for running nfas on stream boundaries */
+static really_inline
u32 loadRoseDelay(const struct RoseEngine *t, const char *state,
- const struct LeftNfaInfo *left) {
- u32 di = left->lagIndex;
- if (di == ROSE_OFFSET_INVALID) {
- return 0;
- }
-
- const u8 *leftfixDelay = getLeftfixLagTableConst(t, state);
- u32 loc = leftfixDelay[di];
- DEBUG_PRINTF("read rose delay %u from slot %u\n", loc, di);
- return loc;
-}
-
-static really_inline
+ const struct LeftNfaInfo *left) {
+ u32 di = left->lagIndex;
+ if (di == ROSE_OFFSET_INVALID) {
+ return 0;
+ }
+
+ const u8 *leftfixDelay = getLeftfixLagTableConst(t, state);
+ u32 loc = leftfixDelay[di];
+ DEBUG_PRINTF("read rose delay %u from slot %u\n", loc, di);
+ return loc;
+}
+
+static really_inline
char isZombie(const struct RoseEngine *t, const char *state,
- const struct LeftNfaInfo *left) {
- u32 di = left->lagIndex;
- assert(di != ROSE_OFFSET_INVALID);
- if (di == ROSE_OFFSET_INVALID) {
- return 0;
- }
-
- const u8 *leftfixDelay = getLeftfixLagTableConst(t, state);
- DEBUG_PRINTF("read owb %hhu from slot %u\n", leftfixDelay[di], di);
- return leftfixDelay[di] == OWB_ZOMBIE_ALWAYS_YES;
-}
-
+ const struct LeftNfaInfo *left) {
+ u32 di = left->lagIndex;
+ assert(di != ROSE_OFFSET_INVALID);
+ if (di == ROSE_OFFSET_INVALID) {
+ return 0;
+ }
+
+ const u8 *leftfixDelay = getLeftfixLagTableConst(t, state);
+ DEBUG_PRINTF("read owb %hhu from slot %u\n", leftfixDelay[di], di);
+ return leftfixDelay[di] == OWB_ZOMBIE_ALWAYS_YES;
+}
+
hwlmcb_rv_t flushQueuedLiterals_i(const struct RoseEngine *t,
struct hs_scratch *scratch, u64a end);
-
-static really_inline
+
+static really_inline
hwlmcb_rv_t flushQueuedLiterals(const struct RoseEngine *t,
struct hs_scratch *scratch, u64a end) {
struct RoseContext *tctxt = &scratch->tctxt;
- if (tctxt->delayLastEndOffset == end) {
- DEBUG_PRINTF("no progress, no flush\n");
- return HWLM_CONTINUE_MATCHING;
- }
-
+ if (tctxt->delayLastEndOffset == end) {
+ DEBUG_PRINTF("no progress, no flush\n");
+ return HWLM_CONTINUE_MATCHING;
+ }
+
if (!tctxt->filledDelayedSlots && !scratch->al_log_sum) {
- tctxt->delayLastEndOffset = end;
- return HWLM_CONTINUE_MATCHING;
- }
-
+ tctxt->delayLastEndOffset = end;
+ return HWLM_CONTINUE_MATCHING;
+ }
+
return flushQueuedLiterals_i(t, scratch, end);
-}
-
-static really_inline
+}
+
+static really_inline
hwlmcb_rv_t cleanUpDelayed(const struct RoseEngine *t,
struct hs_scratch *scratch, size_t length,
u64a offset) {
if (can_stop_matching(scratch)) {
- return HWLM_TERMINATE_MATCHING;
- }
-
+ return HWLM_TERMINATE_MATCHING;
+ }
+
if (flushQueuedLiterals(t, scratch, length + offset)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
-
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+
struct RoseContext *tctxt = &scratch->tctxt;
- if (tctxt->filledDelayedSlots) {
- DEBUG_PRINTF("dirty\n");
+ if (tctxt->filledDelayedSlots) {
+ DEBUG_PRINTF("dirty\n");
scratch->core_info.status |= STATUS_DELAY_DIRTY;
- } else {
+ } else {
scratch->core_info.status &= ~STATUS_DELAY_DIRTY;
- }
-
- tctxt->filledDelayedSlots = 0;
- tctxt->delayLastEndOffset = offset;
-
- return HWLM_CONTINUE_MATCHING;
-}
-
-static rose_inline
+ }
+
+ tctxt->filledDelayedSlots = 0;
+ tctxt->delayLastEndOffset = offset;
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
+static rose_inline
void roseFlushLastByteHistory(const struct RoseEngine *t,
struct hs_scratch *scratch, u64a currEnd) {
- if (!t->lastByteHistoryIterOffset) {
- return;
- }
-
+ if (!t->lastByteHistoryIterOffset) {
+ return;
+ }
+
struct RoseContext *tctxt = &scratch->tctxt;
- struct core_info *ci = &scratch->core_info;
-
- /* currEnd is last byte of string + 1 */
- if (tctxt->lastEndOffset == ci->buf_offset + ci->len
- || currEnd != ci->buf_offset + ci->len) {
- /* already flushed or it is not yet time to flush */
- return;
- }
-
- DEBUG_PRINTF("flushing\n");
-
+ struct core_info *ci = &scratch->core_info;
+
+ /* currEnd is last byte of string + 1 */
+ if (tctxt->lastEndOffset == ci->buf_offset + ci->len
+ || currEnd != ci->buf_offset + ci->len) {
+ /* already flushed or it is not yet time to flush */
+ return;
+ }
+
+ DEBUG_PRINTF("flushing\n");
+
const struct mmbit_sparse_iter *it =
getByOffset(t, t->lastByteHistoryIterOffset);
assert(ISALIGNED(it));
- const u32 numStates = t->rolesWithStateCount;
+ const u32 numStates = t->rolesWithStateCount;
void *role_state = getRoleState(scratch->core_info.state);
-
+
struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
mmbit_sparse_iter_unset(role_state, numStates, it, si_state);
-}
-
+}
+
static rose_inline
int roseHasInFlightMatches(const struct RoseEngine *t, char *state,
const struct hs_scratch *scratch) {
@@ -380,4 +380,4 @@ hwlmcb_rv_t ensureQueueFlushed(const struct RoseEngine *t,
return ensureQueueFlushed_i(t, scratch, qi, loc, 0, 0);
}
-#endif
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/miracle.h b/contrib/libs/hyperscan/src/rose/miracle.h
index 28f61a2992..604c50205c 100644
--- a/contrib/libs/hyperscan/src/rose/miracle.h
+++ b/contrib/libs/hyperscan/src/rose/miracle.h
@@ -1,138 +1,138 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_MIRACLE_H
-#define ROSE_MIRACLE_H
-
-#include "ue2common.h"
-#include "runtime.h"
-#include "rose_internal.h"
-
-/** \brief Maximum number of bytes to scan when looking for a "miracle" stop
- * character. */
-#define MIRACLE_LEN_MAX 32
-
-static really_inline
-u64a roseMiracleScan(const u8 *stop, const u8 *d, const u8 *d_start) {
- assert(d >= d_start);
-
- // Note: unrolling this loop manually does appear to reduce its
- // performance. I'm sick of tilting at this particular windmill.
-
- u32 mshift = 0;
- do {
- u64a s = (u64a)stop[*d];
- if (s) {
- s <<= mshift;
- return s;
- }
- mshift++;
- } while (--d >= d_start);
- return 0;
-}
-
-/**
- * \brief "Miracle" scan: uses stop table to check if we can skip forward to a
- * location where we know that the given rose engine will be in a known state.
- *
- * Scans the buffer/history between relative locations \a begin_loc and \a
- * end_loc, and returns a miracle location (if any) that appears in the stream
- * after \a begin_loc.
- *
- * Returns 1 if some bytes can be skipped and sets \a miracle_loc
- * appropriately, 0 otherwise.
- */
-static rose_inline
-char roseMiracleOccurs(const struct RoseEngine *t,
- const struct LeftNfaInfo *left,
- const struct core_info *ci, const s64a begin_loc,
- const s64a end_loc, s64a *miracle_loc) {
- assert(!left->transient);
- assert(left->stopTable);
-
- DEBUG_PRINTF("looking for miracle over [%lld,%lld], maxLag=%u\n",
- begin_loc, end_loc, left->maxLag);
- DEBUG_PRINTF("ci->len=%zu, ci->hlen=%zu\n", ci->len, ci->hlen);
-
- assert(begin_loc <= end_loc);
- assert(begin_loc >= -(s64a)ci->hlen);
- assert(end_loc <= (s64a)ci->len);
-
- const u8 *stop = getByOffset(t, left->stopTable);
-
- const s64a scan_end_loc = end_loc - left->maxLag;
- if (scan_end_loc <= begin_loc) {
- DEBUG_PRINTF("nothing to scan\n");
- return 0;
- }
-
- const s64a start = MAX(begin_loc, scan_end_loc - MIRACLE_LEN_MAX);
- DEBUG_PRINTF("scan [%lld..%lld]\n", start, scan_end_loc);
-
- u64a s = 0; // state, on bits are miracle locations
-
- // Scan buffer.
- const s64a buf_scan_start = MAX(0, start);
- if (scan_end_loc > buf_scan_start) {
- const u8 *buf = ci->buf;
- const u8 *d = buf + scan_end_loc - 1;
- const u8 *d_start = buf + buf_scan_start;
- s = roseMiracleScan(stop, d, d_start);
- if (s) {
- goto miracle_found;
- }
- }
-
- // Scan history.
- if (start < 0) {
- const u8 *hbuf_end = ci->hbuf + ci->hlen;
- const u8 *d = hbuf_end + MIN(0, scan_end_loc) - 1;
- const u8 *d_start = hbuf_end + start;
- s = roseMiracleScan(stop, d, d_start);
- if (scan_end_loc > 0) {
- // Shift s over to account for the buffer scan above.
- s <<= scan_end_loc;
- }
- }
-
- if (s) {
- miracle_found:
- DEBUG_PRINTF("s=0x%llx, ctz=%u\n", s, ctz64(s));
- s64a loc = end_loc - left->maxLag - ctz64(s) - 1;
- if (loc > begin_loc) {
- DEBUG_PRINTF("miracle at %lld\n", loc);
- *miracle_loc = loc;
- return 1;
- }
- }
-
- DEBUG_PRINTF("no viable miraculous stop characters found\n");
- return 0;
-}
-
-#endif // ROSE_MIRACLE_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_MIRACLE_H
+#define ROSE_MIRACLE_H
+
+#include "ue2common.h"
+#include "runtime.h"
+#include "rose_internal.h"
+
+/** \brief Maximum number of bytes to scan when looking for a "miracle" stop
+ * character. */
+#define MIRACLE_LEN_MAX 32
+
+static really_inline
+u64a roseMiracleScan(const u8 *stop, const u8 *d, const u8 *d_start) {
+ assert(d >= d_start);
+
+ // Note: unrolling this loop manually does appear to reduce its
+ // performance. I'm sick of tilting at this particular windmill.
+
+ u32 mshift = 0;
+ do {
+ u64a s = (u64a)stop[*d];
+ if (s) {
+ s <<= mshift;
+ return s;
+ }
+ mshift++;
+ } while (--d >= d_start);
+ return 0;
+}
+
+/**
+ * \brief "Miracle" scan: uses stop table to check if we can skip forward to a
+ * location where we know that the given rose engine will be in a known state.
+ *
+ * Scans the buffer/history between relative locations \a begin_loc and \a
+ * end_loc, and returns a miracle location (if any) that appears in the stream
+ * after \a begin_loc.
+ *
+ * Returns 1 if some bytes can be skipped and sets \a miracle_loc
+ * appropriately, 0 otherwise.
+ */
+static rose_inline
+char roseMiracleOccurs(const struct RoseEngine *t,
+ const struct LeftNfaInfo *left,
+ const struct core_info *ci, const s64a begin_loc,
+ const s64a end_loc, s64a *miracle_loc) {
+ assert(!left->transient);
+ assert(left->stopTable);
+
+ DEBUG_PRINTF("looking for miracle over [%lld,%lld], maxLag=%u\n",
+ begin_loc, end_loc, left->maxLag);
+ DEBUG_PRINTF("ci->len=%zu, ci->hlen=%zu\n", ci->len, ci->hlen);
+
+ assert(begin_loc <= end_loc);
+ assert(begin_loc >= -(s64a)ci->hlen);
+ assert(end_loc <= (s64a)ci->len);
+
+ const u8 *stop = getByOffset(t, left->stopTable);
+
+ const s64a scan_end_loc = end_loc - left->maxLag;
+ if (scan_end_loc <= begin_loc) {
+ DEBUG_PRINTF("nothing to scan\n");
+ return 0;
+ }
+
+ const s64a start = MAX(begin_loc, scan_end_loc - MIRACLE_LEN_MAX);
+ DEBUG_PRINTF("scan [%lld..%lld]\n", start, scan_end_loc);
+
+ u64a s = 0; // state, on bits are miracle locations
+
+ // Scan buffer.
+ const s64a buf_scan_start = MAX(0, start);
+ if (scan_end_loc > buf_scan_start) {
+ const u8 *buf = ci->buf;
+ const u8 *d = buf + scan_end_loc - 1;
+ const u8 *d_start = buf + buf_scan_start;
+ s = roseMiracleScan(stop, d, d_start);
+ if (s) {
+ goto miracle_found;
+ }
+ }
+
+ // Scan history.
+ if (start < 0) {
+ const u8 *hbuf_end = ci->hbuf + ci->hlen;
+ const u8 *d = hbuf_end + MIN(0, scan_end_loc) - 1;
+ const u8 *d_start = hbuf_end + start;
+ s = roseMiracleScan(stop, d, d_start);
+ if (scan_end_loc > 0) {
+ // Shift s over to account for the buffer scan above.
+ s <<= scan_end_loc;
+ }
+ }
+
+ if (s) {
+ miracle_found:
+ DEBUG_PRINTF("s=0x%llx, ctz=%u\n", s, ctz64(s));
+ s64a loc = end_loc - left->maxLag - ctz64(s) - 1;
+ if (loc > begin_loc) {
+ DEBUG_PRINTF("miracle at %lld\n", loc);
+ *miracle_loc = loc;
+ return 1;
+ }
+ }
+
+ DEBUG_PRINTF("no viable miraculous stop characters found\n");
+ return 0;
+}
+
+#endif // ROSE_MIRACLE_H
diff --git a/contrib/libs/hyperscan/src/rose/rose.h b/contrib/libs/hyperscan/src/rose/rose.h
index e227f04292..409b70028f 100644
--- a/contrib/libs/hyperscan/src/rose/rose.h
+++ b/contrib/libs/hyperscan/src/rose/rose.h
@@ -1,62 +1,62 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_H
-#define ROSE_H
-
-#include "ue2common.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_H
+#define ROSE_H
+
+#include "ue2common.h"
+
struct RoseEngine;
struct hs_scratch;
-// Initialise state space for engine use.
+// Initialise state space for engine use.
void roseInitState(const struct RoseEngine *t, char *state);
-
+
/* assumes core_info in scratch has been init to point to data */
void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch);
-
-/* assumes core_info in scratch has been init to point to data */
+
+/* assumes core_info in scratch has been init to point to data */
void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch);
-
+
void roseStreamEodExec(const struct RoseEngine *t, u64a offset,
struct hs_scratch *scratch);
-
+
hwlmcb_rv_t roseCallback(size_t end, u32 id, struct hs_scratch *scratch);
-
+
int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context);
-
+
int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
u64a stream_offset, struct hs_scratch *scratch);
-
+
int roseRunFlushCombProgram(const struct RoseEngine *rose,
struct hs_scratch *scratch, u64a end);
int roseRunLastFlushCombProgram(const struct RoseEngine *rose,
struct hs_scratch *scratch, u64a end);
-#endif // ROSE_H
+#endif // ROSE_H
diff --git a/contrib/libs/hyperscan/src/rose/rose_build.h b/contrib/libs/hyperscan/src/rose/rose_build.h
index 958eb0f7fa..ca3ba3696e 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build.h
@@ -1,143 +1,143 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose Build interface.
- *
- * Rose Build interface. Everything you ever needed to feed literals in and
- * get a RoseEngine out. This header should be everything needed by the rest
- * of UE2.
- */
-
-#ifndef ROSE_BUILD_H
-#define ROSE_BUILD_H
-
-#include "ue2common.h"
-#include "rose_common.h"
-#include "rose_in_graph.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose Build interface.
+ *
+ * Rose Build interface. Everything you ever needed to feed literals in and
+ * get a RoseEngine out. This header should be everything needed by the rest
+ * of UE2.
+ */
+
+#ifndef ROSE_BUILD_H
+#define ROSE_BUILD_H
+
+#include "ue2common.h"
+#include "rose_common.h"
+#include "rose_in_graph.h"
#include "util/bytecode_ptr.h"
-#include "util/charreach.h"
+#include "util/charreach.h"
#include "util/flat_containers.h"
#include "util/noncopyable.h"
-#include "util/ue2string.h"
-
-#include <memory>
-#include <set>
-#include <utility>
-#include <vector>
-
-struct NFA;
-struct SmallWriteEngine;
-struct RoseEngine;
-
-namespace ue2 {
-
-struct BoundaryReports;
-struct CompileContext;
-struct raw_puff;
-struct raw_som_dfa;
-class CharReach;
-class NGHolder;
-class ReportManager;
+#include "util/ue2string.h"
+
+#include <memory>
+#include <set>
+#include <utility>
+#include <vector>
+
+struct NFA;
+struct SmallWriteEngine;
+struct RoseEngine;
+
+namespace ue2 {
+
+struct BoundaryReports;
+struct CompileContext;
+struct raw_puff;
+struct raw_som_dfa;
+class CharReach;
+class NGHolder;
+class ReportManager;
class SmallWriteBuild;
-class SomSlotManager;
-
-class RoseDedupeAux {
-public:
- virtual ~RoseDedupeAux();
-
- /** \brief True if we can not establish that at most a single callback will
- * be generated at a given offset from this set of reports. */
+class SomSlotManager;
+
+class RoseDedupeAux {
+public:
+ virtual ~RoseDedupeAux();
+
+ /** \brief True if we can not establish that at most a single callback will
+ * be generated at a given offset from this set of reports. */
virtual bool requiresDedupeSupport(const flat_set<ReportID> &reports)
- const = 0;
-};
-
-/** \brief Abstract interface intended for callers from elsewhere in the tree,
- * real underlying implementation is RoseBuildImpl in rose_build_impl.h. */
+ const = 0;
+};
+
+/** \brief Abstract interface intended for callers from elsewhere in the tree,
+ * real underlying implementation is RoseBuildImpl in rose_build_impl.h. */
class RoseBuild : noncopyable {
-public:
- virtual ~RoseBuild();
-
- /** \brief Adds a single literal. */
- virtual void add(bool anchored, bool eod, const ue2_literal &lit,
+public:
+ virtual ~RoseBuild();
+
+ /** \brief Adds a single literal. */
+ virtual void add(bool anchored, bool eod, const ue2_literal &lit,
const flat_set<ReportID> &ids) = 0;
-
+
virtual bool addRose(const RoseInGraph &ig, bool prefilter) = 0;
- virtual bool addSombeRose(const RoseInGraph &ig) = 0;
-
- virtual bool addOutfix(const NGHolder &h) = 0;
- virtual bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) = 0;
- virtual bool addOutfix(const raw_puff &rp) = 0;
-
- virtual bool addChainTail(const raw_puff &rp, u32 *queue_out,
- u32 *event_out) = 0;
-
- /** \brief Returns true if we were able to add it as a mask. */
- virtual bool add(bool anchored, const std::vector<CharReach> &mask,
+ virtual bool addSombeRose(const RoseInGraph &ig) = 0;
+
+ virtual bool addOutfix(const NGHolder &h) = 0;
+ virtual bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) = 0;
+ virtual bool addOutfix(const raw_puff &rp) = 0;
+
+ virtual bool addChainTail(const raw_puff &rp, u32 *queue_out,
+ u32 *event_out) = 0;
+
+ /** \brief Returns true if we were able to add it as a mask. */
+ virtual bool add(bool anchored, const std::vector<CharReach> &mask,
const flat_set<ReportID> &reports) = 0;
-
- /** \brief Attempts to add the graph to the anchored acyclic table. Returns
- * true on success. */
- virtual bool addAnchoredAcyclic(const NGHolder &graph) = 0;
-
- virtual bool validateMask(const std::vector<CharReach> &mask,
+
+ /** \brief Attempts to add the graph to the anchored acyclic table. Returns
+ * true on success. */
+ virtual bool addAnchoredAcyclic(const NGHolder &graph) = 0;
+
+ virtual bool validateMask(const std::vector<CharReach> &mask,
const flat_set<ReportID> &reports,
- bool anchored, bool eod) const = 0;
- virtual void addMask(const std::vector<CharReach> &mask,
+ bool anchored, bool eod) const = 0;
+ virtual void addMask(const std::vector<CharReach> &mask,
const flat_set<ReportID> &reports, bool anchored,
- bool eod) = 0;
-
- /** \brief Construct a runtime implementation. */
+ bool eod) = 0;
+
+ /** \brief Construct a runtime implementation. */
virtual bytecode_ptr<RoseEngine> buildRose(u32 minWidth) = 0;
-
- virtual std::unique_ptr<RoseDedupeAux> generateDedupeAux() const = 0;
-
- /** Get a unique report identifier for a prefix|infix engine */
- virtual ReportID getNewNfaReport() = 0;
-
- /** Note that we have seen a SOM pattern. */
- virtual void setSom() = 0;
-};
-
-// Construct a usable Rose builder.
-std::unique_ptr<RoseBuild> makeRoseBuilder(ReportManager &rm,
- SomSlotManager &ssm,
+
+ virtual std::unique_ptr<RoseDedupeAux> generateDedupeAux() const = 0;
+
+ /** Get a unique report identifier for a prefix|infix engine */
+ virtual ReportID getNewNfaReport() = 0;
+
+ /** Note that we have seen a SOM pattern. */
+ virtual void setSom() = 0;
+};
+
+// Construct a usable Rose builder.
+std::unique_ptr<RoseBuild> makeRoseBuilder(ReportManager &rm,
+ SomSlotManager &ssm,
SmallWriteBuild &smwr,
- const CompileContext &cc,
- const BoundaryReports &boundary);
-
-bool roseCheckRose(const RoseInGraph &ig, bool prefilter,
- const ReportManager &rm, const CompileContext &cc);
-
-bool roseIsPureLiteral(const RoseEngine *t);
-
-size_t maxOverlap(const ue2_literal &a, const ue2_literal &b, u32 b_delay);
-
-} // namespace ue2
-
-#endif // ROSE_BUILD_H
+ const CompileContext &cc,
+ const BoundaryReports &boundary);
+
+bool roseCheckRose(const RoseInGraph &ig, bool prefilter,
+ const ReportManager &rm, const CompileContext &cc);
+
+bool roseIsPureLiteral(const RoseEngine *t);
+
+size_t maxOverlap(const ue2_literal &a, const ue2_literal &b, u32 b_delay);
+
+} // namespace ue2
+
+#endif // ROSE_BUILD_H
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_add.cpp b/contrib/libs/hyperscan/src/rose/rose_build_add.cpp
index 2dc136c0f1..4929c95fce 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_add.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_add.cpp
@@ -1,675 +1,675 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_add_internal.h"
-#include "rose_build_impl.h"
-
-#include "ue2common.h"
-#include "grey.h"
-#include "rose_build_anchored.h"
-#include "rose_in_util.h"
-#include "hwlm/hwlm_literal.h"
-#include "nfa/goughcompile.h"
-#include "nfa/nfa_api_queue.h"
-#include "nfagraph/ng_depth.h"
-#include "nfagraph/ng_dump.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_limex.h"
-#include "nfagraph/ng_mcclellan.h"
-#include "nfagraph/ng_prefilter.h"
-#include "nfagraph/ng_prune.h"
-#include "nfagraph/ng_region.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_reports.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_width.h"
-#include "util/charreach.h"
-#include "util/charreach_util.h"
-#include "util/compare.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_add_internal.h"
+#include "rose_build_impl.h"
+
+#include "ue2common.h"
+#include "grey.h"
+#include "rose_build_anchored.h"
+#include "rose_in_util.h"
+#include "hwlm/hwlm_literal.h"
+#include "nfa/goughcompile.h"
+#include "nfa/nfa_api_queue.h"
+#include "nfagraph/ng_depth.h"
+#include "nfagraph/ng_dump.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_mcclellan.h"
+#include "nfagraph/ng_prefilter.h"
+#include "nfagraph/ng_prune.h"
+#include "nfagraph/ng_region.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
+#include "util/charreach.h"
+#include "util/charreach_util.h"
+#include "util/compare.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
#include "util/insertion_ordered.h"
-#include "util/make_unique.h"
+#include "util/make_unique.h"
#include "util/noncopyable.h"
-#include "util/order_check.h"
-#include "util/report_manager.h"
-#include "util/ue2string.h"
-#include "util/verify_types.h"
-
-#include <algorithm>
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-#include <utility>
-
-using namespace std;
-
-namespace ue2 {
-
-/**
- * \brief Data used by most of the construction code in this file.
- */
+#include "util/order_check.h"
+#include "util/report_manager.h"
+#include "util/ue2string.h"
+#include "util/verify_types.h"
+
+#include <algorithm>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+#include <utility>
+
+using namespace std;
+
+namespace ue2 {
+
+/**
+ * \brief Data used by most of the construction code in this file.
+ */
struct RoseBuildData : noncopyable {
- RoseBuildData(const RoseInGraph &ig_in, bool som_in)
- : ig(ig_in), som(som_in) {}
-
- /** Input rose graph. */
- const RoseInGraph &ig;
-
- /** Edges we've transformed (in \ref transformAnchoredLiteralOverlap) which
- * require ANCH history to prevent overlap. */
+ RoseBuildData(const RoseInGraph &ig_in, bool som_in)
+ : ig(ig_in), som(som_in) {}
+
+ /** Input rose graph. */
+ const RoseInGraph &ig;
+
+ /** Edges we've transformed (in \ref transformAnchoredLiteralOverlap) which
+ * require ANCH history to prevent overlap. */
unordered_set<RoseInEdge> anch_history_edges;
-
- /** True if we're tracking Start of Match. */
- bool som;
-};
-
-static
-ReportID findReportId(const NGHolder &g) {
- /* prefix/infix always have an edge to accept and only 1 reportid initially
- */
- assert(in_degree(g.accept, g));
- const auto &rep = g[*inv_adjacent_vertices(g.accept, g).first].reports;
- assert(!rep.empty());
- return *rep.begin();
-}
-
-static
-RoseVertex createVertex(RoseBuildImpl *build, u32 literalId, u32 min_offset,
- u32 max_offset) {
- RoseGraph &g = build->g;
- // add to tree
- RoseVertex v = add_vertex(g);
- g[v].min_offset = min_offset;
- g[v].max_offset = max_offset;
-
+
+ /** True if we're tracking Start of Match. */
+ bool som;
+};
+
+static
+ReportID findReportId(const NGHolder &g) {
+ /* prefix/infix always have an edge to accept and only 1 reportid initially
+ */
+ assert(in_degree(g.accept, g));
+ const auto &rep = g[*inv_adjacent_vertices(g.accept, g).first].reports;
+ assert(!rep.empty());
+ return *rep.begin();
+}
+
+static
+RoseVertex createVertex(RoseBuildImpl *build, u32 literalId, u32 min_offset,
+ u32 max_offset) {
+ RoseGraph &g = build->g;
+ // add to tree
+ RoseVertex v = add_vertex(g);
+ g[v].min_offset = min_offset;
+ g[v].max_offset = max_offset;
+
DEBUG_PRINTF("insert vertex %zu into literal %u's vertex set\n", g[v].index,
- literalId);
- g[v].literals.insert(literalId);
- build->literal_info[literalId].vertices.insert(v);
-
- return v;
-}
-
-RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent,
- u32 minBound, u32 maxBound, u32 literalId,
- size_t literalLength,
+ literalId);
+ g[v].literals.insert(literalId);
+ build->literal_info[literalId].vertices.insert(v);
+
+ return v;
+}
+
+RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent,
+ u32 minBound, u32 maxBound, u32 literalId,
+ size_t literalLength,
const flat_set<ReportID> &reports) {
- assert(parent != RoseGraph::null_vertex());
-
- RoseGraph &g = build->g;
- // add to tree (offsets set latter)
- RoseVertex v = createVertex(build, literalId, 0U, 0U);
-
- /* fill in report information */
- g[v].reports.insert(reports.begin(), reports.end());
-
+ assert(parent != RoseGraph::null_vertex());
+
+ RoseGraph &g = build->g;
+ // add to tree (offsets set latter)
+ RoseVertex v = createVertex(build, literalId, 0U, 0U);
+
+ /* fill in report information */
+ g[v].reports.insert(reports.begin(), reports.end());
+
RoseEdge e = add_edge(parent, v, g);
- DEBUG_PRINTF("adding edge (%u, %u) to parent\n", minBound, maxBound);
-
- g[e].minBound = minBound;
- g[e].maxBound = maxBound;
- g[e].rose_top = 0;
-
- u32 min_offset = add_rose_depth(g[parent].min_offset, minBound);
- u32 max_offset = add_rose_depth(g[parent].max_offset, maxBound);
-
- /* take literal length into account for offsets */
- const u32 lit_len = verify_u32(literalLength);
- min_offset = add_rose_depth(min_offset, lit_len);
- max_offset = add_rose_depth(max_offset, lit_len);
-
- g[v].min_offset = min_offset;
- g[v].max_offset = max_offset;
-
- return v;
-}
-
-static
-RoseVertex createAnchoredVertex(RoseBuildImpl *build, u32 literalId,
- u32 min_offset, u32 max_offset) {
- RoseGraph &g = build->g;
- RoseVertex v = createVertex(build, literalId, min_offset, max_offset);
-
+ DEBUG_PRINTF("adding edge (%u, %u) to parent\n", minBound, maxBound);
+
+ g[e].minBound = minBound;
+ g[e].maxBound = maxBound;
+ g[e].rose_top = 0;
+
+ u32 min_offset = add_rose_depth(g[parent].min_offset, minBound);
+ u32 max_offset = add_rose_depth(g[parent].max_offset, maxBound);
+
+ /* take literal length into account for offsets */
+ const u32 lit_len = verify_u32(literalLength);
+ min_offset = add_rose_depth(min_offset, lit_len);
+ max_offset = add_rose_depth(max_offset, lit_len);
+
+ g[v].min_offset = min_offset;
+ g[v].max_offset = max_offset;
+
+ return v;
+}
+
+static
+RoseVertex createAnchoredVertex(RoseBuildImpl *build, u32 literalId,
+ u32 min_offset, u32 max_offset) {
+ RoseGraph &g = build->g;
+ RoseVertex v = createVertex(build, literalId, min_offset, max_offset);
+
DEBUG_PRINTF("created anchored vertex %zu with lit id %u\n", g[v].index,
- literalId);
-
+ literalId);
+
RoseEdge e = add_edge(build->anchored_root, v, g);
- g[e].minBound = min_offset;
- g[e].maxBound = max_offset;
-
- return v;
-}
-
-static
-RoseVertex duplicate(RoseBuildImpl *build, RoseVertex v) {
- RoseGraph &g = build->g;
- RoseVertex w = add_vertex(g[v], g);
+ g[e].minBound = min_offset;
+ g[e].maxBound = max_offset;
+
+ return v;
+}
+
+static
+RoseVertex duplicate(RoseBuildImpl *build, RoseVertex v) {
+ RoseGraph &g = build->g;
+ RoseVertex w = add_vertex(g[v], g);
DEBUG_PRINTF("added vertex %zu\n", g[w].index);
-
- for (auto lit_id : g[w].literals) {
- build->literal_info[lit_id].vertices.insert(w);
- }
-
- for (const auto &e : in_edges_range(v, g)) {
- RoseVertex s = source(e, g);
- add_edge(s, w, g[e], g);
+
+ for (auto lit_id : g[w].literals) {
+ build->literal_info[lit_id].vertices.insert(w);
+ }
+
+ for (const auto &e : in_edges_range(v, g)) {
+ RoseVertex s = source(e, g);
+ add_edge(s, w, g[e], g);
DEBUG_PRINTF("added edge (%zu,%zu)\n", g[s].index, g[w].index);
- }
-
- return w;
-}
-
-namespace {
-struct created_key {
- explicit created_key(const RoseInEdgeProps &trep)
+ }
+
+ return w;
+}
+
+namespace {
+struct created_key {
+ explicit created_key(const RoseInEdgeProps &trep)
: prefix(trep.graph.get()), lag(trep.graph_lag) {
- }
- bool operator<(const created_key &b) const {
- const created_key &a = *this;
- ORDER_CHECK(prefix);
- ORDER_CHECK(lag);
- return false;
- }
- NGHolder *prefix;
- u32 lag;
-};
-}
-
-static
-bool isPureAnchored(const NGHolder &h) {
- return !proper_out_degree(h.startDs, h);
-}
-
-static
-RoseRoleHistory selectHistory(const RoseBuildImpl &tbi, const RoseBuildData &bd,
- const RoseInEdge &rose_edge, const RoseEdge &e) {
- const RoseGraph &g = tbi.g;
- const RoseVertex u = source(e, g), v = target(e, g);
- const bool fixed_offset_src = g[u].fixedOffset();
- const bool has_bounds = g[e].minBound || (g[e].maxBound != ROSE_BOUND_INF);
-
- DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n",
+ }
+ bool operator<(const created_key &b) const {
+ const created_key &a = *this;
+ ORDER_CHECK(prefix);
+ ORDER_CHECK(lag);
+ return false;
+ }
+ NGHolder *prefix;
+ u32 lag;
+};
+}
+
+static
+bool isPureAnchored(const NGHolder &h) {
+ return !proper_out_degree(h.startDs, h);
+}
+
+static
+RoseRoleHistory selectHistory(const RoseBuildImpl &tbi, const RoseBuildData &bd,
+ const RoseInEdge &rose_edge, const RoseEdge &e) {
+ const RoseGraph &g = tbi.g;
+ const RoseVertex u = source(e, g), v = target(e, g);
+ const bool fixed_offset_src = g[u].fixedOffset();
+ const bool has_bounds = g[e].minBound || (g[e].maxBound != ROSE_BOUND_INF);
+
+ DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n",
g[u].index, g[v].index, g[e].minBound, g[e].maxBound,
- (int)g[u].fixedOffset(), (int)g[v].left);
-
- if (g[v].left) {
- // Roles with prefix engines have their history handled by that prefix.
- assert(!contains(bd.anch_history_edges, rose_edge));
- return ROSE_ROLE_HISTORY_NONE;
- }
-
- if (contains(bd.anch_history_edges, rose_edge)) {
- DEBUG_PRINTF("needs anch history\n");
- return ROSE_ROLE_HISTORY_ANCH;
- }
-
- if (fixed_offset_src && has_bounds) {
- DEBUG_PRINTF("needs anch history\n");
- return ROSE_ROLE_HISTORY_ANCH;
- }
-
- return ROSE_ROLE_HISTORY_NONE;
-}
-
-static
-bool hasSuccessorLiterals(RoseInVertex iv, const RoseInGraph &ig) {
- for (auto v : adjacent_vertices_range(iv, ig)) {
- if (ig[v].type != RIV_ACCEPT) {
- return true;
- }
- }
- return false;
-}
-
-static
-void createVertices(RoseBuildImpl *tbi,
- map<RoseInVertex, vector<RoseVertex> > &vertex_map,
- const vector<pair<RoseVertex, RoseInEdge> > &parents,
- RoseInVertex iv, u32 min_offset, u32 max_offset,
- u32 literalId, u32 delay, const RoseBuildData &bd) {
- RoseGraph &g = tbi->g;
-
- DEBUG_PRINTF("vertex has %zu parents\n", parents.size());
-
- map<created_key, RoseVertex> created;
-
- for (const auto &pv : parents) {
- RoseVertex w;
- const RoseInEdgeProps &edge_props = bd.ig[pv.second];
- shared_ptr<NGHolder> prefix_graph = edge_props.graph;
- u32 prefix_lag = edge_props.graph_lag;
-
- created_key key(edge_props);
-
- if (!contains(created, key)) {
- assert(prefix_graph || !edge_props.haig);
- w = createVertex(tbi, literalId, min_offset, max_offset);
- created[key] = w;
-
- if (prefix_graph) {
- g[w].left.graph = prefix_graph;
+ (int)g[u].fixedOffset(), (int)g[v].left);
+
+ if (g[v].left) {
+ // Roles with prefix engines have their history handled by that prefix.
+ assert(!contains(bd.anch_history_edges, rose_edge));
+ return ROSE_ROLE_HISTORY_NONE;
+ }
+
+ if (contains(bd.anch_history_edges, rose_edge)) {
+ DEBUG_PRINTF("needs anch history\n");
+ return ROSE_ROLE_HISTORY_ANCH;
+ }
+
+ if (fixed_offset_src && has_bounds) {
+ DEBUG_PRINTF("needs anch history\n");
+ return ROSE_ROLE_HISTORY_ANCH;
+ }
+
+ return ROSE_ROLE_HISTORY_NONE;
+}
+
+static
+bool hasSuccessorLiterals(RoseInVertex iv, const RoseInGraph &ig) {
+ for (auto v : adjacent_vertices_range(iv, ig)) {
+ if (ig[v].type != RIV_ACCEPT) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static
+void createVertices(RoseBuildImpl *tbi,
+ map<RoseInVertex, vector<RoseVertex> > &vertex_map,
+ const vector<pair<RoseVertex, RoseInEdge> > &parents,
+ RoseInVertex iv, u32 min_offset, u32 max_offset,
+ u32 literalId, u32 delay, const RoseBuildData &bd) {
+ RoseGraph &g = tbi->g;
+
+ DEBUG_PRINTF("vertex has %zu parents\n", parents.size());
+
+ map<created_key, RoseVertex> created;
+
+ for (const auto &pv : parents) {
+ RoseVertex w;
+ const RoseInEdgeProps &edge_props = bd.ig[pv.second];
+ shared_ptr<NGHolder> prefix_graph = edge_props.graph;
+ u32 prefix_lag = edge_props.graph_lag;
+
+ created_key key(edge_props);
+
+ if (!contains(created, key)) {
+ assert(prefix_graph || !edge_props.haig);
+ w = createVertex(tbi, literalId, min_offset, max_offset);
+ created[key] = w;
+
+ if (prefix_graph) {
+ g[w].left.graph = prefix_graph;
if (edge_props.dfa) {
g[w].left.dfa = edge_props.dfa;
- }
- g[w].left.haig = edge_props.haig;
- g[w].left.lag = prefix_lag;
-
- // The graph already has its report id allocated - find it.
- g[w].left.leftfix_report = findReportId(*prefix_graph);
-
- if (g[w].left.dfa || g[w].left.haig) {
- assert(prefix_graph);
- g[w].left.dfa_min_width = findMinWidth(*prefix_graph);
- g[w].left.dfa_max_width = findMaxWidth(*prefix_graph);
- }
- }
-
- if (bd.som && !g[w].left.haig) {
- /* no prefix - som based on literal start */
- assert(!prefix_graph);
+ }
+ g[w].left.haig = edge_props.haig;
+ g[w].left.lag = prefix_lag;
+
+ // The graph already has its report id allocated - find it.
+ g[w].left.leftfix_report = findReportId(*prefix_graph);
+
+ if (g[w].left.dfa || g[w].left.haig) {
+ assert(prefix_graph);
+ g[w].left.dfa_min_width = findMinWidth(*prefix_graph);
+ g[w].left.dfa_max_width = findMaxWidth(*prefix_graph);
+ }
+ }
+
+ if (bd.som && !g[w].left.haig) {
+ /* no prefix - som based on literal start */
+ assert(!prefix_graph);
g[w].som_adjust = tbi->literals.at(literalId).elength();
- DEBUG_PRINTF("set som_adjust to %u\n", g[w].som_adjust);
- }
-
+ DEBUG_PRINTF("set som_adjust to %u\n", g[w].som_adjust);
+ }
+
DEBUG_PRINTF(" adding new vertex index=%zu\n", tbi->g[w].index);
- vertex_map[iv].push_back(w);
- } else {
- w = created[key];
- }
-
+ vertex_map[iv].push_back(w);
+ } else {
+ w = created[key];
+ }
+
RoseVertex p = pv.first;
-
+
RoseEdge e = add_edge(p, w, g);
- DEBUG_PRINTF("adding edge (%u,%u) to parent\n", edge_props.minBound,
- edge_props.maxBound);
- g[e].minBound = edge_props.minBound;
- if (p != tbi->root && g[w].left.graph
- && (!tbi->isAnyStart(p) || isPureAnchored(*g[w].left.graph))) {
- depth mw = findMaxWidth(*g[w].left.graph);
- if (mw.is_infinite()) {
- g[e].maxBound = ROSE_BOUND_INF;
- } else {
- DEBUG_PRINTF("setting max to %s + %u\n", mw.str().c_str(),
- prefix_lag);
- g[e].maxBound = prefix_lag + mw;
- }
- } else {
- g[e].maxBound = edge_props.maxBound;
- }
- g[e].rose_top = 0;
- g[e].history = selectHistory(*tbi, bd, pv.second, e);
- }
-
- if (delay && hasSuccessorLiterals(iv, bd.ig)) {
- // Add an undelayed "ghost" vertex for this literal.
- u32 ghostId = tbi->literal_info[literalId].undelayed_id;
- DEBUG_PRINTF("creating delay ghost vertex, id=%u\n", ghostId);
- assert(ghostId != literalId);
+ DEBUG_PRINTF("adding edge (%u,%u) to parent\n", edge_props.minBound,
+ edge_props.maxBound);
+ g[e].minBound = edge_props.minBound;
+ if (p != tbi->root && g[w].left.graph
+ && (!tbi->isAnyStart(p) || isPureAnchored(*g[w].left.graph))) {
+ depth mw = findMaxWidth(*g[w].left.graph);
+ if (mw.is_infinite()) {
+ g[e].maxBound = ROSE_BOUND_INF;
+ } else {
+ DEBUG_PRINTF("setting max to %s + %u\n", mw.str().c_str(),
+ prefix_lag);
+ g[e].maxBound = prefix_lag + mw;
+ }
+ } else {
+ g[e].maxBound = edge_props.maxBound;
+ }
+ g[e].rose_top = 0;
+ g[e].history = selectHistory(*tbi, bd, pv.second, e);
+ }
+
+ if (delay && hasSuccessorLiterals(iv, bd.ig)) {
+ // Add an undelayed "ghost" vertex for this literal.
+ u32 ghostId = tbi->literal_info[literalId].undelayed_id;
+ DEBUG_PRINTF("creating delay ghost vertex, id=%u\n", ghostId);
+ assert(ghostId != literalId);
assert(tbi->literals.at(ghostId).delay == 0);
-
- // Adjust offsets, removing delay.
- u32 ghost_min = min_offset, ghost_max = max_offset;
- assert(ghost_min < ROSE_BOUND_INF && ghost_min >= delay);
- ghost_min -= delay;
- ghost_max -= ghost_max == ROSE_BOUND_INF ? 0 : delay;
-
- RoseVertex g_v = createVertex(tbi, ghostId, ghost_min, ghost_max);
-
- for (const auto &pv : parents) {
- const RoseInEdgeProps &edge_props = bd.ig[pv.second];
+
+ // Adjust offsets, removing delay.
+ u32 ghost_min = min_offset, ghost_max = max_offset;
+ assert(ghost_min < ROSE_BOUND_INF && ghost_min >= delay);
+ ghost_min -= delay;
+ ghost_max -= ghost_max == ROSE_BOUND_INF ? 0 : delay;
+
+ RoseVertex g_v = createVertex(tbi, ghostId, ghost_min, ghost_max);
+
+ for (const auto &pv : parents) {
+ const RoseInEdgeProps &edge_props = bd.ig[pv.second];
RoseEdge e = add_edge(pv.first, g_v, tbi->g);
- g[e].minBound = edge_props.minBound;
- g[e].maxBound = edge_props.maxBound;
- g[e].history = selectHistory(*tbi, bd, pv.second, e);
- DEBUG_PRINTF("parent edge has bounds [%u,%u]\n",
- edge_props.minBound, edge_props.maxBound);
- }
-
- for (auto &m : created) {
- tbi->ghost[m.second] = g_v;
- }
- }
-}
-
-/* ensure the holder does not accept any paths which do not end with lit */
-static
-void removeFalsePaths(NGHolder &g, const ue2_literal &lit) {
+ g[e].minBound = edge_props.minBound;
+ g[e].maxBound = edge_props.maxBound;
+ g[e].history = selectHistory(*tbi, bd, pv.second, e);
+ DEBUG_PRINTF("parent edge has bounds [%u,%u]\n",
+ edge_props.minBound, edge_props.maxBound);
+ }
+
+ for (auto &m : created) {
+ tbi->ghost[m.second] = g_v;
+ }
+ }
+}
+
+/* ensure the holder does not accept any paths which do not end with lit */
+static
+void removeFalsePaths(NGHolder &g, const ue2_literal &lit) {
DEBUG_PRINTF("strip '%s'\n", dumpString(lit).c_str());
- set<NFAVertex> curr, next;
- curr.insert(g.accept);
- curr.insert(g.acceptEod);
-
- for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
- next.clear();
- for (auto curr_v : curr) {
+ set<NFAVertex> curr, next;
+ curr.insert(g.accept);
+ curr.insert(g.acceptEod);
+
+ for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
+ next.clear();
+ for (auto curr_v : curr) {
DEBUG_PRINTF("handling %zu\n", g[curr_v].index);
- vector<NFAVertex> next_cand;
- insert(&next_cand, next_cand.end(),
- inv_adjacent_vertices(curr_v, g));
- clear_in_edges(curr_v, g);
- if (curr_v == g.acceptEod) {
- add_edge(g.accept, g.acceptEod, g);
- }
-
- for (auto v : next_cand) {
- assert(v != g.startDs);
- if (v == g.start || v == g.startDs || v == g.accept) {
- continue;
- }
-
- const CharReach &cr = g[v].char_reach;
-
- if (!overlaps(*it, cr)) {
+ vector<NFAVertex> next_cand;
+ insert(&next_cand, next_cand.end(),
+ inv_adjacent_vertices(curr_v, g));
+ clear_in_edges(curr_v, g);
+ if (curr_v == g.acceptEod) {
+ add_edge(g.accept, g.acceptEod, g);
+ }
+
+ for (auto v : next_cand) {
+ assert(v != g.startDs);
+ if (v == g.start || v == g.startDs || v == g.accept) {
+ continue;
+ }
+
+ const CharReach &cr = g[v].char_reach;
+
+ if (!overlaps(*it, cr)) {
DEBUG_PRINTF("false edge %zu\n", g[v].index);
- continue;
- }
-
- NFAVertex v2 = clone_vertex(g, v);
- clone_in_edges(g, v, v2);
- add_edge(v2, curr_v, g);
- g[v2].char_reach &= *it;
+ continue;
+ }
+
+ NFAVertex v2 = clone_vertex(g, v);
+ clone_in_edges(g, v, v2);
+ add_edge(v2, curr_v, g);
+ g[v2].char_reach &= *it;
DEBUG_PRINTF("next <- %zu\n", g[v2].index);
- next.insert(v2);
- }
- }
-
- curr.swap(next);
- }
-
- pruneUseless(g);
+ next.insert(v2);
+ }
+ }
+
+ curr.swap(next);
+ }
+
+ pruneUseless(g);
clearReports(g);
- assert(in_degree(g.accept, g) || in_degree(g.acceptEod, g) > 1);
- assert(allMatchStatesHaveReports(g));
-
- DEBUG_PRINTF("graph has %zu vertices left\n", num_vertices(g));
-}
-
-static
-RoseVertex tryForAnchoredVertex(RoseBuildImpl *tbi,
- const RoseInVertexProps &iv_info,
- const RoseInEdgeProps &ep) {
- if (ep.graph_lag && ep.graph_lag != iv_info.s.length()) {
- DEBUG_PRINTF("bad lag %u != %zu\n", ep.graph_lag, iv_info.s.length());
- return RoseGraph::null_vertex(); /* TODO: better */
- }
-
- const depth anchored_max_depth(tbi->cc.grey.maxAnchoredRegion);
- depth min_width(0), max_width(0);
-
- if (ep.graph.get()) {
- const depth graph_lag(ep.graph_lag);
- max_width = findMaxWidth(*ep.graph) + graph_lag;
- min_width = findMinWidth(*ep.graph) + graph_lag;
- if (proper_out_degree(ep.graph->startDs, *ep.graph)) {
- max_width = depth::infinity();
- }
- }
-
- DEBUG_PRINTF("mw = %s; lag = %u\n", max_width.str().c_str(), ep.graph_lag);
-
- NGHolder h;
-
- if (ep.graph.get() && max_width <= anchored_max_depth) {
- cloneHolder(h, *ep.graph);
-
- /* add literal/dots */
- if (ep.graph_lag) {
- assert(ep.graph_lag == iv_info.s.length());
- appendLiteral(h, iv_info.s);
- } else {
- removeFalsePaths(h, iv_info.s);
- }
- } else if (!ep.graph.get() && ep.maxBound < ROSE_BOUND_INF
- && iv_info.s.length() + ep.maxBound
- <= tbi->cc.grey.maxAnchoredRegion) {
- if (ep.maxBound || ep.minBound) {
- /* TODO: handle, however these cases are not generated currently by
+ assert(in_degree(g.accept, g) || in_degree(g.acceptEod, g) > 1);
+ assert(allMatchStatesHaveReports(g));
+
+ DEBUG_PRINTF("graph has %zu vertices left\n", num_vertices(g));
+}
+
+static
+RoseVertex tryForAnchoredVertex(RoseBuildImpl *tbi,
+ const RoseInVertexProps &iv_info,
+ const RoseInEdgeProps &ep) {
+ if (ep.graph_lag && ep.graph_lag != iv_info.s.length()) {
+ DEBUG_PRINTF("bad lag %u != %zu\n", ep.graph_lag, iv_info.s.length());
+ return RoseGraph::null_vertex(); /* TODO: better */
+ }
+
+ const depth anchored_max_depth(tbi->cc.grey.maxAnchoredRegion);
+ depth min_width(0), max_width(0);
+
+ if (ep.graph.get()) {
+ const depth graph_lag(ep.graph_lag);
+ max_width = findMaxWidth(*ep.graph) + graph_lag;
+ min_width = findMinWidth(*ep.graph) + graph_lag;
+ if (proper_out_degree(ep.graph->startDs, *ep.graph)) {
+ max_width = depth::infinity();
+ }
+ }
+
+ DEBUG_PRINTF("mw = %s; lag = %u\n", max_width.str().c_str(), ep.graph_lag);
+
+ NGHolder h;
+
+ if (ep.graph.get() && max_width <= anchored_max_depth) {
+ cloneHolder(h, *ep.graph);
+
+ /* add literal/dots */
+ if (ep.graph_lag) {
+ assert(ep.graph_lag == iv_info.s.length());
+ appendLiteral(h, iv_info.s);
+ } else {
+ removeFalsePaths(h, iv_info.s);
+ }
+ } else if (!ep.graph.get() && ep.maxBound < ROSE_BOUND_INF
+ && iv_info.s.length() + ep.maxBound
+ <= tbi->cc.grey.maxAnchoredRegion) {
+ if (ep.maxBound || ep.minBound) {
+ /* TODO: handle, however these cases are not generated currently by
ng_violet */
- return RoseGraph::null_vertex();
- }
- max_width = depth(ep.maxBound + iv_info.s.length());
- min_width = depth(ep.minBound + iv_info.s.length());
- add_edge(h.start, h.accept, h);
- appendLiteral(h, iv_info.s);
- } else {
- return RoseGraph::null_vertex();
- }
-
- u32 anchored_exit_id = tbi->getNewLiteralId();
- u32 remap_id = 0;
- DEBUG_PRINTF(" trying to add dfa stuff\n");
- int rv = addToAnchoredMatcher(*tbi, h, anchored_exit_id, &remap_id);
-
- if (rv == ANCHORED_FAIL) {
- return RoseGraph::null_vertex();
- } else if (rv == ANCHORED_REMAP) {
- anchored_exit_id = remap_id;
- } else {
- assert(rv == ANCHORED_SUCCESS);
- }
-
- // Store the literal itself in a side structure so that we can use it for
- // overlap calculations later. This may be obsolete when the old Rose
- // construction path (and its history selection code) goes away.
- rose_literal_id lit(iv_info.s, ROSE_ANCHORED, 0);
- tbi->anchoredLitSuffix.insert(make_pair(anchored_exit_id, lit));
-
- assert(min_width <= anchored_max_depth);
- assert(max_width <= anchored_max_depth);
- assert(min_width <= max_width);
-
- /* Note: bounds are end-to-end as anchored lits are considered
- * to have 0 length. */
- RoseVertex v = createAnchoredVertex(tbi, anchored_exit_id, min_width,
- max_width);
- return v;
-}
-
-static
-u32 findRoseAnchorFloatingOverlap(const RoseInEdgeProps &ep,
- const RoseInVertexProps &succ_vp) {
- /* we need to ensure there is enough history to find the successor literal
- * when we enable its group.
- */
-
- if (!ep.graph.get()) {
- return 0; /* non overlapping */
- }
- depth graph_min_width = findMinWidth(*ep.graph);
- u32 min_width = ep.graph_lag + graph_min_width;
- u32 s_len = succ_vp.s.length();
-
- if (s_len <= min_width) {
- return 0; /* no overlap */
- }
-
- u32 overlap = s_len - min_width;
- DEBUG_PRINTF("found overlap of %u\n", overlap);
- return overlap;
-}
-
-static
-void findRoseLiteralMask(const NGHolder &h, const u32 lag, vector<u8> &msk,
- vector<u8> &cmp) {
- if (lag >= HWLM_MASKLEN) {
- msk.clear(); cmp.clear();
- return;
- }
-
- assert(in_degree(h.acceptEod, h) == 1); // no eod reports
-
- // Start with the set of reporter vertices for this rose.
- set<NFAVertex> curr, next;
- insert(&curr, inv_adjacent_vertices(h.accept, h));
- assert(!curr.empty());
-
- msk.assign(HWLM_MASKLEN, 0);
- cmp.assign(HWLM_MASKLEN, 0);
- size_t i = HWLM_MASKLEN - lag - 1;
- do {
- if (curr.empty() || contains(curr, h.start) ||
- contains(curr, h.startDs)) {
- DEBUG_PRINTF("end of the road\n");
- break;
- }
-
- next.clear();
- CharReach cr;
- for (auto v : curr) {
+ return RoseGraph::null_vertex();
+ }
+ max_width = depth(ep.maxBound + iv_info.s.length());
+ min_width = depth(ep.minBound + iv_info.s.length());
+ add_edge(h.start, h.accept, h);
+ appendLiteral(h, iv_info.s);
+ } else {
+ return RoseGraph::null_vertex();
+ }
+
+ u32 anchored_exit_id = tbi->getNewLiteralId();
+ u32 remap_id = 0;
+ DEBUG_PRINTF(" trying to add dfa stuff\n");
+ int rv = addToAnchoredMatcher(*tbi, h, anchored_exit_id, &remap_id);
+
+ if (rv == ANCHORED_FAIL) {
+ return RoseGraph::null_vertex();
+ } else if (rv == ANCHORED_REMAP) {
+ anchored_exit_id = remap_id;
+ } else {
+ assert(rv == ANCHORED_SUCCESS);
+ }
+
+ // Store the literal itself in a side structure so that we can use it for
+ // overlap calculations later. This may be obsolete when the old Rose
+ // construction path (and its history selection code) goes away.
+ rose_literal_id lit(iv_info.s, ROSE_ANCHORED, 0);
+ tbi->anchoredLitSuffix.insert(make_pair(anchored_exit_id, lit));
+
+ assert(min_width <= anchored_max_depth);
+ assert(max_width <= anchored_max_depth);
+ assert(min_width <= max_width);
+
+ /* Note: bounds are end-to-end as anchored lits are considered
+ * to have 0 length. */
+ RoseVertex v = createAnchoredVertex(tbi, anchored_exit_id, min_width,
+ max_width);
+ return v;
+}
+
+static
+u32 findRoseAnchorFloatingOverlap(const RoseInEdgeProps &ep,
+ const RoseInVertexProps &succ_vp) {
+ /* we need to ensure there is enough history to find the successor literal
+ * when we enable its group.
+ */
+
+ if (!ep.graph.get()) {
+ return 0; /* non overlapping */
+ }
+ depth graph_min_width = findMinWidth(*ep.graph);
+ u32 min_width = ep.graph_lag + graph_min_width;
+ u32 s_len = succ_vp.s.length();
+
+ if (s_len <= min_width) {
+ return 0; /* no overlap */
+ }
+
+ u32 overlap = s_len - min_width;
+ DEBUG_PRINTF("found overlap of %u\n", overlap);
+ return overlap;
+}
+
+static
+void findRoseLiteralMask(const NGHolder &h, const u32 lag, vector<u8> &msk,
+ vector<u8> &cmp) {
+ if (lag >= HWLM_MASKLEN) {
+ msk.clear(); cmp.clear();
+ return;
+ }
+
+ assert(in_degree(h.acceptEod, h) == 1); // no eod reports
+
+ // Start with the set of reporter vertices for this rose.
+ set<NFAVertex> curr, next;
+ insert(&curr, inv_adjacent_vertices(h.accept, h));
+ assert(!curr.empty());
+
+ msk.assign(HWLM_MASKLEN, 0);
+ cmp.assign(HWLM_MASKLEN, 0);
+ size_t i = HWLM_MASKLEN - lag - 1;
+ do {
+ if (curr.empty() || contains(curr, h.start) ||
+ contains(curr, h.startDs)) {
+ DEBUG_PRINTF("end of the road\n");
+ break;
+ }
+
+ next.clear();
+ CharReach cr;
+ for (auto v : curr) {
DEBUG_PRINTF("vertex %zu, reach %s\n", h[v].index,
- describeClass(h[v].char_reach).c_str());
- cr |= h[v].char_reach;
- insert(&next, inv_adjacent_vertices(v, h));
- }
- make_and_cmp_mask(cr, &msk[i], &cmp[i]);
- DEBUG_PRINTF("%zu: reach=%s, msk=%u, cmp=%u\n", i,
- describeClass(cr).c_str(), msk.at(i), cmp.at(i));
- curr.swap(next);
- } while (i-- > 0);
-}
-
-static
-void doRoseLiteralVertex(RoseBuildImpl *tbi, bool use_eod_table,
- map<RoseInVertex, vector<RoseVertex> > &vertex_map,
- const vector<pair<RoseVertex, RoseInEdge> > &parents,
- RoseInVertex iv, const RoseBuildData &bd) {
- const RoseInGraph &ig = bd.ig;
- const RoseInVertexProps &iv_info = ig[iv];
- assert(iv_info.type == RIV_LITERAL);
- assert(!parents.empty()); /* start vertices should not be here */
-
+ describeClass(h[v].char_reach).c_str());
+ cr |= h[v].char_reach;
+ insert(&next, inv_adjacent_vertices(v, h));
+ }
+ make_and_cmp_mask(cr, &msk[i], &cmp[i]);
+ DEBUG_PRINTF("%zu: reach=%s, msk=%u, cmp=%u\n", i,
+ describeClass(cr).c_str(), msk.at(i), cmp.at(i));
+ curr.swap(next);
+ } while (i-- > 0);
+}
+
+static
+void doRoseLiteralVertex(RoseBuildImpl *tbi, bool use_eod_table,
+ map<RoseInVertex, vector<RoseVertex> > &vertex_map,
+ const vector<pair<RoseVertex, RoseInEdge> > &parents,
+ RoseInVertex iv, const RoseBuildData &bd) {
+ const RoseInGraph &ig = bd.ig;
+ const RoseInVertexProps &iv_info = ig[iv];
+ assert(iv_info.type == RIV_LITERAL);
+ assert(!parents.empty()); /* start vertices should not be here */
+
// ng_violet should have ensured that mixed-sensitivity literals are no
- // longer than the benefits max width.
- assert(iv_info.s.length() <= MAX_MASK2_WIDTH ||
- !mixed_sensitivity(iv_info.s));
-
- // Rose graph construction process should have given us a min_offset.
- assert(iv_info.min_offset > 0);
-
- if (use_eod_table) {
- goto floating;
- }
-
- DEBUG_PRINTF("rose find vertex\n");
- if (parents.size() == 1) {
- const RoseVertex u = parents.front().first;
- const RoseInEdgeProps &ep = ig[parents.front().second];
-
- if (!tbi->isAnyStart(u)) {
- goto floating;
- }
-
- if (!ep.graph && ep.maxBound == ROSE_BOUND_INF) {
- goto floating;
- }
- if (ep.graph && !isAnchored(*ep.graph)) {
- goto floating;
- }
-
- DEBUG_PRINTF("cand for anchored maxBound %u, %p (%d)\n", ep.maxBound,
- ep.graph.get(), ep.graph ? (int)isAnchored(*ep.graph) : 3);
-
- /* need to check if putting iv into the anchored table would create
- * any bad_overlap relationships with its successor literals */
- for (const auto &e : out_edges_range(iv, ig)) {
- RoseInVertex t = target(e, ig);
- u32 overlap = findRoseAnchorFloatingOverlap(ig[e], ig[t]);
- DEBUG_PRINTF("found overlap of %u\n", overlap);
- if (overlap > tbi->cc.grey.maxHistoryAvailable + 1) {
- goto floating;
- }
- }
-
- RoseVertex v = tryForAnchoredVertex(tbi, iv_info, ep);
- if (v != RoseGraph::null_vertex()) {
- DEBUG_PRINTF("add anchored literal vertex\n");
- vertex_map[iv].push_back(v);
- return;
- }
- }
-
-floating:
- vector<u8> msk, cmp;
- if (tbi->cc.grey.roseHamsterMasks && in_degree(iv, ig) == 1) {
- RoseInEdge e = *in_edges(iv, ig).first;
- if (ig[e].graph) {
- findRoseLiteralMask(*ig[e].graph, ig[e].graph_lag, msk, cmp);
- }
- }
-
- u32 delay = iv_info.delay;
- rose_literal_table table = use_eod_table ? ROSE_EOD_ANCHORED : ROSE_FLOATING;
-
- u32 literalId = tbi->getLiteralId(iv_info.s, msk, cmp, delay, table);
-
- DEBUG_PRINTF("literal=%u (len=%zu, delay=%u, offsets=[%u,%u] '%s')\n",
- literalId, iv_info.s.length(), delay, iv_info.min_offset,
- iv_info.max_offset, dumpString(iv_info.s).c_str());
-
- createVertices(tbi, vertex_map, parents, iv, iv_info.min_offset,
- iv_info.max_offset, literalId, delay, bd);
-}
-
-static
+ // longer than the benefits max width.
+ assert(iv_info.s.length() <= MAX_MASK2_WIDTH ||
+ !mixed_sensitivity(iv_info.s));
+
+ // Rose graph construction process should have given us a min_offset.
+ assert(iv_info.min_offset > 0);
+
+ if (use_eod_table) {
+ goto floating;
+ }
+
+ DEBUG_PRINTF("rose find vertex\n");
+ if (parents.size() == 1) {
+ const RoseVertex u = parents.front().first;
+ const RoseInEdgeProps &ep = ig[parents.front().second];
+
+ if (!tbi->isAnyStart(u)) {
+ goto floating;
+ }
+
+ if (!ep.graph && ep.maxBound == ROSE_BOUND_INF) {
+ goto floating;
+ }
+ if (ep.graph && !isAnchored(*ep.graph)) {
+ goto floating;
+ }
+
+ DEBUG_PRINTF("cand for anchored maxBound %u, %p (%d)\n", ep.maxBound,
+ ep.graph.get(), ep.graph ? (int)isAnchored(*ep.graph) : 3);
+
+ /* need to check if putting iv into the anchored table would create
+ * any bad_overlap relationships with its successor literals */
+ for (const auto &e : out_edges_range(iv, ig)) {
+ RoseInVertex t = target(e, ig);
+ u32 overlap = findRoseAnchorFloatingOverlap(ig[e], ig[t]);
+ DEBUG_PRINTF("found overlap of %u\n", overlap);
+ if (overlap > tbi->cc.grey.maxHistoryAvailable + 1) {
+ goto floating;
+ }
+ }
+
+ RoseVertex v = tryForAnchoredVertex(tbi, iv_info, ep);
+ if (v != RoseGraph::null_vertex()) {
+ DEBUG_PRINTF("add anchored literal vertex\n");
+ vertex_map[iv].push_back(v);
+ return;
+ }
+ }
+
+floating:
+ vector<u8> msk, cmp;
+ if (tbi->cc.grey.roseHamsterMasks && in_degree(iv, ig) == 1) {
+ RoseInEdge e = *in_edges(iv, ig).first;
+ if (ig[e].graph) {
+ findRoseLiteralMask(*ig[e].graph, ig[e].graph_lag, msk, cmp);
+ }
+ }
+
+ u32 delay = iv_info.delay;
+ rose_literal_table table = use_eod_table ? ROSE_EOD_ANCHORED : ROSE_FLOATING;
+
+ u32 literalId = tbi->getLiteralId(iv_info.s, msk, cmp, delay, table);
+
+ DEBUG_PRINTF("literal=%u (len=%zu, delay=%u, offsets=[%u,%u] '%s')\n",
+ literalId, iv_info.s.length(), delay, iv_info.min_offset,
+ iv_info.max_offset, dumpString(iv_info.s).c_str());
+
+ createVertices(tbi, vertex_map, parents, iv, iv_info.min_offset,
+ iv_info.max_offset, literalId, delay, bd);
+}
+
+static
unique_ptr<NGHolder> makeRoseEodPrefix(const NGHolder &h, RoseBuildImpl &build,
map<flat_set<ReportID>, ReportID> &remap) {
- assert(generates_callbacks(h));
+ assert(generates_callbacks(h));
assert(!in_degree(h.accept, h));
auto gg = cloneHolder(h);
NGHolder &g = *gg;
g.kind = is_triggered(h) ? NFA_INFIX : NFA_PREFIX;
-
- // Move acceptEod edges over to accept.
- vector<NFAEdge> dead;
+
+ // Move acceptEod edges over to accept.
+ vector<NFAEdge> dead;
for (const auto &e : in_edges_range(g.acceptEod, g)) {
NFAVertex u = source(e, g);
if (u == g.accept) {
- continue;
- }
+ continue;
+ }
add_edge_if_not_present(u, g.accept, g);
- dead.push_back(e);
+ dead.push_back(e);
if (!contains(remap, g[u].reports)) {
remap[g[u].reports] = build.getNewNfaReport();
}
g[u].reports = { remap[g[u].reports] };
- }
-
+ }
+
remove_edges(dead, g);
return gg;
-}
-
-static
+}
+
+static
u32 getEodEventID(RoseBuildImpl &build) {
// Allocate the EOD event if it hasn't been already.
if (build.eod_event_literal_id == MO_INVALID_IDX) {
@@ -730,18 +730,18 @@ void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u,
}
static
-void doRoseAcceptVertex(RoseBuildImpl *tbi,
- const vector<pair<RoseVertex, RoseInEdge> > &parents,
- RoseInVertex iv, const RoseBuildData &bd) {
- const RoseInGraph &ig = bd.ig;
- assert(ig[iv].type == RIV_ACCEPT || ig[iv].type == RIV_ACCEPT_EOD);
-
- RoseGraph &g = tbi->g;
-
- for (const auto &pv : parents) {
- RoseVertex u = pv.first;
- const RoseInEdgeProps &edge_props = bd.ig[pv.second];
-
+void doRoseAcceptVertex(RoseBuildImpl *tbi,
+ const vector<pair<RoseVertex, RoseInEdge> > &parents,
+ RoseInVertex iv, const RoseBuildData &bd) {
+ const RoseInGraph &ig = bd.ig;
+ assert(ig[iv].type == RIV_ACCEPT || ig[iv].type == RIV_ACCEPT_EOD);
+
+ RoseGraph &g = tbi->g;
+
+ for (const auto &pv : parents) {
+ RoseVertex u = pv.first;
+ const RoseInEdgeProps &edge_props = bd.ig[pv.second];
+
/* We need to duplicate the parent vertices if:
*
* 1) It already has a suffix, etc as we are going to add the specified
@@ -753,42 +753,42 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi,
* incompatible with normal successors. As accepts are processed last we
* do not need to worry about other normal successors being added later.
*/
- if (g[u].suffix || !g[u].reports.empty()
+ if (g[u].suffix || !g[u].reports.empty()
|| (ig[iv].type == RIV_ACCEPT_EOD && out_degree(u, g)
&& !edge_props.graph)
- || (!isLeafNode(u, g) && !tbi->isAnyStart(u))) {
+ || (!isLeafNode(u, g) && !tbi->isAnyStart(u))) {
DEBUG_PRINTF("duplicating for parent %zu\n", g[u].index);
- assert(!tbi->isAnyStart(u));
- u = duplicate(tbi, u);
- g[u].suffix.reset();
- g[u].eod_accept = false;
- }
-
- assert(!g[u].suffix);
- if (ig[iv].type == RIV_ACCEPT) {
- assert(!tbi->isAnyStart(u));
+ assert(!tbi->isAnyStart(u));
+ u = duplicate(tbi, u);
+ g[u].suffix.reset();
+ g[u].eod_accept = false;
+ }
+
+ assert(!g[u].suffix);
+ if (ig[iv].type == RIV_ACCEPT) {
+ assert(!tbi->isAnyStart(u));
if (edge_props.dfa) {
DEBUG_PRINTF("adding early dfa suffix to i%zu\n", g[u].index);
g[u].suffix.rdfa = edge_props.dfa;
- g[u].suffix.dfa_min_width = findMinWidth(*edge_props.graph);
- g[u].suffix.dfa_max_width = findMaxWidth(*edge_props.graph);
- } else if (edge_props.graph) {
+ g[u].suffix.dfa_min_width = findMinWidth(*edge_props.graph);
+ g[u].suffix.dfa_max_width = findMaxWidth(*edge_props.graph);
+ } else if (edge_props.graph) {
DEBUG_PRINTF("adding suffix to i%zu\n", g[u].index);
- g[u].suffix.graph = edge_props.graph;
- assert(g[u].suffix.graph->kind == NFA_SUFFIX);
- /* TODO: set dfa_(min|max)_width */
- } else if (edge_props.haig) {
+ g[u].suffix.graph = edge_props.graph;
+ assert(g[u].suffix.graph->kind == NFA_SUFFIX);
+ /* TODO: set dfa_(min|max)_width */
+ } else if (edge_props.haig) {
DEBUG_PRINTF("adding suffaig to i%zu\n", g[u].index);
- g[u].suffix.haig = edge_props.haig;
- } else {
+ g[u].suffix.haig = edge_props.haig;
+ } else {
DEBUG_PRINTF("adding boring accept to i%zu\n", g[u].index);
- assert(!g[u].eod_accept);
- g[u].reports = ig[iv].reports;
- }
- } else {
- assert(ig[iv].type == RIV_ACCEPT_EOD);
+ assert(!g[u].eod_accept);
+ g[u].reports = ig[iv].reports;
+ }
+ } else {
+ assert(ig[iv].type == RIV_ACCEPT_EOD);
assert(!edge_props.haig);
-
+
if (!edge_props.graph) {
RoseVertex w = add_vertex(g);
g[w].eod_accept = true;
@@ -809,129 +809,129 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi,
if (tbi->isInETable(u)) {
assert(h.kind == NFA_SUFFIX);
- assert(!tbi->isAnyStart(u));
- /* etable can't/shouldn't use eod event */
+ assert(!tbi->isAnyStart(u));
+ /* etable can't/shouldn't use eod event */
DEBUG_PRINTF("adding suffix to i%zu\n", g[u].index);
- g[u].suffix.graph = edge_props.graph;
- continue;
- }
-
+ g[u].suffix.graph = edge_props.graph;
+ continue;
+ }
+
makeEodEventLeftfix(*tbi, u, h);
- }
- }
-}
-
-static
-bool suitableForEod(const RoseInGraph &ig, vector<RoseInVertex> topo,
- u32 *max_len, const CompileContext &cc) {
- map<RoseInVertex, u32> max_depth_from_eod;
- *max_len = 0;
-
- reverse(topo.begin(), topo.end()); /* we want to start at accept end */
-
- for (auto v : topo) {
- u32 v_depth = 0;
-
- if (ig[v].type == RIV_ACCEPT) {
- DEBUG_PRINTF("[ACCEPT]\n");
- for (const auto &e : in_edges_range(v, ig)) {
- if (!ig[e].graph || !can_only_match_at_eod(*ig[e].graph)) {
- DEBUG_PRINTF("floating accept\n");
- return false;
- }
- }
- }
-
- switch (ig[v].type) {
- case RIV_LITERAL:
- DEBUG_PRINTF("[LITERAL]\n");
- break;
- case RIV_START:
- DEBUG_PRINTF("[START]\n");
- break;
- case RIV_ANCHORED_START:
- DEBUG_PRINTF("[ANCHOR]\n");
- break;
- case RIV_ACCEPT:
- break;
- case RIV_ACCEPT_EOD:
- DEBUG_PRINTF("[EOD]\n");
- break;
- default:
- assert(0);
- DEBUG_PRINTF("????\n");
- return false;
- }
-
- for (const auto &e : out_edges_range(v, ig)) {
- RoseInVertex t = target(e, ig);
-
- assert(contains(max_depth_from_eod, t));
- u64a max_width;
-
- if (ig[v].type == RIV_START || ig[v].type == RIV_ANCHORED_START) {
- /* start itself doesn't need to be in history buffer
- * just need to make sure all succ literals are ok */
- if (ig[t].type == RIV_LITERAL) {
- max_width = ig[t].s.length();
- } else {
- max_width = 0;
- }
- if (ig[e].graph) {
- depth graph_max_width = findMaxWidth(*ig[e].graph);
- DEBUG_PRINTF("graph max width %s, lag %u\n",
- graph_max_width.str().c_str(),
- ig[e].graph_lag);
- if (!graph_max_width.is_finite()) {
- DEBUG_PRINTF("fail due to graph with inf max width\n");
- return false;
- }
- max_width += graph_max_width;
- }
- } else if (ig[e].haig) {
- DEBUG_PRINTF("fail due to haig\n");
- return false;
- } else if (ig[e].graph) {
- depth graph_max_width = findMaxWidth(*ig[e].graph);
- DEBUG_PRINTF("graph max width %s, lag %u\n",
- graph_max_width.str().c_str(), ig[e].graph_lag);
- if (!graph_max_width.is_finite()) {
- DEBUG_PRINTF("fail due to graph with inf max width\n");
- return false;
- }
- max_width = ig[e].graph_lag + graph_max_width;
- } else {
- max_width = ig[e].maxBound;
- if (ig[t].type == RIV_LITERAL) {
- max_width += ig[t].s.length();
- }
- }
-
- max_width += max_depth_from_eod[t];
- if (max_width > ROSE_BOUND_INF) {
- max_width = ROSE_BOUND_INF;
- }
-
- DEBUG_PRINTF("max_width=%llu\n", max_width);
-
- ENSURE_AT_LEAST(&v_depth, (u32)max_width);
- }
-
+ }
+ }
+}
+
+static
+bool suitableForEod(const RoseInGraph &ig, vector<RoseInVertex> topo,
+ u32 *max_len, const CompileContext &cc) {
+ map<RoseInVertex, u32> max_depth_from_eod;
+ *max_len = 0;
+
+ reverse(topo.begin(), topo.end()); /* we want to start at accept end */
+
+ for (auto v : topo) {
+ u32 v_depth = 0;
+
+ if (ig[v].type == RIV_ACCEPT) {
+ DEBUG_PRINTF("[ACCEPT]\n");
+ for (const auto &e : in_edges_range(v, ig)) {
+ if (!ig[e].graph || !can_only_match_at_eod(*ig[e].graph)) {
+ DEBUG_PRINTF("floating accept\n");
+ return false;
+ }
+ }
+ }
+
+ switch (ig[v].type) {
+ case RIV_LITERAL:
+ DEBUG_PRINTF("[LITERAL]\n");
+ break;
+ case RIV_START:
+ DEBUG_PRINTF("[START]\n");
+ break;
+ case RIV_ANCHORED_START:
+ DEBUG_PRINTF("[ANCHOR]\n");
+ break;
+ case RIV_ACCEPT:
+ break;
+ case RIV_ACCEPT_EOD:
+ DEBUG_PRINTF("[EOD]\n");
+ break;
+ default:
+ assert(0);
+ DEBUG_PRINTF("????\n");
+ return false;
+ }
+
+ for (const auto &e : out_edges_range(v, ig)) {
+ RoseInVertex t = target(e, ig);
+
+ assert(contains(max_depth_from_eod, t));
+ u64a max_width;
+
+ if (ig[v].type == RIV_START || ig[v].type == RIV_ANCHORED_START) {
+ /* start itself doesn't need to be in history buffer
+ * just need to make sure all succ literals are ok */
+ if (ig[t].type == RIV_LITERAL) {
+ max_width = ig[t].s.length();
+ } else {
+ max_width = 0;
+ }
+ if (ig[e].graph) {
+ depth graph_max_width = findMaxWidth(*ig[e].graph);
+ DEBUG_PRINTF("graph max width %s, lag %u\n",
+ graph_max_width.str().c_str(),
+ ig[e].graph_lag);
+ if (!graph_max_width.is_finite()) {
+ DEBUG_PRINTF("fail due to graph with inf max width\n");
+ return false;
+ }
+ max_width += graph_max_width;
+ }
+ } else if (ig[e].haig) {
+ DEBUG_PRINTF("fail due to haig\n");
+ return false;
+ } else if (ig[e].graph) {
+ depth graph_max_width = findMaxWidth(*ig[e].graph);
+ DEBUG_PRINTF("graph max width %s, lag %u\n",
+ graph_max_width.str().c_str(), ig[e].graph_lag);
+ if (!graph_max_width.is_finite()) {
+ DEBUG_PRINTF("fail due to graph with inf max width\n");
+ return false;
+ }
+ max_width = ig[e].graph_lag + graph_max_width;
+ } else {
+ max_width = ig[e].maxBound;
+ if (ig[t].type == RIV_LITERAL) {
+ max_width += ig[t].s.length();
+ }
+ }
+
+ max_width += max_depth_from_eod[t];
+ if (max_width > ROSE_BOUND_INF) {
+ max_width = ROSE_BOUND_INF;
+ }
+
+ DEBUG_PRINTF("max_width=%llu\n", max_width);
+
+ ENSURE_AT_LEAST(&v_depth, (u32)max_width);
+ }
+
if (v_depth == ROSE_BOUND_INF
|| v_depth > cc.grey.maxHistoryAvailable) {
- DEBUG_PRINTF("not suitable for eod table %u\n", v_depth);
- return false;
- }
-
- max_depth_from_eod[v] = v_depth;
- ENSURE_AT_LEAST(max_len, v_depth);
- }
-
- DEBUG_PRINTF("to the eod table and beyond\n");
- return true;
-}
-
-static
+ DEBUG_PRINTF("not suitable for eod table %u\n", v_depth);
+ return false;
+ }
+
+ max_depth_from_eod[v] = v_depth;
+ ENSURE_AT_LEAST(max_len, v_depth);
+ }
+
+ DEBUG_PRINTF("to the eod table and beyond\n");
+ return true;
+}
+
+static
void shift_accepts_to_end(const RoseInGraph &ig,
vector<RoseInVertex> &topo_order) {
stable_partition(begin(topo_order), end(topo_order),
@@ -939,761 +939,761 @@ void shift_accepts_to_end(const RoseInGraph &ig,
}
static
-void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) {
- const RoseInGraph &ig = bd.ig;
-
- /* add the pattern in to the main rose graph */
- DEBUG_PRINTF("%srose pop\n", bd.som ? "som " : "");
-
- /* Note: an input vertex may need to create several rose vertices. This is
- * primarily because a RoseVertex can only have 1 one leftfix */
- map<RoseInVertex, vector<RoseVertex> > vertex_map;
-
- vector<RoseInVertex> v_order = topo_order(ig);
+void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) {
+ const RoseInGraph &ig = bd.ig;
+
+ /* add the pattern in to the main rose graph */
+ DEBUG_PRINTF("%srose pop\n", bd.som ? "som " : "");
+
+ /* Note: an input vertex may need to create several rose vertices. This is
+ * primarily because a RoseVertex can only have 1 one leftfix */
+ map<RoseInVertex, vector<RoseVertex> > vertex_map;
+
+ vector<RoseInVertex> v_order = topo_order(ig);
shift_accepts_to_end(ig, v_order);
-
- u32 eod_space_required;
- bool use_eod_table = suitableForEod(ig, v_order, &eod_space_required,
- tbi->cc);
- if (use_eod_table) {
- ENSURE_AT_LEAST(&tbi->ematcher_region_size, eod_space_required);
- }
-
- assert(ig[v_order.front()].type == RIV_START
- || ig[v_order.front()].type == RIV_ANCHORED_START);
-
- for (RoseInVertex iv : v_order) {
+
+ u32 eod_space_required;
+ bool use_eod_table = suitableForEod(ig, v_order, &eod_space_required,
+ tbi->cc);
+ if (use_eod_table) {
+ ENSURE_AT_LEAST(&tbi->ematcher_region_size, eod_space_required);
+ }
+
+ assert(ig[v_order.front()].type == RIV_START
+ || ig[v_order.front()].type == RIV_ANCHORED_START);
+
+ for (RoseInVertex iv : v_order) {
DEBUG_PRINTF("vertex %zu\n", ig[iv].index);
-
- if (ig[iv].type == RIV_START) {
- DEBUG_PRINTF("is root\n");
- vertex_map[iv].push_back(tbi->root);
- continue;
- } else if (ig[iv].type == RIV_ANCHORED_START) {
- DEBUG_PRINTF("is anchored root\n");
- vertex_map[iv].push_back(tbi->anchored_root);
- continue;
- }
-
- vector<pair<RoseVertex, RoseInEdge> > parents;
- for (const auto &e : in_edges_range(iv, ig)) {
- RoseInVertex u = source(e, ig);
- assert(contains(vertex_map, u));
- const vector<RoseVertex> &images = vertex_map[u];
-
- // We should have no dupes.
+
+ if (ig[iv].type == RIV_START) {
+ DEBUG_PRINTF("is root\n");
+ vertex_map[iv].push_back(tbi->root);
+ continue;
+ } else if (ig[iv].type == RIV_ANCHORED_START) {
+ DEBUG_PRINTF("is anchored root\n");
+ vertex_map[iv].push_back(tbi->anchored_root);
+ continue;
+ }
+
+ vector<pair<RoseVertex, RoseInEdge> > parents;
+ for (const auto &e : in_edges_range(iv, ig)) {
+ RoseInVertex u = source(e, ig);
+ assert(contains(vertex_map, u));
+ const vector<RoseVertex> &images = vertex_map[u];
+
+ // We should have no dupes.
assert(set<RoseVertex>(images.begin(), images.end()).size()
- == images.size());
-
- for (auto v_image : images) {
- // v_image should NOT already be in our parents list.
- assert(find_if(parents.begin(), parents.end(),
- [&v_image](const pair<RoseVertex, RoseInEdge> &p) {
- return p.first == v_image;
- }) == parents.end());
-
- parents.emplace_back(v_image, e);
-
- if (tbi->isAnchored(v_image)) {
- assert(!use_eod_table);
- u32 overlap = findRoseAnchorFloatingOverlap(ig[e], ig[iv]);
- assert(overlap <= tbi->cc.grey.maxHistoryAvailable + 1);
- ENSURE_AT_LEAST(&tbi->max_rose_anchored_floating_overlap,
- overlap);
- }
- }
- }
-
- if (ig[iv].type == RIV_LITERAL) {
- DEBUG_PRINTF("LITERAL '%s'\n", dumpString(ig[iv].s).c_str());
- assert(!isLeafNode(iv, ig));
- doRoseLiteralVertex(tbi, use_eod_table, vertex_map, parents, iv,
- bd);
- } else {
- if (ig[iv].type == RIV_ACCEPT) {
- DEBUG_PRINTF("ACCEPT\n");
- } else {
- assert(ig[iv].type == RIV_ACCEPT_EOD);
- DEBUG_PRINTF("ACCEPT_EOD\n");
- }
- assert(isLeafNode(iv, ig)); /* accepts are final */
- doRoseAcceptVertex(tbi, parents, iv, bd);
- }
- }
- DEBUG_PRINTF("done\n");
-}
-
-template<typename GraphT>
-static
-bool empty(const GraphT &g) {
- typename GraphT::vertex_iterator vi, ve;
- tie(vi, ve) = vertices(g);
- return vi == ve;
-}
-
-static
+ == images.size());
+
+ for (auto v_image : images) {
+ // v_image should NOT already be in our parents list.
+ assert(find_if(parents.begin(), parents.end(),
+ [&v_image](const pair<RoseVertex, RoseInEdge> &p) {
+ return p.first == v_image;
+ }) == parents.end());
+
+ parents.emplace_back(v_image, e);
+
+ if (tbi->isAnchored(v_image)) {
+ assert(!use_eod_table);
+ u32 overlap = findRoseAnchorFloatingOverlap(ig[e], ig[iv]);
+ assert(overlap <= tbi->cc.grey.maxHistoryAvailable + 1);
+ ENSURE_AT_LEAST(&tbi->max_rose_anchored_floating_overlap,
+ overlap);
+ }
+ }
+ }
+
+ if (ig[iv].type == RIV_LITERAL) {
+ DEBUG_PRINTF("LITERAL '%s'\n", dumpString(ig[iv].s).c_str());
+ assert(!isLeafNode(iv, ig));
+ doRoseLiteralVertex(tbi, use_eod_table, vertex_map, parents, iv,
+ bd);
+ } else {
+ if (ig[iv].type == RIV_ACCEPT) {
+ DEBUG_PRINTF("ACCEPT\n");
+ } else {
+ assert(ig[iv].type == RIV_ACCEPT_EOD);
+ DEBUG_PRINTF("ACCEPT_EOD\n");
+ }
+ assert(isLeafNode(iv, ig)); /* accepts are final */
+ doRoseAcceptVertex(tbi, parents, iv, bd);
+ }
+ }
+ DEBUG_PRINTF("done\n");
+}
+
+template<typename GraphT>
+static
+bool empty(const GraphT &g) {
+ typename GraphT::vertex_iterator vi, ve;
+ tie(vi, ve) = vertices(g);
+ return vi == ve;
+}
+
+static
bool canImplementGraph(NGHolder &h, bool prefilter, const ReportManager &rm,
const CompileContext &cc) {
- if (isImplementableNFA(h, &rm, cc)) {
- return true;
- }
-
- if (prefilter && cc.grey.prefilterReductions) {
- // If we're prefiltering, we can have another go with a reduced graph.
- UNUSED size_t numBefore = num_vertices(h);
- prefilterReductions(h, cc);
- UNUSED size_t numAfter = num_vertices(h);
- DEBUG_PRINTF("reduced from %zu to %zu vertices\n", numBefore, numAfter);
-
- if (isImplementableNFA(h, &rm, cc)) {
- return true;
- }
- }
-
- DEBUG_PRINTF("unable to build engine\n");
- return false;
-}
-
-static
-bool predsAreDelaySensitive(const RoseInGraph &ig, RoseInVertex v) {
- assert(in_degree(v, ig));
-
- for (const auto &e : in_edges_range(v, ig)) {
- if (ig[e].graph || ig[e].haig) {
- DEBUG_PRINTF("edge graph\n");
- return true;
- }
- if (ig[e].minBound || ig[e].maxBound != ROSE_BOUND_INF) {
- DEBUG_PRINTF("edge bounds\n");
- return true;
- }
-
- RoseInVertex u = source(e, ig);
- if (ig[u].type == RIV_START) {
- continue;
- }
- if (ig[u].type != RIV_LITERAL) {
- DEBUG_PRINTF("unsafe pred vertex\n");
- return true;
- }
- if (ig[u].delay) {
- DEBUG_PRINTF("pred has delay\n");
- return true;
- }
- }
-
- return false;
-}
-
-static
-u32 maxAvailableDelay(const ue2_literal &pred_key, const ue2_literal &lit_key) {
- /* overly conservative if only part of the string is nocase */
- string pred = pred_key.get_string();
- string lit = lit_key.get_string();
-
- if (pred_key.any_nocase() || lit_key.any_nocase()) {
- upperString(pred);
- upperString(lit);
- }
-
- string::size_type last = pred.rfind(lit);
- if (last == string::npos) {
- return MAX_DELAY;
- }
-
- u32 raw = pred.size() - last - 1;
- return MIN(raw, MAX_DELAY);
-}
-
-static
+ if (isImplementableNFA(h, &rm, cc)) {
+ return true;
+ }
+
+ if (prefilter && cc.grey.prefilterReductions) {
+ // If we're prefiltering, we can have another go with a reduced graph.
+ UNUSED size_t numBefore = num_vertices(h);
+ prefilterReductions(h, cc);
+ UNUSED size_t numAfter = num_vertices(h);
+ DEBUG_PRINTF("reduced from %zu to %zu vertices\n", numBefore, numAfter);
+
+ if (isImplementableNFA(h, &rm, cc)) {
+ return true;
+ }
+ }
+
+ DEBUG_PRINTF("unable to build engine\n");
+ return false;
+}
+
+static
+bool predsAreDelaySensitive(const RoseInGraph &ig, RoseInVertex v) {
+ assert(in_degree(v, ig));
+
+ for (const auto &e : in_edges_range(v, ig)) {
+ if (ig[e].graph || ig[e].haig) {
+ DEBUG_PRINTF("edge graph\n");
+ return true;
+ }
+ if (ig[e].minBound || ig[e].maxBound != ROSE_BOUND_INF) {
+ DEBUG_PRINTF("edge bounds\n");
+ return true;
+ }
+
+ RoseInVertex u = source(e, ig);
+ if (ig[u].type == RIV_START) {
+ continue;
+ }
+ if (ig[u].type != RIV_LITERAL) {
+ DEBUG_PRINTF("unsafe pred vertex\n");
+ return true;
+ }
+ if (ig[u].delay) {
+ DEBUG_PRINTF("pred has delay\n");
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static
+u32 maxAvailableDelay(const ue2_literal &pred_key, const ue2_literal &lit_key) {
+ /* overly conservative if only part of the string is nocase */
+ string pred = pred_key.get_string();
+ string lit = lit_key.get_string();
+
+ if (pred_key.any_nocase() || lit_key.any_nocase()) {
+ upperString(pred);
+ upperString(lit);
+ }
+
+ string::size_type last = pred.rfind(lit);
+ if (last == string::npos) {
+ return MAX_DELAY;
+ }
+
+ u32 raw = pred.size() - last - 1;
+ return MIN(raw, MAX_DELAY);
+}
+
+static
u32 findMaxSafeDelay(const RoseInGraph &ig, RoseInVertex u, RoseInVertex v) {
- // First, check the overlap constraints on (u,v).
- size_t max_delay;
- if (ig[v].type == RIV_LITERAL) {
- DEBUG_PRINTF("lit->lit edge: '%s' -> '%s'\n",
- escapeString(ig[u].s).c_str(),
- escapeString(ig[v].s).c_str());
- max_delay = maxAvailableDelay(ig[u].s, ig[v].s);
- } else if (ig[v].type == RIV_ACCEPT) {
- DEBUG_PRINTF("lit->accept edge: '%s' -> ACCEPT\n",
- escapeString(ig[u].s).c_str());
- max_delay = MAX_DELAY;
- } else {
- assert(0);
- return 0;
- }
-
- DEBUG_PRINTF("max safe delay for this edge: %zu\n", max_delay);
-
- // Now consider the predecessors of u.
- for (const auto &e : in_edges_range(u, ig)) {
- RoseInVertex w = source(e, ig);
- if (ig[w].type == RIV_START) {
- continue;
- }
- assert(ig[w].type == RIV_LITERAL);
- assert(ig[w].delay == 0);
-
- DEBUG_PRINTF("pred lit->lit edge: '%s' -> '%s'\n",
- escapeString(ig[w].s).c_str(),
- escapeString(ig[u].s).c_str());
-
- // We cannot delay the literal on u so much that a predecessor literal
- // could occur in the delayed region. For example, consider
- // 'barman.*foobar': if we allow 'foobar' to be delayed by 3, then
- // 'barman' could occur in the input string and race with 'foobar', as
- // in 'foobarman'.
-
- const size_t pred_len = ig[w].s.length();
- size_t overlap = maxOverlap(ig[u].s, ig[w].s, 0);
- DEBUG_PRINTF("pred_len=%zu, overlap=%zu\n", pred_len, overlap);
- assert(overlap <= pred_len);
- size_t max_lit_delay = pred_len - min(overlap + 1, pred_len);
- DEBUG_PRINTF("overlap=%zu -> max_lit_delay=%zu\n", overlap,
- max_lit_delay);
- max_delay = min(max_delay, max_lit_delay);
- }
-
- DEBUG_PRINTF("max_delay=%zu\n", max_delay);
- assert(max_delay <= MAX_DELAY);
- return max_delay;
-}
-
-static
-bool transformInfixToDelay(const RoseInGraph &ig, const RoseInEdge &e,
- const CompileContext &cc, u32 *delay_out) {
- const u32 max_history =
- cc.streaming ? cc.grey.maxHistoryAvailable : ROSE_BOUND_INF;
-
- const RoseInVertex u = source(e, ig), v = target(e, ig);
- const u32 graph_lag = ig[e].graph_lag;
-
- // Clone a copy of the graph, as we need to be able to roll back this
- // operation.
- NGHolder h;
- cloneHolder(h, *ig[e].graph);
-
- DEBUG_PRINTF("target literal: %s\n", dumpString(ig[v].s).c_str());
- DEBUG_PRINTF("graph with %zu vertices and graph_lag %u\n", num_vertices(h),
- graph_lag);
-
- assert(graph_lag <= ig[v].s.length());
- if (graph_lag < ig[v].s.length()) {
- size_t len = ig[v].s.length() - graph_lag;
- ue2_literal lit(ig[v].s.substr(0, len));
- DEBUG_PRINTF("lit2=%s\n", dumpString(lit).c_str());
- u32 delay2 = removeTrailingLiteralStates(h, lit, max_history);
- if (delay2 == MO_INVALID_IDX) {
- DEBUG_PRINTF("couldn't remove trailing literal\n");
- return false;
- }
- if (delay2 != len) {
- DEBUG_PRINTF("couldn't remove entire trailing literal\n");
- return false;
- }
- }
-
- PureRepeat repeat;
- if (!isPureRepeat(h, repeat)) {
- DEBUG_PRINTF("graph is not repeat\n");
- return false;
- }
- DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
- if (!repeat.bounds.max.is_infinite()) {
- DEBUG_PRINTF("not inf\n");
- return false;
- }
-
- if (!repeat.reach.all()) {
- DEBUG_PRINTF("non-dot reach\n");
- return false;
- }
-
- u32 delay = ig[v].s.length() + repeat.bounds.min;
- if (delay > MAX_DELAY) {
- DEBUG_PRINTF("delay %u > MAX_DELAY\n", delay);
- return false;
- }
-
- if (delay + ig[u].s.length() - 1 > max_history) {
- DEBUG_PRINTF("delay too large for history\n");
- return false;
- }
-
- *delay_out = delay;
- return true;
-}
-
-static
-void transformLiteralDelay(RoseInGraph &ig, const CompileContext &cc) {
- if (!cc.grey.roseTransformDelay) {
- return;
- }
-
- for (auto u : vertices_range(ig)) {
- if (ig[u].type != RIV_LITERAL) {
- continue;
- }
- if (out_degree(u, ig) != 1) {
- continue;
- }
-
- RoseInEdge e = *out_edges(u, ig).first;
- RoseInVertex v = target(e, ig);
- if (ig[v].type != RIV_LITERAL) {
- continue;
- }
- if (ig[e].haig) {
- continue;
- }
- if (!ig[e].graph) {
- continue;
- }
-
- if (predsAreDelaySensitive(ig, u)) {
- DEBUG_PRINTF("preds are delay sensitive\n");
- continue;
- }
-
- u32 max_delay = findMaxSafeDelay(ig, u, v);
-
- DEBUG_PRINTF("lit->lit edge with graph: '%s' -> '%s'\n",
- escapeString(ig[u].s).c_str(),
- escapeString(ig[v].s).c_str());
-
- u32 delay = 0;
- if (!transformInfixToDelay(ig, e, cc, &delay)) {
- continue;
- }
-
- if (delay > max_delay) {
- DEBUG_PRINTF("delay=%u > max_delay=%u\n", delay, max_delay);
- continue;
- }
-
- DEBUG_PRINTF("setting lit delay to %u and deleting graph\n", delay);
- ig[u].delay = delay;
- ig[u].min_offset = add_rose_depth(ig[u].min_offset, delay);
- ig[u].max_offset = add_rose_depth(ig[u].max_offset, delay);
- ig[e].graph_lag = 0;
- ig[e].graph.reset();
- ig[e].minBound = 0;
- ig[e].maxBound = ROSE_BOUND_INF;
- }
-}
-
-static
-bool transformInfixToAnchBounds(const RoseInGraph &ig, const RoseInEdge &e,
- const CompileContext &cc, DepthMinMax *bounds) {
- const u32 max_history = cc.streaming ? cc.grey.maxHistoryAvailable
- : ROSE_BOUND_INF;
-
- const RoseInVertex v = target(e, ig);
- const u32 graph_lag = ig[e].graph_lag;
-
- // Clone a copy of the graph, as we need to be able to roll back this
- // operation.
- NGHolder h;
- cloneHolder(h, *ig[e].graph);
-
- DEBUG_PRINTF("graph with %zu vertices and graph_lag %u\n", num_vertices(h),
- graph_lag);
-
- assert(graph_lag <= ig[v].s.length());
- if (graph_lag < ig[v].s.length()) {
- size_t len = ig[v].s.length() - graph_lag;
- ue2_literal lit(ig[v].s.substr(0, len));
- DEBUG_PRINTF("lit2=%s\n", dumpString(lit).c_str());
- u32 delay2 = removeTrailingLiteralStates(h, lit, max_history);
- if (delay2 == MO_INVALID_IDX) {
- DEBUG_PRINTF("couldn't remove trailing literal\n");
- return false;
- }
- if (delay2 != len) {
- DEBUG_PRINTF("couldn't remove entire trailing literal\n");
- return false;
- }
- }
-
- PureRepeat repeat;
- if (!isPureRepeat(h, repeat)) {
- DEBUG_PRINTF("graph is not repeat\n");
- return false;
- }
- DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
- if (!repeat.bounds.max.is_infinite()) {
- DEBUG_PRINTF("not inf\n");
- return false;
- }
-
- if (!repeat.reach.all()) {
- DEBUG_PRINTF("non-dot reach\n");
- return false;
- }
-
- *bounds = repeat.bounds;
- return true;
-}
-
-static
-void transformAnchoredLiteralOverlap(RoseInGraph &ig, RoseBuildData &bd,
- const CompileContext &cc) {
- if (!cc.grey.roseTransformDelay) {
- return;
- }
-
- for (const auto &e : edges_range(ig)) {
- const RoseInVertex u = source(e, ig);
- const RoseInVertex v = target(e, ig);
-
- if (ig[u].type != RIV_LITERAL || ig[v].type != RIV_LITERAL) {
- continue;
- }
- if (ig[e].haig || !ig[e].graph) {
- continue;
- }
-
- if (ig[u].min_offset != ig[u].max_offset) {
- DEBUG_PRINTF("u not fixed depth\n");
- continue;
- }
-
- DEBUG_PRINTF("anch_lit->lit edge with graph: '%s' -> '%s'\n",
- escapeString(ig[u].s).c_str(),
- escapeString(ig[v].s).c_str());
-
- DepthMinMax bounds;
- if (!transformInfixToAnchBounds(ig, e, cc, &bounds)) {
- continue;
- }
-
- DEBUG_PRINTF("setting bounds to %s and deleting graph\n",
- bounds.str().c_str());
- ig[e].graph_lag = 0;
- ig[e].graph.reset();
- ig[e].minBound = bounds.min;
- ig[e].maxBound = bounds.max.is_finite() ? (u32)bounds.max
- : ROSE_BOUND_INF;
- bd.anch_history_edges.insert(e);
- }
-}
-
-/**
- * \brief Transform small trailing dot repeat suffixes into delay on the last
- * literal.
- *
- * For example, the case /hatstand.*teakettle./s can just delay 'teakettle' +1
- * rather than having a suffix to handle the dot.
- *
- * This transformation looks for literal->accept edges and transforms them if
- * appropriate. It doesn't handle complex cases where the literal has more than
- * one successor.
- */
-static
-void transformSuffixDelay(RoseInGraph &ig, const CompileContext &cc) {
- if (!cc.grey.roseTransformDelay) {
- return;
- }
-
- const u32 max_history = cc.streaming ? cc.grey.maxHistoryAvailable
- : ROSE_BOUND_INF;
-
- set<RoseInVertex> modified_accepts; // may be dead after transform
-
- for (auto u : vertices_range(ig)) {
- if (ig[u].type != RIV_LITERAL) {
- continue;
- }
- if (out_degree(u, ig) != 1) {
- continue;
- }
-
- RoseInEdge e = *out_edges(u, ig).first;
- RoseInVertex v = target(e, ig);
- if (ig[v].type != RIV_ACCEPT) {
- continue;
- }
- if (ig[e].haig) {
- continue;
- }
- if (!ig[e].graph) {
- continue;
- }
-
- if (predsAreDelaySensitive(ig, u)) {
- DEBUG_PRINTF("preds are delay sensitive\n");
- continue;
- }
-
- DEBUG_PRINTF("lit->accept edge with graph: lit='%s'\n",
- escapeString(ig[u].s).c_str());
-
- const NGHolder &h = *ig[e].graph;
- const set<ReportID> reports = all_reports(h);
- if (reports.size() != 1) {
- DEBUG_PRINTF("too many reports\n");
- continue;
- }
-
- PureRepeat repeat;
- if (!isPureRepeat(h, repeat)) {
- DEBUG_PRINTF("suffix graph is not repeat\n");
- continue;
- }
- DEBUG_PRINTF("suffix graph is %s repeat\n",
- repeat.bounds.str().c_str());
-
- if (!repeat.reach.all()) {
- DEBUG_PRINTF("non-dot reach\n");
- continue;
- }
-
- if (repeat.bounds.min != repeat.bounds.max ||
- repeat.bounds.min > depth(MAX_DELAY)) {
- DEBUG_PRINTF("repeat is variable or too large\n");
- continue;
- }
-
- u32 max_delay = findMaxSafeDelay(ig, u, v);
-
- u32 delay = repeat.bounds.min;
- if (delay > max_delay) {
- DEBUG_PRINTF("delay=%u > max_delay=%u\n", delay, max_delay);
- continue;
- }
-
- if (delay + ig[u].s.length() - 1 > max_history) {
- DEBUG_PRINTF("delay too large for history\n");
- continue;
- }
-
- DEBUG_PRINTF("setting lit delay to %u and removing suffix\n", delay);
- ig[u].delay = delay;
- ig[u].min_offset = add_rose_depth(ig[u].min_offset, delay);
- ig[u].max_offset = add_rose_depth(ig[u].max_offset, delay);
-
- // Construct a new accept vertex for this report and remove edge e.
- // (This allows us to cope if v has more than one in-edge).
- RoseInVertex v2 =
- add_vertex(RoseInVertexProps::makeAccept(reports), ig);
- add_edge(u, v2, ig);
- remove_edge(e, ig);
- modified_accepts.insert(v);
- }
-
- DEBUG_PRINTF("%zu modified accepts\n", modified_accepts.size());
-
- for (auto v : modified_accepts) {
- if (in_degree(v, ig) == 0) {
- DEBUG_PRINTF("removing accept vertex with no preds\n");
- remove_vertex(v, ig);
- }
- }
-}
-
-#ifndef NDEBUG
-static
-bool validateKinds(const RoseInGraph &g) {
- for (const auto &e : edges_range(g)) {
- if (g[e].graph && g[e].graph->kind != whatRoseIsThis(g, e)) {
- return false;
- }
- }
-
- return true;
-}
-#endif
-
+ // First, check the overlap constraints on (u,v).
+ size_t max_delay;
+ if (ig[v].type == RIV_LITERAL) {
+ DEBUG_PRINTF("lit->lit edge: '%s' -> '%s'\n",
+ escapeString(ig[u].s).c_str(),
+ escapeString(ig[v].s).c_str());
+ max_delay = maxAvailableDelay(ig[u].s, ig[v].s);
+ } else if (ig[v].type == RIV_ACCEPT) {
+ DEBUG_PRINTF("lit->accept edge: '%s' -> ACCEPT\n",
+ escapeString(ig[u].s).c_str());
+ max_delay = MAX_DELAY;
+ } else {
+ assert(0);
+ return 0;
+ }
+
+ DEBUG_PRINTF("max safe delay for this edge: %zu\n", max_delay);
+
+ // Now consider the predecessors of u.
+ for (const auto &e : in_edges_range(u, ig)) {
+ RoseInVertex w = source(e, ig);
+ if (ig[w].type == RIV_START) {
+ continue;
+ }
+ assert(ig[w].type == RIV_LITERAL);
+ assert(ig[w].delay == 0);
+
+ DEBUG_PRINTF("pred lit->lit edge: '%s' -> '%s'\n",
+ escapeString(ig[w].s).c_str(),
+ escapeString(ig[u].s).c_str());
+
+ // We cannot delay the literal on u so much that a predecessor literal
+ // could occur in the delayed region. For example, consider
+ // 'barman.*foobar': if we allow 'foobar' to be delayed by 3, then
+ // 'barman' could occur in the input string and race with 'foobar', as
+ // in 'foobarman'.
+
+ const size_t pred_len = ig[w].s.length();
+ size_t overlap = maxOverlap(ig[u].s, ig[w].s, 0);
+ DEBUG_PRINTF("pred_len=%zu, overlap=%zu\n", pred_len, overlap);
+ assert(overlap <= pred_len);
+ size_t max_lit_delay = pred_len - min(overlap + 1, pred_len);
+ DEBUG_PRINTF("overlap=%zu -> max_lit_delay=%zu\n", overlap,
+ max_lit_delay);
+ max_delay = min(max_delay, max_lit_delay);
+ }
+
+ DEBUG_PRINTF("max_delay=%zu\n", max_delay);
+ assert(max_delay <= MAX_DELAY);
+ return max_delay;
+}
+
+static
+bool transformInfixToDelay(const RoseInGraph &ig, const RoseInEdge &e,
+ const CompileContext &cc, u32 *delay_out) {
+ const u32 max_history =
+ cc.streaming ? cc.grey.maxHistoryAvailable : ROSE_BOUND_INF;
+
+ const RoseInVertex u = source(e, ig), v = target(e, ig);
+ const u32 graph_lag = ig[e].graph_lag;
+
+ // Clone a copy of the graph, as we need to be able to roll back this
+ // operation.
+ NGHolder h;
+ cloneHolder(h, *ig[e].graph);
+
+ DEBUG_PRINTF("target literal: %s\n", dumpString(ig[v].s).c_str());
+ DEBUG_PRINTF("graph with %zu vertices and graph_lag %u\n", num_vertices(h),
+ graph_lag);
+
+ assert(graph_lag <= ig[v].s.length());
+ if (graph_lag < ig[v].s.length()) {
+ size_t len = ig[v].s.length() - graph_lag;
+ ue2_literal lit(ig[v].s.substr(0, len));
+ DEBUG_PRINTF("lit2=%s\n", dumpString(lit).c_str());
+ u32 delay2 = removeTrailingLiteralStates(h, lit, max_history);
+ if (delay2 == MO_INVALID_IDX) {
+ DEBUG_PRINTF("couldn't remove trailing literal\n");
+ return false;
+ }
+ if (delay2 != len) {
+ DEBUG_PRINTF("couldn't remove entire trailing literal\n");
+ return false;
+ }
+ }
+
+ PureRepeat repeat;
+ if (!isPureRepeat(h, repeat)) {
+ DEBUG_PRINTF("graph is not repeat\n");
+ return false;
+ }
+ DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
+ if (!repeat.bounds.max.is_infinite()) {
+ DEBUG_PRINTF("not inf\n");
+ return false;
+ }
+
+ if (!repeat.reach.all()) {
+ DEBUG_PRINTF("non-dot reach\n");
+ return false;
+ }
+
+ u32 delay = ig[v].s.length() + repeat.bounds.min;
+ if (delay > MAX_DELAY) {
+ DEBUG_PRINTF("delay %u > MAX_DELAY\n", delay);
+ return false;
+ }
+
+ if (delay + ig[u].s.length() - 1 > max_history) {
+ DEBUG_PRINTF("delay too large for history\n");
+ return false;
+ }
+
+ *delay_out = delay;
+ return true;
+}
+
+static
+void transformLiteralDelay(RoseInGraph &ig, const CompileContext &cc) {
+ if (!cc.grey.roseTransformDelay) {
+ return;
+ }
+
+ for (auto u : vertices_range(ig)) {
+ if (ig[u].type != RIV_LITERAL) {
+ continue;
+ }
+ if (out_degree(u, ig) != 1) {
+ continue;
+ }
+
+ RoseInEdge e = *out_edges(u, ig).first;
+ RoseInVertex v = target(e, ig);
+ if (ig[v].type != RIV_LITERAL) {
+ continue;
+ }
+ if (ig[e].haig) {
+ continue;
+ }
+ if (!ig[e].graph) {
+ continue;
+ }
+
+ if (predsAreDelaySensitive(ig, u)) {
+ DEBUG_PRINTF("preds are delay sensitive\n");
+ continue;
+ }
+
+ u32 max_delay = findMaxSafeDelay(ig, u, v);
+
+ DEBUG_PRINTF("lit->lit edge with graph: '%s' -> '%s'\n",
+ escapeString(ig[u].s).c_str(),
+ escapeString(ig[v].s).c_str());
+
+ u32 delay = 0;
+ if (!transformInfixToDelay(ig, e, cc, &delay)) {
+ continue;
+ }
+
+ if (delay > max_delay) {
+ DEBUG_PRINTF("delay=%u > max_delay=%u\n", delay, max_delay);
+ continue;
+ }
+
+ DEBUG_PRINTF("setting lit delay to %u and deleting graph\n", delay);
+ ig[u].delay = delay;
+ ig[u].min_offset = add_rose_depth(ig[u].min_offset, delay);
+ ig[u].max_offset = add_rose_depth(ig[u].max_offset, delay);
+ ig[e].graph_lag = 0;
+ ig[e].graph.reset();
+ ig[e].minBound = 0;
+ ig[e].maxBound = ROSE_BOUND_INF;
+ }
+}
+
+static
+bool transformInfixToAnchBounds(const RoseInGraph &ig, const RoseInEdge &e,
+ const CompileContext &cc, DepthMinMax *bounds) {
+ const u32 max_history = cc.streaming ? cc.grey.maxHistoryAvailable
+ : ROSE_BOUND_INF;
+
+ const RoseInVertex v = target(e, ig);
+ const u32 graph_lag = ig[e].graph_lag;
+
+ // Clone a copy of the graph, as we need to be able to roll back this
+ // operation.
+ NGHolder h;
+ cloneHolder(h, *ig[e].graph);
+
+ DEBUG_PRINTF("graph with %zu vertices and graph_lag %u\n", num_vertices(h),
+ graph_lag);
+
+ assert(graph_lag <= ig[v].s.length());
+ if (graph_lag < ig[v].s.length()) {
+ size_t len = ig[v].s.length() - graph_lag;
+ ue2_literal lit(ig[v].s.substr(0, len));
+ DEBUG_PRINTF("lit2=%s\n", dumpString(lit).c_str());
+ u32 delay2 = removeTrailingLiteralStates(h, lit, max_history);
+ if (delay2 == MO_INVALID_IDX) {
+ DEBUG_PRINTF("couldn't remove trailing literal\n");
+ return false;
+ }
+ if (delay2 != len) {
+ DEBUG_PRINTF("couldn't remove entire trailing literal\n");
+ return false;
+ }
+ }
+
+ PureRepeat repeat;
+ if (!isPureRepeat(h, repeat)) {
+ DEBUG_PRINTF("graph is not repeat\n");
+ return false;
+ }
+ DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
+ if (!repeat.bounds.max.is_infinite()) {
+ DEBUG_PRINTF("not inf\n");
+ return false;
+ }
+
+ if (!repeat.reach.all()) {
+ DEBUG_PRINTF("non-dot reach\n");
+ return false;
+ }
+
+ *bounds = repeat.bounds;
+ return true;
+}
+
+static
+void transformAnchoredLiteralOverlap(RoseInGraph &ig, RoseBuildData &bd,
+ const CompileContext &cc) {
+ if (!cc.grey.roseTransformDelay) {
+ return;
+ }
+
+ for (const auto &e : edges_range(ig)) {
+ const RoseInVertex u = source(e, ig);
+ const RoseInVertex v = target(e, ig);
+
+ if (ig[u].type != RIV_LITERAL || ig[v].type != RIV_LITERAL) {
+ continue;
+ }
+ if (ig[e].haig || !ig[e].graph) {
+ continue;
+ }
+
+ if (ig[u].min_offset != ig[u].max_offset) {
+ DEBUG_PRINTF("u not fixed depth\n");
+ continue;
+ }
+
+ DEBUG_PRINTF("anch_lit->lit edge with graph: '%s' -> '%s'\n",
+ escapeString(ig[u].s).c_str(),
+ escapeString(ig[v].s).c_str());
+
+ DepthMinMax bounds;
+ if (!transformInfixToAnchBounds(ig, e, cc, &bounds)) {
+ continue;
+ }
+
+ DEBUG_PRINTF("setting bounds to %s and deleting graph\n",
+ bounds.str().c_str());
+ ig[e].graph_lag = 0;
+ ig[e].graph.reset();
+ ig[e].minBound = bounds.min;
+ ig[e].maxBound = bounds.max.is_finite() ? (u32)bounds.max
+ : ROSE_BOUND_INF;
+ bd.anch_history_edges.insert(e);
+ }
+}
+
+/**
+ * \brief Transform small trailing dot repeat suffixes into delay on the last
+ * literal.
+ *
+ * For example, the case /hatstand.*teakettle./s can just delay 'teakettle' +1
+ * rather than having a suffix to handle the dot.
+ *
+ * This transformation looks for literal->accept edges and transforms them if
+ * appropriate. It doesn't handle complex cases where the literal has more than
+ * one successor.
+ */
+static
+void transformSuffixDelay(RoseInGraph &ig, const CompileContext &cc) {
+ if (!cc.grey.roseTransformDelay) {
+ return;
+ }
+
+ const u32 max_history = cc.streaming ? cc.grey.maxHistoryAvailable
+ : ROSE_BOUND_INF;
+
+ set<RoseInVertex> modified_accepts; // may be dead after transform
+
+ for (auto u : vertices_range(ig)) {
+ if (ig[u].type != RIV_LITERAL) {
+ continue;
+ }
+ if (out_degree(u, ig) != 1) {
+ continue;
+ }
+
+ RoseInEdge e = *out_edges(u, ig).first;
+ RoseInVertex v = target(e, ig);
+ if (ig[v].type != RIV_ACCEPT) {
+ continue;
+ }
+ if (ig[e].haig) {
+ continue;
+ }
+ if (!ig[e].graph) {
+ continue;
+ }
+
+ if (predsAreDelaySensitive(ig, u)) {
+ DEBUG_PRINTF("preds are delay sensitive\n");
+ continue;
+ }
+
+ DEBUG_PRINTF("lit->accept edge with graph: lit='%s'\n",
+ escapeString(ig[u].s).c_str());
+
+ const NGHolder &h = *ig[e].graph;
+ const set<ReportID> reports = all_reports(h);
+ if (reports.size() != 1) {
+ DEBUG_PRINTF("too many reports\n");
+ continue;
+ }
+
+ PureRepeat repeat;
+ if (!isPureRepeat(h, repeat)) {
+ DEBUG_PRINTF("suffix graph is not repeat\n");
+ continue;
+ }
+ DEBUG_PRINTF("suffix graph is %s repeat\n",
+ repeat.bounds.str().c_str());
+
+ if (!repeat.reach.all()) {
+ DEBUG_PRINTF("non-dot reach\n");
+ continue;
+ }
+
+ if (repeat.bounds.min != repeat.bounds.max ||
+ repeat.bounds.min > depth(MAX_DELAY)) {
+ DEBUG_PRINTF("repeat is variable or too large\n");
+ continue;
+ }
+
+ u32 max_delay = findMaxSafeDelay(ig, u, v);
+
+ u32 delay = repeat.bounds.min;
+ if (delay > max_delay) {
+ DEBUG_PRINTF("delay=%u > max_delay=%u\n", delay, max_delay);
+ continue;
+ }
+
+ if (delay + ig[u].s.length() - 1 > max_history) {
+ DEBUG_PRINTF("delay too large for history\n");
+ continue;
+ }
+
+ DEBUG_PRINTF("setting lit delay to %u and removing suffix\n", delay);
+ ig[u].delay = delay;
+ ig[u].min_offset = add_rose_depth(ig[u].min_offset, delay);
+ ig[u].max_offset = add_rose_depth(ig[u].max_offset, delay);
+
+ // Construct a new accept vertex for this report and remove edge e.
+ // (This allows us to cope if v has more than one in-edge).
+ RoseInVertex v2 =
+ add_vertex(RoseInVertexProps::makeAccept(reports), ig);
+ add_edge(u, v2, ig);
+ remove_edge(e, ig);
+ modified_accepts.insert(v);
+ }
+
+ DEBUG_PRINTF("%zu modified accepts\n", modified_accepts.size());
+
+ for (auto v : modified_accepts) {
+ if (in_degree(v, ig) == 0) {
+ DEBUG_PRINTF("removing accept vertex with no preds\n");
+ remove_vertex(v, ig);
+ }
+ }
+}
+
+#ifndef NDEBUG
+static
+bool validateKinds(const RoseInGraph &g) {
+ for (const auto &e : edges_range(g)) {
+ if (g[e].graph && g[e].graph->kind != whatRoseIsThis(g, e)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+#endif
+
bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter) {
- DEBUG_PRINTF("trying to rose\n");
- assert(validateKinds(ig));
+ DEBUG_PRINTF("trying to rose\n");
+ assert(validateKinds(ig));
assert(hasCorrectlyNumberedVertices(ig));
-
- if (::ue2::empty(ig)) {
- assert(0);
- return false;
- }
-
- const unique_ptr<RoseInGraph> in_ptr = cloneRoseGraph(ig);
- RoseInGraph &in = *in_ptr;
-
- RoseBuildData bd(in, false);
-
- transformLiteralDelay(in, cc);
- transformAnchoredLiteralOverlap(in, bd, cc);
- transformSuffixDelay(in, cc);
-
+
+ if (::ue2::empty(ig)) {
+ assert(0);
+ return false;
+ }
+
+ const unique_ptr<RoseInGraph> in_ptr = cloneRoseGraph(ig);
+ RoseInGraph &in = *in_ptr;
+
+ RoseBuildData bd(in, false);
+
+ transformLiteralDelay(in, cc);
+ transformAnchoredLiteralOverlap(in, bd, cc);
+ transformSuffixDelay(in, cc);
+
renumber_vertices(in);
assert(validateKinds(in));
-
+
insertion_ordered_map<NGHolder *, vector<RoseInEdge>> graphs;
-
- for (const auto &e : edges_range(in)) {
- if (!in[e].graph) {
+
+ for (const auto &e : edges_range(in)) {
+ if (!in[e].graph) {
assert(!in[e].dfa);
assert(!in[e].haig);
- continue; // no graph
- }
-
+ continue; // no graph
+ }
+
if (in[e].haig || in[e].dfa) {
/* Early DFAs/Haigs are always implementable (we've already built
* the raw DFA). */
- continue;
- }
-
- NGHolder *h = in[e].graph.get();
+ continue;
+ }
+
+ NGHolder *h = in[e].graph.get();
assert(isCorrectlyTopped(*h));
- graphs[h].push_back(e);
- }
-
- vector<RoseInEdge> graph_edges;
-
+ graphs[h].push_back(e);
+ }
+
+ vector<RoseInEdge> graph_edges;
+
for (const auto &m : graphs) {
NGHolder *h = m.first;
if (!canImplementGraph(*h, prefilter, rm, cc)) {
- return false;
- }
+ return false;
+ }
insert(&graph_edges, graph_edges.end(), m.second);
- }
-
- /* we are now past the point of no return. We can start making irreversible
- changes to the rose graph, etc */
-
- for (const auto &e : graph_edges) {
- assert(in[e].graph);
- assert(!in[e].haig);
- NGHolder &h = *in[e].graph;
- DEBUG_PRINTF("handling %p\n", &h);
- assert(allMatchStatesHaveReports(h));
-
- if (!generates_callbacks(whatRoseIsThis(in, e))
- && in[target(e, in)].type != RIV_ACCEPT_EOD) {
+ }
+
+ /* we are now past the point of no return. We can start making irreversible
+ changes to the rose graph, etc */
+
+ for (const auto &e : graph_edges) {
+ assert(in[e].graph);
+ assert(!in[e].haig);
+ NGHolder &h = *in[e].graph;
+ DEBUG_PRINTF("handling %p\n", &h);
+ assert(allMatchStatesHaveReports(h));
+
+ if (!generates_callbacks(whatRoseIsThis(in, e))
+ && in[target(e, in)].type != RIV_ACCEPT_EOD) {
set_report(h, getNewNfaReport());
- }
- }
-
- populateRoseGraph(this, bd);
-
- return true;
-}
-
-bool RoseBuildImpl::addSombeRose(const RoseInGraph &ig) {
- DEBUG_PRINTF("rose is trying to consume a sombe\n");
- assert(validateKinds(ig));
-
- if (::ue2::empty(ig)) {
- assert(0);
- return false;
- }
-
- RoseBuildData bd(ig, true);
-
- for (const auto &e : edges_range(ig)) {
- if (!ig[e].graph) {
- continue; // no graph
- }
- DEBUG_PRINTF("handling %p\n", ig[e].graph.get());
- assert(allMatchStatesHaveReports(*ig[e].graph));
- assert(ig[e].haig);
- }
-
- populateRoseGraph(this, bd);
-
- return true;
-}
-
-bool roseCheckRose(const RoseInGraph &ig, bool prefilter,
- const ReportManager &rm, const CompileContext &cc) {
- assert(validateKinds(ig));
-
- if (::ue2::empty(ig)) {
- assert(0);
- return false;
- }
-
+ }
+ }
+
+ populateRoseGraph(this, bd);
+
+ return true;
+}
+
+bool RoseBuildImpl::addSombeRose(const RoseInGraph &ig) {
+ DEBUG_PRINTF("rose is trying to consume a sombe\n");
+ assert(validateKinds(ig));
+
+ if (::ue2::empty(ig)) {
+ assert(0);
+ return false;
+ }
+
+ RoseBuildData bd(ig, true);
+
+ for (const auto &e : edges_range(ig)) {
+ if (!ig[e].graph) {
+ continue; // no graph
+ }
+ DEBUG_PRINTF("handling %p\n", ig[e].graph.get());
+ assert(allMatchStatesHaveReports(*ig[e].graph));
+ assert(ig[e].haig);
+ }
+
+ populateRoseGraph(this, bd);
+
+ return true;
+}
+
+bool roseCheckRose(const RoseInGraph &ig, bool prefilter,
+ const ReportManager &rm, const CompileContext &cc) {
+ assert(validateKinds(ig));
+
+ if (::ue2::empty(ig)) {
+ assert(0);
+ return false;
+ }
+
vector<NGHolder *> graphs;
-
- for (const auto &e : edges_range(ig)) {
- if (!ig[e].graph) {
- continue; // no graph
- }
-
- if (ig[e].haig) {
- // Haigs are always implementable (we've already built the raw DFA).
- continue;
- }
-
+
+ for (const auto &e : edges_range(ig)) {
+ if (!ig[e].graph) {
+ continue; // no graph
+ }
+
+ if (ig[e].haig) {
+ // Haigs are always implementable (we've already built the raw DFA).
+ continue;
+ }
+
graphs.push_back(ig[e].graph.get());
- }
-
+ }
+
for (const auto &g : graphs) {
if (!canImplementGraph(*g, prefilter, rm, cc)) {
- return false;
- }
- }
-
- return true;
-}
-
-void RoseBuildImpl::add(bool anchored, bool eod, const ue2_literal &lit,
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void RoseBuildImpl::add(bool anchored, bool eod, const ue2_literal &lit,
const flat_set<ReportID> &reports) {
- assert(!reports.empty());
-
- if (cc.grey.floodAsPuffette && !anchored && !eod && is_flood(lit) &&
- lit.length() > 3) {
- DEBUG_PRINTF("adding as puffette\n");
- const CharReach &cr = *lit.begin();
- for (const auto &report : reports) {
- addOutfix(raw_puff(lit.length(), true, report, cr, true));
- }
-
- return;
- }
-
- RoseInGraph ig;
- RoseInVertex start = add_vertex(RoseInVertexProps::makeStart(anchored), ig);
- RoseInVertex accept = add_vertex(
- eod ? RoseInVertexProps::makeAcceptEod(set<ReportID>())
- : RoseInVertexProps::makeAccept(set<ReportID>()), ig);
- RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
-
- add_edge(start, v, RoseInEdgeProps(0U, anchored ? 0U : ROSE_BOUND_INF), ig);
- add_edge(v, accept, RoseInEdgeProps(0U, 0U), ig);
-
- calcVertexOffsets(ig);
-
- ig[accept].reports.insert(reports.begin(), reports.end());
-
- addRose(ig, false);
-}
-
-static
-u32 findMaxBAWidth(const NGHolder &h) {
- // Must be bi-anchored: no out-edges from startDs (other than its
- // self-loop), no in-edges to accept.
+ assert(!reports.empty());
+
+ if (cc.grey.floodAsPuffette && !anchored && !eod && is_flood(lit) &&
+ lit.length() > 3) {
+ DEBUG_PRINTF("adding as puffette\n");
+ const CharReach &cr = *lit.begin();
+ for (const auto &report : reports) {
+ addOutfix(raw_puff(lit.length(), true, report, cr, true));
+ }
+
+ return;
+ }
+
+ RoseInGraph ig;
+ RoseInVertex start = add_vertex(RoseInVertexProps::makeStart(anchored), ig);
+ RoseInVertex accept = add_vertex(
+ eod ? RoseInVertexProps::makeAcceptEod(set<ReportID>())
+ : RoseInVertexProps::makeAccept(set<ReportID>()), ig);
+ RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
+
+ add_edge(start, v, RoseInEdgeProps(0U, anchored ? 0U : ROSE_BOUND_INF), ig);
+ add_edge(v, accept, RoseInEdgeProps(0U, 0U), ig);
+
+ calcVertexOffsets(ig);
+
+ ig[accept].reports.insert(reports.begin(), reports.end());
+
+ addRose(ig, false);
+}
+
+static
+u32 findMaxBAWidth(const NGHolder &h) {
+ // Must be bi-anchored: no out-edges from startDs (other than its
+ // self-loop), no in-edges to accept.
if (out_degree(h.startDs, h) > 1 || in_degree(h.accept, h)) {
- return ROSE_BOUND_INF;
- }
- depth d = findMaxWidth(h);
- assert(d.is_reachable());
-
- if (!d.is_finite()) {
- return ROSE_BOUND_INF;
- }
- return d;
-}
-
-static
-void populateOutfixInfo(OutfixInfo &outfix, const NGHolder &h,
- const RoseBuildImpl &tbi) {
- outfix.maxBAWidth = findMaxBAWidth(h);
- outfix.minWidth = findMinWidth(h);
- outfix.maxWidth = findMaxWidth(h);
- outfix.maxOffset = findMaxOffset(h, tbi.rm);
- populateReverseAccelerationInfo(outfix.rev_info, h);
-}
-
+ return ROSE_BOUND_INF;
+ }
+ depth d = findMaxWidth(h);
+ assert(d.is_reachable());
+
+ if (!d.is_finite()) {
+ return ROSE_BOUND_INF;
+ }
+ return d;
+}
+
+static
+void populateOutfixInfo(OutfixInfo &outfix, const NGHolder &h,
+ const RoseBuildImpl &tbi) {
+ outfix.maxBAWidth = findMaxBAWidth(h);
+ outfix.minWidth = findMinWidth(h);
+ outfix.maxWidth = findMaxWidth(h);
+ outfix.maxOffset = findMaxOffset(h, tbi.rm);
+ populateReverseAccelerationInfo(outfix.rev_info, h);
+}
+
static
bool addEodOutfix(RoseBuildImpl &build, const NGHolder &h) {
map<flat_set<ReportID>, ReportID> report_remap;
@@ -1748,9 +1748,9 @@ bool addEodOutfix(RoseBuildImpl &build, const NGHolder &h) {
return true;
}
-bool RoseBuildImpl::addOutfix(const NGHolder &h) {
- DEBUG_PRINTF("%zu vertices, %zu edges\n", num_vertices(h), num_edges(h));
-
+bool RoseBuildImpl::addOutfix(const NGHolder &h) {
+ DEBUG_PRINTF("%zu vertices, %zu edges\n", num_vertices(h), num_edges(h));
+
/* TODO: handle more than one report */
if (!in_degree(h.accept, h)
&& all_reports(h).size() == 1
@@ -1758,241 +1758,241 @@ bool RoseBuildImpl::addOutfix(const NGHolder &h) {
return true;
}
- const u32 nfa_states = isImplementableNFA(h, &rm, cc);
- if (nfa_states) {
- DEBUG_PRINTF("implementable as an NFA in %u states\n", nfa_states);
- } else {
- DEBUG_PRINTF("not implementable as an NFA\n");
- }
-
- bool dfa_cand = !nfa_states || nfa_states > 128 /* slow model */
- || can_exhaust(h, rm); /* can be pruned */
-
- unique_ptr<raw_dfa> rdfa;
-
- if (!nfa_states || cc.grey.roseMcClellanOutfix == 2 ||
- (cc.grey.roseMcClellanOutfix == 1 && dfa_cand)) {
- rdfa = buildMcClellan(h, &rm, cc.grey);
- }
-
- if (!nfa_states && !rdfa) {
- DEBUG_PRINTF("could not build as either an NFA or a DFA\n");
- return false;
- }
-
- if (rdfa) {
- outfixes.push_back(OutfixInfo(move(rdfa)));
- } else {
- outfixes.push_back(OutfixInfo(cloneHolder(h)));
- }
-
- populateOutfixInfo(outfixes.back(), h, *this);
-
- return true;
-}
-
-bool RoseBuildImpl::addOutfix(const NGHolder &h, const raw_som_dfa &haig) {
- DEBUG_PRINTF("haig with %zu states\n", haig.states.size());
-
- outfixes.push_back(OutfixInfo(ue2::make_unique<raw_som_dfa>(haig)));
- populateOutfixInfo(outfixes.back(), h, *this);
-
- return true; /* failure is not yet an option */
-}
-
-bool RoseBuildImpl::addOutfix(const raw_puff &rp) {
- if (!mpv_outfix) {
+ const u32 nfa_states = isImplementableNFA(h, &rm, cc);
+ if (nfa_states) {
+ DEBUG_PRINTF("implementable as an NFA in %u states\n", nfa_states);
+ } else {
+ DEBUG_PRINTF("not implementable as an NFA\n");
+ }
+
+ bool dfa_cand = !nfa_states || nfa_states > 128 /* slow model */
+ || can_exhaust(h, rm); /* can be pruned */
+
+ unique_ptr<raw_dfa> rdfa;
+
+ if (!nfa_states || cc.grey.roseMcClellanOutfix == 2 ||
+ (cc.grey.roseMcClellanOutfix == 1 && dfa_cand)) {
+ rdfa = buildMcClellan(h, &rm, cc.grey);
+ }
+
+ if (!nfa_states && !rdfa) {
+ DEBUG_PRINTF("could not build as either an NFA or a DFA\n");
+ return false;
+ }
+
+ if (rdfa) {
+ outfixes.push_back(OutfixInfo(move(rdfa)));
+ } else {
+ outfixes.push_back(OutfixInfo(cloneHolder(h)));
+ }
+
+ populateOutfixInfo(outfixes.back(), h, *this);
+
+ return true;
+}
+
+bool RoseBuildImpl::addOutfix(const NGHolder &h, const raw_som_dfa &haig) {
+ DEBUG_PRINTF("haig with %zu states\n", haig.states.size());
+
+ outfixes.push_back(OutfixInfo(ue2::make_unique<raw_som_dfa>(haig)));
+ populateOutfixInfo(outfixes.back(), h, *this);
+
+ return true; /* failure is not yet an option */
+}
+
+bool RoseBuildImpl::addOutfix(const raw_puff &rp) {
+ if (!mpv_outfix) {
mpv_outfix = std::make_unique<OutfixInfo>(MpvProto());
- }
-
+ }
+
auto *mpv = mpv_outfix->mpv();
assert(mpv);
mpv->puffettes.push_back(rp);
-
- mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */
- mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats));
- mpv_outfix->maxWidth = rp.unbounded
- ? depth::infinity()
- : max(mpv_outfix->maxWidth, depth(rp.repeats));
-
- if (mpv_outfix->maxOffset == ROSE_BOUND_INF || rp.unbounded) {
- mpv_outfix->maxOffset = ROSE_BOUND_INF;
- } else {
- mpv_outfix->maxOffset = MAX(mpv_outfix->maxOffset, rp.repeats);
- }
-
- return true; /* failure is not yet an option */
-}
-
-bool RoseBuildImpl::addChainTail(const raw_puff &rp, u32 *queue_out,
- u32 *event_out) {
- if (!mpv_outfix) {
+
+ mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */
+ mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats));
+ mpv_outfix->maxWidth = rp.unbounded
+ ? depth::infinity()
+ : max(mpv_outfix->maxWidth, depth(rp.repeats));
+
+ if (mpv_outfix->maxOffset == ROSE_BOUND_INF || rp.unbounded) {
+ mpv_outfix->maxOffset = ROSE_BOUND_INF;
+ } else {
+ mpv_outfix->maxOffset = MAX(mpv_outfix->maxOffset, rp.repeats);
+ }
+
+ return true; /* failure is not yet an option */
+}
+
+bool RoseBuildImpl::addChainTail(const raw_puff &rp, u32 *queue_out,
+ u32 *event_out) {
+ if (!mpv_outfix) {
mpv_outfix = std::make_unique<OutfixInfo>(MpvProto());
- }
-
+ }
+
auto *mpv = mpv_outfix->mpv();
assert(mpv);
mpv->triggered_puffettes.push_back(rp);
-
- mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */
- mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats));
- mpv_outfix->maxWidth = rp.unbounded
- ? depth::infinity()
- : max(mpv_outfix->maxWidth, depth(rp.repeats));
-
- mpv_outfix->maxOffset = ROSE_BOUND_INF; /* TODO: we could get information from
- * the caller */
-
- *queue_out = mpv_outfix->get_queue(qif);
+
+ mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */
+ mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats));
+ mpv_outfix->maxWidth = rp.unbounded
+ ? depth::infinity()
+ : max(mpv_outfix->maxWidth, depth(rp.repeats));
+
+ mpv_outfix->maxOffset = ROSE_BOUND_INF; /* TODO: we could get information from
+ * the caller */
+
+ *queue_out = mpv_outfix->get_queue(qif);
*event_out = MQE_TOP_FIRST + mpv->triggered_puffettes.size() - 1;
-
- return true; /* failure is not yet an option */
-}
-
-static
-bool prepAcceptForAddAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &w,
+
+ return true; /* failure is not yet an option */
+}
+
+static
+bool prepAcceptForAddAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &w,
NFAVertex u,
- const vector<DepthMinMax> &vertexDepths,
- map<u32, DepthMinMax> &depthMap,
+ const vector<DepthMinMax> &vertexDepths,
+ map<u32, DepthMinMax> &depthMap,
map<NFAVertex, set<u32>> &reportMap,
- map<ReportID, u32> &allocated_reports,
- flat_set<u32> &added_lit_ids) {
- const depth max_anchored_depth(tbi.cc.grey.maxAnchoredRegion);
+ map<ReportID, u32> &allocated_reports,
+ flat_set<u32> &added_lit_ids) {
+ const depth max_anchored_depth(tbi.cc.grey.maxAnchoredRegion);
const size_t index = w[u].index;
assert(index < vertexDepths.size());
const DepthMinMax &d = vertexDepths.at(index);
-
- for (const auto &int_report : w[u].reports) {
- assert(int_report != MO_INVALID_IDX);
-
- u32 lit_id;
- if (!contains(allocated_reports, int_report)) {
- lit_id = tbi.getNewLiteralId();
- added_lit_ids.insert(lit_id);
- allocated_reports[int_report] = lit_id;
- } else {
- lit_id = allocated_reports[int_report];
- }
-
- reportMap[u].insert(lit_id);
-
- if (!contains(depthMap, lit_id)) {
- depthMap[lit_id] = d;
- } else {
- depthMap[lit_id] = unionDepthMinMax(depthMap[lit_id], d);
- }
-
+
+ for (const auto &int_report : w[u].reports) {
+ assert(int_report != MO_INVALID_IDX);
+
+ u32 lit_id;
+ if (!contains(allocated_reports, int_report)) {
+ lit_id = tbi.getNewLiteralId();
+ added_lit_ids.insert(lit_id);
+ allocated_reports[int_report] = lit_id;
+ } else {
+ lit_id = allocated_reports[int_report];
+ }
+
+ reportMap[u].insert(lit_id);
+
+ if (!contains(depthMap, lit_id)) {
+ depthMap[lit_id] = d;
+ } else {
+ depthMap[lit_id] = unionDepthMinMax(depthMap[lit_id], d);
+ }
+
if (depthMap[lit_id].max > max_anchored_depth) {
- DEBUG_PRINTF("depth=%s exceeds maxAnchoredRegion=%u\n",
+ DEBUG_PRINTF("depth=%s exceeds maxAnchoredRegion=%u\n",
depthMap[lit_id].max.str().c_str(),
- tbi.cc.grey.maxAnchoredRegion);
- return false;
- }
- }
-
- return true;
-}
-
-// Failure path for addAnchoredAcyclic: removes the literal IDs that have been
-// added to support anchored NFAs. Assumes that they are a contiguous range at
-// the end of the RoseBuildImpl::literal_info vector.
-static
-void removeAddedLiterals(RoseBuildImpl &tbi, const flat_set<u32> &lit_ids) {
- if (lit_ids.empty()) {
- return;
- }
-
+ tbi.cc.grey.maxAnchoredRegion);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Failure path for addAnchoredAcyclic: removes the literal IDs that have been
+// added to support anchored NFAs. Assumes that they are a contiguous range at
+// the end of the RoseBuildImpl::literal_info vector.
+static
+void removeAddedLiterals(RoseBuildImpl &tbi, const flat_set<u32> &lit_ids) {
+ if (lit_ids.empty()) {
+ return;
+ }
+
DEBUG_PRINTF("remove last %zu literals\n", lit_ids.size());
- // lit_ids should be a contiguous range.
- assert(lit_ids.size() == *lit_ids.rbegin() - *lit_ids.begin() + 1);
+ // lit_ids should be a contiguous range.
+ assert(lit_ids.size() == *lit_ids.rbegin() - *lit_ids.begin() + 1);
assert(*lit_ids.rbegin() == tbi.literals.size() - 1);
-
+
assert(all_of_in(lit_ids, [&](u32 lit_id) {
return lit_id < tbi.literal_info.size() &&
tbi.literals.at(lit_id).table == ROSE_ANCHORED &&
tbi.literal_info[lit_id].vertices.empty();
}));
-
+
tbi.literals.erase_back(lit_ids.size());
assert(tbi.literals.size() == *lit_ids.begin());
-
- // lit_ids should be at the end of tbi.literal_info.
- assert(tbi.literal_info.size() == *lit_ids.rbegin() + 1);
- tbi.literal_info.resize(*lit_ids.begin()); // remove all ids in lit_ids
-}
-
-bool RoseBuildImpl::addAnchoredAcyclic(const NGHolder &h) {
+
+ // lit_ids should be at the end of tbi.literal_info.
+ assert(tbi.literal_info.size() == *lit_ids.rbegin() + 1);
+ tbi.literal_info.resize(*lit_ids.begin()); // remove all ids in lit_ids
+}
+
+bool RoseBuildImpl::addAnchoredAcyclic(const NGHolder &h) {
auto vertexDepths = calcDepthsFrom(h, h.start);
-
- map<NFAVertex, set<u32> > reportMap; /* NFAVertex -> literal ids */
- map<u32, DepthMinMax> depthMap; /* literal id -> min/max depth */
- map<ReportID, u32> allocated_reports; /* report -> literal id */
- flat_set<u32> added_lit_ids; /* literal ids added for this NFA */
-
- for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
+
+ map<NFAVertex, set<u32> > reportMap; /* NFAVertex -> literal ids */
+ map<u32, DepthMinMax> depthMap; /* literal id -> min/max depth */
+ map<ReportID, u32> allocated_reports; /* report -> literal id */
+ flat_set<u32> added_lit_ids; /* literal ids added for this NFA */
+
+ for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
if (!prepAcceptForAddAnchoredNFA(*this, h, v, vertexDepths, depthMap,
- reportMap, allocated_reports,
- added_lit_ids)) {
- removeAddedLiterals(*this, added_lit_ids);
- return false;
- }
- }
-
- map<ReportID, u32> allocated_reports_eod; /* report -> literal id */
-
- for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) {
- if (v == h.accept) {
- continue;
- }
+ reportMap, allocated_reports,
+ added_lit_ids)) {
+ removeAddedLiterals(*this, added_lit_ids);
+ return false;
+ }
+ }
+
+ map<ReportID, u32> allocated_reports_eod; /* report -> literal id */
+
+ for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) {
+ if (v == h.accept) {
+ continue;
+ }
if (!prepAcceptForAddAnchoredNFA(*this, h, v, vertexDepths, depthMap,
- reportMap, allocated_reports_eod,
- added_lit_ids)) {
- removeAddedLiterals(*this, added_lit_ids);
- return false;
- }
- }
-
- assert(!reportMap.empty());
-
- int rv = addAnchoredNFA(*this, h, reportMap);
- if (rv != ANCHORED_FAIL) {
- assert(rv != ANCHORED_REMAP);
- DEBUG_PRINTF("added anchored nfa\n");
- /* add edges to the rose graph to bubble the match up */
- for (const auto &m : allocated_reports) {
- const ReportID &report = m.first;
- const u32 &lit_id = m.second;
- assert(depthMap[lit_id].max.is_finite());
- u32 minBound = depthMap[lit_id].min;
- u32 maxBound = depthMap[lit_id].max;
- RoseVertex v
- = createAnchoredVertex(this, lit_id, minBound, maxBound);
- g[v].reports.insert(report);
- }
-
- for (const auto &m : allocated_reports_eod) {
- const ReportID &report = m.first;
- const u32 &lit_id = m.second;
- assert(depthMap[lit_id].max.is_finite());
- u32 minBound = depthMap[lit_id].min;
- u32 maxBound = depthMap[lit_id].max;
- RoseVertex v
- = createAnchoredVertex(this, lit_id, minBound, maxBound);
- RoseVertex eod = add_vertex(g);
- g[eod].eod_accept = true;
- g[eod].reports.insert(report);
- g[eod].min_offset = g[v].min_offset;
- g[eod].max_offset = g[v].max_offset;
- add_edge(v, eod, g);
- }
-
- return true;
- } else {
- DEBUG_PRINTF("failed to add anchored nfa\n");
- removeAddedLiterals(*this, added_lit_ids);
- return false;
- }
-}
-
-} // namespace ue2
+ reportMap, allocated_reports_eod,
+ added_lit_ids)) {
+ removeAddedLiterals(*this, added_lit_ids);
+ return false;
+ }
+ }
+
+ assert(!reportMap.empty());
+
+ int rv = addAnchoredNFA(*this, h, reportMap);
+ if (rv != ANCHORED_FAIL) {
+ assert(rv != ANCHORED_REMAP);
+ DEBUG_PRINTF("added anchored nfa\n");
+ /* add edges to the rose graph to bubble the match up */
+ for (const auto &m : allocated_reports) {
+ const ReportID &report = m.first;
+ const u32 &lit_id = m.second;
+ assert(depthMap[lit_id].max.is_finite());
+ u32 minBound = depthMap[lit_id].min;
+ u32 maxBound = depthMap[lit_id].max;
+ RoseVertex v
+ = createAnchoredVertex(this, lit_id, minBound, maxBound);
+ g[v].reports.insert(report);
+ }
+
+ for (const auto &m : allocated_reports_eod) {
+ const ReportID &report = m.first;
+ const u32 &lit_id = m.second;
+ assert(depthMap[lit_id].max.is_finite());
+ u32 minBound = depthMap[lit_id].min;
+ u32 maxBound = depthMap[lit_id].max;
+ RoseVertex v
+ = createAnchoredVertex(this, lit_id, minBound, maxBound);
+ RoseVertex eod = add_vertex(g);
+ g[eod].eod_accept = true;
+ g[eod].reports.insert(report);
+ g[eod].min_offset = g[v].min_offset;
+ g[eod].max_offset = g[v].max_offset;
+ add_edge(v, eod, g);
+ }
+
+ return true;
+ } else {
+ DEBUG_PRINTF("failed to add anchored nfa\n");
+ removeAddedLiterals(*this, added_lit_ids);
+ return false;
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_add_internal.h b/contrib/libs/hyperscan/src/rose/rose_build_add_internal.h
index d7fd2e8032..143f1dfa58 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_add_internal.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_add_internal.h
@@ -1,47 +1,47 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_BUILD_ADD_INTERNAL_H
-#define ROSE_BUILD_ADD_INTERNAL_H
-
-#include "rose_graph.h"
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_ADD_INTERNAL_H
+#define ROSE_BUILD_ADD_INTERNAL_H
+
+#include "rose_graph.h"
+#include "ue2common.h"
#include "util/flat_containers.h"
-
-namespace ue2 {
-
-class RoseBuildImpl;
-
-RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent,
- u32 minBound, u32 maxBound, u32 literalId,
- size_t literalLength,
+
+namespace ue2 {
+
+class RoseBuildImpl;
+
+RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent,
+ u32 minBound, u32 maxBound, u32 literalId,
+ size_t literalLength,
const flat_set<ReportID> &reports);
-
-} // namespace ue2
-
+
+} // namespace ue2
+
#endif // ROSE_BUILD_ADD_INTERNAL_H
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_add_mask.cpp b/contrib/libs/hyperscan/src/rose/rose_build_add_mask.cpp
index be9ff37bdc..0a7e44c370 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_add_mask.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_add_mask.cpp
@@ -1,796 +1,796 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_impl.h"
-
-#include "ue2common.h"
-#include "grey.h"
-#include "rose_build_add_internal.h"
-#include "rose_build_anchored.h"
-#include "rose_in_util.h"
-#include "hwlm/hwlm_literal.h"
-#include "nfagraph/ng_depth.h"
-#include "nfagraph/ng_dump.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_limex.h"
-#include "nfagraph/ng_reports.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_width.h"
-#include "util/charreach.h"
-#include "util/charreach_util.h"
-#include "util/compare.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph.h"
-#include "util/make_unique.h"
-#include "util/ue2string.h"
-#include "util/verify_types.h"
-
-#include <algorithm>
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-#include <utility>
-
-using namespace std;
-
-namespace ue2 {
-
-#define MIN_MASK_LIT_LEN 2
-#define MAX_MASK_SIZE 255
-#define MAX_MASK_LITS 30
-
-static
-void findMaskLiteral(const vector<CharReach> &mask, bool streaming,
- ue2_literal *lit, u32 *offset, const Grey &grey) {
- bool case_fixed = false;
- bool nocase = false;
-
- size_t best_begin = 0;
- size_t best_end = 0;
- size_t best_len = 0;
-
- size_t begin = 0;
- size_t end = 0;
-
- for (size_t i = 0; i < mask.size(); i++) {
- bool fail = false;
- if (mask[i].count() != 1 && !mask[i].isCaselessChar()) {
- DEBUG_PRINTF("hit non-literal char, resetting at %zu\n", i);
- fail = true;
- }
-
- if (!fail && streaming && (end >= grey.maxHistoryAvailable + 1)) {
- DEBUG_PRINTF("hit literal limit, resetting at %zu\n", i);
- fail = true;
- }
-
- if (!fail && case_fixed && mask[i].isAlpha()) {
- if (nocase && mask[i].count() != 2) {
- fail = true;
- }
-
- if (!nocase && mask[i].count() != 1) {
- fail = true;
- }
- }
-
- if (fail) {
- case_fixed = false;
- nocase = false;
- size_t len = end - begin;
- bool better = len > best_len;
- if (better) {
- best_begin = begin;
- best_end = end;
- best_len = len;
- }
- begin = i + 1;
- end = i + 1;
- } else {
- assert(end == i);
- end = i + 1;
-
- if (mask[i].isAlpha()) {
- case_fixed = true;
- nocase = mask[i].count() == 2;
- }
- }
- }
-
- size_t len = end - begin;
- /* Everybody would rather be trigger towards the end */
- bool better = len >= best_len && mask.size() - end <= MAX_DELAY;
-
- if (better) {
- best_begin = begin;
- best_end = end;
- best_len = len;
- }
-
- for (size_t i = best_begin; i < best_end; i++) {
- assert(mask[i].count() == 1 || mask[i].count() == 2);
- lit->push_back(mask[i].find_first(), mask[i].count() > 1);
- }
-
- *offset = verify_u32(best_begin);
-}
-
-static
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_impl.h"
+
+#include "ue2common.h"
+#include "grey.h"
+#include "rose_build_add_internal.h"
+#include "rose_build_anchored.h"
+#include "rose_in_util.h"
+#include "hwlm/hwlm_literal.h"
+#include "nfagraph/ng_depth.h"
+#include "nfagraph/ng_dump.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
+#include "util/charreach.h"
+#include "util/charreach_util.h"
+#include "util/compare.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph.h"
+#include "util/make_unique.h"
+#include "util/ue2string.h"
+#include "util/verify_types.h"
+
+#include <algorithm>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+#include <utility>
+
+using namespace std;
+
+namespace ue2 {
+
+#define MIN_MASK_LIT_LEN 2
+#define MAX_MASK_SIZE 255
+#define MAX_MASK_LITS 30
+
+static
+void findMaskLiteral(const vector<CharReach> &mask, bool streaming,
+ ue2_literal *lit, u32 *offset, const Grey &grey) {
+ bool case_fixed = false;
+ bool nocase = false;
+
+ size_t best_begin = 0;
+ size_t best_end = 0;
+ size_t best_len = 0;
+
+ size_t begin = 0;
+ size_t end = 0;
+
+ for (size_t i = 0; i < mask.size(); i++) {
+ bool fail = false;
+ if (mask[i].count() != 1 && !mask[i].isCaselessChar()) {
+ DEBUG_PRINTF("hit non-literal char, resetting at %zu\n", i);
+ fail = true;
+ }
+
+ if (!fail && streaming && (end >= grey.maxHistoryAvailable + 1)) {
+ DEBUG_PRINTF("hit literal limit, resetting at %zu\n", i);
+ fail = true;
+ }
+
+ if (!fail && case_fixed && mask[i].isAlpha()) {
+ if (nocase && mask[i].count() != 2) {
+ fail = true;
+ }
+
+ if (!nocase && mask[i].count() != 1) {
+ fail = true;
+ }
+ }
+
+ if (fail) {
+ case_fixed = false;
+ nocase = false;
+ size_t len = end - begin;
+ bool better = len > best_len;
+ if (better) {
+ best_begin = begin;
+ best_end = end;
+ best_len = len;
+ }
+ begin = i + 1;
+ end = i + 1;
+ } else {
+ assert(end == i);
+ end = i + 1;
+
+ if (mask[i].isAlpha()) {
+ case_fixed = true;
+ nocase = mask[i].count() == 2;
+ }
+ }
+ }
+
+ size_t len = end - begin;
+ /* Everybody would rather be trigger towards the end */
+ bool better = len >= best_len && mask.size() - end <= MAX_DELAY;
+
+ if (better) {
+ best_begin = begin;
+ best_end = end;
+ best_len = len;
+ }
+
+ for (size_t i = best_begin; i < best_end; i++) {
+ assert(mask[i].count() == 1 || mask[i].count() == 2);
+ lit->push_back(mask[i].find_first(), mask[i].count() > 1);
+ }
+
+ *offset = verify_u32(best_begin);
+}
+
+static
bool initFmlCandidates(const CharReach &cr, vector<ue2_literal> &cand) {
- for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
- char c = (char)i;
- bool nocase = myisupper(c) && cr.test(mytolower(c));
- if (myislower(c) && cr.test(mytoupper(c))) {
- continue;
- }
-
+ for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
+ char c = (char)i;
+ bool nocase = myisupper(c) && cr.test(mytolower(c));
+ if (myislower(c) && cr.test(mytoupper(c))) {
+ continue;
+ }
+
if (cand.size() >= MAX_MASK_LITS) {
- DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
- return false;
- }
-
+ DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
+ return false;
+ }
+
cand.emplace_back(c, nocase);
- }
-
+ }
+
assert(cand.size() <= MAX_MASK_LITS);
return !cand.empty();
-}
-
-static
+}
+
+static
bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> &curr,
vector<ue2_literal> &cand) {
- DEBUG_PRINTF("expanding string with cr of %zu\n", cr.count());
+ DEBUG_PRINTF("expanding string with cr of %zu\n", cr.count());
DEBUG_PRINTF(" current cand list size %zu\n", cand.size());
-
+
curr.clear();
-
- for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
- char c = (char)i;
- bool nocase = myisupper(c) && cr.test(mytolower(c));
- if (myislower(c) && cr.test(mytoupper(c))) {
- continue;
- }
-
+
+ for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
+ char c = (char)i;
+ bool nocase = myisupper(c) && cr.test(mytolower(c));
+ if (myislower(c) && cr.test(mytoupper(c))) {
+ continue;
+ }
+
for (const auto &lit : cand) {
- if (curr.size() >= MAX_MASK_LITS) {
- DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
- return false;
- }
-
+ if (curr.size() >= MAX_MASK_LITS) {
+ DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
+ return false;
+ }
+
curr.push_back(lit);
curr.back().push_back(c, nocase);
- }
- }
-
- if (curr.back().length() > MAX_MASK2_WIDTH &&
- any_of(begin(curr), end(curr), mixed_sensitivity)) {
- DEBUG_PRINTF("mixed-sensitivity lit is too long, stopping\n");
- return false;
- }
-
- assert(curr.size() <= MAX_MASK_LITS);
+ }
+ }
+
+ if (curr.back().length() > MAX_MASK2_WIDTH &&
+ any_of(begin(curr), end(curr), mixed_sensitivity)) {
+ DEBUG_PRINTF("mixed-sensitivity lit is too long, stopping\n");
+ return false;
+ }
+
+ assert(curr.size() <= MAX_MASK_LITS);
cand.swap(curr);
- return true;
-}
-
-static
-u32 scoreFmlCandidates(const vector<ue2_literal> &cand) {
- if (cand.empty()) {
- DEBUG_PRINTF("no candidates\n");
- return 0;
- }
-
- const u32 len = cand.back().length();
-
- DEBUG_PRINTF("length = %u count %zu\n", len, cand.size());
- u32 min_period = len;
-
- for (const auto &lit : cand) {
+ return true;
+}
+
+static
+u32 scoreFmlCandidates(const vector<ue2_literal> &cand) {
+ if (cand.empty()) {
+ DEBUG_PRINTF("no candidates\n");
+ return 0;
+ }
+
+ const u32 len = cand.back().length();
+
+ DEBUG_PRINTF("length = %u count %zu\n", len, cand.size());
+ u32 min_period = len;
+
+ for (const auto &lit : cand) {
DEBUG_PRINTF("candidate: %s\n", dumpString(lit).c_str());
- u32 period = lit.length() - maxStringSelfOverlap(lit);
- min_period = min(min_period, period);
- }
- DEBUG_PRINTF("min_period %u\n", min_period);
- u32 length_score =
- (5 * min_period + len) * (cand.back().any_nocase() ? 90 : 100);
- u32 count_penalty;
- if (len > 4) {
- count_penalty = 9 * len * cand.size();
- } else {
- count_penalty = 5 * cand.size();
- }
- if (length_score <= count_penalty) {
- return 1;
- }
- return length_score - count_penalty;
-}
-
-/* favours later literals */
-static
-bool findMaskLiterals(const vector<CharReach> &mask, vector<ue2_literal> *lit,
- u32 *minBound, u32 *length) {
- *minBound = 0;
- *length = 0;
-
+ u32 period = lit.length() - maxStringSelfOverlap(lit);
+ min_period = min(min_period, period);
+ }
+ DEBUG_PRINTF("min_period %u\n", min_period);
+ u32 length_score =
+ (5 * min_period + len) * (cand.back().any_nocase() ? 90 : 100);
+ u32 count_penalty;
+ if (len > 4) {
+ count_penalty = 9 * len * cand.size();
+ } else {
+ count_penalty = 5 * cand.size();
+ }
+ if (length_score <= count_penalty) {
+ return 1;
+ }
+ return length_score - count_penalty;
+}
+
+/* favours later literals */
+static
+bool findMaskLiterals(const vector<CharReach> &mask, vector<ue2_literal> *lit,
+ u32 *minBound, u32 *length) {
+ *minBound = 0;
+ *length = 0;
+
vector<ue2_literal> candidates, best_candidates, curr_candidates;
- u32 best_score = 0;
- u32 best_minOffset = 0;
+ u32 best_score = 0;
+ u32 best_minOffset = 0;
for (auto it = mask.begin(); it != mask.end(); ++it) {
- candidates.clear();
+ candidates.clear();
if (!initFmlCandidates(*it, candidates)) {
- DEBUG_PRINTF("failed to init\n");
- continue;
- }
- DEBUG_PRINTF("++\n");
+ DEBUG_PRINTF("failed to init\n");
+ continue;
+ }
+ DEBUG_PRINTF("++\n");
auto jt = it;
while (jt != mask.begin()) {
- --jt;
- DEBUG_PRINTF("--\n");
+ --jt;
+ DEBUG_PRINTF("--\n");
if (!expandFmlCandidates(*jt, curr_candidates, candidates)) {
- DEBUG_PRINTF("expansion stopped\n");
- break;
- }
- }
+ DEBUG_PRINTF("expansion stopped\n");
+ break;
+ }
+ }
// Candidates have been expanded in reverse order.
for (auto &cand : candidates) {
cand = reverse_literal(cand);
}
- u32 score = scoreFmlCandidates(candidates);
- DEBUG_PRINTF("scored %u for literal set of size %zu\n", score,
- candidates.size());
- if (!candidates.empty() && score >= best_score) {
+ u32 score = scoreFmlCandidates(candidates);
+ DEBUG_PRINTF("scored %u for literal set of size %zu\n", score,
+ candidates.size());
+ if (!candidates.empty() && score >= best_score) {
best_minOffset = it - mask.begin() - candidates.back().length() + 1;
- best_candidates.swap(candidates);
- best_score = score;
- }
- }
-
- if (!best_score) {
- DEBUG_PRINTF("no lits\n");
- return false;
- }
-
- *minBound = best_minOffset;
- *length = best_candidates.back().length();
-
- DEBUG_PRINTF("best minbound %u length %u\n", *minBound, *length);
-
+ best_candidates.swap(candidates);
+ best_score = score;
+ }
+ }
+
+ if (!best_score) {
+ DEBUG_PRINTF("no lits\n");
+ return false;
+ }
+
+ *minBound = best_minOffset;
+ *length = best_candidates.back().length();
+
+ DEBUG_PRINTF("best minbound %u length %u\n", *minBound, *length);
+
assert(all_of_in(best_candidates, [&](const ue2_literal &s) {
return s.length() == *length;
}));
*lit = std::move(best_candidates);
- return true;
-}
-
-static
-unique_ptr<NGHolder> buildMaskLhs(bool anchored, u32 prefix_len,
- const vector<CharReach> &mask) {
- DEBUG_PRINTF("build %slhs len %u/%zu\n", anchored ? "anc " : "", prefix_len,
- mask.size());
-
- unique_ptr<NGHolder> lhs = ue2::make_unique<NGHolder>(NFA_PREFIX);
-
- assert(prefix_len);
- assert(mask.size() >= prefix_len);
- NFAVertex pred = anchored ? lhs->start : lhs->startDs;
-
- u32 m_idx = 0;
- while (prefix_len--) {
- NFAVertex v = add_vertex(*lhs);
- (*lhs)[v].char_reach = mask[m_idx++];
- add_edge(pred, v, *lhs);
- pred = v;
- }
- add_edge(pred, lhs->accept, *lhs);
- (*lhs)[pred].reports.insert(0);
-
- return lhs;
-}
-
-static
-void buildLiteralMask(const vector<CharReach> &mask, vector<u8> &msk,
- vector<u8> &cmp, u32 delay) {
- msk.clear();
- cmp.clear();
- if (mask.size() <= delay) {
- return;
- }
-
- // Construct an and/cmp mask from our mask ending at delay positions before
- // the end of the literal, with max length HWLM_MASKLEN.
-
- auto ite = mask.end() - delay;
- auto it = ite - min(size_t{HWLM_MASKLEN}, mask.size() - delay);
-
- for (; it != ite; ++it) {
- msk.push_back(0);
- cmp.push_back(0);
- make_and_cmp_mask(*it, &msk.back(), &cmp.back());
- }
-
- assert(msk.size() == cmp.size());
- assert(msk.size() <= HWLM_MASKLEN);
-}
-
-static
+ return true;
+}
+
+static
+unique_ptr<NGHolder> buildMaskLhs(bool anchored, u32 prefix_len,
+ const vector<CharReach> &mask) {
+ DEBUG_PRINTF("build %slhs len %u/%zu\n", anchored ? "anc " : "", prefix_len,
+ mask.size());
+
+ unique_ptr<NGHolder> lhs = ue2::make_unique<NGHolder>(NFA_PREFIX);
+
+ assert(prefix_len);
+ assert(mask.size() >= prefix_len);
+ NFAVertex pred = anchored ? lhs->start : lhs->startDs;
+
+ u32 m_idx = 0;
+ while (prefix_len--) {
+ NFAVertex v = add_vertex(*lhs);
+ (*lhs)[v].char_reach = mask[m_idx++];
+ add_edge(pred, v, *lhs);
+ pred = v;
+ }
+ add_edge(pred, lhs->accept, *lhs);
+ (*lhs)[pred].reports.insert(0);
+
+ return lhs;
+}
+
+static
+void buildLiteralMask(const vector<CharReach> &mask, vector<u8> &msk,
+ vector<u8> &cmp, u32 delay) {
+ msk.clear();
+ cmp.clear();
+ if (mask.size() <= delay) {
+ return;
+ }
+
+ // Construct an and/cmp mask from our mask ending at delay positions before
+ // the end of the literal, with max length HWLM_MASKLEN.
+
+ auto ite = mask.end() - delay;
+ auto it = ite - min(size_t{HWLM_MASKLEN}, mask.size() - delay);
+
+ for (; it != ite; ++it) {
+ msk.push_back(0);
+ cmp.push_back(0);
+ make_and_cmp_mask(*it, &msk.back(), &cmp.back());
+ }
+
+ assert(msk.size() == cmp.size());
+ assert(msk.size() <= HWLM_MASKLEN);
+}
+
+static
bool validateTransientMask(const vector<CharReach> &mask, bool anchored,
bool eod, const Grey &grey) {
- assert(!mask.empty());
-
- // An EOD anchored mask requires that everything fit into history, while an
- // ordinary floating case can handle one byte more (i.e., max history size
- // and one byte in the buffer).
- const size_t max_width = grey.maxHistoryAvailable + (eod ? 0 : 1);
- if (mask.size() > max_width) {
- DEBUG_PRINTF("mask too long for max available history\n");
- return false;
- }
-
+ assert(!mask.empty());
+
+ // An EOD anchored mask requires that everything fit into history, while an
+ // ordinary floating case can handle one byte more (i.e., max history size
+ // and one byte in the buffer).
+ const size_t max_width = grey.maxHistoryAvailable + (eod ? 0 : 1);
+ if (mask.size() > max_width) {
+ DEBUG_PRINTF("mask too long for max available history\n");
+ return false;
+ }
+
/* although anchored masks cannot be transient, short masks may be placed
* into the atable. */
if (anchored && mask.size() > grey.maxAnchoredRegion) {
return false;
}
- vector<ue2_literal> lits;
- u32 lit_minBound; /* minBound of each literal in lit */
- u32 lit_length; /* length of each literal in lit */
- if (!findMaskLiterals(mask, &lits, &lit_minBound, &lit_length)) {
- DEBUG_PRINTF("failed to find any lits\n");
- return false;
- }
-
- if (lits.empty()) {
- return false;
- }
-
- const u32 delay = mask.size() - lit_length - lit_minBound;
- if (delay > MAX_DELAY) {
- DEBUG_PRINTF("delay %u is too much\n", delay);
- return false;
- }
-
- if (lit_length == 1 && lits.size() > 3) {
- DEBUG_PRINTF("no decent trigger\n");
- return false;
- }
-
- // Mixed-sensitivity literals require benefits masks to implement, and thus
- // have a maximum length. This has been taken into account in
- // findMaskLiterals.
- assert(lit_length <= MAX_MASK2_WIDTH ||
- none_of(begin(lits), end(lits), mixed_sensitivity));
-
- // Build the HWLM literal mask.
- vector<u8> msk, cmp;
- if (grey.roseHamsterMasks) {
- buildLiteralMask(mask, msk, cmp, delay);
- }
-
- // We consider the HWLM mask length to run from the first non-zero byte to
- // the end, and let max(mask length, literal length) be the effective
- // literal length.
- //
- // A one-byte literal with no mask is too short, but a one-byte literal
- // with a few bytes of mask information is OK.
-
- u32 msk_length = distance(find_if(begin(msk), end(msk),
- [](u8 v) { return v != 0; }), end(msk));
- u32 eff_lit_length = max(lit_length, msk_length);
- DEBUG_PRINTF("msk_length=%u, eff_lit_length = %u\n", msk_length,
- eff_lit_length);
-
- if (eff_lit_length < MIN_MASK_LIT_LEN) {
- DEBUG_PRINTF("literals too short\n");
- return false;
- }
-
- DEBUG_PRINTF("mask is ok\n");
- return true;
-}
-
-static
-bool maskIsNeeded(const ue2_literal &lit, const NGHolder &g) {
+ vector<ue2_literal> lits;
+ u32 lit_minBound; /* minBound of each literal in lit */
+ u32 lit_length; /* length of each literal in lit */
+ if (!findMaskLiterals(mask, &lits, &lit_minBound, &lit_length)) {
+ DEBUG_PRINTF("failed to find any lits\n");
+ return false;
+ }
+
+ if (lits.empty()) {
+ return false;
+ }
+
+ const u32 delay = mask.size() - lit_length - lit_minBound;
+ if (delay > MAX_DELAY) {
+ DEBUG_PRINTF("delay %u is too much\n", delay);
+ return false;
+ }
+
+ if (lit_length == 1 && lits.size() > 3) {
+ DEBUG_PRINTF("no decent trigger\n");
+ return false;
+ }
+
+ // Mixed-sensitivity literals require benefits masks to implement, and thus
+ // have a maximum length. This has been taken into account in
+ // findMaskLiterals.
+ assert(lit_length <= MAX_MASK2_WIDTH ||
+ none_of(begin(lits), end(lits), mixed_sensitivity));
+
+ // Build the HWLM literal mask.
+ vector<u8> msk, cmp;
+ if (grey.roseHamsterMasks) {
+ buildLiteralMask(mask, msk, cmp, delay);
+ }
+
+ // We consider the HWLM mask length to run from the first non-zero byte to
+ // the end, and let max(mask length, literal length) be the effective
+ // literal length.
+ //
+ // A one-byte literal with no mask is too short, but a one-byte literal
+ // with a few bytes of mask information is OK.
+
+ u32 msk_length = distance(find_if(begin(msk), end(msk),
+ [](u8 v) { return v != 0; }), end(msk));
+ u32 eff_lit_length = max(lit_length, msk_length);
+ DEBUG_PRINTF("msk_length=%u, eff_lit_length = %u\n", msk_length,
+ eff_lit_length);
+
+ if (eff_lit_length < MIN_MASK_LIT_LEN) {
+ DEBUG_PRINTF("literals too short\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("mask is ok\n");
+ return true;
+}
+
+static
+bool maskIsNeeded(const ue2_literal &lit, const NGHolder &g) {
flat_set<NFAVertex> curr = {g.accept};
flat_set<NFAVertex> next;
-
- for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
- const CharReach &cr = *it;
- DEBUG_PRINTF("check %s\n", describeClass(*it).c_str());
- next.clear();
- for (auto v : curr) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (isSubsetOf(cr, g[u].char_reach)) {
- next.insert(u);
- }
- }
- }
- if (next.empty()) {
- DEBUG_PRINTF("no path to start\n");
- return true;
- }
- curr.swap(next);
- }
-
- for (auto v : curr) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == g.start || u == g.startDs) {
- DEBUG_PRINTF("literal spans graph from start to accept\n");
- return false;
-
- }
- }
- }
-
- DEBUG_PRINTF("literal doesn't reach start\n");
- return true;
-}
-
-static
-void addTransientMask(RoseBuildImpl &build, const vector<CharReach> &mask,
+
+ for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
+ const CharReach &cr = *it;
+ DEBUG_PRINTF("check %s\n", describeClass(*it).c_str());
+ next.clear();
+ for (auto v : curr) {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (isSubsetOf(cr, g[u].char_reach)) {
+ next.insert(u);
+ }
+ }
+ }
+ if (next.empty()) {
+ DEBUG_PRINTF("no path to start\n");
+ return true;
+ }
+ curr.swap(next);
+ }
+
+ for (auto v : curr) {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == g.start || u == g.startDs) {
+ DEBUG_PRINTF("literal spans graph from start to accept\n");
+ return false;
+
+ }
+ }
+ }
+
+ DEBUG_PRINTF("literal doesn't reach start\n");
+ return true;
+}
+
+static
+void addTransientMask(RoseBuildImpl &build, const vector<CharReach> &mask,
const flat_set<ReportID> &reports, bool anchored,
- bool eod) {
- vector<ue2_literal> lits;
- u32 lit_minBound; /* minBound of each literal in lit */
- u32 lit_length; /* length of each literal in lit */
- if (!findMaskLiterals(mask, &lits, &lit_minBound, &lit_length)) {
- DEBUG_PRINTF("failed to find any lits\n");
- assert(0);
- return;
- }
-
- DEBUG_PRINTF("%zu literals, minBound=%u, length=%u\n", lits.size(),
- lit_minBound, lit_length);
-
- if (lits.empty()) {
- assert(0);
- return;
- }
-
- u32 delay = mask.size() - lit_length - lit_minBound;
- assert(delay <= MAX_DELAY);
- DEBUG_PRINTF("delay=%u\n", delay);
-
- shared_ptr<NGHolder> mask_graph = buildMaskLhs(anchored, mask.size(), mask);
-
- u32 mask_lag = 0; /* TODO */
-
- // Everyone gets the same report ID.
- ReportID mask_report = build.getNewNfaReport();
+ bool eod) {
+ vector<ue2_literal> lits;
+ u32 lit_minBound; /* minBound of each literal in lit */
+ u32 lit_length; /* length of each literal in lit */
+ if (!findMaskLiterals(mask, &lits, &lit_minBound, &lit_length)) {
+ DEBUG_PRINTF("failed to find any lits\n");
+ assert(0);
+ return;
+ }
+
+ DEBUG_PRINTF("%zu literals, minBound=%u, length=%u\n", lits.size(),
+ lit_minBound, lit_length);
+
+ if (lits.empty()) {
+ assert(0);
+ return;
+ }
+
+ u32 delay = mask.size() - lit_length - lit_minBound;
+ assert(delay <= MAX_DELAY);
+ DEBUG_PRINTF("delay=%u\n", delay);
+
+ shared_ptr<NGHolder> mask_graph = buildMaskLhs(anchored, mask.size(), mask);
+
+ u32 mask_lag = 0; /* TODO */
+
+ // Everyone gets the same report ID.
+ ReportID mask_report = build.getNewNfaReport();
set_report(*mask_graph, mask_report);
-
- // Build the HWLM literal mask.
- vector<u8> msk, cmp;
- if (build.cc.grey.roseHamsterMasks) {
- buildLiteralMask(mask, msk, cmp, delay);
- }
-
- /* adjust bounds to be relative to trigger rather than mask */
- const u32 v_min_offset = add_rose_depth(0, mask.size());
- const u32 v_max_offset =
- add_rose_depth(anchored ? 0 : ROSE_BOUND_INF, mask.size());
-
- RoseGraph &g = build.g;
-
- // By default, masked literals go into the floating table (except for eod
- // cases).
- enum rose_literal_table table = ROSE_FLOATING;
-
- RoseVertex eod_v = RoseGraph::null_vertex();
- if (eod) {
- eod_v = add_vertex(g);
- g[eod_v].eod_accept = true;
- insert(&g[eod_v].reports, reports);
- g[eod_v].min_offset = v_min_offset;
- g[eod_v].max_offset = v_max_offset;
-
- // Note: because this is a transient mask, we know that we can match it
- // completely inside the history buffer. So, using the EOD literal
- // table is always safe.
- table = ROSE_EOD_ANCHORED;
-
- // Widen the EOD table window to cover the mask.
- ENSURE_AT_LEAST(&build.ematcher_region_size, mask.size());
- }
-
+
+ // Build the HWLM literal mask.
+ vector<u8> msk, cmp;
+ if (build.cc.grey.roseHamsterMasks) {
+ buildLiteralMask(mask, msk, cmp, delay);
+ }
+
+ /* adjust bounds to be relative to trigger rather than mask */
+ const u32 v_min_offset = add_rose_depth(0, mask.size());
+ const u32 v_max_offset =
+ add_rose_depth(anchored ? 0 : ROSE_BOUND_INF, mask.size());
+
+ RoseGraph &g = build.g;
+
+ // By default, masked literals go into the floating table (except for eod
+ // cases).
+ enum rose_literal_table table = ROSE_FLOATING;
+
+ RoseVertex eod_v = RoseGraph::null_vertex();
+ if (eod) {
+ eod_v = add_vertex(g);
+ g[eod_v].eod_accept = true;
+ insert(&g[eod_v].reports, reports);
+ g[eod_v].min_offset = v_min_offset;
+ g[eod_v].max_offset = v_max_offset;
+
+ // Note: because this is a transient mask, we know that we can match it
+ // completely inside the history buffer. So, using the EOD literal
+ // table is always safe.
+ table = ROSE_EOD_ANCHORED;
+
+ // Widen the EOD table window to cover the mask.
+ ENSURE_AT_LEAST(&build.ematcher_region_size, mask.size());
+ }
+
const flat_set<ReportID> no_reports;
-
- for (const auto &lit : lits) {
- u32 lit_id = build.getLiteralId(lit, msk, cmp, delay, table);
- const RoseVertex parent = anchored ? build.anchored_root : build.root;
- bool use_mask = delay || maskIsNeeded(lit, *mask_graph);
-
- auto v = createVertex(&build, parent, 0, ROSE_BOUND_INF, lit_id,
- lit.length(), eod ? no_reports : reports);
-
- if (use_mask) {
- g[v].left.graph = mask_graph;
- g[v].left.lag = mask_lag;
- g[v].left.leftfix_report = mask_report;
- } else {
- // Make sure our edge bounds are correct.
+
+ for (const auto &lit : lits) {
+ u32 lit_id = build.getLiteralId(lit, msk, cmp, delay, table);
+ const RoseVertex parent = anchored ? build.anchored_root : build.root;
+ bool use_mask = delay || maskIsNeeded(lit, *mask_graph);
+
+ auto v = createVertex(&build, parent, 0, ROSE_BOUND_INF, lit_id,
+ lit.length(), eod ? no_reports : reports);
+
+ if (use_mask) {
+ g[v].left.graph = mask_graph;
+ g[v].left.lag = mask_lag;
+ g[v].left.leftfix_report = mask_report;
+ } else {
+ // Make sure our edge bounds are correct.
RoseEdge e = edge(parent, v, g);
- g[e].minBound = 0;
- g[e].maxBound = anchored ? 0 : ROSE_BOUND_INF;
- g[e].history = anchored ? ROSE_ROLE_HISTORY_ANCH
- : ROSE_ROLE_HISTORY_NONE;
- }
-
- // Set offsets correctly.
- g[v].min_offset = v_min_offset;
- g[v].max_offset = v_max_offset;
-
- if (eod) {
+ g[e].minBound = 0;
+ g[e].maxBound = anchored ? 0 : ROSE_BOUND_INF;
+ g[e].history = anchored ? ROSE_ROLE_HISTORY_ANCH
+ : ROSE_ROLE_HISTORY_NONE;
+ }
+
+ // Set offsets correctly.
+ g[v].min_offset = v_min_offset;
+ g[v].max_offset = v_max_offset;
+
+ if (eod) {
RoseEdge e = add_edge(v, eod_v, g);
- g[e].minBound = 0;
- g[e].maxBound = 0;
- g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE;
- }
- }
-}
-
-static
+ g[e].minBound = 0;
+ g[e].maxBound = 0;
+ g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE;
+ }
+ }
+}
+
+static
unique_ptr<NGHolder> buildMaskRhs(const flat_set<ReportID> &reports,
- const vector<CharReach> &mask,
- u32 suffix_len) {
- assert(suffix_len);
- assert(mask.size() > suffix_len);
-
- unique_ptr<NGHolder> rhs = ue2::make_unique<NGHolder>(NFA_SUFFIX);
- NGHolder &h = *rhs;
-
- NFAVertex succ = h.accept;
- u32 m_idx = mask.size() - 1;
- while (suffix_len--) {
- NFAVertex u = add_vertex(h);
- if (succ == h.accept) {
- h[u].reports.insert(reports.begin(), reports.end());
- }
- h[u].char_reach = mask[m_idx--];
- add_edge(u, succ, h);
- succ = u;
- }
-
+ const vector<CharReach> &mask,
+ u32 suffix_len) {
+ assert(suffix_len);
+ assert(mask.size() > suffix_len);
+
+ unique_ptr<NGHolder> rhs = ue2::make_unique<NGHolder>(NFA_SUFFIX);
+ NGHolder &h = *rhs;
+
+ NFAVertex succ = h.accept;
+ u32 m_idx = mask.size() - 1;
+ while (suffix_len--) {
+ NFAVertex u = add_vertex(h);
+ if (succ == h.accept) {
+ h[u].reports.insert(reports.begin(), reports.end());
+ }
+ h[u].char_reach = mask[m_idx--];
+ add_edge(u, succ, h);
+ succ = u;
+ }
+
NFAEdge e = add_edge(h.start, succ, h);
h[e].tops.insert(DEFAULT_TOP);
-
- return rhs;
-}
-
-static
+
+ return rhs;
+}
+
+static
void doAddMask(RoseBuildImpl &tbi, bool anchored, const vector<CharReach> &mask,
const ue2_literal &lit, u32 prefix_len, u32 suffix_len,
const flat_set<ReportID> &reports) {
- /* Note: bounds are relative to literal start */
- RoseInGraph ig;
- RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(anchored), ig);
- RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
-
- DEBUG_PRINTF("pref + lit = %u\n", prefix_len);
- assert(prefix_len >= lit.length());
-
- // prefix len is relative to end of literal.
- u32 minBound = prefix_len - lit.length();
-
- if (minBound) {
- if (anchored && prefix_len > tbi.cc.grey.maxAnchoredRegion) {
- DEBUG_PRINTF("too deep\n");
- /* see if there is an anchored literal we can also hang off */
-
- ue2_literal lit2;
- u32 lit2_offset;
- vector<CharReach> mask2 = mask;
- assert(mask2.size() > tbi.cc.grey.maxAnchoredRegion);
- mask2.resize(MIN(tbi.cc.grey.maxAnchoredRegion, minBound));
-
- findMaskLiteral(mask2, tbi.cc.streaming, &lit2, &lit2_offset,
- tbi.cc.grey);
-
- if (lit2.length() >= MIN_MASK_LIT_LEN) {
- u32 prefix2_len = lit2_offset + lit2.length();
- assert(prefix2_len < minBound);
- RoseInVertex u
- = add_vertex(RoseInVertexProps::makeLiteral(lit2), ig);
- if (lit2_offset){
- DEBUG_PRINTF("building lhs (off %u)\n", lit2_offset);
- shared_ptr<NGHolder> lhs2
- = buildMaskLhs(true, lit2_offset, mask);
- add_edge(s, u, RoseInEdgeProps(lhs2, lit2.length()), ig);
- } else {
- add_edge(s, u, RoseInEdgeProps(0, 0), ig);
- }
-
- /* midfix */
- DEBUG_PRINTF("building mhs\n");
- vector<CharReach> mask3(mask.begin() + prefix2_len, mask.end());
- u32 overlap = maxOverlap(lit2, lit, 0);
- u32 delay = lit.length() - overlap;
- shared_ptr<NGHolder> mhs
- = buildMaskLhs(true, minBound - prefix2_len + overlap,
- mask3);
- mhs->kind = NFA_INFIX;
+ /* Note: bounds are relative to literal start */
+ RoseInGraph ig;
+ RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(anchored), ig);
+ RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
+
+ DEBUG_PRINTF("pref + lit = %u\n", prefix_len);
+ assert(prefix_len >= lit.length());
+
+ // prefix len is relative to end of literal.
+ u32 minBound = prefix_len - lit.length();
+
+ if (minBound) {
+ if (anchored && prefix_len > tbi.cc.grey.maxAnchoredRegion) {
+ DEBUG_PRINTF("too deep\n");
+ /* see if there is an anchored literal we can also hang off */
+
+ ue2_literal lit2;
+ u32 lit2_offset;
+ vector<CharReach> mask2 = mask;
+ assert(mask2.size() > tbi.cc.grey.maxAnchoredRegion);
+ mask2.resize(MIN(tbi.cc.grey.maxAnchoredRegion, minBound));
+
+ findMaskLiteral(mask2, tbi.cc.streaming, &lit2, &lit2_offset,
+ tbi.cc.grey);
+
+ if (lit2.length() >= MIN_MASK_LIT_LEN) {
+ u32 prefix2_len = lit2_offset + lit2.length();
+ assert(prefix2_len < minBound);
+ RoseInVertex u
+ = add_vertex(RoseInVertexProps::makeLiteral(lit2), ig);
+ if (lit2_offset){
+ DEBUG_PRINTF("building lhs (off %u)\n", lit2_offset);
+ shared_ptr<NGHolder> lhs2
+ = buildMaskLhs(true, lit2_offset, mask);
+ add_edge(s, u, RoseInEdgeProps(lhs2, lit2.length()), ig);
+ } else {
+ add_edge(s, u, RoseInEdgeProps(0, 0), ig);
+ }
+
+ /* midfix */
+ DEBUG_PRINTF("building mhs\n");
+ vector<CharReach> mask3(mask.begin() + prefix2_len, mask.end());
+ u32 overlap = maxOverlap(lit2, lit, 0);
+ u32 delay = lit.length() - overlap;
+ shared_ptr<NGHolder> mhs
+ = buildMaskLhs(true, minBound - prefix2_len + overlap,
+ mask3);
+ mhs->kind = NFA_INFIX;
setTops(*mhs);
- add_edge(u, v, RoseInEdgeProps(mhs, delay), ig);
-
- DEBUG_PRINTF("add anch literal too!\n");
- goto do_rhs;
- }
- }
-
- shared_ptr<NGHolder> lhs = buildMaskLhs(anchored, minBound, mask);
- add_edge(s, v, RoseInEdgeProps(lhs, lit.length()), ig);
- } else {
- u32 maxBound = anchored ? minBound : ROSE_BOUND_INF;
- add_edge(s, v, RoseInEdgeProps(minBound, maxBound), ig);
- }
-
- do_rhs:
- if (suffix_len) {
- shared_ptr<NGHolder> rhs = buildMaskRhs(reports, mask, suffix_len);
- RoseInVertex a =
- add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig);
- add_edge(v, a, RoseInEdgeProps(rhs, 0), ig);
- } else {
- /* Note: masks have no eod connections */
- RoseInVertex a
- = add_vertex(RoseInVertexProps::makeAccept(reports), ig);
- add_edge(v, a, RoseInEdgeProps(0U, 0U), ig);
- }
-
- calcVertexOffsets(ig);
-
- bool rv = tbi.addRose(ig, false);
-
- assert(rv); /* checkAllowMask should have prevented this */
- if (!rv) {
- throw std::exception();
- }
-}
-
-static
-bool checkAllowMask(const vector<CharReach> &mask, ue2_literal *lit,
- u32 *prefix_len, u32 *suffix_len,
- const CompileContext &cc) {
- assert(!mask.empty());
- u32 lit_offset;
- findMaskLiteral(mask, cc.streaming, lit, &lit_offset, cc.grey);
-
- if (lit->length() < MIN_MASK_LIT_LEN && lit->length() != mask.size()) {
- DEBUG_PRINTF("need more literal - bad mask\n");
- return false;
- }
-
- DEBUG_PRINTF("mask lit '%s', len=%zu at offset=%u\n",
- dumpString(*lit).c_str(), lit->length(), lit_offset);
-
- assert(!cc.streaming || lit->length() <= cc.grey.maxHistoryAvailable + 1);
-
- /* literal is included in the prefix nfa so that matches from the prefix
- * can't occur in the history buffer - probably should tweak the NFA API
- * to allow such matches not to be suppressed */
- *prefix_len = lit_offset + lit->length();
- *suffix_len = mask.size() - *prefix_len;
- DEBUG_PRINTF("prefix_len=%u, suffix_len=%u\n", *prefix_len, *suffix_len);
-
- /* check if we can backtrack sufficiently */
- if (cc.streaming && *prefix_len > cc.grey.maxHistoryAvailable + 1) {
- DEBUG_PRINTF("too much lag\n");
- return false;
- }
-
- if (*suffix_len > MAX_MASK_SIZE || *prefix_len > MAX_MASK_SIZE) {
- DEBUG_PRINTF("too big\n");
- return false;
- }
-
- return true;
-}
-
-bool RoseBuildImpl::add(bool anchored, const vector<CharReach> &mask,
+ add_edge(u, v, RoseInEdgeProps(mhs, delay), ig);
+
+ DEBUG_PRINTF("add anch literal too!\n");
+ goto do_rhs;
+ }
+ }
+
+ shared_ptr<NGHolder> lhs = buildMaskLhs(anchored, minBound, mask);
+ add_edge(s, v, RoseInEdgeProps(lhs, lit.length()), ig);
+ } else {
+ u32 maxBound = anchored ? minBound : ROSE_BOUND_INF;
+ add_edge(s, v, RoseInEdgeProps(minBound, maxBound), ig);
+ }
+
+ do_rhs:
+ if (suffix_len) {
+ shared_ptr<NGHolder> rhs = buildMaskRhs(reports, mask, suffix_len);
+ RoseInVertex a =
+ add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig);
+ add_edge(v, a, RoseInEdgeProps(rhs, 0), ig);
+ } else {
+ /* Note: masks have no eod connections */
+ RoseInVertex a
+ = add_vertex(RoseInVertexProps::makeAccept(reports), ig);
+ add_edge(v, a, RoseInEdgeProps(0U, 0U), ig);
+ }
+
+ calcVertexOffsets(ig);
+
+ bool rv = tbi.addRose(ig, false);
+
+ assert(rv); /* checkAllowMask should have prevented this */
+ if (!rv) {
+ throw std::exception();
+ }
+}
+
+static
+bool checkAllowMask(const vector<CharReach> &mask, ue2_literal *lit,
+ u32 *prefix_len, u32 *suffix_len,
+ const CompileContext &cc) {
+ assert(!mask.empty());
+ u32 lit_offset;
+ findMaskLiteral(mask, cc.streaming, lit, &lit_offset, cc.grey);
+
+ if (lit->length() < MIN_MASK_LIT_LEN && lit->length() != mask.size()) {
+ DEBUG_PRINTF("need more literal - bad mask\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("mask lit '%s', len=%zu at offset=%u\n",
+ dumpString(*lit).c_str(), lit->length(), lit_offset);
+
+ assert(!cc.streaming || lit->length() <= cc.grey.maxHistoryAvailable + 1);
+
+ /* literal is included in the prefix nfa so that matches from the prefix
+ * can't occur in the history buffer - probably should tweak the NFA API
+ * to allow such matches not to be suppressed */
+ *prefix_len = lit_offset + lit->length();
+ *suffix_len = mask.size() - *prefix_len;
+ DEBUG_PRINTF("prefix_len=%u, suffix_len=%u\n", *prefix_len, *suffix_len);
+
+ /* check if we can backtrack sufficiently */
+ if (cc.streaming && *prefix_len > cc.grey.maxHistoryAvailable + 1) {
+ DEBUG_PRINTF("too much lag\n");
+ return false;
+ }
+
+ if (*suffix_len > MAX_MASK_SIZE || *prefix_len > MAX_MASK_SIZE) {
+ DEBUG_PRINTF("too big\n");
+ return false;
+ }
+
+ return true;
+}
+
+bool RoseBuildImpl::add(bool anchored, const vector<CharReach> &mask,
const flat_set<ReportID> &reports) {
if (validateTransientMask(mask, anchored, false, cc.grey)) {
- bool eod = false;
- addTransientMask(*this, mask, reports, anchored, eod);
- return true;
- }
-
- ue2_literal lit;
- u32 prefix_len = 0;
- u32 suffix_len = 0;
-
- if (!checkAllowMask(mask, &lit, &prefix_len, &suffix_len, cc)) {
- return false;
- }
-
- /* we know that the mask can be handled now, start playing with the rose
- * graph */
- doAddMask(*this, anchored, mask, lit, prefix_len, suffix_len, reports);
-
- return true;
-}
-
-bool RoseBuildImpl::validateMask(const vector<CharReach> &mask,
+ bool eod = false;
+ addTransientMask(*this, mask, reports, anchored, eod);
+ return true;
+ }
+
+ ue2_literal lit;
+ u32 prefix_len = 0;
+ u32 suffix_len = 0;
+
+ if (!checkAllowMask(mask, &lit, &prefix_len, &suffix_len, cc)) {
+ return false;
+ }
+
+ /* we know that the mask can be handled now, start playing with the rose
+ * graph */
+ doAddMask(*this, anchored, mask, lit, prefix_len, suffix_len, reports);
+
+ return true;
+}
+
+bool RoseBuildImpl::validateMask(const vector<CharReach> &mask,
UNUSED const flat_set<ReportID> &reports,
bool anchored, bool eod) const {
return validateTransientMask(mask, anchored, eod, cc.grey);
-}
-
-static
-unique_ptr<NGHolder> makeAnchoredGraph(const vector<CharReach> &mask,
+}
+
+static
+unique_ptr<NGHolder> makeAnchoredGraph(const vector<CharReach> &mask,
const flat_set<ReportID> &reports,
- bool eod) {
- auto gp = ue2::make_unique<NGHolder>();
- NGHolder &g = *gp;
-
- NFAVertex u = g.start;
- for (const auto &cr : mask) {
- NFAVertex v = add_vertex(g);
- g[v].char_reach = cr;
- add_edge(u, v, g);
- u = v;
- }
-
-
- g[u].reports = reports;
- add_edge(u, eod ? g.acceptEod : g.accept, g);
-
- return gp;
-}
-
-static
-bool addAnchoredMask(RoseBuildImpl &build, const vector<CharReach> &mask,
+ bool eod) {
+ auto gp = ue2::make_unique<NGHolder>();
+ NGHolder &g = *gp;
+
+ NFAVertex u = g.start;
+ for (const auto &cr : mask) {
+ NFAVertex v = add_vertex(g);
+ g[v].char_reach = cr;
+ add_edge(u, v, g);
+ u = v;
+ }
+
+
+ g[u].reports = reports;
+ add_edge(u, eod ? g.acceptEod : g.accept, g);
+
+ return gp;
+}
+
+static
+bool addAnchoredMask(RoseBuildImpl &build, const vector<CharReach> &mask,
const flat_set<ReportID> &reports, bool eod) {
- if (!build.cc.grey.allowAnchoredAcyclic) {
- return false;
- }
-
- auto g = makeAnchoredGraph(mask, reports, eod);
- assert(g);
-
- return build.addAnchoredAcyclic(*g);
-}
-
-void RoseBuildImpl::addMask(const vector<CharReach> &mask,
+ if (!build.cc.grey.allowAnchoredAcyclic) {
+ return false;
+ }
+
+ auto g = makeAnchoredGraph(mask, reports, eod);
+ assert(g);
+
+ return build.addAnchoredAcyclic(*g);
+}
+
+void RoseBuildImpl::addMask(const vector<CharReach> &mask,
const flat_set<ReportID> &reports, bool anchored,
bool eod) {
- if (anchored && addAnchoredMask(*this, mask, reports, eod)) {
- DEBUG_PRINTF("added mask as anchored acyclic graph\n");
- return;
- }
-
- addTransientMask(*this, mask, reports, anchored, eod);
-}
-
-} // namespace ue2
+ if (anchored && addAnchoredMask(*this, mask, reports, eod)) {
+ DEBUG_PRINTF("added mask as anchored acyclic graph\n");
+ return;
+ }
+
+ addTransientMask(*this, mask, reports, anchored, eod);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_anchored.cpp b/contrib/libs/hyperscan/src/rose/rose_build_anchored.cpp
index 5a3aa00638..23688b8d22 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_anchored.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_anchored.cpp
@@ -1,182 +1,182 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_anchored.h"
-
-#include "grey.h"
-#include "rose_build_impl.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_anchored.h"
+
+#include "grey.h"
+#include "rose_build_impl.h"
#include "rose_build_matchers.h"
-#include "rose_internal.h"
-#include "ue2common.h"
-#include "nfa/dfa_min.h"
-#include "nfa/mcclellancompile.h"
-#include "nfa/mcclellancompile_util.h"
-#include "nfa/nfa_build_util.h"
-#include "nfa/rdfa_merge.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_mcclellan_internal.h"
-#include "util/alloc.h"
-#include "util/bitfield.h"
-#include "util/charreach.h"
-#include "util/compile_context.h"
-#include "util/compile_error.h"
-#include "util/container.h"
-#include "util/determinise.h"
+#include "rose_internal.h"
+#include "ue2common.h"
+#include "nfa/dfa_min.h"
+#include "nfa/mcclellancompile.h"
+#include "nfa/mcclellancompile_util.h"
+#include "nfa/nfa_build_util.h"
+#include "nfa/rdfa_merge.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_mcclellan_internal.h"
+#include "util/alloc.h"
+#include "util/bitfield.h"
+#include "util/charreach.h"
+#include "util/compile_context.h"
+#include "util/compile_error.h"
+#include "util/container.h"
+#include "util/determinise.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
-#include "util/make_unique.h"
-#include "util/order_check.h"
-#include "util/ue2string.h"
+#include "util/graph_range.h"
+#include "util/make_unique.h"
+#include "util/order_check.h"
+#include "util/ue2string.h"
#include "util/unordered.h"
-#include "util/verify_types.h"
-
-#include <map>
-#include <queue>
-#include <set>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-#define ANCHORED_NFA_STATE_LIMIT 512
-#define MAX_DFA_STATES 16000
-#define DFA_PAIR_MERGE_THRESHOLD 5000
-#define MAX_SMALL_START_REACH 4
-
-#define INIT_STATE (DEAD_STATE + 1)
-
+#include "util/verify_types.h"
+
+#include <map>
+#include <queue>
+#include <set>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+#define ANCHORED_NFA_STATE_LIMIT 512
+#define MAX_DFA_STATES 16000
+#define DFA_PAIR_MERGE_THRESHOLD 5000
+#define MAX_SMALL_START_REACH 4
+
+#define INIT_STATE (DEAD_STATE + 1)
+
#define NO_FRAG_ID (~0U)
-// Adds a vertex with the given reach.
-static
-NFAVertex add_vertex(NGHolder &h, const CharReach &cr) {
- NFAVertex v = add_vertex(h);
- h[v].char_reach = cr;
- return v;
-}
-
-static
-void add_edges(const set<NFAVertex> &parents, NFAVertex v, NGHolder &h) {
- for (auto p : parents) {
- add_edge(p, v, h);
- }
-}
-
-static
-set<NFAVertex> addDotsToGraph(NGHolder &h, NFAVertex start, u32 min, u32 max,
- const CharReach &cr) {
- DEBUG_PRINTF("adding [%u, %u] to graph\n", min, max);
- u32 i = 0;
- set<NFAVertex> curr;
- curr.insert(start);
- for (; i < min; i++) {
- NFAVertex next = add_vertex(h, cr);
- add_edges(curr, next, h);
- curr.clear();
- curr.insert(next);
- }
-
- assert(max != ROSE_BOUND_INF);
-
- set<NFAVertex> orig = curr;
- for (; i < max; i++) {
- NFAVertex next = add_vertex(h, cr);
- add_edges(curr, next, h);
- curr.clear();
- curr.insert(next);
- curr.insert(orig.begin(), orig.end());
- }
-
- return curr;
-}
-
-static
-NFAVertex addToGraph(NGHolder &h, const set<NFAVertex> &curr,
- const ue2_literal &s) {
- DEBUG_PRINTF("adding %s to graph\n", dumpString(s).c_str());
- assert(!s.empty());
-
- ue2_literal::const_iterator it = s.begin();
- NFAVertex u = add_vertex(h, *it);
- add_edges(curr, u, h);
-
- for (++it; it != s.end(); ++it) {
- NFAVertex next = add_vertex(h, *it);
- add_edge(u, next, h);
- u = next;
- }
-
- return u;
-}
-
-static
-void mergeAnchoredDfas(vector<unique_ptr<raw_dfa>> &dfas,
- const RoseBuildImpl &build) {
- // First, group our DFAs into "small start" and "big start" sets.
- vector<unique_ptr<raw_dfa>> small_starts, big_starts;
- for (auto &rdfa : dfas) {
- u32 start_size = mcclellanStartReachSize(rdfa.get());
- if (start_size <= MAX_SMALL_START_REACH) {
- small_starts.push_back(move(rdfa));
- } else {
- big_starts.push_back(move(rdfa));
- }
- }
- dfas.clear();
-
- DEBUG_PRINTF("%zu dfas with small starts, %zu dfas with big starts\n",
- small_starts.size(), big_starts.size());
- mergeDfas(small_starts, MAX_DFA_STATES, nullptr, build.cc.grey);
- mergeDfas(big_starts, MAX_DFA_STATES, nullptr, build.cc.grey);
-
- // Rehome our groups into one vector.
- for (auto &rdfa : small_starts) {
- dfas.push_back(move(rdfa));
- }
- for (auto &rdfa : big_starts) {
- dfas.push_back(move(rdfa));
- }
-
- // Final test: if we've built two DFAs here that are small enough, we can
- // try to merge them.
- if (dfas.size() == 2) {
- size_t total_states = dfas[0]->states.size() + dfas[1]->states.size();
- if (total_states < DFA_PAIR_MERGE_THRESHOLD) {
- DEBUG_PRINTF("doing small pair merge\n");
- mergeDfas(dfas, MAX_DFA_STATES, nullptr, build.cc.grey);
- }
- }
-}
-
-static
+// Adds a vertex with the given reach.
+static
+NFAVertex add_vertex(NGHolder &h, const CharReach &cr) {
+ NFAVertex v = add_vertex(h);
+ h[v].char_reach = cr;
+ return v;
+}
+
+static
+void add_edges(const set<NFAVertex> &parents, NFAVertex v, NGHolder &h) {
+ for (auto p : parents) {
+ add_edge(p, v, h);
+ }
+}
+
+static
+set<NFAVertex> addDotsToGraph(NGHolder &h, NFAVertex start, u32 min, u32 max,
+ const CharReach &cr) {
+ DEBUG_PRINTF("adding [%u, %u] to graph\n", min, max);
+ u32 i = 0;
+ set<NFAVertex> curr;
+ curr.insert(start);
+ for (; i < min; i++) {
+ NFAVertex next = add_vertex(h, cr);
+ add_edges(curr, next, h);
+ curr.clear();
+ curr.insert(next);
+ }
+
+ assert(max != ROSE_BOUND_INF);
+
+ set<NFAVertex> orig = curr;
+ for (; i < max; i++) {
+ NFAVertex next = add_vertex(h, cr);
+ add_edges(curr, next, h);
+ curr.clear();
+ curr.insert(next);
+ curr.insert(orig.begin(), orig.end());
+ }
+
+ return curr;
+}
+
+static
+NFAVertex addToGraph(NGHolder &h, const set<NFAVertex> &curr,
+ const ue2_literal &s) {
+ DEBUG_PRINTF("adding %s to graph\n", dumpString(s).c_str());
+ assert(!s.empty());
+
+ ue2_literal::const_iterator it = s.begin();
+ NFAVertex u = add_vertex(h, *it);
+ add_edges(curr, u, h);
+
+ for (++it; it != s.end(); ++it) {
+ NFAVertex next = add_vertex(h, *it);
+ add_edge(u, next, h);
+ u = next;
+ }
+
+ return u;
+}
+
+static
+void mergeAnchoredDfas(vector<unique_ptr<raw_dfa>> &dfas,
+ const RoseBuildImpl &build) {
+ // First, group our DFAs into "small start" and "big start" sets.
+ vector<unique_ptr<raw_dfa>> small_starts, big_starts;
+ for (auto &rdfa : dfas) {
+ u32 start_size = mcclellanStartReachSize(rdfa.get());
+ if (start_size <= MAX_SMALL_START_REACH) {
+ small_starts.push_back(move(rdfa));
+ } else {
+ big_starts.push_back(move(rdfa));
+ }
+ }
+ dfas.clear();
+
+ DEBUG_PRINTF("%zu dfas with small starts, %zu dfas with big starts\n",
+ small_starts.size(), big_starts.size());
+ mergeDfas(small_starts, MAX_DFA_STATES, nullptr, build.cc.grey);
+ mergeDfas(big_starts, MAX_DFA_STATES, nullptr, build.cc.grey);
+
+ // Rehome our groups into one vector.
+ for (auto &rdfa : small_starts) {
+ dfas.push_back(move(rdfa));
+ }
+ for (auto &rdfa : big_starts) {
+ dfas.push_back(move(rdfa));
+ }
+
+ // Final test: if we've built two DFAs here that are small enough, we can
+ // try to merge them.
+ if (dfas.size() == 2) {
+ size_t total_states = dfas[0]->states.size() + dfas[1]->states.size();
+ if (total_states < DFA_PAIR_MERGE_THRESHOLD) {
+ DEBUG_PRINTF("doing small pair merge\n");
+ mergeDfas(dfas, MAX_DFA_STATES, nullptr, build.cc.grey);
+ }
+ }
+}
+
+static
void remapAnchoredReports(raw_dfa &rdfa, const vector<u32> &frag_map) {
for (dstate &ds : rdfa.states) {
assert(ds.reports_eod.empty()); // Not used in anchored matcher.
@@ -190,27 +190,27 @@ void remapAnchoredReports(raw_dfa &rdfa, const vector<u32> &frag_map) {
new_reports.insert(frag_map[id]);
}
ds.reports = std::move(new_reports);
- }
-}
-
+ }
+}
+
/**
* \brief Replaces the report ids currently in the dfas (rose graph literal
* ids) with the fragment id for each literal.
*/
-static
+static
void remapAnchoredReports(RoseBuildImpl &build, const vector<u32> &frag_map) {
for (auto &m : build.anchored_nfas) {
for (auto &rdfa : m.second) {
assert(rdfa);
remapAnchoredReports(*rdfa, frag_map);
}
- }
-}
-
+ }
+}
+
/**
* Returns mapping from literal ids to fragment ids.
*/
-static
+static
vector<u32> reverseFragMap(const RoseBuildImpl &build,
const vector<LitFragment> &fragments) {
vector<u32> rev(build.literal_info.size(), NO_FRAG_ID);
@@ -218,16 +218,16 @@ vector<u32> reverseFragMap(const RoseBuildImpl &build,
for (u32 lit_id : f.lit_ids) {
assert(lit_id < rev.size());
rev[lit_id] = f.fragment_id;
- }
- }
+ }
+ }
return rev;
-}
-
+}
+
/**
* \brief Replace the reports (which are literal final_ids) in the given
* raw_dfa with program offsets.
*/
-static
+static
void remapIdsToPrograms(const vector<LitFragment> &fragments, raw_dfa &rdfa) {
for (dstate &ds : rdfa.states) {
assert(ds.reports_eod.empty()); // Not used in anchored matcher.
@@ -247,609 +247,609 @@ void remapIdsToPrograms(const vector<LitFragment> &fragments, raw_dfa &rdfa) {
static
unique_ptr<NGHolder> populate_holder(const simple_anchored_info &sai,
const flat_set<u32> &exit_ids) {
- DEBUG_PRINTF("populating holder for ^.{%u,%u}%s\n", sai.min_bound,
- sai.max_bound, dumpString(sai.literal).c_str());
+ DEBUG_PRINTF("populating holder for ^.{%u,%u}%s\n", sai.min_bound,
+ sai.max_bound, dumpString(sai.literal).c_str());
auto h_ptr = std::make_unique<NGHolder>();
NGHolder &h = *h_ptr;
auto ends = addDotsToGraph(h, h.start, sai.min_bound, sai.max_bound,
CharReach::dot());
- NFAVertex v = addToGraph(h, ends, sai.literal);
- add_edge(v, h.accept, h);
- h[v].reports.insert(exit_ids.begin(), exit_ids.end());
+ NFAVertex v = addToGraph(h, ends, sai.literal);
+ add_edge(v, h.accept, h);
+ h[v].reports.insert(exit_ids.begin(), exit_ids.end());
return h_ptr;
-}
-
+}
+
u32 anchoredStateSize(const anchored_matcher_info &atable) {
const struct anchored_matcher_info *curr = &atable;
-
- // Walk the list until we find the last element; total state size will be
- // that engine's state offset plus its state requirement.
- while (curr->next_offset) {
- curr = (const anchored_matcher_info *)
- ((const char *)curr + curr->next_offset);
- }
-
- const NFA *nfa = (const NFA *)((const char *)curr + sizeof(*curr));
+
+ // Walk the list until we find the last element; total state size will be
+ // that engine's state offset plus its state requirement.
+ while (curr->next_offset) {
+ curr = (const anchored_matcher_info *)
+ ((const char *)curr + curr->next_offset);
+ }
+
+ const NFA *nfa = (const NFA *)((const char *)curr + sizeof(*curr));
return curr->state_offset + nfa->streamStateSize;
-}
-
-namespace {
-
+}
+
+namespace {
+
using nfa_state_set = bitfield<ANCHORED_NFA_STATE_LIMIT>;
-
-struct Holder_StateSet {
- Holder_StateSet() : wdelay(0) {}
-
- nfa_state_set wrap_state;
- u32 wdelay;
-
- bool operator==(const Holder_StateSet &b) const {
- return wdelay == b.wdelay && wrap_state == b.wrap_state;
- }
+
+struct Holder_StateSet {
+ Holder_StateSet() : wdelay(0) {}
+
+ nfa_state_set wrap_state;
+ u32 wdelay;
+
+ bool operator==(const Holder_StateSet &b) const {
+ return wdelay == b.wdelay && wrap_state == b.wrap_state;
+ }
size_t hash() const {
return hash_all(wrap_state, wdelay);
}
-};
-
-class Automaton_Holder {
-public:
+};
+
+class Automaton_Holder {
+public:
using StateSet = Holder_StateSet;
using StateMap = ue2_unordered_map<StateSet, dstate_id_t>;
-
- explicit Automaton_Holder(const NGHolder &g_in) : g(g_in) {
- for (auto v : vertices_range(g)) {
- vertexToIndex[v] = indexToVertex.size();
- indexToVertex.push_back(v);
- }
-
- assert(indexToVertex.size() <= ANCHORED_NFA_STATE_LIMIT);
-
- DEBUG_PRINTF("%zu states\n", indexToVertex.size());
- init.wdelay = 0;
- init.wrap_state.set(vertexToIndex[g.start]);
-
- DEBUG_PRINTF("init wdelay %u\n", init.wdelay);
-
- calculateAlphabet();
- cr_by_index = populateCR(g, indexToVertex, alpha);
- }
-
-private:
- void calculateAlphabet() {
- vector<CharReach> esets(1, CharReach::dot());
-
- for (auto v : indexToVertex) {
- const CharReach &cr = g[v].char_reach;
-
- for (size_t i = 0; i < esets.size(); i++) {
- if (esets[i].count() == 1) {
- continue;
- }
-
- CharReach t = cr & esets[i];
-
- if (t.any() && t != esets[i]) {
- esets[i] &= ~t;
- esets.push_back(t);
- }
- }
- }
-
- alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
- }
-
-public:
- void transition(const StateSet &in, StateSet *next) {
- /* track the dfa state, reset nfa states */
- u32 wdelay = in.wdelay ? in.wdelay - 1 : 0;
-
- for (symbol_t s = 0; s < alphasize; s++) {
- next[s].wrap_state.reset();
- next[s].wdelay = wdelay;
- }
-
- nfa_state_set succ;
-
- if (wdelay != in.wdelay) {
- DEBUG_PRINTF("enabling start\n");
- succ.set(vertexToIndex[g.startDs]);
- }
-
- for (size_t i = in.wrap_state.find_first(); i != nfa_state_set::npos;
- i = in.wrap_state.find_next(i)) {
- NFAVertex v = indexToVertex[i];
- for (auto w : adjacent_vertices_range(v, g)) {
- if (!contains(vertexToIndex, w)
- || w == g.accept || w == g.acceptEod) {
- continue;
- }
-
- if (w == g.startDs) {
- continue;
- }
-
- succ.set(vertexToIndex[w]);
- }
- }
-
- for (size_t j = succ.find_first(); j != nfa_state_set::npos;
- j = succ.find_next(j)) {
- const CharReach &cr = cr_by_index[j];
- for (size_t s = cr.find_first(); s != CharReach::npos;
- s = cr.find_next(s)) {
- next[s].wrap_state.set(j); /* pre alpha'ed */
- }
- }
-
- next[alpha[TOP]] = in;
- }
-
- const vector<StateSet> initial() {
- return {init};
- }
-
- void reports(const StateSet &in, flat_set<ReportID> &rv) {
- rv.clear();
- for (size_t i = in.wrap_state.find_first(); i != nfa_state_set::npos;
- i = in.wrap_state.find_next(i)) {
- NFAVertex v = indexToVertex[i];
- if (edge(v, g.accept, g).second) {
- assert(!g[v].reports.empty());
- insert(&rv, g[v].reports);
- } else {
- assert(g[v].reports.empty());
- }
- }
- }
-
- void reportsEod(const StateSet &, flat_set<ReportID> &r) {
- r.clear();
- }
-
- static bool canPrune(const flat_set<ReportID> &) {
- /* used by ng_ to prune states after highlander accepts */
- return false;
- }
-
-private:
- const NGHolder &g;
+
+ explicit Automaton_Holder(const NGHolder &g_in) : g(g_in) {
+ for (auto v : vertices_range(g)) {
+ vertexToIndex[v] = indexToVertex.size();
+ indexToVertex.push_back(v);
+ }
+
+ assert(indexToVertex.size() <= ANCHORED_NFA_STATE_LIMIT);
+
+ DEBUG_PRINTF("%zu states\n", indexToVertex.size());
+ init.wdelay = 0;
+ init.wrap_state.set(vertexToIndex[g.start]);
+
+ DEBUG_PRINTF("init wdelay %u\n", init.wdelay);
+
+ calculateAlphabet();
+ cr_by_index = populateCR(g, indexToVertex, alpha);
+ }
+
+private:
+ void calculateAlphabet() {
+ vector<CharReach> esets(1, CharReach::dot());
+
+ for (auto v : indexToVertex) {
+ const CharReach &cr = g[v].char_reach;
+
+ for (size_t i = 0; i < esets.size(); i++) {
+ if (esets[i].count() == 1) {
+ continue;
+ }
+
+ CharReach t = cr & esets[i];
+
+ if (t.any() && t != esets[i]) {
+ esets[i] &= ~t;
+ esets.push_back(t);
+ }
+ }
+ }
+
+ alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
+ }
+
+public:
+ void transition(const StateSet &in, StateSet *next) {
+ /* track the dfa state, reset nfa states */
+ u32 wdelay = in.wdelay ? in.wdelay - 1 : 0;
+
+ for (symbol_t s = 0; s < alphasize; s++) {
+ next[s].wrap_state.reset();
+ next[s].wdelay = wdelay;
+ }
+
+ nfa_state_set succ;
+
+ if (wdelay != in.wdelay) {
+ DEBUG_PRINTF("enabling start\n");
+ succ.set(vertexToIndex[g.startDs]);
+ }
+
+ for (size_t i = in.wrap_state.find_first(); i != nfa_state_set::npos;
+ i = in.wrap_state.find_next(i)) {
+ NFAVertex v = indexToVertex[i];
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (!contains(vertexToIndex, w)
+ || w == g.accept || w == g.acceptEod) {
+ continue;
+ }
+
+ if (w == g.startDs) {
+ continue;
+ }
+
+ succ.set(vertexToIndex[w]);
+ }
+ }
+
+ for (size_t j = succ.find_first(); j != nfa_state_set::npos;
+ j = succ.find_next(j)) {
+ const CharReach &cr = cr_by_index[j];
+ for (size_t s = cr.find_first(); s != CharReach::npos;
+ s = cr.find_next(s)) {
+ next[s].wrap_state.set(j); /* pre alpha'ed */
+ }
+ }
+
+ next[alpha[TOP]] = in;
+ }
+
+ const vector<StateSet> initial() {
+ return {init};
+ }
+
+ void reports(const StateSet &in, flat_set<ReportID> &rv) {
+ rv.clear();
+ for (size_t i = in.wrap_state.find_first(); i != nfa_state_set::npos;
+ i = in.wrap_state.find_next(i)) {
+ NFAVertex v = indexToVertex[i];
+ if (edge(v, g.accept, g).second) {
+ assert(!g[v].reports.empty());
+ insert(&rv, g[v].reports);
+ } else {
+ assert(g[v].reports.empty());
+ }
+ }
+ }
+
+ void reportsEod(const StateSet &, flat_set<ReportID> &r) {
+ r.clear();
+ }
+
+ static bool canPrune(const flat_set<ReportID> &) {
+ /* used by ng_ to prune states after highlander accepts */
+ return false;
+ }
+
+private:
+ const NGHolder &g;
unordered_map<NFAVertex, u32> vertexToIndex;
- vector<NFAVertex> indexToVertex;
- vector<CharReach> cr_by_index;
- StateSet init;
-public:
- StateSet dead;
- array<u16, ALPHABET_SIZE> alpha;
- array<u16, ALPHABET_SIZE> unalpha;
- u16 alphasize;
-};
-
-} // namespace
-
-static
-bool check_dupe(const raw_dfa &rdfa,
- const vector<unique_ptr<raw_dfa>> &existing, ReportID *remap) {
- if (!remap) {
- DEBUG_PRINTF("no remap\n");
- return false;
- }
-
- set<ReportID> rdfa_reports;
- for (const auto &ds : rdfa.states) {
- rdfa_reports.insert(ds.reports.begin(), ds.reports.end());
- }
- if (rdfa_reports.size() != 1) {
- return false; /* too complicated for now would need mapping TODO */
- }
-
- for (const auto &e_rdfa : existing) {
- assert(e_rdfa);
- const raw_dfa &b = *e_rdfa;
-
- if (rdfa.start_anchored != b.start_anchored ||
- rdfa.alpha_size != b.alpha_size ||
- rdfa.states.size() != b.states.size() ||
- rdfa.alpha_remap != b.alpha_remap) {
- continue;
- }
-
- set<ReportID> b_reports;
-
- for (u32 i = 0; i < b.states.size(); i++) {
- assert(b.states[i].reports_eod.empty());
- assert(rdfa.states[i].reports_eod.empty());
- if (rdfa.states[i].reports.size() != b.states[i].reports.size()) {
- goto next_dfa;
- }
- b_reports.insert(b.states[i].reports.begin(),
- b.states[i].reports.end());
-
- assert(rdfa.states[i].next.size() == b.states[i].next.size());
- if (!equal(rdfa.states[i].next.begin(), rdfa.states[i].next.end(),
- b.states[i].next.begin())) {
- goto next_dfa;
- }
- }
-
- if (b_reports.size() != 1) {
- continue;
- }
-
- *remap = *b_reports.begin();
- DEBUG_PRINTF("dupe found remapping to %u\n", *remap);
- return true;
- next_dfa:;
- }
-
- return false;
-}
-
-static
+ vector<NFAVertex> indexToVertex;
+ vector<CharReach> cr_by_index;
+ StateSet init;
+public:
+ StateSet dead;
+ array<u16, ALPHABET_SIZE> alpha;
+ array<u16, ALPHABET_SIZE> unalpha;
+ u16 alphasize;
+};
+
+} // namespace
+
+static
+bool check_dupe(const raw_dfa &rdfa,
+ const vector<unique_ptr<raw_dfa>> &existing, ReportID *remap) {
+ if (!remap) {
+ DEBUG_PRINTF("no remap\n");
+ return false;
+ }
+
+ set<ReportID> rdfa_reports;
+ for (const auto &ds : rdfa.states) {
+ rdfa_reports.insert(ds.reports.begin(), ds.reports.end());
+ }
+ if (rdfa_reports.size() != 1) {
+ return false; /* too complicated for now would need mapping TODO */
+ }
+
+ for (const auto &e_rdfa : existing) {
+ assert(e_rdfa);
+ const raw_dfa &b = *e_rdfa;
+
+ if (rdfa.start_anchored != b.start_anchored ||
+ rdfa.alpha_size != b.alpha_size ||
+ rdfa.states.size() != b.states.size() ||
+ rdfa.alpha_remap != b.alpha_remap) {
+ continue;
+ }
+
+ set<ReportID> b_reports;
+
+ for (u32 i = 0; i < b.states.size(); i++) {
+ assert(b.states[i].reports_eod.empty());
+ assert(rdfa.states[i].reports_eod.empty());
+ if (rdfa.states[i].reports.size() != b.states[i].reports.size()) {
+ goto next_dfa;
+ }
+ b_reports.insert(b.states[i].reports.begin(),
+ b.states[i].reports.end());
+
+ assert(rdfa.states[i].next.size() == b.states[i].next.size());
+ if (!equal(rdfa.states[i].next.begin(), rdfa.states[i].next.end(),
+ b.states[i].next.begin())) {
+ goto next_dfa;
+ }
+ }
+
+ if (b_reports.size() != 1) {
+ continue;
+ }
+
+ *remap = *b_reports.begin();
+ DEBUG_PRINTF("dupe found remapping to %u\n", *remap);
+ return true;
+ next_dfa:;
+ }
+
+ return false;
+}
+
+static
bool check_dupe_simple(const RoseBuildImpl &build, u32 min_bound, u32 max_bound,
- const ue2_literal &lit, ReportID *remap) {
- if (!remap) {
- DEBUG_PRINTF("no remap\n");
- return false;
- }
-
- simple_anchored_info sai(min_bound, max_bound, lit);
+ const ue2_literal &lit, ReportID *remap) {
+ if (!remap) {
+ DEBUG_PRINTF("no remap\n");
+ return false;
+ }
+
+ simple_anchored_info sai(min_bound, max_bound, lit);
if (contains(build.anchored_simple, sai)) {
*remap = *build.anchored_simple.at(sai).begin();
- return true;
- }
-
- return false;
-}
-
-static
-NFAVertex extractLiteral(const NGHolder &h, ue2_literal *lit) {
- vector<NFAVertex> lit_verts;
- NFAVertex v = h.accept;
- while ((v = getSoleSourceVertex(h, v))) {
- const CharReach &cr = h[v].char_reach;
- if (cr.count() > 1 && !cr.isCaselessChar()) {
- break;
- }
- lit_verts.push_back(v);
- }
-
- if (lit_verts.empty()) {
+ return true;
+ }
+
+ return false;
+}
+
+static
+NFAVertex extractLiteral(const NGHolder &h, ue2_literal *lit) {
+ vector<NFAVertex> lit_verts;
+ NFAVertex v = h.accept;
+ while ((v = getSoleSourceVertex(h, v))) {
+ const CharReach &cr = h[v].char_reach;
+ if (cr.count() > 1 && !cr.isCaselessChar()) {
+ break;
+ }
+ lit_verts.push_back(v);
+ }
+
+ if (lit_verts.empty()) {
return NGHolder::null_vertex();
- }
-
- bool nocase = false;
- bool case_set = false;
-
- for (auto it = lit_verts.rbegin(), ite = lit_verts.rend(); it != ite;
- ++it) {
- const CharReach &cr = h[*it].char_reach;
- if (cr.isAlpha()) {
- bool cr_nocase = cr.count() != 1;
- if (case_set && cr_nocase != nocase) {
+ }
+
+ bool nocase = false;
+ bool case_set = false;
+
+ for (auto it = lit_verts.rbegin(), ite = lit_verts.rend(); it != ite;
+ ++it) {
+ const CharReach &cr = h[*it].char_reach;
+ if (cr.isAlpha()) {
+ bool cr_nocase = cr.count() != 1;
+ if (case_set && cr_nocase != nocase) {
return NGHolder::null_vertex();
- }
-
- case_set = true;
- nocase = cr_nocase;
- lit->push_back(cr.find_first(), nocase);
- } else {
- lit->push_back(cr.find_first(), false);
- }
- }
-
- return lit_verts.back();
-}
-
-static
-bool isSimple(const NGHolder &h, u32 *min_bound, u32 *max_bound,
- ue2_literal *lit, u32 *report) {
- assert(!proper_out_degree(h.startDs, h));
- assert(in_degree(h.acceptEod, h) == 1);
-
- DEBUG_PRINTF("looking for simple case\n");
- NFAVertex lit_head = extractLiteral(h, lit);
-
+ }
+
+ case_set = true;
+ nocase = cr_nocase;
+ lit->push_back(cr.find_first(), nocase);
+ } else {
+ lit->push_back(cr.find_first(), false);
+ }
+ }
+
+ return lit_verts.back();
+}
+
+static
+bool isSimple(const NGHolder &h, u32 *min_bound, u32 *max_bound,
+ ue2_literal *lit, u32 *report) {
+ assert(!proper_out_degree(h.startDs, h));
+ assert(in_degree(h.acceptEod, h) == 1);
+
+ DEBUG_PRINTF("looking for simple case\n");
+ NFAVertex lit_head = extractLiteral(h, lit);
+
if (lit_head == NGHolder::null_vertex()) {
- DEBUG_PRINTF("no literal found\n");
- return false;
- }
-
- const auto &reps = h[*inv_adjacent_vertices(h.accept, h).first].reports;
-
- if (reps.size() != 1) {
- return false;
- }
- *report = *reps.begin();
-
- assert(!lit->empty());
-
- set<NFAVertex> rep_exits;
-
- /* lit should only be connected to dot vertices */
- for (auto u : inv_adjacent_vertices_range(lit_head, h)) {
+ DEBUG_PRINTF("no literal found\n");
+ return false;
+ }
+
+ const auto &reps = h[*inv_adjacent_vertices(h.accept, h).first].reports;
+
+ if (reps.size() != 1) {
+ return false;
+ }
+ *report = *reps.begin();
+
+ assert(!lit->empty());
+
+ set<NFAVertex> rep_exits;
+
+ /* lit should only be connected to dot vertices */
+ for (auto u : inv_adjacent_vertices_range(lit_head, h)) {
DEBUG_PRINTF("checking %zu\n", h[u].index);
- if (!h[u].char_reach.all()) {
- return false;
- }
-
- if (u != h.start) {
- rep_exits.insert(u);
- }
- }
-
- if (rep_exits.empty()) {
- DEBUG_PRINTF("direct anchored\n");
- assert(edge(h.start, lit_head, h).second);
- *min_bound = 0;
- *max_bound = 0;
- return true;
- }
-
- NFAVertex key = *rep_exits.begin();
-
- // Special-case the check for '^.foo' or '^.?foo'.
- if (rep_exits.size() == 1 && edge(h.start, key, h).second &&
- out_degree(key, h) == 1) {
- DEBUG_PRINTF("one exit\n");
- assert(edge(h.start, h.startDs, h).second);
- size_t num_enters = out_degree(h.start, h);
- if (num_enters == 2) {
- DEBUG_PRINTF("^.{1,1} prefix\n");
- *min_bound = 1;
- *max_bound = 1;
- return true;
- }
- if (num_enters == 3 && edge(h.start, lit_head, h).second) {
- DEBUG_PRINTF("^.{0,1} prefix\n");
- *min_bound = 0;
- *max_bound = 1;
- return true;
- }
- }
-
- vector<GraphRepeatInfo> repeats;
- findRepeats(h, 2, &repeats);
-
- vector<GraphRepeatInfo>::const_iterator it;
- for (it = repeats.begin(); it != repeats.end(); ++it) {
- DEBUG_PRINTF("checking.. %zu verts\n", it->vertices.size());
- if (find(it->vertices.begin(), it->vertices.end(), key)
- != it->vertices.end()) {
- break;
- }
- }
- if (it == repeats.end()) {
- DEBUG_PRINTF("no repeat found\n");
- return false;
- }
-
- set<NFAVertex> rep_verts;
- insert(&rep_verts, it->vertices);
- if (!is_subset_of(rep_exits, rep_verts)) {
- DEBUG_PRINTF("bad exit check\n");
- return false;
- }
-
- set<NFAVertex> rep_enters;
- insert(&rep_enters, adjacent_vertices(h.start, h));
- rep_enters.erase(lit_head);
- rep_enters.erase(h.startDs);
-
- if (!is_subset_of(rep_enters, rep_verts)) {
- DEBUG_PRINTF("bad entry check\n");
- return false;
- }
-
- u32 min_b = it->repeatMin;
- if (edge(h.start, lit_head, h).second) { /* jump edge */
- if (min_b != 1) {
- DEBUG_PRINTF("jump edge around repeat with min bound\n");
- return false;
- }
-
- min_b = 0;
- }
- *min_bound = min_b;
- *max_bound = it->repeatMax;
-
- DEBUG_PRINTF("repeat %u %u before %s\n", *min_bound, *max_bound,
- dumpString(*lit).c_str());
- return true;
-}
-
-static
+ if (!h[u].char_reach.all()) {
+ return false;
+ }
+
+ if (u != h.start) {
+ rep_exits.insert(u);
+ }
+ }
+
+ if (rep_exits.empty()) {
+ DEBUG_PRINTF("direct anchored\n");
+ assert(edge(h.start, lit_head, h).second);
+ *min_bound = 0;
+ *max_bound = 0;
+ return true;
+ }
+
+ NFAVertex key = *rep_exits.begin();
+
+ // Special-case the check for '^.foo' or '^.?foo'.
+ if (rep_exits.size() == 1 && edge(h.start, key, h).second &&
+ out_degree(key, h) == 1) {
+ DEBUG_PRINTF("one exit\n");
+ assert(edge(h.start, h.startDs, h).second);
+ size_t num_enters = out_degree(h.start, h);
+ if (num_enters == 2) {
+ DEBUG_PRINTF("^.{1,1} prefix\n");
+ *min_bound = 1;
+ *max_bound = 1;
+ return true;
+ }
+ if (num_enters == 3 && edge(h.start, lit_head, h).second) {
+ DEBUG_PRINTF("^.{0,1} prefix\n");
+ *min_bound = 0;
+ *max_bound = 1;
+ return true;
+ }
+ }
+
+ vector<GraphRepeatInfo> repeats;
+ findRepeats(h, 2, &repeats);
+
+ vector<GraphRepeatInfo>::const_iterator it;
+ for (it = repeats.begin(); it != repeats.end(); ++it) {
+ DEBUG_PRINTF("checking.. %zu verts\n", it->vertices.size());
+ if (find(it->vertices.begin(), it->vertices.end(), key)
+ != it->vertices.end()) {
+ break;
+ }
+ }
+ if (it == repeats.end()) {
+ DEBUG_PRINTF("no repeat found\n");
+ return false;
+ }
+
+ set<NFAVertex> rep_verts;
+ insert(&rep_verts, it->vertices);
+ if (!is_subset_of(rep_exits, rep_verts)) {
+ DEBUG_PRINTF("bad exit check\n");
+ return false;
+ }
+
+ set<NFAVertex> rep_enters;
+ insert(&rep_enters, adjacent_vertices(h.start, h));
+ rep_enters.erase(lit_head);
+ rep_enters.erase(h.startDs);
+
+ if (!is_subset_of(rep_enters, rep_verts)) {
+ DEBUG_PRINTF("bad entry check\n");
+ return false;
+ }
+
+ u32 min_b = it->repeatMin;
+ if (edge(h.start, lit_head, h).second) { /* jump edge */
+ if (min_b != 1) {
+ DEBUG_PRINTF("jump edge around repeat with min bound\n");
+ return false;
+ }
+
+ min_b = 0;
+ }
+ *min_bound = min_b;
+ *max_bound = it->repeatMax;
+
+ DEBUG_PRINTF("repeat %u %u before %s\n", *min_bound, *max_bound,
+ dumpString(*lit).c_str());
+ return true;
+}
+
+static
int finalise_out(RoseBuildImpl &build, const NGHolder &h,
- const Automaton_Holder &autom, unique_ptr<raw_dfa> out_dfa,
- ReportID *remap) {
- u32 min_bound = ~0U;
- u32 max_bound = ~0U;
- ue2_literal lit;
- u32 simple_report = MO_INVALID_IDX;
- if (isSimple(h, &min_bound, &max_bound, &lit, &simple_report)) {
- assert(simple_report != MO_INVALID_IDX);
+ const Automaton_Holder &autom, unique_ptr<raw_dfa> out_dfa,
+ ReportID *remap) {
+ u32 min_bound = ~0U;
+ u32 max_bound = ~0U;
+ ue2_literal lit;
+ u32 simple_report = MO_INVALID_IDX;
+ if (isSimple(h, &min_bound, &max_bound, &lit, &simple_report)) {
+ assert(simple_report != MO_INVALID_IDX);
if (check_dupe_simple(build, min_bound, max_bound, lit, remap)) {
- DEBUG_PRINTF("found duplicate remapping to %u\n", *remap);
- return ANCHORED_REMAP;
- }
- DEBUG_PRINTF("add with report %u\n", simple_report);
+ DEBUG_PRINTF("found duplicate remapping to %u\n", *remap);
+ return ANCHORED_REMAP;
+ }
+ DEBUG_PRINTF("add with report %u\n", simple_report);
build.anchored_simple[simple_anchored_info(min_bound, max_bound, lit)]
- .insert(simple_report);
- return ANCHORED_SUCCESS;
- }
-
- out_dfa->start_anchored = INIT_STATE;
- out_dfa->start_floating = DEAD_STATE;
- out_dfa->alpha_size = autom.alphasize;
- out_dfa->alpha_remap = autom.alpha;
- auto hash = hash_dfa_no_reports(*out_dfa);
+ .insert(simple_report);
+ return ANCHORED_SUCCESS;
+ }
+
+ out_dfa->start_anchored = INIT_STATE;
+ out_dfa->start_floating = DEAD_STATE;
+ out_dfa->alpha_size = autom.alphasize;
+ out_dfa->alpha_remap = autom.alpha;
+ auto hash = hash_dfa_no_reports(*out_dfa);
if (check_dupe(*out_dfa, build.anchored_nfas[hash], remap)) {
- return ANCHORED_REMAP;
- }
+ return ANCHORED_REMAP;
+ }
build.anchored_nfas[hash].push_back(move(out_dfa));
- return ANCHORED_SUCCESS;
-}
-
-static
+ return ANCHORED_SUCCESS;
+}
+
+static
int addAutomaton(RoseBuildImpl &build, const NGHolder &h, ReportID *remap) {
- if (num_vertices(h) > ANCHORED_NFA_STATE_LIMIT) {
- DEBUG_PRINTF("autom bad!\n");
- return ANCHORED_FAIL;
- }
-
- Automaton_Holder autom(h);
-
+ if (num_vertices(h) > ANCHORED_NFA_STATE_LIMIT) {
+ DEBUG_PRINTF("autom bad!\n");
+ return ANCHORED_FAIL;
+ }
+
+ Automaton_Holder autom(h);
+
auto out_dfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
if (determinise(autom, out_dfa->states, MAX_DFA_STATES)) {
return finalise_out(build, h, autom, move(out_dfa), remap);
- }
-
- DEBUG_PRINTF("determinise failed\n");
- return ANCHORED_FAIL;
-}
-
-static
-void setReports(NGHolder &h, const map<NFAVertex, set<u32>> &reportMap,
+ }
+
+ DEBUG_PRINTF("determinise failed\n");
+ return ANCHORED_FAIL;
+}
+
+static
+void setReports(NGHolder &h, const map<NFAVertex, set<u32>> &reportMap,
const unordered_map<NFAVertex, NFAVertex> &orig_to_copy) {
- for (const auto &m : reportMap) {
- NFAVertex t = orig_to_copy.at(m.first);
- assert(!m.second.empty());
- add_edge(t, h.accept, h);
- insert(&h[t].reports, m.second);
- }
-}
-
+ for (const auto &m : reportMap) {
+ NFAVertex t = orig_to_copy.at(m.first);
+ assert(!m.second.empty());
+ add_edge(t, h.accept, h);
+ insert(&h[t].reports, m.second);
+ }
+}
+
int addAnchoredNFA(RoseBuildImpl &build, const NGHolder &wrapper,
- const map<NFAVertex, set<u32>> &reportMap) {
- NGHolder h;
+ const map<NFAVertex, set<u32>> &reportMap) {
+ NGHolder h;
unordered_map<NFAVertex, NFAVertex> orig_to_copy;
- cloneHolder(h, wrapper, &orig_to_copy);
- clear_in_edges(h.accept, h);
- clear_in_edges(h.acceptEod, h);
- add_edge(h.accept, h.acceptEod, h);
- clearReports(h);
- setReports(h, reportMap, orig_to_copy);
-
+ cloneHolder(h, wrapper, &orig_to_copy);
+ clear_in_edges(h.accept, h);
+ clear_in_edges(h.acceptEod, h);
+ add_edge(h.accept, h.acceptEod, h);
+ clearReports(h);
+ setReports(h, reportMap, orig_to_copy);
+
return addAutomaton(build, h, nullptr);
-}
-
+}
+
int addToAnchoredMatcher(RoseBuildImpl &build, const NGHolder &anchored,
- u32 exit_id, ReportID *remap) {
- NGHolder h;
- cloneHolder(h, anchored);
- clearReports(h);
- assert(in_degree(h.acceptEod, h) == 1);
- for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
- h[v].reports.clear();
- h[v].reports.insert(exit_id);
- }
-
+ u32 exit_id, ReportID *remap) {
+ NGHolder h;
+ cloneHolder(h, anchored);
+ clearReports(h);
+ assert(in_degree(h.acceptEod, h) == 1);
+ for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
+ h[v].reports.clear();
+ h[v].reports.insert(exit_id);
+ }
+
return addAutomaton(build, h, remap);
-}
-
-static
+}
+
+static
void buildSimpleDfas(const RoseBuildImpl &build, const vector<u32> &frag_map,
- vector<unique_ptr<raw_dfa>> *anchored_dfas) {
- /* we should have determinised all of these before so there should be no
- * chance of failure. */
+ vector<unique_ptr<raw_dfa>> *anchored_dfas) {
+ /* we should have determinised all of these before so there should be no
+ * chance of failure. */
flat_set<u32> exit_ids;
for (const auto &simple : build.anchored_simple) {
exit_ids.clear();
- for (auto lit_id : simple.second) {
+ for (auto lit_id : simple.second) {
assert(lit_id < frag_map.size());
exit_ids.insert(frag_map[lit_id]);
- }
+ }
auto h = populate_holder(simple.first, exit_ids);
Automaton_Holder autom(*h);
auto rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
UNUSED bool rv = determinise(autom, rdfa->states, MAX_DFA_STATES);
assert(rv);
- rdfa->start_anchored = INIT_STATE;
- rdfa->start_floating = DEAD_STATE;
- rdfa->alpha_size = autom.alphasize;
- rdfa->alpha_remap = autom.alpha;
- anchored_dfas->push_back(move(rdfa));
- }
-}
-
-/**
- * Fill the given vector with all of the raw_dfas we need to compile into the
- * anchored matcher. Takes ownership of the input structures, clearing them
- * from RoseBuildImpl.
- */
-static
+ rdfa->start_anchored = INIT_STATE;
+ rdfa->start_floating = DEAD_STATE;
+ rdfa->alpha_size = autom.alphasize;
+ rdfa->alpha_remap = autom.alpha;
+ anchored_dfas->push_back(move(rdfa));
+ }
+}
+
+/**
+ * Fill the given vector with all of the raw_dfas we need to compile into the
+ * anchored matcher. Takes ownership of the input structures, clearing them
+ * from RoseBuildImpl.
+ */
+static
vector<unique_ptr<raw_dfa>> getAnchoredDfas(RoseBuildImpl &build,
const vector<u32> &frag_map) {
vector<unique_ptr<raw_dfa>> dfas;
- // DFAs that already exist as raw_dfas.
+ // DFAs that already exist as raw_dfas.
for (auto &anch_dfas : build.anchored_nfas) {
- for (auto &rdfa : anch_dfas.second) {
+ for (auto &rdfa : anch_dfas.second) {
dfas.push_back(move(rdfa));
- }
- }
+ }
+ }
build.anchored_nfas.clear();
-
- // DFAs we currently have as simple literals.
+
+ // DFAs we currently have as simple literals.
if (!build.anchored_simple.empty()) {
buildSimpleDfas(build, frag_map, &dfas);
build.anchored_simple.clear();
- }
+ }
return dfas;
-}
-
-/**
- * \brief Builds our anchored DFAs into runtime NFAs.
- *
- * Constructs a vector of NFA structures and a vector of their start offsets
- * (number of dots removed from the prefix) from the raw_dfa structures given.
- *
- * Note: frees the raw_dfa structures on completion.
- *
- * \return Total bytes required for the complete anchored matcher.
- */
-static
+}
+
+/**
+ * \brief Builds our anchored DFAs into runtime NFAs.
+ *
+ * Constructs a vector of NFA structures and a vector of their start offsets
+ * (number of dots removed from the prefix) from the raw_dfa structures given.
+ *
+ * Note: frees the raw_dfa structures on completion.
+ *
+ * \return Total bytes required for the complete anchored matcher.
+ */
+static
size_t buildNfas(vector<raw_dfa> &anchored_dfas,
vector<bytecode_ptr<NFA>> *nfas,
vector<u32> *start_offset, const CompileContext &cc,
const ReportManager &rm) {
- const size_t num_dfas = anchored_dfas.size();
-
- nfas->reserve(num_dfas);
- start_offset->reserve(num_dfas);
-
- size_t total_size = 0;
-
- for (auto &rdfa : anchored_dfas) {
+ const size_t num_dfas = anchored_dfas.size();
+
+ nfas->reserve(num_dfas);
+ start_offset->reserve(num_dfas);
+
+ size_t total_size = 0;
+
+ for (auto &rdfa : anchored_dfas) {
u32 removed_dots = remove_leading_dots(rdfa);
- start_offset->push_back(removed_dots);
-
+ start_offset->push_back(removed_dots);
+
minimize_hopcroft(rdfa, cc.grey);
-
+
auto nfa = mcclellanCompile(rdfa, cc, rm, false);
- if (!nfa) {
- assert(0);
- throw std::bad_alloc();
- }
-
- assert(nfa->length);
- total_size += ROUNDUP_CL(sizeof(anchored_matcher_info) + nfa->length);
- nfas->push_back(move(nfa));
- }
-
- // We no longer need to keep the raw_dfa structures around.
- anchored_dfas.clear();
-
- return total_size;
-}
-
+ if (!nfa) {
+ assert(0);
+ throw std::bad_alloc();
+ }
+
+ assert(nfa->length);
+ total_size += ROUNDUP_CL(sizeof(anchored_matcher_info) + nfa->length);
+ nfas->push_back(move(nfa));
+ }
+
+ // We no longer need to keep the raw_dfa structures around.
+ anchored_dfas.clear();
+
+ return total_size;
+}
+
vector<raw_dfa> buildAnchoredDfas(RoseBuildImpl &build,
const vector<LitFragment> &fragments) {
vector<raw_dfa> dfas;
-
+
if (build.anchored_nfas.empty() && build.anchored_simple.empty()) {
- DEBUG_PRINTF("empty\n");
+ DEBUG_PRINTF("empty\n");
return dfas;
}
@@ -874,50 +874,50 @@ buildAnchoredMatcher(RoseBuildImpl &build, const vector<LitFragment> &fragments,
if (dfas.empty()) {
DEBUG_PRINTF("empty\n");
- return nullptr;
- }
-
+ return nullptr;
+ }
+
for (auto &rdfa : dfas) {
remapIdsToPrograms(fragments, rdfa);
}
-
+
vector<bytecode_ptr<NFA>> nfas;
- vector<u32> start_offset; // start offset for each dfa (dots removed)
+ vector<u32> start_offset; // start offset for each dfa (dots removed)
size_t total_size = buildNfas(dfas, &nfas, &start_offset, cc, build.rm);
-
- if (total_size > cc.grey.limitRoseAnchoredSize) {
- throw ResourceLimitError();
- }
-
+
+ if (total_size > cc.grey.limitRoseAnchoredSize) {
+ throw ResourceLimitError();
+ }
+
auto atable =
make_zeroed_bytecode_ptr<anchored_matcher_info>(total_size, 64);
- char *curr = (char *)atable.get();
-
- u32 state_offset = 0;
- for (size_t i = 0; i < nfas.size(); i++) {
- const NFA *nfa = nfas[i].get();
- anchored_matcher_info *ami = (anchored_matcher_info *)curr;
- char *prev_curr = curr;
-
- curr += sizeof(anchored_matcher_info);
-
- memcpy(curr, nfa, nfa->length);
- curr += nfa->length;
- curr = ROUNDUP_PTR(curr, 64);
-
- if (i + 1 == nfas.size()) {
- ami->next_offset = 0U;
- } else {
- ami->next_offset = verify_u32(curr - prev_curr);
- }
-
- ami->state_offset = state_offset;
+ char *curr = (char *)atable.get();
+
+ u32 state_offset = 0;
+ for (size_t i = 0; i < nfas.size(); i++) {
+ const NFA *nfa = nfas[i].get();
+ anchored_matcher_info *ami = (anchored_matcher_info *)curr;
+ char *prev_curr = curr;
+
+ curr += sizeof(anchored_matcher_info);
+
+ memcpy(curr, nfa, nfa->length);
+ curr += nfa->length;
+ curr = ROUNDUP_PTR(curr, 64);
+
+ if (i + 1 == nfas.size()) {
+ ami->next_offset = 0U;
+ } else {
+ ami->next_offset = verify_u32(curr - prev_curr);
+ }
+
+ ami->state_offset = state_offset;
state_offset += nfa->streamStateSize;
- ami->anchoredMinDistance = start_offset[i];
- }
-
+ ami->anchoredMinDistance = start_offset[i];
+ }
+
DEBUG_PRINTF("success %zu\n", atable.size());
- return atable;
-}
-
-} // namespace ue2
+ return atable;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_anchored.h b/contrib/libs/hyperscan/src/rose/rose_build_anchored.h
index 0301eea217..37d268ac5a 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_anchored.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_anchored.h
@@ -1,57 +1,57 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_BUILD_ANCHORED
-#define ROSE_BUILD_ANCHORED
-
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_ANCHORED
+#define ROSE_BUILD_ANCHORED
+
+#include "ue2common.h"
#include "rose_build_impl.h"
-#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_holder.h"
#include "util/bytecode_ptr.h"
-
-#include <map>
-#include <vector>
-#include <set>
-
+
+#include <map>
+#include <vector>
+#include <set>
+
struct anchored_matcher_info;
-
-namespace ue2 {
-
-class RoseBuildImpl;
+
+namespace ue2 {
+
+class RoseBuildImpl;
struct raw_dfa;
struct LitFragment;
-
+
/**
* \brief Construct a set of anchored DFAs from our anchored literals/engines.
*/
std::vector<raw_dfa> buildAnchoredDfas(RoseBuildImpl &build,
const std::vector<LitFragment> &fragments);
-
+
/**
* \brief Construct an anchored_matcher_info runtime structure from the given
* set of DFAs.
@@ -66,16 +66,16 @@ buildAnchoredMatcher(RoseBuildImpl &build,
u32 anchoredStateSize(const anchored_matcher_info &atable);
-#define ANCHORED_FAIL 0
-#define ANCHORED_SUCCESS 1
-#define ANCHORED_REMAP 2
-
+#define ANCHORED_FAIL 0
+#define ANCHORED_SUCCESS 1
+#define ANCHORED_REMAP 2
+
int addAnchoredNFA(RoseBuildImpl &build, const NGHolder &wrapper,
- const std::map<NFAVertex, std::set<u32>> &reportMap);
-
+ const std::map<NFAVertex, std::set<u32>> &reportMap);
+
int addToAnchoredMatcher(RoseBuildImpl &build, const NGHolder &anchored,
- u32 exit_id, ReportID *remap);
-
-} // namespace ue2
-
-#endif
+ u32 exit_id, ReportID *remap);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_bytecode.cpp b/contrib/libs/hyperscan/src/rose/rose_build_bytecode.cpp
index b40257e4d5..df464c2800 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_bytecode.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_bytecode.cpp
@@ -1,186 +1,186 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_impl.h"
-
-#include "ue2common.h"
-#include "grey.h"
-#include "hs_compile.h" // for HS_MODE_*
-#include "rose_build_add_internal.h"
-#include "rose_build_anchored.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_impl.h"
+
+#include "ue2common.h"
+#include "grey.h"
+#include "hs_compile.h" // for HS_MODE_*
+#include "rose_build_add_internal.h"
+#include "rose_build_anchored.h"
#include "rose_build_dump.h"
#include "rose_build_engine_blob.h"
#include "rose_build_exclusive.h"
#include "rose_build_groups.h"
-#include "rose_build_infix.h"
+#include "rose_build_infix.h"
#include "rose_build_long_lit.h"
-#include "rose_build_lookaround.h"
+#include "rose_build_lookaround.h"
#include "rose_build_matchers.h"
#include "rose_build_misc.h"
#include "rose_build_program.h"
#include "rose_build_resources.h"
-#include "rose_build_scatter.h"
-#include "rose_build_util.h"
-#include "rose_build_width.h"
+#include "rose_build_scatter.h"
+#include "rose_build_util.h"
+#include "rose_build_width.h"
#include "rose_internal.h"
#include "rose_program.h"
-#include "hwlm/hwlm.h" /* engine types */
-#include "hwlm/hwlm_build.h"
+#include "hwlm/hwlm.h" /* engine types */
+#include "hwlm/hwlm_build.h"
#include "hwlm/hwlm_literal.h"
-#include "nfa/castlecompile.h"
-#include "nfa/goughcompile.h"
-#include "nfa/mcclellancompile.h"
+#include "nfa/castlecompile.h"
+#include "nfa/goughcompile.h"
+#include "nfa/mcclellancompile.h"
#include "nfa/mcclellancompile_util.h"
#include "nfa/mcsheng_compile.h"
-#include "nfa/nfa_api_queue.h"
-#include "nfa/nfa_build_util.h"
-#include "nfa/nfa_internal.h"
+#include "nfa/nfa_api_queue.h"
+#include "nfa/nfa_build_util.h"
+#include "nfa/nfa_internal.h"
#include "nfa/shengcompile.h"
-#include "nfa/shufticompile.h"
+#include "nfa/shufticompile.h"
#include "nfa/tamaramacompile.h"
#include "nfa/tamarama_internal.h"
#include "nfagraph/ng_execute.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_lbr.h"
-#include "nfagraph/ng_limex.h"
-#include "nfagraph/ng_mcclellan.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_reports.h"
-#include "nfagraph/ng_revacc.h"
-#include "nfagraph/ng_stop.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_width.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_lbr.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_mcclellan.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_revacc.h"
+#include "nfagraph/ng_stop.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
#include "smallwrite/smallwrite_build.h"
-#include "som/slot_manager.h"
-#include "util/bitutils.h"
-#include "util/boundary_reports.h"
-#include "util/charreach.h"
-#include "util/charreach_util.h"
-#include "util/compile_context.h"
-#include "util/compile_error.h"
-#include "util/container.h"
+#include "som/slot_manager.h"
+#include "util/bitutils.h"
+#include "util/boundary_reports.h"
+#include "util/charreach.h"
+#include "util/charreach_util.h"
+#include "util/compile_context.h"
+#include "util/compile_error.h"
+#include "util/container.h"
#include "util/fatbit_build.h"
-#include "util/graph_range.h"
+#include "util/graph_range.h"
#include "util/insertion_ordered.h"
#include "util/make_unique.h"
-#include "util/multibit_build.h"
+#include "util/multibit_build.h"
#include "util/noncopyable.h"
-#include "util/order_check.h"
+#include "util/order_check.h"
#include "util/popcount.h"
-#include "util/queue_index_factory.h"
-#include "util/report_manager.h"
-#include "util/ue2string.h"
-#include "util/verify_types.h"
-
-#include <algorithm>
+#include "util/queue_index_factory.h"
+#include "util/report_manager.h"
+#include "util/ue2string.h"
+#include "util/verify_types.h"
+
+#include <algorithm>
#include <array>
-#include <map>
-#include <queue>
-#include <set>
-#include <sstream>
-#include <string>
-#include <vector>
-#include <utility>
-
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_values;
-using boost::adaptors::map_keys;
-
-namespace ue2 {
-
-/* The rose bytecode construction is a giant cesspit.
- *
- * One issue is that bits and pieces are constructed piecemeal and these
- * sections are used by later in the construction process. Until the very end of
- * the construction there is no useful invariant holding for the bytecode. This
- * makes reordering / understanding the construction process awkward as there
- * are hidden dependencies everywhere. We should start by shifting towards
- * a model where the bytecode is only written to during the construction so that
- * the dependencies can be understood by us mere mortals.
- *
- * I am sure the construction process is also bad from a number of other
- * standpoints as well but the can come later.
- *
- * Actually, one other annoying issues the plague of member functions on the
- * impl which tightly couples the internals of this file to all the other rose
- * build files. Need more egregiously awesome free functions.
- */
-
-namespace /* anon */ {
-
+#include <map>
+#include <queue>
+#include <set>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <utility>
+
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_values;
+using boost::adaptors::map_keys;
+
+namespace ue2 {
+
+/* The rose bytecode construction is a giant cesspit.
+ *
+ * One issue is that bits and pieces are constructed piecemeal and these
+ * sections are used by later in the construction process. Until the very end of
+ * the construction there is no useful invariant holding for the bytecode. This
+ * makes reordering / understanding the construction process awkward as there
+ * are hidden dependencies everywhere. We should start by shifting towards
+ * a model where the bytecode is only written to during the construction so that
+ * the dependencies can be understood by us mere mortals.
+ *
+ * I am sure the construction process is also bad from a number of other
+ * standpoints as well but the can come later.
+ *
+ * Actually, one other annoying issues the plague of member functions on the
+ * impl which tightly couples the internals of this file to all the other rose
+ * build files. Need more egregiously awesome free functions.
+ */
+
+namespace /* anon */ {
+
struct build_context : noncopyable {
/** \brief information about engines to the left of a vertex */
map<RoseVertex, left_build_info> leftfix_info;
-
+
/** \brief mapping from suffix to queue index. */
map<suffix_id, u32> suffixes;
-
+
/** \brief engine info by queue. */
map<u32, engine_info> engine_info_by_queue;
-
+
/** \brief Simple cache of programs written to engine blob, used for
* deduplication. */
unordered_map<RoseProgram, u32, RoseProgramHash,
RoseProgramEquivalence> program_cache;
-
+
/** \brief State indices, for those roles that have them.
* Each vertex present has a unique state index in the range
* [0, roleStateIndices.size()). */
unordered_map<RoseVertex, u32> roleStateIndices;
-
+
/** \brief Mapping from queue index to bytecode offset for built engines
* that have already been pushed into the engine_blob. */
unordered_map<u32, u32> engineOffsets;
-
+
/** \brief List of long literals (ones with CHECK_LONG_LIT instructions)
* that need hash table support. */
vector<ue2_case_string> longLiterals;
-
+
/** \brief Contents of the Rose bytecode immediately following the
* RoseEngine. */
RoseEngineBlob engine_blob;
-
+
/** \brief True if this Rose engine has an MPV engine. */
bool needs_mpv_catchup = false;
-
+
/** \brief Resources in use (tracked as programs are added). */
RoseResources resources;
};
-
+
/** \brief subengine info including built engine and
* corresponding triggering rose vertices */
struct ExclusiveSubengine {
bytecode_ptr<NFA> nfa;
vector<RoseVertex> vertices;
};
-
+
/** \brief exclusive info to build tamarama */
struct ExclusiveInfo : noncopyable {
// subengine info
@@ -189,11 +189,11 @@ struct ExclusiveInfo : noncopyable {
set<ReportID> reports;
// assigned queue id
u32 queue;
-};
-
-}
-
-static
+};
+
+}
+
+static
void add_nfa_to_blob(build_context &bc, NFA &nfa) {
u32 qi = nfa.queueIndex;
u32 nfa_offset = bc.engine_blob.add(nfa, nfa.length);
@@ -202,19 +202,19 @@ void add_nfa_to_blob(build_context &bc, NFA &nfa) {
assert(!contains(bc.engineOffsets, qi));
bc.engineOffsets.emplace(qi, nfa_offset);
-}
-
-static
-u32 countRosePrefixes(const vector<LeftNfaInfo> &roses) {
- u32 num = 0;
- for (const auto &r : roses) {
- if (!r.infix) {
- num++;
- }
- }
- return num;
-}
-
+}
+
+static
+u32 countRosePrefixes(const vector<LeftNfaInfo> &roses) {
+ u32 num = 0;
+ for (const auto &r : roses) {
+ if (!r.infix) {
+ num++;
+ }
+ }
+ return num;
+}
+
/**
* \brief True if this Rose engine needs to run a catch up whenever a literal
* report is generated.
@@ -222,7 +222,7 @@ u32 countRosePrefixes(const vector<LeftNfaInfo> &roses) {
* Catch up is necessary if there are output-exposed engines (suffixes,
* outfixes).
*/
-static
+static
bool needsCatchup(const RoseBuildImpl &build) {
/* Note: we could be more selective about when we need to generate catch up
* instructions rather than just a boolean yes/no - for instance, if we know
@@ -237,7 +237,7 @@ bool needsCatchup(const RoseBuildImpl &build) {
if (!build.outfixes.empty()) {
/* TODO: check that they have non-eod reports */
- DEBUG_PRINTF("has outfixes\n");
+ DEBUG_PRINTF("has outfixes\n");
return true;
}
@@ -259,75 +259,75 @@ static
bool isPureFloating(const RoseResources &resources, const CompileContext &cc) {
if (!resources.has_floating) {
DEBUG_PRINTF("no floating table\n");
- return false;
- }
-
+ return false;
+ }
+
if (resources.has_outfixes || resources.has_suffixes ||
resources.has_leftfixes) {
DEBUG_PRINTF("has engines\n");
return false;
}
-
+
if (resources.has_anchored) {
DEBUG_PRINTF("has anchored matcher\n");
- return false;
- }
-
+ return false;
+ }
+
if (resources.has_eod) {
DEBUG_PRINTF("has eod work to do\n");
return false;
}
-
+
if (resources.has_states) {
DEBUG_PRINTF("has states\n");
return false;
}
-
+
if (resources.has_lit_delay) {
DEBUG_PRINTF("has delayed literals\n");
return false;
}
-
+
if (cc.streaming && resources.has_lit_check) {
DEBUG_PRINTF("has long literals in streaming mode, which needs long "
"literal table support\n");
return false;
- }
-
+ }
+
if (resources.checks_groups) {
DEBUG_PRINTF("has group checks\n");
return false;
}
- DEBUG_PRINTF("pure floating literals\n");
- return true;
-}
-
-static
+ DEBUG_PRINTF("pure floating literals\n");
+ return true;
+}
+
+static
bool isSingleOutfix(const RoseBuildImpl &tbi) {
- for (auto v : vertices_range(tbi.g)) {
- if (tbi.isAnyStart(v)) {
- continue;
- }
- if (tbi.hasLiteralInTable(v, ROSE_ANCHORED_SMALL_BLOCK)) {
- continue;
- }
- DEBUG_PRINTF("has role\n");
- return false;
- }
-
- if (tbi.ssm.numSomSlots()) {
- return false;
- }
-
- if (!tbi.boundary.report_at_eod.empty()) {
- return false; /* streaming runtime makes liberal use of broken flag */
- }
-
+ for (auto v : vertices_range(tbi.g)) {
+ if (tbi.isAnyStart(v)) {
+ continue;
+ }
+ if (tbi.hasLiteralInTable(v, ROSE_ANCHORED_SMALL_BLOCK)) {
+ continue;
+ }
+ DEBUG_PRINTF("has role\n");
+ return false;
+ }
+
+ if (tbi.ssm.numSomSlots()) {
+ return false;
+ }
+
+ if (!tbi.boundary.report_at_eod.empty()) {
+ return false; /* streaming runtime makes liberal use of broken flag */
+ }
+
return tbi.outfixes.size() == 1;
-}
-
-static
+}
+
+static
u8 pickRuntimeImpl(const RoseBuildImpl &build, const RoseResources &resources,
UNUSED u32 outfixEndQueue) {
DEBUG_PRINTF("has_outfixes=%d\n", resources.has_outfixes);
@@ -343,33 +343,33 @@ u8 pickRuntimeImpl(const RoseBuildImpl &build, const RoseResources &resources,
DEBUG_PRINTF("has_eod=%d\n", resources.has_eod);
if (isPureFloating(resources, build.cc)) {
- return ROSE_RUNTIME_PURE_LITERAL;
- }
-
+ return ROSE_RUNTIME_PURE_LITERAL;
+ }
+
if (isSingleOutfix(build)) {
- return ROSE_RUNTIME_SINGLE_OUTFIX;
- }
-
- return ROSE_RUNTIME_FULL_ROSE;
-}
-
+ return ROSE_RUNTIME_SINGLE_OUTFIX;
+ }
+
+ return ROSE_RUNTIME_FULL_ROSE;
+}
+
/**
* \brief True if this Rose engine needs to run MPV catch up in front of
* non-MPV reports.
*/
-static
+static
bool needsMpvCatchup(const RoseBuildImpl &build) {
const auto &outfixes = build.outfixes;
bool has_mpv =
any_of(begin(outfixes), end(outfixes), [](const OutfixInfo &outfix) {
return outfix.is_nonempty_mpv();
});
-
+
if (!has_mpv) {
DEBUG_PRINTF("no mpv\n");
return false;
- }
-
+ }
+
if (isSingleOutfix(build)) {
DEBUG_PRINTF("single outfix\n");
return false;
@@ -393,39 +393,39 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount,
// Role state storage.
curr_offset += mmbit_size(rolesWithStateCount);
- so->activeLeafArray = curr_offset; /* TODO: limit size of array */
- curr_offset += mmbit_size(activeArrayCount);
+ so->activeLeafArray = curr_offset; /* TODO: limit size of array */
+ curr_offset += mmbit_size(activeArrayCount);
so->activeLeafArray_size = mmbit_size(activeArrayCount);
-
- so->activeLeftArray = curr_offset; /* TODO: limit size of array */
+
+ so->activeLeftArray = curr_offset; /* TODO: limit size of array */
curr_offset += mmbit_size(activeLeftCount);
- so->activeLeftArray_size = mmbit_size(activeLeftCount);
-
+ so->activeLeftArray_size = mmbit_size(activeLeftCount);
+
so->longLitState = curr_offset;
curr_offset += longLitStreamStateRequired;
so->longLitState_size = longLitStreamStateRequired;
-
- // ONE WHOLE BYTE for each active leftfix with lag.
- so->leftfixLagTable = curr_offset;
- curr_offset += laggedRoseCount;
-
- so->anchorState = curr_offset;
- curr_offset += anchorStateSize;
-
- so->groups = curr_offset;
+
+ // ONE WHOLE BYTE for each active leftfix with lag.
+ so->leftfixLagTable = curr_offset;
+ curr_offset += laggedRoseCount;
+
+ so->anchorState = curr_offset;
+ curr_offset += anchorStateSize;
+
+ so->groups = curr_offset;
so->groups_size = (build.group_end + 7) / 8;
- assert(so->groups_size <= sizeof(u64a));
- curr_offset += so->groups_size;
-
- // The history consists of the bytes in the history only. YAY
- so->history = curr_offset;
- curr_offset += historyRequired;
-
+ assert(so->groups_size <= sizeof(u64a));
+ curr_offset += so->groups_size;
+
+ // The history consists of the bytes in the history only. YAY
+ so->history = curr_offset;
+ curr_offset += historyRequired;
+
// Exhaustion multibit.
- so->exhausted = curr_offset;
+ so->exhausted = curr_offset;
curr_offset += mmbit_size(build.rm.numEkeys());
so->exhausted_size = mmbit_size(build.rm.numEkeys());
-
+
// Logical multibit.
so->logicalVec = curr_offset;
so->logicalVec_size = mmbit_size(build.rm.numLogicalKeys() +
@@ -437,191 +437,191 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount,
so->combVec_size = mmbit_size(build.rm.numCkeys());
curr_offset += so->combVec_size;
- // SOM locations and valid/writeable multibit structures.
+ // SOM locations and valid/writeable multibit structures.
if (build.ssm.numSomSlots()) {
const u32 somWidth = build.ssm.somPrecision();
- if (somWidth) { // somWidth is zero in block mode.
- curr_offset = ROUNDUP_N(curr_offset, somWidth);
- so->somLocation = curr_offset;
+ if (somWidth) { // somWidth is zero in block mode.
+ curr_offset = ROUNDUP_N(curr_offset, somWidth);
+ so->somLocation = curr_offset;
curr_offset += build.ssm.numSomSlots() * somWidth;
- } else {
- so->somLocation = 0;
- }
- so->somValid = curr_offset;
+ } else {
+ so->somLocation = 0;
+ }
+ so->somValid = curr_offset;
curr_offset += mmbit_size(build.ssm.numSomSlots());
- so->somWritable = curr_offset;
+ so->somWritable = curr_offset;
curr_offset += mmbit_size(build.ssm.numSomSlots());
so->somMultibit_size = mmbit_size(build.ssm.numSomSlots());
- } else {
- // No SOM handling, avoid growing the stream state any further.
- so->somLocation = 0;
- so->somValid = 0;
- so->somWritable = 0;
- }
-
- // note: state space for mask nfas is allocated later
+ } else {
+ // No SOM handling, avoid growing the stream state any further.
+ so->somLocation = 0;
+ so->somValid = 0;
+ so->somWritable = 0;
+ }
+
+ // note: state space for mask nfas is allocated later
so->nfaStateBegin = curr_offset;
- so->end = curr_offset;
-}
-
-// Get the mask of initial vertices due to root and anchored_root.
-rose_group RoseBuildImpl::getInitialGroups() const {
+ so->end = curr_offset;
+}
+
+// Get the mask of initial vertices due to root and anchored_root.
+rose_group RoseBuildImpl::getInitialGroups() const {
rose_group groups = getSuccGroups(root)
| getSuccGroups(anchored_root)
| boundary_group_mask;
- DEBUG_PRINTF("initial groups = %016llx\n", groups);
- return groups;
-}
-
-static
-bool nfaStuckOn(const NGHolder &g) {
- assert(!proper_out_degree(g.startDs, g));
- set<NFAVertex> succ;
- insert(&succ, adjacent_vertices(g.start, g));
- succ.erase(g.startDs);
-
- set<NFAVertex> asucc;
- set<u32> tops;
- set<u32> done_tops;
-
- for (const auto &e : out_edges_range(g.start, g)) {
+ DEBUG_PRINTF("initial groups = %016llx\n", groups);
+ return groups;
+}
+
+static
+bool nfaStuckOn(const NGHolder &g) {
+ assert(!proper_out_degree(g.startDs, g));
+ set<NFAVertex> succ;
+ insert(&succ, adjacent_vertices(g.start, g));
+ succ.erase(g.startDs);
+
+ set<NFAVertex> asucc;
+ set<u32> tops;
+ set<u32> done_tops;
+
+ for (const auto &e : out_edges_range(g.start, g)) {
insert(&tops, g[e].tops);
- if (!g[target(e, g)].char_reach.all()) {
- continue;
- }
-
- asucc.clear();
- insert(&asucc, adjacent_vertices(target(e, g), g));
-
- if (asucc == succ) {
+ if (!g[target(e, g)].char_reach.all()) {
+ continue;
+ }
+
+ asucc.clear();
+ insert(&asucc, adjacent_vertices(target(e, g), g));
+
+ if (asucc == succ) {
insert(&done_tops, g[e].tops);
- }
- }
-
- return tops == done_tops;
-}
-
-namespace {
-struct PredTopPair {
- PredTopPair(RoseVertex v, u32 t) : pred(v), top(t) {}
- bool operator<(const PredTopPair &b) const {
- const PredTopPair &a = *this;
- ORDER_CHECK(pred);
- ORDER_CHECK(top);
- return false;
- }
- RoseVertex pred;
- u32 top;
-};
-}
-
-static
-void findFixedDepthTops(const RoseGraph &g, const set<PredTopPair> &triggers,
- map<u32, u32> *fixed_depth_tops) {
- DEBUG_PRINTF("|trig| %zu\n", triggers.size());
- /* find all pred roles for this holder, group by top */
- /* if all pred roles for a given top have the same min and max offset, we
- * add the top to the fixed_depth_top map */
- map<u32, set<RoseVertex> > pred_by_top;
- for (const auto &ptp : triggers) {
- u32 top = ptp.top;
- RoseVertex u = ptp.pred;
- pred_by_top[top].insert(u);
- }
-
- for (const auto &e : pred_by_top) {
- u32 top = e.first;
- const set<RoseVertex> &preds = e.second;
- if (!g[*preds.begin()].fixedOffset()) {
- continue;
- }
- u32 depth = g[*preds.begin()].min_offset;
- for (RoseVertex u : preds) {
- if (g[u].min_offset != depth || g[u].max_offset != depth) {
- goto next_top;
- }
- }
- DEBUG_PRINTF("%u at depth %u\n", top, depth);
- (*fixed_depth_tops)[top] = depth;
- next_top:;
- }
-}
-
-/**
- * \brief Heuristic for picking between a DFA or NFA implementation of an
- * engine.
- */
-static
+ }
+ }
+
+ return tops == done_tops;
+}
+
+namespace {
+struct PredTopPair {
+ PredTopPair(RoseVertex v, u32 t) : pred(v), top(t) {}
+ bool operator<(const PredTopPair &b) const {
+ const PredTopPair &a = *this;
+ ORDER_CHECK(pred);
+ ORDER_CHECK(top);
+ return false;
+ }
+ RoseVertex pred;
+ u32 top;
+};
+}
+
+static
+void findFixedDepthTops(const RoseGraph &g, const set<PredTopPair> &triggers,
+ map<u32, u32> *fixed_depth_tops) {
+ DEBUG_PRINTF("|trig| %zu\n", triggers.size());
+ /* find all pred roles for this holder, group by top */
+ /* if all pred roles for a given top have the same min and max offset, we
+ * add the top to the fixed_depth_top map */
+ map<u32, set<RoseVertex> > pred_by_top;
+ for (const auto &ptp : triggers) {
+ u32 top = ptp.top;
+ RoseVertex u = ptp.pred;
+ pred_by_top[top].insert(u);
+ }
+
+ for (const auto &e : pred_by_top) {
+ u32 top = e.first;
+ const set<RoseVertex> &preds = e.second;
+ if (!g[*preds.begin()].fixedOffset()) {
+ continue;
+ }
+ u32 depth = g[*preds.begin()].min_offset;
+ for (RoseVertex u : preds) {
+ if (g[u].min_offset != depth || g[u].max_offset != depth) {
+ goto next_top;
+ }
+ }
+ DEBUG_PRINTF("%u at depth %u\n", top, depth);
+ (*fixed_depth_tops)[top] = depth;
+ next_top:;
+ }
+}
+
+/**
+ * \brief Heuristic for picking between a DFA or NFA implementation of an
+ * engine.
+ */
+static
bytecode_ptr<NFA> pickImpl(bytecode_ptr<NFA> dfa_impl,
bytecode_ptr<NFA> nfa_impl,
bool fast_nfa) {
- assert(nfa_impl);
- assert(dfa_impl);
+ assert(nfa_impl);
+ assert(dfa_impl);
assert(isDfaType(dfa_impl->type));
-
- // If our NFA is an LBR, it always wins.
- if (isLbrType(nfa_impl->type)) {
- return nfa_impl;
- }
-
+
+ // If our NFA is an LBR, it always wins.
+ if (isLbrType(nfa_impl->type)) {
+ return nfa_impl;
+ }
+
// if our DFA is an accelerated Sheng, it always wins.
if (isShengType(dfa_impl->type) && has_accel(*dfa_impl)) {
return dfa_impl;
}
- bool d_accel = has_accel(*dfa_impl);
- bool n_accel = has_accel(*nfa_impl);
+ bool d_accel = has_accel(*dfa_impl);
+ bool n_accel = has_accel(*nfa_impl);
bool d_big = isBigDfaType(dfa_impl->type);
- bool n_vsmall = nfa_impl->nPositions <= 32;
- bool n_br = has_bounded_repeats(*nfa_impl);
- DEBUG_PRINTF("da %d na %d db %d nvs %d nbr %d\n", (int)d_accel,
- (int)n_accel, (int)d_big, (int)n_vsmall, (int)n_br);
- if (d_big) {
- if (!n_vsmall) {
- if (d_accel || !n_accel) {
- return dfa_impl;
- } else {
- return nfa_impl;
- }
- } else {
+ bool n_vsmall = nfa_impl->nPositions <= 32;
+ bool n_br = has_bounded_repeats(*nfa_impl);
+ DEBUG_PRINTF("da %d na %d db %d nvs %d nbr %d\n", (int)d_accel,
+ (int)n_accel, (int)d_big, (int)n_vsmall, (int)n_br);
+ if (d_big) {
+ if (!n_vsmall) {
+ if (d_accel || !n_accel) {
+ return dfa_impl;
+ } else {
+ return nfa_impl;
+ }
+ } else {
if (n_accel && fast_nfa) {
- return nfa_impl;
- } else {
- return dfa_impl;
- }
- }
- } else {
- /* favour a McClellan 8, unless the nfa looks really good and the dfa
- * looks like trouble */
- if (!d_accel && n_vsmall && n_accel && !n_br) {
- return nfa_impl;
- } else {
- return dfa_impl;
- }
- }
-}
-
-/**
- * \brief Builds an LBR if there's one repeat in the given CastleProto,
- * otherwise a Castle.
- */
-static
+ return nfa_impl;
+ } else {
+ return dfa_impl;
+ }
+ }
+ } else {
+ /* favour a McClellan 8, unless the nfa looks really good and the dfa
+ * looks like trouble */
+ if (!d_accel && n_vsmall && n_accel && !n_br) {
+ return nfa_impl;
+ } else {
+ return dfa_impl;
+ }
+ }
+}
+
+/**
+ * \brief Builds an LBR if there's one repeat in the given CastleProto,
+ * otherwise a Castle.
+ */
+static
bytecode_ptr<NFA>
-buildRepeatEngine(const CastleProto &proto,
- const map<u32, vector<vector<CharReach>>> &triggers,
+buildRepeatEngine(const CastleProto &proto,
+ const map<u32, vector<vector<CharReach>>> &triggers,
const CompileContext &cc, const ReportManager &rm) {
- // If we only have one repeat, the LBR should always be the best possible
- // implementation.
- if (proto.repeats.size() == 1 && cc.grey.allowLbr) {
+ // If we only have one repeat, the LBR should always be the best possible
+ // implementation.
+ if (proto.repeats.size() == 1 && cc.grey.allowLbr) {
return constructLBR(proto, triggers.at(0), cc, rm);
- }
-
+ }
+
auto castle_nfa = buildCastle(proto, triggers, cc, rm);
- assert(castle_nfa); // Should always be constructible.
- return castle_nfa;
-}
-
+ assert(castle_nfa); // Should always be constructible.
+ return castle_nfa;
+}
+
static
bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, bool is_transient,
const CompileContext &cc, const ReportManager &rm) {
@@ -649,236 +649,236 @@ bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, bool is_transient,
return dfa;
}
-/* builds suffix nfas */
-static
+/* builds suffix nfas */
+static
bytecode_ptr<NFA>
-buildSuffix(const ReportManager &rm, const SomSlotManager &ssm,
- const map<u32, u32> &fixed_depth_tops,
- const map<u32, vector<vector<CharReach>>> &triggers,
- suffix_id suff, const CompileContext &cc) {
- if (suff.castle()) {
+buildSuffix(const ReportManager &rm, const SomSlotManager &ssm,
+ const map<u32, u32> &fixed_depth_tops,
+ const map<u32, vector<vector<CharReach>>> &triggers,
+ suffix_id suff, const CompileContext &cc) {
+ if (suff.castle()) {
auto n = buildRepeatEngine(*suff.castle(), triggers, cc, rm);
- assert(n);
- return n;
- }
-
- if (suff.haig()) {
+ assert(n);
+ return n;
+ }
+
+ if (suff.haig()) {
auto n = goughCompile(*suff.haig(), ssm.somPrecision(), cc, rm);
- assert(n);
- return n;
- }
-
- if (suff.dfa()) {
+ assert(n);
+ return n;
+ }
+
+ if (suff.dfa()) {
auto d = getDfa(*suff.dfa(), false, cc, rm);
- assert(d);
- return d;
- }
-
- assert(suff.graph());
- NGHolder &holder = *suff.graph();
- assert(holder.kind == NFA_SUFFIX);
- const bool oneTop = onlyOneTop(holder);
- bool compress_state = cc.streaming;
-
- // Take a shot at the LBR engine.
- if (oneTop) {
+ assert(d);
+ return d;
+ }
+
+ assert(suff.graph());
+ NGHolder &holder = *suff.graph();
+ assert(holder.kind == NFA_SUFFIX);
+ const bool oneTop = onlyOneTop(holder);
+ bool compress_state = cc.streaming;
+
+ // Take a shot at the LBR engine.
+ if (oneTop) {
auto lbr = constructLBR(holder, triggers.at(0), cc, rm);
- if (lbr) {
- return lbr;
- }
- }
-
+ if (lbr) {
+ return lbr;
+ }
+ }
+
bool fast_nfa = false;
- auto n = constructNFA(holder, &rm, fixed_depth_tops, triggers,
+ auto n = constructNFA(holder, &rm, fixed_depth_tops, triggers,
compress_state, fast_nfa, cc);
- assert(n);
-
- if (oneTop && cc.grey.roseMcClellanSuffix) {
- if (cc.grey.roseMcClellanSuffix == 2 || n->nPositions > 128 ||
+ assert(n);
+
+ if (oneTop && cc.grey.roseMcClellanSuffix) {
+ if (cc.grey.roseMcClellanSuffix == 2 || n->nPositions > 128 ||
!has_bounded_repeats_other_than_firsts(*n) || !fast_nfa) {
- auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0),
- cc.grey);
- if (rdfa) {
+ auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0),
+ cc.grey);
+ if (rdfa) {
auto d = getDfa(*rdfa, false, cc, rm);
- assert(d);
- if (cc.grey.roseMcClellanSuffix != 2) {
+ assert(d);
+ if (cc.grey.roseMcClellanSuffix != 2) {
n = pickImpl(move(d), move(n), fast_nfa);
- } else {
- n = move(d);
- }
-
- assert(n);
- if (isMcClellanType(n->type)) {
- // DFA chosen. We may be able to set some more properties
- // in the NFA structure here.
- u64a maxOffset = findMaxOffset(holder, rm);
- if (maxOffset != MAX_OFFSET && maxOffset < 0xffffffffull) {
- n->maxOffset = (u32)maxOffset;
- DEBUG_PRINTF("dfa max offset %llu\n", maxOffset);
- } else {
- n->maxOffset = 0; // inf
- }
- }
- }
- }
- }
- return n;
-}
-
-static
-void findInfixTriggers(const RoseBuildImpl &build,
- map<left_id, set<PredTopPair> > *infixTriggers) {
- const RoseGraph &g = build.g;
- for (auto v : vertices_range(g)) {
- if (!g[v].left) {
- continue;
- }
-
- set<PredTopPair> &triggers = (*infixTriggers)[left_id(g[v].left)];
-
- for (const auto &e : in_edges_range(v, g)) {
- RoseVertex u = source(e, g);
- if (build.isAnyStart(u)) {
- continue;
- }
- triggers.insert(PredTopPair(u, g[e].rose_top));
- }
- }
-}
-
-static
-vector<CharReach> as_cr_seq(const rose_literal_id &lit) {
- vector<CharReach> rv = as_cr_seq(lit.s);
- for (u32 i = 0; i < lit.delay; i++) {
- rv.push_back(CharReach::dot());
- }
-
- /* TODO: take into account cmp/msk */
- return rv;
-}
-
-/**
- * \brief Returns a map of trigger literals as sequences of CharReach, grouped
- * by top index.
- */
-static
-void findTriggerSequences(const RoseBuildImpl &tbi,
- const set<PredTopPair> &triggers,
- map<u32, vector<vector<CharReach> > > *trigger_lits) {
- map<u32, set<u32> > lit_ids_by_top;
- for (const PredTopPair &t : triggers) {
- insert(&lit_ids_by_top[t.top], tbi.g[t.pred].literals);
- }
-
- for (const auto &e : lit_ids_by_top) {
- const u32 top = e.first;
- const set<u32> &lit_ids = e.second;
-
+ } else {
+ n = move(d);
+ }
+
+ assert(n);
+ if (isMcClellanType(n->type)) {
+ // DFA chosen. We may be able to set some more properties
+ // in the NFA structure here.
+ u64a maxOffset = findMaxOffset(holder, rm);
+ if (maxOffset != MAX_OFFSET && maxOffset < 0xffffffffull) {
+ n->maxOffset = (u32)maxOffset;
+ DEBUG_PRINTF("dfa max offset %llu\n", maxOffset);
+ } else {
+ n->maxOffset = 0; // inf
+ }
+ }
+ }
+ }
+ }
+ return n;
+}
+
+static
+void findInfixTriggers(const RoseBuildImpl &build,
+ map<left_id, set<PredTopPair> > *infixTriggers) {
+ const RoseGraph &g = build.g;
+ for (auto v : vertices_range(g)) {
+ if (!g[v].left) {
+ continue;
+ }
+
+ set<PredTopPair> &triggers = (*infixTriggers)[left_id(g[v].left)];
+
+ for (const auto &e : in_edges_range(v, g)) {
+ RoseVertex u = source(e, g);
+ if (build.isAnyStart(u)) {
+ continue;
+ }
+ triggers.insert(PredTopPair(u, g[e].rose_top));
+ }
+ }
+}
+
+static
+vector<CharReach> as_cr_seq(const rose_literal_id &lit) {
+ vector<CharReach> rv = as_cr_seq(lit.s);
+ for (u32 i = 0; i < lit.delay; i++) {
+ rv.push_back(CharReach::dot());
+ }
+
+ /* TODO: take into account cmp/msk */
+ return rv;
+}
+
+/**
+ * \brief Returns a map of trigger literals as sequences of CharReach, grouped
+ * by top index.
+ */
+static
+void findTriggerSequences(const RoseBuildImpl &tbi,
+ const set<PredTopPair> &triggers,
+ map<u32, vector<vector<CharReach> > > *trigger_lits) {
+ map<u32, set<u32> > lit_ids_by_top;
+ for (const PredTopPair &t : triggers) {
+ insert(&lit_ids_by_top[t.top], tbi.g[t.pred].literals);
+ }
+
+ for (const auto &e : lit_ids_by_top) {
+ const u32 top = e.first;
+ const set<u32> &lit_ids = e.second;
+
for (u32 id : lit_ids) {
const rose_literal_id &lit = tbi.literals.at(id);
- (*trigger_lits)[top].push_back(as_cr_seq(lit));
- }
- }
-}
-
+ (*trigger_lits)[top].push_back(as_cr_seq(lit));
+ }
+ }
+}
+
static
bytecode_ptr<NFA> makeLeftNfa(const RoseBuildImpl &tbi, left_id &left,
const bool is_prefix, const bool is_transient,
const map<left_id, set<PredTopPair>> &infixTriggers,
const CompileContext &cc) {
const ReportManager &rm = tbi.rm;
-
+
bytecode_ptr<NFA> n;
- // Should compress state if this rose is non-transient and we're in
- // streaming mode.
- const bool compress_state = !is_transient;
-
+ // Should compress state if this rose is non-transient and we're in
+ // streaming mode.
+ const bool compress_state = !is_transient;
+
assert(is_prefix || !left.graph() || left.graph()->kind == NFA_INFIX);
assert(!is_prefix || !left.graph() || left.graph()->kind == NFA_PREFIX
|| left.graph()->kind == NFA_EAGER_PREFIX);
-
- // Holder should be implementable as an NFA at the very least.
- if (!left.dfa() && left.graph()) {
- assert(isImplementableNFA(*left.graph(), nullptr, cc));
- }
-
- map<u32, u32> fixed_depth_tops;
- if (!is_prefix /* infix */) {
- const set<PredTopPair> &triggers = infixTriggers.at(left);
- findFixedDepthTops(tbi.g, triggers, &fixed_depth_tops);
- }
-
- if (left.castle()) {
- assert(!is_prefix);
- map<u32, vector<vector<CharReach> > > triggers;
- findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
+
+ // Holder should be implementable as an NFA at the very least.
+ if (!left.dfa() && left.graph()) {
+ assert(isImplementableNFA(*left.graph(), nullptr, cc));
+ }
+
+ map<u32, u32> fixed_depth_tops;
+ if (!is_prefix /* infix */) {
+ const set<PredTopPair> &triggers = infixTriggers.at(left);
+ findFixedDepthTops(tbi.g, triggers, &fixed_depth_tops);
+ }
+
+ if (left.castle()) {
+ assert(!is_prefix);
+ map<u32, vector<vector<CharReach> > > triggers;
+ findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
n = buildRepeatEngine(*left.castle(), triggers, cc, rm);
- assert(n);
- return n; // Castles/LBRs are always best!
- }
-
- if (left.dfa()) {
+ assert(n);
+ return n; // Castles/LBRs are always best!
+ }
+
+ if (left.dfa()) {
n = getDfa(*left.dfa(), is_transient, cc, rm);
- } else if (left.graph() && cc.grey.roseMcClellanPrefix == 2 && is_prefix &&
- !is_transient) {
- auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey);
- if (rdfa) {
+ } else if (left.graph() && cc.grey.roseMcClellanPrefix == 2 && is_prefix &&
+ !is_transient) {
+ auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey);
+ if (rdfa) {
n = getDfa(*rdfa, is_transient, cc, rm);
assert(n);
- }
- }
-
- // We can attempt to build LBRs for infixes.
- if (!n && !is_prefix && left.graph() && onlyOneTop(*left.graph())) {
- map<u32, vector<vector<CharReach> > > triggers;
- findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
+ }
+ }
+
+ // We can attempt to build LBRs for infixes.
+ if (!n && !is_prefix && left.graph() && onlyOneTop(*left.graph())) {
+ map<u32, vector<vector<CharReach> > > triggers;
+ findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
assert(triggers.size() == 1); // single top
n = constructLBR(*left.graph(), triggers.begin()->second, cc, rm);
- }
-
+ }
+
bool fast_nfa = false;
- if (!n && left.graph()) {
- map<u32, vector<vector<CharReach>>> triggers;
+ if (!n && left.graph()) {
+ map<u32, vector<vector<CharReach>>> triggers;
if (left.graph()->kind == NFA_INFIX) {
findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
}
- n = constructNFA(*left.graph(), nullptr, fixed_depth_tops, triggers,
+ n = constructNFA(*left.graph(), nullptr, fixed_depth_tops, triggers,
compress_state, fast_nfa, cc);
- }
-
- if (cc.grey.roseMcClellanPrefix == 1 && is_prefix && !left.dfa()
- && left.graph()
+ }
+
+ if (cc.grey.roseMcClellanPrefix == 1 && is_prefix && !left.dfa()
+ && left.graph()
&& (!n || !has_bounded_repeats_other_than_firsts(*n) || !fast_nfa)) {
- auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey);
- if (rdfa) {
+ auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey);
+ if (rdfa) {
auto d = getDfa(*rdfa, is_transient, cc, rm);
- assert(d);
+ assert(d);
n = pickImpl(move(d), move(n), fast_nfa);
- }
- }
-
- return n;
-}
-
-static
-void setLeftNfaProperties(NFA &n, const left_id &left) {
- depth min_width = findMinWidth(left);
- DEBUG_PRINTF("min_width=%s\n", min_width.str().c_str());
- u32 min_width_value = min_width.is_finite() ? (u32)min_width : 0;
- n.minWidth = min_width_value;
-
- depth max_width = findMaxWidth(left);
- DEBUG_PRINTF("max_width=%s\n", max_width.str().c_str());
- u32 max_width_value = max_width.is_finite() ? (u32)max_width : 0;
- n.maxWidth = max_width_value;
-
- // FIXME: NFA::maxOffset in Rose can't be found from reports as they don't
- // map to internal_report structures; it would have to come from the Rose
- // graph.
-}
-
-static
+ }
+ }
+
+ return n;
+}
+
+static
+void setLeftNfaProperties(NFA &n, const left_id &left) {
+ depth min_width = findMinWidth(left);
+ DEBUG_PRINTF("min_width=%s\n", min_width.str().c_str());
+ u32 min_width_value = min_width.is_finite() ? (u32)min_width : 0;
+ n.minWidth = min_width_value;
+
+ depth max_width = findMaxWidth(left);
+ DEBUG_PRINTF("max_width=%s\n", max_width.str().c_str());
+ u32 max_width_value = max_width.is_finite() ? (u32)max_width : 0;
+ n.maxWidth = max_width_value;
+
+ // FIXME: NFA::maxOffset in Rose can't be found from reports as they don't
+ // map to internal_report structures; it would have to come from the Rose
+ // graph.
+}
+
+static
void appendTailToHolder(NGHolder &h, const flat_set<ReportID> &reports,
const vector<NFAVertex> &starts,
const vector<CharReach> &tail) {
@@ -902,16 +902,16 @@ void appendTailToHolder(NGHolder &h, const flat_set<ReportID> &reports,
h[curr].char_reach = *it;
++it;
}
-
+
h[curr].reports = reports;
add_edge(curr, h.accept, h);
}
-
+
static
void appendTailToHolder(NGHolder &h, const vector<CharReach> &tail) {
assert(in_degree(h.acceptEod, h) == 1);
assert(!tail.empty());
-
+
map<flat_set<ReportID>, vector<NFAVertex> > reporters;
for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
reporters[h[v].reports].push_back(v);
@@ -950,11 +950,11 @@ u32 decreaseLag(const RoseBuildImpl &build, NGHolder &h,
restored[i] |= *lit_it;
++lit_it;
}
- }
+ }
}
-
+
assert(!restored.empty());
-
+
appendTailToHolder(h, restored);
return restored.size();
@@ -991,9 +991,9 @@ bool checkSuitableForEager(bool is_prefix, const left_id &left,
if (build.isInETable(s)
|| contains(rg[s].literals, build.eod_event_literal_id)) {
return false; /* Ignore EOD related prefixes */
- }
+ }
}
-
+
if (left.dfa()) {
const raw_dfa &dfa = *left.dfa();
if (dfa.start_floating != DEAD_STATE) {
@@ -1002,7 +1002,7 @@ bool checkSuitableForEager(bool is_prefix, const left_id &left,
if (!dfa.states[dfa.start_anchored].reports.empty()) {
return false; /* vacuous (todo: handle?) */
}
-
+
if (!can_die_early(dfa, EAGER_DIE_BEFORE_LIMIT)) {
return false;
}
@@ -1012,11 +1012,11 @@ bool checkSuitableForEager(bool is_prefix, const left_id &left,
if (proper_out_degree(g.startDs, g)) {
return false; /* not purely anchored */
}
-
+
ei.new_graph = cloneHolder(*left.graph());
auto gg = ei.new_graph;
gg->kind = NFA_EAGER_PREFIX;
-
+
ei.lag_adjust = decreaseLag(build, *gg, succs);
if (is_match_vertex(gg->start, *gg)) {
@@ -1165,15 +1165,15 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi,
for (u32 lit_id : g[u].literals) {
lits.insert(build.literals.at(lit_id).s);
}
- }
- }
+ }
+ }
DEBUG_PRINTF("%zu literals\n", lits.size());
max_queuelen = findMaxInfixMatches(leftfix, lits);
if (max_queuelen < UINT32_MAX) {
max_queuelen++;
}
}
-
+
u32 max_width;
if (is_transient) {
depth d = findMaxWidth(leftfix);
@@ -1182,13 +1182,13 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi,
} else {
max_width = 0;
}
-
+
u8 cm_count = 0;
CharReach cm_cr;
if (cc.grey.allowCountingMiracles) {
findCountingMiracleInfo(leftfix, stop, &cm_count, &cm_cr);
}
-
+
for (RoseVertex v : succs) {
bc.leftfix_info.emplace(v, left_build_info(qi, g[v].left.lag, max_width,
squash_mask, stop,
@@ -1215,11 +1215,11 @@ unique_ptr<TamaInfo> constructTamaInfo(const RoseGraph &g,
for (const auto &e : in_edges_range(v, g)) {
tops.insert(g[e].rose_top);
}
- }
+ }
}
tamaInfo->add(nfa, tops);
}
-
+
return tamaInfo;
}
@@ -1238,12 +1238,12 @@ void updateTops(const RoseGraph &g, const TamaInfo &tamaInfo,
for (const auto &e : in_edges_range(v, g)) {
tamaProto.add(n, g[v].index, g[e].rose_top, out_top_remap);
}
- }
+ }
}
i++;
}
}
-
+
static
shared_ptr<TamaProto> constructContainerEngine(const RoseGraph &g,
build_context &bc,
@@ -1253,13 +1253,13 @@ shared_ptr<TamaProto> constructContainerEngine(const RoseGraph &g,
const Grey &grey) {
const auto &subengines = info.subengines;
auto tamaInfo = constructTamaInfo(g, subengines, is_suffix);
-
+
map<pair<const NFA *, u32>, u32> out_top_remap;
auto n = buildTamarama(*tamaInfo, queue, out_top_remap);
enforceEngineSizeLimit(n.get(), grey);
bc.engine_info_by_queue.emplace(n->queueIndex, engine_info(n.get(), false));
add_nfa_to_blob(bc, *n);
-
+
DEBUG_PRINTF("queue id:%u\n", queue);
shared_ptr<TamaProto> tamaProto = make_shared<TamaProto>();
tamaProto->reports = info.reports;
@@ -1283,11 +1283,11 @@ void buildInfixContainer(RoseGraph &g, build_context &bc,
for (const auto &v : verts) {
DEBUG_PRINTF("vert id:%zu\n", g[v].index);
g[v].left.tamarama = tamaProto;
- }
+ }
}
}
}
-
+
static
void buildSuffixContainer(RoseGraph &g, build_context &bc,
const vector<ExclusiveInfo> &exclusive_info,
@@ -1307,10 +1307,10 @@ void buildSuffixContainer(RoseGraph &g, build_context &bc,
const auto &v = verts[0];
suffix_id newSuffix(g[v].suffix);
bc.suffixes.emplace(newSuffix, queue);
- }
+ }
}
}
-
+
static
void updateExclusiveInfixProperties(const RoseBuildImpl &build,
const vector<ExclusiveInfo> &exclusive_info,
@@ -1320,14 +1320,14 @@ void updateExclusiveInfixProperties(const RoseBuildImpl &build,
for (const auto &info : exclusive_info) {
// Set leftfix optimisations, disabled for tamarama subengines
rose_group squash_mask = ~rose_group{0};
- // Leftfixes can have stop alphabets.
- vector<u8> stop(N_CHARS, 0);
+ // Leftfixes can have stop alphabets.
+ vector<u8> stop(N_CHARS, 0);
// Infix NFAs can have bounds on their queue lengths.
u32 max_queuelen = 0;
u32 max_width = 0;
u8 cm_count = 0;
CharReach cm_cr;
-
+
const auto &qi = info.queue;
const auto &subengines = info.subengines;
bool no_retrigger = true;
@@ -1346,7 +1346,7 @@ void updateExclusiveInfixProperties(const RoseBuildImpl &build,
for (u32 lit_id : build.g[u].literals) {
lits.insert(build.literals.at(lit_id).s);
}
- }
+ }
DEBUG_PRINTF("%zu literals\n", lits.size());
u32 queuelen = findMaxInfixMatches(leftfix, lits);
@@ -1354,7 +1354,7 @@ void updateExclusiveInfixProperties(const RoseBuildImpl &build,
queuelen++;
}
max_queuelen = max(max_queuelen, queuelen);
- }
+ }
}
if (no_retrigger) {
@@ -1369,11 +1369,11 @@ void updateExclusiveInfixProperties(const RoseBuildImpl &build,
squash_mask, stop,
max_queuelen, cm_count,
cm_cr));
- }
- }
+ }
+ }
}
}
-
+
static
void updateExclusiveSuffixProperties(const RoseBuildImpl &build,
const vector<ExclusiveInfo> &exclusive_info,
@@ -1390,14 +1390,14 @@ void updateExclusiveSuffixProperties(const RoseBuildImpl &build,
no_retrigger = false;
break;
}
- }
-
+ }
+
if (no_retrigger) {
no_retrigger_queues->insert(qi);
- }
+ }
}
}
-
+
static
void buildExclusiveInfixes(RoseBuildImpl &build, build_context &bc,
QueueIndexFactory &qif,
@@ -1429,12 +1429,12 @@ void buildExclusiveInfixes(RoseBuildImpl &build, build_context &bc,
}
info.queue = qif.get_queue();
exclusive_info.push_back(move(info));
- }
+ }
updateExclusiveInfixProperties(build, exclusive_info, bc.leftfix_info,
no_retrigger_queues);
buildInfixContainer(g, bc, exclusive_info, build.cc.grey);
}
-
+
static
void findExclusiveInfixes(RoseBuildImpl &build, build_context &bc,
QueueIndexFactory &qif,
@@ -1582,48 +1582,48 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
leftfix);
}
- return true;
-}
-
-static
-void findSuffixTriggers(const RoseBuildImpl &tbi,
- map<suffix_id, set<PredTopPair> > *suffixTriggers) {
- const RoseGraph &g = tbi.g;
- for (auto v : vertices_range(g)) {
- if (!g[v].suffix) {
- continue;
- }
- PredTopPair ptp(v, g[v].suffix.top);
- (*suffixTriggers)[g[v].suffix].insert(ptp);
- }
-}
-
-static
-bool hasNonSmallBlockOutfix(const vector<OutfixInfo> &outfixes) {
- for (const auto &out : outfixes) {
- if (!out.in_sbmatcher) {
- return true;
- }
- }
- return false;
-}
-
+ return true;
+}
+
+static
+void findSuffixTriggers(const RoseBuildImpl &tbi,
+ map<suffix_id, set<PredTopPair> > *suffixTriggers) {
+ const RoseGraph &g = tbi.g;
+ for (auto v : vertices_range(g)) {
+ if (!g[v].suffix) {
+ continue;
+ }
+ PredTopPair ptp(v, g[v].suffix.top);
+ (*suffixTriggers)[g[v].suffix].insert(ptp);
+ }
+}
+
+static
+bool hasNonSmallBlockOutfix(const vector<OutfixInfo> &outfixes) {
+ for (const auto &out : outfixes) {
+ if (!out.in_sbmatcher) {
+ return true;
+ }
+ }
+ return false;
+}
+
namespace {
class OutfixBuilder : public boost::static_visitor<bytecode_ptr<NFA>> {
public:
explicit OutfixBuilder(const RoseBuildImpl &build_in) : build(build_in) {}
-
+
bytecode_ptr<NFA> operator()(boost::blank&) const {
return nullptr;
};
-
+
bytecode_ptr<NFA> operator()(unique_ptr<raw_dfa> &rdfa) const {
// Unleash the mighty DFA!
return getDfa(*rdfa, false, build.cc, build.rm);
}
bytecode_ptr<NFA> operator()(unique_ptr<raw_som_dfa> &haig) const {
- // Unleash the Goughfish!
+ // Unleash the Goughfish!
return goughCompile(*haig, build.ssm.somPrecision(), build.cc,
build.rm);
}
@@ -1633,31 +1633,31 @@ public:
const ReportManager &rm = build.rm;
NGHolder &h = *holder;
- assert(h.kind == NFA_OUTFIX);
-
- // Build NFA.
+ assert(h.kind == NFA_OUTFIX);
+
+ // Build NFA.
const map<u32, u32> fixed_depth_tops; /* no tops */
const map<u32, vector<vector<CharReach>>> triggers; /* no tops */
bool compress_state = cc.streaming;
bool fast_nfa = false;
auto n = constructNFA(h, &rm, fixed_depth_tops, triggers,
compress_state, fast_nfa, cc);
-
- // Try for a DFA upgrade.
+
+ // Try for a DFA upgrade.
if (n && cc.grey.roseMcClellanOutfix &&
(!has_bounded_repeats_other_than_firsts(*n) || !fast_nfa)) {
- auto rdfa = buildMcClellan(h, &rm, cc.grey);
- if (rdfa) {
+ auto rdfa = buildMcClellan(h, &rm, cc.grey);
+ if (rdfa) {
auto d = getDfa(*rdfa, false, cc, rm);
- if (d) {
+ if (d) {
n = pickImpl(move(d), move(n), fast_nfa);
- }
- }
- }
+ }
+ }
+ }
return n;
- }
-
+ }
+
bytecode_ptr<NFA> operator()(UNUSED MpvProto &mpv) const {
// MPV construction handled separately.
assert(mpv.puffettes.empty());
@@ -1675,166 +1675,166 @@ bytecode_ptr<NFA> buildOutfix(const RoseBuildImpl &build, OutfixInfo &outfix) {
auto n = boost::apply_visitor(OutfixBuilder(build), outfix.proto);
if (n && build.cc.grey.reverseAccelerate) {
- buildReverseAcceleration(n.get(), outfix.rev_info, outfix.minWidth);
- }
-
- return n;
-}
-
-static
+ buildReverseAcceleration(n.get(), outfix.rev_info, outfix.minWidth);
+ }
+
+ return n;
+}
+
+static
void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired,
bool *mpv_as_outfix) {
assert(bc.engineOffsets.empty()); // MPV should be first
- *mpv_as_outfix = false;
+ *mpv_as_outfix = false;
OutfixInfo *mpv_outfix = nullptr;
-
- /* assume outfixes are just above chain tails in queue indices */
- for (auto &out : tbi.outfixes) {
- if (out.is_nonempty_mpv()) {
+
+ /* assume outfixes are just above chain tails in queue indices */
+ for (auto &out : tbi.outfixes) {
+ if (out.is_nonempty_mpv()) {
assert(!mpv_outfix);
mpv_outfix = &out;
- } else {
+ } else {
assert(!out.mpv());
- }
- }
-
+ }
+ }
+
if (!mpv_outfix) {
- return;
- }
-
+ return;
+ }
+
auto *mpv = mpv_outfix->mpv();
auto nfa = mpvCompile(mpv->puffettes, mpv->triggered_puffettes, tbi.rm);
- assert(nfa);
- if (!nfa) {
- throw CompileError("Unable to generate bytecode.");
- }
-
- if (tbi.cc.grey.reverseAccelerate) {
+ assert(nfa);
+ if (!nfa) {
+ throw CompileError("Unable to generate bytecode.");
+ }
+
+ if (tbi.cc.grey.reverseAccelerate) {
buildReverseAcceleration(nfa.get(), mpv_outfix->rev_info,
mpv_outfix->minWidth);
- }
-
+ }
+
u32 qi = mpv_outfix->get_queue(tbi.qif);
- nfa->queueIndex = qi;
+ nfa->queueIndex = qi;
enforceEngineSizeLimit(nfa.get(), tbi.cc.grey);
bc.engine_info_by_queue.emplace(nfa->queueIndex,
engine_info(nfa.get(), false));
-
- DEBUG_PRINTF("built mpv\n");
-
- if (!*historyRequired && requires_decompress_key(*nfa)) {
- *historyRequired = 1;
- }
-
+
+ DEBUG_PRINTF("built mpv\n");
+
+ if (!*historyRequired && requires_decompress_key(*nfa)) {
+ *historyRequired = 1;
+ }
+
add_nfa_to_blob(bc, *nfa);
- *mpv_as_outfix = !mpv->puffettes.empty();
-}
-
-static
-void setOutfixProperties(NFA &n, const OutfixInfo &outfix) {
- depth min_width = outfix.minWidth;
- DEBUG_PRINTF("min_width=%s\n", min_width.str().c_str());
- u32 min_width_value = min_width.is_finite() ? (u32)min_width : 0;
- n.minWidth = min_width_value;
-
- depth max_width = outfix.maxWidth;
- DEBUG_PRINTF("max_width=%s\n", max_width.str().c_str());
- u32 max_width_value = max_width.is_finite() ? (u32)max_width : 0;
- n.maxWidth = max_width_value;
-
- DEBUG_PRINTF("max_offset=%llu\n", outfix.maxOffset);
- u32 max_offset_value = outfix.maxOffset < ~0U ? (u32)outfix.maxOffset : 0;
- n.maxOffset = max_offset_value;
-
- DEBUG_PRINTF("maxBAWidth=%u\n", outfix.maxBAWidth);
- if (outfix.maxBAWidth != ROSE_BOUND_INF && outfix.maxBAWidth < 256) {
- n.maxBiAnchoredWidth = verify_u8(outfix.maxBAWidth);
- }
-}
-
-static
+ *mpv_as_outfix = !mpv->puffettes.empty();
+}
+
+static
+void setOutfixProperties(NFA &n, const OutfixInfo &outfix) {
+ depth min_width = outfix.minWidth;
+ DEBUG_PRINTF("min_width=%s\n", min_width.str().c_str());
+ u32 min_width_value = min_width.is_finite() ? (u32)min_width : 0;
+ n.minWidth = min_width_value;
+
+ depth max_width = outfix.maxWidth;
+ DEBUG_PRINTF("max_width=%s\n", max_width.str().c_str());
+ u32 max_width_value = max_width.is_finite() ? (u32)max_width : 0;
+ n.maxWidth = max_width_value;
+
+ DEBUG_PRINTF("max_offset=%llu\n", outfix.maxOffset);
+ u32 max_offset_value = outfix.maxOffset < ~0U ? (u32)outfix.maxOffset : 0;
+ n.maxOffset = max_offset_value;
+
+ DEBUG_PRINTF("maxBAWidth=%u\n", outfix.maxBAWidth);
+ if (outfix.maxBAWidth != ROSE_BOUND_INF && outfix.maxBAWidth < 256) {
+ n.maxBiAnchoredWidth = verify_u8(outfix.maxBAWidth);
+ }
+}
+
+static
bool prepOutfixes(RoseBuildImpl &tbi, build_context &bc,
- size_t *historyRequired) {
- if (tbi.cc.grey.onlyOneOutfix && tbi.outfixes.size() > 1) {
- DEBUG_PRINTF("we have %zu outfixes, but Grey::onlyOneOutfix is set\n",
- tbi.outfixes.size());
- throw ResourceLimitError();
- }
-
+ size_t *historyRequired) {
+ if (tbi.cc.grey.onlyOneOutfix && tbi.outfixes.size() > 1) {
+ DEBUG_PRINTF("we have %zu outfixes, but Grey::onlyOneOutfix is set\n",
+ tbi.outfixes.size());
+ throw ResourceLimitError();
+ }
+
assert(tbi.qif.allocated_count() == bc.engineOffsets.size());
-
- for (auto &out : tbi.outfixes) {
+
+ for (auto &out : tbi.outfixes) {
if (out.mpv()) {
- continue; /* already done */
- }
+ continue; /* already done */
+ }
DEBUG_PRINTF("building outfix %zd\n", &out - &tbi.outfixes[0]);
- auto n = buildOutfix(tbi, out);
- if (!n) {
- assert(0);
- return false;
- }
-
- setOutfixProperties(*n, out);
-
+ auto n = buildOutfix(tbi, out);
+ if (!n) {
+ assert(0);
+ return false;
+ }
+
+ setOutfixProperties(*n, out);
+
n->queueIndex = out.get_queue(tbi.qif);
enforceEngineSizeLimit(n.get(), tbi.cc.grey);
bc.engine_info_by_queue.emplace(n->queueIndex,
engine_info(n.get(), false));
-
- if (!*historyRequired && requires_decompress_key(*n)) {
- *historyRequired = 1;
- }
-
+
+ if (!*historyRequired && requires_decompress_key(*n)) {
+ *historyRequired = 1;
+ }
+
add_nfa_to_blob(bc, *n);
- }
-
- return true;
-}
-
-static
+ }
+
+ return true;
+}
+
+static
void assignSuffixQueues(RoseBuildImpl &build, map<suffix_id, u32> &suffixes) {
const RoseGraph &g = build.g;
-
- for (auto v : vertices_range(g)) {
- if (!g[v].suffix) {
- continue;
- }
-
- const suffix_id s(g[v].suffix);
-
+
+ for (auto v : vertices_range(g)) {
+ if (!g[v].suffix) {
+ continue;
+ }
+
+ const suffix_id s(g[v].suffix);
+
DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].index, s.graph());
-
- // We may have already built this NFA.
+
+ // We may have already built this NFA.
if (contains(suffixes, s)) {
- continue;
- }
-
+ continue;
+ }
+
u32 queue = build.qif.get_queue();
- DEBUG_PRINTF("assigning %p to queue %u\n", s.graph(), queue);
+ DEBUG_PRINTF("assigning %p to queue %u\n", s.graph(), queue);
suffixes.emplace(s, queue);
- }
-}
-
-static
-void setSuffixProperties(NFA &n, const suffix_id &suff,
- const ReportManager &rm) {
- depth min_width = findMinWidth(suff);
- DEBUG_PRINTF("min_width=%s\n", min_width.str().c_str());
- u32 min_width_value = min_width.is_finite() ? (u32)min_width : 0;
- n.minWidth = min_width_value;
-
- depth max_width = findMaxWidth(suff);
- DEBUG_PRINTF("max_width=%s\n", max_width.str().c_str());
- u32 max_width_value = max_width.is_finite() ? (u32)max_width : 0;
- n.maxWidth = max_width_value;
-
- u64a max_offset = findMaxOffset(all_reports(suff), rm);
- DEBUG_PRINTF("max_offset=%llu\n", max_offset);
- u32 max_offset_value = max_offset < ~0U ? (u32)max_offset : 0;
- n.maxOffset = max_offset_value;
-}
-
-static
+ }
+}
+
+static
+void setSuffixProperties(NFA &n, const suffix_id &suff,
+ const ReportManager &rm) {
+ depth min_width = findMinWidth(suff);
+ DEBUG_PRINTF("min_width=%s\n", min_width.str().c_str());
+ u32 min_width_value = min_width.is_finite() ? (u32)min_width : 0;
+ n.minWidth = min_width_value;
+
+ depth max_width = findMaxWidth(suff);
+ DEBUG_PRINTF("max_width=%s\n", max_width.str().c_str());
+ u32 max_width_value = max_width.is_finite() ? (u32)max_width : 0;
+ n.maxWidth = max_width_value;
+
+ u64a max_offset = findMaxOffset(all_reports(suff), rm);
+ DEBUG_PRINTF("max_offset=%llu\n", max_offset);
+ u32 max_offset_value = max_offset < ~0U ? (u32)max_offset : 0;
+ n.maxOffset = max_offset_value;
+}
+
+static
void buildExclusiveSuffixes(RoseBuildImpl &build, build_context &bc,
QueueIndexFactory &qif,
map<suffix_id, set<PredTopPair>> &suffixTriggers,
@@ -1842,19 +1842,19 @@ void buildExclusiveSuffixes(RoseBuildImpl &build, build_context &bc,
const vector<vector<u32>> &groups,
set<u32> *no_retrigger_queues) {
RoseGraph &g = build.g;
-
+
vector<ExclusiveInfo> exclusive_info;
for (const auto &gp : groups) {
ExclusiveInfo info;
for (const auto &id : gp) {
const auto &verts = vertex_map.at(id);
suffix_id s(g[verts[0]].suffix);
-
+
const set<PredTopPair> &s_triggers = suffixTriggers.at(s);
-
+
map<u32, u32> fixed_depth_tops;
findFixedDepthTops(g, s_triggers, &fixed_depth_tops);
-
+
map<u32, vector<vector<CharReach>>> triggers;
findTriggerSequences(build, s_triggers, &triggers);
@@ -1871,7 +1871,7 @@ void buildExclusiveSuffixes(RoseBuildImpl &build, build_context &bc,
const auto &reports = all_reports(s);
info.reports.insert(reports.begin(), reports.end());
- }
+ }
info.queue = qif.get_queue();
exclusive_info.push_back(move(info));
}
@@ -1879,14 +1879,14 @@ void buildExclusiveSuffixes(RoseBuildImpl &build, build_context &bc,
no_retrigger_queues);
buildSuffixContainer(g, bc, exclusive_info, build.cc.grey);
}
-
+
static
void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc,
QueueIndexFactory &qif,
map<suffix_id, set<PredTopPair>> &suffixTriggers,
set<u32> *no_retrigger_queues) {
const RoseGraph &g = tbi.g;
-
+
map<suffix_id, u32> suffixes;
set<RoleInfo<suffix_id>> roleInfoSet;
map<u32, vector<RoseVertex>> vertex_map;
@@ -1894,8 +1894,8 @@ void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc,
for (auto v : vertices_range(g)) {
if (!g[v].suffix) {
continue;
- }
-
+ }
+
const suffix_id s(g[v].suffix);
DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].index, s.graph());
@@ -1907,30 +1907,30 @@ void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc,
vertex_map[id].push_back(v);
}
continue;
- }
-
+ }
+
if (s.haig()) {
continue;
}
-
+
// Currently disable eod suffixes for exclusive analysis
if (!tbi.isInETable(v) && (s.graph() || s.castle())) {
DEBUG_PRINTF("assigning %p to id %u\n", s.graph(), role_id);
suffixes.emplace(s, role_id);
-
+
vertex_map[role_id].push_back(v);
const set<PredTopPair> &s_triggers = suffixTriggers.at(s);
map<u32, vector<vector<CharReach>>> triggers;
findTriggerSequences(tbi, s_triggers, &triggers);
-
+
RoleInfo<suffix_id> info(s, role_id);
if (setTriggerLiteralsSuffix(info, triggers)) {
roleInfoSet.insert(info);
}
role_id++;
}
- }
-
+ }
+
if (suffixes.size() > 1) {
DEBUG_PRINTF("suffix size:%zu\n", suffixes.size());
vector<vector<u32>> groups;
@@ -1938,9 +1938,9 @@ void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc,
buildExclusiveSuffixes(tbi, bc, qif, suffixTriggers, vertex_map,
groups, no_retrigger_queues);
}
-}
-
-static
+}
+
+static
bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc,
set<u32> *no_retrigger_queues,
const map<suffix_id, set<PredTopPair>> &suffixTriggers) {
@@ -1952,31 +1952,31 @@ bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc,
ordered.emplace_back(e.second, e.first);
}
sort(begin(ordered), end(ordered));
-
+
for (const auto &e : ordered) {
const u32 queue = e.first;
const suffix_id &s = e.second;
-
+
if (s.tamarama()) {
continue;
}
-
+
const set<PredTopPair> &s_triggers = suffixTriggers.at(s);
-
+
map<u32, u32> fixed_depth_tops;
findFixedDepthTops(tbi.g, s_triggers, &fixed_depth_tops);
-
+
map<u32, vector<vector<CharReach>>> triggers;
findTriggerSequences(tbi, s_triggers, &triggers);
-
+
auto n = buildSuffix(tbi.rm, tbi.ssm, fixed_depth_tops, triggers,
s, tbi.cc);
if (!n) {
return false;
}
-
+
setSuffixProperties(*n, s, tbi.rm);
-
+
n->queueIndex = queue;
enforceEngineSizeLimit(n.get(), tbi.cc.grey);
bc.engine_info_by_queue.emplace(n->queueIndex,
@@ -1990,63 +1990,63 @@ bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc,
}
add_nfa_to_blob(bc, *n);
- }
-
+ }
+
return true;
-}
-
-static
+}
+
+static
void buildCountingMiracles(build_context &bc) {
- map<pair<CharReach, u8>, u32> pre_built;
-
+ map<pair<CharReach, u8>, u32> pre_built;
+
for (left_build_info &lbi : bc.leftfix_info | map_values) {
if (!lbi.countingMiracleCount) {
continue;
- }
-
- const CharReach &cr = lbi.countingMiracleReach;
- assert(!cr.all() && !cr.none());
-
- auto key = make_pair(cr, lbi.countingMiracleCount);
- if (contains(pre_built, key)) {
- lbi.countingMiracleOffset = pre_built[key];
- continue;
- }
-
- RoseCountingMiracle rcm;
- memset(&rcm, 0, sizeof(rcm));
-
- if (cr.count() == 1) {
- rcm.c = cr.find_first();
- } else {
- rcm.shufti = 1;
+ }
+
+ const CharReach &cr = lbi.countingMiracleReach;
+ assert(!cr.all() && !cr.none());
+
+ auto key = make_pair(cr, lbi.countingMiracleCount);
+ if (contains(pre_built, key)) {
+ lbi.countingMiracleOffset = pre_built[key];
+ continue;
+ }
+
+ RoseCountingMiracle rcm;
+ memset(&rcm, 0, sizeof(rcm));
+
+ if (cr.count() == 1) {
+ rcm.c = cr.find_first();
+ } else {
+ rcm.shufti = 1;
int rv = shuftiBuildMasks(cr, (u8 *)&rcm.lo, (u8 *)&rcm.hi);
- if (rv == -1) {
- DEBUG_PRINTF("failed to build shufti\n");
- lbi.countingMiracleCount = 0; /* remove counting miracle */
- continue;
- }
-
- rcm.poison = (~cr).find_first();
- }
-
- rcm.count = lbi.countingMiracleCount;
-
+ if (rv == -1) {
+ DEBUG_PRINTF("failed to build shufti\n");
+ lbi.countingMiracleCount = 0; /* remove counting miracle */
+ continue;
+ }
+
+ rcm.poison = (~cr).find_first();
+ }
+
+ rcm.count = lbi.countingMiracleCount;
+
lbi.countingMiracleOffset = bc.engine_blob.add(rcm);
- pre_built[key] = lbi.countingMiracleOffset;
- DEBUG_PRINTF("built cm for count of %u @ %u\n", rcm.count,
- lbi.countingMiracleOffset);
- }
-}
-
+ pre_built[key] = lbi.countingMiracleOffset;
+ DEBUG_PRINTF("built cm for count of %u @ %u\n", rcm.count,
+ lbi.countingMiracleOffset);
+ }
+}
+
/* Note: buildNfas may reduce the lag for vertices that have prefixes */
-static
+static
bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif,
set<u32> *no_retrigger_queues, set<u32> *eager_queues,
u32 *leftfixBeginQueue) {
map<suffix_id, set<PredTopPair>> suffixTriggers;
findSuffixTriggers(tbi, &suffixTriggers);
-
+
if (tbi.cc.grey.allowTamarama && tbi.cc.streaming) {
findExclusiveSuffixes(tbi, bc, qif, suffixTriggers,
no_retrigger_queues);
@@ -2055,156 +2055,156 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif,
assignSuffixQueues(tbi, bc.suffixes);
if (!buildSuffixes(tbi, bc, no_retrigger_queues, suffixTriggers)) {
- return false;
- }
+ return false;
+ }
suffixTriggers.clear();
-
- *leftfixBeginQueue = qif.allocated_count();
-
+
+ *leftfixBeginQueue = qif.allocated_count();
+
if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues,
- true)) {
- return false;
- }
-
+ true)) {
+ return false;
+ }
+
if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues,
- false)) {
- return false;
- }
-
- return true;
-}
-
-static
+ false)) {
+ return false;
+ }
+
+ return true;
+}
+
+static
void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info,
RoseStateOffsets *so, u32 *scratchStateSize,
u32 *transientStateSize) {
- u32 state_offset;
+ u32 state_offset;
if (eng_info.transient) {
// Transient engines do not use stream state, but must have room in
// transient state (stored in scratch).
state_offset = *transientStateSize;
*transientStateSize += eng_info.stream_size;
- } else {
+ } else {
// Pack NFA stream state on to the end of the Rose stream state.
- state_offset = so->end;
+ state_offset = so->end;
so->end += eng_info.stream_size;
- }
-
+ }
+
nfa_info.stateOffset = state_offset;
-
+
// Uncompressed state in scratch must be aligned.
*scratchStateSize = ROUNDUP_N(*scratchStateSize, eng_info.scratch_align);
nfa_info.fullStateOffset = *scratchStateSize;
*scratchStateSize += eng_info.scratch_size;
-}
-
-static
+}
+
+static
void updateNfaState(const build_context &bc, vector<NfaInfo> &nfa_infos,
RoseStateOffsets *so, u32 *scratchStateSize,
u32 *transientStateSize) {
if (nfa_infos.empty()) {
return;
- }
-
+ }
+
*transientStateSize = 0;
*scratchStateSize = 0;
-
+
for (u32 qi = 0; qi < nfa_infos.size(); qi++) {
NfaInfo &nfa_info = nfa_infos[qi];
const auto &eng_info = bc.engine_info_by_queue.at(qi);
allocateStateSpace(eng_info, nfa_info, so, scratchStateSize,
transientStateSize);
- }
-}
-
-/* does not include history requirements for outfixes or literal matchers */
-u32 RoseBuildImpl::calcHistoryRequired() const {
- u32 m = cc.grey.minHistoryAvailable;
-
- for (auto v : vertices_range(g)) {
- if (g[v].suffix) {
- m = MAX(m, 2); // so that history req is at least 1, for state
- // compression.
- /* TODO: check if suffix uses state compression */
- }
-
- if (g[v].left) {
- const u32 lag = g[v].left.lag;
- const left_id leftfix(g[v].left);
- if (contains(transient, leftfix)) {
- u32 mv = lag + findMaxWidth(leftfix);
-
- // If this vertex has an event literal, we need to add one to
- // cope with it.
- if (hasLiteralInTable(v, ROSE_EVENT)) {
- mv++;
- }
-
- m = MAX(m, mv);
- } else {
- /* rose will be caught up from (lag - 1), also need an extra
- * byte behind that to find the decompression key */
- m = MAX(m, lag + 1);
- m = MAX(m, 2); // so that history req is at least 1, for state
- // compression.
- }
- }
- }
-
- // Delayed literals contribute to history requirement as well.
+ }
+}
+
+/* does not include history requirements for outfixes or literal matchers */
+u32 RoseBuildImpl::calcHistoryRequired() const {
+ u32 m = cc.grey.minHistoryAvailable;
+
+ for (auto v : vertices_range(g)) {
+ if (g[v].suffix) {
+ m = MAX(m, 2); // so that history req is at least 1, for state
+ // compression.
+ /* TODO: check if suffix uses state compression */
+ }
+
+ if (g[v].left) {
+ const u32 lag = g[v].left.lag;
+ const left_id leftfix(g[v].left);
+ if (contains(transient, leftfix)) {
+ u32 mv = lag + findMaxWidth(leftfix);
+
+ // If this vertex has an event literal, we need to add one to
+ // cope with it.
+ if (hasLiteralInTable(v, ROSE_EVENT)) {
+ mv++;
+ }
+
+ m = MAX(m, mv);
+ } else {
+ /* rose will be caught up from (lag - 1), also need an extra
+ * byte behind that to find the decompression key */
+ m = MAX(m, lag + 1);
+ m = MAX(m, 2); // so that history req is at least 1, for state
+ // compression.
+ }
+ }
+ }
+
+ // Delayed literals contribute to history requirement as well.
for (u32 id = 0; id < literals.size(); id++) {
const auto &lit = literals.at(id);
- if (lit.delay) {
- // If the literal is delayed _and_ has a mask that is longer than
- // the literal, we need enough history to match the whole mask as
- // well when rebuilding delayed matches.
- size_t len = std::max(lit.elength(), lit.msk.size() + lit.delay);
- ENSURE_AT_LEAST(&m, verify_u32(len));
- }
-
- /* Benefit checks require data is available. */
- if (literal_info.at(id).requires_benefits) {
- ENSURE_AT_LEAST(&m,
- MIN(verify_u32(lit.elength()), MAX_MASK2_WIDTH));
- }
- }
-
- m = MAX(m, max_rose_anchored_floating_overlap);
-
- DEBUG_PRINTF("m=%u, ematcher_region_size=%u\n", m, ematcher_region_size);
-
- if (ematcher_region_size >= m) {
- return ematcher_region_size;
- }
-
- return m ? m - 1 : 0;
-}
-
-static
+ if (lit.delay) {
+ // If the literal is delayed _and_ has a mask that is longer than
+ // the literal, we need enough history to match the whole mask as
+ // well when rebuilding delayed matches.
+ size_t len = std::max(lit.elength(), lit.msk.size() + lit.delay);
+ ENSURE_AT_LEAST(&m, verify_u32(len));
+ }
+
+ /* Benefit checks require data is available. */
+ if (literal_info.at(id).requires_benefits) {
+ ENSURE_AT_LEAST(&m,
+ MIN(verify_u32(lit.elength()), MAX_MASK2_WIDTH));
+ }
+ }
+
+ m = MAX(m, max_rose_anchored_floating_overlap);
+
+ DEBUG_PRINTF("m=%u, ematcher_region_size=%u\n", m, ematcher_region_size);
+
+ if (ematcher_region_size >= m) {
+ return ematcher_region_size;
+ }
+
+ return m ? m - 1 : 0;
+}
+
+static
u32 buildLastByteIter(const RoseGraph &g, build_context &bc) {
vector<u32> lb_roles;
-
+
for (auto v : vertices_range(g)) {
if (!hasLastByteHistorySucc(g, v)) {
continue;
- }
+ }
// Eager EOD reporters won't have state indices.
auto it = bc.roleStateIndices.find(v);
if (it != end(bc.roleStateIndices)) {
lb_roles.push_back(it->second);
DEBUG_PRINTF("last byte %u\n", it->second);
- }
- }
-
+ }
+ }
+
if (lb_roles.empty()) {
return 0; /* invalid offset */
- }
+ }
auto iter = mmbBuildSparseIterator(lb_roles, bc.roleStateIndices.size());
return bc.engine_blob.add_iterator(iter);
-}
-
-static
+}
+
+static
u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build,
const vector<raw_dfa> &anchored_dfas) {
if (anchored_dfas.size() > 1) {
@@ -2212,8 +2212,8 @@ u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build,
/* We must regard matches from other anchored tables as unordered, as
* we do for floating matches. */
return 1;
- }
-
+ }
+
const RoseGraph &g = build.g;
u32 minWidth = ROSE_BOUND_INF;
for (auto v : vertices_range(g)) {
@@ -2221,58 +2221,58 @@ u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build,
DEBUG_PRINTF("skipping %zu anchored or root\n", g[v].index);
continue;
}
-
+
u32 w = g[v].min_offset;
DEBUG_PRINTF("%zu m_o = %u\n", g[v].index, w);
-
+
if (w < minWidth) {
minWidth = w;
- }
- }
-
+ }
+ }
+
return minWidth;
-}
-
-static
+}
+
+static
vector<u32> buildSuffixEkeyLists(const RoseBuildImpl &build, build_context &bc,
const QueueIndexFactory &qif) {
vector<u32> out(qif.allocated_count());
-
+
map<u32, vector<u32>> qi_to_ekeys; /* for determinism */
-
+
for (const auto &e : bc.suffixes) {
const suffix_id &s = e.first;
u32 qi = e.second;
set<u32> ekeys = reportsToEkeys(all_reports(s), build.rm);
-
+
if (!ekeys.empty()) {
qi_to_ekeys[qi] = {ekeys.begin(), ekeys.end()};
- }
- }
-
+ }
+ }
+
/* for each outfix also build elists */
for (const auto &outfix : build.outfixes) {
u32 qi = outfix.get_queue();
set<u32> ekeys = reportsToEkeys(all_reports(outfix), build.rm);
-
+
if (!ekeys.empty()) {
qi_to_ekeys[qi] = {ekeys.begin(), ekeys.end()};
}
- }
-
+ }
+
for (auto &e : qi_to_ekeys) {
u32 qi = e.first;
auto &ekeys = e.second;
assert(!ekeys.empty());
ekeys.push_back(INVALID_EKEY); /* terminator */
out[qi] = bc.engine_blob.add_range(ekeys);
- }
-
+ }
+
return out;
-}
-
+}
+
/** Returns sparse iter offset in engine blob. */
-static
+static
u32 buildEodNfaIterator(build_context &bc, const u32 activeQueueCount) {
vector<u32> keys;
for (u32 qi = 0; qi < activeQueueCount; ++qi) {
@@ -2281,68 +2281,68 @@ u32 buildEodNfaIterator(build_context &bc, const u32 activeQueueCount) {
DEBUG_PRINTF("nfa qi=%u accepts eod\n", qi);
keys.push_back(qi);
}
- }
-
+ }
+
if (keys.empty()) {
return 0;
- }
-
+ }
+
DEBUG_PRINTF("building iter for %zu nfas\n", keys.size());
-
+
auto iter = mmbBuildSparseIterator(keys, activeQueueCount);
return bc.engine_blob.add_iterator(iter);
-}
-
-static
+}
+
+static
bool hasMpvTrigger(const set<u32> &reports, const ReportManager &rm) {
for (u32 r : reports) {
if (rm.getReport(r).type == INTERNAL_ROSE_CHAIN) {
- return true;
- }
- }
-
- return false;
-}
-
-static
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static
bool anyEndfixMpvTriggers(const RoseBuildImpl &build) {
const RoseGraph &g = build.g;
unordered_set<suffix_id> done;
-
+
/* suffixes */
for (auto v : vertices_range(g)) {
if (!g[v].suffix) {
continue;
- }
+ }
if (contains(done, g[v].suffix)) {
continue; /* already done */
}
done.insert(g[v].suffix);
-
+
if (hasMpvTrigger(all_reports(g[v].suffix), build.rm)) {
return true;
- }
- }
-
+ }
+ }
+
/* outfixes */
for (const auto &out : build.outfixes) {
if (hasMpvTrigger(all_reports(out), build.rm)) {
- return true;
- }
- }
-
- return false;
-}
-
+ return true;
+ }
+ }
+
+ return false;
+}
+
struct DerivedBoundaryReports {
explicit DerivedBoundaryReports(const BoundaryReports &boundary) {
insert(&report_at_0_eod_full, boundary.report_at_0_eod);
insert(&report_at_0_eod_full, boundary.report_at_eod);
insert(&report_at_0_eod_full, boundary.report_at_0);
- }
+ }
set<ReportID> report_at_0_eod_full;
};
-
+
static
void addSomRevNfas(build_context &bc, RoseEngine &proto,
const SomSlotManager &ssm) {
@@ -2357,71 +2357,71 @@ void addSomRevNfas(build_context &bc, RoseEngine &proto,
nfa_offsets.push_back(offset);
/* note: som rev nfas don't need a queue assigned as only run in block
* mode reverse */
- }
-
+ }
+
proto.somRevCount = verify_u32(nfas.size());
proto.somRevOffsetOffset = bc.engine_blob.add_range(nfa_offsets);
-}
-
-static
+}
+
+static
void recordResources(RoseResources &resources, const RoseBuildImpl &build,
const vector<raw_dfa> &anchored_dfas,
const vector<LitFragment> &fragments) {
if (!build.outfixes.empty()) {
resources.has_outfixes = true;
- }
-
+ }
+
resources.has_literals = !fragments.empty();
-
+
const auto &g = build.g;
for (const auto &v : vertices_range(g)) {
if (g[v].eod_accept) {
resources.has_eod = true;
break;
- }
+ }
if (g[v].suffix && has_eod_accepts(g[v].suffix)) {
resources.has_eod = true;
break;
- }
- }
-
+ }
+ }
+
resources.has_anchored = !anchored_dfas.empty();
resources.has_anchored_multiple = anchored_dfas.size() > 1;
for (const auto &rdfa : anchored_dfas) {
if (rdfa.states.size() > 256) {
resources.has_anchored_large = true;
- }
- }
-
-}
-
-static
+ }
+ }
+
+}
+
+static
u32 writeProgram(build_context &bc, RoseProgram &&program) {
if (program.empty()) {
DEBUG_PRINTF("no program\n");
return 0;
- }
-
+ }
+
applyFinalSpecialisation(program);
-
+
auto it = bc.program_cache.find(program);
if (it != end(bc.program_cache)) {
DEBUG_PRINTF("reusing cached program at %u\n", it->second);
return it->second;
- }
-
+ }
+
recordResources(bc.resources, program);
recordLongLiterals(bc.longLiterals, program);
-
+
auto prog_bytecode = writeProgram(bc.engine_blob, program);
u32 offset = bc.engine_blob.add(prog_bytecode);
DEBUG_PRINTF("prog len %zu written at offset %u\n", prog_bytecode.size(),
offset);
bc.program_cache.emplace(move(program), offset);
return offset;
-}
-
-static
+}
+
+static
u32 writeActiveLeftIter(RoseEngineBlob &engine_blob,
const vector<LeftNfaInfo> &leftInfoTable) {
vector<u32> keys;
@@ -2430,19 +2430,19 @@ u32 writeActiveLeftIter(RoseEngineBlob &engine_blob,
DEBUG_PRINTF("leftfix %zu is active\n", i);
keys.push_back(verify_u32(i));
}
- }
-
+ }
+
DEBUG_PRINTF("%zu active leftfixes\n", keys.size());
-
+
if (keys.empty()) {
return 0;
- }
-
+ }
+
auto iter = mmbBuildSparseIterator(keys, verify_u32(leftInfoTable.size()));
return engine_blob.add_iterator(iter);
-}
-
-static
+}
+
+static
bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc,
u32 outfixEndQueue) {
for (u32 i = 0; i < outfixEndQueue; i++) {
@@ -2451,38 +2451,38 @@ bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc,
DEBUG_PRINTF("outfix has eod\n");
return true;
}
- }
-
+ }
+
if (build.eod_event_literal_id != MO_INVALID_IDX) {
DEBUG_PRINTF("eod is an event to be celebrated\n");
return true;
- }
-
+ }
+
const RoseGraph &g = build.g;
for (auto v : vertices_range(g)) {
if (g[v].eod_accept) {
DEBUG_PRINTF("literally report eod\n");
- return true;
- }
+ return true;
+ }
if (g[v].suffix && has_eod_accepts(g[v].suffix)) {
DEBUG_PRINTF("eod suffix\n");
return true;
}
- }
+ }
DEBUG_PRINTF("yawn\n");
- return false;
-}
-
-static
+ return false;
+}
+
+static
void writeDkeyInfo(const ReportManager &rm, RoseEngineBlob &engine_blob,
RoseEngine &proto) {
const auto inv_dkeys = rm.getDkeyToReportTable();
proto.invDkeyOffset = engine_blob.add_range(inv_dkeys);
proto.dkeyCount = rm.numDkeys();
proto.dkeyLogSize = fatbit_size(proto.dkeyCount);
-}
-
-static
+}
+
+static
void writeLeftInfo(RoseEngineBlob &engine_blob, RoseEngine &proto,
const vector<LeftNfaInfo> &leftInfoTable) {
proto.leftOffset = engine_blob.add_range(leftInfoTable);
@@ -2491,9 +2491,9 @@ void writeLeftInfo(RoseEngineBlob &engine_blob, RoseEngine &proto,
proto.roseCount = verify_u32(leftInfoTable.size());
proto.activeLeftCount = verify_u32(leftInfoTable.size());
proto.rosePrefixCount = countRosePrefixes(leftInfoTable);
-}
-
-static
+}
+
+static
void writeLogicalInfo(const ReportManager &rm, RoseEngineBlob &engine_blob,
RoseEngine &proto) {
const auto &tree = rm.getLogicalTree();
@@ -2511,68 +2511,68 @@ void writeNfaInfo(const RoseBuildImpl &build, build_context &bc,
const u32 queue_count = build.qif.allocated_count();
if (!queue_count) {
return;
- }
-
+ }
+
auto ekey_lists = buildSuffixEkeyLists(build, bc, build.qif);
-
+
vector<NfaInfo> infos(queue_count);
memset(infos.data(), 0, sizeof(NfaInfo) * queue_count);
-
+
for (u32 qi = 0; qi < queue_count; qi++) {
NfaInfo &info = infos[qi];
info.nfaOffset = bc.engineOffsets.at(qi);
assert(qi < ekey_lists.size());
info.ekeyListOffset = ekey_lists.at(qi);
info.no_retrigger = contains(no_retrigger_queues, qi) ? 1 : 0;
- }
-
+ }
+
// Mark outfixes that are in the small block matcher.
for (const auto &out : build.outfixes) {
const u32 qi = out.get_queue();
assert(qi < infos.size());
infos.at(qi).in_sbmatcher = out.in_sbmatcher;
- }
-
+ }
+
// Mark suffixes triggered by EOD table literals.
const RoseGraph &g = build.g;
- for (auto v : vertices_range(g)) {
- if (!g[v].suffix) {
- continue;
- }
+ for (auto v : vertices_range(g)) {
+ if (!g[v].suffix) {
+ continue;
+ }
u32 qi = bc.suffixes.at(g[v].suffix);
assert(qi < infos.size());
if (build.isInETable(v)) {
infos.at(qi).eod = 1;
- }
- }
-
+ }
+ }
+
// Update state offsets to do with NFAs in proto and in the NfaInfo
// structures.
updateNfaState(bc, infos, &proto.stateOffsets, &proto.scratchStateSize,
&proto.tStateSize);
-
+
proto.nfaInfoOffset = bc.engine_blob.add_range(infos);
-}
-
-static
+}
+
+static
bool hasBoundaryReports(const BoundaryReports &boundary) {
if (!boundary.report_at_0.empty()) {
DEBUG_PRINTF("has boundary reports at 0\n");
return true;
- }
+ }
if (!boundary.report_at_0_eod.empty()) {
DEBUG_PRINTF("has boundary reports at 0 eod\n");
return true;
- }
+ }
if (!boundary.report_at_eod.empty()) {
DEBUG_PRINTF("has boundary reports at eod\n");
return true;
- }
+ }
DEBUG_PRINTF("no boundary reports\n");
return false;
-}
-
-static
+}
+
+static
void makeBoundaryPrograms(const RoseBuildImpl &build, build_context &bc,
const BoundaryReports &boundary,
const DerivedBoundaryReports &dboundary,
@@ -2580,29 +2580,29 @@ void makeBoundaryPrograms(const RoseBuildImpl &build, build_context &bc,
DEBUG_PRINTF("report ^: %zu\n", boundary.report_at_0.size());
DEBUG_PRINTF("report $: %zu\n", boundary.report_at_eod.size());
DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size());
-
+
auto eod_prog = makeBoundaryProgram(build, boundary.report_at_eod);
out.reportEodOffset = writeProgram(bc, move(eod_prog));
-
+
auto zero_prog = makeBoundaryProgram(build, boundary.report_at_0);
out.reportZeroOffset = writeProgram(bc, move(zero_prog));
-
+
auto zeod_prog = makeBoundaryProgram(build, dboundary.report_at_0_eod_full);
out.reportZeroEodOffset = writeProgram(bc, move(zeod_prog));
-}
-
-static
+}
+
+static
unordered_map<RoseVertex, u32> assignStateIndices(const RoseBuildImpl &build) {
const auto &g = build.g;
-
+
u32 state = 0;
unordered_map<RoseVertex, u32> roleStateIndices;
- for (auto v : vertices_range(g)) {
+ for (auto v : vertices_range(g)) {
// Virtual vertices (starts, EOD accept vertices) never need state
// indices.
if (build.isVirtualVertex(v)) {
- continue;
- }
+ continue;
+ }
// We only need a state index if we have successors that are not
// eagerly-reported EOD vertices.
@@ -2612,60 +2612,60 @@ unordered_map<RoseVertex, u32> assignStateIndices(const RoseBuildImpl &build) {
needs_state_index = true;
break;
}
- }
-
+ }
+
if (!needs_state_index) {
continue;
- }
-
+ }
+
/* TODO: also don't need a state index if all edges are nfa based */
roleStateIndices.emplace(v, state++);
- }
-
+ }
+
DEBUG_PRINTF("assigned %u states (from %zu vertices)\n", state,
num_vertices(g));
return roleStateIndices;
-}
-
-static
+}
+
+static
bool hasUsefulStops(const left_build_info &build) {
for (u32 i = 0; i < N_CHARS; i++) {
if (build.stopAlphabet[i]) {
- return true;
- }
- }
- return false;
-}
-
-static
+ return true;
+ }
+ }
+ return false;
+}
+
+static
void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc,
const set<u32> &eager_queues, u32 leftfixBeginQueue,
u32 leftfixCount, vector<LeftNfaInfo> &leftTable,
u32 *laggedRoseCount, size_t *history) {
const RoseGraph &g = tbi.g;
const CompileContext &cc = tbi.cc;
-
+
unordered_set<u32> done_core;
-
+
leftTable.resize(leftfixCount);
-
+
u32 lagIndex = 0;
-
+
for (RoseVertex v : vertices_range(g)) {
if (!g[v].left) {
continue;
- }
+ }
assert(contains(bc.leftfix_info, v));
const left_build_info &lbi = bc.leftfix_info.at(v);
if (lbi.has_lookaround) {
continue;
}
-
+
assert(lbi.queue >= leftfixBeginQueue);
u32 left_index = lbi.queue - leftfixBeginQueue;
assert(left_index < leftfixCount);
-
+
/* seedy hack to make miracles more effective.
*
* TODO: make miracle seeking not depend on history length and have
@@ -2676,30 +2676,30 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc,
g[v].left.lag + 1
+ cc.grey.miracleHistoryBonus));
}
-
+
LeftNfaInfo &left = leftTable[left_index];
if (!contains(done_core, left_index)) {
done_core.insert(left_index);
memset(&left, 0, sizeof(left));
left.squash_mask = ~0ULL;
-
+
DEBUG_PRINTF("populating info for %u\n", left_index);
-
+
left.maxQueueLen = lbi.max_queuelen;
-
+
if (hasUsefulStops(lbi)) {
assert(lbi.stopAlphabet.size() == N_CHARS);
left.stopTable = bc.engine_blob.add_range(lbi.stopAlphabet);
}
-
+
assert(lbi.countingMiracleOffset || !lbi.countingMiracleCount);
left.countingMiracleOffset = lbi.countingMiracleOffset;
-
+
DEBUG_PRINTF("mw = %u\n", lbi.transient);
left.transient = verify_u8(lbi.transient);
left.infix = tbi.isNonRootSuccessor(v);
left.eager = contains(eager_queues, lbi.queue);
-
+
// A rose has a lagIndex if it's non-transient and we are
// streaming.
if (!lbi.transient && cc.streaming) {
@@ -2709,64 +2709,64 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc,
left.lagIndex = ROSE_OFFSET_INVALID;
}
}
-
+
DEBUG_PRINTF("rose %u is %s\n", left_index,
left.infix ? "infix" : "prefix");
-
+
// Update squash mask.
left.squash_mask &= lbi.squash_mask;
-
+
// Update the max delay.
ENSURE_AT_LEAST(&left.maxLag, lbi.lag);
-
+
if (contains(g[v].literals, tbi.eod_event_literal_id)) {
left.eod_check = 1;
}
- }
-
+ }
+
DEBUG_PRINTF("built %u roses with lag indices\n", lagIndex);
*laggedRoseCount = lagIndex;
-}
-
-static
+}
+
+static
RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc,
ProgramBuild &prog_build, u32 lit_id,
const vector<vector<RoseEdge>> &lit_edge_map,
bool is_anchored_replay_program) {
DEBUG_PRINTF("lit_id=%u\n", lit_id);
assert(lit_id < lit_edge_map.size());
-
+
return makeLiteralProgram(build, bc.leftfix_info, bc.suffixes,
bc.engine_info_by_queue, bc.roleStateIndices,
prog_build, lit_id, lit_edge_map.at(lit_id),
is_anchored_replay_program);
-}
-
-static
+}
+
+static
RoseProgram makeFragmentProgram(const RoseBuildImpl &build, build_context &bc,
ProgramBuild &prog_build,
const vector<u32> &lit_ids,
const vector<vector<RoseEdge>> &lit_edge_map) {
assert(!lit_ids.empty());
-
+
vector<RoseProgram> blocks;
for (const auto &lit_id : lit_ids) {
auto prog = makeLiteralProgram(build, bc, prog_build, lit_id,
lit_edge_map, false);
blocks.push_back(move(prog));
- }
-
+ }
+
return assembleProgramBlocks(move(blocks));
-}
-
+}
+
/**
* \brief Returns a map from literal ID to a list of edges leading into
* vertices with that literal ID.
*/
-static
+static
vector<vector<RoseEdge>> findEdgesByLiteral(const RoseBuildImpl &build) {
vector<vector<RoseEdge>> lit_edge_map(build.literals.size());
-
+
const auto &g = build.g;
for (const auto &v : vertices_range(g)) {
for (const auto &lit_id : g[v].literals) {
@@ -2775,7 +2775,7 @@ vector<vector<RoseEdge>> findEdgesByLiteral(const RoseBuildImpl &build) {
insert(&edge_list, edge_list.end(), in_edges(v, g));
}
}
-
+
// Sort edges in each edge list by (source, target) indices. This gives us
// less surprising ordering in program generation for a literal with many
// edges.
@@ -2785,82 +2785,82 @@ vector<vector<RoseEdge>> findEdgesByLiteral(const RoseBuildImpl &build) {
return tie(g[source(a, g)].index, g[target(a, g)].index) <
tie(g[source(b, g)].index, g[target(b, g)].index);
});
- }
-
+ }
+
return lit_edge_map;
-}
-
-static
+}
+
+static
bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) {
assert(lit_id < build.literal_info.size());
const auto &info = build.literal_info[lit_id];
if (!info.vertices.empty()) {
return true;
}
-
+
for (const u32 &delayed_id : info.delayed_ids) {
assert(delayed_id < build.literal_info.size());
const rose_literal_info &delayed_info = build.literal_info[delayed_id];
if (!delayed_info.vertices.empty()) {
return true;
}
- }
-
+ }
+
DEBUG_PRINTF("literal %u has no refs\n", lit_id);
return false;
-}
-
-static
+}
+
+static
rose_literal_id getFragment(rose_literal_id lit) {
if (lit.s.length() > ROSE_SHORT_LITERAL_LEN_MAX) {
// Trim to last ROSE_SHORT_LITERAL_LEN_MAX bytes.
lit.s.erase(0, lit.s.length() - ROSE_SHORT_LITERAL_LEN_MAX);
- }
+ }
DEBUG_PRINTF("fragment: %s\n", dumpString(lit.s).c_str());
return lit;
-}
-
-static
+}
+
+static
vector<LitFragment> groupByFragment(const RoseBuildImpl &build) {
vector<LitFragment> fragments;
u32 frag_id = 0;
-
+
struct FragmentInfo {
vector<u32> lit_ids;
rose_group groups = 0;
};
-
+
map<rose_literal_id, FragmentInfo> frag_info;
-
+
for (u32 lit_id = 0; lit_id < build.literals.size(); lit_id++) {
const auto &lit = build.literals.at(lit_id);
const auto &info = build.literal_info.at(lit_id);
-
+
if (!isUsedLiteral(build, lit_id)) {
DEBUG_PRINTF("lit %u is unused\n", lit_id);
continue;
- }
-
+ }
+
if (lit.table == ROSE_EVENT) {
DEBUG_PRINTF("lit %u is an event\n", lit_id);
- continue;
- }
-
+ continue;
+ }
+
auto groups = info.group_mask;
-
+
if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) {
fragments.emplace_back(frag_id, lit.s, groups, lit_id);
frag_id++;
continue;
- }
-
+ }
+
DEBUG_PRINTF("fragment candidate: lit_id=%u %s\n", lit_id,
dumpString(lit.s).c_str());
auto &fi = frag_info[getFragment(lit)];
fi.lit_ids.push_back(lit_id);
fi.groups |= groups;
- }
-
+ }
+
for (auto &m : frag_info) {
auto &lit = m.first;
auto &fi = m.second;
@@ -2869,17 +2869,17 @@ vector<LitFragment> groupByFragment(const RoseBuildImpl &build) {
fragments.emplace_back(frag_id, lit.s, fi.groups, move(fi.lit_ids));
frag_id++;
assert(frag_id == fragments.size());
- }
-
+ }
+
return fragments;
-}
-
-static
+}
+
+static
void buildIncludedIdMap(unordered_map<u32, pair<u32, u8>> &includedIdMap,
const LitProto *litProto) {
if (!litProto) {
return;
- }
+ }
const auto &proto = *litProto->hwlmProto;
for (const auto &lit : proto.lits) {
if (contains(includedIdMap, lit.id)) {
@@ -2897,11 +2897,11 @@ void buildIncludedIdMap(unordered_map<u32, pair<u32, u8>> &includedIdMap,
includedIdMap[lit.id] = make_pair(lit.included_id, lit.squash);
} else {
includedIdMap[lit.id] = make_pair(INVALID_LIT_ID, 0);
- }
- }
-}
-
-static
+ }
+ }
+}
+
+static
void findInclusionGroups(vector<LitFragment> &fragments,
LitProto *fproto, LitProto *drproto,
LitProto *eproto, LitProto *sbproto) {
@@ -2911,7 +2911,7 @@ void findInclusionGroups(vector<LitFragment> &fragments,
buildIncludedIdMap(includedDelayIdMap, drproto);
buildIncludedIdMap(includedIdMap, eproto);
buildIncludedIdMap(includedIdMap, sbproto);
-
+
size_t fragNum = fragments.size();
vector<u32> candidates;
for (size_t j = 0; j < fragNum; j++) {
@@ -2922,8 +2922,8 @@ void findInclusionGroups(vector<LitFragment> &fragments,
candidates.push_back(j);
DEBUG_PRINTF("find candidate\n");
}
- }
-
+ }
+
for (const auto &c : candidates) {
auto &frag = fragments[c];
u32 id = c;
@@ -2935,20 +2935,20 @@ void findInclusionGroups(vector<LitFragment> &fragments,
DEBUG_PRINTF("frag id %u child frag id %u\n", c,
frag.included_frag_id);
}
-
+
if (contains(includedDelayIdMap, id) &&
includedDelayIdMap[id].first != INVALID_LIT_ID) {
const auto &childId = includedDelayIdMap[id];
frag.included_delay_frag_id = childId.first;
frag.delay_squash = childId.second;
-
+
DEBUG_PRINTF("delay frag id %u child frag id %u\n", c,
frag.included_delay_frag_id);
- }
- }
-}
-
-static
+ }
+ }
+}
+
+static
void buildFragmentPrograms(const RoseBuildImpl &build,
vector<LitFragment> &fragments,
build_context &bc, ProgramBuild &prog_build,
@@ -2964,12 +2964,12 @@ void buildFragmentPrograms(const RoseBuildImpl &build,
auto caseful2 = !b.s.any_nocase();
return tie(len1, caseful1) < tie(len2, caseful2);
});
-
+
for (auto &frag : ordered_fragments) {
auto &pfrag = fragments[frag.fragment_id];
DEBUG_PRINTF("frag_id=%u, lit_ids=[%s]\n", pfrag.fragment_id,
as_string_list(pfrag.lit_ids).c_str());
-
+
auto lit_prog = makeFragmentProgram(build, bc, prog_build,
pfrag.lit_ids, lit_edge_map);
if (pfrag.included_frag_id != INVALID_FRAG_ID &&
@@ -2981,14 +2981,14 @@ void buildFragmentPrograms(const RoseBuildImpl &build,
DEBUG_PRINTF("child %u offset %u\n", cfrag.fragment_id,
child_offset);
addIncludedJumpProgram(lit_prog, child_offset, pfrag.squash);
- }
+ }
pfrag.lit_program_offset = writeProgram(bc, move(lit_prog));
-
+
// We only do delayed rebuild in streaming mode.
if (!build.cc.streaming) {
- continue;
- }
-
+ continue;
+ }
+
auto rebuild_prog = makeDelayRebuildProgram(build, prog_build,
pfrag.lit_ids);
if (pfrag.included_delay_frag_id != INVALID_FRAG_ID &&
@@ -3001,12 +3001,12 @@ void buildFragmentPrograms(const RoseBuildImpl &build,
child_offset);
addIncludedJumpProgram(rebuild_prog, child_offset,
pfrag.delay_squash);
- }
+ }
pfrag.delay_program_offset = writeProgram(bc, move(rebuild_prog));
- }
-}
-
-static
+ }
+}
+
+static
void updateLitProtoProgramOffset(vector<LitFragment> &fragments,
LitProto &litProto, bool delay) {
auto &proto = *litProto.hwlmProto;
@@ -3022,34 +3022,34 @@ void updateLitProtoProgramOffset(vector<LitFragment> &fragments,
frag.lit_program_offset);
lit.id = frag.lit_program_offset;
}
- }
-}
-
-static
+ }
+}
+
+static
void updateLitProgramOffset(vector<LitFragment> &fragments,
LitProto *fproto, LitProto *drproto,
LitProto *eproto, LitProto *sbproto) {
if (fproto) {
updateLitProtoProgramOffset(fragments, *fproto, false);
}
-
+
if (drproto) {
updateLitProtoProgramOffset(fragments, *drproto, true);
}
-
+
if (eproto) {
updateLitProtoProgramOffset(fragments, *eproto, false);
- }
-
+ }
+
if (sbproto) {
updateLitProtoProgramOffset(fragments, *sbproto, false);
- }
-}
-
+ }
+}
+
/**
* \brief Build the interpreter programs for each literal.
*/
-static
+static
void buildLiteralPrograms(const RoseBuildImpl &build,
vector<LitFragment> &fragments, build_context &bc,
ProgramBuild &prog_build, LitProto *fproto,
@@ -3057,42 +3057,42 @@ void buildLiteralPrograms(const RoseBuildImpl &build,
LitProto *sbproto) {
DEBUG_PRINTF("%zu fragments\n", fragments.size());
auto lit_edge_map = findEdgesByLiteral(build);
-
+
findInclusionGroups(fragments, fproto, drproto, eproto, sbproto);
-
+
buildFragmentPrograms(build, fragments, bc, prog_build, lit_edge_map);
-
+
// update literal program offsets for literal matcher prototypes
updateLitProgramOffset(fragments, fproto, drproto, eproto, sbproto);
-}
-
+}
+
/**
* \brief Write delay replay programs to the bytecode.
*
* Returns the offset of the beginning of the program array, and the number of
* programs.
*/
-static
+static
pair<u32, u32> writeDelayPrograms(const RoseBuildImpl &build,
const vector<LitFragment> &fragments,
build_context &bc,
ProgramBuild &prog_build) {
auto lit_edge_map = findEdgesByLiteral(build);
-
+
vector<u32> programs; // program offsets indexed by (delayed) lit id
unordered_map<u32, u32> cache; // program offsets we have already seen
-
+
for (const auto &frag : fragments) {
for (const u32 lit_id : frag.lit_ids) {
const auto &info = build.literal_info.at(lit_id);
-
+
for (const auto &delayed_lit_id : info.delayed_ids) {
DEBUG_PRINTF("lit id %u delay id %u\n", lit_id, delayed_lit_id);
auto prog = makeLiteralProgram(build, bc, prog_build,
delayed_lit_id, lit_edge_map,
false);
u32 offset = writeProgram(bc, move(prog));
-
+
u32 delay_id;
auto it = cache.find(offset);
if (it != end(cache)) {
@@ -3108,37 +3108,37 @@ pair<u32, u32> writeDelayPrograms(const RoseBuildImpl &build,
}
prog_build.delay_programs.emplace(delayed_lit_id, delay_id);
}
- }
- }
-
+ }
+ }
+
DEBUG_PRINTF("%zu delay programs\n", programs.size());
return {bc.engine_blob.add_range(programs), verify_u32(programs.size())};
-}
-
+}
+
/**
* \brief Write anchored replay programs to the bytecode.
*
* Returns the offset of the beginning of the program array, and the number of
* programs.
*/
-static
+static
pair<u32, u32> writeAnchoredPrograms(const RoseBuildImpl &build,
const vector<LitFragment> &fragments,
build_context &bc,
ProgramBuild &prog_build) {
auto lit_edge_map = findEdgesByLiteral(build);
-
+
vector<u32> programs; // program offsets indexed by anchored id
unordered_map<u32, u32> cache; // program offsets we have already seen
-
+
for (const auto &frag : fragments) {
for (const u32 lit_id : frag.lit_ids) {
const auto &lit = build.literals.at(lit_id);
-
+
if (lit.table != ROSE_ANCHORED) {
continue;
}
-
+
// If this anchored literal can never match past
// floatingMinLiteralMatchOffset, we will never have to record it.
if (findMaxOffset(build, lit_id)
@@ -3148,12 +3148,12 @@ pair<u32, u32> writeAnchoredPrograms(const RoseBuildImpl &build,
prog_build.floatingMinLiteralMatchOffset);
continue;
}
-
+
auto prog = makeLiteralProgram(build, bc, prog_build, lit_id,
lit_edge_map, true);
u32 offset = writeProgram(bc, move(prog));
DEBUG_PRINTF("lit_id=%u -> anch prog at %u\n", lit_id, offset);
-
+
u32 anch_id;
auto it = cache.find(offset);
if (it != end(cache)) {
@@ -3168,13 +3168,13 @@ pair<u32, u32> writeAnchoredPrograms(const RoseBuildImpl &build,
offset);
}
prog_build.anchored_programs.emplace(lit_id, anch_id);
- }
- }
-
+ }
+ }
+
DEBUG_PRINTF("%zu anchored programs\n", programs.size());
return {bc.engine_blob.add_range(programs), verify_u32(programs.size())};
}
-
+
/**
* \brief Returns all reports used by output-exposed engines, for which we need
* to generate programs.
@@ -3182,33 +3182,33 @@ pair<u32, u32> writeAnchoredPrograms(const RoseBuildImpl &build,
static
set<ReportID> findEngineReports(const RoseBuildImpl &build) {
set<ReportID> reports;
-
+
// The small write engine uses these engine report programs.
insert(&reports, build.smwr.all_reports());
-
+
for (const auto &outfix : build.outfixes) {
insert(&reports, all_reports(outfix));
- }
-
+ }
+
const auto &g = build.g;
for (auto v : vertices_range(g)) {
if (g[v].suffix) {
insert(&reports, all_reports(g[v].suffix));
- }
- }
-
+ }
+ }
+
DEBUG_PRINTF("%zu engine reports (of %zu)\n", reports.size(),
build.rm.numReports());
return reports;
-}
-
-static
+}
+
+static
pair<u32, u32> buildReportPrograms(const RoseBuildImpl &build,
build_context &bc) {
const auto reports = findEngineReports(build);
vector<u32> programs;
programs.reserve(reports.size());
-
+
for (ReportID id : reports) {
auto program = makeReportProgram(build, bc.needs_mpv_catchup, id);
u32 offset = writeProgram(bc, move(program));
@@ -3216,14 +3216,14 @@ pair<u32, u32> buildReportPrograms(const RoseBuildImpl &build,
build.rm.setProgramOffset(id, offset);
DEBUG_PRINTF("program for report %u @ %u (%zu instructions)\n", id,
programs.back(), program.size());
- }
-
+ }
+
u32 offset = bc.engine_blob.add_range(programs);
u32 count = verify_u32(programs.size());
return {offset, count};
-}
-
-static
+}
+
+static
bool hasEodAnchoredSuffix(const RoseBuildImpl &build) {
const RoseGraph &g = build.g;
for (auto v : vertices_range(g)) {
@@ -3231,40 +3231,40 @@ bool hasEodAnchoredSuffix(const RoseBuildImpl &build) {
DEBUG_PRINTF("vertex %zu is in eod table and has a suffix\n",
g[v].index);
return true;
- }
- }
+ }
+ }
return false;
-}
-
-static
+}
+
+static
bool hasEodMatcher(const RoseBuildImpl &build) {
const RoseGraph &g = build.g;
for (auto v : vertices_range(g)) {
if (build.isInETable(v)) {
DEBUG_PRINTF("vertex %zu is in eod table\n", g[v].index);
- return true;
- }
- }
- return false;
-}
-
-static
+ return true;
+ }
+ }
+ return false;
+}
+
+static
void addEodAnchorProgram(const RoseBuildImpl &build, const build_context &bc,
ProgramBuild &prog_build, bool in_etable,
RoseProgram &program) {
const RoseGraph &g = build.g;
-
+
// Predecessor state id -> program block.
map<u32, RoseProgram> pred_blocks;
-
+
for (auto v : vertices_range(g)) {
if (!g[v].eod_accept) {
- continue;
- }
-
+ continue;
+ }
+
DEBUG_PRINTF("vertex %zu (with %zu preds) fires on EOD\n", g[v].index,
in_degree(v, g));
-
+
vector<RoseEdge> edge_list;
for (const auto &e : in_edges_range(v, g)) {
RoseVertex u = source(e, g);
@@ -3272,15 +3272,15 @@ void addEodAnchorProgram(const RoseBuildImpl &build, const build_context &bc,
DEBUG_PRINTF("pred %zu %s in etable\n", g[u].index,
in_etable ? "is not" : "is");
continue;
- }
+ }
if (canEagerlyReportAtEod(build, e)) {
DEBUG_PRINTF("already done report for vertex %zu\n",
g[u].index);
continue;
- }
+ }
edge_list.push_back(e);
- }
-
+ }
+
const bool multiple_preds = edge_list.size() > 1;
for (const auto &e : edge_list) {
RoseVertex u = source(e, g);
@@ -3288,19 +3288,19 @@ void addEodAnchorProgram(const RoseBuildImpl &build, const build_context &bc,
u32 pred_state = bc.roleStateIndices.at(u);
pred_blocks[pred_state].add_block(
makeEodAnchorProgram(build, prog_build, e, multiple_preds));
- }
- }
-
+ }
+ }
+
addPredBlocks(pred_blocks, bc.roleStateIndices.size(), program);
-}
-
-static
+}
+
+static
void addEodEventProgram(const RoseBuildImpl &build, build_context &bc,
ProgramBuild &prog_build, RoseProgram &program) {
if (build.eod_event_literal_id == MO_INVALID_IDX) {
return;
}
-
+
const RoseGraph &g = build.g;
const auto &lit_info = build.literal_info.at(build.eod_event_literal_id);
assert(lit_info.delayed_ids.empty());
@@ -3312,44 +3312,44 @@ void addEodEventProgram(const RoseBuildImpl &build, build_context &bc,
for (const auto &v : lit_info.vertices) {
for (const auto &e : in_edges_range(v, g)) {
edge_list.push_back(e);
- }
+ }
}
-
+
// Sort edge list for determinism, prettiness.
sort(begin(edge_list), end(edge_list),
[&g](const RoseEdge &a, const RoseEdge &b) {
return tie(g[source(a, g)].index, g[target(a, g)].index) <
tie(g[source(b, g)].index, g[target(b, g)].index);
});
-
+
auto block = makeLiteralProgram(build, bc.leftfix_info, bc.suffixes,
bc.engine_info_by_queue,
bc.roleStateIndices, prog_build,
build.eod_event_literal_id, edge_list,
false);
program.add_block(move(block));
-}
-
-static
+}
+
+static
RoseProgram makeEodProgram(const RoseBuildImpl &build, build_context &bc,
ProgramBuild &prog_build, u32 eodNfaIterOffset) {
RoseProgram program;
-
+
addEodEventProgram(build, bc, prog_build, program);
addEnginesEodProgram(eodNfaIterOffset, program);
addEodAnchorProgram(build, bc, prog_build, false, program);
if (hasEodMatcher(build)) {
addMatcherEodProgram(program);
- }
+ }
addEodAnchorProgram(build, bc, prog_build, true, program);
if (hasEodAnchoredSuffix(build)) {
addSuffixesEodProgram(program);
}
-
+
return program;
-}
-
-static
+}
+
+static
RoseProgram makeFlushCombProgram(const RoseEngine &t) {
RoseProgram program;
if (t.ckeyCount) {
@@ -3368,159 +3368,159 @@ RoseProgram makeLastFlushCombProgram(const RoseEngine &t) {
}
static
-u32 history_required(const rose_literal_id &key) {
- if (key.msk.size() < key.s.length()) {
- return key.elength() - 1;
- } else {
- return key.msk.size() + key.delay - 1;
- }
-}
-
-static
-void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) {
- const RoseGraph &g = build.g;
-
- engine->floatingDistance = 0;
- engine->floatingMinDistance = ROSE_BOUND_INF;
- engine->anchoredDistance = 0;
- engine->maxFloatingDelayedMatch = 0;
- u32 delayRebuildLength = 0;
- engine->smallBlockDistance = 0;
-
- for (auto v : vertices_range(g)) {
- if (g[v].literals.empty()) {
- continue;
- }
-
- assert(g[v].min_offset < ROSE_BOUND_INF); // cannot == ROSE_BOUND_INF
- assert(g[v].min_offset <= g[v].max_offset);
-
- for (u32 lit_id : g[v].literals) {
+u32 history_required(const rose_literal_id &key) {
+ if (key.msk.size() < key.s.length()) {
+ return key.elength() - 1;
+ } else {
+ return key.msk.size() + key.delay - 1;
+ }
+}
+
+static
+void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) {
+ const RoseGraph &g = build.g;
+
+ engine->floatingDistance = 0;
+ engine->floatingMinDistance = ROSE_BOUND_INF;
+ engine->anchoredDistance = 0;
+ engine->maxFloatingDelayedMatch = 0;
+ u32 delayRebuildLength = 0;
+ engine->smallBlockDistance = 0;
+
+ for (auto v : vertices_range(g)) {
+ if (g[v].literals.empty()) {
+ continue;
+ }
+
+ assert(g[v].min_offset < ROSE_BOUND_INF); // cannot == ROSE_BOUND_INF
+ assert(g[v].min_offset <= g[v].max_offset);
+
+ for (u32 lit_id : g[v].literals) {
const rose_literal_id &key = build.literals.at(lit_id);
- u32 max_d = g[v].max_offset;
- u32 min_d = g[v].min_offset;
-
+ u32 max_d = g[v].max_offset;
+ u32 min_d = g[v].min_offset;
+
DEBUG_PRINTF("checking %u: elen %zu min/max %u/%u\n", lit_id,
key.elength_including_mask(), min_d, max_d);
- if (build.literal_info[lit_id].undelayed_id != lit_id) {
- /* this is a delayed match; need to update delay properties */
- /* TODO: can delayed literals ever be in another table ? */
- if (key.table == ROSE_FLOATING) {
- ENSURE_AT_LEAST(&engine->maxFloatingDelayedMatch, max_d);
- ENSURE_AT_LEAST(&delayRebuildLength, history_required(key));
- }
- }
-
- /* for the FloatingDistances we need the true max depth of the
- string */
- if (max_d != ROSE_BOUND_INF && key.table != ROSE_ANCHORED) {
- assert(max_d >= key.delay);
- max_d -= key.delay;
- }
-
- switch (key.table) {
- case ROSE_FLOATING:
- ENSURE_AT_LEAST(&engine->floatingDistance, max_d);
+ if (build.literal_info[lit_id].undelayed_id != lit_id) {
+ /* this is a delayed match; need to update delay properties */
+ /* TODO: can delayed literals ever be in another table ? */
+ if (key.table == ROSE_FLOATING) {
+ ENSURE_AT_LEAST(&engine->maxFloatingDelayedMatch, max_d);
+ ENSURE_AT_LEAST(&delayRebuildLength, history_required(key));
+ }
+ }
+
+ /* for the FloatingDistances we need the true max depth of the
+ string */
+ if (max_d != ROSE_BOUND_INF && key.table != ROSE_ANCHORED) {
+ assert(max_d >= key.delay);
+ max_d -= key.delay;
+ }
+
+ switch (key.table) {
+ case ROSE_FLOATING:
+ ENSURE_AT_LEAST(&engine->floatingDistance, max_d);
if (min_d >= key.elength_including_mask()) {
- LIMIT_TO_AT_MOST(&engine->floatingMinDistance,
+ LIMIT_TO_AT_MOST(&engine->floatingMinDistance,
min_d - (u32)key.elength_including_mask());
- } else {
- /* overlapped literals from rose + anchored table can
- * cause us to underflow due to sloppiness in
- * estimates */
- engine->floatingMinDistance = 0;
- }
- break;
- case ROSE_ANCHORED_SMALL_BLOCK:
- ENSURE_AT_LEAST(&engine->smallBlockDistance, max_d);
- break;
- case ROSE_ANCHORED:
- ENSURE_AT_LEAST(&engine->anchoredDistance, max_d);
- break;
- case ROSE_EOD_ANCHORED:
- // EOD anchored literals are in another table, so they
- // don't contribute to these calculations.
- break;
- case ROSE_EVENT:
- break; // Not a real literal.
- }
- }
- }
-
- // Floating literals go in the small block table too.
- ENSURE_AT_LEAST(&engine->smallBlockDistance, engine->floatingDistance);
-
- // Clipped by its very nature.
- LIMIT_TO_AT_MOST(&engine->smallBlockDistance, 32U);
-
- engine->delayRebuildLength = delayRebuildLength;
-
- DEBUG_PRINTF("anchoredDistance = %u\n", engine->anchoredDistance);
- DEBUG_PRINTF("floatingDistance = %u\n", engine->floatingDistance);
- DEBUG_PRINTF("smallBlockDistance = %u\n", engine->smallBlockDistance);
- assert(engine->anchoredDistance <= build.cc.grey.maxAnchoredRegion);
-
- /* anchored->floating squash literals may lower floating min distance */
- /* TODO: find actual value */
- if (!engine->anchoredDistance) {
- return;
- }
-}
-
+ } else {
+ /* overlapped literals from rose + anchored table can
+ * cause us to underflow due to sloppiness in
+ * estimates */
+ engine->floatingMinDistance = 0;
+ }
+ break;
+ case ROSE_ANCHORED_SMALL_BLOCK:
+ ENSURE_AT_LEAST(&engine->smallBlockDistance, max_d);
+ break;
+ case ROSE_ANCHORED:
+ ENSURE_AT_LEAST(&engine->anchoredDistance, max_d);
+ break;
+ case ROSE_EOD_ANCHORED:
+ // EOD anchored literals are in another table, so they
+ // don't contribute to these calculations.
+ break;
+ case ROSE_EVENT:
+ break; // Not a real literal.
+ }
+ }
+ }
+
+ // Floating literals go in the small block table too.
+ ENSURE_AT_LEAST(&engine->smallBlockDistance, engine->floatingDistance);
+
+ // Clipped by its very nature.
+ LIMIT_TO_AT_MOST(&engine->smallBlockDistance, 32U);
+
+ engine->delayRebuildLength = delayRebuildLength;
+
+ DEBUG_PRINTF("anchoredDistance = %u\n", engine->anchoredDistance);
+ DEBUG_PRINTF("floatingDistance = %u\n", engine->floatingDistance);
+ DEBUG_PRINTF("smallBlockDistance = %u\n", engine->smallBlockDistance);
+ assert(engine->anchoredDistance <= build.cc.grey.maxAnchoredRegion);
+
+ /* anchored->floating squash literals may lower floating min distance */
+ /* TODO: find actual value */
+ if (!engine->anchoredDistance) {
+ return;
+ }
+}
+
static
u32 writeEagerQueueIter(const set<u32> &eager, u32 leftfixBeginQueue,
u32 queue_count, RoseEngineBlob &engine_blob) {
if (eager.empty()) {
return 0;
- }
-
+ }
+
vector<u32> vec;
for (u32 q : eager) {
assert(q >= leftfixBeginQueue);
vec.push_back(q - leftfixBeginQueue);
}
-
+
auto iter = mmbBuildSparseIterator(vec, queue_count - leftfixBeginQueue);
return engine_blob.add_iterator(iter);
}
-
+
static
bytecode_ptr<RoseEngine> addSmallWriteEngine(const RoseBuildImpl &build,
const RoseResources &res,
bytecode_ptr<RoseEngine> rose) {
assert(rose);
-
+
if (roseIsPureLiteral(rose.get())) {
DEBUG_PRINTF("pure literal case, not adding smwr\n");
return rose;
- }
-
+ }
+
u32 qual = roseQuality(res, rose.get());
auto smwr_engine = build.smwr.build(qual);
if (!smwr_engine) {
DEBUG_PRINTF("no smwr built\n");
return rose;
- }
-
+ }
+
const size_t mainSize = rose.size();
const size_t smallWriteSize = smwr_engine.size();
DEBUG_PRINTF("adding smwr engine, size=%zu\n", smallWriteSize);
-
+
const size_t smwrOffset = ROUNDUP_CL(mainSize);
const size_t newSize = smwrOffset + smallWriteSize;
-
+
auto rose2 = make_zeroed_bytecode_ptr<RoseEngine>(newSize, 64);
char *ptr = (char *)rose2.get();
memcpy(ptr, rose.get(), mainSize);
memcpy(ptr + smwrOffset, smwr_engine.get(), smallWriteSize);
-
+
rose2->smallWriteOffset = verify_u32(smwrOffset);
rose2->size = verify_u32(newSize);
-
+
return rose2;
}
-
+
/**
* \brief Returns the pair (number of literals, max length) for all real
* literals in the floating table that are in-use.
@@ -3529,10 +3529,10 @@ static
pair<size_t, size_t> floatingCountAndMaxLen(const RoseBuildImpl &build) {
size_t num = 0;
size_t max_len = 0;
-
+
for (u32 id = 0; id < build.literals.size(); id++) {
const rose_literal_id &lit = build.literals.at(id);
-
+
if (lit.table != ROSE_FLOATING) {
continue;
}
@@ -3544,31 +3544,31 @@ pair<size_t, size_t> floatingCountAndMaxLen(const RoseBuildImpl &build) {
if (!isUsedLiteral(build, id)) {
continue;
}
-
+
num++;
max_len = max(max_len, lit.s.length());
- }
+ }
DEBUG_PRINTF("%zu floating literals with max_len=%zu\n", num, max_len);
return {num, max_len};
}
-
+
size_t calcLongLitThreshold(const RoseBuildImpl &build,
const size_t historyRequired) {
const auto &cc = build.cc;
-
+
// In block mode, we don't have history, so we don't need long literal
// support and can just use "medium-length" literal confirm. TODO: we could
// specialize further and have a block mode literal confirm instruction.
if (!cc.streaming) {
return SIZE_MAX;
- }
-
+ }
+
size_t longLitLengthThreshold = ROSE_LONG_LITERAL_THRESHOLD_MIN;
-
+
// Expand to size of history we've already allocated. Note that we need N-1
// bytes of history to match a literal of length N.
longLitLengthThreshold = max(longLitLengthThreshold, historyRequired + 1);
-
+
// If we only have one literal, allow for a larger value in order to avoid
// building a long literal table for a trivial Noodle case that we could
// fit in history.
@@ -3579,15 +3579,15 @@ size_t calcLongLitThreshold(const RoseBuildImpl &build,
num_len.second);
longLitLengthThreshold = num_len.second;
}
- }
-
+ }
+
// Clamp to max history available.
longLitLengthThreshold =
min(longLitLengthThreshold, size_t{cc.grey.maxHistoryAvailable} + 1);
-
+
return longLitLengthThreshold;
}
-
+
static
map<left_id, u32> makeLeftQueueMap(const RoseGraph &g,
const map<RoseVertex, left_build_info> &leftfix_info) {
@@ -3601,18 +3601,18 @@ map<left_id, u32> makeLeftQueueMap(const RoseGraph &g,
left_id left(g[e.first].left);
assert(!contains(lqm, left) || lqm[left] == e.second.queue);
lqm[left] = e.second.queue;
- }
-
+ }
+
return lqm;
}
-
+
bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
// We keep all our offsets, counts etc. in a prototype RoseEngine which we
// will copy into the real one once it is allocated: we can't do this
// until we know how big it will be.
RoseEngine proto;
memset(&proto, 0, sizeof(proto));
-
+
// Set scanning mode.
if (!cc.streaming) {
proto.mode = HS_MODE_BLOCK;
@@ -3621,29 +3621,29 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
} else {
proto.mode = HS_MODE_STREAM;
}
-
+
DerivedBoundaryReports dboundary(boundary);
-
+
size_t historyRequired = calcHistoryRequired(); // Updated by HWLM.
size_t longLitLengthThreshold = calcLongLitThreshold(*this,
historyRequired);
DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold);
-
+
vector<LitFragment> fragments = groupByFragment(*this);
-
+
auto anchored_dfas = buildAnchoredDfas(*this, fragments);
-
+
build_context bc;
u32 floatingMinLiteralMatchOffset
= findMinFloatingLiteralMatch(*this, anchored_dfas);
recordResources(bc.resources, *this, anchored_dfas, fragments);
bc.needs_mpv_catchup = needsMpvCatchup(*this);
-
+
makeBoundaryPrograms(*this, bc, boundary, dboundary, proto.boundary);
-
+
tie(proto.reportProgramOffset, proto.reportProgramCount) =
buildReportPrograms(*this, bc);
-
+
// Build NFAs
bool mpv_as_outfix;
prepMpv(*this, bc, &historyRequired, &mpv_as_outfix);
@@ -3653,10 +3653,10 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
}
proto.outfixEndQueue = qif.allocated_count();
proto.leftfixBeginQueue = proto.outfixEndQueue;
-
+
set<u32> no_retrigger_queues;
set<u32> eager_queues;
-
+
/* Note: buildNfas may reduce the lag for vertices that have prefixes */
if (!buildNfas(*this, bc, qif, &no_retrigger_queues, &eager_queues,
&proto.leftfixBeginQueue)) {
@@ -3664,76 +3664,76 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
}
u32 eodNfaIterOffset = buildEodNfaIterator(bc, proto.leftfixBeginQueue);
buildCountingMiracles(bc);
-
+
u32 queue_count = qif.allocated_count(); /* excludes anchored matcher q;
* som rev nfas */
if (queue_count > cc.grey.limitRoseEngineCount) {
throw ResourceLimitError();
}
-
+
// Enforce role table resource limit.
if (num_vertices(g) > cc.grey.limitRoseRoleCount) {
throw ResourceLimitError();
}
-
+
bc.roleStateIndices = assignStateIndices(*this);
-
+
u32 laggedRoseCount = 0;
vector<LeftNfaInfo> leftInfoTable;
buildLeftInfoTable(*this, bc, eager_queues, proto.leftfixBeginQueue,
queue_count - proto.leftfixBeginQueue, leftInfoTable,
&laggedRoseCount, &historyRequired);
-
+
// Information only needed for program construction.
ProgramBuild prog_build(floatingMinLiteralMatchOffset,
longLitLengthThreshold, needsCatchup(*this));
prog_build.vertex_group_map = getVertexGroupMap(*this);
prog_build.squashable_groups = getSquashableGroups(*this);
-
+
tie(proto.anchoredProgramOffset, proto.anchored_count) =
writeAnchoredPrograms(*this, fragments, bc, prog_build);
-
+
tie(proto.delayProgramOffset, proto.delay_count) =
writeDelayPrograms(*this, fragments, bc, prog_build);
-
+
// Build floating HWLM matcher prototype.
rose_group fgroups = 0;
auto fproto = buildFloatingMatcherProto(*this, fragments,
longLitLengthThreshold,
&fgroups, &historyRequired);
-
+
// Build delay rebuild HWLM matcher prototype.
auto drproto = buildDelayRebuildMatcherProto(*this, fragments,
longLitLengthThreshold);
-
+
// Build EOD-anchored HWLM matcher prototype.
auto eproto = buildEodAnchoredMatcherProto(*this, fragments);
-
+
// Build small-block HWLM matcher prototype.
auto sbproto = buildSmallBlockMatcherProto(*this, fragments);
-
+
buildLiteralPrograms(*this, fragments, bc, prog_build, fproto.get(),
drproto.get(), eproto.get(), sbproto.get());
-
+
auto eod_prog = makeEodProgram(*this, bc, prog_build, eodNfaIterOffset);
proto.eodProgramOffset = writeProgram(bc, move(eod_prog));
-
+
size_t longLitStreamStateRequired = 0;
proto.longLitTableOffset
= buildLongLiteralTable(*this, bc.engine_blob, bc.longLiterals,
longLitLengthThreshold, &historyRequired,
&longLitStreamStateRequired);
-
+
proto.lastByteHistoryIterOffset = buildLastByteIter(g, bc);
proto.eagerIterOffset = writeEagerQueueIter(
eager_queues, proto.leftfixBeginQueue, queue_count, bc.engine_blob);
-
+
addSomRevNfas(bc, proto, ssm);
-
+
writeDkeyInfo(rm, bc.engine_blob, proto);
writeLeftInfo(bc.engine_blob, proto, leftInfoTable);
writeLogicalInfo(rm, bc.engine_blob, proto);
-
+
auto flushComb_prog = makeFlushCombProgram(proto);
proto.flushCombProgramOffset = writeProgram(bc, move(flushComb_prog));
@@ -3743,105 +3743,105 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
// Build anchored matcher.
auto atable = buildAnchoredMatcher(*this, fragments, anchored_dfas);
- if (atable) {
+ if (atable) {
proto.amatcherOffset = bc.engine_blob.add(atable);
- }
+ }
// Build floating HWLM matcher.
auto ftable = buildHWLMMatcher(*this, fproto.get());
- if (ftable) {
+ if (ftable) {
proto.fmatcherOffset = bc.engine_blob.add(ftable);
bc.resources.has_floating = true;
- }
+ }
// Build delay rebuild HWLM matcher.
auto drtable = buildHWLMMatcher(*this, drproto.get());
if (drtable) {
proto.drmatcherOffset = bc.engine_blob.add(drtable);
- }
+ }
// Build EOD-anchored HWLM matcher.
auto etable = buildHWLMMatcher(*this, eproto.get());
- if (etable) {
+ if (etable) {
proto.ematcherOffset = bc.engine_blob.add(etable);
- }
+ }
// Build small-block HWLM matcher.
auto sbtable = buildHWLMMatcher(*this, sbproto.get());
- if (sbtable) {
+ if (sbtable) {
proto.sbmatcherOffset = bc.engine_blob.add(sbtable);
- }
-
+ }
+
proto.activeArrayCount = proto.leftfixBeginQueue;
-
+
proto.anchorStateSize = atable ? anchoredStateSize(*atable) : 0;
-
+
DEBUG_PRINTF("rose history required %zu\n", historyRequired);
assert(!cc.streaming || historyRequired <= cc.grey.maxHistoryAvailable);
-
+
// Some SOM schemes (reverse NFAs, for example) may require more history.
historyRequired = max(historyRequired, (size_t)ssm.somHistoryRequired());
-
+
assert(!cc.streaming || historyRequired <=
max(cc.grey.maxHistoryAvailable, cc.grey.somMaxRevNfaLength));
-
+
fillStateOffsets(*this, bc.roleStateIndices.size(), proto.anchorStateSize,
proto.activeArrayCount, proto.activeLeftCount,
laggedRoseCount, longLitStreamStateRequired,
historyRequired, &proto.stateOffsets);
-
+
// Write in NfaInfo structures. This will also update state size
// information in proto.
writeNfaInfo(*this, bc, proto, no_retrigger_queues);
-
+
scatter_plan_raw state_scatter = buildStateScatterPlan(
sizeof(u8), bc.roleStateIndices.size(), proto.activeLeftCount,
proto.rosePrefixCount, proto.stateOffsets, cc.streaming,
proto.activeArrayCount, proto.outfixBeginQueue, proto.outfixEndQueue);
-
+
u32 currOffset; /* relative to base of RoseEngine */
if (!bc.engine_blob.empty()) {
currOffset = bc.engine_blob.base_offset + bc.engine_blob.size();
} else {
currOffset = sizeof(RoseEngine);
}
-
+
currOffset = ROUNDUP_CL(currOffset);
DEBUG_PRINTF("currOffset %u\n", currOffset);
-
+
currOffset = ROUNDUP_N(currOffset, alignof(scatter_unit_u64a));
u32 state_scatter_aux_offset = currOffset;
currOffset += aux_size(state_scatter);
-
+
proto.historyRequired = verify_u32(historyRequired);
proto.ekeyCount = rm.numEkeys();
-
+
proto.somHorizon = ssm.somPrecision();
proto.somLocationCount = ssm.numSomSlots();
proto.somLocationFatbitSize = fatbit_size(proto.somLocationCount);
-
+
proto.runtimeImpl = pickRuntimeImpl(*this, bc.resources,
proto.outfixEndQueue);
proto.mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this);
-
+
proto.queueCount = queue_count;
proto.activeQueueArraySize = fatbit_size(queue_count);
proto.handledKeyCount = prog_build.handledKeys.size();
proto.handledKeyFatbitSize = fatbit_size(proto.handledKeyCount);
-
+
proto.rolesWithStateCount = bc.roleStateIndices.size();
-
+
proto.initMpvNfa = mpv_as_outfix ? 0 : MO_INVALID_IDX;
proto.stateSize = mmbit_size(bc.roleStateIndices.size());
-
+
proto.delay_fatbit_size = fatbit_size(proto.delay_count);
proto.anchored_fatbit_size = fatbit_size(proto.anchored_count);
-
- // The Small Write matcher is (conditionally) added to the RoseEngine in
- // another pass by the caller. Set to zero (meaning no SMWR engine) for
- // now.
+
+ // The Small Write matcher is (conditionally) added to the RoseEngine in
+ // another pass by the caller. Set to zero (meaning no SMWR engine) for
+ // now.
proto.smallWriteOffset = 0;
-
+
proto.amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED);
proto.fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING);
proto.eodmatcherMinWidth = findMinWidth(*this, ROSE_EOD_ANCHORED);
@@ -3850,47 +3850,47 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
proto.minWidth = hasBoundaryReports(boundary) ? 0 : minWidth;
proto.minWidthExcludingBoundaries = minWidth;
proto.floatingMinLiteralMatchOffset = floatingMinLiteralMatchOffset;
-
+
proto.maxBiAnchoredWidth = findMaxBAWidth(*this);
proto.noFloatingRoots = hasNoFloatingRoots();
proto.requiresEodCheck = hasEodAnchors(*this, bc, proto.outfixEndQueue);
proto.hasOutfixesInSmallBlock = hasNonSmallBlockOutfix(outfixes);
proto.canExhaust = rm.patternSetCanExhaust();
proto.hasSom = hasSom;
-
- /* populate anchoredDistance, floatingDistance, floatingMinDistance, etc */
+
+ /* populate anchoredDistance, floatingDistance, floatingMinDistance, etc */
fillMatcherDistances(*this, &proto);
-
+
proto.initialGroups = getInitialGroups();
proto.floating_group_mask = fgroups;
proto.totalNumLiterals = verify_u32(literal_info.size());
proto.asize = verify_u32(atable.size());
proto.ematcherRegionSize = ematcher_region_size;
-
+
proto.size = currOffset;
-
+
// Time to allocate the real RoseEngine structure, at cacheline alignment.
auto engine = make_zeroed_bytecode_ptr<RoseEngine>(currOffset, 64);
assert(engine); // will have thrown bad_alloc otherwise.
-
+
// Copy in our prototype engine data.
memcpy(engine.get(), &proto, sizeof(proto));
-
+
write_out(&engine->state_init, (char *)engine.get(), state_scatter,
state_scatter_aux_offset);
-
+
// Copy in the engine blob.
bc.engine_blob.write_bytes(engine.get());
-
+
// Add a small write engine if appropriate.
engine = addSmallWriteEngine(*this, bc.resources, move(engine));
-
+
DEBUG_PRINTF("rose done %p\n", engine.get());
-
+
dumpRose(*this, fragments, makeLeftQueueMap(g, bc.leftfix_info),
bc.suffixes, engine.get());
-
- return engine;
-}
-
-} // namespace ue2
+
+ return engine;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_compile.cpp b/contrib/libs/hyperscan/src/rose/rose_build_compile.cpp
index a9cd2b95df..1cf3bbe695 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_compile.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_compile.cpp
@@ -1,151 +1,151 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_impl.h"
-
-#include "grey.h"
-#include "hs_internal.h"
-#include "rose_build_anchored.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_impl.h"
+
+#include "grey.h"
+#include "hs_internal.h"
+#include "rose_build_anchored.h"
#include "rose_build_castle.h"
-#include "rose_build_convert.h"
-#include "rose_build_dump.h"
+#include "rose_build_convert.h"
+#include "rose_build_dump.h"
#include "rose_build_groups.h"
#include "rose_build_matchers.h"
-#include "rose_build_merge.h"
-#include "rose_build_role_aliasing.h"
-#include "rose_build_util.h"
-#include "ue2common.h"
+#include "rose_build_merge.h"
+#include "rose_build_role_aliasing.h"
+#include "rose_build_util.h"
+#include "ue2common.h"
#include "hwlm/hwlm_literal.h"
-#include "nfa/nfa_internal.h"
-#include "nfa/rdfa.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_execute.h"
-#include "nfagraph/ng_is_equal.h"
-#include "nfagraph/ng_limex.h"
-#include "nfagraph/ng_mcclellan.h"
+#include "nfa/nfa_internal.h"
+#include "nfa/rdfa.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_execute.h"
+#include "nfagraph/ng_is_equal.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_mcclellan.h"
#include "nfagraph/ng_prune.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_reports.h"
-#include "nfagraph/ng_stop.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_width.h"
-#include "util/bitutils.h"
-#include "util/charreach.h"
-#include "util/charreach_util.h"
-#include "util/compare.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_stop.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
+#include "util/bitutils.h"
+#include "util/charreach.h"
+#include "util/charreach_util.h"
+#include "util/compare.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
-#include "util/order_check.h"
-#include "util/report_manager.h"
-#include "util/ue2string.h"
-#include "util/verify_types.h"
-
-#include <algorithm>
-#include <functional>
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-#include <utility>
-
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_values;
-
-namespace ue2 {
-
-#define ANCHORED_REHOME_MIN_FLOATING 800
-#define ANCHORED_REHOME_MIN_FLOATING_SHORT 50
-#define ANCHORED_REHOME_ALLOW_SHORT 20
-#define ANCHORED_REHOME_DEEP 25
-#define ANCHORED_REHOME_SHORT_LEN 3
-
+#include "util/graph_range.h"
+#include "util/order_check.h"
+#include "util/report_manager.h"
+#include "util/ue2string.h"
+#include "util/verify_types.h"
+
+#include <algorithm>
+#include <functional>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+#include <utility>
+
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_values;
+
+namespace ue2 {
+
+#define ANCHORED_REHOME_MIN_FLOATING 800
+#define ANCHORED_REHOME_MIN_FLOATING_SHORT 50
+#define ANCHORED_REHOME_ALLOW_SHORT 20
+#define ANCHORED_REHOME_DEEP 25
+#define ANCHORED_REHOME_SHORT_LEN 3
+
#define MAX_EXPLOSION_NC 3
-static
+static
bool limited_explosion(const ue2_literal &s) {
u32 nc_count = 0;
-
+
for (const auto &e : s) {
if (e.nocase) {
nc_count++;
- }
- }
-
+ }
+ }
+
return nc_count <= MAX_EXPLOSION_NC;
-}
-
-static
+}
+
+static
void removeLiteralFromGraph(RoseBuildImpl &build, u32 id) {
assert(id < build.literal_info.size());
auto &info = build.literal_info.at(id);
for (const auto &v : info.vertices) {
build.g[v].literals.erase(id);
- }
+ }
info.vertices.clear();
-}
-
+}
+
/**
* \brief Replace the given mixed-case literal with the set of its caseless
* variants.
*/
-static
+static
void explodeLiteral(RoseBuildImpl &build, u32 id) {
const auto &lit = build.literals.at(id);
auto &info = build.literal_info[id];
-
+
assert(!info.group_mask); // not set yet
assert(info.undelayed_id == id); // we do not explode delayed literals
-
+
for (auto it = caseIterateBegin(lit.s); it != caseIterateEnd(); ++it) {
ue2_literal new_str(*it, false);
-
+
if (!maskIsConsistent(new_str.get_string(), false, lit.msk, lit.cmp)) {
DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n");
- continue;
- }
-
+ continue;
+ }
+
u32 new_id =
build.getLiteralId(new_str, lit.msk, lit.cmp, lit.delay, lit.table);
-
+
DEBUG_PRINTF("adding exploded lit %u: '%s'\n", new_id,
dumpString(new_str).c_str());
-
+
const auto &new_lit = build.literals.at(new_id);
auto &new_info = build.literal_info.at(new_id);
insert(&new_info.vertices, info.vertices);
for (const auto &v : info.vertices) {
build.g[v].literals.insert(new_id);
- }
-
+ }
+
build.literal_info[new_id].undelayed_id = new_id;
if (!info.delayed_ids.empty()) {
flat_set<u32> &del_ids = new_info.delayed_ids;
@@ -156,35 +156,35 @@ void explodeLiteral(RoseBuildImpl &build, u32 id) {
dlit.delay, dlit.table);
del_ids.insert(new_delay_id);
build.literal_info[new_delay_id].undelayed_id = new_id;
- }
- }
- }
-
+ }
+ }
+ }
+
// Remove the old literal and any old delay variants.
removeLiteralFromGraph(build, id);
for (u32 delay_id : info.delayed_ids) {
removeLiteralFromGraph(build, delay_id);
- }
+ }
info.delayed_ids.clear();
-}
-
-void RoseBuildImpl::handleMixedSensitivity(void) {
+}
+
+void RoseBuildImpl::handleMixedSensitivity(void) {
vector<u32> explode;
for (u32 id = 0; id < literals.size(); id++) {
const rose_literal_id &lit = literals.at(id);
-
- if (lit.delay) {
- continue; /* delay id's are virtual-ish */
- }
-
- if (lit.table == ROSE_ANCHORED || lit.table == ROSE_EVENT) {
- continue; /* wrong table */
- }
-
- if (!mixed_sensitivity(lit.s)) {
- continue;
- }
-
+
+ if (lit.delay) {
+ continue; /* delay id's are virtual-ish */
+ }
+
+ if (lit.table == ROSE_ANCHORED || lit.table == ROSE_EVENT) {
+ continue; /* wrong table */
+ }
+
+ if (!mixed_sensitivity(lit.s)) {
+ continue;
+ }
+
// We don't want to explode long literals, as they require confirmation
// with a CHECK_LONG_LIT instruction and need unique final_ids.
// TODO: we could allow explosion for literals where the prefixes
@@ -192,226 +192,226 @@ void RoseBuildImpl::handleMixedSensitivity(void) {
if (lit.s.length() <= ROSE_LONG_LITERAL_THRESHOLD_MIN &&
limited_explosion(lit.s) && literal_info[id].delayed_ids.empty()) {
- DEBUG_PRINTF("need to explode existing string '%s'\n",
- dumpString(lit.s).c_str());
+ DEBUG_PRINTF("need to explode existing string '%s'\n",
+ dumpString(lit.s).c_str());
explode.push_back(id);
- } else {
- literal_info[id].requires_benefits = true;
- }
- }
+ } else {
+ literal_info[id].requires_benefits = true;
+ }
+ }
for (u32 id : explode) {
explodeLiteral(*this, id);
}
-}
-
-// Returns the length of the longest prefix of s that is (a) also a suffix of s
-// and (b) not s itself.
-static
-size_t maxPeriod(const ue2_literal &s) {
- /* overly conservative if only part of the string is nocase */
- if (s.empty()) {
- return 0;
- }
-
- const size_t len = s.length();
- const char *begin = s.c_str(), *end = begin + len;
- size_t i;
- for (i = len - 1; i != 0; i--) {
- if (!cmp(begin, end - i, i, s.any_nocase())) {
- break;
- }
- }
-
- return i;
-}
-
-bool RoseBuildImpl::isPseudoStar(const RoseEdge &e) const {
- return !g[e].minBound && isPseudoStarOrFirstOnly(e);
-}
-
-bool RoseBuildImpl::isPseudoStarOrFirstOnly(const RoseEdge &e) const {
- RoseVertex u = source(e, g);
- RoseVertex v = target(e, g);
-
- if (g[e].maxBound != ROSE_BOUND_INF) {
- return false;
- }
-
- if (isAnyStart(u)) {
- return true;
- }
-
- if (isAnchored(u)) {
- /* anchored table runs out of order */
- return false;
- }
-
- if (hasDelayedLiteral(u)) {
- return false;
- }
-
- if (g[v].left) {
- return false;
- }
-
- if (g[v].eod_accept) {
- return true;
- }
-
- assert(!g[v].literals.empty());
- if (maxLiteralOverlap(u, v)) {
- return false;
- }
-
- return true;
-}
-
-bool RoseBuildImpl::hasOnlyPseudoStarInEdges(RoseVertex v) const {
- for (const auto &e : in_edges_range(v, g)) {
- if (!isPseudoStar(e)) {
- return false;
- }
- }
- return true;
-}
-
-static
-size_t trailerDueToSelf(const rose_literal_id &lit) {
- size_t trailer = lit.s.length() - maxPeriod(lit.s);
- if (trailer > 255) {
- return 255;
- }
- if (!trailer) {
- return 1;
- }
- return trailer;
-}
-
-static
-RoseRoleHistory findHistoryScheme(const RoseBuildImpl &tbi, const RoseEdge &e) {
- const RoseGraph &g = tbi.g;
- const RoseVertex u = source(e, g); /* pred role */
- const RoseVertex v = target(e, g); /* current role */
-
+}
+
+// Returns the length of the longest prefix of s that is (a) also a suffix of s
+// and (b) not s itself.
+static
+size_t maxPeriod(const ue2_literal &s) {
+ /* overly conservative if only part of the string is nocase */
+ if (s.empty()) {
+ return 0;
+ }
+
+ const size_t len = s.length();
+ const char *begin = s.c_str(), *end = begin + len;
+ size_t i;
+ for (i = len - 1; i != 0; i--) {
+ if (!cmp(begin, end - i, i, s.any_nocase())) {
+ break;
+ }
+ }
+
+ return i;
+}
+
+bool RoseBuildImpl::isPseudoStar(const RoseEdge &e) const {
+ return !g[e].minBound && isPseudoStarOrFirstOnly(e);
+}
+
+bool RoseBuildImpl::isPseudoStarOrFirstOnly(const RoseEdge &e) const {
+ RoseVertex u = source(e, g);
+ RoseVertex v = target(e, g);
+
+ if (g[e].maxBound != ROSE_BOUND_INF) {
+ return false;
+ }
+
+ if (isAnyStart(u)) {
+ return true;
+ }
+
+ if (isAnchored(u)) {
+ /* anchored table runs out of order */
+ return false;
+ }
+
+ if (hasDelayedLiteral(u)) {
+ return false;
+ }
+
+ if (g[v].left) {
+ return false;
+ }
+
+ if (g[v].eod_accept) {
+ return true;
+ }
+
+ assert(!g[v].literals.empty());
+ if (maxLiteralOverlap(u, v)) {
+ return false;
+ }
+
+ return true;
+}
+
+bool RoseBuildImpl::hasOnlyPseudoStarInEdges(RoseVertex v) const {
+ for (const auto &e : in_edges_range(v, g)) {
+ if (!isPseudoStar(e)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+static
+size_t trailerDueToSelf(const rose_literal_id &lit) {
+ size_t trailer = lit.s.length() - maxPeriod(lit.s);
+ if (trailer > 255) {
+ return 255;
+ }
+ if (!trailer) {
+ return 1;
+ }
+ return trailer;
+}
+
+static
+RoseRoleHistory findHistoryScheme(const RoseBuildImpl &tbi, const RoseEdge &e) {
+ const RoseGraph &g = tbi.g;
+ const RoseVertex u = source(e, g); /* pred role */
+ const RoseVertex v = target(e, g); /* current role */
+
DEBUG_PRINTF("find history for [%zu,%zu]\n", g[u].index, g[v].index);
- DEBUG_PRINTF("u has min_offset=%u, max_offset=%u\n", g[u].min_offset,
- g[u].max_offset);
-
- if (g[v].left) {
- if (!tbi.isAnyStart(u)) {
- /* infix nfa will track history, treat as pseudo .*. Note: rose lits
- * may overlap so rose history track would be wrong anyway */
- DEBUG_PRINTF("skipping history as prefix\n");
- return ROSE_ROLE_HISTORY_NONE;
- }
- if (g[e].minBound || g[e].maxBound != ROSE_BOUND_INF) {
- DEBUG_PRINTF("rose prefix with external bounds\n");
- return ROSE_ROLE_HISTORY_ANCH;
- } else {
- return ROSE_ROLE_HISTORY_NONE;
- }
- }
-
- // Handle EOD cases.
- if (g[v].eod_accept) {
- const u32 minBound = g[e].minBound, maxBound = g[e].maxBound;
- DEBUG_PRINTF("EOD edge with bounds [%u,%u]\n", minBound, maxBound);
-
- // Trivial case: we don't need history for {0,inf} bounds
- if (minBound == 0 && maxBound == ROSE_BOUND_INF) {
- return ROSE_ROLE_HISTORY_NONE;
- }
-
- // Event literals store no history.
- if (tbi.hasLiteralInTable(u, ROSE_EVENT)) {
- return ROSE_ROLE_HISTORY_NONE;
- }
-
- // Trivial case: fixed offset from anchor
- if (g[u].fixedOffset()) {
- return ROSE_ROLE_HISTORY_ANCH;
- }
-
- // If the bounds are {0,0}, this role can only match precisely at EOD.
- if (minBound == 0 && maxBound == 0) {
+ DEBUG_PRINTF("u has min_offset=%u, max_offset=%u\n", g[u].min_offset,
+ g[u].max_offset);
+
+ if (g[v].left) {
+ if (!tbi.isAnyStart(u)) {
+ /* infix nfa will track history, treat as pseudo .*. Note: rose lits
+ * may overlap so rose history track would be wrong anyway */
+ DEBUG_PRINTF("skipping history as prefix\n");
+ return ROSE_ROLE_HISTORY_NONE;
+ }
+ if (g[e].minBound || g[e].maxBound != ROSE_BOUND_INF) {
+ DEBUG_PRINTF("rose prefix with external bounds\n");
+ return ROSE_ROLE_HISTORY_ANCH;
+ } else {
+ return ROSE_ROLE_HISTORY_NONE;
+ }
+ }
+
+ // Handle EOD cases.
+ if (g[v].eod_accept) {
+ const u32 minBound = g[e].minBound, maxBound = g[e].maxBound;
+ DEBUG_PRINTF("EOD edge with bounds [%u,%u]\n", minBound, maxBound);
+
+ // Trivial case: we don't need history for {0,inf} bounds
+ if (minBound == 0 && maxBound == ROSE_BOUND_INF) {
+ return ROSE_ROLE_HISTORY_NONE;
+ }
+
+ // Event literals store no history.
+ if (tbi.hasLiteralInTable(u, ROSE_EVENT)) {
+ return ROSE_ROLE_HISTORY_NONE;
+ }
+
+ // Trivial case: fixed offset from anchor
+ if (g[u].fixedOffset()) {
+ return ROSE_ROLE_HISTORY_ANCH;
+ }
+
+ // If the bounds are {0,0}, this role can only match precisely at EOD.
+ if (minBound == 0 && maxBound == 0) {
/* last byte history will squash the state byte so cannot have other
* succ */
assert(out_degree(u, g) == 1);
- return ROSE_ROLE_HISTORY_LAST_BYTE;
- }
-
- // XXX: No other history schemes should be possible any longer.
- assert(0);
- }
-
- // Non-EOD cases.
-
- DEBUG_PRINTF("examining edge [%zu,%zu] with bounds {%u,%u}\n",
+ return ROSE_ROLE_HISTORY_LAST_BYTE;
+ }
+
+ // XXX: No other history schemes should be possible any longer.
+ assert(0);
+ }
+
+ // Non-EOD cases.
+
+ DEBUG_PRINTF("examining edge [%zu,%zu] with bounds {%u,%u}\n",
g[u].index, g[v].index, g[e].minBound, g[e].maxBound);
-
- if (tbi.isAnchored(v)) {
- // Matches for literals in the anchored table will always arrive at the
- // right offsets, so there's no need for history-based confirmation.
- DEBUG_PRINTF("v in anchored table, no need for history\n");
- assert(u == tbi.anchored_root);
- return ROSE_ROLE_HISTORY_NONE;
- }
-
+
+ if (tbi.isAnchored(v)) {
+ // Matches for literals in the anchored table will always arrive at the
+ // right offsets, so there's no need for history-based confirmation.
+ DEBUG_PRINTF("v in anchored table, no need for history\n");
+ assert(u == tbi.anchored_root);
+ return ROSE_ROLE_HISTORY_NONE;
+ }
+
if (g[u].fixedOffset() &&
(g[e].minBound || g[e].maxBound != ROSE_BOUND_INF)) {
- DEBUG_PRINTF("fixed offset -> anch\n");
- return ROSE_ROLE_HISTORY_ANCH;
- }
-
- return ROSE_ROLE_HISTORY_NONE;
-}
-
-static
-void assignHistories(RoseBuildImpl &tbi) {
- for (const auto &e : edges_range(tbi.g)) {
- if (tbi.g[e].history == ROSE_ROLE_HISTORY_INVALID) {
- tbi.g[e].history = findHistoryScheme(tbi, e);
- }
- }
-}
-
-bool RoseBuildImpl::isDirectReport(u32 id) const {
- assert(id < literal_info.size());
-
- // Literal info properties.
- const rose_literal_info &info = literal_info[id];
- if (info.vertices.empty()) {
- return false;
- }
-
- if (!info.delayed_ids.empty() /* dr's don't set groups */
- || info.requires_benefits) { /* dr's don't require confirm */
- return false;
- }
-
- if (isDelayed(id)) { /* can't handle delayed dr atm as we require delay
- * ids to be dense */
- return false;
- }
-
- // Role properties.
-
- // Note that a literal can have multiple roles and still be a direct
- // report; it'll become a multi-direct report ("MDR") that fires each
- // role's reports from a list.
-
- for (auto v : info.vertices) {
+ DEBUG_PRINTF("fixed offset -> anch\n");
+ return ROSE_ROLE_HISTORY_ANCH;
+ }
+
+ return ROSE_ROLE_HISTORY_NONE;
+}
+
+static
+void assignHistories(RoseBuildImpl &tbi) {
+ for (const auto &e : edges_range(tbi.g)) {
+ if (tbi.g[e].history == ROSE_ROLE_HISTORY_INVALID) {
+ tbi.g[e].history = findHistoryScheme(tbi, e);
+ }
+ }
+}
+
+bool RoseBuildImpl::isDirectReport(u32 id) const {
+ assert(id < literal_info.size());
+
+ // Literal info properties.
+ const rose_literal_info &info = literal_info[id];
+ if (info.vertices.empty()) {
+ return false;
+ }
+
+ if (!info.delayed_ids.empty() /* dr's don't set groups */
+ || info.requires_benefits) { /* dr's don't require confirm */
+ return false;
+ }
+
+ if (isDelayed(id)) { /* can't handle delayed dr atm as we require delay
+ * ids to be dense */
+ return false;
+ }
+
+ // Role properties.
+
+ // Note that a literal can have multiple roles and still be a direct
+ // report; it'll become a multi-direct report ("MDR") that fires each
+ // role's reports from a list.
+
+ for (auto v : info.vertices) {
assert(contains(g[v].literals, id));
-
- if (g[v].reports.empty() ||
- g[v].eod_accept || // no accept EOD
- !g[v].isBoring() ||
- !isLeafNode(v, g) || // Must have no out-edges
- in_degree(v, g) != 1) { // Role must have exactly one in-edge
- return false;
- }
-
+
+ if (g[v].reports.empty() ||
+ g[v].eod_accept || // no accept EOD
+ !g[v].isBoring() ||
+ !isLeafNode(v, g) || // Must have no out-edges
+ in_degree(v, g) != 1) { // Role must have exactly one in-edge
+ return false;
+ }
+
// Use the program to handle cases that aren't external reports.
for (const ReportID &rid : g[v].reports) {
if (!isExternalReport(rm.getReport(rid))) {
@@ -420,36 +420,36 @@ bool RoseBuildImpl::isDirectReport(u32 id) const {
}
if (literals.at(id).table == ROSE_ANCHORED) {
- /* in-edges are irrelevant for anchored region. */
- continue;
- }
-
- /* The in-edge must be an (0, inf) edge from root. */
- assert(in_degree(v, g) != 0);
- RoseEdge e = *(in_edges(v, g).first);
- if (source(e, g) != root || g[e].minBound != 0 ||
- g[e].maxBound != ROSE_BOUND_INF) {
- return false;
- }
-
- // Note: we allow ekeys; they will result in unused roles being built as
- // direct reporting will be used when actually matching in Rose.
- /* TODO: prevent roles being created */
- }
-
- DEBUG_PRINTF("literal %u ('%s') is a %s report\n", id,
+ /* in-edges are irrelevant for anchored region. */
+ continue;
+ }
+
+ /* The in-edge must be an (0, inf) edge from root. */
+ assert(in_degree(v, g) != 0);
+ RoseEdge e = *(in_edges(v, g).first);
+ if (source(e, g) != root || g[e].minBound != 0 ||
+ g[e].maxBound != ROSE_BOUND_INF) {
+ return false;
+ }
+
+ // Note: we allow ekeys; they will result in unused roles being built as
+ // direct reporting will be used when actually matching in Rose.
+ /* TODO: prevent roles being created */
+ }
+
+ DEBUG_PRINTF("literal %u ('%s') is a %s report\n", id,
dumpString(literals.at(id).s).c_str(),
- info.vertices.size() > 1 ? "multi-direct" : "direct");
- return true;
-}
-
+ info.vertices.size() > 1 ? "multi-direct" : "direct");
+ return true;
+}
+
/* If we have prefixes that can squash all the floating roots, we can have a
* somewhat-conditional floating table. As we can't yet look at squash_masks, we
* have to make some guess as to if we are in this case but the win for not
* running a floating table over a large portion of the stream is significantly
* larger than avoiding running an eod table over the last N bytes. */
-static
+static
bool checkFloatingKillableByPrefixes(const RoseBuildImpl &tbi) {
for (auto v : vertices_range(tbi.g)) {
if (!tbi.isRootSuccessor(v)) {
@@ -484,25 +484,25 @@ bool checkFloatingKillableByPrefixes(const RoseBuildImpl &tbi) {
static
bool checkEodStealFloating(const RoseBuildImpl &build,
- const vector<u32> &eodLiteralsForFloating,
- u32 numFloatingLiterals,
- size_t shortestFloatingLen) {
- if (eodLiteralsForFloating.empty()) {
- DEBUG_PRINTF("no eod literals\n");
- return true;
- }
-
- if (!numFloatingLiterals) {
- DEBUG_PRINTF("no floating table\n");
- return false;
- }
-
+ const vector<u32> &eodLiteralsForFloating,
+ u32 numFloatingLiterals,
+ size_t shortestFloatingLen) {
+ if (eodLiteralsForFloating.empty()) {
+ DEBUG_PRINTF("no eod literals\n");
+ return true;
+ }
+
+ if (!numFloatingLiterals) {
+ DEBUG_PRINTF("no floating table\n");
+ return false;
+ }
+
if (build.hasNoFloatingRoots()) {
- DEBUG_PRINTF("skipping as floating table is conditional\n");
- /* TODO: investigate putting stuff in atable */
- return false;
- }
-
+ DEBUG_PRINTF("skipping as floating table is conditional\n");
+ /* TODO: investigate putting stuff in atable */
+ return false;
+ }
+
if (checkFloatingKillableByPrefixes(build)) {
DEBUG_PRINTF("skipping as prefixes may make ftable conditional\n");
return false;
@@ -516,333 +516,333 @@ bool checkEodStealFloating(const RoseBuildImpl &build,
}
}
- DEBUG_PRINTF("%zu are eod literals, %u floating; floating len=%zu\n",
- eodLiteralsForFloating.size(), numFloatingLiterals,
- shortestFloatingLen);
- u32 new_floating_lits = 0;
-
- for (u32 eod_id : eodLiteralsForFloating) {
+ DEBUG_PRINTF("%zu are eod literals, %u floating; floating len=%zu\n",
+ eodLiteralsForFloating.size(), numFloatingLiterals,
+ shortestFloatingLen);
+ u32 new_floating_lits = 0;
+
+ for (u32 eod_id : eodLiteralsForFloating) {
const rose_literal_id &lit = build.literals.at(eod_id);
- DEBUG_PRINTF("checking '%s'\n", dumpString(lit.s).c_str());
-
+ DEBUG_PRINTF("checking '%s'\n", dumpString(lit.s).c_str());
+
if (contains(floating_lits, lit.s)) {
- DEBUG_PRINTF("skip; there is already a floating version\n");
- continue;
- }
-
- // Don't want to make the shortest floating literal shorter/worse.
- if (trailerDueToSelf(lit) < 4 || lit.s.length() < shortestFloatingLen) {
- DEBUG_PRINTF("len=%zu, selfOverlap=%zu\n", lit.s.length(),
- trailerDueToSelf(lit));
- DEBUG_PRINTF("would shorten, bailing\n");
- return false;
- }
-
- new_floating_lits++;
- }
- DEBUG_PRINTF("..would require %u new floating literals\n",
- new_floating_lits);
-
- // Magic number thresholds: we only want to get rid of our EOD table if it
- // would make no real difference to the FDR.
- if (numFloatingLiterals / 8 < new_floating_lits
- && (new_floating_lits > 3 || numFloatingLiterals <= 2)) {
- DEBUG_PRINTF("leaving eod table alone.\n");
- return false;
- }
-
- return true;
-}
-
-static
-void promoteEodToFloating(RoseBuildImpl &tbi, const vector<u32> &eodLiterals) {
+ DEBUG_PRINTF("skip; there is already a floating version\n");
+ continue;
+ }
+
+ // Don't want to make the shortest floating literal shorter/worse.
+ if (trailerDueToSelf(lit) < 4 || lit.s.length() < shortestFloatingLen) {
+ DEBUG_PRINTF("len=%zu, selfOverlap=%zu\n", lit.s.length(),
+ trailerDueToSelf(lit));
+ DEBUG_PRINTF("would shorten, bailing\n");
+ return false;
+ }
+
+ new_floating_lits++;
+ }
+ DEBUG_PRINTF("..would require %u new floating literals\n",
+ new_floating_lits);
+
+ // Magic number thresholds: we only want to get rid of our EOD table if it
+ // would make no real difference to the FDR.
+ if (numFloatingLiterals / 8 < new_floating_lits
+ && (new_floating_lits > 3 || numFloatingLiterals <= 2)) {
+ DEBUG_PRINTF("leaving eod table alone.\n");
+ return false;
+ }
+
+ return true;
+}
+
+static
+void promoteEodToFloating(RoseBuildImpl &tbi, const vector<u32> &eodLiterals) {
DEBUG_PRINTF("promoting %zu eod literals to floating table\n",
eodLiterals.size());
-
- for (u32 eod_id : eodLiterals) {
+
+ for (u32 eod_id : eodLiterals) {
const rose_literal_id &lit = tbi.literals.at(eod_id);
DEBUG_PRINTF("eod_id=%u, lit=%s\n", eod_id, dumpString(lit.s).c_str());
- u32 floating_id = tbi.getLiteralId(lit.s, lit.msk, lit.cmp, lit.delay,
- ROSE_FLOATING);
+ u32 floating_id = tbi.getLiteralId(lit.s, lit.msk, lit.cmp, lit.delay,
+ ROSE_FLOATING);
DEBUG_PRINTF("floating_id=%u, lit=%s\n", floating_id,
dumpString(tbi.literals.at(floating_id).s).c_str());
- auto &float_verts = tbi.literal_info[floating_id].vertices;
- auto &eod_verts = tbi.literal_info[eod_id].vertices;
-
- insert(&float_verts, eod_verts);
- eod_verts.clear();
-
- DEBUG_PRINTF("eod_lit=%u -> float_lit=%u\n", eod_id, floating_id);
-
- for (auto v : float_verts) {
- tbi.g[v].literals.erase(eod_id);
- tbi.g[v].literals.insert(floating_id);
- }
-
- tbi.literal_info[floating_id].requires_benefits
- = tbi.literal_info[eod_id].requires_benefits;
- }
-}
-
-static
-bool promoteEodToAnchored(RoseBuildImpl &tbi, const vector<u32> &eodLiterals) {
- DEBUG_PRINTF("promoting eod literals to anchored table\n");
- bool rv = true;
-
- for (u32 eod_id : eodLiterals) {
+ auto &float_verts = tbi.literal_info[floating_id].vertices;
+ auto &eod_verts = tbi.literal_info[eod_id].vertices;
+
+ insert(&float_verts, eod_verts);
+ eod_verts.clear();
+
+ DEBUG_PRINTF("eod_lit=%u -> float_lit=%u\n", eod_id, floating_id);
+
+ for (auto v : float_verts) {
+ tbi.g[v].literals.erase(eod_id);
+ tbi.g[v].literals.insert(floating_id);
+ }
+
+ tbi.literal_info[floating_id].requires_benefits
+ = tbi.literal_info[eod_id].requires_benefits;
+ }
+}
+
+static
+bool promoteEodToAnchored(RoseBuildImpl &tbi, const vector<u32> &eodLiterals) {
+ DEBUG_PRINTF("promoting eod literals to anchored table\n");
+ bool rv = true;
+
+ for (u32 eod_id : eodLiterals) {
const rose_literal_id &lit = tbi.literals.at(eod_id);
-
- NGHolder h;
- add_edge(h.start, h.accept, h);
- appendLiteral(h, lit.s); /* we only accept cases which are anchored
- * hard up against start */
-
- u32 a_id = tbi.getNewLiteralId();
- u32 remap_id = 0;
- DEBUG_PRINTF(" trying to add dfa stuff\n");
- int anch_ok = addToAnchoredMatcher(tbi, h, a_id, &remap_id);
-
- if (anch_ok == ANCHORED_FAIL) {
- DEBUG_PRINTF("failed to promote to anchored need to keep etable\n");
- rv = false;
- continue;
- } else if (anch_ok == ANCHORED_REMAP) {
- DEBUG_PRINTF("remapped\n");
- a_id = remap_id;
- } else {
- assert(anch_ok == ANCHORED_SUCCESS);
- }
-
- // Store the literal itself in a side structure so that we can use it
- // for overlap calculations later. This may be obsolete when the old
- // Rose construction path (and its history selection code) goes away.
- tbi.anchoredLitSuffix.insert(make_pair(a_id, lit));
-
- auto &a_verts = tbi.literal_info[a_id].vertices;
- auto &eod_verts = tbi.literal_info[eod_id].vertices;
-
- for (auto v : eod_verts) {
- for (const auto &e : in_edges_range(v, tbi.g)) {
- assert(tbi.g[e].maxBound != ROSE_BOUND_INF);
- tbi.g[e].minBound += lit.s.length();
- tbi.g[e].maxBound += lit.s.length();
- }
- }
-
- insert(&a_verts, eod_verts);
- eod_verts.clear();
-
- for (auto v : a_verts) {
- tbi.g[v].literals.erase(eod_id);
- tbi.g[v].literals.insert(a_id);
- }
- }
-
- return rv;
-}
-
-static
-bool suitableForAnchored(const RoseBuildImpl &tbi, const rose_literal_id &l_id,
- const rose_literal_info &lit) {
- const RoseGraph &g = tbi.g;
-
- bool seen = false;
- u32 min_offset = 0;
- u32 max_offset = 0;
-
- if (!lit.delayed_ids.empty() || l_id.delay) {
- DEBUG_PRINTF("delay\n");
- return false;
- }
-
- if (!l_id.msk.empty()) {
- DEBUG_PRINTF("msk\n");
- return false;
- }
-
- for (auto v : lit.vertices) {
- if (!seen) {
- min_offset = g[v].min_offset;
- max_offset = g[v].max_offset;
- seen = true;
-
- if (max_offset > tbi.cc.grey.maxAnchoredRegion) {
- DEBUG_PRINTF("too deep %u\n", max_offset);
- return false;
- }
- }
-
- if (max_offset != g[v].max_offset || min_offset != g[v].min_offset) {
- DEBUG_PRINTF(":(\n");
- return false;
- }
-
- if (!g[v].isBoring()) {
- DEBUG_PRINTF(":(\n");
- return false;
- }
-
- if (g[v].literals.size() != 1) {
- DEBUG_PRINTF("shared\n");
- return false;
- }
-
- if (tbi.isNonRootSuccessor(v)) {
- DEBUG_PRINTF("non root\n");
- return false;
- }
-
- if (max_offset != l_id.s.length() || min_offset != l_id.s.length()) {
- DEBUG_PRINTF("|%zu| (%u,%u):(\n", l_id.s.length(), min_offset,
- max_offset);
- /* TODO: handle cases with small bounds */
- return false;
- }
-
- for (auto w : adjacent_vertices_range(v, g)) {
- if (!g[w].eod_accept) {
- DEBUG_PRINTF("non eod accept literal\n");
- return false;
- }
- }
- }
- return true;
-}
-
-// If we've got a small number of long, innocuous EOD literals and a large
-// floating table, we consider promoting those EOD literals to the floating
-// table to avoid having to run both. See UE-2069, consider deleting this and
-// replacing with an elegant reverse DFA.
-/* We do not want to do this if we would otherwise avoid running the floating
- * table altogether. */
-static
-void stealEodVertices(RoseBuildImpl &tbi) {
- u32 numFloatingLiterals = 0;
- u32 numAnchoredLiterals = 0;
- size_t shortestFloatingLen = SIZE_MAX;
- vector<u32> eodLiteralsForFloating;
- vector<u32> eodLiteralsForAnchored;
- DEBUG_PRINTF("hi\n");
-
- for (u32 i = 0; i < tbi.literal_info.size(); i++) {
- const auto &info = tbi.literal_info[i];
- if (info.vertices.empty()) {
- continue; // skip unused literals
- }
-
+
+ NGHolder h;
+ add_edge(h.start, h.accept, h);
+ appendLiteral(h, lit.s); /* we only accept cases which are anchored
+ * hard up against start */
+
+ u32 a_id = tbi.getNewLiteralId();
+ u32 remap_id = 0;
+ DEBUG_PRINTF(" trying to add dfa stuff\n");
+ int anch_ok = addToAnchoredMatcher(tbi, h, a_id, &remap_id);
+
+ if (anch_ok == ANCHORED_FAIL) {
+ DEBUG_PRINTF("failed to promote to anchored need to keep etable\n");
+ rv = false;
+ continue;
+ } else if (anch_ok == ANCHORED_REMAP) {
+ DEBUG_PRINTF("remapped\n");
+ a_id = remap_id;
+ } else {
+ assert(anch_ok == ANCHORED_SUCCESS);
+ }
+
+ // Store the literal itself in a side structure so that we can use it
+ // for overlap calculations later. This may be obsolete when the old
+ // Rose construction path (and its history selection code) goes away.
+ tbi.anchoredLitSuffix.insert(make_pair(a_id, lit));
+
+ auto &a_verts = tbi.literal_info[a_id].vertices;
+ auto &eod_verts = tbi.literal_info[eod_id].vertices;
+
+ for (auto v : eod_verts) {
+ for (const auto &e : in_edges_range(v, tbi.g)) {
+ assert(tbi.g[e].maxBound != ROSE_BOUND_INF);
+ tbi.g[e].minBound += lit.s.length();
+ tbi.g[e].maxBound += lit.s.length();
+ }
+ }
+
+ insert(&a_verts, eod_verts);
+ eod_verts.clear();
+
+ for (auto v : a_verts) {
+ tbi.g[v].literals.erase(eod_id);
+ tbi.g[v].literals.insert(a_id);
+ }
+ }
+
+ return rv;
+}
+
+static
+bool suitableForAnchored(const RoseBuildImpl &tbi, const rose_literal_id &l_id,
+ const rose_literal_info &lit) {
+ const RoseGraph &g = tbi.g;
+
+ bool seen = false;
+ u32 min_offset = 0;
+ u32 max_offset = 0;
+
+ if (!lit.delayed_ids.empty() || l_id.delay) {
+ DEBUG_PRINTF("delay\n");
+ return false;
+ }
+
+ if (!l_id.msk.empty()) {
+ DEBUG_PRINTF("msk\n");
+ return false;
+ }
+
+ for (auto v : lit.vertices) {
+ if (!seen) {
+ min_offset = g[v].min_offset;
+ max_offset = g[v].max_offset;
+ seen = true;
+
+ if (max_offset > tbi.cc.grey.maxAnchoredRegion) {
+ DEBUG_PRINTF("too deep %u\n", max_offset);
+ return false;
+ }
+ }
+
+ if (max_offset != g[v].max_offset || min_offset != g[v].min_offset) {
+ DEBUG_PRINTF(":(\n");
+ return false;
+ }
+
+ if (!g[v].isBoring()) {
+ DEBUG_PRINTF(":(\n");
+ return false;
+ }
+
+ if (g[v].literals.size() != 1) {
+ DEBUG_PRINTF("shared\n");
+ return false;
+ }
+
+ if (tbi.isNonRootSuccessor(v)) {
+ DEBUG_PRINTF("non root\n");
+ return false;
+ }
+
+ if (max_offset != l_id.s.length() || min_offset != l_id.s.length()) {
+ DEBUG_PRINTF("|%zu| (%u,%u):(\n", l_id.s.length(), min_offset,
+ max_offset);
+ /* TODO: handle cases with small bounds */
+ return false;
+ }
+
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (!g[w].eod_accept) {
+ DEBUG_PRINTF("non eod accept literal\n");
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+// If we've got a small number of long, innocuous EOD literals and a large
+// floating table, we consider promoting those EOD literals to the floating
+// table to avoid having to run both. See UE-2069, consider deleting this and
+// replacing with an elegant reverse DFA.
+/* We do not want to do this if we would otherwise avoid running the floating
+ * table altogether. */
+static
+void stealEodVertices(RoseBuildImpl &tbi) {
+ u32 numFloatingLiterals = 0;
+ u32 numAnchoredLiterals = 0;
+ size_t shortestFloatingLen = SIZE_MAX;
+ vector<u32> eodLiteralsForFloating;
+ vector<u32> eodLiteralsForAnchored;
+ DEBUG_PRINTF("hi\n");
+
+ for (u32 i = 0; i < tbi.literal_info.size(); i++) {
+ const auto &info = tbi.literal_info[i];
+ if (info.vertices.empty()) {
+ continue; // skip unused literals
+ }
+
const rose_literal_id &lit = tbi.literals.at(i);
-
- if (lit.table == ROSE_EOD_ANCHORED) {
- if (suitableForAnchored(tbi, lit, info)) {
- eodLiteralsForAnchored.push_back(i);
- } else {
- eodLiteralsForFloating.push_back(i);
- }
- } else if (lit.table == ROSE_FLOATING) {
- numFloatingLiterals++;
- shortestFloatingLen = min(shortestFloatingLen, lit.s.length());
- } else if (lit.table == ROSE_ANCHORED) {
- numAnchoredLiterals++;
- }
- }
-
- /* given a choice of having either an eod table or an anchored table, we
- * always favour having an anchored table */
-
- if (!checkEodStealFloating(tbi, eodLiteralsForFloating, numFloatingLiterals,
- shortestFloatingLen)) {
- DEBUG_PRINTF("removing etable weakens ftable\n");
- return;
- }
-
- promoteEodToFloating(tbi, eodLiteralsForFloating);
-
- if (!promoteEodToAnchored(tbi, eodLiteralsForAnchored)) {
- DEBUG_PRINTF("still need ematcher\n");
- return;
- }
-
- // We're no longer using the EOD matcher.
- tbi.ematcher_region_size = 0;
-}
-
-bool RoseBuildImpl::isDelayed(u32 id) const {
- return literal_info.at(id).undelayed_id != id;
-}
-
+
+ if (lit.table == ROSE_EOD_ANCHORED) {
+ if (suitableForAnchored(tbi, lit, info)) {
+ eodLiteralsForAnchored.push_back(i);
+ } else {
+ eodLiteralsForFloating.push_back(i);
+ }
+ } else if (lit.table == ROSE_FLOATING) {
+ numFloatingLiterals++;
+ shortestFloatingLen = min(shortestFloatingLen, lit.s.length());
+ } else if (lit.table == ROSE_ANCHORED) {
+ numAnchoredLiterals++;
+ }
+ }
+
+ /* given a choice of having either an eod table or an anchored table, we
+ * always favour having an anchored table */
+
+ if (!checkEodStealFloating(tbi, eodLiteralsForFloating, numFloatingLiterals,
+ shortestFloatingLen)) {
+ DEBUG_PRINTF("removing etable weakens ftable\n");
+ return;
+ }
+
+ promoteEodToFloating(tbi, eodLiteralsForFloating);
+
+ if (!promoteEodToAnchored(tbi, eodLiteralsForAnchored)) {
+ DEBUG_PRINTF("still need ematcher\n");
+ return;
+ }
+
+ // We're no longer using the EOD matcher.
+ tbi.ematcher_region_size = 0;
+}
+
+bool RoseBuildImpl::isDelayed(u32 id) const {
+ return literal_info.at(id).undelayed_id != id;
+}
+
bool RoseBuildImpl::hasDelayedLiteral(RoseVertex v) const {
for (u32 lit_id : g[v].literals) {
if (literals.at(lit_id).delay) {
return true;
- }
- }
-
+ }
+ }
+
return false;
-}
-
+}
+
bool RoseBuildImpl::hasDelayPred(RoseVertex v) const {
for (auto u : inv_adjacent_vertices_range(v, g)) {
if (hasDelayedLiteral(u)) {
return true;
- }
- }
-
- return false;
-}
-
+ }
+ }
+
+ return false;
+}
+
bool RoseBuildImpl::hasAnchoredTablePred(RoseVertex v) const {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
if (isAnchored(u)) {
return true;
- }
- }
-
+ }
+ }
+
return false;
-}
-
+}
+
void RoseBuildImpl::findTransientLeftfixes(void) {
for (auto v : vertices_range(g)) {
if (!g[v].left) {
- continue;
- }
-
+ continue;
+ }
+
/* infixes can never (or at least not yet) be transient */
if (isNonRootSuccessor(v)) {
- continue;
- }
-
+ continue;
+ }
+
const left_id &left(g[v].left);
-
+
if (::ue2::isAnchored(left) && !isInETable(v)) {
/* etable prefixes currently MUST be transient as we do not know
* where we can safely catch them up to (yet). */
DEBUG_PRINTF("anchored roses in rocky soil are not fleeting\n");
continue;
}
-
+
const depth max_width = findMaxWidth(left);
if (!max_width.is_finite()) {
DEBUG_PRINTF("inf max width\n");
- continue;
- }
-
+ continue;
+ }
+
if (cc.streaming) {
/* STREAMING: transient prefixes must be able to run using history
* rather than storing state. */
u32 his = g[v].left.lag + max_width;
-
+
// If this vertex has an event literal, we need to add one to cope
// with it.
if (hasLiteralInTable(v, ROSE_EVENT)) {
his++;
}
-
+
/* +1 as trigger must appear in main buffer and no byte is needed to
* decompress the state */
if (his <= cc.grey.maxHistoryAvailable + 1) {
transient.insert(left);
DEBUG_PRINTF("a transient leftfix spotted his=%u\n", his);
- }
+ }
} else {
/* BLOCK: transientness is less important and more fuzzy, ideally
* it should be quick to calculate the state. No need to worry about
@@ -852,10 +852,10 @@ void RoseBuildImpl::findTransientLeftfixes(void) {
DEBUG_PRINTF("a transient block leftfix spotted [%u]\n",
(u32)max_width);
}
- }
+ }
}
}
-
+
/** Find all the different roses and their associated literals. */
static
map<left_id, vector<RoseVertex>> findLeftSucc(const RoseBuildImpl &build) {
@@ -864,69 +864,69 @@ map<left_id, vector<RoseVertex>> findLeftSucc(const RoseBuildImpl &build) {
if (build.g[v].left) {
const LeftEngInfo &lei = build.g[v].left;
leftfixes[lei].push_back(v);
- }
+ }
}
return leftfixes;
}
-
+
namespace {
struct infix_info {
set<RoseVertex> preds;
set<RoseVertex> succs;
};
}
-
+
static
map<NGHolder *, infix_info> findInfixGraphInfo(const RoseBuildImpl &build) {
map<NGHolder *, infix_info> rv;
-
+
for (auto v : vertices_range(build.g)) {
if (!build.g[v].left) {
- continue;
- }
-
+ continue;
+ }
+
if (build.isRootSuccessor(v)) {
DEBUG_PRINTF("a prefix is never an infix\n");
continue;
- }
-
+ }
+
/* ensure only proper nfas */
const LeftEngInfo &lei = build.g[v].left;
if (!lei.graph) {
- continue;
- }
+ continue;
+ }
if (lei.haig || lei.dfa) {
continue;
- }
+ }
assert(!lei.castle);
infix_info &info = rv[lei.graph.get()];
insert(&info.preds, inv_adjacent_vertices_range(v, build.g));
info.succs.insert(v);
- }
-
+ }
+
return rv;
-}
-
+}
+
static
map<u32, flat_set<NFAEdge>> getTopInfo(const NGHolder &h) {
map<u32, flat_set<NFAEdge>> rv;
for (NFAEdge e : out_edges_range(h.start, h)) {
for (u32 t : h[e].tops) {
rv[t].insert(e);
- }
- }
+ }
+ }
return rv;
-}
-
+}
+
static
u32 findUnusedTop(const map<u32, flat_set<NFAEdge>> &tops) {
u32 i = 0;
while (contains(tops, i)) {
i++;
- }
+ }
return i;
-}
-
+}
+
static
bool reduceTopTriggerLoad(RoseBuildImpl &build, NGHolder &h, RoseVertex u) {
RoseGraph &g = build.g;
@@ -936,43 +936,43 @@ bool reduceTopTriggerLoad(RoseBuildImpl &build, NGHolder &h, RoseVertex u) {
RoseVertex v = target(e, g);
if (g[v].left.graph.get() != &h) {
continue;
- }
+ }
tops.insert(g[e].rose_top);
- }
-
+ }
+
assert(!tops.empty());
if (tops.size() <= 1) {
- return false;
- }
+ return false;
+ }
DEBUG_PRINTF("%zu triggers %zu tops for %p\n", build.g[u].index,
tops.size(), &h);
-
+
auto h_top_info = getTopInfo(h);
flat_set<NFAEdge> edges_to_trigger;
for (u32 t : tops) {
insert(&edges_to_trigger, h_top_info[t]);
- }
-
+ }
+
u32 new_top = ~0U;
/* check if there is already a top with the right the successor set */
for (const auto &elem : h_top_info) {
if (elem.second == edges_to_trigger) {
new_top = elem.first;
break;
- }
- }
-
+ }
+ }
+
/* if no existing suitable top, add a new top for us */
if (new_top == ~0U) {
new_top = findUnusedTop(h_top_info);
-
+
/* add top to edges out of start */
for (NFAEdge e : out_edges_range(h.start, h)) {
if (has_intersection(tops, h[e].tops)) {
h[e].tops.insert(new_top);
}
- }
-
+ }
+
/* check still implementable if we add a new top */
if (!isImplementableNFA(h, nullptr, build.cc)) {
DEBUG_PRINTF("unable to add new top\n");
@@ -981,219 +981,219 @@ bool reduceTopTriggerLoad(RoseBuildImpl &build, NGHolder &h, RoseVertex u) {
}
/* we should be back to the original graph */
assert(isImplementableNFA(h, nullptr, build.cc));
- return false;
- }
- }
-
+ return false;
+ }
+ }
+
DEBUG_PRINTF("using new merged top %u\n", new_top);
assert(new_top != ~0U);
for (RoseEdge e: out_edges_range(u, g)) {
RoseVertex v = target(e, g);
if (g[v].left.graph.get() != &h) {
continue;
- }
+ }
g[e].rose_top = new_top;
- }
-
+ }
+
return true;
-}
-
-static
+}
+
+static
void packInfixTops(NGHolder &h, RoseGraph &g,
const set<RoseVertex> &verts) {
if (!is_triggered(h)) {
DEBUG_PRINTF("not triggered, no tops\n");
return;
- }
+ }
assert(isCorrectlyTopped(h));
DEBUG_PRINTF("pruning unused tops\n");
flat_set<u32> used_tops;
for (auto v : verts) {
assert(g[v].left.graph.get() == &h);
-
+
for (const auto &e : in_edges_range(v, g)) {
u32 top = g[e].rose_top;
used_tops.insert(top);
}
- }
-
+ }
+
map<u32, u32> top_mapping;
for (u32 t : used_tops) {
u32 new_top = top_mapping.size();
top_mapping[t] = new_top;
- }
-
+ }
+
for (auto v : verts) {
assert(g[v].left.graph.get() == &h);
-
+
for (const auto &e : in_edges_range(v, g)) {
g[e].rose_top = top_mapping.at(g[e].rose_top);
- }
+ }
}
-
+
vector<NFAEdge> dead;
for (const auto &e : out_edges_range(h.start, h)) {
NFAVertex v = target(e, h);
if (v == h.startDs) {
continue; // stylised edge, leave it alone.
- }
+ }
flat_set<u32> updated_tops;
for (u32 t : h[e].tops) {
if (contains(top_mapping, t)) {
updated_tops.insert(top_mapping.at(t));
- }
- }
+ }
+ }
h[e].tops = std::move(updated_tops);
if (h[e].tops.empty()) {
DEBUG_PRINTF("edge (start,%zu) has only unused tops\n", h[v].index);
dead.push_back(e);
- }
- }
-
+ }
+ }
+
if (dead.empty()) {
return;
- }
-
+ }
+
remove_edges(dead, h);
pruneUseless(h);
clearReports(h); // As we may have removed vacuous edges.
-}
-
-static
+}
+
+static
void reduceTopTriggerLoad(RoseBuildImpl &build) {
auto infixes = findInfixGraphInfo(build);
-
+
for (auto &p : infixes) {
if (onlyOneTop(*p.first)) {
- continue;
- }
-
+ continue;
+ }
+
bool changed = false;
for (RoseVertex v : p.second.preds) {
changed |= reduceTopTriggerLoad(build, *p.first, v);
- }
-
+ }
+
if (changed) {
packInfixTops(*p.first, build.g, p.second.succs);
reduceImplementableGraph(*p.first, SOM_NONE, nullptr, build.cc);
- }
- }
-}
-
-static
+ }
+ }
+}
+
+static
bool triggerKillsRoseGraph(const RoseBuildImpl &build, const left_id &left,
- const set<ue2_literal> &all_lits,
- const RoseEdge &e) {
- assert(left.graph());
- const NGHolder &h = *left.graph();
-
+ const set<ue2_literal> &all_lits,
+ const RoseEdge &e) {
+ assert(left.graph());
+ const NGHolder &h = *left.graph();
+
flat_set<NFAVertex> all_states;
- insert(&all_states, vertices(h));
- assert(out_degree(h.startDs, h) == 1); /* triggered don't use sds */
- DEBUG_PRINTF("removing sds\n");
- all_states.erase(h.startDs);
-
+ insert(&all_states, vertices(h));
+ assert(out_degree(h.startDs, h) == 1); /* triggered don't use sds */
+ DEBUG_PRINTF("removing sds\n");
+ all_states.erase(h.startDs);
+
flat_set<NFAVertex> states;
-
- /* check each pred literal to see if they all kill previous graph
- * state */
+
+ /* check each pred literal to see if they all kill previous graph
+ * state */
for (u32 lit_id : build.g[source(e, build.g)].literals) {
const rose_literal_id &pred_lit = build.literals.at(lit_id);
- const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s);
-
- DEBUG_PRINTF("running graph %zu\n", states.size());
- states = execute_graph(h, s, all_states, true);
- DEBUG_PRINTF("ran, %zu states on\n", states.size());
-
- if (!states.empty()) {
- return false;
- }
- }
-
- return true;
-}
-
-static
+ const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s);
+
+ DEBUG_PRINTF("running graph %zu\n", states.size());
+ states = execute_graph(h, s, all_states, true);
+ DEBUG_PRINTF("ran, %zu states on\n", states.size());
+
+ if (!states.empty()) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static
bool triggerKillsRose(const RoseBuildImpl &build, const left_id &left,
- const set<ue2_literal> &all_lits, const RoseEdge &e) {
- if (left.haig()) {
- /* TODO: To allow this for som-based engines we would also need to
- * ensure as well that no other triggers can occur at the same location
- * with a different som. */
- return false;
- }
-
- if (left.graph()) {
+ const set<ue2_literal> &all_lits, const RoseEdge &e) {
+ if (left.haig()) {
+ /* TODO: To allow this for som-based engines we would also need to
+ * ensure as well that no other triggers can occur at the same location
+ * with a different som. */
+ return false;
+ }
+
+ if (left.graph()) {
return triggerKillsRoseGraph(build, left, all_lits, e);
- }
-
- if (left.castle()) {
+ }
+
+ if (left.castle()) {
return triggerKillsRoseCastle(build, left, all_lits, e);
- }
-
- return false;
-}
-
+ }
+
+ return false;
+}
+
/* Sometimes the arrival of a top for a rose infix can ensure that the nfa would
* be dead at that time. In the case of multiple trigger literals, we can only
* base our decision on that portion of literal after any overlapping literals.
*/
-static
+static
void findTopTriggerCancels(RoseBuildImpl &build) {
auto left_succ = findLeftSucc(build); /* leftfixes -> succ verts */
-
+
for (const auto &r : left_succ) {
- const left_id &left = r.first;
- const vector<RoseVertex> &succs = r.second;
-
- assert(!succs.empty());
+ const left_id &left = r.first;
+ const vector<RoseVertex> &succs = r.second;
+
+ assert(!succs.empty());
if (build.isRootSuccessor(*succs.begin())) {
- /* a prefix is never an infix */
- continue;
- }
-
- set<u32> tops_seen;
- set<RoseEdge> rose_edges;
- set<u32> pred_lit_ids;
-
- for (auto v : succs) {
+ /* a prefix is never an infix */
+ continue;
+ }
+
+ set<u32> tops_seen;
+ set<RoseEdge> rose_edges;
+ set<u32> pred_lit_ids;
+
+ for (auto v : succs) {
for (const auto &e : in_edges_range(v, build.g)) {
RoseVertex u = source(e, build.g);
tops_seen.insert(build.g[e].rose_top);
insert(&pred_lit_ids, build.g[u].literals);
- rose_edges.insert(e);
- }
- }
-
- set<ue2_literal> all_lits;
-
- if (tops_seen.size() > 1) {
- goto next_rose; /* slightly tricky to deal with overlap case */
- }
-
- for (u32 lit_id : pred_lit_ids) {
+ rose_edges.insert(e);
+ }
+ }
+
+ set<ue2_literal> all_lits;
+
+ if (tops_seen.size() > 1) {
+ goto next_rose; /* slightly tricky to deal with overlap case */
+ }
+
+ for (u32 lit_id : pred_lit_ids) {
const rose_literal_id &p_lit = build.literals.at(lit_id);
- if (p_lit.delay || p_lit.table == ROSE_ANCHORED) {
- goto next_rose;
- }
- all_lits.insert(p_lit.s);
- DEBUG_PRINTF("trigger: '%s'\n", dumpString(p_lit.s).c_str());
- }
-
- DEBUG_PRINTF("rose has %zu trigger literals, %zu edges\n",
- all_lits.size(), rose_edges.size());
-
- for (const auto &e : rose_edges) {
+ if (p_lit.delay || p_lit.table == ROSE_ANCHORED) {
+ goto next_rose;
+ }
+ all_lits.insert(p_lit.s);
+ DEBUG_PRINTF("trigger: '%s'\n", dumpString(p_lit.s).c_str());
+ }
+
+ DEBUG_PRINTF("rose has %zu trigger literals, %zu edges\n",
+ all_lits.size(), rose_edges.size());
+
+ for (const auto &e : rose_edges) {
if (triggerKillsRose(build, left, all_lits, e)) {
- DEBUG_PRINTF("top will override previous rose state\n");
+ DEBUG_PRINTF("top will override previous rose state\n");
build.g[e].rose_cancel_prev_top = true;
- }
- }
- next_rose:;
- }
-}
-
-static
+ }
+ }
+ next_rose:;
+ }
+}
+
+static
void optimiseRoseTops(RoseBuildImpl &build) {
reduceTopTriggerLoad(build);
/* prune unused tops ? */
@@ -1201,599 +1201,599 @@ void optimiseRoseTops(RoseBuildImpl &build) {
}
static
-void buildRoseSquashMasks(RoseBuildImpl &tbi) {
- /* Rose nfa squash masks are applied to the groups when the nfa can no
- * longer match */
-
- map<left_id, vector<RoseVertex>> roses =
- findLeftSucc(tbi); /* rose -> succ verts */
-
- /* a rose nfa can squash a group if all literals in that group are a
- * successor of the nfa and all the literals */
- for (const auto &e : roses) {
- const left_id &left = e.first;
- const vector<RoseVertex> &succs = e.second;
-
- set<u32> lit_ids;
- bool anchored_pred = false;
- for (auto v : succs) {
- lit_ids.insert(tbi.g[v].literals.begin(), tbi.g[v].literals.end());
- for (auto u : inv_adjacent_vertices_range(v, tbi.g)) {
- anchored_pred |= tbi.isAnchored(u);
- }
- }
-
- /* Due to the anchored table not being able to set groups again,
- * we cannot use a rose nfa for group squashing if it is being triggered
- * from the anchored table and can match more than once. */
-
- if (anchored_pred) { /* infix with pred in anchored table */
- u32 min_off = ~0U;
- u32 max_off = 0U;
- for (auto v : succs) {
- for (auto u : inv_adjacent_vertices_range(v, tbi.g)) {
- min_off = min(min_off, tbi.g[u].min_offset);
- max_off = max(max_off, tbi.g[u].max_offset);
- }
- }
- if (min_off != max_off) {
- /* leave all groups alone */
- tbi.rose_squash_masks[left] = ~0ULL;
- continue;
- }
- }
-
+void buildRoseSquashMasks(RoseBuildImpl &tbi) {
+ /* Rose nfa squash masks are applied to the groups when the nfa can no
+ * longer match */
+
+ map<left_id, vector<RoseVertex>> roses =
+ findLeftSucc(tbi); /* rose -> succ verts */
+
+ /* a rose nfa can squash a group if all literals in that group are a
+ * successor of the nfa and all the literals */
+ for (const auto &e : roses) {
+ const left_id &left = e.first;
+ const vector<RoseVertex> &succs = e.second;
+
+ set<u32> lit_ids;
+ bool anchored_pred = false;
+ for (auto v : succs) {
+ lit_ids.insert(tbi.g[v].literals.begin(), tbi.g[v].literals.end());
+ for (auto u : inv_adjacent_vertices_range(v, tbi.g)) {
+ anchored_pred |= tbi.isAnchored(u);
+ }
+ }
+
+ /* Due to the anchored table not being able to set groups again,
+ * we cannot use a rose nfa for group squashing if it is being triggered
+ * from the anchored table and can match more than once. */
+
+ if (anchored_pred) { /* infix with pred in anchored table */
+ u32 min_off = ~0U;
+ u32 max_off = 0U;
+ for (auto v : succs) {
+ for (auto u : inv_adjacent_vertices_range(v, tbi.g)) {
+ min_off = min(min_off, tbi.g[u].min_offset);
+ max_off = max(max_off, tbi.g[u].max_offset);
+ }
+ }
+ if (min_off != max_off) {
+ /* leave all groups alone */
+ tbi.rose_squash_masks[left] = ~0ULL;
+ continue;
+ }
+ }
+
rose_group unsquashable = tbi.boundary_group_mask;
-
- for (u32 lit_id : lit_ids) {
- const rose_literal_info &info = tbi.literal_info[lit_id];
+
+ for (u32 lit_id : lit_ids) {
+ const rose_literal_info &info = tbi.literal_info[lit_id];
if (!info.delayed_ids.empty()
|| !all_of_in(info.vertices,
[&](RoseVertex v) {
return left == tbi.g[v].left; })) {
DEBUG_PRINTF("group %llu is unsquashable\n", info.group_mask);
- unsquashable |= info.group_mask;
- }
- }
-
- rose_group squash_mask = ~0ULL; /* leave all groups alone */
-
- for (u32 i = 0; i < ROSE_GROUPS_MAX; i++) {
- if (is_subset_of(tbi.group_to_literal[i], lit_ids)) {
- squash_mask &= ~(1ULL << i);
- }
- }
- squash_mask |= unsquashable;
- tbi.rose_squash_masks[left] = squash_mask;
- }
-}
-
-static
-void countFloatingLiterals(const RoseBuildImpl &tbi, u32 *total_count,
- u32 *short_count) {
- *total_count = 0;
- *short_count = 0;
+ unsquashable |= info.group_mask;
+ }
+ }
+
+ rose_group squash_mask = ~0ULL; /* leave all groups alone */
+
+ for (u32 i = 0; i < ROSE_GROUPS_MAX; i++) {
+ if (is_subset_of(tbi.group_to_literal[i], lit_ids)) {
+ squash_mask &= ~(1ULL << i);
+ }
+ }
+ squash_mask |= unsquashable;
+ tbi.rose_squash_masks[left] = squash_mask;
+ }
+}
+
+static
+void countFloatingLiterals(const RoseBuildImpl &tbi, u32 *total_count,
+ u32 *short_count) {
+ *total_count = 0;
+ *short_count = 0;
for (const rose_literal_id &lit : tbi.literals) {
- if (lit.delay) {
- continue; /* delay id's are virtual-ish */
- }
-
- if (lit.table != ROSE_FLOATING) {
- continue; /* wrong table */
- }
-
- ++*total_count;
- if (lit.s.length() <= ANCHORED_REHOME_SHORT_LEN) {
- ++*short_count;
- }
- }
-}
-
-static
-void rehomeAnchoredLiteral(RoseBuildImpl &tbi, const simple_anchored_info &sai,
- const set<u32> &lit_ids) {
- /* TODO: verify that vertices only have a single literal at the moment */
-
- DEBUG_PRINTF("rehoming ^.{%u,%u}%s\n", sai.min_bound, sai.max_bound,
- dumpString(sai.literal).c_str());
-
- /* Get a floating literal corresponding to the anchored literal */
- u32 new_literal_id = tbi.getLiteralId(sai.literal, 0, ROSE_FLOATING);
- rose_literal_info &new_lit_info = tbi.literal_info[new_literal_id];
- DEBUG_PRINTF("floating literal id -> %u\n", new_literal_id);
-
- for (u32 lit_id : lit_ids) {
- rose_literal_info &old_lit_info = tbi.literal_info[lit_id];
- assert(old_lit_info.delayed_ids.empty());
-
- for (auto v : old_lit_info.vertices) {
- /* Transfer vertex over to new literal id */
- assert(tbi.g[v].literals.size() == 1);
- tbi.g[v].literals.clear();
- tbi.g[v].literals.insert(new_literal_id);
- new_lit_info.vertices.insert(v);
-
- /* ensure bounds on the vertex's in-edge are correct */
- assert(in_degree(v, tbi.g) == 1);
- const RoseEdge &e = *in_edges(v, tbi.g).first;
- assert(tbi.g[e].minBound == sai.min_bound + sai.literal.length());
- assert(tbi.g[e].maxBound == sai.max_bound + sai.literal.length());
- tbi.g[e].minBound = sai.min_bound;
- tbi.g[e].maxBound = sai.max_bound;
- }
-
- /* mark the old literal as empty */
- old_lit_info.vertices.clear();
- }
-}
-
-static
-void rehomeAnchoredLiterals(RoseBuildImpl &tbi) {
- /* if we have many literals in the floating table, we want to push
- * literals which are anchored but deep into the floating table as they
- * are unlikely to reduce the performance of the floating table. */
- u32 total_count;
- u32 short_count;
- countFloatingLiterals(tbi, &total_count, &short_count);
-
- DEBUG_PRINTF("considering rehoming options\n");
-
- if (total_count < ANCHORED_REHOME_MIN_FLOATING
- && short_count < ANCHORED_REHOME_MIN_FLOATING_SHORT) {
- DEBUG_PRINTF("not a heavy case %u %u\n", total_count, short_count);
- return;
- }
-
- u32 min_rehome_len = ANCHORED_REHOME_SHORT_LEN + 1;
- if (short_count >= ANCHORED_REHOME_ALLOW_SHORT) {
- min_rehome_len--;
- }
-
- for (map<simple_anchored_info, set<u32> >::iterator it
- = tbi.anchored_simple.begin();
- it != tbi.anchored_simple.end();) {
- if (it->first.max_bound < ANCHORED_REHOME_DEEP
- || it->first.literal.length() < min_rehome_len) {
- ++it;
- continue;
- }
-
- rehomeAnchoredLiteral(tbi, it->first, it->second);
- tbi.anchored_simple.erase(it++);
- }
-}
-
-/** \brief Maximum number of single-byte literals to add to the small block
- * table. */
-static const size_t MAX_1BYTE_SMALL_BLOCK_LITERALS = 20;
-
-static
-void addSmallBlockLiteral(RoseBuildImpl &tbi, const simple_anchored_info &sai,
- const set<u32> &lit_ids) {
- DEBUG_PRINTF("anchored ^.{%u,%u}%s\n", sai.min_bound, sai.max_bound,
- dumpString(sai.literal).c_str());
-
- u32 lit_id = tbi.getLiteralId(sai.literal, 0, ROSE_ANCHORED_SMALL_BLOCK);
- rose_literal_info &lit_info = tbi.literal_info[lit_id];
- DEBUG_PRINTF("anchored small block literal id -> %u\n", lit_id);
-
- RoseGraph &g = tbi.g;
- const RoseVertex anchored_root = tbi.anchored_root;
-
- for (u32 old_id : lit_ids) {
- assert(old_id < tbi.literal_info.size());
- const rose_literal_info &li = tbi.literal_info[old_id];
-
+ if (lit.delay) {
+ continue; /* delay id's are virtual-ish */
+ }
+
+ if (lit.table != ROSE_FLOATING) {
+ continue; /* wrong table */
+ }
+
+ ++*total_count;
+ if (lit.s.length() <= ANCHORED_REHOME_SHORT_LEN) {
+ ++*short_count;
+ }
+ }
+}
+
+static
+void rehomeAnchoredLiteral(RoseBuildImpl &tbi, const simple_anchored_info &sai,
+ const set<u32> &lit_ids) {
+ /* TODO: verify that vertices only have a single literal at the moment */
+
+ DEBUG_PRINTF("rehoming ^.{%u,%u}%s\n", sai.min_bound, sai.max_bound,
+ dumpString(sai.literal).c_str());
+
+ /* Get a floating literal corresponding to the anchored literal */
+ u32 new_literal_id = tbi.getLiteralId(sai.literal, 0, ROSE_FLOATING);
+ rose_literal_info &new_lit_info = tbi.literal_info[new_literal_id];
+ DEBUG_PRINTF("floating literal id -> %u\n", new_literal_id);
+
+ for (u32 lit_id : lit_ids) {
+ rose_literal_info &old_lit_info = tbi.literal_info[lit_id];
+ assert(old_lit_info.delayed_ids.empty());
+
+ for (auto v : old_lit_info.vertices) {
+ /* Transfer vertex over to new literal id */
+ assert(tbi.g[v].literals.size() == 1);
+ tbi.g[v].literals.clear();
+ tbi.g[v].literals.insert(new_literal_id);
+ new_lit_info.vertices.insert(v);
+
+ /* ensure bounds on the vertex's in-edge are correct */
+ assert(in_degree(v, tbi.g) == 1);
+ const RoseEdge &e = *in_edges(v, tbi.g).first;
+ assert(tbi.g[e].minBound == sai.min_bound + sai.literal.length());
+ assert(tbi.g[e].maxBound == sai.max_bound + sai.literal.length());
+ tbi.g[e].minBound = sai.min_bound;
+ tbi.g[e].maxBound = sai.max_bound;
+ }
+
+ /* mark the old literal as empty */
+ old_lit_info.vertices.clear();
+ }
+}
+
+static
+void rehomeAnchoredLiterals(RoseBuildImpl &tbi) {
+ /* if we have many literals in the floating table, we want to push
+ * literals which are anchored but deep into the floating table as they
+ * are unlikely to reduce the performance of the floating table. */
+ u32 total_count;
+ u32 short_count;
+ countFloatingLiterals(tbi, &total_count, &short_count);
+
+ DEBUG_PRINTF("considering rehoming options\n");
+
+ if (total_count < ANCHORED_REHOME_MIN_FLOATING
+ && short_count < ANCHORED_REHOME_MIN_FLOATING_SHORT) {
+ DEBUG_PRINTF("not a heavy case %u %u\n", total_count, short_count);
+ return;
+ }
+
+ u32 min_rehome_len = ANCHORED_REHOME_SHORT_LEN + 1;
+ if (short_count >= ANCHORED_REHOME_ALLOW_SHORT) {
+ min_rehome_len--;
+ }
+
+ for (map<simple_anchored_info, set<u32> >::iterator it
+ = tbi.anchored_simple.begin();
+ it != tbi.anchored_simple.end();) {
+ if (it->first.max_bound < ANCHORED_REHOME_DEEP
+ || it->first.literal.length() < min_rehome_len) {
+ ++it;
+ continue;
+ }
+
+ rehomeAnchoredLiteral(tbi, it->first, it->second);
+ tbi.anchored_simple.erase(it++);
+ }
+}
+
+/** \brief Maximum number of single-byte literals to add to the small block
+ * table. */
+static const size_t MAX_1BYTE_SMALL_BLOCK_LITERALS = 20;
+
+static
+void addSmallBlockLiteral(RoseBuildImpl &tbi, const simple_anchored_info &sai,
+ const set<u32> &lit_ids) {
+ DEBUG_PRINTF("anchored ^.{%u,%u}%s\n", sai.min_bound, sai.max_bound,
+ dumpString(sai.literal).c_str());
+
+ u32 lit_id = tbi.getLiteralId(sai.literal, 0, ROSE_ANCHORED_SMALL_BLOCK);
+ rose_literal_info &lit_info = tbi.literal_info[lit_id];
+ DEBUG_PRINTF("anchored small block literal id -> %u\n", lit_id);
+
+ RoseGraph &g = tbi.g;
+ const RoseVertex anchored_root = tbi.anchored_root;
+
+ for (u32 old_id : lit_ids) {
+ assert(old_id < tbi.literal_info.size());
+ const rose_literal_info &li = tbi.literal_info[old_id];
+
for (auto lit_v : li.vertices) {
- // Clone vertex with the new literal ID.
- RoseVertex v = add_vertex(g[lit_v], g);
- g[v].literals.clear();
- g[v].literals.insert(lit_id);
- g[v].min_offset = sai.min_bound + sai.literal.length();
- g[v].max_offset = sai.max_bound + sai.literal.length();
- lit_info.vertices.insert(v);
-
+ // Clone vertex with the new literal ID.
+ RoseVertex v = add_vertex(g[lit_v], g);
+ g[v].literals.clear();
+ g[v].literals.insert(lit_id);
+ g[v].min_offset = sai.min_bound + sai.literal.length();
+ g[v].max_offset = sai.max_bound + sai.literal.length();
+ lit_info.vertices.insert(v);
+
RoseEdge e = add_edge(anchored_root, v, g);
g[e].minBound = sai.min_bound;
g[e].maxBound = sai.max_bound;
- }
- }
-}
-
-static
-void addSmallBlockLiteral(RoseBuildImpl &tbi, const ue2_literal &lit,
- const flat_set<ReportID> &reports) {
- DEBUG_PRINTF("lit %s, reports: %s\n", dumpString(lit).c_str(),
- as_string_list(reports).c_str());
- assert(!reports.empty());
-
- u32 lit_id = tbi.getLiteralId(lit, 0, ROSE_ANCHORED_SMALL_BLOCK);
- assert(lit_id < tbi.literal_info.size());
- rose_literal_info &lit_info = tbi.literal_info[lit_id];
-
- RoseGraph &g = tbi.g;
-
- RoseVertex v = add_vertex(g);
- g[v].literals.insert(lit_id);
- g[v].reports = reports;
-
+ }
+ }
+}
+
+static
+void addSmallBlockLiteral(RoseBuildImpl &tbi, const ue2_literal &lit,
+ const flat_set<ReportID> &reports) {
+ DEBUG_PRINTF("lit %s, reports: %s\n", dumpString(lit).c_str(),
+ as_string_list(reports).c_str());
+ assert(!reports.empty());
+
+ u32 lit_id = tbi.getLiteralId(lit, 0, ROSE_ANCHORED_SMALL_BLOCK);
+ assert(lit_id < tbi.literal_info.size());
+ rose_literal_info &lit_info = tbi.literal_info[lit_id];
+
+ RoseGraph &g = tbi.g;
+
+ RoseVertex v = add_vertex(g);
+ g[v].literals.insert(lit_id);
+ g[v].reports = reports;
+
RoseEdge e = add_edge(tbi.root, v, g);
- g[e].minBound = 0;
- g[e].maxBound = ROSE_BOUND_INF;
- g[v].min_offset = 1;
- g[v].max_offset = ROSE_BOUND_INF;
- lit_info.vertices.insert(v);
-}
-
-static
-bool stateIsSEPLiteral(const dstate_id_t &s, const symbol_t &sym,
- const raw_dfa &rdfa) {
- const dstate &ds = rdfa.states[s];
- if (!ds.reports_eod.empty() || ds.reports.empty()) {
- DEBUG_PRINTF("badly formed reports\n");
- return false;
- }
-
- DEBUG_PRINTF("examine state %u reached by sym %u\n", s, sym);
-
- for (symbol_t i = 0; i < rdfa.getImplAlphaSize(); i++) {
- const auto &s_next = ds.next[i];
- DEBUG_PRINTF("state %u -> %u on sym %u\n", s, s_next, i);
- if (s_next == DEAD_STATE) {
- continue; // dead, probably pruned
- } else if (s_next == s && i == sym) {
- continue; // self loop on same symbol
- } else if (s_next == rdfa.start_floating) {
- continue; // return to floating start
- }
-
- // We don't handle any other transitions.
- DEBUG_PRINTF("not single-byte\n");
- return false;
- }
-
- return true;
-}
-
-static
-bool extractSEPLiterals(const raw_dfa &rdfa,
- map<ue2_literal, flat_set<ReportID>> &lits_out) {
- if (rdfa.start_floating == DEAD_STATE) {
- DEBUG_PRINTF("not floating?\n");
- return false;
- }
- if (rdfa.start_anchored != rdfa.start_floating) {
- DEBUG_PRINTF("not all floating?\n");
- return false;
- }
-
- map<flat_set<ReportID>, vector<u32>> lits; // reports -> symbols
-
- const dstate &start = rdfa.states[rdfa.start_floating];
-
- const symbol_t alpha_size = rdfa.getImplAlphaSize();
- for (symbol_t i = 0; i < alpha_size; i++) {
- auto next = start.next[i];
- if (next == DEAD_STATE || next == rdfa.start_floating) {
- continue;
- }
-
- if (!stateIsSEPLiteral(next, i, rdfa)) {
- return false;
- }
- lits[rdfa.states[next].reports].push_back(i);
- }
-
- // Map from symbols back to character reachability.
- vector<CharReach> reach(alpha_size);
- for (u32 i = 0; i < N_CHARS; i++) {
- assert(rdfa.alpha_remap[i] < alpha_size);
- reach[rdfa.alpha_remap[i]].set(i);
- }
-
- for (const auto &m : lits) {
- const auto &reports = m.first;
- const auto &symbols = m.second;
-
- CharReach cr;
- for (const auto &sym : symbols) {
- cr |= reach[sym];
- }
-
- for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
- if (myisupper(i) && cr.test(mytolower(i))) {
- // ignore upper half of a nocase pair
- continue;
- }
-
- bool nocase = myislower(i) && cr.test(mytoupper(i));
- insert(&lits_out[ue2_literal((char)i, nocase)], reports);
- }
- }
-
- return true;
-}
-
-static
-bool extractSEPLiterals(const OutfixInfo &outfix, const ReportManager &rm,
- map<ue2_literal, flat_set<ReportID>> &lits_out) {
- if (outfix.minWidth != depth(1) || outfix.maxWidth != depth(1)) {
- DEBUG_PRINTF("outfix must be fixed width of one\n");
- return false;
- }
-
- for (const auto &report_id : all_reports(outfix)) {
- const auto &report = rm.getReport(report_id);
- if (!isSimpleExhaustible(report)) {
- DEBUG_PRINTF("report id %u not simple exhaustible\n", report_id);
- return false;
- }
- }
-
- // SEP cases should always become DFAs, so that's the only extract code we
- // have implemented here.
-
+ g[e].minBound = 0;
+ g[e].maxBound = ROSE_BOUND_INF;
+ g[v].min_offset = 1;
+ g[v].max_offset = ROSE_BOUND_INF;
+ lit_info.vertices.insert(v);
+}
+
+static
+bool stateIsSEPLiteral(const dstate_id_t &s, const symbol_t &sym,
+ const raw_dfa &rdfa) {
+ const dstate &ds = rdfa.states[s];
+ if (!ds.reports_eod.empty() || ds.reports.empty()) {
+ DEBUG_PRINTF("badly formed reports\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("examine state %u reached by sym %u\n", s, sym);
+
+ for (symbol_t i = 0; i < rdfa.getImplAlphaSize(); i++) {
+ const auto &s_next = ds.next[i];
+ DEBUG_PRINTF("state %u -> %u on sym %u\n", s, s_next, i);
+ if (s_next == DEAD_STATE) {
+ continue; // dead, probably pruned
+ } else if (s_next == s && i == sym) {
+ continue; // self loop on same symbol
+ } else if (s_next == rdfa.start_floating) {
+ continue; // return to floating start
+ }
+
+ // We don't handle any other transitions.
+ DEBUG_PRINTF("not single-byte\n");
+ return false;
+ }
+
+ return true;
+}
+
+static
+bool extractSEPLiterals(const raw_dfa &rdfa,
+ map<ue2_literal, flat_set<ReportID>> &lits_out) {
+ if (rdfa.start_floating == DEAD_STATE) {
+ DEBUG_PRINTF("not floating?\n");
+ return false;
+ }
+ if (rdfa.start_anchored != rdfa.start_floating) {
+ DEBUG_PRINTF("not all floating?\n");
+ return false;
+ }
+
+ map<flat_set<ReportID>, vector<u32>> lits; // reports -> symbols
+
+ const dstate &start = rdfa.states[rdfa.start_floating];
+
+ const symbol_t alpha_size = rdfa.getImplAlphaSize();
+ for (symbol_t i = 0; i < alpha_size; i++) {
+ auto next = start.next[i];
+ if (next == DEAD_STATE || next == rdfa.start_floating) {
+ continue;
+ }
+
+ if (!stateIsSEPLiteral(next, i, rdfa)) {
+ return false;
+ }
+ lits[rdfa.states[next].reports].push_back(i);
+ }
+
+ // Map from symbols back to character reachability.
+ vector<CharReach> reach(alpha_size);
+ for (u32 i = 0; i < N_CHARS; i++) {
+ assert(rdfa.alpha_remap[i] < alpha_size);
+ reach[rdfa.alpha_remap[i]].set(i);
+ }
+
+ for (const auto &m : lits) {
+ const auto &reports = m.first;
+ const auto &symbols = m.second;
+
+ CharReach cr;
+ for (const auto &sym : symbols) {
+ cr |= reach[sym];
+ }
+
+ for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
+ if (myisupper(i) && cr.test(mytolower(i))) {
+ // ignore upper half of a nocase pair
+ continue;
+ }
+
+ bool nocase = myislower(i) && cr.test(mytoupper(i));
+ insert(&lits_out[ue2_literal((char)i, nocase)], reports);
+ }
+ }
+
+ return true;
+}
+
+static
+bool extractSEPLiterals(const OutfixInfo &outfix, const ReportManager &rm,
+ map<ue2_literal, flat_set<ReportID>> &lits_out) {
+ if (outfix.minWidth != depth(1) || outfix.maxWidth != depth(1)) {
+ DEBUG_PRINTF("outfix must be fixed width of one\n");
+ return false;
+ }
+
+ for (const auto &report_id : all_reports(outfix)) {
+ const auto &report = rm.getReport(report_id);
+ if (!isSimpleExhaustible(report)) {
+ DEBUG_PRINTF("report id %u not simple exhaustible\n", report_id);
+ return false;
+ }
+ }
+
+ // SEP cases should always become DFAs, so that's the only extract code we
+ // have implemented here.
+
if (outfix.rdfa()) {
return extractSEPLiterals(*outfix.rdfa(), lits_out);
- }
-
- DEBUG_PRINTF("cannot extract literals from outfix type\n");
- return false;
-}
-
-static
-void addAnchoredSmallBlockLiterals(RoseBuildImpl &tbi) {
- if (tbi.cc.streaming) {
- DEBUG_PRINTF("not block mode\n");
- return;
- }
- if (!tbi.anchored_nfas.empty()) {
- DEBUG_PRINTF("anchored table is not purely literal\n");
- return;
- }
-
- // At the moment, we only use the small-block matcher if all our anchored
- // literals are direct reports (i.e. leaf nodes in the Rose graph).
- for (const set<u32> &lits : tbi.anchored_simple | map_values) {
- for (u32 lit_id : lits) {
- if (!tbi.isDirectReport(lit_id)) {
- DEBUG_PRINTF("not all anchored lits are direct reports\n");
- return;
- }
- }
- }
-
- vector<pair<simple_anchored_info, set<u32> > > anchored_lits;
- vector<OutfixInfo *> sep_outfixes;
- size_t oneByteLiterals = 0;
-
- for (const auto &e : tbi.anchored_simple) {
- const simple_anchored_info &sai = e.first;
- const set<u32> &lit_ids = e.second;
-
- if (sai.literal.length() + sai.min_bound > ROSE_SMALL_BLOCK_LEN) {
- DEBUG_PRINTF("skipping literal '%s' with min bound %u that cannot "
- "match inside small block width\n",
- dumpString(sai.literal).c_str(), sai.min_bound);
- }
-
- anchored_lits.push_back(make_pair(sai, lit_ids));
- if (sai.literal.length() == 1) {
- oneByteLiterals++;
- }
- }
-
- // Capture SEP outfixes as well, adding them as literals to the small block
- // table.
- map<ue2_literal, flat_set<ReportID>> sep_literals;
- for (OutfixInfo &oi : tbi.outfixes) {
- if (extractSEPLiterals(oi, tbi.rm, sep_literals)) {
- sep_outfixes.push_back(&oi);
- }
- }
-
- oneByteLiterals += sep_literals.size();
- DEBUG_PRINTF("%zu one-byte literals\n", oneByteLiterals);
- if (oneByteLiterals > MAX_1BYTE_SMALL_BLOCK_LITERALS) {
- DEBUG_PRINTF("too many one-byte literals, not building small block "
- "table!\n");
- return;
- }
-
- for (const auto &e : tbi.anchored_simple) {
- const simple_anchored_info &sai = e.first;
- const set<u32> &lit_ids = e.second;
-
- addSmallBlockLiteral(tbi, sai, lit_ids);
- }
-
- for (const auto &m : sep_literals) {
- addSmallBlockLiteral(tbi, m.first, m.second);
- }
-
- for (OutfixInfo *oi : sep_outfixes) {
- assert(oi);
- oi->in_sbmatcher = true;
- }
-}
-
-#ifndef NDEBUG
-static
-bool historiesAreValid(const RoseGraph &g) {
- for (const auto &e : edges_range(g)) {
- if (g[e].history == ROSE_ROLE_HISTORY_INVALID) {
- DEBUG_PRINTF("edge [%zu,%zu] has invalid history\n",
+ }
+
+ DEBUG_PRINTF("cannot extract literals from outfix type\n");
+ return false;
+}
+
+static
+void addAnchoredSmallBlockLiterals(RoseBuildImpl &tbi) {
+ if (tbi.cc.streaming) {
+ DEBUG_PRINTF("not block mode\n");
+ return;
+ }
+ if (!tbi.anchored_nfas.empty()) {
+ DEBUG_PRINTF("anchored table is not purely literal\n");
+ return;
+ }
+
+ // At the moment, we only use the small-block matcher if all our anchored
+ // literals are direct reports (i.e. leaf nodes in the Rose graph).
+ for (const set<u32> &lits : tbi.anchored_simple | map_values) {
+ for (u32 lit_id : lits) {
+ if (!tbi.isDirectReport(lit_id)) {
+ DEBUG_PRINTF("not all anchored lits are direct reports\n");
+ return;
+ }
+ }
+ }
+
+ vector<pair<simple_anchored_info, set<u32> > > anchored_lits;
+ vector<OutfixInfo *> sep_outfixes;
+ size_t oneByteLiterals = 0;
+
+ for (const auto &e : tbi.anchored_simple) {
+ const simple_anchored_info &sai = e.first;
+ const set<u32> &lit_ids = e.second;
+
+ if (sai.literal.length() + sai.min_bound > ROSE_SMALL_BLOCK_LEN) {
+ DEBUG_PRINTF("skipping literal '%s' with min bound %u that cannot "
+ "match inside small block width\n",
+ dumpString(sai.literal).c_str(), sai.min_bound);
+ }
+
+ anchored_lits.push_back(make_pair(sai, lit_ids));
+ if (sai.literal.length() == 1) {
+ oneByteLiterals++;
+ }
+ }
+
+ // Capture SEP outfixes as well, adding them as literals to the small block
+ // table.
+ map<ue2_literal, flat_set<ReportID>> sep_literals;
+ for (OutfixInfo &oi : tbi.outfixes) {
+ if (extractSEPLiterals(oi, tbi.rm, sep_literals)) {
+ sep_outfixes.push_back(&oi);
+ }
+ }
+
+ oneByteLiterals += sep_literals.size();
+ DEBUG_PRINTF("%zu one-byte literals\n", oneByteLiterals);
+ if (oneByteLiterals > MAX_1BYTE_SMALL_BLOCK_LITERALS) {
+ DEBUG_PRINTF("too many one-byte literals, not building small block "
+ "table!\n");
+ return;
+ }
+
+ for (const auto &e : tbi.anchored_simple) {
+ const simple_anchored_info &sai = e.first;
+ const set<u32> &lit_ids = e.second;
+
+ addSmallBlockLiteral(tbi, sai, lit_ids);
+ }
+
+ for (const auto &m : sep_literals) {
+ addSmallBlockLiteral(tbi, m.first, m.second);
+ }
+
+ for (OutfixInfo *oi : sep_outfixes) {
+ assert(oi);
+ oi->in_sbmatcher = true;
+ }
+}
+
+#ifndef NDEBUG
+static
+bool historiesAreValid(const RoseGraph &g) {
+ for (const auto &e : edges_range(g)) {
+ if (g[e].history == ROSE_ROLE_HISTORY_INVALID) {
+ DEBUG_PRINTF("edge [%zu,%zu] has invalid history\n",
g[source(e, g)].index, g[target(e, g)].index);
- return false;
- }
- }
-
- return true;
-}
-
-/**
- * Assertion: Returns true if we have a reference hanging around to a vertex
- * that no longer exists in the graph.
- */
-static
-bool danglingVertexRef(RoseBuildImpl &tbi) {
- RoseGraph::vertex_iterator vi, ve;
- tie(vi, ve) = vertices(tbi.g);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Assertion: Returns true if we have a reference hanging around to a vertex
+ * that no longer exists in the graph.
+ */
+static
+bool danglingVertexRef(RoseBuildImpl &tbi) {
+ RoseGraph::vertex_iterator vi, ve;
+ tie(vi, ve) = vertices(tbi.g);
const unordered_set<RoseVertex> valid_vertices(vi, ve);
-
- if (!contains(valid_vertices, tbi.anchored_root)) {
+
+ if (!contains(valid_vertices, tbi.anchored_root)) {
DEBUG_PRINTF("anchored root vertex %zu not in graph\n",
tbi.g[tbi.anchored_root].index);
- return true;
- }
-
- for (const auto &e : tbi.ghost) {
- if (!contains(valid_vertices, e.first)) {
+ return true;
+ }
+
+ for (const auto &e : tbi.ghost) {
+ if (!contains(valid_vertices, e.first)) {
DEBUG_PRINTF("ghost key vertex %zu not in graph\n",
tbi.g[e.first].index);
- return true;
- }
- if (!contains(valid_vertices, e.second)) {
+ return true;
+ }
+ if (!contains(valid_vertices, e.second)) {
DEBUG_PRINTF("ghost value vertex %zu not in graph\n",
tbi.g[e.second].index);
- return true;
- }
- }
-
- return false;
-}
-
-static
-bool roleOffsetsAreValid(const RoseGraph &g) {
- for (auto v : vertices_range(g)) {
- if (g[v].min_offset >= ROSE_BOUND_INF) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static
+bool roleOffsetsAreValid(const RoseGraph &g) {
+ for (auto v : vertices_range(g)) {
+ if (g[v].min_offset >= ROSE_BOUND_INF) {
DEBUG_PRINTF("invalid min_offset for role %zu\n", g[v].index);
- return false;
- }
- if (g[v].min_offset > g[v].max_offset) {
+ return false;
+ }
+ if (g[v].min_offset > g[v].max_offset) {
DEBUG_PRINTF("min_offset > max_offset for %zu\n", g[v].index);
- return false;
- }
- }
- return true;
-}
-#endif // NDEBUG
-
+ return false;
+ }
+ }
+ return true;
+}
+#endif // NDEBUG
+
bytecode_ptr<RoseEngine> RoseBuildImpl::buildRose(u32 minWidth) {
dumpRoseGraph(*this, "rose_early.dot");
-
- // Early check for Rose implementability.
- assert(canImplementGraphs(*this));
-
- // Sanity check vertex role offsets.
- assert(roleOffsetsAreValid(g));
-
- convertPrefixToBounds(*this);
-
- // Turn flood-prone suffixes into suffix NFAs.
- convertFloodProneSuffixes(*this);
-
- // Turn repeats into Castle prototypes.
- makeCastles(*this);
-
- rehomeAnchoredLiterals(*this);
-
- // If we've got a very small number of EOD-anchored literals, consider
- // moving them into the floating table so that we only have one literal
+
+ // Early check for Rose implementability.
+ assert(canImplementGraphs(*this));
+
+ // Sanity check vertex role offsets.
+ assert(roleOffsetsAreValid(g));
+
+ convertPrefixToBounds(*this);
+
+ // Turn flood-prone suffixes into suffix NFAs.
+ convertFloodProneSuffixes(*this);
+
+ // Turn repeats into Castle prototypes.
+ makeCastles(*this);
+
+ rehomeAnchoredLiterals(*this);
+
+ // If we've got a very small number of EOD-anchored literals, consider
+ // moving them into the floating table so that we only have one literal
// matcher to run. Note that this needs to happen before
// addAnchoredSmallBlockLiterals as it may create anchored literals.
- assert(roleOffsetsAreValid(g));
- stealEodVertices(*this);
-
- addAnchoredSmallBlockLiterals(*this);
-
- // Merge duplicate leaf nodes
- dedupeSuffixes(*this);
- if (cc.grey.roseGraphReduction) {
- mergeDupeLeaves(*this);
- uncalcLeaves(*this);
- }
-
- assert(roleOffsetsAreValid(g));
- handleMixedSensitivity();
-
- assignHistories(*this);
-
- convertAnchPrefixToBounds(*this);
-
- // Do some final graph reduction.
- dedupeLeftfixes(*this);
- aliasRoles(*this, false); // Don't merge leftfixes.
- dedupeLeftfixes(*this);
- uncalcLeaves(*this);
-
- /* note the leftfixes which do not need to keep state across stream
- boundaries */
- findTransientLeftfixes();
-
- dedupeLeftfixesVariableLag(*this);
- mergeLeftfixesVariableLag(*this);
- mergeSmallLeftfixes(*this);
- mergeCastleLeftfixes(*this);
-
- // Do a rose-merging aliasing pass.
- aliasRoles(*this, true);
-
- // Merging of suffixes _below_ role aliasing, as otherwise we'd have to
- // teach role aliasing about suffix tops.
- mergeCastleSuffixes(*this);
- mergePuffixes(*this);
- mergeAcyclicSuffixes(*this);
- mergeSmallSuffixes(*this);
-
- // Convert Castles that would be better off as NFAs back to NGHolder
- // infixes/suffixes.
- if (unmakeCastles(*this)) {
- // We may be able to save some stream state by merging the newly
- // "unmade" Castles.
- mergeSmallSuffixes(*this);
- mergeSmallLeftfixes(*this);
- }
-
+ assert(roleOffsetsAreValid(g));
+ stealEodVertices(*this);
+
+ addAnchoredSmallBlockLiterals(*this);
+
+ // Merge duplicate leaf nodes
+ dedupeSuffixes(*this);
+ if (cc.grey.roseGraphReduction) {
+ mergeDupeLeaves(*this);
+ uncalcLeaves(*this);
+ }
+
+ assert(roleOffsetsAreValid(g));
+ handleMixedSensitivity();
+
+ assignHistories(*this);
+
+ convertAnchPrefixToBounds(*this);
+
+ // Do some final graph reduction.
+ dedupeLeftfixes(*this);
+ aliasRoles(*this, false); // Don't merge leftfixes.
+ dedupeLeftfixes(*this);
+ uncalcLeaves(*this);
+
+ /* note the leftfixes which do not need to keep state across stream
+ boundaries */
+ findTransientLeftfixes();
+
+ dedupeLeftfixesVariableLag(*this);
+ mergeLeftfixesVariableLag(*this);
+ mergeSmallLeftfixes(*this);
+ mergeCastleLeftfixes(*this);
+
+ // Do a rose-merging aliasing pass.
+ aliasRoles(*this, true);
+
+ // Merging of suffixes _below_ role aliasing, as otherwise we'd have to
+ // teach role aliasing about suffix tops.
+ mergeCastleSuffixes(*this);
+ mergePuffixes(*this);
+ mergeAcyclicSuffixes(*this);
+ mergeSmallSuffixes(*this);
+
+ // Convert Castles that would be better off as NFAs back to NGHolder
+ // infixes/suffixes.
+ if (unmakeCastles(*this)) {
+ // We may be able to save some stream state by merging the newly
+ // "unmade" Castles.
+ mergeSmallSuffixes(*this);
+ mergeSmallLeftfixes(*this);
+ }
+
assert(!hasOrphanedTops(*this));
- // Do a rose-merging aliasing pass.
- aliasRoles(*this, true);
+ // Do a rose-merging aliasing pass.
+ aliasRoles(*this, true);
assert(!hasOrphanedTops(*this));
-
- // Run a merge pass over the outfixes as well.
- mergeOutfixes(*this);
-
- assert(!danglingVertexRef(*this));
+
+ // Run a merge pass over the outfixes as well.
+ mergeOutfixes(*this);
+
+ assert(!danglingVertexRef(*this));
assert(!hasOrphanedTops(*this));
-
+
findMoreLiteralMasks(*this);
assignGroupsToLiterals(*this);
assignGroupsToRoles(*this);
- findGroupSquashers(*this);
-
- /* final prep work */
- remapCastleTops(*this);
+ findGroupSquashers(*this);
+
+ /* final prep work */
+ remapCastleTops(*this);
optimiseRoseTops(*this);
- buildRoseSquashMasks(*this);
-
- rm.assignDkeys(this);
-
- /* transfer mpv outfix to main queue */
- if (mpv_outfix) {
- outfixes.push_back(move(*mpv_outfix));
- mpv_outfix = nullptr;
- }
-
- assert(canImplementGraphs(*this));
- assert(!hasOrphanedTops(*this));
- assert(roleOffsetsAreValid(g));
- assert(historiesAreValid(g));
-
+ buildRoseSquashMasks(*this);
+
+ rm.assignDkeys(this);
+
+ /* transfer mpv outfix to main queue */
+ if (mpv_outfix) {
+ outfixes.push_back(move(*mpv_outfix));
+ mpv_outfix = nullptr;
+ }
+
+ assert(canImplementGraphs(*this));
+ assert(!hasOrphanedTops(*this));
+ assert(roleOffsetsAreValid(g));
+ assert(historiesAreValid(g));
+
dumpRoseGraph(*this, "rose_pre_norm.dot");
-
- return buildFinalEngine(minWidth);
-}
-
-} // namespace ue2
+
+ return buildFinalEngine(minWidth);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_convert.cpp b/contrib/libs/hyperscan/src/rose/rose_build_convert.cpp
index f13fac04e1..33351099f7 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_convert.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_convert.cpp
@@ -1,364 +1,364 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_convert.h"
-
-#include "grey.h"
-#include "rose_build.h"
-#include "rose_build_impl.h"
-#include "rose_build_util.h"
-#include "ue2common.h"
-#include "hwlm/hwlm_build.h"
-#include "nfa/castlecompile.h"
-#include "nfa/limex_limits.h"
-#include "nfagraph/ng_dump.h"
-#include "nfagraph/ng_limex.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_reports.h"
-#include "nfagraph/ng_split.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_width.h"
-#include "util/bitutils.h"
-#include "util/charreach.h"
-#include "util/charreach_util.h"
-#include "util/compile_context.h"
-#include "util/depth.h"
-#include "util/graph_range.h"
-#include "util/make_unique.h"
-#include "util/order_check.h"
-#include "util/ue2string.h"
-
-#include <algorithm>
-#include <map>
-#include <queue>
-#include <set>
-#include <string>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_convert.h"
+
+#include "grey.h"
+#include "rose_build.h"
+#include "rose_build_impl.h"
+#include "rose_build_util.h"
+#include "ue2common.h"
+#include "hwlm/hwlm_build.h"
+#include "nfa/castlecompile.h"
+#include "nfa/limex_limits.h"
+#include "nfagraph/ng_dump.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_split.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
+#include "util/bitutils.h"
+#include "util/charreach.h"
+#include "util/charreach_util.h"
+#include "util/compile_context.h"
+#include "util/depth.h"
+#include "util/graph_range.h"
+#include "util/make_unique.h"
+#include "util/order_check.h"
+#include "util/ue2string.h"
+
+#include <algorithm>
+#include <map>
+#include <queue>
+#include <set>
+#include <string>
#include <unordered_map>
#include <utility>
-#include <vector>
-
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_values;
-
-namespace ue2 {
-
-static
-NFAVertex addHolderVertex(const CharReach &cr, NGHolder &out) {
- assert(cr.any());
- NFAVertex v = add_vertex(out);
- out[v].char_reach = cr;
- return v;
-}
-
-static
-size_t suffixFloodLen(const ue2_literal &s) {
- if (s.empty()) {
- return 0;
- }
-
- const ue2_literal::elem &c = s.back();
- auto it = find_if(s.rbegin(), s.rend(),
+#include <vector>
+
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_values;
+
+namespace ue2 {
+
+static
+NFAVertex addHolderVertex(const CharReach &cr, NGHolder &out) {
+ assert(cr.any());
+ NFAVertex v = add_vertex(out);
+ out[v].char_reach = cr;
+ return v;
+}
+
+static
+size_t suffixFloodLen(const ue2_literal &s) {
+ if (s.empty()) {
+ return 0;
+ }
+
+ const ue2_literal::elem &c = s.back();
+ auto it = find_if(s.rbegin(), s.rend(),
[&c](const ue2_literal::elem &e) { return e != c; });
- return distance(s.rbegin(), it);
-}
-
-static
-unique_ptr<NGHolder> makeFloodProneSuffix(const ue2_literal &s, size_t len,
- const flat_set<ReportID> &reports) {
- assert(len < s.length());
- assert(!reports.empty());
-
- unique_ptr<NGHolder> h = ue2::make_unique<NGHolder>(NFA_SUFFIX);
-
- NFAVertex u = h->start;
- for (auto it = s.begin() + s.length() - len; it != s.end(); ++it) {
- NFAVertex v = addHolderVertex(*it, *h);
+ return distance(s.rbegin(), it);
+}
+
+static
+unique_ptr<NGHolder> makeFloodProneSuffix(const ue2_literal &s, size_t len,
+ const flat_set<ReportID> &reports) {
+ assert(len < s.length());
+ assert(!reports.empty());
+
+ unique_ptr<NGHolder> h = ue2::make_unique<NGHolder>(NFA_SUFFIX);
+
+ NFAVertex u = h->start;
+ for (auto it = s.begin() + s.length() - len; it != s.end(); ++it) {
+ NFAVertex v = addHolderVertex(*it, *h);
NFAEdge e = add_edge(u, v, *h);
if (u == h->start) {
(*h)[e].tops.insert(DEFAULT_TOP);
}
- u = v;
- }
-
- (*h)[u].reports.insert(reports.begin(), reports.end());
- add_edge(u, h->accept, *h);
- return h;
-}
-
-static
-unique_ptr<NGHolder> makeRosePrefix(const ue2_literal &s) {
- unique_ptr<NGHolder> h = ue2::make_unique<NGHolder>(NFA_PREFIX);
-
- NFAVertex u = h->startDs;
- for (const auto &c : s) {
- NFAVertex v = addHolderVertex(c, *h);
- add_edge(u, v, *h);
- u = v;
- }
- add_edge(u, h->accept, *h);
- return h;
-}
-
-static
-void replaceWithLitPrefix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id,
- const rose_literal_id &lit, size_t suffixlen,
- size_t delay) {
- assert(suffixlen < lit.s.length());
-
- DEBUG_PRINTF("replacing '%s' with prefix, length=%zu, delay=%zu\n",
- dumpString(lit.s).c_str(), lit.s.length() - suffixlen, delay);
-
- RoseGraph &g = tbi.g;
- ue2_literal new_lit = lit.s.substr(0, lit.s.length() - suffixlen);
- u32 new_id = tbi.getLiteralId(new_lit, delay, ROSE_FLOATING);
- rose_literal_info &old_info = tbi.literal_info.at(lit_id);
- old_info.vertices.erase(v);
- tbi.literal_info.at(new_id).vertices.insert(v);
- g[v].literals.clear();
- g[v].literals.insert(new_id);
-}
-
-static
-bool delayLiteralWithPrefix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id,
- const rose_literal_id &lit, size_t suffixlen) {
- if (suffixlen > MAX_DELAY) {
- DEBUG_PRINTF("delay too large\n");
- return false;
- }
-
- if (!tbi.isDirectReport(lit_id)) {
- DEBUG_PRINTF("literal is not direct report\n");
- return false;
- }
-
- if (tbi.cc.streaming &&
- lit.s.length() > tbi.cc.grey.maxHistoryAvailable + 1) {
- DEBUG_PRINTF("insufficient history to delay literal of len %zu\n",
- lit.s.length());
- return false;
- }
-
- shared_ptr<NGHolder> h = makeRosePrefix(lit.s);
- ReportID prefix_report = 0;
+ u = v;
+ }
+
+ (*h)[u].reports.insert(reports.begin(), reports.end());
+ add_edge(u, h->accept, *h);
+ return h;
+}
+
+static
+unique_ptr<NGHolder> makeRosePrefix(const ue2_literal &s) {
+ unique_ptr<NGHolder> h = ue2::make_unique<NGHolder>(NFA_PREFIX);
+
+ NFAVertex u = h->startDs;
+ for (const auto &c : s) {
+ NFAVertex v = addHolderVertex(c, *h);
+ add_edge(u, v, *h);
+ u = v;
+ }
+ add_edge(u, h->accept, *h);
+ return h;
+}
+
+static
+void replaceWithLitPrefix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id,
+ const rose_literal_id &lit, size_t suffixlen,
+ size_t delay) {
+ assert(suffixlen < lit.s.length());
+
+ DEBUG_PRINTF("replacing '%s' with prefix, length=%zu, delay=%zu\n",
+ dumpString(lit.s).c_str(), lit.s.length() - suffixlen, delay);
+
+ RoseGraph &g = tbi.g;
+ ue2_literal new_lit = lit.s.substr(0, lit.s.length() - suffixlen);
+ u32 new_id = tbi.getLiteralId(new_lit, delay, ROSE_FLOATING);
+ rose_literal_info &old_info = tbi.literal_info.at(lit_id);
+ old_info.vertices.erase(v);
+ tbi.literal_info.at(new_id).vertices.insert(v);
+ g[v].literals.clear();
+ g[v].literals.insert(new_id);
+}
+
+static
+bool delayLiteralWithPrefix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id,
+ const rose_literal_id &lit, size_t suffixlen) {
+ if (suffixlen > MAX_DELAY) {
+ DEBUG_PRINTF("delay too large\n");
+ return false;
+ }
+
+ if (!tbi.isDirectReport(lit_id)) {
+ DEBUG_PRINTF("literal is not direct report\n");
+ return false;
+ }
+
+ if (tbi.cc.streaming &&
+ lit.s.length() > tbi.cc.grey.maxHistoryAvailable + 1) {
+ DEBUG_PRINTF("insufficient history to delay literal of len %zu\n",
+ lit.s.length());
+ return false;
+ }
+
+ shared_ptr<NGHolder> h = makeRosePrefix(lit.s);
+ ReportID prefix_report = 0;
set_report(*h, prefix_report);
-
- if (!isImplementableNFA(*h, &tbi.rm, tbi.cc)) {
- DEBUG_PRINTF("prefix not implementable\n");
- return false;
- }
-
- RoseGraph &g = tbi.g;
- assert(!g[v].left);
- g[v].left.graph = h;
- g[v].left.lag = 0;
- g[v].left.leftfix_report = prefix_report;
-
- // Swap v's literal for a shorter one, delayed by suffix len.
- replaceWithLitPrefix(tbi, v, lit_id, lit, suffixlen, suffixlen);
-
- return true;
-}
-
-static
-void convertFloodProneSuffix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id,
- const rose_literal_id &lit, size_t suffixlen) {
- DEBUG_PRINTF("flood-prone leaf '%s'\n", dumpString(lit.s).c_str());
- DEBUG_PRINTF("turning last %zu chars into a suffix NFA\n", suffixlen);
- RoseGraph &g = tbi.g;
- assert(!g[v].eod_accept);
-
- // If we're a direct report literal, we may be able to convert this case
- // into a delayed literal with a (very boring) transient prefix that
- // handles our flood-prone suffix.
- if (delayLiteralWithPrefix(tbi, v, lit_id, lit, suffixlen)) {
- DEBUG_PRINTF("implemented as delayed literal with a rose prefix\n");
- return;
- }
-
- // General case: create a suffix that implements the flood-prone portion.
-
- // Create the NFA.
- auto h = makeFloodProneSuffix(lit.s, suffixlen, g[v].reports);
- if (!isImplementableNFA(*h, &tbi.rm, tbi.cc)) {
- DEBUG_PRINTF("not implementable\n");
- return;
- }
-
- // Apply the NFA.
- assert(!g[v].suffix);
- g[v].suffix.graph = move(h);
- g[v].reports.clear();
-
- // Swap v's literal for a shorter one.
- replaceWithLitPrefix(tbi, v, lit_id, lit, suffixlen, 0);
-
- // It's possible that min_offset might be an underestimate, so we
- // subtract min(min_offset, suffixlen) for safety.
- g[v].min_offset -= min((size_t)g[v].min_offset, suffixlen);
-
- if (g[v].max_offset < ROSE_BOUND_INF) {
- assert(g[v].max_offset >= suffixlen);
- g[v].max_offset -= suffixlen;
- }
-}
-
-/**
- * Collect an estimate of the number of literals in the floating table, and use
- * this to estimate the flood prone suffix length.
- */
-static
-size_t findFloodProneSuffixLen(const RoseBuildImpl &tbi) {
- size_t numLiterals = 0;
+
+ if (!isImplementableNFA(*h, &tbi.rm, tbi.cc)) {
+ DEBUG_PRINTF("prefix not implementable\n");
+ return false;
+ }
+
+ RoseGraph &g = tbi.g;
+ assert(!g[v].left);
+ g[v].left.graph = h;
+ g[v].left.lag = 0;
+ g[v].left.leftfix_report = prefix_report;
+
+ // Swap v's literal for a shorter one, delayed by suffix len.
+ replaceWithLitPrefix(tbi, v, lit_id, lit, suffixlen, suffixlen);
+
+ return true;
+}
+
+static
+void convertFloodProneSuffix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id,
+ const rose_literal_id &lit, size_t suffixlen) {
+ DEBUG_PRINTF("flood-prone leaf '%s'\n", dumpString(lit.s).c_str());
+ DEBUG_PRINTF("turning last %zu chars into a suffix NFA\n", suffixlen);
+ RoseGraph &g = tbi.g;
+ assert(!g[v].eod_accept);
+
+ // If we're a direct report literal, we may be able to convert this case
+ // into a delayed literal with a (very boring) transient prefix that
+ // handles our flood-prone suffix.
+ if (delayLiteralWithPrefix(tbi, v, lit_id, lit, suffixlen)) {
+ DEBUG_PRINTF("implemented as delayed literal with a rose prefix\n");
+ return;
+ }
+
+ // General case: create a suffix that implements the flood-prone portion.
+
+ // Create the NFA.
+ auto h = makeFloodProneSuffix(lit.s, suffixlen, g[v].reports);
+ if (!isImplementableNFA(*h, &tbi.rm, tbi.cc)) {
+ DEBUG_PRINTF("not implementable\n");
+ return;
+ }
+
+ // Apply the NFA.
+ assert(!g[v].suffix);
+ g[v].suffix.graph = move(h);
+ g[v].reports.clear();
+
+ // Swap v's literal for a shorter one.
+ replaceWithLitPrefix(tbi, v, lit_id, lit, suffixlen, 0);
+
+ // It's possible that min_offset might be an underestimate, so we
+ // subtract min(min_offset, suffixlen) for safety.
+ g[v].min_offset -= min((size_t)g[v].min_offset, suffixlen);
+
+ if (g[v].max_offset < ROSE_BOUND_INF) {
+ assert(g[v].max_offset >= suffixlen);
+ g[v].max_offset -= suffixlen;
+ }
+}
+
+/**
+ * Collect an estimate of the number of literals in the floating table, and use
+ * this to estimate the flood prone suffix length.
+ */
+static
+size_t findFloodProneSuffixLen(const RoseBuildImpl &tbi) {
+ size_t numLiterals = 0;
for (const rose_literal_id &lit : tbi.literals) {
- if (lit.delay) {
- continue; // delay ids are virtual-ish
- }
- if (lit.table != ROSE_FLOATING) {
- continue;
- }
-
- numLiterals++;
- }
-
- return hwlmFloodProneSuffixLen(numLiterals, tbi.cc);
-}
-
-/**
- * \brief Convert flood-prone literal suffixes into suffix NFAs.
- *
- * For any trailing string in Rose (string cannot lead to more Rose roles or
- * NFAs, etc) ending with a continuous run of a single character with more than
- * 3 copies of that single character,
- *
- * If the result of removing all but 2 copies of that character yields a string
- * that is greater than FLOOD_PRONE_LIT_MIN_LENGTH characters, remove those
- * final characters from the literal and move them into a suffix NFA.
- */
-void convertFloodProneSuffixes(RoseBuildImpl &tbi) {
- static const size_t FLOOD_PRONE_LIT_MIN_LENGTH = 5;
-
- if (!tbi.cc.grey.roseConvertFloodProneSuffixes) {
- return;
- }
-
- const size_t floodProneLen = findFloodProneSuffixLen(tbi);
- DEBUG_PRINTF("flood prone suffix len = %zu\n", floodProneLen);
-
- RoseGraph &g = tbi.g;
-
- for (auto v : vertices_range(g)) {
- if (!isLeafNode(v, g)) {
- continue;
- }
-
- if (g[v].reports.empty()) {
- continue;
- }
-
- // TODO: currently only boring vertices.
- if (!g[v].isBoring()) {
- continue;
- }
-
- // Currently only handles vertices with a single literal (should always
- // be the case this early in Rose construction).
- if (g[v].literals.size() != 1) {
- continue;
- }
-
- u32 lit_id = *g[v].literals.begin();
+ if (lit.delay) {
+ continue; // delay ids are virtual-ish
+ }
+ if (lit.table != ROSE_FLOATING) {
+ continue;
+ }
+
+ numLiterals++;
+ }
+
+ return hwlmFloodProneSuffixLen(numLiterals, tbi.cc);
+}
+
+/**
+ * \brief Convert flood-prone literal suffixes into suffix NFAs.
+ *
+ * For any trailing string in Rose (string cannot lead to more Rose roles or
+ * NFAs, etc) ending with a continuous run of a single character with more than
+ * 3 copies of that single character,
+ *
+ * If the result of removing all but 2 copies of that character yields a string
+ * that is greater than FLOOD_PRONE_LIT_MIN_LENGTH characters, remove those
+ * final characters from the literal and move them into a suffix NFA.
+ */
+void convertFloodProneSuffixes(RoseBuildImpl &tbi) {
+ static const size_t FLOOD_PRONE_LIT_MIN_LENGTH = 5;
+
+ if (!tbi.cc.grey.roseConvertFloodProneSuffixes) {
+ return;
+ }
+
+ const size_t floodProneLen = findFloodProneSuffixLen(tbi);
+ DEBUG_PRINTF("flood prone suffix len = %zu\n", floodProneLen);
+
+ RoseGraph &g = tbi.g;
+
+ for (auto v : vertices_range(g)) {
+ if (!isLeafNode(v, g)) {
+ continue;
+ }
+
+ if (g[v].reports.empty()) {
+ continue;
+ }
+
+ // TODO: currently only boring vertices.
+ if (!g[v].isBoring()) {
+ continue;
+ }
+
+ // Currently only handles vertices with a single literal (should always
+ // be the case this early in Rose construction).
+ if (g[v].literals.size() != 1) {
+ continue;
+ }
+
+ u32 lit_id = *g[v].literals.begin();
const rose_literal_id &lit = tbi.literals.at(lit_id);
-
- // anchored or delayed literals need thought.
- if (lit.table != ROSE_FLOATING || lit.delay) {
- continue;
- }
-
- // don't do this to literals with msk/cmp.
- if (!lit.msk.empty()) {
- continue;
- }
-
- // Can't safely do this operation to vertices with delayed
- // predecessors.
- if (tbi.hasDelayPred(v)) {
- DEBUG_PRINTF("delayed pred\n");
- continue;
- }
-
- if (lit.s.length() <= FLOOD_PRONE_LIT_MIN_LENGTH) {
- DEBUG_PRINTF("literal is short enough already\n");
- continue;
- }
-
- size_t floodLen = suffixFloodLen(lit.s);
- if (floodLen < floodProneLen) {
- DEBUG_PRINTF("literal not flood-prone\n");
- continue;
- }
-
- if (floodLen == lit.s.length()) {
- DEBUG_PRINTF("whole literal is a flood\n");
- // Removing the part of the flood from the end of the literal would
- // leave us with a shorter, but still flood-prone, prefix. Better
- // to leave it alone.
- continue;
- }
-
- size_t suffixLen = floodLen - (floodProneLen - 1);
- if (lit.s.length() - suffixLen < FLOOD_PRONE_LIT_MIN_LENGTH) {
- DEBUG_PRINTF("removing flood would leave literal too short\n");
- continue;
- }
-
- convertFloodProneSuffix(tbi, v, lit_id, lit, suffixLen);
- }
-}
-
-static
-CharReach getReachOfNormalVertex(const NGHolder &g) {
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
- return g[v].char_reach;
- }
- assert(0);
- return CharReach();
-}
-
+
+ // anchored or delayed literals need thought.
+ if (lit.table != ROSE_FLOATING || lit.delay) {
+ continue;
+ }
+
+ // don't do this to literals with msk/cmp.
+ if (!lit.msk.empty()) {
+ continue;
+ }
+
+ // Can't safely do this operation to vertices with delayed
+ // predecessors.
+ if (tbi.hasDelayPred(v)) {
+ DEBUG_PRINTF("delayed pred\n");
+ continue;
+ }
+
+ if (lit.s.length() <= FLOOD_PRONE_LIT_MIN_LENGTH) {
+ DEBUG_PRINTF("literal is short enough already\n");
+ continue;
+ }
+
+ size_t floodLen = suffixFloodLen(lit.s);
+ if (floodLen < floodProneLen) {
+ DEBUG_PRINTF("literal not flood-prone\n");
+ continue;
+ }
+
+ if (floodLen == lit.s.length()) {
+ DEBUG_PRINTF("whole literal is a flood\n");
+ // Removing the part of the flood from the end of the literal would
+ // leave us with a shorter, but still flood-prone, prefix. Better
+ // to leave it alone.
+ continue;
+ }
+
+ size_t suffixLen = floodLen - (floodProneLen - 1);
+ if (lit.s.length() - suffixLen < FLOOD_PRONE_LIT_MIN_LENGTH) {
+ DEBUG_PRINTF("removing flood would leave literal too short\n");
+ continue;
+ }
+
+ convertFloodProneSuffix(tbi, v, lit_id, lit, suffixLen);
+ }
+}
+
+static
+CharReach getReachOfNormalVertex(const NGHolder &g) {
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ return g[v].char_reach;
+ }
+ assert(0);
+ return CharReach();
+}
+
/**
* \brief Set the edge bounds and appropriate history on the given edge in the
* Rose graph.
*/
-static
+static
void setEdgeBounds(RoseGraph &g, const RoseEdge &e, u32 min_bound,
u32 max_bound) {
assert(min_bound <= max_bound);
@@ -375,444 +375,444 @@ void setEdgeBounds(RoseGraph &g, const RoseEdge &e, u32 min_bound,
}
static
-bool handleStartPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
- const RoseEdge &e_old, RoseVertex ar,
- vector<RoseEdge> *to_delete) {
- DEBUG_PRINTF("hi\n");
-
- /* check for prefix cliches connected to start (^.{N,M}) */
- if (!getReachOfNormalVertex(h).all()) {
- DEBUG_PRINTF(":(\n");
- return false;
- }
-
- PureRepeat repeat;
- if (!isPureRepeat(h, repeat)) {
- DEBUG_PRINTF(":(\n");
- return false;
- }
-
- assert(repeat.bounds.min.is_finite());
- assert(repeat.bounds.max.is_reachable());
- assert(repeat.bounds.min <= repeat.bounds.max);
-
- DEBUG_PRINTF("prefix is ^.{%s,%s}\n", repeat.bounds.min.str().c_str(),
- repeat.bounds.max.str().c_str());
-
- /* update bounds on edge */
-
- // Convert to Rose graph bounds, which are not (yet?) depth classes.
- u32 bound_min = repeat.bounds.min;
- u32 bound_max =
- repeat.bounds.max.is_finite() ? (u32)repeat.bounds.max : ROSE_BOUND_INF;
-
- if (source(e_old, g) == ar) {
- assert(g[e_old].minBound <= bound_min);
- assert(g[e_old].maxBound >= bound_max);
+bool handleStartPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
+ const RoseEdge &e_old, RoseVertex ar,
+ vector<RoseEdge> *to_delete) {
+ DEBUG_PRINTF("hi\n");
+
+ /* check for prefix cliches connected to start (^.{N,M}) */
+ if (!getReachOfNormalVertex(h).all()) {
+ DEBUG_PRINTF(":(\n");
+ return false;
+ }
+
+ PureRepeat repeat;
+ if (!isPureRepeat(h, repeat)) {
+ DEBUG_PRINTF(":(\n");
+ return false;
+ }
+
+ assert(repeat.bounds.min.is_finite());
+ assert(repeat.bounds.max.is_reachable());
+ assert(repeat.bounds.min <= repeat.bounds.max);
+
+ DEBUG_PRINTF("prefix is ^.{%s,%s}\n", repeat.bounds.min.str().c_str(),
+ repeat.bounds.max.str().c_str());
+
+ /* update bounds on edge */
+
+ // Convert to Rose graph bounds, which are not (yet?) depth classes.
+ u32 bound_min = repeat.bounds.min;
+ u32 bound_max =
+ repeat.bounds.max.is_finite() ? (u32)repeat.bounds.max : ROSE_BOUND_INF;
+
+ if (source(e_old, g) == ar) {
+ assert(g[e_old].minBound <= bound_min);
+ assert(g[e_old].maxBound >= bound_max);
setEdgeBounds(g, e_old, bound_min, bound_max);
- } else {
+ } else {
RoseEdge e_new = add_edge(ar, v, g);
setEdgeBounds(g, e_new, bound_min, bound_max);
- to_delete->push_back(e_old);
- }
-
- g[v].left.reset(); /* clear the prefix info */
- return true;
-}
-
-static
-bool handleStartDsPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
- const RoseEdge &e) {
- DEBUG_PRINTF("hi\n");
- /* check for prefix cliches connected to start-ds (.{N}, ^.{N,}) */
- u32 repeatCount = 0;
- NFAVertex hu = h.startDs;
-
+ to_delete->push_back(e_old);
+ }
+
+ g[v].left.reset(); /* clear the prefix info */
+ return true;
+}
+
+static
+bool handleStartDsPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
+ const RoseEdge &e) {
+ DEBUG_PRINTF("hi\n");
+ /* check for prefix cliches connected to start-ds (.{N}, ^.{N,}) */
+ u32 repeatCount = 0;
+ NFAVertex hu = h.startDs;
+
auto start_succ = succs<set<NFAVertex>>(h.start, h);
auto startds_succ = succs<set<NFAVertex>>(h.startDs, h);
-
- if (!is_subset_of(start_succ, startds_succ)) {
- DEBUG_PRINTF("not a simple chain\n");
- return false;
- }
-
- set<NFAVertex> seen;
- do {
- if (!h[hu].char_reach.all()) {
- return false;
- }
- NFAVertex hv = getSoleDestVertex(h, hu);
- if (!hv) {
- return false;
- }
- if (contains(seen, hv)) {
- assert(0);
- return false;
- }
- hu = hv;
- repeatCount++;
- if (hu == h.accept) {
- break;
- }
- } while(1);
-
- assert(hu == h.accept);
-
- repeatCount--; /* do not count accept as part of the chain */
-
- DEBUG_PRINTF("prefix is ^.{%u,}\n", repeatCount);
-
- /* update bounds on edge */
- assert(g[e].minBound <= repeatCount);
+
+ if (!is_subset_of(start_succ, startds_succ)) {
+ DEBUG_PRINTF("not a simple chain\n");
+ return false;
+ }
+
+ set<NFAVertex> seen;
+ do {
+ if (!h[hu].char_reach.all()) {
+ return false;
+ }
+ NFAVertex hv = getSoleDestVertex(h, hu);
+ if (!hv) {
+ return false;
+ }
+ if (contains(seen, hv)) {
+ assert(0);
+ return false;
+ }
+ hu = hv;
+ repeatCount++;
+ if (hu == h.accept) {
+ break;
+ }
+ } while(1);
+
+ assert(hu == h.accept);
+
+ repeatCount--; /* do not count accept as part of the chain */
+
+ DEBUG_PRINTF("prefix is ^.{%u,}\n", repeatCount);
+
+ /* update bounds on edge */
+ assert(g[e].minBound <= repeatCount);
setEdgeBounds(g, e, repeatCount, ROSE_BOUND_INF);
-
- g[v].left.reset(); /* clear the prefix info */
-
- return true;
-}
-
-static
-bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
- const RoseEdge &e_old, RoseVertex ar,
- vector<RoseEdge> *to_delete,
- const CompileContext &cc) {
- assert(in_degree(h.acceptEod, h) == 1);
-
- bool anchored = !proper_out_degree(h.startDs, h);
+
+ g[v].left.reset(); /* clear the prefix info */
+
+ return true;
+}
+
+static
+bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
+ const RoseEdge &e_old, RoseVertex ar,
+ vector<RoseEdge> *to_delete,
+ const CompileContext &cc) {
+ assert(in_degree(h.acceptEod, h) == 1);
+
+ bool anchored = !proper_out_degree(h.startDs, h);
NFAVertex key = NGHolder::null_vertex();
- NFAVertex base = anchored ? h.start : h.startDs;
-
- if (!anchored) {
+ NFAVertex base = anchored ? h.start : h.startDs;
+
+ if (!anchored) {
auto start_succ = succs<set<NFAVertex>>(h.start, h);
auto startds_succ = succs<set<NFAVertex>>(h.startDs, h);
-
- if (!is_subset_of(start_succ, startds_succ)) {
- DEBUG_PRINTF("not a simple chain\n");
- return false;
- }
- }
-
- for (auto w : adjacent_vertices_range(base, h)) {
+
+ if (!is_subset_of(start_succ, startds_succ)) {
+ DEBUG_PRINTF("not a simple chain\n");
+ return false;
+ }
+ }
+
+ for (auto w : adjacent_vertices_range(base, h)) {
DEBUG_PRINTF("checking %zu\n", h[w].index);
- if (!h[w].char_reach.all()) {
- continue;
- }
-
- if (!is_special(w, h)) {
- key = w;
- break;
- }
- }
-
- if (!key) {
- return false;
- }
-
- vector<GraphRepeatInfo> repeats;
- findRepeats(h, 2, &repeats);
-
- vector<GraphRepeatInfo>::const_iterator it;
- for (it = repeats.begin(); it != repeats.end(); ++it) {
- DEBUG_PRINTF("checking.. %zu verts\n", it->vertices.size());
- if (find(it->vertices.begin(), it->vertices.end(), key)
- != it->vertices.end()) {
- break;
- }
- }
- if (it == repeats.end()) {
- DEBUG_PRINTF("no repeat found\n");
- return false;
- }
-
- GraphRepeatInfo ri = *it;
-
- set<NFAVertex> exits_and_repeat_verts;
- for (auto repeat_v : ri.vertices) {
+ if (!h[w].char_reach.all()) {
+ continue;
+ }
+
+ if (!is_special(w, h)) {
+ key = w;
+ break;
+ }
+ }
+
+ if (!key) {
+ return false;
+ }
+
+ vector<GraphRepeatInfo> repeats;
+ findRepeats(h, 2, &repeats);
+
+ vector<GraphRepeatInfo>::const_iterator it;
+ for (it = repeats.begin(); it != repeats.end(); ++it) {
+ DEBUG_PRINTF("checking.. %zu verts\n", it->vertices.size());
+ if (find(it->vertices.begin(), it->vertices.end(), key)
+ != it->vertices.end()) {
+ break;
+ }
+ }
+ if (it == repeats.end()) {
+ DEBUG_PRINTF("no repeat found\n");
+ return false;
+ }
+
+ GraphRepeatInfo ri = *it;
+
+ set<NFAVertex> exits_and_repeat_verts;
+ for (auto repeat_v : ri.vertices) {
DEBUG_PRINTF("repeat vertex %zu\n", h[repeat_v].index);
- succ(h, repeat_v, &exits_and_repeat_verts);
- exits_and_repeat_verts.insert(repeat_v);
- }
-
- DEBUG_PRINTF("repeat {%s,%s}\n", ri.repeatMin.str().c_str(),
- ri.repeatMax.str().c_str());
-
- set<NFAVertex> rep_verts;
- insert(&rep_verts, ri.vertices);
-
- set<NFAVertex> exits;
- exits = exits_and_repeat_verts;
- erase_all(&exits, rep_verts);
-
+ succ(h, repeat_v, &exits_and_repeat_verts);
+ exits_and_repeat_verts.insert(repeat_v);
+ }
+
+ DEBUG_PRINTF("repeat {%s,%s}\n", ri.repeatMin.str().c_str(),
+ ri.repeatMax.str().c_str());
+
+ set<NFAVertex> rep_verts;
+ insert(&rep_verts, ri.vertices);
+
+ set<NFAVertex> exits;
+ exits = exits_and_repeat_verts;
+ erase_all(&exits, rep_verts);
+
auto base_succ = succs<set<NFAVertex>>(base, h);
- base_succ.erase(h.startDs);
-
- if (is_subset_of(base_succ, rep_verts)) {
- /* all good: repeat dominates the rest of the pattern */
- } else if (ri.repeatMin == depth(1)
- && is_subset_of(exits, base_succ)
- && is_subset_of(base_succ, exits_and_repeat_verts)) {
- /* we have a jump edge */
+ base_succ.erase(h.startDs);
+
+ if (is_subset_of(base_succ, rep_verts)) {
+ /* all good: repeat dominates the rest of the pattern */
+ } else if (ri.repeatMin == depth(1)
+ && is_subset_of(exits, base_succ)
+ && is_subset_of(base_succ, exits_and_repeat_verts)) {
+ /* we have a jump edge */
ri.repeatMin = depth(0);
- } else {
- return false;
- }
-
- DEBUG_PRINTF("repeat {%s,%s}\n", ri.repeatMin.str().c_str(),
- ri.repeatMax.str().c_str());
- DEBUG_PRINTF("woot?\n");
-
- shared_ptr<NGHolder> h_new = make_shared<NGHolder>();
+ } else {
+ return false;
+ }
+
+ DEBUG_PRINTF("repeat {%s,%s}\n", ri.repeatMin.str().c_str(),
+ ri.repeatMax.str().c_str());
+ DEBUG_PRINTF("woot?\n");
+
+ shared_ptr<NGHolder> h_new = make_shared<NGHolder>();
unordered_map<NFAVertex, NFAVertex> rhs_map;
- vector<NFAVertex> exits_vec;
- insert(&exits_vec, exits_vec.end(), exits);
- splitRHS(h, exits_vec, h_new.get(), &rhs_map);
- h_new->kind = NFA_PREFIX;
-
- if (num_vertices(*h_new) <= N_SPECIALS) {
- DEBUG_PRINTF("not a hybrid??\n");
- /* TODO: pick up these cases, unify code */
- return false;
- }
-
- for (auto w : adjacent_vertices_range(h_new->start, *h_new)) {
- if (w != h_new->startDs) {
- add_edge(h_new->startDs, w, *h_new);
- }
- }
- clear_out_edges(h_new->start, *h_new);
- add_edge(h_new->start, h_new->startDs, *h_new);
-
- depth width = findMinWidth(*h_new);
- if (width != findMaxWidth(*h_new)) {
- return false;
- }
-
- if (g[v].left.dfa) {
- /* we were unable to implement initial graph as an nfa;
- * we need to to check if we still need a dfa and, if so, rebuild. */
- if (!isImplementableNFA(*h_new, nullptr, cc)) {
- return false; /* TODO: handle rebuilding dfa */
- }
- }
-
- if (anchored) {
- if (ri.repeatMax.is_infinite()) {
- return false; /* TODO */
- }
-
- if (source(e_old, g) == ar) {
+ vector<NFAVertex> exits_vec;
+ insert(&exits_vec, exits_vec.end(), exits);
+ splitRHS(h, exits_vec, h_new.get(), &rhs_map);
+ h_new->kind = NFA_PREFIX;
+
+ if (num_vertices(*h_new) <= N_SPECIALS) {
+ DEBUG_PRINTF("not a hybrid??\n");
+ /* TODO: pick up these cases, unify code */
+ return false;
+ }
+
+ for (auto w : adjacent_vertices_range(h_new->start, *h_new)) {
+ if (w != h_new->startDs) {
+ add_edge(h_new->startDs, w, *h_new);
+ }
+ }
+ clear_out_edges(h_new->start, *h_new);
+ add_edge(h_new->start, h_new->startDs, *h_new);
+
+ depth width = findMinWidth(*h_new);
+ if (width != findMaxWidth(*h_new)) {
+ return false;
+ }
+
+ if (g[v].left.dfa) {
+ /* we were unable to implement initial graph as an nfa;
+ * we need to to check if we still need a dfa and, if so, rebuild. */
+ if (!isImplementableNFA(*h_new, nullptr, cc)) {
+ return false; /* TODO: handle rebuilding dfa */
+ }
+ }
+
+ if (anchored) {
+ if (ri.repeatMax.is_infinite()) {
+ return false; /* TODO */
+ }
+
+ if (source(e_old, g) == ar) {
setEdgeBounds(g, e_old, ri.repeatMin + width, ri.repeatMax + width);
- } else {
+ } else {
RoseEdge e_new = add_edge(ar, v, g);
setEdgeBounds(g, e_new, ri.repeatMin + width, ri.repeatMax + width);
- to_delete->push_back(e_old);
- }
-
- } else {
- assert(g[e_old].minBound <= ri.repeatMin + width);
+ to_delete->push_back(e_old);
+ }
+
+ } else {
+ assert(g[e_old].minBound <= ri.repeatMin + width);
setEdgeBounds(g, e_old, ri.repeatMin + width, ROSE_BOUND_INF);
- }
-
- g[v].left.dfa.reset();
- g[v].left.graph = h_new;
-
- return true;
-}
-
-/* turns simple prefixes like /^.{30,} into bounds on the root roles */
-void convertPrefixToBounds(RoseBuildImpl &tbi) {
- RoseGraph &g = tbi.g;
-
- vector<RoseEdge> to_delete;
- RoseVertex ar = tbi.anchored_root;
-
- /* graphs with prefixes produced by rose are wired to tbi.root */
-
- for (const auto &e : out_edges_range(tbi.root, g)) {
- RoseVertex v = target(e, g);
-
- if (in_degree(v, g) != 1) {
- continue;
- }
-
- if (!g[v].left.graph) {
- continue;
- }
-
- if (g[v].left.tracksSom()) {
- continue;
- }
-
- const NGHolder &h = *g[v].left.graph;
-
- if (g[v].left.lag != tbi.minLiteralLen(v)
- || g[v].left.lag != tbi.maxLiteralLen(v)) {
- continue;
- }
-
- if (all_reports(h).size() != 1) {
- assert(0);
- continue;
- }
-
+ }
+
+ g[v].left.dfa.reset();
+ g[v].left.graph = h_new;
+
+ return true;
+}
+
+/* turns simple prefixes like /^.{30,} into bounds on the root roles */
+void convertPrefixToBounds(RoseBuildImpl &tbi) {
+ RoseGraph &g = tbi.g;
+
+ vector<RoseEdge> to_delete;
+ RoseVertex ar = tbi.anchored_root;
+
+ /* graphs with prefixes produced by rose are wired to tbi.root */
+
+ for (const auto &e : out_edges_range(tbi.root, g)) {
+ RoseVertex v = target(e, g);
+
+ if (in_degree(v, g) != 1) {
+ continue;
+ }
+
+ if (!g[v].left.graph) {
+ continue;
+ }
+
+ if (g[v].left.tracksSom()) {
+ continue;
+ }
+
+ const NGHolder &h = *g[v].left.graph;
+
+ if (g[v].left.lag != tbi.minLiteralLen(v)
+ || g[v].left.lag != tbi.maxLiteralLen(v)) {
+ continue;
+ }
+
+ if (all_reports(h).size() != 1) {
+ assert(0);
+ continue;
+ }
+
DEBUG_PRINTF("inspecting prefix of %zu\n", g[v].index);
-
- if (!proper_out_degree(h.startDs, h)) {
- if (handleStartPrefixCliche(h, g, v, e, ar, &to_delete)) {
- continue;
- }
- } else {
- if (handleStartDsPrefixCliche(h, g, v, e)) {
- continue;
- }
- }
-
- /* prefix is not just a simple dot repeat. However, it is still
- * possible that it consists of dot repeat and fixed width mask that we
- * can handle. */
- handleMixedPrefixCliche(h, g, v, e, ar, &to_delete, tbi.cc);
- }
-
- for (const auto &e : out_edges_range(ar, g)) {
- RoseVertex v = target(e, g);
-
- /* note: vertices that we have rehomed will currently have an in-degree
- * of 2 */
- if (in_degree(v, g) != 1) {
- continue;
- }
-
- if (!g[v].left.graph) {
- continue;
- }
-
- if (g[v].left.tracksSom()) {
- continue;
- }
-
- if (g[v].left.lag != tbi.minLiteralLen(v)
- || g[v].left.lag != tbi.maxLiteralLen(v)) {
- continue;
- }
-
- const NGHolder &h = *g[v].left.graph;
- if (all_reports(h).size() != 1) {
- assert(0);
- continue;
- }
-
+
+ if (!proper_out_degree(h.startDs, h)) {
+ if (handleStartPrefixCliche(h, g, v, e, ar, &to_delete)) {
+ continue;
+ }
+ } else {
+ if (handleStartDsPrefixCliche(h, g, v, e)) {
+ continue;
+ }
+ }
+
+ /* prefix is not just a simple dot repeat. However, it is still
+ * possible that it consists of dot repeat and fixed width mask that we
+ * can handle. */
+ handleMixedPrefixCliche(h, g, v, e, ar, &to_delete, tbi.cc);
+ }
+
+ for (const auto &e : out_edges_range(ar, g)) {
+ RoseVertex v = target(e, g);
+
+ /* note: vertices that we have rehomed will currently have an in-degree
+ * of 2 */
+ if (in_degree(v, g) != 1) {
+ continue;
+ }
+
+ if (!g[v].left.graph) {
+ continue;
+ }
+
+ if (g[v].left.tracksSom()) {
+ continue;
+ }
+
+ if (g[v].left.lag != tbi.minLiteralLen(v)
+ || g[v].left.lag != tbi.maxLiteralLen(v)) {
+ continue;
+ }
+
+ const NGHolder &h = *g[v].left.graph;
+ if (all_reports(h).size() != 1) {
+ assert(0);
+ continue;
+ }
+
DEBUG_PRINTF("inspecting prefix of %zu\n", g[v].index);
-
- if (!proper_out_degree(h.startDs, h)) {
- if (handleStartPrefixCliche(h, g, v, e, ar, &to_delete)) {
- continue;
- }
- } else {
- if (handleStartDsPrefixCliche(h, g, v, e)) {
- continue;
- }
- }
-
- /* prefix is not just a simple dot repeat. However, it is still
- * possible that it consists of dot repeat and fixed width mask that we
- * can handle. */
- handleMixedPrefixCliche(h, g, v, e, ar, &to_delete, tbi.cc);
- }
-
- for (const auto &e : to_delete) {
- remove_edge(e, g);
- }
-}
-
-/**
- * Identify dot-repeat infixes after fixed-depth literals and convert them to
- * edges with ROSE_ROLE_HISTORY_ANCH history and equivalent bounds.
- */
-void convertAnchPrefixToBounds(RoseBuildImpl &tbi) {
- RoseGraph &g = tbi.g;
-
- for (const auto v : vertices_range(g)) {
- if (!g[v].left) {
- continue;
- }
-
+
+ if (!proper_out_degree(h.startDs, h)) {
+ if (handleStartPrefixCliche(h, g, v, e, ar, &to_delete)) {
+ continue;
+ }
+ } else {
+ if (handleStartDsPrefixCliche(h, g, v, e)) {
+ continue;
+ }
+ }
+
+ /* prefix is not just a simple dot repeat. However, it is still
+ * possible that it consists of dot repeat and fixed width mask that we
+ * can handle. */
+ handleMixedPrefixCliche(h, g, v, e, ar, &to_delete, tbi.cc);
+ }
+
+ for (const auto &e : to_delete) {
+ remove_edge(e, g);
+ }
+}
+
+/**
+ * Identify dot-repeat infixes after fixed-depth literals and convert them to
+ * edges with ROSE_ROLE_HISTORY_ANCH history and equivalent bounds.
+ */
+void convertAnchPrefixToBounds(RoseBuildImpl &tbi) {
+ RoseGraph &g = tbi.g;
+
+ for (const auto v : vertices_range(g)) {
+ if (!g[v].left) {
+ continue;
+ }
+
DEBUG_PRINTF("vertex %zu\n", g[v].index);
-
- // This pass runs after makeCastles, so we use the fact that bounded
- // repeat detection has already been done for us.
-
- if (!g[v].left.castle) {
- DEBUG_PRINTF("not a castle\n");
- continue;
- }
-
- const CastleProto &castle = *g[v].left.castle;
-
- if (castle.repeats.size() != 1) {
- DEBUG_PRINTF("too many repeats\n");
- assert(0); // Castles should not have been merged yet.
- continue;
- }
-
- if (!castle.reach().all()) {
- DEBUG_PRINTF("not dot\n");
- continue;
- }
-
- if (in_degree(v, g) != 1) {
- DEBUG_PRINTF("too many in-edges\n");
- continue;
- }
-
- RoseEdge e = *in_edges(v, g).first;
- RoseVertex u = source(e, g);
-
- if (g[e].history != ROSE_ROLE_HISTORY_NONE) {
- DEBUG_PRINTF("history already set to something other than NONE?\n");
- assert(0);
- continue;
- }
-
- if (g[u].min_offset != g[u].max_offset) {
- DEBUG_PRINTF("pred not fixed offset\n");
- continue;
- }
- DEBUG_PRINTF("pred is fixed offset, at %u\n", g[u].min_offset);
- assert(g[u].min_offset < ROSE_BOUND_INF);
-
- size_t lit_length = tbi.minLiteralLen(v);
- if (lit_length != tbi.maxLiteralLen(v)) {
- assert(0);
- DEBUG_PRINTF("variable literal lengths\n");
- continue;
- }
-
- u32 lag = g[v].left.lag;
- DEBUG_PRINTF("lit_length=%zu, lag=%u\n", lit_length, lag);
- assert(lag <= lit_length);
- depth delay_adj(lit_length - lag);
-
- const PureRepeat &pr = castle.repeats.begin()->second;
- DEBUG_PRINTF("castle has repeat %s\n", pr.bounds.str().c_str());
+
+ // This pass runs after makeCastles, so we use the fact that bounded
+ // repeat detection has already been done for us.
+
+ if (!g[v].left.castle) {
+ DEBUG_PRINTF("not a castle\n");
+ continue;
+ }
+
+ const CastleProto &castle = *g[v].left.castle;
+
+ if (castle.repeats.size() != 1) {
+ DEBUG_PRINTF("too many repeats\n");
+ assert(0); // Castles should not have been merged yet.
+ continue;
+ }
+
+ if (!castle.reach().all()) {
+ DEBUG_PRINTF("not dot\n");
+ continue;
+ }
+
+ if (in_degree(v, g) != 1) {
+ DEBUG_PRINTF("too many in-edges\n");
+ continue;
+ }
+
+ RoseEdge e = *in_edges(v, g).first;
+ RoseVertex u = source(e, g);
+
+ if (g[e].history != ROSE_ROLE_HISTORY_NONE) {
+ DEBUG_PRINTF("history already set to something other than NONE?\n");
+ assert(0);
+ continue;
+ }
+
+ if (g[u].min_offset != g[u].max_offset) {
+ DEBUG_PRINTF("pred not fixed offset\n");
+ continue;
+ }
+ DEBUG_PRINTF("pred is fixed offset, at %u\n", g[u].min_offset);
+ assert(g[u].min_offset < ROSE_BOUND_INF);
+
+ size_t lit_length = tbi.minLiteralLen(v);
+ if (lit_length != tbi.maxLiteralLen(v)) {
+ assert(0);
+ DEBUG_PRINTF("variable literal lengths\n");
+ continue;
+ }
+
+ u32 lag = g[v].left.lag;
+ DEBUG_PRINTF("lit_length=%zu, lag=%u\n", lit_length, lag);
+ assert(lag <= lit_length);
+ depth delay_adj(lit_length - lag);
+
+ const PureRepeat &pr = castle.repeats.begin()->second;
+ DEBUG_PRINTF("castle has repeat %s\n", pr.bounds.str().c_str());
DEBUG_PRINTF("delay adj %u\n", (u32)delay_adj);
-
+
if (delay_adj >= pr.bounds.max) {
DEBUG_PRINTF("delay adj too large\n");
continue;
}
- DepthMinMax bounds(pr.bounds); // copy
- if (delay_adj > bounds.min) {
+ DepthMinMax bounds(pr.bounds); // copy
+ if (delay_adj > bounds.min) {
bounds.min = depth(0);
} else {
bounds.min -= delay_adj;
- }
- bounds.max -= delay_adj;
+ }
+ bounds.max -= delay_adj;
setEdgeBounds(g, e, bounds.min, bounds.max.is_finite()
? (u32)bounds.max
: ROSE_BOUND_INF);
- g[v].left.reset();
- }
-}
-
-} // namespace ue2
+ g[v].left.reset();
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_convert.h b/contrib/libs/hyperscan/src/rose/rose_build_convert.h
index 35dbad2a51..7307c213ca 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_convert.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_convert.h
@@ -1,42 +1,42 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_BUILD_CONVERT_H
-#define ROSE_BUILD_CONVERT_H
-
-namespace ue2 {
-
-class RoseBuildImpl;
-
-void convertFloodProneSuffixes(RoseBuildImpl &tbi);
-void convertPrefixToBounds(RoseBuildImpl &tbi);
-void convertAnchPrefixToBounds(RoseBuildImpl &tbi);
-
-} // namespace ue2
-
-#endif
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_CONVERT_H
+#define ROSE_BUILD_CONVERT_H
+
+namespace ue2 {
+
+class RoseBuildImpl;
+
+void convertFloodProneSuffixes(RoseBuildImpl &tbi);
+void convertPrefixToBounds(RoseBuildImpl &tbi);
+void convertAnchPrefixToBounds(RoseBuildImpl &tbi);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_dump.h b/contrib/libs/hyperscan/src/rose/rose_build_dump.h
index 31daec5574..d4c620a3e6 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_dump.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_dump.h
@@ -1,55 +1,55 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_BUILD_DUMP_H
-#define ROSE_BUILD_DUMP_H
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_DUMP_H
+#define ROSE_BUILD_DUMP_H
+
#include "ue2common.h"
#include <map>
#include <string>
#include <vector>
-struct RoseEngine;
-
-namespace ue2 {
-
+struct RoseEngine;
+
+namespace ue2 {
+
class RoseBuildImpl;
-struct Grey;
+struct Grey;
struct hwlmLiteral;
struct LitFragment;
struct left_id;
struct suffix_id;
-
-#ifdef DUMP_SUPPORT
-// Dump the Rose graph in graphviz representation.
+
+#ifdef DUMP_SUPPORT
+// Dump the Rose graph in graphviz representation.
void dumpRoseGraph(const RoseBuildImpl &build, const char *filename);
-
+
void dumpRose(const RoseBuildImpl &build,
const std::vector<LitFragment> &fragments,
const std::map<left_id, u32> &leftfix_queue_map,
@@ -59,25 +59,25 @@ void dumpRose(const RoseBuildImpl &build,
void dumpMatcherLiterals(const std::vector<hwlmLiteral> &lits,
const std::string &name, const Grey &grey);
-#else
-
-static UNUSED
+#else
+
+static UNUSED
void dumpRoseGraph(const RoseBuildImpl &, const char *) {
-}
-
-static UNUSED
+}
+
+static UNUSED
void dumpRose(const RoseBuildImpl &, const std::vector<LitFragment> &,
const std::map<left_id, u32> &, const std::map<suffix_id, u32> &,
const RoseEngine *) {
-}
-
+}
+
static UNUSED
void dumpMatcherLiterals(const std::vector<hwlmLiteral> &, const std::string &,
const Grey &) {
}
-#endif
-
-} // namespace ue2
-
-#endif
+#endif
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_impl.h b/contrib/libs/hyperscan/src/rose/rose_build_impl.h
index 9c601f1e5f..7780848b1b 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_impl.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_impl.h
@@ -1,64 +1,64 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
#ifndef ROSE_BUILD_IMPL_H
#define ROSE_BUILD_IMPL_H
-
-#include "rose_build.h"
-#include "rose_build_util.h"
+
+#include "rose_build.h"
+#include "rose_build_util.h"
#include "rose_common.h"
-#include "rose_graph.h"
-#include "nfa/mpvcompile.h"
-#include "nfa/goughcompile.h"
-#include "nfa/nfa_internal.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_revacc.h"
+#include "rose_graph.h"
+#include "nfa/mpvcompile.h"
+#include "nfa/goughcompile.h"
+#include "nfa/nfa_internal.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_revacc.h"
#include "util/bytecode_ptr.h"
#include "util/flat_containers.h"
#include "util/hash.h"
-#include "util/order_check.h"
-#include "util/queue_index_factory.h"
+#include "util/order_check.h"
+#include "util/queue_index_factory.h"
#include "util/ue2string.h"
#include "util/unordered.h"
#include "util/verify_types.h"
-
-#include <deque>
-#include <map>
-#include <string>
-#include <vector>
+
+#include <deque>
+#include <map>
+#include <string>
+#include <vector>
#include <boost/variant.hpp>
-
-struct RoseEngine;
-
-namespace ue2 {
-
-#define ROSE_GROUPS_MAX 64
-
+
+struct RoseEngine;
+
+namespace ue2 {
+
+#define ROSE_GROUPS_MAX 64
+
#define ROSE_LONG_LITERAL_THRESHOLD_MIN 33
/**
@@ -72,66 +72,66 @@ namespace ue2 {
*/
#define ROSE_SHORT_LITERAL_LEN_MAX 8
-struct BoundaryReports;
-struct CastleProto;
-struct CompileContext;
-class ReportManager;
+struct BoundaryReports;
+struct CastleProto;
+struct CompileContext;
+class ReportManager;
class SmallWriteBuild;
-class SomSlotManager;
-
-struct suffix_id {
- suffix_id(const RoseSuffixInfo &in)
- : g(in.graph.get()), c(in.castle.get()), d(in.rdfa.get()),
+class SomSlotManager;
+
+struct suffix_id {
+ suffix_id(const RoseSuffixInfo &in)
+ : g(in.graph.get()), c(in.castle.get()), d(in.rdfa.get()),
h(in.haig.get()), t(in.tamarama.get()),
dfa_min_width(in.dfa_min_width),
- dfa_max_width(in.dfa_max_width) {
- assert(!g || g->kind == NFA_SUFFIX);
- }
- bool operator==(const suffix_id &b) const {
+ dfa_max_width(in.dfa_max_width) {
+ assert(!g || g->kind == NFA_SUFFIX);
+ }
+ bool operator==(const suffix_id &b) const {
bool rv = g == b.g && c == b.c && h == b.h && d == b.d && t == b.t;
- assert(!rv || dfa_min_width == b.dfa_min_width);
- assert(!rv || dfa_max_width == b.dfa_max_width);
- return rv;
- }
- bool operator!=(const suffix_id &b) const { return !(*this == b); }
- bool operator<(const suffix_id &b) const {
- const suffix_id &a = *this;
- ORDER_CHECK(g);
- ORDER_CHECK(c);
- ORDER_CHECK(d);
- ORDER_CHECK(h);
+ assert(!rv || dfa_min_width == b.dfa_min_width);
+ assert(!rv || dfa_max_width == b.dfa_max_width);
+ return rv;
+ }
+ bool operator!=(const suffix_id &b) const { return !(*this == b); }
+ bool operator<(const suffix_id &b) const {
+ const suffix_id &a = *this;
+ ORDER_CHECK(g);
+ ORDER_CHECK(c);
+ ORDER_CHECK(d);
+ ORDER_CHECK(h);
ORDER_CHECK(t);
- return false;
- }
-
- NGHolder *graph() {
- if (!d && !h) {
- assert(dfa_min_width == depth(0));
- assert(dfa_max_width == depth::infinity());
- }
- return g;
- }
- const NGHolder *graph() const {
- if (!d && !h) {
- assert(dfa_min_width == depth(0));
- assert(dfa_max_width == depth::infinity());
- }
- return g;
- }
- CastleProto *castle() {
- if (!d && !h) {
- assert(dfa_min_width == depth(0));
- assert(dfa_max_width == depth::infinity());
- }
- return c;
- }
- const CastleProto *castle() const {
- if (!d && !h) {
- assert(dfa_min_width == depth(0));
- assert(dfa_max_width == depth::infinity());
- }
- return c;
- }
+ return false;
+ }
+
+ NGHolder *graph() {
+ if (!d && !h) {
+ assert(dfa_min_width == depth(0));
+ assert(dfa_max_width == depth::infinity());
+ }
+ return g;
+ }
+ const NGHolder *graph() const {
+ if (!d && !h) {
+ assert(dfa_min_width == depth(0));
+ assert(dfa_max_width == depth::infinity());
+ }
+ return g;
+ }
+ CastleProto *castle() {
+ if (!d && !h) {
+ assert(dfa_min_width == depth(0));
+ assert(dfa_max_width == depth::infinity());
+ }
+ return c;
+ }
+ const CastleProto *castle() const {
+ if (!d && !h) {
+ assert(dfa_min_width == depth(0));
+ assert(dfa_max_width == depth::infinity());
+ }
+ return c;
+ }
TamaProto *tamarama() {
if (!d && !h) {
assert(dfa_min_width == depth(0));
@@ -148,148 +148,148 @@ struct suffix_id {
}
- raw_som_dfa *haig() { return h; }
- const raw_som_dfa *haig() const { return h; }
- raw_dfa *dfa() { return d; }
- const raw_dfa *dfa() const { return d; }
-
- size_t hash() const;
-
-private:
- NGHolder *g;
- CastleProto *c;
- raw_dfa *d;
- raw_som_dfa *h;
+ raw_som_dfa *haig() { return h; }
+ const raw_som_dfa *haig() const { return h; }
+ raw_dfa *dfa() { return d; }
+ const raw_dfa *dfa() const { return d; }
+
+ size_t hash() const;
+
+private:
+ NGHolder *g;
+ CastleProto *c;
+ raw_dfa *d;
+ raw_som_dfa *h;
TamaProto *t;
- depth dfa_min_width;
- depth dfa_max_width;
-
- friend depth findMinWidth(const suffix_id &s);
- friend depth findMaxWidth(const suffix_id &s);
- friend depth findMinWidth(const suffix_id &s, u32 top);
- friend depth findMaxWidth(const suffix_id &s, u32 top);
-};
-
-std::set<ReportID> all_reports(const suffix_id &s);
-std::set<u32> all_tops(const suffix_id &s);
-bool has_eod_accepts(const suffix_id &s);
-bool has_non_eod_accepts(const suffix_id &s);
-depth findMinWidth(const suffix_id &s);
-depth findMaxWidth(const suffix_id &s);
-depth findMinWidth(const suffix_id &s, u32 top);
-depth findMaxWidth(const suffix_id &s, u32 top);
-
-/** \brief represents an engine to the left of a rose role */
-struct left_id {
- left_id(const LeftEngInfo &in)
- : g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()),
- h(in.haig.get()), dfa_min_width(in.dfa_min_width),
- dfa_max_width(in.dfa_max_width) {
+ depth dfa_min_width;
+ depth dfa_max_width;
+
+ friend depth findMinWidth(const suffix_id &s);
+ friend depth findMaxWidth(const suffix_id &s);
+ friend depth findMinWidth(const suffix_id &s, u32 top);
+ friend depth findMaxWidth(const suffix_id &s, u32 top);
+};
+
+std::set<ReportID> all_reports(const suffix_id &s);
+std::set<u32> all_tops(const suffix_id &s);
+bool has_eod_accepts(const suffix_id &s);
+bool has_non_eod_accepts(const suffix_id &s);
+depth findMinWidth(const suffix_id &s);
+depth findMaxWidth(const suffix_id &s);
+depth findMinWidth(const suffix_id &s, u32 top);
+depth findMaxWidth(const suffix_id &s, u32 top);
+
+/** \brief represents an engine to the left of a rose role */
+struct left_id {
+ left_id(const LeftEngInfo &in)
+ : g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()),
+ h(in.haig.get()), dfa_min_width(in.dfa_min_width),
+ dfa_max_width(in.dfa_max_width) {
assert(!g || !has_managed_reports(*g));
- }
- bool operator==(const left_id &b) const {
- bool rv = g == b.g && c == b.c && h == b.h && d == b.d;
- assert(!rv || dfa_min_width == b.dfa_min_width);
- assert(!rv || dfa_max_width == b.dfa_max_width);
- return rv;
- }
- bool operator!=(const left_id &b) const { return !(*this == b); }
- bool operator<(const left_id &b) const {
- const left_id &a = *this;
- ORDER_CHECK(g);
- ORDER_CHECK(c);
- ORDER_CHECK(d);
- ORDER_CHECK(h);
- return false;
- }
-
- NGHolder *graph() {
- if (!d && !h) {
- assert(dfa_min_width == depth(0));
- assert(dfa_max_width == depth::infinity());
- }
- return g;
- }
- const NGHolder *graph() const {
- if (!d && !h) {
- assert(dfa_min_width == depth(0));
- assert(dfa_max_width == depth::infinity());
- }
- return g;
- }
- CastleProto *castle() {
- if (!d && !h) {
- assert(dfa_min_width == depth(0));
- assert(dfa_max_width == depth::infinity());
- }
-
- return c;
- }
- const CastleProto *castle() const {
- if (!d && !h) {
- assert(dfa_min_width == depth(0));
- assert(dfa_max_width == depth::infinity());
- }
-
- return c;
- }
- raw_som_dfa *haig() { return h; }
- const raw_som_dfa *haig() const { return h; }
- raw_dfa *dfa() { return d; }
- const raw_dfa *dfa() const { return d; }
-
- size_t hash() const;
-
-private:
- NGHolder *g;
- CastleProto *c;
- raw_dfa *d;
- raw_som_dfa *h;
- depth dfa_min_width;
- depth dfa_max_width;
-
- friend bool isAnchored(const left_id &r);
- friend depth findMinWidth(const left_id &r);
- friend depth findMaxWidth(const left_id &r);
-};
-
-std::set<u32> all_tops(const left_id &r);
+ }
+ bool operator==(const left_id &b) const {
+ bool rv = g == b.g && c == b.c && h == b.h && d == b.d;
+ assert(!rv || dfa_min_width == b.dfa_min_width);
+ assert(!rv || dfa_max_width == b.dfa_max_width);
+ return rv;
+ }
+ bool operator!=(const left_id &b) const { return !(*this == b); }
+ bool operator<(const left_id &b) const {
+ const left_id &a = *this;
+ ORDER_CHECK(g);
+ ORDER_CHECK(c);
+ ORDER_CHECK(d);
+ ORDER_CHECK(h);
+ return false;
+ }
+
+ NGHolder *graph() {
+ if (!d && !h) {
+ assert(dfa_min_width == depth(0));
+ assert(dfa_max_width == depth::infinity());
+ }
+ return g;
+ }
+ const NGHolder *graph() const {
+ if (!d && !h) {
+ assert(dfa_min_width == depth(0));
+ assert(dfa_max_width == depth::infinity());
+ }
+ return g;
+ }
+ CastleProto *castle() {
+ if (!d && !h) {
+ assert(dfa_min_width == depth(0));
+ assert(dfa_max_width == depth::infinity());
+ }
+
+ return c;
+ }
+ const CastleProto *castle() const {
+ if (!d && !h) {
+ assert(dfa_min_width == depth(0));
+ assert(dfa_max_width == depth::infinity());
+ }
+
+ return c;
+ }
+ raw_som_dfa *haig() { return h; }
+ const raw_som_dfa *haig() const { return h; }
+ raw_dfa *dfa() { return d; }
+ const raw_dfa *dfa() const { return d; }
+
+ size_t hash() const;
+
+private:
+ NGHolder *g;
+ CastleProto *c;
+ raw_dfa *d;
+ raw_som_dfa *h;
+ depth dfa_min_width;
+ depth dfa_max_width;
+
+ friend bool isAnchored(const left_id &r);
+ friend depth findMinWidth(const left_id &r);
+ friend depth findMaxWidth(const left_id &r);
+};
+
+std::set<u32> all_tops(const left_id &r);
std::set<ReportID> all_reports(const left_id &left);
-bool isAnchored(const left_id &r);
-depth findMinWidth(const left_id &r);
-depth findMaxWidth(const left_id &r);
-u32 num_tops(const left_id &r);
-
-struct rose_literal_info {
+bool isAnchored(const left_id &r);
+depth findMinWidth(const left_id &r);
+depth findMaxWidth(const left_id &r);
+u32 num_tops(const left_id &r);
+
+struct rose_literal_info {
flat_set<u32> delayed_ids;
flat_set<RoseVertex> vertices;
- rose_group group_mask = 0;
- u32 undelayed_id = MO_INVALID_IDX;
- bool squash_group = false;
- bool requires_benefits = false;
-};
-
-/**
- * \brief Main literal struct used at Rose build time. Numeric literal IDs
- * used at build time point at these (via the RoseBuildImpl::literals map).
- */
-struct rose_literal_id {
- rose_literal_id(const ue2_literal &s_in, rose_literal_table table_in,
- u32 delay_in)
- : s(s_in), table(table_in), delay(delay_in), distinctiveness(0) {}
-
- rose_literal_id(const ue2_literal &s_in, const std::vector<u8> &msk_in,
- const std::vector<u8> &cmp_in, rose_literal_table table_in,
- u32 delay_in);
-
- ue2_literal s;
- std::vector<u8> msk;
- std::vector<u8> cmp;
- rose_literal_table table;
- u32 delay;
- u32 distinctiveness;
-
- size_t elength(void) const { return s.length() + delay; }
+ rose_group group_mask = 0;
+ u32 undelayed_id = MO_INVALID_IDX;
+ bool squash_group = false;
+ bool requires_benefits = false;
+};
+
+/**
+ * \brief Main literal struct used at Rose build time. Numeric literal IDs
+ * used at build time point at these (via the RoseBuildImpl::literals map).
+ */
+struct rose_literal_id {
+ rose_literal_id(const ue2_literal &s_in, rose_literal_table table_in,
+ u32 delay_in)
+ : s(s_in), table(table_in), delay(delay_in), distinctiveness(0) {}
+
+ rose_literal_id(const ue2_literal &s_in, const std::vector<u8> &msk_in,
+ const std::vector<u8> &cmp_in, rose_literal_table table_in,
+ u32 delay_in);
+
+ ue2_literal s;
+ std::vector<u8> msk;
+ std::vector<u8> cmp;
+ rose_literal_table table;
+ u32 delay;
+ u32 distinctiveness;
+
+ size_t elength(void) const { return s.length() + delay; }
size_t elength_including_mask(void) const {
size_t mask_len = msk.size();
for (u8 c : msk) {
@@ -310,19 +310,19 @@ struct rose_literal_id {
size_t hash() const {
return hash_all(s, msk, cmp, table, delay, distinctiveness);
}
-};
-
-static inline
-bool operator<(const rose_literal_id &a, const rose_literal_id &b) {
- ORDER_CHECK(distinctiveness);
- ORDER_CHECK(table);
- ORDER_CHECK(s);
- ORDER_CHECK(delay);
- ORDER_CHECK(msk);
- ORDER_CHECK(cmp);
- return 0;
-}
-
+};
+
+static inline
+bool operator<(const rose_literal_id &a, const rose_literal_id &b) {
+ ORDER_CHECK(distinctiveness);
+ ORDER_CHECK(table);
+ ORDER_CHECK(s);
+ ORDER_CHECK(delay);
+ ORDER_CHECK(msk);
+ ORDER_CHECK(cmp);
+ return 0;
+}
+
class RoseLiteralMap {
/**
* \brief Main storage for literals.
@@ -332,7 +332,7 @@ class RoseLiteralMap {
* the loop.
*/
std::deque<rose_literal_id> lits;
-
+
/** \brief Quick-lookup index from literal -> index in lits. */
ue2_unordered_map<rose_literal_id, u32> lits_index;
@@ -372,68 +372,68 @@ public:
}
};
-struct simple_anchored_info {
- simple_anchored_info(u32 min_b, u32 max_b, const ue2_literal &lit)
- : min_bound(min_b), max_bound(max_b), literal(lit) {}
- u32 min_bound; /**< min number of characters required before literal can
- * start matching */
- u32 max_bound; /**< max number of characters allowed before literal can
- * start matching */
- ue2_literal literal;
-};
-
-static really_inline
-bool operator<(const simple_anchored_info &a, const simple_anchored_info &b) {
- ORDER_CHECK(min_bound);
- ORDER_CHECK(max_bound);
- ORDER_CHECK(literal);
- return 0;
-}
-
+struct simple_anchored_info {
+ simple_anchored_info(u32 min_b, u32 max_b, const ue2_literal &lit)
+ : min_bound(min_b), max_bound(max_b), literal(lit) {}
+ u32 min_bound; /**< min number of characters required before literal can
+ * start matching */
+ u32 max_bound; /**< max number of characters allowed before literal can
+ * start matching */
+ ue2_literal literal;
+};
+
+static really_inline
+bool operator<(const simple_anchored_info &a, const simple_anchored_info &b) {
+ ORDER_CHECK(min_bound);
+ ORDER_CHECK(max_bound);
+ ORDER_CHECK(literal);
+ return 0;
+}
+
struct MpvProto {
bool empty() const {
return puffettes.empty() && triggered_puffettes.empty();
- }
+ }
void reset() {
puffettes.clear();
triggered_puffettes.clear();
- }
+ }
std::vector<raw_puff> puffettes;
std::vector<raw_puff> triggered_puffettes;
};
-
+
struct OutfixInfo {
template<class T>
explicit OutfixInfo(std::unique_ptr<T> x) : proto(std::move(x)) {}
explicit OutfixInfo(MpvProto mpv_in) : proto(std::move(mpv_in)) {}
- u32 get_queue(QueueIndexFactory &qif);
-
+ u32 get_queue(QueueIndexFactory &qif);
+
u32 get_queue() const {
assert(queue != ~0U);
return queue;
}
- bool is_nonempty_mpv() const {
+ bool is_nonempty_mpv() const {
auto *m = boost::get<MpvProto>(&proto);
return m && !m->empty();
- }
-
- bool is_dead() const {
+ }
+
+ bool is_dead() const {
auto *m = boost::get<MpvProto>(&proto);
if (m) {
return m->empty();
}
return boost::get<boost::blank>(&proto) != nullptr;
- }
-
- void clear() {
+ }
+
+ void clear() {
proto = boost::blank();
- }
-
+ }
+
// Convenience accessor functions.
-
+
NGHolder *holder() {
auto *up = boost::get<std::unique_ptr<NGHolder>>(&proto);
return up ? up->get() : nullptr;
@@ -449,7 +449,7 @@ struct OutfixInfo {
MpvProto *mpv() {
return boost::get<MpvProto>(&proto);
}
-
+
// Convenience const accessor functions.
const NGHolder *holder() const {
@@ -479,214 +479,214 @@ struct OutfixInfo {
std::unique_ptr<raw_som_dfa>,
MpvProto> proto = boost::blank();
- RevAccInfo rev_info;
- u32 maxBAWidth = 0; //!< max bi-anchored width
+ RevAccInfo rev_info;
+ u32 maxBAWidth = 0; //!< max bi-anchored width
depth minWidth{depth::infinity()};
depth maxWidth{0};
- u64a maxOffset = 0;
- bool in_sbmatcher = false; //!< handled by small-block matcher.
-
-private:
- u32 queue = ~0U;
-};
-
-std::set<ReportID> all_reports(const OutfixInfo &outfix);
-
-// Concrete impl class
-class RoseBuildImpl : public RoseBuild {
-public:
+ u64a maxOffset = 0;
+ bool in_sbmatcher = false; //!< handled by small-block matcher.
+
+private:
+ u32 queue = ~0U;
+};
+
+std::set<ReportID> all_reports(const OutfixInfo &outfix);
+
+// Concrete impl class
+class RoseBuildImpl : public RoseBuild {
+public:
RoseBuildImpl(ReportManager &rm, SomSlotManager &ssm, SmallWriteBuild &smwr,
- const CompileContext &cc, const BoundaryReports &boundary);
-
- ~RoseBuildImpl() override;
-
- // Adds a single literal.
- void add(bool anchored, bool eod, const ue2_literal &lit,
+ const CompileContext &cc, const BoundaryReports &boundary);
+
+ ~RoseBuildImpl() override;
+
+ // Adds a single literal.
+ void add(bool anchored, bool eod, const ue2_literal &lit,
const flat_set<ReportID> &ids) override;
-
+
bool addRose(const RoseInGraph &ig, bool prefilter) override;
- bool addSombeRose(const RoseInGraph &ig) override;
-
- bool addOutfix(const NGHolder &h) override;
- bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) override;
- bool addOutfix(const raw_puff &rp) override;
-
- bool addChainTail(const raw_puff &rp, u32 *queue_out, u32 *event_out) override;
-
- // Returns true if we were able to add it as a mask
- bool add(bool anchored, const std::vector<CharReach> &mask,
+ bool addSombeRose(const RoseInGraph &ig) override;
+
+ bool addOutfix(const NGHolder &h) override;
+ bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) override;
+ bool addOutfix(const raw_puff &rp) override;
+
+ bool addChainTail(const raw_puff &rp, u32 *queue_out, u32 *event_out) override;
+
+ // Returns true if we were able to add it as a mask
+ bool add(bool anchored, const std::vector<CharReach> &mask,
const flat_set<ReportID> &reports) override;
-
- bool addAnchoredAcyclic(const NGHolder &graph) override;
-
- bool validateMask(const std::vector<CharReach> &mask,
+
+ bool addAnchoredAcyclic(const NGHolder &graph) override;
+
+ bool validateMask(const std::vector<CharReach> &mask,
const flat_set<ReportID> &reports, bool anchored,
- bool eod) const override;
- void addMask(const std::vector<CharReach> &mask,
+ bool eod) const override;
+ void addMask(const std::vector<CharReach> &mask,
const flat_set<ReportID> &reports, bool anchored,
- bool eod) override;
-
- // Construct a runtime implementation.
+ bool eod) override;
+
+ // Construct a runtime implementation.
bytecode_ptr<RoseEngine> buildRose(u32 minWidth) override;
bytecode_ptr<RoseEngine> buildFinalEngine(u32 minWidth);
-
- void setSom() override { hasSom = true; }
-
- std::unique_ptr<RoseDedupeAux> generateDedupeAux() const override;
-
- // Find the maximum bound on the edges to this vertex's successors.
- u32 calcSuccMaxBound(RoseVertex u) const;
-
- /* Returns the ID of the given literal in the literal map, adding it if
- * necessary. */
- u32 getLiteralId(const ue2_literal &s, u32 delay, rose_literal_table table);
-
- // Variant with msk/cmp.
- u32 getLiteralId(const ue2_literal &s, const std::vector<u8> &msk,
- const std::vector<u8> &cmp, u32 delay,
- rose_literal_table table);
-
- u32 getNewLiteralId(void);
-
- void removeVertices(const std::vector<RoseVertex> &dead);
-
- // Is the Rose anchored?
- bool hasNoFloatingRoots() const;
-
- u32 calcHistoryRequired() const;
-
- rose_group getInitialGroups() const;
- rose_group getSuccGroups(RoseVertex start) const;
- rose_group getGroups(RoseVertex v) const;
-
- bool hasDelayedLiteral(RoseVertex v) const;
- bool hasDelayPred(RoseVertex v) const;
- bool hasLiteralInTable(RoseVertex v, enum rose_literal_table t) const;
- bool hasAnchoredTablePred(RoseVertex v) const;
-
- // Is the given vertex a successor of either root or anchored_root?
- bool isRootSuccessor(const RoseVertex &v) const;
- /* Is the given vertex a successor of something other than root or
- * anchored_root? */
- bool isNonRootSuccessor(const RoseVertex &v) const;
-
- bool isDirectReport(u32 id) const;
- bool isDelayed(u32 id) const;
-
- bool isAnchored(RoseVertex v) const; /* true iff has literal in anchored
- * table */
- bool isFloating(RoseVertex v) const; /* true iff has literal in floating
- * table */
- bool isInETable(RoseVertex v) const; /* true iff has literal in eod
- * table */
-
- size_t maxLiteralLen(RoseVertex v) const;
- size_t minLiteralLen(RoseVertex v) const;
-
- // max overlap considered for every pair (ulit, vlit).
- size_t maxLiteralOverlap(RoseVertex u, RoseVertex v) const;
-
- bool isPseudoStar(const RoseEdge &e) const;
- bool isPseudoStarOrFirstOnly(const RoseEdge &e) const;
- bool hasOnlyPseudoStarInEdges(RoseVertex v) const;
-
- bool isAnyStart(const RoseVertex &v) const {
- return v == root || v == anchored_root;
- }
-
- bool isVirtualVertex(const RoseVertex &v) const {
- return g[v].eod_accept || isAnyStart(v);
- }
-
- void handleMixedSensitivity(void);
-
- void findTransientLeftfixes(void);
-
- const CompileContext &cc;
- RoseGraph g;
- const RoseVertex root;
- const RoseVertex anchored_root;
- RoseLiteralMap literals;
- std::map<RoseVertex, RoseVertex> ghost;
- ReportID getNewNfaReport() override {
- return next_nfa_report++;
- }
- std::deque<rose_literal_info> literal_info;
- bool hasSom; //!< at least one pattern requires SOM.
- std::map<size_t, std::vector<std::unique_ptr<raw_dfa>>> anchored_nfas;
- std::map<simple_anchored_info, std::set<u32>> anchored_simple;
- std::map<u32, std::set<u32> > group_to_literal;
- u32 group_end;
-
- u32 ematcher_region_size; /**< number of bytes the eod table runs over */
-
- /** \brief Mapping from anchored literal ID to the original literal suffix
- * present when the literal was added to the literal matcher. Used for
- * overlap calculation in history assignment. */
- std::map<u32, rose_literal_id> anchoredLitSuffix;
-
+
+ void setSom() override { hasSom = true; }
+
+ std::unique_ptr<RoseDedupeAux> generateDedupeAux() const override;
+
+ // Find the maximum bound on the edges to this vertex's successors.
+ u32 calcSuccMaxBound(RoseVertex u) const;
+
+ /* Returns the ID of the given literal in the literal map, adding it if
+ * necessary. */
+ u32 getLiteralId(const ue2_literal &s, u32 delay, rose_literal_table table);
+
+ // Variant with msk/cmp.
+ u32 getLiteralId(const ue2_literal &s, const std::vector<u8> &msk,
+ const std::vector<u8> &cmp, u32 delay,
+ rose_literal_table table);
+
+ u32 getNewLiteralId(void);
+
+ void removeVertices(const std::vector<RoseVertex> &dead);
+
+ // Is the Rose anchored?
+ bool hasNoFloatingRoots() const;
+
+ u32 calcHistoryRequired() const;
+
+ rose_group getInitialGroups() const;
+ rose_group getSuccGroups(RoseVertex start) const;
+ rose_group getGroups(RoseVertex v) const;
+
+ bool hasDelayedLiteral(RoseVertex v) const;
+ bool hasDelayPred(RoseVertex v) const;
+ bool hasLiteralInTable(RoseVertex v, enum rose_literal_table t) const;
+ bool hasAnchoredTablePred(RoseVertex v) const;
+
+ // Is the given vertex a successor of either root or anchored_root?
+ bool isRootSuccessor(const RoseVertex &v) const;
+ /* Is the given vertex a successor of something other than root or
+ * anchored_root? */
+ bool isNonRootSuccessor(const RoseVertex &v) const;
+
+ bool isDirectReport(u32 id) const;
+ bool isDelayed(u32 id) const;
+
+ bool isAnchored(RoseVertex v) const; /* true iff has literal in anchored
+ * table */
+ bool isFloating(RoseVertex v) const; /* true iff has literal in floating
+ * table */
+ bool isInETable(RoseVertex v) const; /* true iff has literal in eod
+ * table */
+
+ size_t maxLiteralLen(RoseVertex v) const;
+ size_t minLiteralLen(RoseVertex v) const;
+
+ // max overlap considered for every pair (ulit, vlit).
+ size_t maxLiteralOverlap(RoseVertex u, RoseVertex v) const;
+
+ bool isPseudoStar(const RoseEdge &e) const;
+ bool isPseudoStarOrFirstOnly(const RoseEdge &e) const;
+ bool hasOnlyPseudoStarInEdges(RoseVertex v) const;
+
+ bool isAnyStart(const RoseVertex &v) const {
+ return v == root || v == anchored_root;
+ }
+
+ bool isVirtualVertex(const RoseVertex &v) const {
+ return g[v].eod_accept || isAnyStart(v);
+ }
+
+ void handleMixedSensitivity(void);
+
+ void findTransientLeftfixes(void);
+
+ const CompileContext &cc;
+ RoseGraph g;
+ const RoseVertex root;
+ const RoseVertex anchored_root;
+ RoseLiteralMap literals;
+ std::map<RoseVertex, RoseVertex> ghost;
+ ReportID getNewNfaReport() override {
+ return next_nfa_report++;
+ }
+ std::deque<rose_literal_info> literal_info;
+ bool hasSom; //!< at least one pattern requires SOM.
+ std::map<size_t, std::vector<std::unique_ptr<raw_dfa>>> anchored_nfas;
+ std::map<simple_anchored_info, std::set<u32>> anchored_simple;
+ std::map<u32, std::set<u32> > group_to_literal;
+ u32 group_end;
+
+ u32 ematcher_region_size; /**< number of bytes the eod table runs over */
+
+ /** \brief Mapping from anchored literal ID to the original literal suffix
+ * present when the literal was added to the literal matcher. Used for
+ * overlap calculation in history assignment. */
+ std::map<u32, rose_literal_id> anchoredLitSuffix;
+
ue2_unordered_set<left_id> transient;
ue2_unordered_map<left_id, rose_group> rose_squash_masks;
-
- std::vector<OutfixInfo> outfixes;
-
- /** \brief MPV outfix entry. Null if not used, and moved into the outfixes
- * list before we start building the bytecode (at which point it is set to
- * null again). */
- std::unique_ptr<OutfixInfo> mpv_outfix = nullptr;
-
- u32 eod_event_literal_id; // ID of EOD event literal, or MO_INVALID_IDX.
-
- u32 max_rose_anchored_floating_overlap;
-
+
+ std::vector<OutfixInfo> outfixes;
+
+ /** \brief MPV outfix entry. Null if not used, and moved into the outfixes
+ * list before we start building the bytecode (at which point it is set to
+ * null again). */
+ std::unique_ptr<OutfixInfo> mpv_outfix = nullptr;
+
+ u32 eod_event_literal_id; // ID of EOD event literal, or MO_INVALID_IDX.
+
+ u32 max_rose_anchored_floating_overlap;
+
rose_group boundary_group_mask = 0;
-
- QueueIndexFactory qif;
- ReportManager &rm;
- SomSlotManager &ssm;
+
+ QueueIndexFactory qif;
+ ReportManager &rm;
+ SomSlotManager &ssm;
SmallWriteBuild &smwr;
- const BoundaryReports &boundary;
-
-private:
- ReportID next_nfa_report;
-};
-
+ const BoundaryReports &boundary;
+
+private:
+ ReportID next_nfa_report;
+};
+
size_t calcLongLitThreshold(const RoseBuildImpl &build,
const size_t historyRequired);
-// Free functions, in rose_build_misc.cpp
-
-bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v);
-bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v);
-
-size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b);
+// Free functions, in rose_build_misc.cpp
+
+bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v);
+bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v);
+
+size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b);
ue2_literal findNonOverlappingTail(const std::set<ue2_literal> &lits,
const ue2_literal &s);
-
-#ifndef NDEBUG
+
+#ifndef NDEBUG
bool roseHasTops(const RoseBuildImpl &build, RoseVertex v);
bool hasOrphanedTops(const RoseBuildImpl &build);
-#endif
-
-u64a findMaxOffset(const std::set<ReportID> &reports, const ReportManager &rm);
-
-// Function that operates on a msk/cmp pair and a literal, as used in
-// hwlmLiteral, and zeroes msk elements that don't add any power to the
-// literal.
-void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk,
- std::vector<u8> &cmp);
-
+#endif
+
+u64a findMaxOffset(const std::set<ReportID> &reports, const ReportManager &rm);
+
+// Function that operates on a msk/cmp pair and a literal, as used in
+// hwlmLiteral, and zeroes msk elements that don't add any power to the
+// literal.
+void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk,
+ std::vector<u8> &cmp);
+
u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id);
u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id);
-
+
bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e);
-
-#ifndef NDEBUG
-bool canImplementGraphs(const RoseBuildImpl &tbi);
-#endif
-
-} // namespace ue2
-
+
+#ifndef NDEBUG
+bool canImplementGraphs(const RoseBuildImpl &tbi);
+#endif
+
+} // namespace ue2
+
namespace std {
template<>
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_infix.cpp b/contrib/libs/hyperscan/src/rose/rose_build_infix.cpp
index e250bf4c7e..80e1254236 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_infix.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_infix.cpp
@@ -1,328 +1,328 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose/rose_build_infix.h"
-
-#include "ue2common.h"
-#include "nfa/castlecompile.h"
-#include "nfagraph/ng_dump.h"
-#include "nfagraph/ng_width.h"
-#include "nfagraph/ng_util.h"
-#include "rose/rose_build_impl.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose/rose_build_infix.h"
+
+#include "ue2common.h"
+#include "nfa/castlecompile.h"
+#include "nfagraph/ng_dump.h"
+#include "nfagraph/ng_width.h"
+#include "nfagraph/ng_util.h"
+#include "rose/rose_build_impl.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
-#include "util/graph.h"
+#include "util/graph_range.h"
+#include "util/graph.h"
#include "util/hash.h"
-#include "util/ue2string.h"
+#include "util/ue2string.h"
#include "util/unordered.h"
-
-#include <algorithm>
-#include <set>
-
-using namespace std;
-
-namespace ue2 {
-
-static
-bool couldEndLiteral(const ue2_literal &s, NFAVertex initial,
- const NGHolder &h) {
+
+#include <algorithm>
+#include <set>
+
+using namespace std;
+
+namespace ue2 {
+
+static
+bool couldEndLiteral(const ue2_literal &s, NFAVertex initial,
+ const NGHolder &h) {
flat_set<NFAVertex> curr, next;
- curr.insert(initial);
-
- for (auto it = s.rbegin(), ite = s.rend(); it != ite; ++it) {
- const CharReach &cr_s = *it;
- bool matched = false;
- next.clear();
-
- for (auto v : curr) {
- if (v == h.start) {
- // We can't see what we had before the start, so we must assume
- // the literal could overlap with it.
- return true;
- }
- const CharReach &cr_v = h[v].char_reach;
- if (overlaps(cr_v, cr_s)) {
- insert(&next, inv_adjacent_vertices(v, h));
- matched = true;
- }
- }
-
- if (!matched) {
- return false;
- }
-
- curr.swap(next);
- }
-
- return true;
-}
-
+ curr.insert(initial);
+
+ for (auto it = s.rbegin(), ite = s.rend(); it != ite; ++it) {
+ const CharReach &cr_s = *it;
+ bool matched = false;
+ next.clear();
+
+ for (auto v : curr) {
+ if (v == h.start) {
+ // We can't see what we had before the start, so we must assume
+ // the literal could overlap with it.
+ return true;
+ }
+ const CharReach &cr_v = h[v].char_reach;
+ if (overlaps(cr_v, cr_s)) {
+ insert(&next, inv_adjacent_vertices(v, h));
+ matched = true;
+ }
+ }
+
+ if (!matched) {
+ return false;
+ }
+
+ curr.swap(next);
+ }
+
+ return true;
+}
+
using EdgeCache = ue2_unordered_set<pair<NFAVertex, NFAVertex>>;
-static
+static
void contractVertex(NGHolder &g, NFAVertex v, EdgeCache &all_edges) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == v) {
- continue; // self-edge
- }
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w == v) {
- continue; // self-edge
- }
-
- // Construct edge (u, v) only if it doesn't already exist. We use
- // the all_edges container here, as checking existence inside the
- // graph is expensive when u or v have large degree.
- if (all_edges.emplace(u, w).second) {
- add_edge(u, w, g);
- }
- }
- }
-
- // Note that edges to/from v will remain in all_edges.
- clear_vertex(v, g);
-}
-
-static
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == v) {
+ continue; // self-edge
+ }
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (w == v) {
+ continue; // self-edge
+ }
+
+ // Construct edge (u, v) only if it doesn't already exist. We use
+ // the all_edges container here, as checking existence inside the
+ // graph is expensive when u or v have large degree.
+ if (all_edges.emplace(u, w).second) {
+ add_edge(u, w, g);
+ }
+ }
+ }
+
+ // Note that edges to/from v will remain in all_edges.
+ clear_vertex(v, g);
+}
+
+static
u32 findMaxLiteralMatches(const NGHolder &h, const set<ue2_literal> &lits) {
- DEBUG_PRINTF("h=%p, %zu literals\n", &h, lits.size());
+ DEBUG_PRINTF("h=%p, %zu literals\n", &h, lits.size());
//dumpGraph("infix.dot", h);
-
- // Indices of vertices that could terminate any of the literals in 'lits'.
- set<u32> terms;
-
- for (const auto &s : lits) {
- DEBUG_PRINTF("lit s='%s'\n", escapeString(s).c_str());
- if (s.empty()) {
- // Likely an anchored case, be conservative here.
- return NO_MATCH_LIMIT;
- }
-
- for (auto v : vertices_range(h)) {
- if (is_special(v, h)) {
- continue;
- }
-
- if (couldEndLiteral(s, v, h)) {
- u32 idx = h[v].index;
- DEBUG_PRINTF("vertex %u could terminate lit\n", idx);
- terms.insert(idx);
- }
- }
- }
-
- if (terms.empty()) {
- DEBUG_PRINTF("literals cannot match inside infix\n");
- return 0;
- }
-
- NGHolder g;
- cloneHolder(g, h);
- vector<NFAVertex> dead;
-
+
+ // Indices of vertices that could terminate any of the literals in 'lits'.
+ set<u32> terms;
+
+ for (const auto &s : lits) {
+ DEBUG_PRINTF("lit s='%s'\n", escapeString(s).c_str());
+ if (s.empty()) {
+ // Likely an anchored case, be conservative here.
+ return NO_MATCH_LIMIT;
+ }
+
+ for (auto v : vertices_range(h)) {
+ if (is_special(v, h)) {
+ continue;
+ }
+
+ if (couldEndLiteral(s, v, h)) {
+ u32 idx = h[v].index;
+ DEBUG_PRINTF("vertex %u could terminate lit\n", idx);
+ terms.insert(idx);
+ }
+ }
+ }
+
+ if (terms.empty()) {
+ DEBUG_PRINTF("literals cannot match inside infix\n");
+ return 0;
+ }
+
+ NGHolder g;
+ cloneHolder(g, h);
+ vector<NFAVertex> dead;
+
// The set of all edges in the graph is used for existence checks in
// contractVertex.
EdgeCache all_edges;
- for (const auto &e : edges_range(g)) {
- all_edges.emplace(source(e, g), target(e, g));
- }
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
- if (contains(terms, g[v].index)) {
- continue;
- }
-
- contractVertex(g, v, all_edges);
- dead.push_back(v);
- }
-
- remove_vertices(dead, g);
+ for (const auto &e : edges_range(g)) {
+ all_edges.emplace(source(e, g), target(e, g));
+ }
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ if (contains(terms, g[v].index)) {
+ continue;
+ }
+
+ contractVertex(g, v, all_edges);
+ dead.push_back(v);
+ }
+
+ remove_vertices(dead, g);
//dumpGraph("relaxed.dot", g);
-
- depth maxWidth = findMaxWidth(g);
- DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str());
- assert(maxWidth.is_reachable());
-
- if (maxWidth.is_infinite()) {
- // Cycle detected, so we can likely squeeze an unlimited number of
- // matches into this graph.
- return NO_MATCH_LIMIT;
- }
-
- assert(terms.size() >= maxWidth);
- return maxWidth;
-}
-
-namespace {
-struct ReachMismatch {
- explicit ReachMismatch(const CharReach &cr_in) : cr(cr_in) {}
- bool operator()(const CharReach &a) const { return !overlaps(cr, a); }
-
-private:
- CharReach cr;
-};
-}
-
-static
-u32 findMaxInfixMatches(const CastleProto &castle,
- const set<ue2_literal> &lits) {
- DEBUG_PRINTF("castle=%p, %zu literals\n", &castle, lits.size());
-
- if (castle.repeats.size() > 1) {
- DEBUG_PRINTF("more than one top!\n");
- return NO_MATCH_LIMIT;
- }
-
- assert(!castle.repeats.empty());
- const PureRepeat &pr = castle.repeats.begin()->second;
- DEBUG_PRINTF("repeat=%s reach=%s\n", pr.bounds.str().c_str(),
- describeClass(pr.reach).c_str());
-
- size_t max_count = 0;
-
- for (const auto &s : lits) {
- DEBUG_PRINTF("lit s='%s'\n", escapeString(s).c_str());
- if (s.empty()) {
- // Likely an anchored case, be conservative here.
- return NO_MATCH_LIMIT;
- }
-
- size_t count = 0;
-
- auto f = find_if(s.rbegin(), s.rend(), ReachMismatch(pr.reach));
-
- if (f == s.rbegin()) {
- DEBUG_PRINTF("lit can't terminate inside infix\n");
- count = 0;
- } else if (f != s.rend()) {
- size_t suffix_len = distance(s.rbegin(), f);
- DEBUG_PRINTF("suffix of len %zu matches at start\n", suffix_len);
- if (pr.bounds.max.is_finite()) {
- count = min(suffix_len, (size_t)pr.bounds.max);
- } else {
- count = suffix_len;
- }
- } else {
- DEBUG_PRINTF("whole lit can match inside infix (repeatedly)\n");
- if (pr.bounds.max.is_finite()) {
- count = pr.bounds.max;
- } else {
- DEBUG_PRINTF("inf bound\n");
- return NO_MATCH_LIMIT;
- }
- }
-
- DEBUG_PRINTF("count=%zu\n", count);
- max_count = max(max_count, count);
- }
-
- DEBUG_PRINTF("max_count %zu\n", max_count);
-
- if (max_count > NO_MATCH_LIMIT) {
- assert(0); // This would be a surprise.
- return NO_MATCH_LIMIT;
- }
-
- return (u32)max_count;
-}
-
-u32 findMaxInfixMatches(const left_id &left, const set<ue2_literal> &lits) {
- if (left.castle()) {
- return findMaxInfixMatches(*left.castle(), lits);
- }
- if (left.graph()) {
+
+ depth maxWidth = findMaxWidth(g);
+ DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str());
+ assert(maxWidth.is_reachable());
+
+ if (maxWidth.is_infinite()) {
+ // Cycle detected, so we can likely squeeze an unlimited number of
+ // matches into this graph.
+ return NO_MATCH_LIMIT;
+ }
+
+ assert(terms.size() >= maxWidth);
+ return maxWidth;
+}
+
+namespace {
+struct ReachMismatch {
+ explicit ReachMismatch(const CharReach &cr_in) : cr(cr_in) {}
+ bool operator()(const CharReach &a) const { return !overlaps(cr, a); }
+
+private:
+ CharReach cr;
+};
+}
+
+static
+u32 findMaxInfixMatches(const CastleProto &castle,
+ const set<ue2_literal> &lits) {
+ DEBUG_PRINTF("castle=%p, %zu literals\n", &castle, lits.size());
+
+ if (castle.repeats.size() > 1) {
+ DEBUG_PRINTF("more than one top!\n");
+ return NO_MATCH_LIMIT;
+ }
+
+ assert(!castle.repeats.empty());
+ const PureRepeat &pr = castle.repeats.begin()->second;
+ DEBUG_PRINTF("repeat=%s reach=%s\n", pr.bounds.str().c_str(),
+ describeClass(pr.reach).c_str());
+
+ size_t max_count = 0;
+
+ for (const auto &s : lits) {
+ DEBUG_PRINTF("lit s='%s'\n", escapeString(s).c_str());
+ if (s.empty()) {
+ // Likely an anchored case, be conservative here.
+ return NO_MATCH_LIMIT;
+ }
+
+ size_t count = 0;
+
+ auto f = find_if(s.rbegin(), s.rend(), ReachMismatch(pr.reach));
+
+ if (f == s.rbegin()) {
+ DEBUG_PRINTF("lit can't terminate inside infix\n");
+ count = 0;
+ } else if (f != s.rend()) {
+ size_t suffix_len = distance(s.rbegin(), f);
+ DEBUG_PRINTF("suffix of len %zu matches at start\n", suffix_len);
+ if (pr.bounds.max.is_finite()) {
+ count = min(suffix_len, (size_t)pr.bounds.max);
+ } else {
+ count = suffix_len;
+ }
+ } else {
+ DEBUG_PRINTF("whole lit can match inside infix (repeatedly)\n");
+ if (pr.bounds.max.is_finite()) {
+ count = pr.bounds.max;
+ } else {
+ DEBUG_PRINTF("inf bound\n");
+ return NO_MATCH_LIMIT;
+ }
+ }
+
+ DEBUG_PRINTF("count=%zu\n", count);
+ max_count = max(max_count, count);
+ }
+
+ DEBUG_PRINTF("max_count %zu\n", max_count);
+
+ if (max_count > NO_MATCH_LIMIT) {
+ assert(0); // This would be a surprise.
+ return NO_MATCH_LIMIT;
+ }
+
+ return (u32)max_count;
+}
+
+u32 findMaxInfixMatches(const left_id &left, const set<ue2_literal> &lits) {
+ if (left.castle()) {
+ return findMaxInfixMatches(*left.castle(), lits);
+ }
+ if (left.graph()) {
if (!onlyOneTop(*left.graph())) {
DEBUG_PRINTF("more than one top!n");
return NO_MATCH_LIMIT;
}
return findMaxLiteralMatches(*left.graph(), lits);
- }
-
- return NO_MATCH_LIMIT;
-}
-
-void findCountingMiracleInfo(const left_id &left, const vector<u8> &stopTable,
- u8 *cm_count, CharReach *cm_cr) {
- DEBUG_PRINTF("hello\n");
- *cm_count = 0;
- cm_cr->clear();
- if (!left.graph()) {
- return;
- }
-
- const NGHolder &g = *left.graph();
-
+ }
+
+ return NO_MATCH_LIMIT;
+}
+
+void findCountingMiracleInfo(const left_id &left, const vector<u8> &stopTable,
+ u8 *cm_count, CharReach *cm_cr) {
+ DEBUG_PRINTF("hello\n");
+ *cm_count = 0;
+ cm_cr->clear();
+ if (!left.graph()) {
+ return;
+ }
+
+ const NGHolder &g = *left.graph();
+
auto cyclics = find_vertices_in_cycles(g);
-
- if (!proper_out_degree(g.startDs, g)) {
- cyclics.erase(g.startDs);
- }
-
- CharReach cyclic_cr;
- for (NFAVertex v : cyclics) {
+
+ if (!proper_out_degree(g.startDs, g)) {
+ cyclics.erase(g.startDs);
+ }
+
+ CharReach cyclic_cr;
+ for (NFAVertex v : cyclics) {
DEBUG_PRINTF("considering %zu ||=%zu\n", g[v].index,
- g[v].char_reach.count());
- cyclic_cr |= g[v].char_reach;
- }
-
- if (cyclic_cr.none() || cyclic_cr.all()) {
- DEBUG_PRINTF("cyclic cr width %zu\n", cyclic_cr.count());
- return; /* useless */
- }
-
- *cm_cr = ~cyclic_cr;
-
- /* stop character will be part of normal miracles, no need to look for them
- * here too */
- assert(stopTable.size() == N_CHARS);
- for (u32 i = 0; i < N_CHARS; i++) {
- if (stopTable[i]) {
- cm_cr->clear(i);
- }
- }
-
- set<ue2_literal> lits;
- for (size_t c = cm_cr->find_first(); c != CharReach::npos;
- c = cm_cr->find_next(c)) {
- DEBUG_PRINTF("considering %hhx as stop character\n", (u8)c);
- lits.insert(ue2_literal(c, false));
- }
-
+ g[v].char_reach.count());
+ cyclic_cr |= g[v].char_reach;
+ }
+
+ if (cyclic_cr.none() || cyclic_cr.all()) {
+ DEBUG_PRINTF("cyclic cr width %zu\n", cyclic_cr.count());
+ return; /* useless */
+ }
+
+ *cm_cr = ~cyclic_cr;
+
+ /* stop character will be part of normal miracles, no need to look for them
+ * here too */
+ assert(stopTable.size() == N_CHARS);
+ for (u32 i = 0; i < N_CHARS; i++) {
+ if (stopTable[i]) {
+ cm_cr->clear(i);
+ }
+ }
+
+ set<ue2_literal> lits;
+ for (size_t c = cm_cr->find_first(); c != CharReach::npos;
+ c = cm_cr->find_next(c)) {
+ DEBUG_PRINTF("considering %hhx as stop character\n", (u8)c);
+ lits.insert(ue2_literal(c, false));
+ }
+
u32 count = findMaxLiteralMatches(*left.graph(), lits);
- DEBUG_PRINTF("counting miracle %u\n", count + 1);
- if (count && count < 50) {
- *cm_count = count + 1;
- }
-}
-
-} // namespace ue2
+ DEBUG_PRINTF("counting miracle %u\n", count + 1);
+ if (count && count < 50) {
+ *cm_count = count + 1;
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_infix.h b/contrib/libs/hyperscan/src/rose/rose_build_infix.h
index 4706cb8c19..dc2685065a 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_infix.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_infix.h
@@ -1,52 +1,52 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_BUILD_INFIX_H
-#define ROSE_BUILD_INFIX_H
-
-#include "ue2common.h"
-
-#include <set>
-#include <vector>
-
-namespace ue2 {
-
-class CharReach;
-struct left_id;
-struct ue2_literal;
-
-static constexpr u32 NO_MATCH_LIMIT = 0xffffffff;
-
-u32 findMaxInfixMatches(const left_id &left, const std::set<ue2_literal> &lits);
-
-void findCountingMiracleInfo(const left_id &left, const std::vector<u8> &stop,
- u8 *cm_count, CharReach *cm_cr);
-
-} // namespace ue2
-
-#endif // ROSE_BUILD_INFIX_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_INFIX_H
+#define ROSE_BUILD_INFIX_H
+
+#include "ue2common.h"
+
+#include <set>
+#include <vector>
+
+namespace ue2 {
+
+class CharReach;
+struct left_id;
+struct ue2_literal;
+
+static constexpr u32 NO_MATCH_LIMIT = 0xffffffff;
+
+u32 findMaxInfixMatches(const left_id &left, const std::set<ue2_literal> &lits);
+
+void findCountingMiracleInfo(const left_id &left, const std::vector<u8> &stop,
+ u8 *cm_count, CharReach *cm_cr);
+
+} // namespace ue2
+
+#endif // ROSE_BUILD_INFIX_H
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp b/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp
index 10e1cbfa5f..d0540d79b0 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp
@@ -1,68 +1,68 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose compile-time analysis for lookaround masks.
- */
-#include "rose_build_lookaround.h"
-
-#include "rose_build_impl.h"
-#include "nfa/castlecompile.h"
-#include "nfa/goughcompile.h"
-#include "nfa/rdfa.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_util.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose compile-time analysis for lookaround masks.
+ */
+#include "rose_build_lookaround.h"
+
+#include "rose_build_impl.h"
+#include "nfa/castlecompile.h"
+#include "nfa/goughcompile.h"
+#include "nfa/rdfa.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_util.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
#include "util/flat_containers.h"
-#include "util/verify_types.h"
-
-#include <cstdlib>
-#include <queue>
+#include "util/verify_types.h"
+
+#include <cstdlib>
+#include <queue>
#include <sstream>
-
-using namespace std;
-
-namespace ue2 {
-
-/** \brief Max search distance for reachability in front of a role. */
-static const u32 MAX_FWD_LEN = 64;
-
-/** \brief Max search distance for reachability behind a role. */
-static const u32 MAX_BACK_LEN = 64;
-
-/** \brief Max lookaround entries for a role. */
+
+using namespace std;
+
+namespace ue2 {
+
+/** \brief Max search distance for reachability in front of a role. */
+static const u32 MAX_FWD_LEN = 64;
+
+/** \brief Max search distance for reachability behind a role. */
+static const u32 MAX_BACK_LEN = 64;
+
+/** \brief Max lookaround entries for a role. */
static const u32 MAX_LOOKAROUND_ENTRIES = 32;
-
-/** \brief We would rather have lookarounds with smaller reach than this. */
-static const u32 LOOKAROUND_WIDE_REACH = 200;
-
+
+/** \brief We would rather have lookarounds with smaller reach than this. */
+static const u32 LOOKAROUND_WIDE_REACH = 200;
+
#if defined(DEBUG) || defined(DUMP_SUPPORT)
static UNUSED
string dump(const map<s32, CharReach> &look) {
@@ -77,389 +77,389 @@ string dump(const map<s32, CharReach> &look) {
}
#endif
-static
-void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) {
+static
+void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) {
flat_set<NFAVertex> curr, next;
-
- // Consider only successors of start with the required top.
- for (const auto &e : out_edges_range(g.start, g)) {
- NFAVertex v = target(e, g);
- if (v == g.startDs) {
- continue;
- }
+
+ // Consider only successors of start with the required top.
+ for (const auto &e : out_edges_range(g.start, g)) {
+ NFAVertex v = target(e, g);
+ if (v == g.startDs) {
+ continue;
+ }
if (contains(g[e].tops, top)) {
- curr.insert(v);
- }
- }
-
- for (u32 i = 0; i < MAX_FWD_LEN; i++) {
- if (curr.empty() || contains(curr, g.accept) ||
- contains(curr, g.acceptEod)) {
- break;
- }
-
- next.clear();
- CharReach cr;
-
- for (auto v : curr) {
- assert(!is_special(v, g));
- cr |= g[v].char_reach;
- insert(&next, adjacent_vertices(v, g));
- }
-
- assert(cr.any());
- look[i] |= cr;
- curr.swap(next);
- }
-}
-
-static
-void getBackwardReach(const NGHolder &g, ReportID report, u32 lag,
- map<s32, CharReach> &look) {
+ curr.insert(v);
+ }
+ }
+
+ for (u32 i = 0; i < MAX_FWD_LEN; i++) {
+ if (curr.empty() || contains(curr, g.accept) ||
+ contains(curr, g.acceptEod)) {
+ break;
+ }
+
+ next.clear();
+ CharReach cr;
+
+ for (auto v : curr) {
+ assert(!is_special(v, g));
+ cr |= g[v].char_reach;
+ insert(&next, adjacent_vertices(v, g));
+ }
+
+ assert(cr.any());
+ look[i] |= cr;
+ curr.swap(next);
+ }
+}
+
+static
+void getBackwardReach(const NGHolder &g, ReportID report, u32 lag,
+ map<s32, CharReach> &look) {
flat_set<NFAVertex> curr, next;
-
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- if (contains(g[v].reports, report)) {
- curr.insert(v);
- }
- }
-
- for (u32 i = lag + 1; i <= MAX_BACK_LEN; i++) {
- if (curr.empty() || contains(curr, g.start) ||
- contains(curr, g.startDs)) {
- break;
- }
-
- next.clear();
- CharReach cr;
-
- for (auto v : curr) {
- assert(!is_special(v, g));
- cr |= g[v].char_reach;
- insert(&next, inv_adjacent_vertices(v, g));
- }
-
- assert(cr.any());
- look[0 - i] |= cr;
- curr.swap(next);
- }
-}
-
-static
-void getForwardReach(const CastleProto &castle, u32 top,
- map<s32, CharReach> &look) {
- depth len = castle.repeats.at(top).bounds.min;
- len = min(len, depth(MAX_FWD_LEN));
- assert(len.is_finite());
-
- const CharReach &cr = castle.reach();
- for (u32 i = 0; i < len; i++) {
- look[i] |= cr;
- }
-}
-
-static
-void getBackwardReach(const CastleProto &castle, ReportID report, u32 lag,
- map<s32, CharReach> &look) {
- depth min_depth = depth::infinity();
- for (const auto &m : castle.repeats) {
- const PureRepeat &pr = m.second;
- if (contains(pr.reports, report)) {
- min_depth = min(min_depth, pr.bounds.min);
- }
- }
-
- if (!min_depth.is_finite()) {
- assert(0);
- return;
- }
-
- const CharReach &cr = castle.reach();
- for (u32 i = lag + 1; i <= min(lag + (u32)min_depth, MAX_BACK_LEN);
- i++) {
- look[0 - i] |= cr;
- }
-}
-
-static
-void getForwardReach(const raw_dfa &rdfa, map<s32, CharReach> &look) {
- if (rdfa.states.size() < 2) {
- return;
- }
-
+
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ if (contains(g[v].reports, report)) {
+ curr.insert(v);
+ }
+ }
+
+ for (u32 i = lag + 1; i <= MAX_BACK_LEN; i++) {
+ if (curr.empty() || contains(curr, g.start) ||
+ contains(curr, g.startDs)) {
+ break;
+ }
+
+ next.clear();
+ CharReach cr;
+
+ for (auto v : curr) {
+ assert(!is_special(v, g));
+ cr |= g[v].char_reach;
+ insert(&next, inv_adjacent_vertices(v, g));
+ }
+
+ assert(cr.any());
+ look[0 - i] |= cr;
+ curr.swap(next);
+ }
+}
+
+static
+void getForwardReach(const CastleProto &castle, u32 top,
+ map<s32, CharReach> &look) {
+ depth len = castle.repeats.at(top).bounds.min;
+ len = min(len, depth(MAX_FWD_LEN));
+ assert(len.is_finite());
+
+ const CharReach &cr = castle.reach();
+ for (u32 i = 0; i < len; i++) {
+ look[i] |= cr;
+ }
+}
+
+static
+void getBackwardReach(const CastleProto &castle, ReportID report, u32 lag,
+ map<s32, CharReach> &look) {
+ depth min_depth = depth::infinity();
+ for (const auto &m : castle.repeats) {
+ const PureRepeat &pr = m.second;
+ if (contains(pr.reports, report)) {
+ min_depth = min(min_depth, pr.bounds.min);
+ }
+ }
+
+ if (!min_depth.is_finite()) {
+ assert(0);
+ return;
+ }
+
+ const CharReach &cr = castle.reach();
+ for (u32 i = lag + 1; i <= min(lag + (u32)min_depth, MAX_BACK_LEN);
+ i++) {
+ look[0 - i] |= cr;
+ }
+}
+
+static
+void getForwardReach(const raw_dfa &rdfa, map<s32, CharReach> &look) {
+ if (rdfa.states.size() < 2) {
+ return;
+ }
+
flat_set<dstate_id_t> curr, next;
- curr.insert(rdfa.start_anchored);
-
- for (u32 i = 0; i < MAX_FWD_LEN && !curr.empty(); i++) {
- next.clear();
- CharReach cr;
-
- for (const auto state_id : curr) {
- const dstate &ds = rdfa.states[state_id];
-
- if (!ds.reports.empty() || !ds.reports_eod.empty()) {
- return;
- }
-
- for (unsigned c = 0; c < N_CHARS; c++) {
- dstate_id_t succ = ds.next[rdfa.alpha_remap[c]];
- if (succ != DEAD_STATE) {
- cr.set(c);
- next.insert(succ);
- }
- }
- }
-
- assert(cr.any());
- look[i] |= cr;
- curr.swap(next);
- }
-}
-
-static
-void getSuffixForwardReach(const suffix_id &suff, u32 top,
- map<s32, CharReach> &look) {
- if (suff.graph()) {
- getForwardReach(*suff.graph(), top, look);
- } else if (suff.castle()) {
- getForwardReach(*suff.castle(), top, look);
- } else if (suff.dfa()) {
- assert(top == 0); // DFA isn't multi-top capable.
- getForwardReach(*suff.dfa(), look);
- } else if (suff.haig()) {
- assert(top == 0); // DFA isn't multi-top capable.
- getForwardReach(*suff.haig(), look);
- }
-}
-
-static
-void getRoseForwardReach(const left_id &left, u32 top,
- map<s32, CharReach> &look) {
- if (left.graph()) {
- getForwardReach(*left.graph(), top, look);
- } else if (left.castle()) {
- getForwardReach(*left.castle(), top, look);
- } else if (left.dfa()) {
- assert(top == 0); // DFA isn't multi-top capable.
- getForwardReach(*left.dfa(), look);
- } else if (left.haig()) {
- assert(top == 0); // DFA isn't multi-top capable.
- getForwardReach(*left.haig(), look);
- }
-}
-
-static
-void combineForwardMasks(const vector<map<s32, CharReach> > &rose_look,
- map<s32, CharReach> &look) {
- for (u32 i = 0; i < MAX_FWD_LEN; i++) {
- for (const auto &rlook : rose_look) {
- if (contains(rlook, i)) {
- look[i] |= rlook.at(i);
- } else {
- look[i].setall();
- }
- }
- }
-}
-
-static
-void findForwardReach(const RoseGraph &g, const RoseVertex v,
- map<s32, CharReach> &look) {
- if (!g[v].reports.empty()) {
- DEBUG_PRINTF("acceptor\n");
- return;
- }
-
- // Non-leaf vertices can pick up a mask per successor prefix rose
- // engine.
- vector<map<s32, CharReach>> rose_look;
- for (const auto &e : out_edges_range(v, g)) {
- RoseVertex t = target(e, g);
- if (!g[t].left) {
+ curr.insert(rdfa.start_anchored);
+
+ for (u32 i = 0; i < MAX_FWD_LEN && !curr.empty(); i++) {
+ next.clear();
+ CharReach cr;
+
+ for (const auto state_id : curr) {
+ const dstate &ds = rdfa.states[state_id];
+
+ if (!ds.reports.empty() || !ds.reports_eod.empty()) {
+ return;
+ }
+
+ for (unsigned c = 0; c < N_CHARS; c++) {
+ dstate_id_t succ = ds.next[rdfa.alpha_remap[c]];
+ if (succ != DEAD_STATE) {
+ cr.set(c);
+ next.insert(succ);
+ }
+ }
+ }
+
+ assert(cr.any());
+ look[i] |= cr;
+ curr.swap(next);
+ }
+}
+
+static
+void getSuffixForwardReach(const suffix_id &suff, u32 top,
+ map<s32, CharReach> &look) {
+ if (suff.graph()) {
+ getForwardReach(*suff.graph(), top, look);
+ } else if (suff.castle()) {
+ getForwardReach(*suff.castle(), top, look);
+ } else if (suff.dfa()) {
+ assert(top == 0); // DFA isn't multi-top capable.
+ getForwardReach(*suff.dfa(), look);
+ } else if (suff.haig()) {
+ assert(top == 0); // DFA isn't multi-top capable.
+ getForwardReach(*suff.haig(), look);
+ }
+}
+
+static
+void getRoseForwardReach(const left_id &left, u32 top,
+ map<s32, CharReach> &look) {
+ if (left.graph()) {
+ getForwardReach(*left.graph(), top, look);
+ } else if (left.castle()) {
+ getForwardReach(*left.castle(), top, look);
+ } else if (left.dfa()) {
+ assert(top == 0); // DFA isn't multi-top capable.
+ getForwardReach(*left.dfa(), look);
+ } else if (left.haig()) {
+ assert(top == 0); // DFA isn't multi-top capable.
+ getForwardReach(*left.haig(), look);
+ }
+}
+
+static
+void combineForwardMasks(const vector<map<s32, CharReach> > &rose_look,
+ map<s32, CharReach> &look) {
+ for (u32 i = 0; i < MAX_FWD_LEN; i++) {
+ for (const auto &rlook : rose_look) {
+ if (contains(rlook, i)) {
+ look[i] |= rlook.at(i);
+ } else {
+ look[i].setall();
+ }
+ }
+ }
+}
+
+static
+void findForwardReach(const RoseGraph &g, const RoseVertex v,
+ map<s32, CharReach> &look) {
+ if (!g[v].reports.empty()) {
+ DEBUG_PRINTF("acceptor\n");
+ return;
+ }
+
+ // Non-leaf vertices can pick up a mask per successor prefix rose
+ // engine.
+ vector<map<s32, CharReach>> rose_look;
+ for (const auto &e : out_edges_range(v, g)) {
+ RoseVertex t = target(e, g);
+ if (!g[t].left) {
DEBUG_PRINTF("successor %zu has no leftfix\n", g[t].index);
- return;
- }
- rose_look.push_back(map<s32, CharReach>());
- getRoseForwardReach(g[t].left, g[e].rose_top, rose_look.back());
- }
-
- if (g[v].suffix) {
- DEBUG_PRINTF("suffix engine\n");
- rose_look.push_back(map<s32, CharReach>());
- getSuffixForwardReach(g[v].suffix, g[v].suffix.top, rose_look.back());
- }
-
- combineForwardMasks(rose_look, look);
-}
-
-static
-void findBackwardReach(const RoseGraph &g, const RoseVertex v,
- map<s32, CharReach> &look) {
- if (!g[v].left) {
- return;
- }
-
- DEBUG_PRINTF("leftfix, report=%u, lag=%u\n", g[v].left.leftfix_report,
- g[v].left.lag);
-
- if (g[v].left.graph) {
- getBackwardReach(*g[v].left.graph, g[v].left.leftfix_report,
- g[v].left.lag, look);
- } else if (g[v].left.castle) {
- getBackwardReach(*g[v].left.castle, g[v].left.leftfix_report,
- g[v].left.lag, look);
- }
-
- // TODO: implement DFA variants if necessary.
-}
-
-static
-void normalise(map<s32, CharReach> &look) {
- // We can erase entries where the reach is "all characters".
- vector<s32> dead;
- for (const auto &m : look) {
- if (m.second.all()) {
- dead.push_back(m.first);
- }
- }
- erase_all(&look, dead);
-}
-
-namespace {
-
-struct LookPriority {
- explicit LookPriority(const map<s32, CharReach> &look_in) : look(look_in) {}
-
- bool operator()(s32 a, s32 b) const {
- const CharReach &a_reach = look.at(a);
- const CharReach &b_reach = look.at(b);
- if (a_reach.count() != b_reach.count()) {
- return a_reach.count() < b_reach.count();
- }
- return abs(a) < abs(b);
- }
-
-private:
- const map<s32, CharReach> &look;
-};
-
-} // namespace
-
-static
-bool isFloodProne(const map<s32, CharReach> &look, const CharReach &flood_cr) {
- for (const auto &m : look) {
- const CharReach &look_cr = m.second;
- if (!overlaps(look_cr, flood_cr)) {
- return false;
- }
- }
- DEBUG_PRINTF("look can't escape flood on %s\n",
- describeClass(flood_cr).c_str());
- return true;
-}
-
-static
-bool isFloodProne(const map<s32, CharReach> &look,
- const set<CharReach> &flood_reach) {
- if (flood_reach.empty()) {
- return false;
- }
-
- for (const CharReach &flood_cr : flood_reach) {
- if (isFloodProne(look, flood_cr)) {
- return true;
- }
- }
-
- return false;
-}
-
-static
-void reduce(map<s32, CharReach> &look, set<CharReach> &flood_reach) {
- if (look.size() <= MAX_LOOKAROUND_ENTRIES) {
- return;
- }
-
- DEBUG_PRINTF("before reduce: %s\n", dump(look).c_str());
-
- // First, remove floods that we already can't escape; they shouldn't affect
- // the analysis below.
- for (auto it = flood_reach.begin(); it != flood_reach.end();) {
- if (isFloodProne(look, *it)) {
- DEBUG_PRINTF("removing inescapable flood on %s from analysis\n",
- describeClass(*it).c_str());
- flood_reach.erase(it++);
- } else {
- ++it;
- }
- }
-
- LookPriority cmp(look);
- priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
- for (const auto &m : look) {
- pq.push(m.first);
- }
-
- while (!pq.empty() && look.size() > MAX_LOOKAROUND_ENTRIES) {
- s32 d = pq.top();
- assert(contains(look, d));
- const CharReach cr(look[d]); // copy
- pq.pop();
-
- DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
- look.erase(d);
-
- // If removing this entry would result in us becoming flood_prone on a
- // particular flood_reach case, reinstate it and move on.
- if (isFloodProne(look, flood_reach)) {
- DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
- describeClass(cr).c_str());
- look.insert(make_pair(d, cr));
- }
- }
-
- while (!pq.empty()) {
- s32 d = pq.top();
- assert(contains(look, d));
- const CharReach cr(look[d]); // copy
- pq.pop();
-
- if (cr.count() < LOOKAROUND_WIDE_REACH) {
- continue;
- }
-
- DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
- look.erase(d);
-
- // If removing this entry would result in us becoming flood_prone on a
- // particular flood_reach case, reinstate it and move on.
- if (isFloodProne(look, flood_reach)) {
- DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
- describeClass(cr).c_str());
- look.insert(make_pair(d, cr));
- }
- }
-
- DEBUG_PRINTF("after reduce: %s\n", dump(look).c_str());
-}
-
-static
-void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v,
- set<CharReach> &flood_reach) {
- for (u32 lit_id : tbi.g[v].literals) {
+ return;
+ }
+ rose_look.push_back(map<s32, CharReach>());
+ getRoseForwardReach(g[t].left, g[e].rose_top, rose_look.back());
+ }
+
+ if (g[v].suffix) {
+ DEBUG_PRINTF("suffix engine\n");
+ rose_look.push_back(map<s32, CharReach>());
+ getSuffixForwardReach(g[v].suffix, g[v].suffix.top, rose_look.back());
+ }
+
+ combineForwardMasks(rose_look, look);
+}
+
+static
+void findBackwardReach(const RoseGraph &g, const RoseVertex v,
+ map<s32, CharReach> &look) {
+ if (!g[v].left) {
+ return;
+ }
+
+ DEBUG_PRINTF("leftfix, report=%u, lag=%u\n", g[v].left.leftfix_report,
+ g[v].left.lag);
+
+ if (g[v].left.graph) {
+ getBackwardReach(*g[v].left.graph, g[v].left.leftfix_report,
+ g[v].left.lag, look);
+ } else if (g[v].left.castle) {
+ getBackwardReach(*g[v].left.castle, g[v].left.leftfix_report,
+ g[v].left.lag, look);
+ }
+
+ // TODO: implement DFA variants if necessary.
+}
+
+static
+void normalise(map<s32, CharReach> &look) {
+ // We can erase entries where the reach is "all characters".
+ vector<s32> dead;
+ for (const auto &m : look) {
+ if (m.second.all()) {
+ dead.push_back(m.first);
+ }
+ }
+ erase_all(&look, dead);
+}
+
+namespace {
+
+struct LookPriority {
+ explicit LookPriority(const map<s32, CharReach> &look_in) : look(look_in) {}
+
+ bool operator()(s32 a, s32 b) const {
+ const CharReach &a_reach = look.at(a);
+ const CharReach &b_reach = look.at(b);
+ if (a_reach.count() != b_reach.count()) {
+ return a_reach.count() < b_reach.count();
+ }
+ return abs(a) < abs(b);
+ }
+
+private:
+ const map<s32, CharReach> &look;
+};
+
+} // namespace
+
+static
+bool isFloodProne(const map<s32, CharReach> &look, const CharReach &flood_cr) {
+ for (const auto &m : look) {
+ const CharReach &look_cr = m.second;
+ if (!overlaps(look_cr, flood_cr)) {
+ return false;
+ }
+ }
+ DEBUG_PRINTF("look can't escape flood on %s\n",
+ describeClass(flood_cr).c_str());
+ return true;
+}
+
+static
+bool isFloodProne(const map<s32, CharReach> &look,
+ const set<CharReach> &flood_reach) {
+ if (flood_reach.empty()) {
+ return false;
+ }
+
+ for (const CharReach &flood_cr : flood_reach) {
+ if (isFloodProne(look, flood_cr)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static
+void reduce(map<s32, CharReach> &look, set<CharReach> &flood_reach) {
+ if (look.size() <= MAX_LOOKAROUND_ENTRIES) {
+ return;
+ }
+
+ DEBUG_PRINTF("before reduce: %s\n", dump(look).c_str());
+
+ // First, remove floods that we already can't escape; they shouldn't affect
+ // the analysis below.
+ for (auto it = flood_reach.begin(); it != flood_reach.end();) {
+ if (isFloodProne(look, *it)) {
+ DEBUG_PRINTF("removing inescapable flood on %s from analysis\n",
+ describeClass(*it).c_str());
+ flood_reach.erase(it++);
+ } else {
+ ++it;
+ }
+ }
+
+ LookPriority cmp(look);
+ priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
+ for (const auto &m : look) {
+ pq.push(m.first);
+ }
+
+ while (!pq.empty() && look.size() > MAX_LOOKAROUND_ENTRIES) {
+ s32 d = pq.top();
+ assert(contains(look, d));
+ const CharReach cr(look[d]); // copy
+ pq.pop();
+
+ DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
+ look.erase(d);
+
+ // If removing this entry would result in us becoming flood_prone on a
+ // particular flood_reach case, reinstate it and move on.
+ if (isFloodProne(look, flood_reach)) {
+ DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
+ describeClass(cr).c_str());
+ look.insert(make_pair(d, cr));
+ }
+ }
+
+ while (!pq.empty()) {
+ s32 d = pq.top();
+ assert(contains(look, d));
+ const CharReach cr(look[d]); // copy
+ pq.pop();
+
+ if (cr.count() < LOOKAROUND_WIDE_REACH) {
+ continue;
+ }
+
+ DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
+ look.erase(d);
+
+ // If removing this entry would result in us becoming flood_prone on a
+ // particular flood_reach case, reinstate it and move on.
+ if (isFloodProne(look, flood_reach)) {
+ DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
+ describeClass(cr).c_str());
+ look.insert(make_pair(d, cr));
+ }
+ }
+
+ DEBUG_PRINTF("after reduce: %s\n", dump(look).c_str());
+}
+
+static
+void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v,
+ set<CharReach> &flood_reach) {
+ for (u32 lit_id : tbi.g[v].literals) {
const ue2_literal &s = tbi.literals.at(lit_id).s;
- if (s.empty()) {
- continue;
- }
- if (is_flood(s)) {
- CharReach cr(*s.begin());
- DEBUG_PRINTF("flood-prone with reach: %s\n",
- describeClass(cr).c_str());
- flood_reach.insert(cr);
- }
- }
-}
-
+ if (s.empty()) {
+ continue;
+ }
+ if (is_flood(s)) {
+ CharReach cr(*s.begin());
+ DEBUG_PRINTF("flood-prone with reach: %s\n",
+ describeClass(cr).c_str());
+ flood_reach.insert(cr);
+ }
+ }
+}
+
namespace {
struct LookProto {
@@ -470,7 +470,7 @@ struct LookProto {
};
}
-static
+static
vector<LookProto> findLiteralReach(const rose_literal_id &lit) {
vector<LookProto> look;
look.reserve(lit.s.length());
@@ -490,15 +490,15 @@ vector<LookProto> findLiteralReach(const RoseBuildImpl &build,
bool first = true;
vector<LookProto> look;
- for (u32 lit_id : build.g[v].literals) {
+ for (u32 lit_id : build.g[v].literals) {
const rose_literal_id &lit = build.literals.at(lit_id);
auto lit_look = findLiteralReach(lit);
-
+
if (first) {
look = std::move(lit_look);
first = false;
continue;
- }
+ }
// Erase elements from look with keys not in lit_look. Where a key is
// in both maps, union its reach with the lookaround.
@@ -523,34 +523,34 @@ vector<LookProto> findLiteralReach(const RoseBuildImpl &build,
++jt;
}
}
- }
-
- return look;
-}
-
-/**
- * Trim lookaround checks from the prefix that overlap with the literals
- * themselves.
- */
-static
-void trimLiterals(const RoseBuildImpl &build, const RoseVertex v,
- map<s32, CharReach> &look) {
- DEBUG_PRINTF("pre-trim lookaround: %s\n", dump(look).c_str());
-
- for (const auto &m : findLiteralReach(build, v)) {
+ }
+
+ return look;
+}
+
+/**
+ * Trim lookaround checks from the prefix that overlap with the literals
+ * themselves.
+ */
+static
+void trimLiterals(const RoseBuildImpl &build, const RoseVertex v,
+ map<s32, CharReach> &look) {
+ DEBUG_PRINTF("pre-trim lookaround: %s\n", dump(look).c_str());
+
+ for (const auto &m : findLiteralReach(build, v)) {
auto it = look.find(m.offset);
- if (it == end(look)) {
- continue;
- }
+ if (it == end(look)) {
+ continue;
+ }
if (m.reach.isSubsetOf(it->second)) {
- DEBUG_PRINTF("can trim entry at %d\n", it->first);
- look.erase(it);
- }
- }
-
- DEBUG_PRINTF("post-trim lookaround: %s\n", dump(look).c_str());
-}
-
+ DEBUG_PRINTF("can trim entry at %d\n", it->first);
+ look.erase(it);
+ }
+ }
+
+ DEBUG_PRINTF("post-trim lookaround: %s\n", dump(look).c_str());
+}
+
static
void normaliseLeftfix(map<s32, CharReach> &look) {
// We can erase entries where the reach is "all characters", except for the
@@ -621,44 +621,44 @@ void transToLookaround(const vector<map<s32, CharReach>> &looks,
}
}
-void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
- vector<LookEntry> &lookaround) {
- lookaround.clear();
-
- const RoseGraph &g = tbi.g;
-
- map<s32, CharReach> look;
- findBackwardReach(g, v, look);
- findForwardReach(g, v, look);
- trimLiterals(tbi, v, look);
-
- if (look.empty()) {
- return;
- }
-
- normalise(look);
-
- if (look.empty()) {
- return;
- }
-
- set<CharReach> flood_reach;
- findFloodReach(tbi, v, flood_reach);
- reduce(look, flood_reach);
-
- if (look.empty()) {
- return;
- }
-
- DEBUG_PRINTF("lookaround: %s\n", dump(look).c_str());
- lookaround.reserve(look.size());
- for (const auto &m : look) {
- s8 offset = verify_s8(m.first);
- lookaround.emplace_back(offset, m.second);
- }
-}
-
-static
+void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
+ vector<LookEntry> &lookaround) {
+ lookaround.clear();
+
+ const RoseGraph &g = tbi.g;
+
+ map<s32, CharReach> look;
+ findBackwardReach(g, v, look);
+ findForwardReach(g, v, look);
+ trimLiterals(tbi, v, look);
+
+ if (look.empty()) {
+ return;
+ }
+
+ normalise(look);
+
+ if (look.empty()) {
+ return;
+ }
+
+ set<CharReach> flood_reach;
+ findFloodReach(tbi, v, flood_reach);
+ reduce(look, flood_reach);
+
+ if (look.empty()) {
+ return;
+ }
+
+ DEBUG_PRINTF("lookaround: %s\n", dump(look).c_str());
+ lookaround.reserve(look.size());
+ for (const auto &m : look) {
+ s8 offset = verify_s8(m.first);
+ lookaround.emplace_back(offset, m.second);
+ }
+}
+
+static
bool checkShuftiBuckets(const vector<map<s32, CharReach>> &looks,
u32 bucket_size) {
set<u32> bucket;
@@ -685,25 +685,25 @@ bool checkShuftiBuckets(const vector<map<s32, CharReach>> &looks,
bucket.insert(hi_lo);
}
}
- }
+ }
DEBUG_PRINTF("shufti has %lu bucket(s)\n", bucket.size());
return bucket.size() <= bucket_size;
}
-
+
static
bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag,
vector<map<s32, CharReach>> &looks) {
if (!isAcyclic(g)) {
DEBUG_PRINTF("contains back-edge\n");
- return false;
- }
-
+ return false;
+ }
+
// Must be floating chains wired to startDs.
if (!isFloating(g)) {
DEBUG_PRINTF("not a floating start\n");
- return false;
- }
-
+ return false;
+ }
+
vector<NFAVertex> curr;
for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
if (v == g.start || v == g.startDs) {
@@ -737,16 +737,16 @@ bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag,
size_t curr_size = curr.size();
if (curr.size() > 1 && i > lag + MULTIPATH_MAX_LEN) {
DEBUG_PRINTF("range is larger than 16 in multi-path\n");
- return false;
- }
-
+ return false;
+ }
+
for (size_t idx = 0; idx < curr_size; idx++) {
NFAVertex v = curr[idx];
if (v == g.startDs) {
continue;
}
assert(!is_special(v, g));
-
+
for (auto u : inv_adjacent_vertices_range(v, g)) {
if (u == g.start || u == g.startDs) {
curr[idx] = g.startDs;
@@ -792,88 +792,88 @@ bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag,
u32 bucket_size = total_len > 32 ? 8 : 16;
if (!checkShuftiBuckets(looks, bucket_size)) {
DEBUG_PRINTF("shufti has too many buckets\n");
- return false;
- }
+ return false;
+ }
}
-
+
assert(!looks.empty());
if (looks.size() == 1) {
DEBUG_PRINTF("single lookaround\n");
} else {
DEBUG_PRINTF("multi-path lookaround\n");
- }
- DEBUG_PRINTF("done\n");
- return true;
-}
-
-bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
+ }
+ DEBUG_PRINTF("done\n");
+ return true;
+}
+
+bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
vector<vector<LookEntry>> &lookaround) {
- lookaround.clear();
-
- const RoseGraph &g = build.g;
- const left_id leftfix(g[v].left);
-
- if (!contains(build.transient, leftfix)) {
- DEBUG_PRINTF("not transient\n");
- return false;
- }
-
- if (!leftfix.graph()) {
- DEBUG_PRINTF("only supported for graphs so far\n");
- return false;
- }
-
+ lookaround.clear();
+
+ const RoseGraph &g = build.g;
+ const left_id leftfix(g[v].left);
+
+ if (!contains(build.transient, leftfix)) {
+ DEBUG_PRINTF("not transient\n");
+ return false;
+ }
+
+ if (!leftfix.graph()) {
+ DEBUG_PRINTF("only supported for graphs so far\n");
+ return false;
+ }
+
vector<map<s32, CharReach>> looks;
if (!getTransientPrefixReach(*leftfix.graph(), g[v].left.leftfix_report,
g[v].left.lag, looks)) {
DEBUG_PRINTF("graph has loop or too large\n");
- return false;
- }
-
+ return false;
+ }
+
if (!trimMultipathLeftfix(build, v, looks)) {
- return false;
- }
+ return false;
+ }
transToLookaround(looks, lookaround);
-
+
return !lookaround.empty();
-}
-
-void mergeLookaround(vector<LookEntry> &lookaround,
- const vector<LookEntry> &more_lookaround) {
- if (lookaround.size() >= MAX_LOOKAROUND_ENTRIES) {
- DEBUG_PRINTF("big enough!\n");
- return;
- }
-
- // Don't merge lookarounds at offsets we already have entries for.
+}
+
+void mergeLookaround(vector<LookEntry> &lookaround,
+ const vector<LookEntry> &more_lookaround) {
+ if (lookaround.size() >= MAX_LOOKAROUND_ENTRIES) {
+ DEBUG_PRINTF("big enough!\n");
+ return;
+ }
+
+ // Don't merge lookarounds at offsets we already have entries for.
flat_set<s8> offsets;
- for (const auto &e : lookaround) {
- offsets.insert(e.offset);
- }
-
- map<s32, CharReach> more;
- LookPriority cmp(more);
- priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
- for (const auto &e : more_lookaround) {
- if (!contains(offsets, e.offset)) {
- more.emplace(e.offset, e.reach);
- pq.push(e.offset);
- }
- }
-
- while (!pq.empty() && lookaround.size() < MAX_LOOKAROUND_ENTRIES) {
- const s32 offset = pq.top();
- pq.pop();
- const auto &cr = more.at(offset);
- DEBUG_PRINTF("added {%d,%s}\n", offset, describeClass(cr).c_str());
- lookaround.emplace_back(verify_s8(offset), cr);
- }
-
- // Order by offset.
- sort(begin(lookaround), end(lookaround),
- [](const LookEntry &a, const LookEntry &b) {
- return a.offset < b.offset;
- });
-}
-
-} // namespace ue2
+ for (const auto &e : lookaround) {
+ offsets.insert(e.offset);
+ }
+
+ map<s32, CharReach> more;
+ LookPriority cmp(more);
+ priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
+ for (const auto &e : more_lookaround) {
+ if (!contains(offsets, e.offset)) {
+ more.emplace(e.offset, e.reach);
+ pq.push(e.offset);
+ }
+ }
+
+ while (!pq.empty() && lookaround.size() < MAX_LOOKAROUND_ENTRIES) {
+ const s32 offset = pq.top();
+ pq.pop();
+ const auto &cr = more.at(offset);
+ DEBUG_PRINTF("added {%d,%s}\n", offset, describeClass(cr).c_str());
+ lookaround.emplace_back(verify_s8(offset), cr);
+ }
+
+ // Order by offset.
+ sort(begin(lookaround), end(lookaround),
+ [](const LookEntry &a, const LookEntry &b) {
+ return a.offset < b.offset;
+ });
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_lookaround.h b/contrib/libs/hyperscan/src/rose/rose_build_lookaround.h
index 814f784ecf..70d4217ccc 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_lookaround.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_lookaround.h
@@ -1,81 +1,81 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose compile-time analysis for lookaround masks.
- */
-#ifndef ROSE_ROSE_BUILD_LOOKAROUND_H
-#define ROSE_ROSE_BUILD_LOOKAROUND_H
-
-#include "rose_graph.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose compile-time analysis for lookaround masks.
+ */
+#ifndef ROSE_ROSE_BUILD_LOOKAROUND_H
+#define ROSE_ROSE_BUILD_LOOKAROUND_H
+
+#include "rose_graph.h"
#include "util/hash.h"
-
-#include <vector>
-
+
+#include <vector>
+
/** \brief Max path number for multi-path lookaround. */
#define MAX_LOOKAROUND_PATHS 8
-namespace ue2 {
-
-class CharReach;
-class RoseBuildImpl;
-
-/** \brief Lookaround entry prototype, describing the reachability at a given
- * distance from the end of a role match. */
-struct LookEntry {
+namespace ue2 {
+
+class CharReach;
+class RoseBuildImpl;
+
+/** \brief Lookaround entry prototype, describing the reachability at a given
+ * distance from the end of a role match. */
+struct LookEntry {
LookEntry() : offset(0) {}
- LookEntry(s8 offset_in, const CharReach &reach_in)
- : offset(offset_in), reach(reach_in) {}
- s8 offset; //!< offset from role match location.
- CharReach reach; //!< reachability at given offset.
-
- bool operator==(const LookEntry &other) const {
- return offset == other.offset && reach == other.reach;
- }
-};
-
-void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
+ LookEntry(s8 offset_in, const CharReach &reach_in)
+ : offset(offset_in), reach(reach_in) {}
+ s8 offset; //!< offset from role match location.
+ CharReach reach; //!< reachability at given offset.
+
+ bool operator==(const LookEntry &other) const {
+ return offset == other.offset && reach == other.reach;
+ }
+};
+
+void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
std::vector<LookEntry> &look_more);
-
-/**
- * \brief If possible, render the prefix of the given vertex as a lookaround.
- *
- * Given a prefix, returns true (and fills the lookaround vector) if
- * it can be satisfied with a lookaround alone.
- */
-bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
+
+/**
+ * \brief If possible, render the prefix of the given vertex as a lookaround.
+ *
+ * Given a prefix, returns true (and fills the lookaround vector) if
+ * it can be satisfied with a lookaround alone.
+ */
+bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
std::vector<std::vector<LookEntry>> &lookaround);
-
-void mergeLookaround(std::vector<LookEntry> &lookaround,
- const std::vector<LookEntry> &more_lookaround);
-
-} // namespace ue2
-
+
+void mergeLookaround(std::vector<LookEntry> &lookaround,
+ const std::vector<LookEntry> &more_lookaround);
+
+} // namespace ue2
+
namespace std {
template<>
@@ -87,4 +87,4 @@ struct hash<ue2::LookEntry> {
} // namespace std
-#endif // ROSE_ROSE_BUILD_LOOKAROUND_H
+#endif // ROSE_ROSE_BUILD_LOOKAROUND_H
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp b/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp
index 2b92d83fb4..5066dbd578 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp
@@ -1,490 +1,490 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose Build: functions for reducing the size of the Rose graph
- * through merging.
- */
-#include "rose_build_merge.h"
-
-#include "grey.h"
-#include "rose_build.h"
-#include "rose_build_impl.h"
-#include "rose_build_util.h"
-#include "ue2common.h"
-#include "nfa/castlecompile.h"
-#include "nfa/goughcompile.h"
-#include "nfa/limex_limits.h"
-#include "nfa/mcclellancompile.h"
-#include "nfa/nfa_build_util.h"
-#include "nfa/rdfa_merge.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_haig.h"
-#include "nfagraph/ng_is_equal.h"
-#include "nfagraph/ng_lbr.h"
-#include "nfagraph/ng_limex.h"
-#include "nfagraph/ng_mcclellan.h"
-#include "nfagraph/ng_puff.h"
-#include "nfagraph/ng_redundancy.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_reports.h"
-#include "nfagraph/ng_stop.h"
-#include "nfagraph/ng_uncalc_components.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_width.h"
-#include "util/bitutils.h"
-#include "util/charreach.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose Build: functions for reducing the size of the Rose graph
+ * through merging.
+ */
+#include "rose_build_merge.h"
+
+#include "grey.h"
+#include "rose_build.h"
+#include "rose_build_impl.h"
+#include "rose_build_util.h"
+#include "ue2common.h"
+#include "nfa/castlecompile.h"
+#include "nfa/goughcompile.h"
+#include "nfa/limex_limits.h"
+#include "nfa/mcclellancompile.h"
+#include "nfa/nfa_build_util.h"
+#include "nfa/rdfa_merge.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_haig.h"
+#include "nfagraph/ng_is_equal.h"
+#include "nfagraph/ng_lbr.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_mcclellan.h"
+#include "nfagraph/ng_puff.h"
+#include "nfagraph/ng_redundancy.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_stop.h"
+#include "nfagraph/ng_uncalc_components.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
+#include "util/bitutils.h"
+#include "util/charreach.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
#include "util/hash.h"
#include "util/insertion_ordered.h"
-#include "util/order_check.h"
-#include "util/report_manager.h"
-#include "util/ue2string.h"
+#include "util/order_check.h"
+#include "util/report_manager.h"
+#include "util/ue2string.h"
#include "util/unordered.h"
-
-#include <algorithm>
-#include <functional>
-#include <list>
-#include <map>
-#include <queue>
-#include <set>
-#include <string>
-#include <vector>
-#include <utility>
-
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_values;
+
+#include <algorithm>
+#include <functional>
+#include <list>
+#include <map>
+#include <queue>
+#include <set>
+#include <string>
+#include <vector>
+#include <utility>
+
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_values;
using boost::adaptors::map_keys;
-
-namespace ue2 {
-
-static const size_t NARROW_START_MAX = 10;
-static const size_t SMALL_MERGE_MAX_VERTICES_STREAM = 128;
-static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64;
-static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32;
-static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10;
-static const size_t MERGE_GROUP_SIZE_MAX = 200;
+
+namespace ue2 {
+
+static const size_t NARROW_START_MAX = 10;
+static const size_t SMALL_MERGE_MAX_VERTICES_STREAM = 128;
+static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64;
+static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32;
+static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10;
+static const size_t MERGE_GROUP_SIZE_MAX = 200;
static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000;
-
-/** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */
-static const size_t DFA_CHUNK_SIZE_MAX = 200;
-
-/** \brief Max DFA states in a merged DFA. */
-static const size_t DFA_MERGE_MAX_STATES = 8000;
-
+
+/** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */
+static const size_t DFA_CHUNK_SIZE_MAX = 200;
+
+/** \brief Max DFA states in a merged DFA. */
+static const size_t DFA_MERGE_MAX_STATES = 8000;
+
/** \brief In block mode, merge two prefixes even if they don't have identical
* literal sets if they have fewer than this many states and the merged graph
* is also small. */
static constexpr size_t MAX_BLOCK_PREFIX_MERGE_VERTICES = 32;
-
-static
-size_t small_merge_max_vertices(const CompileContext &cc) {
- return cc.streaming ? SMALL_MERGE_MAX_VERTICES_STREAM
- : SMALL_MERGE_MAX_VERTICES_BLOCK;
-}
-
-static
-size_t small_rose_threshold(const CompileContext &cc) {
- return cc.streaming ? SMALL_ROSE_THRESHOLD_STREAM
- : SMALL_ROSE_THRESHOLD_BLOCK;
-}
-
-/**
- * Returns a loose hash of a leftfix for use in dedupeLeftfixes. Note that
- * reports should not contribute to the hash.
- */
-static
+
+static
+size_t small_merge_max_vertices(const CompileContext &cc) {
+ return cc.streaming ? SMALL_MERGE_MAX_VERTICES_STREAM
+ : SMALL_MERGE_MAX_VERTICES_BLOCK;
+}
+
+static
+size_t small_rose_threshold(const CompileContext &cc) {
+ return cc.streaming ? SMALL_ROSE_THRESHOLD_STREAM
+ : SMALL_ROSE_THRESHOLD_BLOCK;
+}
+
+/**
+ * Returns a loose hash of a leftfix for use in dedupeLeftfixes. Note that
+ * reports should not contribute to the hash.
+ */
+static
size_t hashLeftfix(const left_id &left) {
- size_t val = 0;
-
+ size_t val = 0;
+
if (left.castle()) {
hash_combine(val, left.castle()->reach());
for (const auto &pr : left.castle()->repeats) {
- hash_combine(val, pr.first); // top
- hash_combine(val, pr.second.bounds);
- }
+ hash_combine(val, pr.first); // top
+ hash_combine(val, pr.second.bounds);
+ }
} else if (left.graph()) {
hash_combine(val, hash_holder(*left.graph()));
- }
-
- return val;
-}
-
-namespace {
-
-/** Key used to group sets of leftfixes by the dedupeLeftfixes path. */
-struct RoseGroup {
- RoseGroup(const RoseBuildImpl &build, RoseVertex v)
- : left_hash(hashLeftfix(build.g[v].left)),
- lag(build.g[v].left.lag), eod_table(build.isInETable(v)) {
- const RoseGraph &g = build.g;
- assert(in_degree(v, g) == 1);
- RoseVertex u = *inv_adjacent_vertices(v, g).first;
+ }
+
+ return val;
+}
+
+namespace {
+
+/** Key used to group sets of leftfixes by the dedupeLeftfixes path. */
+struct RoseGroup {
+ RoseGroup(const RoseBuildImpl &build, RoseVertex v)
+ : left_hash(hashLeftfix(build.g[v].left)),
+ lag(build.g[v].left.lag), eod_table(build.isInETable(v)) {
+ const RoseGraph &g = build.g;
+ assert(in_degree(v, g) == 1);
+ RoseVertex u = *inv_adjacent_vertices(v, g).first;
parent = g[u].index;
- }
-
- bool operator<(const RoseGroup &b) const {
- const RoseGroup &a = *this;
- ORDER_CHECK(parent);
- ORDER_CHECK(left_hash);
- ORDER_CHECK(lag);
- ORDER_CHECK(eod_table);
- return false;
- }
-
-private:
- /** Parent vertex index. We must use the index, rather than the descriptor,
- * for compile determinism. */
- size_t parent;
-
- /** Quick hash of the leftfix itself. Must be identical for a given pair of
- * graphs if is_equal would return true. */
- size_t left_hash;
-
- /** Leftfix lag value. */
- u32 lag;
-
- /** True if associated vertex (successor) is in the EOD table. We don't
- * allow sharing of leftfix engines between "normal" and EOD operation. */
- bool eod_table;
-};
-
-/**
+ }
+
+ bool operator<(const RoseGroup &b) const {
+ const RoseGroup &a = *this;
+ ORDER_CHECK(parent);
+ ORDER_CHECK(left_hash);
+ ORDER_CHECK(lag);
+ ORDER_CHECK(eod_table);
+ return false;
+ }
+
+private:
+ /** Parent vertex index. We must use the index, rather than the descriptor,
+ * for compile determinism. */
+ size_t parent;
+
+ /** Quick hash of the leftfix itself. Must be identical for a given pair of
+ * graphs if is_equal would return true. */
+ size_t left_hash;
+
+ /** Leftfix lag value. */
+ u32 lag;
+
+ /** True if associated vertex (successor) is in the EOD table. We don't
+ * allow sharing of leftfix engines between "normal" and EOD operation. */
+ bool eod_table;
+};
+
+/**
* Intended to find graphs that are identical except for their report
* IDs. Relies on vertex and edge indices to pick up graphs that have been
* messily put together in different orderings. Only implemented for castles and
* holders.
- */
+ */
static
bool is_equal(const left_id &u_left, ReportID u_report,
const left_id &v_left, ReportID v_report) {
if (u_left.castle() && v_left.castle()) {
return is_equal(*u_left.castle(), u_report, *v_left.castle(), v_report);
}
-
+
if (!u_left.graph() || !v_left.graph()) {
return false;
- }
-
+ }
+
return is_equal(*u_left.graph(), u_report, *v_left.graph(), v_report);
}
-
-} // namespace
-
-/**
- * This pass performs work similar to \ref dedupeSuffixes - it removes
- * duplicate prefix/infixes (that is, leftfixes) which are identical graphs and
- * share the same trigger vertex and lag. Leftfixes are first grouped by
- * parent role and lag to reduce the number of candidates to be inspected
- * for each leftfix. The graphs in each cluster are then compared with each
- * other and the graph is updated to only refer to a canonical version of each
- * graph.
- *
- * Note: only roles with a single predecessor vertex are considered for this
- * transform - it should probably be generalised to work for roles which share
+
+} // namespace
+
+/**
+ * This pass performs work similar to \ref dedupeSuffixes - it removes
+ * duplicate prefix/infixes (that is, leftfixes) which are identical graphs and
+ * share the same trigger vertex and lag. Leftfixes are first grouped by
+ * parent role and lag to reduce the number of candidates to be inspected
+ * for each leftfix. The graphs in each cluster are then compared with each
+ * other and the graph is updated to only refer to a canonical version of each
+ * graph.
+ *
+ * Note: only roles with a single predecessor vertex are considered for this
+ * transform - it should probably be generalised to work for roles which share
* the same set of predecessor roles as for \ref dedupeLeftfixesVariableLag or
* it should be retired entirely.
- */
-bool dedupeLeftfixes(RoseBuildImpl &tbi) {
- DEBUG_PRINTF("deduping leftfixes\n");
- map<RoseGroup, deque<RoseVertex>> roses;
- bool work_done = false;
-
- /* Note: a leftfix's transientness will not be altered by deduping */
-
- // Collect leftfixes into groups.
- RoseGraph &g = tbi.g;
- for (auto v : vertices_range(g)) {
- if (!g[v].left) {
- continue;
- }
- const left_id left(g[v].left);
-
- if (left.haig()) {
- /* TODO: allow merging of identical haigs */
- continue;
- }
-
- if (in_degree(v, g) != 1) {
- continue;
- }
-
- roses[RoseGroup(tbi, v)].push_back(v);
- }
-
- DEBUG_PRINTF("collected %zu rose groups\n", roses.size());
-
- // Walk groups and dedupe the roses therein.
- for (deque<RoseVertex> &verts : roses | map_values) {
- DEBUG_PRINTF("group has %zu vertices\n", verts.size());
-
+ */
+bool dedupeLeftfixes(RoseBuildImpl &tbi) {
+ DEBUG_PRINTF("deduping leftfixes\n");
+ map<RoseGroup, deque<RoseVertex>> roses;
+ bool work_done = false;
+
+ /* Note: a leftfix's transientness will not be altered by deduping */
+
+ // Collect leftfixes into groups.
+ RoseGraph &g = tbi.g;
+ for (auto v : vertices_range(g)) {
+ if (!g[v].left) {
+ continue;
+ }
+ const left_id left(g[v].left);
+
+ if (left.haig()) {
+ /* TODO: allow merging of identical haigs */
+ continue;
+ }
+
+ if (in_degree(v, g) != 1) {
+ continue;
+ }
+
+ roses[RoseGroup(tbi, v)].push_back(v);
+ }
+
+ DEBUG_PRINTF("collected %zu rose groups\n", roses.size());
+
+ // Walk groups and dedupe the roses therein.
+ for (deque<RoseVertex> &verts : roses | map_values) {
+ DEBUG_PRINTF("group has %zu vertices\n", verts.size());
+
unordered_set<left_id> seen;
-
- for (auto jt = verts.begin(), jte = verts.end(); jt != jte; ++jt) {
- RoseVertex v = *jt;
- left_id left(g[v].left);
-
- // Skip cases we've already handled, and mark as seen otherwise.
- if (!seen.insert(left).second) {
- continue;
- }
-
- // Scan the rest of the list for dupes.
+
+ for (auto jt = verts.begin(), jte = verts.end(); jt != jte; ++jt) {
+ RoseVertex v = *jt;
+ left_id left(g[v].left);
+
+ // Skip cases we've already handled, and mark as seen otherwise.
+ if (!seen.insert(left).second) {
+ continue;
+ }
+
+ // Scan the rest of the list for dupes.
for (auto kt = std::next(jt); kt != jte; ++kt) {
if (g[v].left == g[*kt].left
|| !is_equal(g[v].left, g[v].left.leftfix_report,
g[*kt].left, g[*kt].left.leftfix_report)) {
- continue;
- }
-
- // Dupe found.
- DEBUG_PRINTF("rose at vertex %zu is a dupe of %zu\n",
+ continue;
+ }
+
+ // Dupe found.
+ DEBUG_PRINTF("rose at vertex %zu is a dupe of %zu\n",
g[*kt].index, g[v].index);
- assert(g[v].left.lag == g[*kt].left.lag);
- g[*kt].left = g[v].left;
- work_done = true;
- }
- }
- }
-
- return work_done;
-}
-
-/**
- * \brief Returns a numeric key that can be used to group this suffix with
- * others that may be its duplicate.
- */
-static
-size_t suffix_size_key(const suffix_id &s) {
- if (s.graph()) {
- return num_vertices(*s.graph());
- }
- if (s.castle()) {
- return s.castle()->repeats.size();
- }
- return 0;
-}
-
-static
-bool is_equal(const suffix_id &s1, const suffix_id &s2) {
- if (s1.graph() && s2.graph()) {
- return is_equal(*s1.graph(), *s2.graph());
- } else if (s1.castle() && s2.castle()) {
- return is_equal(*s1.castle(), *s2.castle());
- }
- return false;
-}
-
-/**
- * This function simply looks for suffix NGHolder graphs which are identical
- * and updates the roles in the RoseGraph to refer to only a single copy. This
- * obviously has benefits in terms of both performance (as we don't run
- * multiple engines doing the same work) and stream state. This function first
- * groups all suffixes by number of vertices and report set to restrict the set
- * of possible candidates. Each group is then walked to find duplicates using
- * the \ref is_equal comparator for NGHolders and updating the RoseGraph as it
- * goes.
- *
- * Note: does not dedupe suffixes of vertices in the EOD table.
- */
-void dedupeSuffixes(RoseBuildImpl &tbi) {
- DEBUG_PRINTF("deduping suffixes\n");
-
+ assert(g[v].left.lag == g[*kt].left.lag);
+ g[*kt].left = g[v].left;
+ work_done = true;
+ }
+ }
+ }
+
+ return work_done;
+}
+
+/**
+ * \brief Returns a numeric key that can be used to group this suffix with
+ * others that may be its duplicate.
+ */
+static
+size_t suffix_size_key(const suffix_id &s) {
+ if (s.graph()) {
+ return num_vertices(*s.graph());
+ }
+ if (s.castle()) {
+ return s.castle()->repeats.size();
+ }
+ return 0;
+}
+
+static
+bool is_equal(const suffix_id &s1, const suffix_id &s2) {
+ if (s1.graph() && s2.graph()) {
+ return is_equal(*s1.graph(), *s2.graph());
+ } else if (s1.castle() && s2.castle()) {
+ return is_equal(*s1.castle(), *s2.castle());
+ }
+ return false;
+}
+
+/**
+ * This function simply looks for suffix NGHolder graphs which are identical
+ * and updates the roles in the RoseGraph to refer to only a single copy. This
+ * obviously has benefits in terms of both performance (as we don't run
+ * multiple engines doing the same work) and stream state. This function first
+ * groups all suffixes by number of vertices and report set to restrict the set
+ * of possible candidates. Each group is then walked to find duplicates using
+ * the \ref is_equal comparator for NGHolders and updating the RoseGraph as it
+ * goes.
+ *
+ * Note: does not dedupe suffixes of vertices in the EOD table.
+ */
+void dedupeSuffixes(RoseBuildImpl &tbi) {
+ DEBUG_PRINTF("deduping suffixes\n");
+
unordered_map<suffix_id, set<RoseVertex>> suffix_map;
- map<pair<size_t, set<ReportID>>, vector<suffix_id>> part;
-
- // Collect suffixes into groups.
- RoseGraph &g = tbi.g;
- for (auto v : vertices_range(g)) {
- if (!g[v].suffix || tbi.isInETable(v)) {
- continue;
- }
-
- const suffix_id s(g[v].suffix);
-
- if (!(s.graph() || s.castle())) {
- continue; // e.g. Haig
- }
-
- set<RoseVertex> &verts = suffix_map[s];
- if (verts.empty()) {
- part[make_pair(suffix_size_key(s), all_reports(s))].push_back(s);
- }
- verts.insert(v);
- }
-
- DEBUG_PRINTF("collected %zu groups\n", part.size());
-
- for (const auto &cand : part | map_values) {
- if (cand.size() <= 1) {
- continue;
- }
- DEBUG_PRINTF("deduping cand set of size %zu\n", cand.size());
-
- for (auto jt = cand.begin(); jt != cand.end(); ++jt) {
- if (suffix_map[*jt].empty()) {
- continue;
- }
- for (auto kt = next(jt); kt != cand.end(); ++kt) {
- if (suffix_map[*kt].empty() || !is_equal(*jt, *kt)) {
- continue;
- }
- DEBUG_PRINTF("found dupe\n");
- for (auto v : suffix_map[*kt]) {
- RoseVertex dupe = *suffix_map[*jt].begin();
- assert(dupe != v);
- g[v].suffix.graph = g[dupe].suffix.graph;
- g[v].suffix.castle = g[dupe].suffix.castle;
- assert(suffix_id(g[v].suffix) ==
- suffix_id(g[dupe].suffix));
- suffix_map[*jt].insert(v);
- }
- suffix_map[*kt].clear();
- }
- }
- }
-}
-
-namespace {
-
-/**
- * This class stores a mapping from an engine reference (left_id, suffix_id,
- * etc) to a list of vertices, and also allows us to iterate over the set of
- * engine references in insertion order -- we add to the mapping in vertex
- * iteration order, so this allows us to provide a consistent ordering.
- */
-template<class EngineRef>
-class Bouquet {
-private:
- list<EngineRef> ordering; // Unique list in insert order.
+ map<pair<size_t, set<ReportID>>, vector<suffix_id>> part;
+
+ // Collect suffixes into groups.
+ RoseGraph &g = tbi.g;
+ for (auto v : vertices_range(g)) {
+ if (!g[v].suffix || tbi.isInETable(v)) {
+ continue;
+ }
+
+ const suffix_id s(g[v].suffix);
+
+ if (!(s.graph() || s.castle())) {
+ continue; // e.g. Haig
+ }
+
+ set<RoseVertex> &verts = suffix_map[s];
+ if (verts.empty()) {
+ part[make_pair(suffix_size_key(s), all_reports(s))].push_back(s);
+ }
+ verts.insert(v);
+ }
+
+ DEBUG_PRINTF("collected %zu groups\n", part.size());
+
+ for (const auto &cand : part | map_values) {
+ if (cand.size() <= 1) {
+ continue;
+ }
+ DEBUG_PRINTF("deduping cand set of size %zu\n", cand.size());
+
+ for (auto jt = cand.begin(); jt != cand.end(); ++jt) {
+ if (suffix_map[*jt].empty()) {
+ continue;
+ }
+ for (auto kt = next(jt); kt != cand.end(); ++kt) {
+ if (suffix_map[*kt].empty() || !is_equal(*jt, *kt)) {
+ continue;
+ }
+ DEBUG_PRINTF("found dupe\n");
+ for (auto v : suffix_map[*kt]) {
+ RoseVertex dupe = *suffix_map[*jt].begin();
+ assert(dupe != v);
+ g[v].suffix.graph = g[dupe].suffix.graph;
+ g[v].suffix.castle = g[dupe].suffix.castle;
+ assert(suffix_id(g[v].suffix) ==
+ suffix_id(g[dupe].suffix));
+ suffix_map[*jt].insert(v);
+ }
+ suffix_map[*kt].clear();
+ }
+ }
+ }
+}
+
+namespace {
+
+/**
+ * This class stores a mapping from an engine reference (left_id, suffix_id,
+ * etc) to a list of vertices, and also allows us to iterate over the set of
+ * engine references in insertion order -- we add to the mapping in vertex
+ * iteration order, so this allows us to provide a consistent ordering.
+ */
+template<class EngineRef>
+class Bouquet {
+private:
+ list<EngineRef> ordering; // Unique list in insert order.
using BouquetMap = ue2_unordered_map<EngineRef, deque<RoseVertex>>;
- BouquetMap bouquet;
-public:
- void insert(const EngineRef &h, RoseVertex v) {
- typename BouquetMap::iterator f = bouquet.find(h);
- if (f == bouquet.end()) {
- ordering.push_back(h);
- bouquet[h].push_back(v);
- } else {
- f->second.push_back(v);
- }
- }
-
- void insert(const EngineRef &h, const deque<RoseVertex> &verts) {
- typename BouquetMap::iterator f = bouquet.find(h);
- if (f == bouquet.end()) {
- ordering.push_back(h);
- bouquet.insert(make_pair(h, verts));
- } else {
- f->second.insert(f->second.end(), verts.begin(), verts.end());
- }
- }
-
- const deque<RoseVertex> &vertices(const EngineRef &h) const {
- typename BouquetMap::const_iterator it = bouquet.find(h);
- assert(it != bouquet.end()); // must be present
- return it->second;
- }
-
- void erase(const EngineRef &h) {
- assert(bouquet.find(h) != bouquet.end());
- bouquet.erase(h);
- ordering.remove(h);
- }
-
- /** Remove all the elements in the given iterator range. */
- template <class Iter>
- void erase_all(Iter erase_begin, Iter erase_end) {
- for (Iter it = erase_begin; it != erase_end; ++it) {
- bouquet.erase(*it);
- }
-
- // Use a quick-lookup container so that we only have to traverse the
- // 'ordering' list once.
- const set<EngineRef> dead(erase_begin, erase_end);
- for (iterator it = begin(); it != end(); /* incremented inside */) {
- if (contains(dead, *it)) {
- ordering.erase(it++);
- } else {
- ++it;
- }
- }
- }
-
- void clear() {
- ordering.clear();
- bouquet.clear();
- }
-
- size_t size() const { return bouquet.size(); }
-
- // iterate over holders in insert order
- typedef typename list<EngineRef>::iterator iterator;
- iterator begin() { return ordering.begin(); }
- iterator end() { return ordering.end(); }
-
- // const iterate over holders in insert order
- typedef typename list<EngineRef>::const_iterator const_iterator;
- const_iterator begin() const { return ordering.begin(); }
- const_iterator end() const { return ordering.end(); }
-};
-
+ BouquetMap bouquet;
+public:
+ void insert(const EngineRef &h, RoseVertex v) {
+ typename BouquetMap::iterator f = bouquet.find(h);
+ if (f == bouquet.end()) {
+ ordering.push_back(h);
+ bouquet[h].push_back(v);
+ } else {
+ f->second.push_back(v);
+ }
+ }
+
+ void insert(const EngineRef &h, const deque<RoseVertex> &verts) {
+ typename BouquetMap::iterator f = bouquet.find(h);
+ if (f == bouquet.end()) {
+ ordering.push_back(h);
+ bouquet.insert(make_pair(h, verts));
+ } else {
+ f->second.insert(f->second.end(), verts.begin(), verts.end());
+ }
+ }
+
+ const deque<RoseVertex> &vertices(const EngineRef &h) const {
+ typename BouquetMap::const_iterator it = bouquet.find(h);
+ assert(it != bouquet.end()); // must be present
+ return it->second;
+ }
+
+ void erase(const EngineRef &h) {
+ assert(bouquet.find(h) != bouquet.end());
+ bouquet.erase(h);
+ ordering.remove(h);
+ }
+
+ /** Remove all the elements in the given iterator range. */
+ template <class Iter>
+ void erase_all(Iter erase_begin, Iter erase_end) {
+ for (Iter it = erase_begin; it != erase_end; ++it) {
+ bouquet.erase(*it);
+ }
+
+ // Use a quick-lookup container so that we only have to traverse the
+ // 'ordering' list once.
+ const set<EngineRef> dead(erase_begin, erase_end);
+ for (iterator it = begin(); it != end(); /* incremented inside */) {
+ if (contains(dead, *it)) {
+ ordering.erase(it++);
+ } else {
+ ++it;
+ }
+ }
+ }
+
+ void clear() {
+ ordering.clear();
+ bouquet.clear();
+ }
+
+ size_t size() const { return bouquet.size(); }
+
+ // iterate over holders in insert order
+ typedef typename list<EngineRef>::iterator iterator;
+ iterator begin() { return ordering.begin(); }
+ iterator end() { return ordering.end(); }
+
+ // const iterate over holders in insert order
+ typedef typename list<EngineRef>::const_iterator const_iterator;
+ const_iterator begin() const { return ordering.begin(); }
+ const_iterator end() const { return ordering.end(); }
+};
+
typedef Bouquet<left_id> LeftfixBouquet;
-typedef Bouquet<suffix_id> SuffixBouquet;
-
-} // namespace
-
-/**
- * Split a \ref Bouquet of some type into several smaller ones.
- */
-template <class EngineRef>
-static void chunkBouquets(const Bouquet<EngineRef> &in,
- deque<Bouquet<EngineRef>> &out,
- const size_t chunk_size) {
- if (in.size() <= chunk_size) {
- out.push_back(in);
- return;
- }
-
- out.push_back(Bouquet<EngineRef>());
- for (const auto &engine : in) {
- if (out.back().size() >= chunk_size) {
- out.push_back(Bouquet<EngineRef>());
- }
- out.back().insert(engine, in.vertices(engine));
- }
-}
-
+typedef Bouquet<suffix_id> SuffixBouquet;
+
+} // namespace
+
+/**
+ * Split a \ref Bouquet of some type into several smaller ones.
+ */
+template <class EngineRef>
+static void chunkBouquets(const Bouquet<EngineRef> &in,
+ deque<Bouquet<EngineRef>> &out,
+ const size_t chunk_size) {
+ if (in.size() <= chunk_size) {
+ out.push_back(in);
+ return;
+ }
+
+ out.push_back(Bouquet<EngineRef>());
+ for (const auto &engine : in) {
+ if (out.back().size() >= chunk_size) {
+ out.push_back(Bouquet<EngineRef>());
+ }
+ out.back().insert(engine, in.vertices(engine));
+ }
+}
+
static
bool stringsCanFinishAtSameSpot(const ue2_literal &u,
ue2_literal::const_iterator v_b,
@@ -504,31 +504,31 @@ bool stringsCanFinishAtSameSpot(const ue2_literal &u,
return true;
}
-/**
+/**
* Check that if after u has been seen, that it is impossible for the arrival of
* v to require the inspection of an engine earlier than u did.
- *
+ *
* Let delta be the earliest that v can be seen after u (may be zero)
*
* ie, we require u_loc - ulag <= v_loc - vlag (v_loc = u_loc + delta)
* ==> - ulag <= delta - vlag
* ==> vlag - ulag <= delta
- */
-static
-bool checkPrefix(const rose_literal_id &ul, const u32 ulag,
- const rose_literal_id &vl, const u32 vlag) {
+ */
+static
+bool checkPrefix(const rose_literal_id &ul, const u32 ulag,
+ const rose_literal_id &vl, const u32 vlag) {
DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag,
escapeString(vl.s).c_str(), vlag);
if (vl.delay || ul.delay) {
/* engine related literals should not be delayed anyway */
- return false;
- }
-
+ return false;
+ }
+
if (ulag >= vlag) {
assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag);
return true;
- }
+ }
size_t min_allowed_delta = vlag - ulag;
DEBUG_PRINTF("min allow distace %zu\n", min_allowed_delta);
@@ -542,20 +542,20 @@ bool checkPrefix(const rose_literal_id &ul, const u32 ulag,
DEBUG_PRINTF("OK\n");
return true;
-}
-
+}
+
static
bool hasSameEngineType(const RoseVertexProps &u_prop,
const RoseVertexProps &v_prop) {
const left_id u_left = u_prop.left;
const left_id v_left = v_prop.left;
-
+
return !u_left.haig() == !v_left.haig()
&& !u_left.dfa() == !v_left.dfa()
&& !u_left.castle() == !v_left.castle()
&& !u_left.graph() == !v_left.graph();
}
-
+
/**
* Verifies that merging the leftfix of vertices does not cause conflicts due
* to the literals on the right.
@@ -577,25 +577,25 @@ bool compatibleLiteralsForMerge(
// We cannot merge engines that prefix literals in different tables.
if (ulits[0].first->table != vlits[0].first->table) {
- DEBUG_PRINTF("literals in different tables\n");
- return false;
- }
-
+ DEBUG_PRINTF("literals in different tables\n");
+ return false;
+ }
+
// We don't handle delayed cases yet.
for (const auto &ue : ulits) {
const rose_literal_id &ul = *ue.first;
if (ul.delay) {
- return false;
- }
- }
-
+ return false;
+ }
+ }
+
for (const auto &ve : vlits) {
const rose_literal_id &vl = *ve.first;
if (vl.delay) {
- return false;
- }
- }
-
+ return false;
+ }
+ }
+
/* An engine requires that all accesses to it are ordered by offsets. (ie,
we can not check an engine's state at offset Y, if we have already
checked its status at offset X and X > Y). If we can not establish that
@@ -614,9 +614,9 @@ bool compatibleLiteralsForMerge(
DEBUG_PRINTF("prefix check failed\n");
return false;
}
- }
- }
-
+ }
+ }
+
return true;
}
@@ -647,8 +647,8 @@ bool safeBlockModeMerge(const RoseBuildImpl &build, RoseVertex u,
// mergeableRoseVertices).
if (!build.isRootSuccessor(u)) {
return true;
- }
-
+ }
+
const RoseGraph &g = build.g;
// Merge prefixes with identical literal sets (as we'd have to run them
@@ -725,43 +725,43 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
return false;
}
- /* We cannot merge prefixes/vertices if they are successors of different
- * root vertices */
- if (tbi.isRootSuccessor(u)) {
- assert(tbi.isRootSuccessor(v));
- set<RoseVertex> u_preds;
- set<RoseVertex> v_preds;
- insert(&u_preds, inv_adjacent_vertices(u, tbi.g));
- insert(&v_preds, inv_adjacent_vertices(v, tbi.g));
-
- if (u_preds != v_preds) {
- return false;
- }
- }
-
+ /* We cannot merge prefixes/vertices if they are successors of different
+ * root vertices */
+ if (tbi.isRootSuccessor(u)) {
+ assert(tbi.isRootSuccessor(v));
+ set<RoseVertex> u_preds;
+ set<RoseVertex> v_preds;
+ insert(&u_preds, inv_adjacent_vertices(u, tbi.g));
+ insert(&v_preds, inv_adjacent_vertices(v, tbi.g));
+
+ if (u_preds != v_preds) {
+ return false;
+ }
+ }
+
u32 ulag = tbi.g[u].left.lag;
vector<pair<const rose_literal_id *, u32>> ulits;
ulits.reserve(tbi.g[u].literals.size());
for (u32 id : tbi.g[u].literals) {
ulits.emplace_back(&tbi.literals.at(id), ulag);
}
-
+
u32 vlag = tbi.g[v].left.lag;
vector<pair<const rose_literal_id *, u32>> vlits;
vlits.reserve(tbi.g[v].literals.size());
for (u32 id : tbi.g[v].literals) {
vlits.emplace_back(&tbi.literals.at(id), vlag);
}
-
+
if (!compatibleLiteralsForMerge(ulits, vlits)) {
return false;
- }
-
+ }
+
DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].index,
tbi.g[v].index);
- return true;
-}
-
+ return true;
+}
+
/* We cannot merge an engine, if a trigger literal and a post literal overlap
* in such a way that engine status needs to be check at a point before the
* engine's current location.
@@ -773,32 +773,32 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
* ==> delta >= v_lag
*
*/
-static
+static
bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl,
u32 vlag) {
DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(),
escapeString(vl.s).c_str(), vlag);
-
+
for (size_t i = 0; i < vlag; i++) {
if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) {
DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i);
return false;
- }
- }
+ }
+ }
DEBUG_PRINTF("OK\n");
- return true;
-}
-
+ return true;
+}
+
template<typename VertexCont>
static never_inline
bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1,
const VertexCont &v2) {
flat_set<RoseVertex> preds;
- for (auto v : v1) {
+ for (auto v : v1) {
insert(&preds, inv_adjacent_vertices(v, build.g));
- }
-
+ }
+
flat_set<u32> pred_lits;
/* No need to examine delays of a common pred - as it must already have
@@ -811,7 +811,7 @@ bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1,
insert(&known_good_preds, inv_adjacent_vertices(v, build.g));
}
- for (auto u : preds) {
+ for (auto u : preds) {
if (!contains(known_good_preds, u)) {
insert(&pred_lits, build.g[u].literals);
}
@@ -838,17 +838,17 @@ bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1,
if (!checkPredDelay(*ul, vl, vlag)) {
return false;
}
- }
- }
- }
-
- return true;
-}
-
-static
-bool mergeableRoseVertices(const RoseBuildImpl &tbi,
- const deque<RoseVertex> &verts1,
- const deque<RoseVertex> &verts2) {
+ }
+ }
+ }
+
+ return true;
+}
+
+static
+bool mergeableRoseVertices(const RoseBuildImpl &tbi,
+ const deque<RoseVertex> &verts1,
+ const deque<RoseVertex> &verts2) {
assert(!verts1.empty());
assert(!verts2.empty());
@@ -874,9 +874,9 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi,
if (u_preds != v_preds) {
return false;
- }
- }
-
+ }
+ }
+
vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */
for (auto a : verts1) {
if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, v_front, a)) {
@@ -905,90 +905,90 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi,
return false;
}
- // Check preds are compatible as well.
+ // Check preds are compatible as well.
if (!checkPredDelays(tbi, verts1, verts2)
|| !checkPredDelays(tbi, verts2, verts1)) {
- return false;
- }
-
- DEBUG_PRINTF("vertex sets are mergeable\n");
- return true;
-}
-
-bool mergeableRoseVertices(const RoseBuildImpl &tbi, const set<RoseVertex> &v1,
- const set<RoseVertex> &v2) {
- const deque<RoseVertex> vv1(v1.begin(), v1.end());
- const deque<RoseVertex> vv2(v2.begin(), v2.end());
- return mergeableRoseVertices(tbi, vv1, vv2);
-}
-
-/** \brief Priority queue element for Rose merges. */
-namespace {
-struct RoseMergeCandidate {
- RoseMergeCandidate(const left_id &r1_in, const left_id &r2_in, u32 cpl_in,
- u32 tb)
- : r1(r1_in), r2(r2_in), stopxor(0), cpl(cpl_in), states(0),
- tie_breaker(tb) {
- if (r1.graph() && r2.graph()) {
- const NGHolder &h1 = *r1.graph(), &h2 = *r2.graph();
- /* som_none as haigs don't merge and just a guiding heuristic */
- CharReach stop1 = findStopAlphabet(h1, SOM_NONE);
- CharReach stop2 = findStopAlphabet(h2, SOM_NONE);
- stopxor = (stop1 ^ stop2).count();
-
- // We use the number of vertices as an approximation of the state
- // count here, as this is just feeding a comparison.
- u32 vertex_count = num_vertices(h1) + num_vertices(h2);
- states = vertex_count - min(vertex_count, cpl);
- } else if (r1.castle() && r2.castle()) {
- // FIXME
- }
- }
-
- bool operator<(const RoseMergeCandidate &a) const {
- if (stopxor != a.stopxor) {
- return stopxor > a.stopxor;
- }
- if (cpl != a.cpl) {
- return cpl < a.cpl;
- }
- if (states != a.states) {
- return states > a.states;
- }
- return tie_breaker < a.tie_breaker;
- }
-
- left_id r1;
- left_id r2;
- u32 stopxor;
- u32 cpl; //!< common prefix length
- u32 states;
- u32 tie_breaker; //!< determinism
-};
-}
-
-static
+ return false;
+ }
+
+ DEBUG_PRINTF("vertex sets are mergeable\n");
+ return true;
+}
+
+bool mergeableRoseVertices(const RoseBuildImpl &tbi, const set<RoseVertex> &v1,
+ const set<RoseVertex> &v2) {
+ const deque<RoseVertex> vv1(v1.begin(), v1.end());
+ const deque<RoseVertex> vv2(v2.begin(), v2.end());
+ return mergeableRoseVertices(tbi, vv1, vv2);
+}
+
+/** \brief Priority queue element for Rose merges. */
+namespace {
+struct RoseMergeCandidate {
+ RoseMergeCandidate(const left_id &r1_in, const left_id &r2_in, u32 cpl_in,
+ u32 tb)
+ : r1(r1_in), r2(r2_in), stopxor(0), cpl(cpl_in), states(0),
+ tie_breaker(tb) {
+ if (r1.graph() && r2.graph()) {
+ const NGHolder &h1 = *r1.graph(), &h2 = *r2.graph();
+ /* som_none as haigs don't merge and just a guiding heuristic */
+ CharReach stop1 = findStopAlphabet(h1, SOM_NONE);
+ CharReach stop2 = findStopAlphabet(h2, SOM_NONE);
+ stopxor = (stop1 ^ stop2).count();
+
+ // We use the number of vertices as an approximation of the state
+ // count here, as this is just feeding a comparison.
+ u32 vertex_count = num_vertices(h1) + num_vertices(h2);
+ states = vertex_count - min(vertex_count, cpl);
+ } else if (r1.castle() && r2.castle()) {
+ // FIXME
+ }
+ }
+
+ bool operator<(const RoseMergeCandidate &a) const {
+ if (stopxor != a.stopxor) {
+ return stopxor > a.stopxor;
+ }
+ if (cpl != a.cpl) {
+ return cpl < a.cpl;
+ }
+ if (states != a.states) {
+ return states > a.states;
+ }
+ return tie_breaker < a.tie_breaker;
+ }
+
+ left_id r1;
+ left_id r2;
+ u32 stopxor;
+ u32 cpl; //!< common prefix length
+ u32 states;
+ u32 tie_breaker; //!< determinism
+};
+}
+
+static
bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2,
const vector<RoseVertex> &verts1,
const vector<RoseVertex> &verts2) {
- assert(!verts1.empty() && !verts2.empty());
-
+ assert(!verts1.empty() && !verts2.empty());
+
DEBUG_PRINTF("merging pair of leftfixes:\n");
DEBUG_PRINTF(" A:%016zx: tops %s\n", r1.hash(),
as_string_list(all_tops(r1)).c_str());
DEBUG_PRINTF(" B:%016zx: tops %s\n", r2.hash(),
as_string_list(all_tops(r2)).c_str());
-
+
RoseGraph &g = build.g;
- if (r1.graph()) {
- assert(r2.graph());
- assert(r1.graph()->kind == r2.graph()->kind);
+ if (r1.graph()) {
+ assert(r2.graph());
+ assert(r1.graph()->kind == r2.graph()->kind);
if (!mergeNfaPair(*r1.graph(), *r2.graph(), nullptr, build.cc)) {
- DEBUG_PRINTF("nfa merge failed\n");
- return false;
- }
-
+ DEBUG_PRINTF("nfa merge failed\n");
+ return false;
+ }
+
/* The graph in r1 has been merged into the graph in r2. Update r1's
* vertices with the new graph ptr. mergeNfaPair() does not alter the
* tops from the input graph so no need to update top values.
@@ -997,38 +997,38 @@ bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2,
* distinct when they have different trigger conditions.
* [Note: mergeLeftfixesVariableLag() should have a common parent set]
*/
- shared_ptr<NGHolder> &h = g[verts2.front()].left.graph;
- for (RoseVertex v : verts1) {
- g[v].left.graph = h;
- }
-
- return true;
- } else if (r1.castle()) {
- assert(r2.castle());
+ shared_ptr<NGHolder> &h = g[verts2.front()].left.graph;
+ for (RoseVertex v : verts1) {
+ g[v].left.graph = h;
+ }
+
+ return true;
+ } else if (r1.castle()) {
+ assert(r2.castle());
assert(build.cc.grey.allowCastle);
-
- map<u32, u32> top_map;
- if (!mergeCastle(*r2.castle(), *r1.castle(), top_map)) {
- DEBUG_PRINTF("castle merge failed\n");
- return false;
- }
-
- // The castle in r1 has been merged into the castle in r2, with tops
- // remapped as per top_map.
- const shared_ptr<CastleProto> &c = g[verts2.front()].left.castle;
- for (RoseVertex v : verts1) {
- g[v].left.castle = c;
- for (const auto &e : in_edges_range(v, g)) {
- g[e].rose_top = top_map.at(g[e].rose_top);
- }
- }
- return true;
- }
-
- assert(0);
- return false;
-}
-
+
+ map<u32, u32> top_map;
+ if (!mergeCastle(*r2.castle(), *r1.castle(), top_map)) {
+ DEBUG_PRINTF("castle merge failed\n");
+ return false;
+ }
+
+ // The castle in r1 has been merged into the castle in r2, with tops
+ // remapped as per top_map.
+ const shared_ptr<CastleProto> &c = g[verts2.front()].left.castle;
+ for (RoseVertex v : verts1) {
+ g[v].left.castle = c;
+ for (const auto &e : in_edges_range(v, g)) {
+ g[e].rose_top = top_map.at(g[e].rose_top);
+ }
+ }
+ return true;
+ }
+
+ assert(0);
+ return false;
+}
+
/**
* Checks that there is no problem due to the involved vertices if we merge two
* leftfix engines.
@@ -1039,13 +1039,13 @@ bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2,
* - check that engines themselves can be merged
* - use heuristics to find out if merging the engines is wise.
*/
-static
+static
bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build,
const vector<RoseVertex> &targets_1,
const vector<RoseVertex> &targets_2) {
assert(!targets_1.empty());
assert(!targets_2.empty());
-
+
vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */
for (auto a : targets_1) {
u32 ulag = build.g[a].left.lag;
@@ -1053,7 +1053,7 @@ bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build,
ulits.emplace_back(&build.literals.at(id), ulag);
}
}
-
+
vector<pair<const rose_literal_id *, u32>> vlits;
for (auto a : targets_2) {
u32 vlag = build.g[a].left.lag;
@@ -1061,21 +1061,21 @@ bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build,
vlits.emplace_back(&build.literals.at(id), vlag);
}
}
-
+
if (!compatibleLiteralsForMerge(ulits, vlits)) {
return false;
}
-
+
// Check preds are compatible as well.
if (!checkPredDelays(build, targets_1, targets_2)
|| !checkPredDelays(build, targets_2, targets_1)) {
return false;
}
-
+
DEBUG_PRINTF("vertex sets are mergeable\n");
return true;
}
-
+
/**
* In block mode, we want to be a little more selective -- we will only merge
* prefix engines when the literal sets are the same or if the merged graph
@@ -1087,13 +1087,13 @@ bool goodBlockModeMerge(const RoseBuildImpl &build,
const vector<RoseVertex> &v_verts,
const left_id &v_eng) {
assert(!build.cc.streaming);
-
+
// Always merge infixes if we can (subject to the other criteria in
// mergeableRoseVertices).
if (!build.isRootSuccessor(u_verts.front())) {
return true;
}
-
+
const RoseGraph &g = build.g;
flat_set<u32> u_lits;
@@ -1197,20 +1197,20 @@ bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1,
&& (stop1.count() > 10 || stop2.count() > 10)) {
DEBUG_PRINTF("skip merge, would kill stop alphabet\n");
return false;
- }
+ }
size_t maxstop = max(stop1.count(), stop2.count());
if (maxstop > 200 && stopboth.count() < 200) {
DEBUG_PRINTF("skip merge, would reduce stop alphabet\n");
return false;
}
}
-
+
/* Rechecking that the targets are compatible, as we may have already
* merged new states into r1 or r2 and we need to verify that this
* candidate is still ok. */
if (!checkVerticesOkForLeftfixMerge(build, targets_1, targets_2)) {
return false;
- }
+ }
if (!build.cc.streaming
&& !goodBlockModeMerge(build, targets_1, r1, targets_2, r2)) {
@@ -1218,62 +1218,62 @@ bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1,
}
return mergeLeftfixPair(build, r1, r2, targets_1, targets_2);
-}
-
-static
-bool nfaHasNarrowStart(const NGHolder &g) {
+}
+
+static
+bool nfaHasNarrowStart(const NGHolder &g) {
if (out_degree(g.startDs, g) > 1) {
- return false; // unanchored
- }
-
- CharReach cr;
-
- for (auto v : adjacent_vertices_range(g.start, g)) {
- if (v == g.startDs) {
- continue;
- }
- cr |= g[v].char_reach;
- }
- return cr.count() <= NARROW_START_MAX;
-}
-
-static
-bool nfaHasFiniteMaxWidth(const NGHolder &g) {
- return findMaxWidth(g).is_finite();
-}
-
-static
-bool hasReformedStartDotStar(const NGHolder &h, const Grey &grey) {
- if (!proper_out_degree(h.startDs, h)) {
- return false;
- }
-
- assert(!is_triggered(h));
-
- NGHolder h_temp;
- cloneHolder(h_temp, h);
-
- vector<BoundedRepeatData> repeats;
- bool suitable_for_sds_reforming = false;
- const map<u32, u32> fixed_depth_tops; /* not relevant for cfa check */
- const map<u32, vector<vector<CharReach>>> triggers; /* not for cfa check */
- const bool simple_model_selection = true; // FIRST is considered simple
- analyseRepeats(h_temp, nullptr, fixed_depth_tops, triggers, &repeats, true,
- simple_model_selection, grey, &suitable_for_sds_reforming);
-
- return suitable_for_sds_reforming;
-}
-
-static
-u32 commonPrefixLength(left_id &r1, left_id &r2) {
- if (r1.graph() && r2.graph()) {
+ return false; // unanchored
+ }
+
+ CharReach cr;
+
+ for (auto v : adjacent_vertices_range(g.start, g)) {
+ if (v == g.startDs) {
+ continue;
+ }
+ cr |= g[v].char_reach;
+ }
+ return cr.count() <= NARROW_START_MAX;
+}
+
+static
+bool nfaHasFiniteMaxWidth(const NGHolder &g) {
+ return findMaxWidth(g).is_finite();
+}
+
+static
+bool hasReformedStartDotStar(const NGHolder &h, const Grey &grey) {
+ if (!proper_out_degree(h.startDs, h)) {
+ return false;
+ }
+
+ assert(!is_triggered(h));
+
+ NGHolder h_temp;
+ cloneHolder(h_temp, h);
+
+ vector<BoundedRepeatData> repeats;
+ bool suitable_for_sds_reforming = false;
+ const map<u32, u32> fixed_depth_tops; /* not relevant for cfa check */
+ const map<u32, vector<vector<CharReach>>> triggers; /* not for cfa check */
+ const bool simple_model_selection = true; // FIRST is considered simple
+ analyseRepeats(h_temp, nullptr, fixed_depth_tops, triggers, &repeats, true,
+ simple_model_selection, grey, &suitable_for_sds_reforming);
+
+ return suitable_for_sds_reforming;
+}
+
+static
+u32 commonPrefixLength(left_id &r1, left_id &r2) {
+ if (r1.graph() && r2.graph()) {
return commonPrefixLength(*r1.graph(), *r2.graph());
- } else if (r1.castle() && r2.castle()) {
- return min(findMinWidth(*r1.castle()), findMinWidth(*r2.castle()));
- }
- return 0;
-}
-
+ } else if (r1.castle() && r2.castle()) {
+ return min(findMinWidth(*r1.castle()), findMinWidth(*r2.castle()));
+ }
+ return 0;
+}
+
namespace {
struct MergeKey {
MergeKey(const left_id &left, flat_set<RoseVertex> parents_in) :
@@ -1352,89 +1352,89 @@ insertion_ordered_map<left_id, vector<RoseVertex>> get_eng_verts(RoseGraph &g) {
return eng_verts;
}
-/**
- * This pass attempts to merge prefix/infix engines which share a common set of
- * parent vertices.
- *
- * Engines are greedily merged pairwise by this process based on a priority
- * queue keyed off the common prefix length.
- *
- * Engines are not merged if the lags are not compatible or if it would damage
- * the stop alphabet.
- *
- * Infixes:
+/**
+ * This pass attempts to merge prefix/infix engines which share a common set of
+ * parent vertices.
+ *
+ * Engines are greedily merged pairwise by this process based on a priority
+ * queue keyed off the common prefix length.
+ *
+ * Engines are not merged if the lags are not compatible or if it would damage
+ * the stop alphabet.
+ *
+ * Infixes:
* - It is expected that when this is run all infixes are still at the single
* top stage as we have not yet merged unrelated infixes together. After
* execution, castles may have multiple (but equivalent) tops.
- *
- * Prefixes:
- * - transient prefixes are not considered.
- * - with a max width or a narrow start are kept segregated by
- * this phase and can only be merged with similar infixes.
- * - in block mode, merges are only performed if literal sets are the same.
- * - merges are not considered in cases where dot star start state will be
- * reformed to optimise a leading repeat.
- */
+ *
+ * Prefixes:
+ * - transient prefixes are not considered.
+ * - with a max width or a narrow start are kept segregated by
+ * this phase and can only be merged with similar infixes.
+ * - in block mode, merges are only performed if literal sets are the same.
+ * - merges are not considered in cases where dot star start state will be
+ * reformed to optimise a leading repeat.
+ */
void mergeLeftfixesVariableLag(RoseBuildImpl &build) {
if (!build.cc.grey.mergeRose) {
- return;
- }
+ return;
+ }
assert(!hasOrphanedTops(build));
-
+
RoseGraph &g = build.g;
-
- DEBUG_PRINTF("-----\n");
- DEBUG_PRINTF("entry\n");
- DEBUG_PRINTF("-----\n");
-
+
+ DEBUG_PRINTF("-----\n");
+ DEBUG_PRINTF("entry\n");
+ DEBUG_PRINTF("-----\n");
+
auto eng_verts = get_eng_verts(g);
-
+
map<MergeKey, vector<left_id>> engine_groups;
for (const auto &e : eng_verts) {
const left_id &left = e.first;
const auto &verts = e.second;
- // Only non-transient for the moment.
+ // Only non-transient for the moment.
if (contains(build.transient, left)) {
- continue;
- }
-
- // No forced McClellan or Haig infix merges.
+ continue;
+ }
+
+ // No forced McClellan or Haig infix merges.
if (left.dfa() || left.haig()) {
- continue;
- }
+ continue;
+ }
assert(left.graph() || left.castle());
-
+
if (left.graph()) {
const NGHolder &h = *left.graph();
/* we should not have merged yet */
assert(!is_triggered(h) || onlyOneTop(h));
-
+
if (hasReformedStartDotStar(h, build.cc.grey)) {
- continue; // preserve the optimisation of the leading repeat
- }
+ continue; // preserve the optimisation of the leading repeat
+ }
} else {
assert(left.castle());
-
+
if (!build.cc.grey.allowCastle) {
DEBUG_PRINTF("castle merging disallowed by greybox\n");
- continue;
- }
- }
-
- // We collapse the anchored root into the root vertex when calculating
- // parents, so that we can merge differently-anchored prefix roses
- // together. (Prompted by UE-2100)
-
+ continue;
+ }
+ }
+
+ // We collapse the anchored root into the root vertex when calculating
+ // parents, so that we can merge differently-anchored prefix roses
+ // together. (Prompted by UE-2100)
+
flat_set<RoseVertex> parents;
for (RoseVertex v : verts) {
insert(&parents, inv_adjacent_vertices_range(v, g));
- }
-
+ }
+
if (contains(parents, build.anchored_root)) {
parents.erase(build.anchored_root);
parents.insert(build.root);
- }
-
+ }
+
assert(!parents.empty());
#ifndef _WIN32
@@ -1450,8 +1450,8 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) {
MergeKey *mk = new MergeKey(left, parents);
engine_groups[*mk].push_back(left);
#endif
- }
-
+ }
+
vector<vector<left_id>> chunks;
for (auto &raw_group : engine_groups | map_values) {
chunk(move(raw_group), &chunks, MERGE_GROUP_SIZE_MAX);
@@ -1462,37 +1462,37 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) {
for (auto &roses : chunks) {
if (roses.size() < 2) {
- continue;
- }
+ continue;
+ }
// All pairs on the prio queue.
u32 tie_breaker = 0;
priority_queue<RoseMergeCandidate> pq;
for (auto it = roses.begin(), ite = roses.end(); it != ite; ++it) {
left_id r1 = *it;
const vector<RoseVertex> &targets_1 = eng_verts[r1];
-
+
for (auto jt = next(it); jt != ite; ++jt) {
left_id r2 = *jt;
-
+
/* we should have already split on engine types and reach */
assert(!r1.castle() == !r2.castle());
assert(!r1.graph() == !r2.graph());
assert(!r1.castle()
|| r1.castle()->reach() == r2.castle()->reach());
-
+
const vector<RoseVertex> &targets_2 = eng_verts[r2];
if (!checkVerticesOkForLeftfixMerge(build, targets_1,
targets_2)) {
continue; // No point queueing unmergeable cases.
}
-
+
u32 cpl = commonPrefixLength(r1, r2);
pq.push(RoseMergeCandidate(r1, r2, cpl, tie_breaker++));
}
}
-
+
DEBUG_PRINTF("merge queue has %zu entries\n", pq.size());
-
+
while (!pq.empty()) {
left_id r1 = pq.top().r1;
left_id r2 = pq.top().r2;
@@ -1505,47 +1505,47 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) {
targets_2)) {
insert(&targets_2, targets_2.end(), targets_1);
targets_1.clear();
- }
- }
- }
-
- DEBUG_PRINTF("-----\n");
- DEBUG_PRINTF("exit\n");
- DEBUG_PRINTF("-----\n");
+ }
+ }
+ }
+
+ DEBUG_PRINTF("-----\n");
+ DEBUG_PRINTF("exit\n");
+ DEBUG_PRINTF("-----\n");
assert(!hasOrphanedTops(build));
-}
-
-namespace {
-
-/**
- * Key used to group sets of leftfixes for the dedupeLeftfixesVariableLag path.
- */
-struct DedupeLeftKey {
+}
+
+namespace {
+
+/**
+ * Key used to group sets of leftfixes for the dedupeLeftfixesVariableLag path.
+ */
+struct DedupeLeftKey {
DedupeLeftKey(const RoseBuildImpl &build,
flat_set<pair<size_t, u32>> preds_in, const left_id &left)
: left_hash(hashLeftfix(left)), preds(move(preds_in)),
transient(contains(build.transient, left)) {
- }
-
- bool operator<(const DedupeLeftKey &b) const {
+ }
+
+ bool operator<(const DedupeLeftKey &b) const {
return tie(left_hash, preds, transient)
< tie(b.left_hash, b.preds, b.transient);
- }
-
-private:
- /** Quick hash of the leftfix itself. Must be identical for a given pair of
- * graphs if is_equal would return true. */
- size_t left_hash;
-
- /** For each in-edge, the pair of (parent index, edge top). */
+ }
+
+private:
+ /** Quick hash of the leftfix itself. Must be identical for a given pair of
+ * graphs if is_equal would return true. */
+ size_t left_hash;
+
+ /** For each in-edge, the pair of (parent index, edge top). */
flat_set<pair<size_t, u32>> preds;
/** We don't want to combine transient with non-transient. */
bool transient;
-};
-
-} // namespace
-
+};
+
+} // namespace
+
static
flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) {
flat_set<pair<size_t, u32>> preds;
@@ -1555,50 +1555,50 @@ flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) {
return preds;
}
-/**
- * This is a generalisation of \ref dedupeLeftfixes which relaxes two
- * restrictions: multiple predecessor roles are allowed and the delay used by
- * each vertex may not be the same for each vertex. Like \ref dedupeLeftfixes,
- * the leftfixes' successor vertices are first grouped to reduce the number of
- * potential candidates - the grouping in this case is by the set of
- * predecessor roles with their associated top events. For the dedupe to be
- * possible, it is required that:
- *
- * 1. the nfa graphs with respect to the relevant reports are identical
- * 2. the nfa graphs are triggered by the same roles with same events (ensured
- * by the initial grouping pass)
- * 3. all the successor roles of either graph can inspect the combined leftfix
- * without advancing the state of the leftfix past the point that another
- * successor may want to inspect it; the overlap relationships between the
- * involved literals are examined to ensure that this property holds.
- *
+/**
+ * This is a generalisation of \ref dedupeLeftfixes which relaxes two
+ * restrictions: multiple predecessor roles are allowed and the delay used by
+ * each vertex may not be the same for each vertex. Like \ref dedupeLeftfixes,
+ * the leftfixes' successor vertices are first grouped to reduce the number of
+ * potential candidates - the grouping in this case is by the set of
+ * predecessor roles with their associated top events. For the dedupe to be
+ * possible, it is required that:
+ *
+ * 1. the nfa graphs with respect to the relevant reports are identical
+ * 2. the nfa graphs are triggered by the same roles with same events (ensured
+ * by the initial grouping pass)
+ * 3. all the successor roles of either graph can inspect the combined leftfix
+ * without advancing the state of the leftfix past the point that another
+ * successor may want to inspect it; the overlap relationships between the
+ * involved literals are examined to ensure that this property holds.
+ *
* Note: this is unable to dedupe when delayed literals are involved unlike
* dedupeLeftfixes.
- */
+ */
void dedupeLeftfixesVariableLag(RoseBuildImpl &build) {
- DEBUG_PRINTF("entry\n");
-
+ DEBUG_PRINTF("entry\n");
+
RoseGraph &g = build.g;
auto eng_verts = get_eng_verts(g);
-
+
map<DedupeLeftKey, vector<left_id>> engine_groups;
for (const auto &e : eng_verts) {
const left_id &left = e.first;
const auto &verts = e.second;
-
+
/* There should only be one report on an engine as no merges have
* happened yet. (aside from eod prefixes) */
if (all_reports(left).size() != 1) {
assert(any_of_in(adjacent_vertices_range(verts.front(), g),
[&](RoseVertex w) { return g[w].eod_accept; }));
- continue;
- }
-
+ continue;
+ }
+
if (left.haig()) {
/* TODO: allow deduping of identical haigs */
- continue;
- }
-
+ continue;
+ }
+
if (left.graph()) {
/* we should not have merged yet */
assert(!is_triggered(*left.graph()) || onlyOneTop(*left.graph()));
@@ -1612,52 +1612,52 @@ void dedupeLeftfixesVariableLag(RoseBuildImpl &build) {
}
}
engine_groups[DedupeLeftKey(build, move(preds), left)].push_back(left);
- }
-
+ }
+
/* We don't bother chunking as we expect deduping to be successful if the
* hashes match */
-
+
for (auto &group : engine_groups | map_values) {
DEBUG_PRINTF("group of %zu roses\n", group.size());
if (group.size() < 2) {
- continue;
- }
-
+ continue;
+ }
+
for (auto it = group.begin(); it != group.end(); ++it) {
- left_id r1 = *it;
+ left_id r1 = *it;
vector<RoseVertex> &verts1 = eng_verts[r1];
assert(!verts1.empty()); /* cleared engines should be behind us */
-
+
assert(all_reports(r1).size() == 1);
ReportID r1_report = *all_reports(r1).begin();
for (auto jt = next(it); jt != group.end(); ++jt) {
- left_id r2 = *jt;
+ left_id r2 = *jt;
vector<RoseVertex> &verts2 = eng_verts[r2];
assert(!verts2.empty());
assert(all_reports(r2).size() == 1);
ReportID r2_report = *all_reports(r2).begin();
-
+
if (!is_equal(r1, r1_report, r2, r2_report)) {
- continue;
- }
-
+ continue;
+ }
+
if (!checkVerticesOkForLeftfixMerge(build, verts1, verts2)) {
- continue;
- }
-
- DEBUG_PRINTF("%p and %p are dupes\n", r1.graph(), r2.graph());
-
+ continue;
+ }
+
+ DEBUG_PRINTF("%p and %p are dupes\n", r1.graph(), r2.graph());
+
// Replace r1 with r2.
-
- for (auto v : verts1) {
- DEBUG_PRINTF("replacing report %u with %u on %zu\n",
+
+ for (auto v : verts1) {
+ DEBUG_PRINTF("replacing report %u with %u on %zu\n",
r2_report, r1_report, g[v].index);
- u32 orig_lag = g[v].left.lag;
+ u32 orig_lag = g[v].left.lag;
g[v].left = g[verts2.front()].left;
- g[v].left.lag = orig_lag;
- }
+ g[v].left.lag = orig_lag;
+ }
insert(&verts2, verts2.end(), verts1);
verts1.clear();
@@ -1665,306 +1665,306 @@ void dedupeLeftfixesVariableLag(RoseBuildImpl &build) {
/* remove stale entry from transient set, if present */
build.transient.erase(r1);
- break;
- }
- }
- }
-}
-
-static
+ break;
+ }
+ }
+ }
+}
+
+static
u32 findUnusedTop(const flat_set<u32> &tops) {
- u32 i = 0;
- while (contains(tops, i)) {
- i++;
- }
- return i;
-}
-
-// Replace top 't' on edges with new top 'u'.
-static
-void replaceTops(NGHolder &h, const map<u32, u32> &top_mapping) {
- for (const auto &e : out_edges_range(h.start, h)) {
- NFAVertex v = target(e, h);
- if (v == h.startDs) {
- continue;
- }
+ u32 i = 0;
+ while (contains(tops, i)) {
+ i++;
+ }
+ return i;
+}
+
+// Replace top 't' on edges with new top 'u'.
+static
+void replaceTops(NGHolder &h, const map<u32, u32> &top_mapping) {
+ for (const auto &e : out_edges_range(h.start, h)) {
+ NFAVertex v = target(e, h);
+ if (v == h.startDs) {
+ continue;
+ }
flat_set<u32> new_tops;
for (u32 t : h[e].tops) {
DEBUG_PRINTF("vertex %zu has top %u\n", h[v].index, t);
new_tops.insert(top_mapping.at(t));
}
h[e].tops = std::move(new_tops);
- }
-}
-
-static
-bool setDistinctTops(NGHolder &h1, const NGHolder &h2,
- map<u32, u32> &top_mapping) {
+ }
+}
+
+static
+bool setDistinctTops(NGHolder &h1, const NGHolder &h2,
+ map<u32, u32> &top_mapping) {
flat_set<u32> tops1 = getTops(h1), tops2 = getTops(h2);
-
- DEBUG_PRINTF("before: h1 has %zu tops, h2 has %zu tops\n", tops1.size(),
- tops2.size());
-
- // If our tops don't intersect, we're OK to merge with no changes.
- if (!has_intersection(tops1, tops2)) {
- DEBUG_PRINTF("tops don't intersect\n");
- return true;
- }
-
- // Otherwise, we have to renumber the tops in h1 so that they don't overlap
- // with the tops in h2.
- top_mapping.clear();
- for (u32 t : tops1) {
- u32 u = findUnusedTop(tops2);
- DEBUG_PRINTF("replacing top %u with %u in h1\n", t, u);
- top_mapping.insert(make_pair(t, u));
- assert(!contains(tops2, u));
- tops2.insert(u);
- }
-
- replaceTops(h1, top_mapping);
- return true;
-}
-
-bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
- const deque<RoseVertex> &verts1) {
- map<u32, u32> top_mapping;
- if (!setDistinctTops(h1, h2, top_mapping)) {
- return false;
- }
-
- if (top_mapping.empty()) {
- return true; // No remapping necessary.
- }
-
- for (auto v : verts1) {
+
+ DEBUG_PRINTF("before: h1 has %zu tops, h2 has %zu tops\n", tops1.size(),
+ tops2.size());
+
+ // If our tops don't intersect, we're OK to merge with no changes.
+ if (!has_intersection(tops1, tops2)) {
+ DEBUG_PRINTF("tops don't intersect\n");
+ return true;
+ }
+
+ // Otherwise, we have to renumber the tops in h1 so that they don't overlap
+ // with the tops in h2.
+ top_mapping.clear();
+ for (u32 t : tops1) {
+ u32 u = findUnusedTop(tops2);
+ DEBUG_PRINTF("replacing top %u with %u in h1\n", t, u);
+ top_mapping.insert(make_pair(t, u));
+ assert(!contains(tops2, u));
+ tops2.insert(u);
+ }
+
+ replaceTops(h1, top_mapping);
+ return true;
+}
+
+bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
+ const deque<RoseVertex> &verts1) {
+ map<u32, u32> top_mapping;
+ if (!setDistinctTops(h1, h2, top_mapping)) {
+ return false;
+ }
+
+ if (top_mapping.empty()) {
+ return true; // No remapping necessary.
+ }
+
+ for (auto v : verts1) {
DEBUG_PRINTF("vertex %zu\n", g[v].index);
- assert(!g[v].left.haig);
- assert(!g[v].left.dfa);
- for (const auto &e : in_edges_range(v, g)) {
- u32 t = g[e].rose_top;
- DEBUG_PRINTF("t=%u\n", t);
- assert(contains(top_mapping, t));
- g[e].rose_top = top_mapping[t];
- DEBUG_PRINTF("edge (%zu,%zu) went from top %u to %u\n",
+ assert(!g[v].left.haig);
+ assert(!g[v].left.dfa);
+ for (const auto &e : in_edges_range(v, g)) {
+ u32 t = g[e].rose_top;
+ DEBUG_PRINTF("t=%u\n", t);
+ assert(contains(top_mapping, t));
+ g[e].rose_top = top_mapping[t];
+ DEBUG_PRINTF("edge (%zu,%zu) went from top %u to %u\n",
g[source(e, g)].index, g[target(e, g)].index, t,
- top_mapping[t]);
- }
- }
-
- return true;
-}
-
-static
-bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
- const deque<RoseVertex> &verts1) {
- map<u32, u32> top_mapping;
- if (!setDistinctTops(h1, h2, top_mapping)) {
- return false;
- }
-
- if (top_mapping.empty()) {
- return true; // No remapping necessary.
- }
-
- for (auto v : verts1) {
+ top_mapping[t]);
+ }
+ }
+
+ return true;
+}
+
+static
+bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
+ const deque<RoseVertex> &verts1) {
+ map<u32, u32> top_mapping;
+ if (!setDistinctTops(h1, h2, top_mapping)) {
+ return false;
+ }
+
+ if (top_mapping.empty()) {
+ return true; // No remapping necessary.
+ }
+
+ for (auto v : verts1) {
DEBUG_PRINTF("vertex %zu\n", g[v].index);
- u32 t = g[v].suffix.top;
- assert(contains(top_mapping, t));
- g[v].suffix.top = top_mapping[t];
- }
-
- return true;
-}
-
-/** \brief Estimate the number of accel states in the given graph when built as
- * an NFA.
- *
- * (The easiest way to estimate something like this is to actually build it:
- * the criteria for NFA acceleration are quite complicated and buried in
- * limex_compile.)
- */
-static
-u32 estimatedAccelStates(const RoseBuildImpl &tbi, const NGHolder &h) {
- return countAccelStates(h, &tbi.rm, tbi.cc);
-}
-
-static
+ u32 t = g[v].suffix.top;
+ assert(contains(top_mapping, t));
+ g[v].suffix.top = top_mapping[t];
+ }
+
+ return true;
+}
+
+/** \brief Estimate the number of accel states in the given graph when built as
+ * an NFA.
+ *
+ * (The easiest way to estimate something like this is to actually build it:
+ * the criteria for NFA acceleration are quite complicated and buried in
+ * limex_compile.)
+ */
+static
+u32 estimatedAccelStates(const RoseBuildImpl &tbi, const NGHolder &h) {
+ return countAccelStates(h, &tbi.rm, tbi.cc);
+}
+
+static
void mergeNfaLeftfixes(RoseBuildImpl &tbi, LeftfixBouquet &roses) {
- RoseGraph &g = tbi.g;
- DEBUG_PRINTF("%zu nfa rose merge candidates\n", roses.size());
-
- // We track the number of accelerable states for each graph in a map and
- // only recompute them when the graph is modified.
+ RoseGraph &g = tbi.g;
+ DEBUG_PRINTF("%zu nfa rose merge candidates\n", roses.size());
+
+ // We track the number of accelerable states for each graph in a map and
+ // only recompute them when the graph is modified.
unordered_map<left_id, u32> accel_count;
- for (const auto &rose : roses) {
- assert(rose.graph()->kind == NFA_INFIX);
- accel_count[rose] = estimatedAccelStates(tbi, *rose.graph());
- }
-
- for (auto it = roses.begin(); it != roses.end(); ++it) {
- left_id r1 = *it;
- const deque<RoseVertex> &verts1 = roses.vertices(r1);
-
- deque<left_id> merged;
- for (auto jt = next(it); jt != roses.end(); ++jt) {
- left_id r2 = *jt;
- const deque<RoseVertex> &verts2 = roses.vertices(r2);
-
- DEBUG_PRINTF("consider merging rose %p (%zu verts) "
- "with %p (%zu verts)\n",
- r1.graph(), verts1.size(), r2.graph(), verts2.size());
-
- u32 accel1 = accel_count[r1];
- if (accel1 >= NFA_MAX_ACCEL_STATES) {
- DEBUG_PRINTF("h1 has hit max accel\n");
- break; // next h1
- }
-
- u32 accel2 = accel_count[r2];
- if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) {
- DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, "
- "accel2=%u)\n",
- accel1, accel2);
- continue; // next h2
- }
-
- if (!mergeableRoseVertices(tbi, verts1, verts2)) {
- DEBUG_PRINTF("not mergeable\n");
- continue; // next h2
- }
-
- // Attempt to merge h2 into h1.
-
- NGHolder victim;
- cloneHolder(victim, *r2.graph());
-
- // Store a copy of the in-edge properties in case we have to roll
- // back.
- map<RoseEdge, RoseEdgeProps> edge_props;
- for (auto v : verts2) {
- for (const auto &e : in_edges_range(v, g)) {
- edge_props[e] = g[e];
- }
- }
-
- if (!setDistinctRoseTops(g, victim, *r1.graph(), verts2)) {
- DEBUG_PRINTF("can't set distinct tops\n");
- continue; // next h2
- }
-
- assert(victim.kind == r1.graph()->kind);
- assert(!generates_callbacks(*r1.graph()));
- if (!mergeNfaPair(victim, *r1.graph(), nullptr, tbi.cc)) {
- DEBUG_PRINTF("merge failed\n");
- // Roll back in-edge properties.
- for (const auto &m : edge_props) {
- g[m.first] = m.second;
- }
- continue; // next h2
- }
-
- // Update h2's roses to point to h1 now
- shared_ptr<NGHolder> winner = g[verts1.front()].left.graph;
- for (auto v : verts2) {
- g[v].left.graph = winner;
- }
- roses.insert(r1, verts2);
-
- merged.push_back(r2);
-
- if (num_vertices(*winner) >= small_merge_max_vertices(tbi.cc)) {
- DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n",
- num_vertices(*winner));
- break; // next h1
- }
-
- // Update h1's accel count estimate.
- accel_count[r1] = estimatedAccelStates(tbi, *winner);
- }
-
- DEBUG_PRINTF("%zu roses merged\n", merged.size());
- roses.erase_all(merged.begin(), merged.end());
- }
-}
-
-/**
- * This pass attempts to merge prefix/infix engines with a small number of
- * vertices together into larger engines. The engines must not be have a
- * reformed start dot star (due to a leading repeat) nor an infix LBR. Engines
- * that have compatible lag are greedily grouped such that they remain
- * accelerable and only have a small number of states. Note: if a role has an
- * infix with multiple trigger vertices, the role will be left unchanged by this
- * pass and will remain using an unmerged graph.
- */
-void mergeSmallLeftfixes(RoseBuildImpl &tbi) {
- DEBUG_PRINTF("entry\n");
-
- if (!tbi.cc.grey.mergeRose || !tbi.cc.grey.roseMultiTopRoses) {
- return;
- }
-
- RoseGraph &g = tbi.g;
-
+ for (const auto &rose : roses) {
+ assert(rose.graph()->kind == NFA_INFIX);
+ accel_count[rose] = estimatedAccelStates(tbi, *rose.graph());
+ }
+
+ for (auto it = roses.begin(); it != roses.end(); ++it) {
+ left_id r1 = *it;
+ const deque<RoseVertex> &verts1 = roses.vertices(r1);
+
+ deque<left_id> merged;
+ for (auto jt = next(it); jt != roses.end(); ++jt) {
+ left_id r2 = *jt;
+ const deque<RoseVertex> &verts2 = roses.vertices(r2);
+
+ DEBUG_PRINTF("consider merging rose %p (%zu verts) "
+ "with %p (%zu verts)\n",
+ r1.graph(), verts1.size(), r2.graph(), verts2.size());
+
+ u32 accel1 = accel_count[r1];
+ if (accel1 >= NFA_MAX_ACCEL_STATES) {
+ DEBUG_PRINTF("h1 has hit max accel\n");
+ break; // next h1
+ }
+
+ u32 accel2 = accel_count[r2];
+ if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) {
+ DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, "
+ "accel2=%u)\n",
+ accel1, accel2);
+ continue; // next h2
+ }
+
+ if (!mergeableRoseVertices(tbi, verts1, verts2)) {
+ DEBUG_PRINTF("not mergeable\n");
+ continue; // next h2
+ }
+
+ // Attempt to merge h2 into h1.
+
+ NGHolder victim;
+ cloneHolder(victim, *r2.graph());
+
+ // Store a copy of the in-edge properties in case we have to roll
+ // back.
+ map<RoseEdge, RoseEdgeProps> edge_props;
+ for (auto v : verts2) {
+ for (const auto &e : in_edges_range(v, g)) {
+ edge_props[e] = g[e];
+ }
+ }
+
+ if (!setDistinctRoseTops(g, victim, *r1.graph(), verts2)) {
+ DEBUG_PRINTF("can't set distinct tops\n");
+ continue; // next h2
+ }
+
+ assert(victim.kind == r1.graph()->kind);
+ assert(!generates_callbacks(*r1.graph()));
+ if (!mergeNfaPair(victim, *r1.graph(), nullptr, tbi.cc)) {
+ DEBUG_PRINTF("merge failed\n");
+ // Roll back in-edge properties.
+ for (const auto &m : edge_props) {
+ g[m.first] = m.second;
+ }
+ continue; // next h2
+ }
+
+ // Update h2's roses to point to h1 now
+ shared_ptr<NGHolder> winner = g[verts1.front()].left.graph;
+ for (auto v : verts2) {
+ g[v].left.graph = winner;
+ }
+ roses.insert(r1, verts2);
+
+ merged.push_back(r2);
+
+ if (num_vertices(*winner) >= small_merge_max_vertices(tbi.cc)) {
+ DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n",
+ num_vertices(*winner));
+ break; // next h1
+ }
+
+ // Update h1's accel count estimate.
+ accel_count[r1] = estimatedAccelStates(tbi, *winner);
+ }
+
+ DEBUG_PRINTF("%zu roses merged\n", merged.size());
+ roses.erase_all(merged.begin(), merged.end());
+ }
+}
+
+/**
+ * This pass attempts to merge prefix/infix engines with a small number of
+ * vertices together into larger engines. The engines must not be have a
+ * reformed start dot star (due to a leading repeat) nor an infix LBR. Engines
+ * that have compatible lag are greedily grouped such that they remain
+ * accelerable and only have a small number of states. Note: if a role has an
+ * infix with multiple trigger vertices, the role will be left unchanged by this
+ * pass and will remain using an unmerged graph.
+ */
+void mergeSmallLeftfixes(RoseBuildImpl &tbi) {
+ DEBUG_PRINTF("entry\n");
+
+ if (!tbi.cc.grey.mergeRose || !tbi.cc.grey.roseMultiTopRoses) {
+ return;
+ }
+
+ RoseGraph &g = tbi.g;
+
LeftfixBouquet nfa_leftfixes;
-
- for (auto v : vertices_range(g)) {
- if (!g[v].left) {
- continue;
- }
-
- // Handle single-parent infixes only.
- if (tbi.isRootSuccessor(v)) {
- continue;
- }
-
- left_id left(g[v].left);
-
- // Only non-transient for the moment.
- if (contains(tbi.transient, left)) {
- continue;
- }
-
- // No DFAs or Haigs right now.
- if (left.dfa() || left.haig()) {
- continue;
- }
-
- // Castles are handled by a different pass.
- if (left.castle()) {
- continue;
- }
-
- assert(left.graph());
- NGHolder &h = *left.graph();
-
- /* Ensure that kind on the graph is correct */
- assert(h.kind == (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX));
-
- if (hasReformedStartDotStar(h, tbi.cc.grey)) {
- /* We would lose optimisations of the leading repeat by merging. */
- continue;
- }
-
- // Small roses only.
- if (num_vertices(h) > small_rose_threshold(tbi.cc)) {
- continue;
- }
-
+
+ for (auto v : vertices_range(g)) {
+ if (!g[v].left) {
+ continue;
+ }
+
+ // Handle single-parent infixes only.
+ if (tbi.isRootSuccessor(v)) {
+ continue;
+ }
+
+ left_id left(g[v].left);
+
+ // Only non-transient for the moment.
+ if (contains(tbi.transient, left)) {
+ continue;
+ }
+
+ // No DFAs or Haigs right now.
+ if (left.dfa() || left.haig()) {
+ continue;
+ }
+
+ // Castles are handled by a different pass.
+ if (left.castle()) {
+ continue;
+ }
+
+ assert(left.graph());
+ NGHolder &h = *left.graph();
+
+ /* Ensure that kind on the graph is correct */
+ assert(h.kind == (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX));
+
+ if (hasReformedStartDotStar(h, tbi.cc.grey)) {
+ /* We would lose optimisations of the leading repeat by merging. */
+ continue;
+ }
+
+ // Small roses only.
+ if (num_vertices(h) > small_rose_threshold(tbi.cc)) {
+ continue;
+ }
+
nfa_leftfixes.insert(left, v);
- }
-
+ }
+
deque<LeftfixBouquet> leftfix_groups;
chunkBouquets(nfa_leftfixes, leftfix_groups, MERGE_GROUP_SIZE_MAX);
nfa_leftfixes.clear();
DEBUG_PRINTF("chunked nfa leftfixes into %zu groups\n",
leftfix_groups.size());
-
+
for (auto &group : leftfix_groups) {
- mergeNfaLeftfixes(tbi, group);
- }
-}
-
+ mergeNfaLeftfixes(tbi, group);
+ }
+}
+
static
void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands,
insertion_ordered_map<left_id, vector<RoseVertex>> &eng_verts) {
@@ -2029,514 +2029,514 @@ void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands,
* mainly depends on the reach being scanned.
*/
void mergeCastleLeftfixes(RoseBuildImpl &build) {
- DEBUG_PRINTF("entry\n");
-
+ DEBUG_PRINTF("entry\n");
+
if (!build.cc.grey.mergeRose || !build.cc.grey.roseMultiTopRoses
|| !build.cc.grey.allowCastle) {
- return;
- }
-
+ return;
+ }
+
RoseGraph &g = build.g;
-
+
insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts;
-
- for (auto v : vertices_range(g)) {
+
+ for (auto v : vertices_range(g)) {
if (!g[v].left.castle) {
- continue;
- }
-
+ continue;
+ }
+
// Handle infixes only.
if (build.isRootSuccessor(v)) {
- continue;
- }
-
+ continue;
+ }
+
eng_verts[g[v].left].push_back(v);
}
-
+
map<CharReach, vector<left_id>> by_reach;
for (const auto &left : eng_verts | map_keys) {
by_reach[left.castle()->reach()].push_back(left);
}
-
+
vector<vector<left_id>> chunks;
for (auto &raw_group : by_reach | map_values) {
chunk(move(raw_group), &chunks, MERGE_CASTLE_GROUP_SIZE_MAX);
- }
+ }
by_reach.clear();
-
+
DEBUG_PRINTF("chunked castles into %zu groups\n", chunks.size());
-
+
for (auto &chunk : chunks) {
mergeCastleChunk(build, chunk, eng_verts);
- }
-}
-
-static
-void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes,
- const bool acyclic) {
- RoseGraph &g = tbi.g;
-
- DEBUG_PRINTF("group has %zu suffixes\n", suffixes.size());
-
- // If this isn't an acyclic case, we track the number of accelerable states
- // for each graph in a map and only recompute them when the graph is
- // modified.
+ }
+}
+
+static
+void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes,
+ const bool acyclic) {
+ RoseGraph &g = tbi.g;
+
+ DEBUG_PRINTF("group has %zu suffixes\n", suffixes.size());
+
+ // If this isn't an acyclic case, we track the number of accelerable states
+ // for each graph in a map and only recompute them when the graph is
+ // modified.
unordered_map<suffix_id, u32> accel_count;
- if (!acyclic) {
- for (const auto &suffix : suffixes) {
- assert(suffix.graph() && suffix.graph()->kind == NFA_SUFFIX);
- accel_count[suffix] = estimatedAccelStates(tbi, *suffix.graph());
- }
- }
-
- for (auto it = suffixes.begin(); it != suffixes.end(); ++it) {
- suffix_id s1 = *it;
- const deque<RoseVertex> &verts1 = suffixes.vertices(s1);
- assert(s1.graph() && s1.graph()->kind == NFA_SUFFIX);
+ if (!acyclic) {
+ for (const auto &suffix : suffixes) {
+ assert(suffix.graph() && suffix.graph()->kind == NFA_SUFFIX);
+ accel_count[suffix] = estimatedAccelStates(tbi, *suffix.graph());
+ }
+ }
+
+ for (auto it = suffixes.begin(); it != suffixes.end(); ++it) {
+ suffix_id s1 = *it;
+ const deque<RoseVertex> &verts1 = suffixes.vertices(s1);
+ assert(s1.graph() && s1.graph()->kind == NFA_SUFFIX);
// Caller should ensure that we don't propose merges of graphs that are
// already too big.
assert(num_vertices(*s1.graph()) < small_merge_max_vertices(tbi.cc));
- deque<suffix_id> merged;
- for (auto jt = next(it); jt != suffixes.end(); ++jt) {
- suffix_id s2 = *jt;
- const deque<RoseVertex> &verts2 = suffixes.vertices(s2);
- assert(s2.graph() && s2.graph()->kind == NFA_SUFFIX);
-
- if (!acyclic) {
- u32 accel1 = accel_count[s1];
- if (accel1 >= NFA_MAX_ACCEL_STATES) {
- DEBUG_PRINTF("h1 has hit max accel\n");
- break; // next h1
- }
-
- u32 accel2 = accel_count[s2];
- if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) {
- DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, "
- "accel2=%u)\n",
- accel1, accel2);
- continue; // next h2
- }
- }
-
- // Attempt to merge h2 into h1.
-
- NGHolder victim;
- cloneHolder(victim, *s2.graph());
-
- // Store a copy of the suffix tops in case we have to roll back.
- map<RoseVertex, u32> old_tops;
- for (auto v : verts2) {
- old_tops[v] = g[v].suffix.top;
- }
-
- if (!setDistinctSuffixTops(g, victim, *s1.graph(), verts2)) {
- DEBUG_PRINTF("can't set distinct tops\n");
- continue; // next h2
- }
-
- if (!mergeNfaPair(victim, *s1.graph(), &tbi.rm, tbi.cc)) {
- DEBUG_PRINTF("merge failed\n");
- // Roll back in-edge properties.
- for (const auto &m : old_tops) {
- g[m.first].suffix.top = m.second;
- }
- continue; // next h2
- }
-
- // Update h2's roses to point to h1 now
- shared_ptr<NGHolder> winner = g[verts1.front()].suffix.graph;
- for (auto v : verts2) {
- g[v].suffix.graph = winner;
- }
- suffixes.insert(s1, verts2);
- merged.push_back(s2);
-
- if (num_vertices(*s1.graph()) >= small_merge_max_vertices(tbi.cc)) {
- DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n",
- num_vertices(*s1.graph()));
- break; // next h1
- }
-
- if (!acyclic) {
- // Update h1's accel count estimate.
- accel_count[s1] = estimatedAccelStates(tbi, *s1.graph());
- }
- }
-
- DEBUG_PRINTF("%zu suffixes merged\n", merged.size());
- suffixes.erase_all(merged.begin(), merged.end());
- }
-}
-
-/**
- * This merge pass combines suffixes from unrelated roles into a single
- * suffix with multiple top events in order to distinguish the triggers
- * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes
- * while mergeSmallSuffixes only considers small suffixes. The merges will
- * group roles with suffixes in the graph into clusters of at most
- * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the
- * suffixes and attempting to pairwise merge it with another member. Merges
- * will fail if the result is not implementable, requires too many distinct top
- * events, or if it losses the ability to be accelerated. The merge will modify
- * the existing suffix graph of the one member (g1), the other member updates
- * it graph to refer to g1 instead of its previous graph (g2) and use the new
- * tops created. Other roles may have been sharing g1 - these are unaffected by
- * the change as the existing top events are left untouched. Other roles using
- * g2 are also unaffected as g2 will continue to exist until while it has any
- * roles triggering it.
- *
- * Note: suffixes destined for the LBR are not considered for these merges as
- * the LBR can only handle a single repeat and this type of repeat is ideally
- * handled outside of an NFA or DFA.
- */
-void mergeAcyclicSuffixes(RoseBuildImpl &tbi) {
- DEBUG_PRINTF("entry\n");
-
- if (!tbi.cc.grey.mergeSuffixes) {
- return;
- }
-
- SuffixBouquet suffixes;
-
- RoseGraph &g = tbi.g;
-
- for (auto v : vertices_range(g)) {
- shared_ptr<NGHolder> h = g[v].suffix.graph;
- if (!h || tbi.isInETable(v)) {
- continue;
- }
-
- assert(!g[v].suffix.haig);
-
+ deque<suffix_id> merged;
+ for (auto jt = next(it); jt != suffixes.end(); ++jt) {
+ suffix_id s2 = *jt;
+ const deque<RoseVertex> &verts2 = suffixes.vertices(s2);
+ assert(s2.graph() && s2.graph()->kind == NFA_SUFFIX);
+
+ if (!acyclic) {
+ u32 accel1 = accel_count[s1];
+ if (accel1 >= NFA_MAX_ACCEL_STATES) {
+ DEBUG_PRINTF("h1 has hit max accel\n");
+ break; // next h1
+ }
+
+ u32 accel2 = accel_count[s2];
+ if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) {
+ DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, "
+ "accel2=%u)\n",
+ accel1, accel2);
+ continue; // next h2
+ }
+ }
+
+ // Attempt to merge h2 into h1.
+
+ NGHolder victim;
+ cloneHolder(victim, *s2.graph());
+
+ // Store a copy of the suffix tops in case we have to roll back.
+ map<RoseVertex, u32> old_tops;
+ for (auto v : verts2) {
+ old_tops[v] = g[v].suffix.top;
+ }
+
+ if (!setDistinctSuffixTops(g, victim, *s1.graph(), verts2)) {
+ DEBUG_PRINTF("can't set distinct tops\n");
+ continue; // next h2
+ }
+
+ if (!mergeNfaPair(victim, *s1.graph(), &tbi.rm, tbi.cc)) {
+ DEBUG_PRINTF("merge failed\n");
+ // Roll back in-edge properties.
+ for (const auto &m : old_tops) {
+ g[m.first].suffix.top = m.second;
+ }
+ continue; // next h2
+ }
+
+ // Update h2's roses to point to h1 now
+ shared_ptr<NGHolder> winner = g[verts1.front()].suffix.graph;
+ for (auto v : verts2) {
+ g[v].suffix.graph = winner;
+ }
+ suffixes.insert(s1, verts2);
+ merged.push_back(s2);
+
+ if (num_vertices(*s1.graph()) >= small_merge_max_vertices(tbi.cc)) {
+ DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n",
+ num_vertices(*s1.graph()));
+ break; // next h1
+ }
+
+ if (!acyclic) {
+ // Update h1's accel count estimate.
+ accel_count[s1] = estimatedAccelStates(tbi, *s1.graph());
+ }
+ }
+
+ DEBUG_PRINTF("%zu suffixes merged\n", merged.size());
+ suffixes.erase_all(merged.begin(), merged.end());
+ }
+}
+
+/**
+ * This merge pass combines suffixes from unrelated roles into a single
+ * suffix with multiple top events in order to distinguish the triggers
+ * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes
+ * while mergeSmallSuffixes only considers small suffixes. The merges will
+ * group roles with suffixes in the graph into clusters of at most
+ * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the
+ * suffixes and attempting to pairwise merge it with another member. Merges
+ * will fail if the result is not implementable, requires too many distinct top
+ * events, or if it losses the ability to be accelerated. The merge will modify
+ * the existing suffix graph of the one member (g1), the other member updates
+ * it graph to refer to g1 instead of its previous graph (g2) and use the new
+ * tops created. Other roles may have been sharing g1 - these are unaffected by
+ * the change as the existing top events are left untouched. Other roles using
+ * g2 are also unaffected as g2 will continue to exist until while it has any
+ * roles triggering it.
+ *
+ * Note: suffixes destined for the LBR are not considered for these merges as
+ * the LBR can only handle a single repeat and this type of repeat is ideally
+ * handled outside of an NFA or DFA.
+ */
+void mergeAcyclicSuffixes(RoseBuildImpl &tbi) {
+ DEBUG_PRINTF("entry\n");
+
+ if (!tbi.cc.grey.mergeSuffixes) {
+ return;
+ }
+
+ SuffixBouquet suffixes;
+
+ RoseGraph &g = tbi.g;
+
+ for (auto v : vertices_range(g)) {
+ shared_ptr<NGHolder> h = g[v].suffix.graph;
+ if (!h || tbi.isInETable(v)) {
+ continue;
+ }
+
+ assert(!g[v].suffix.haig);
+
if (num_vertices(*h) >= small_merge_max_vertices(tbi.cc)) {
- continue;
- }
-
+ continue;
+ }
+
if (!isAcyclic(*h)) {
- continue;
- }
-
- suffixes.insert(g[v].suffix, v);
- }
-
- deque<SuffixBouquet> suff_groups;
- chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX);
- DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(),
- suff_groups.size());
- suffixes.clear();
-
- for (auto &group : suff_groups) {
- mergeSuffixes(tbi, group, true);
- }
-}
-
-/**
- * This merge pass combines suffixes from unrelated roles into a single
- * suffix with multiple top events in order to distinguish the triggers
- * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes
- * while mergeSmallSuffixes only considers small suffixes. The merges will
- * group roles with suffixes in the graph into clusters of at most
- * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the
- * suffixes and attempting to pairwise merge it with another member. Merges
- * will fail if the result is not implementable, requires too many distinct top
- * events, or if it losses the ability to be accelerated. The merge will modify
- * the existing suffix graph of the one member (g1), the other member updates
- * it graph to refer to g1 instead of its previous graph (g2) and use the new
- * tops created. Other roles may have been sharing g1 - these are unaffected by
- * the change as the existing top events are left untouched. Other roles using
- * g2 are also unaffected as g2 will continue to exist until while it has any
- * roles triggering it.
- *
- * Note: suffixes destined for the LBR are not considered for these merges as
- * the LBR can only handle a single repeat and this type of repeat is ideally
- * handled outside of an NFA or DFA.
- */
-void mergeSmallSuffixes(RoseBuildImpl &tbi) {
- DEBUG_PRINTF("entry\n");
-
- if (!tbi.cc.grey.mergeSuffixes) {
- return;
- }
-
- RoseGraph &g = tbi.g;
- SuffixBouquet suffixes;
-
- for (auto v : vertices_range(g)) {
- shared_ptr<NGHolder> h = g[v].suffix.graph;
- if (!h || tbi.isInETable(v)) {
- continue;
- }
- assert(!g[v].suffix.haig);
-
- // Leave acyclics out for the moment.
- if (isAcyclic(*h)) {
- continue;
- }
-
- // Small-ish suffixes only.
- if (num_vertices(*h) > 32) {
- continue;
- }
-
- suffixes.insert(g[v].suffix, v);
- }
-
- deque<SuffixBouquet> suff_groups;
- chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX);
- DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(),
- suff_groups.size());
- suffixes.clear();
-
- for (auto &group : suff_groups) {
- mergeSuffixes(tbi, group, false);
- }
-}
-
-static
-void removeDeadOutfixes(vector<OutfixInfo> &outfixes) {
- auto is_dead = [](const OutfixInfo &outfix) { return outfix.is_dead(); };
- outfixes.erase(remove_if(begin(outfixes), end(outfixes), is_dead),
- end(outfixes));
-}
-
-static
-void mergeOutfixInfo(OutfixInfo &winner, const OutfixInfo &victim) {
- assert(!winner.is_dead());
-
- winner.maxBAWidth = max(winner.maxBAWidth, victim.maxBAWidth);
- winner.minWidth = min(winner.minWidth, victim.minWidth);
- winner.maxWidth = max(winner.maxWidth, victim.maxWidth);
- winner.maxOffset = max(winner.maxOffset, victim.maxOffset);
- mergeReverseAccelerationInfo(winner.rev_info, victim.rev_info);
-
- // This outfix can be ignored in small block mode if both were. The dedupe
- // layer at runtime will protect us from extra matches if only one was in
- // the small block matcher.
- winner.in_sbmatcher &= victim.in_sbmatcher;
-}
-
-static
-map<NGHolder *, NGHolder *> chunkedNfaMerge(RoseBuildImpl &build,
- const vector<NGHolder *> &nfas) {
- map<NGHolder *, NGHolder *> merged;
-
- vector<NGHolder *> batch;
- for (auto it = begin(nfas), ite = end(nfas); it != ite; ++it) {
- batch.push_back(*it);
- assert((*it)->kind == NFA_OUTFIX);
- if (batch.size() == MERGE_GROUP_SIZE_MAX || next(it) == ite) {
+ continue;
+ }
+
+ suffixes.insert(g[v].suffix, v);
+ }
+
+ deque<SuffixBouquet> suff_groups;
+ chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX);
+ DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(),
+ suff_groups.size());
+ suffixes.clear();
+
+ for (auto &group : suff_groups) {
+ mergeSuffixes(tbi, group, true);
+ }
+}
+
+/**
+ * This merge pass combines suffixes from unrelated roles into a single
+ * suffix with multiple top events in order to distinguish the triggers
+ * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes
+ * while mergeSmallSuffixes only considers small suffixes. The merges will
+ * group roles with suffixes in the graph into clusters of at most
+ * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the
+ * suffixes and attempting to pairwise merge it with another member. Merges
+ * will fail if the result is not implementable, requires too many distinct top
+ * events, or if it losses the ability to be accelerated. The merge will modify
+ * the existing suffix graph of the one member (g1), the other member updates
+ * it graph to refer to g1 instead of its previous graph (g2) and use the new
+ * tops created. Other roles may have been sharing g1 - these are unaffected by
+ * the change as the existing top events are left untouched. Other roles using
+ * g2 are also unaffected as g2 will continue to exist until while it has any
+ * roles triggering it.
+ *
+ * Note: suffixes destined for the LBR are not considered for these merges as
+ * the LBR can only handle a single repeat and this type of repeat is ideally
+ * handled outside of an NFA or DFA.
+ */
+void mergeSmallSuffixes(RoseBuildImpl &tbi) {
+ DEBUG_PRINTF("entry\n");
+
+ if (!tbi.cc.grey.mergeSuffixes) {
+ return;
+ }
+
+ RoseGraph &g = tbi.g;
+ SuffixBouquet suffixes;
+
+ for (auto v : vertices_range(g)) {
+ shared_ptr<NGHolder> h = g[v].suffix.graph;
+ if (!h || tbi.isInETable(v)) {
+ continue;
+ }
+ assert(!g[v].suffix.haig);
+
+ // Leave acyclics out for the moment.
+ if (isAcyclic(*h)) {
+ continue;
+ }
+
+ // Small-ish suffixes only.
+ if (num_vertices(*h) > 32) {
+ continue;
+ }
+
+ suffixes.insert(g[v].suffix, v);
+ }
+
+ deque<SuffixBouquet> suff_groups;
+ chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX);
+ DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(),
+ suff_groups.size());
+ suffixes.clear();
+
+ for (auto &group : suff_groups) {
+ mergeSuffixes(tbi, group, false);
+ }
+}
+
+static
+void removeDeadOutfixes(vector<OutfixInfo> &outfixes) {
+ auto is_dead = [](const OutfixInfo &outfix) { return outfix.is_dead(); };
+ outfixes.erase(remove_if(begin(outfixes), end(outfixes), is_dead),
+ end(outfixes));
+}
+
+static
+void mergeOutfixInfo(OutfixInfo &winner, const OutfixInfo &victim) {
+ assert(!winner.is_dead());
+
+ winner.maxBAWidth = max(winner.maxBAWidth, victim.maxBAWidth);
+ winner.minWidth = min(winner.minWidth, victim.minWidth);
+ winner.maxWidth = max(winner.maxWidth, victim.maxWidth);
+ winner.maxOffset = max(winner.maxOffset, victim.maxOffset);
+ mergeReverseAccelerationInfo(winner.rev_info, victim.rev_info);
+
+ // This outfix can be ignored in small block mode if both were. The dedupe
+ // layer at runtime will protect us from extra matches if only one was in
+ // the small block matcher.
+ winner.in_sbmatcher &= victim.in_sbmatcher;
+}
+
+static
+map<NGHolder *, NGHolder *> chunkedNfaMerge(RoseBuildImpl &build,
+ const vector<NGHolder *> &nfas) {
+ map<NGHolder *, NGHolder *> merged;
+
+ vector<NGHolder *> batch;
+ for (auto it = begin(nfas), ite = end(nfas); it != ite; ++it) {
+ batch.push_back(*it);
+ assert((*it)->kind == NFA_OUTFIX);
+ if (batch.size() == MERGE_GROUP_SIZE_MAX || next(it) == ite) {
auto batch_merged = mergeNfaCluster(batch, &build.rm, build.cc);
insert(&merged, batch_merged);
- batch.clear();
- }
- }
-
- return merged;
-}
-
-static
-void mergeOutfixNfas(RoseBuildImpl &tbi, vector<NGHolder *> &nfas) {
- DEBUG_PRINTF("merging %zu nfas\n", nfas.size());
- if (nfas.size() < 2) {
- return;
- }
-
- vector<OutfixInfo> &outfixes = tbi.outfixes;
-
- map<NGHolder *, size_t> nfa_mapping;
- for (size_t i = 0; i < outfixes.size(); i++) {
+ batch.clear();
+ }
+ }
+
+ return merged;
+}
+
+static
+void mergeOutfixNfas(RoseBuildImpl &tbi, vector<NGHolder *> &nfas) {
+ DEBUG_PRINTF("merging %zu nfas\n", nfas.size());
+ if (nfas.size() < 2) {
+ return;
+ }
+
+ vector<OutfixInfo> &outfixes = tbi.outfixes;
+
+ map<NGHolder *, size_t> nfa_mapping;
+ for (size_t i = 0; i < outfixes.size(); i++) {
auto *holder = outfixes[i].holder();
if (holder) {
nfa_mapping[holder] = i;
- }
- }
-
- map<NGHolder *, NGHolder *> merged = chunkedNfaMerge(tbi, nfas);
- if (merged.empty()) {
- return;
- }
-
- DEBUG_PRINTF("%zu nfas merged\n", merged.size());
-
- // Update the outfix info for merged holders.
- for (const auto &m : merged) {
- OutfixInfo &victim = outfixes.at(nfa_mapping[m.first]);
- OutfixInfo &winner = outfixes.at(nfa_mapping[m.second]);
- mergeOutfixInfo(winner, victim);
- victim.clear();
- }
-
- removeDeadOutfixes(outfixes);
-}
-
-namespace {
-struct MergeMcClellan {
- MergeMcClellan(const ReportManager &rm_in, const Grey &grey_in)
- : rm(rm_in), grey(grey_in) {}
-
- unique_ptr<raw_dfa> operator()(const raw_dfa *d1, const raw_dfa *d2) const {
- assert(d1 && d2);
- return mergeTwoDfas(d1, d2, DFA_MERGE_MAX_STATES, &rm, grey);
- }
-
-private:
- const ReportManager &rm;
- const Grey &grey;
-};
-
-struct MergeHaig {
- explicit MergeHaig(u32 limit_in) : limit(limit_in) {}
-
- unique_ptr<raw_som_dfa> operator()(const raw_som_dfa *d1,
- const raw_som_dfa *d2) const {
- assert(d1 && d2);
- return attemptToMergeHaig({d1, d2}, limit);
- }
-
-private:
- const u32 limit; //!< state limit for merged result.
-};
-}
-
-/**
- * Generic pairwise merge algorithm that can be used for either McClellan
- * (RawDfa=raw_dfa) or Haig (RawDfa=raw_som_dfa). Delegates the actual merge
- * operation to a merge functor, which allows the caller to set some policy
- * (state limits, etc).
- *
- * This is currently astonishingly simple and just considers every pair of
- * DFAs, slow and steady. We may wish to actually apply a merge ordering
- * strategy in the future.
- */
-template<class RawDfa, class MergeFunctor>
-static
-void pairwiseDfaMerge(vector<RawDfa *> &dfas,
+ }
+ }
+
+ map<NGHolder *, NGHolder *> merged = chunkedNfaMerge(tbi, nfas);
+ if (merged.empty()) {
+ return;
+ }
+
+ DEBUG_PRINTF("%zu nfas merged\n", merged.size());
+
+ // Update the outfix info for merged holders.
+ for (const auto &m : merged) {
+ OutfixInfo &victim = outfixes.at(nfa_mapping[m.first]);
+ OutfixInfo &winner = outfixes.at(nfa_mapping[m.second]);
+ mergeOutfixInfo(winner, victim);
+ victim.clear();
+ }
+
+ removeDeadOutfixes(outfixes);
+}
+
+namespace {
+struct MergeMcClellan {
+ MergeMcClellan(const ReportManager &rm_in, const Grey &grey_in)
+ : rm(rm_in), grey(grey_in) {}
+
+ unique_ptr<raw_dfa> operator()(const raw_dfa *d1, const raw_dfa *d2) const {
+ assert(d1 && d2);
+ return mergeTwoDfas(d1, d2, DFA_MERGE_MAX_STATES, &rm, grey);
+ }
+
+private:
+ const ReportManager &rm;
+ const Grey &grey;
+};
+
+struct MergeHaig {
+ explicit MergeHaig(u32 limit_in) : limit(limit_in) {}
+
+ unique_ptr<raw_som_dfa> operator()(const raw_som_dfa *d1,
+ const raw_som_dfa *d2) const {
+ assert(d1 && d2);
+ return attemptToMergeHaig({d1, d2}, limit);
+ }
+
+private:
+ const u32 limit; //!< state limit for merged result.
+};
+}
+
+/**
+ * Generic pairwise merge algorithm that can be used for either McClellan
+ * (RawDfa=raw_dfa) or Haig (RawDfa=raw_som_dfa). Delegates the actual merge
+ * operation to a merge functor, which allows the caller to set some policy
+ * (state limits, etc).
+ *
+ * This is currently astonishingly simple and just considers every pair of
+ * DFAs, slow and steady. We may wish to actually apply a merge ordering
+ * strategy in the future.
+ */
+template<class RawDfa, class MergeFunctor>
+static
+void pairwiseDfaMerge(vector<RawDfa *> &dfas,
unordered_map<RawDfa *, size_t> &dfa_mapping,
- vector<OutfixInfo> &outfixes,
- MergeFunctor merge_func) {
- DEBUG_PRINTF("merging group of size %zu\n", dfas.size());
-
- for (auto it = dfas.begin(), ite = dfas.end(); it != ite; ++it) {
- if (!*it) {
- continue;
- }
- for (auto jt = next(it); jt != ite; ++jt) {
- if (!*jt) {
- continue;
- }
-
- DEBUG_PRINTF("try merge %p and %p\n", *it, *jt);
- unique_ptr<RawDfa> rdfa = merge_func(*it, *jt);
- if (!rdfa) {
- continue; // Merge failed.
- }
-
- DEBUG_PRINTF("merge succeeded, built %p\n", rdfa.get());
- OutfixInfo &winner = outfixes.at(dfa_mapping[*it]);
- OutfixInfo &victim = outfixes.at(dfa_mapping[*jt]);
- assert(!winner.is_dead() && !victim.is_dead());
-
- RawDfa *dfa_ptr = rdfa.get();
- dfa_mapping[dfa_ptr] = dfa_mapping[*it];
- dfa_mapping.erase(*it);
+ vector<OutfixInfo> &outfixes,
+ MergeFunctor merge_func) {
+ DEBUG_PRINTF("merging group of size %zu\n", dfas.size());
+
+ for (auto it = dfas.begin(), ite = dfas.end(); it != ite; ++it) {
+ if (!*it) {
+ continue;
+ }
+ for (auto jt = next(it); jt != ite; ++jt) {
+ if (!*jt) {
+ continue;
+ }
+
+ DEBUG_PRINTF("try merge %p and %p\n", *it, *jt);
+ unique_ptr<RawDfa> rdfa = merge_func(*it, *jt);
+ if (!rdfa) {
+ continue; // Merge failed.
+ }
+
+ DEBUG_PRINTF("merge succeeded, built %p\n", rdfa.get());
+ OutfixInfo &winner = outfixes.at(dfa_mapping[*it]);
+ OutfixInfo &victim = outfixes.at(dfa_mapping[*jt]);
+ assert(!winner.is_dead() && !victim.is_dead());
+
+ RawDfa *dfa_ptr = rdfa.get();
+ dfa_mapping[dfa_ptr] = dfa_mapping[*it];
+ dfa_mapping.erase(*it);
winner.proto = move(rdfa);
-
- mergeOutfixInfo(winner, victim);
-
- victim.clear();
- *jt = nullptr; // to be deleted.
- *it = dfa_ptr;
- }
- }
-}
-
-template<class RawDfa, class MergeFunctor>
-static
-void chunkedDfaMerge(vector<RawDfa *> &dfas,
+
+ mergeOutfixInfo(winner, victim);
+
+ victim.clear();
+ *jt = nullptr; // to be deleted.
+ *it = dfa_ptr;
+ }
+ }
+}
+
+template<class RawDfa, class MergeFunctor>
+static
+void chunkedDfaMerge(vector<RawDfa *> &dfas,
unordered_map<RawDfa *, size_t> &dfa_mapping,
- vector<OutfixInfo> &outfixes,
- MergeFunctor merge_func) {
- DEBUG_PRINTF("begin merge of %zu dfas\n", dfas.size());
-
- vector<RawDfa *> out_dfas;
- vector<RawDfa *> chunk;
- for (auto it = begin(dfas), ite = end(dfas); it != ite; ++it) {
- chunk.push_back(*it);
- if (chunk.size() >= DFA_CHUNK_SIZE_MAX || next(it) == ite) {
- pairwiseDfaMerge(chunk, dfa_mapping, outfixes, merge_func);
- out_dfas.insert(end(out_dfas), begin(chunk), end(chunk));
- chunk.clear();
- }
- }
-
- // Remove null (merged) DFAs and update vector for subsequent use.
- out_dfas.erase(remove(out_dfas.begin(), out_dfas.end(), nullptr),
- out_dfas.end());
- dfas.swap(out_dfas);
- DEBUG_PRINTF("after merge there are %zu dfas\n", dfas.size());
-}
-
-static
-void mergeOutfixDfas(RoseBuildImpl &tbi, vector<raw_dfa *> &dfas) {
- DEBUG_PRINTF("merging %zu nfas\n", dfas.size());
- if (dfas.size() < 2) {
- return;
- }
-
- vector<OutfixInfo> &outfixes = tbi.outfixes;
-
- /* key is index into outfix array as iterators, etc may be invalidated by
- * element addition. */
+ vector<OutfixInfo> &outfixes,
+ MergeFunctor merge_func) {
+ DEBUG_PRINTF("begin merge of %zu dfas\n", dfas.size());
+
+ vector<RawDfa *> out_dfas;
+ vector<RawDfa *> chunk;
+ for (auto it = begin(dfas), ite = end(dfas); it != ite; ++it) {
+ chunk.push_back(*it);
+ if (chunk.size() >= DFA_CHUNK_SIZE_MAX || next(it) == ite) {
+ pairwiseDfaMerge(chunk, dfa_mapping, outfixes, merge_func);
+ out_dfas.insert(end(out_dfas), begin(chunk), end(chunk));
+ chunk.clear();
+ }
+ }
+
+ // Remove null (merged) DFAs and update vector for subsequent use.
+ out_dfas.erase(remove(out_dfas.begin(), out_dfas.end(), nullptr),
+ out_dfas.end());
+ dfas.swap(out_dfas);
+ DEBUG_PRINTF("after merge there are %zu dfas\n", dfas.size());
+}
+
+static
+void mergeOutfixDfas(RoseBuildImpl &tbi, vector<raw_dfa *> &dfas) {
+ DEBUG_PRINTF("merging %zu nfas\n", dfas.size());
+ if (dfas.size() < 2) {
+ return;
+ }
+
+ vector<OutfixInfo> &outfixes = tbi.outfixes;
+
+ /* key is index into outfix array as iterators, etc may be invalidated by
+ * element addition. */
unordered_map<raw_dfa *, size_t> dfa_mapping;
- for (size_t i = 0; i < outfixes.size(); i++) {
+ for (size_t i = 0; i < outfixes.size(); i++) {
auto *rdfa = outfixes[i].rdfa();
if (rdfa) {
dfa_mapping[rdfa] = i;
- }
- }
-
- chunkedDfaMerge(dfas, dfa_mapping, outfixes,
- MergeMcClellan(tbi.rm, tbi.cc.grey));
- removeDeadOutfixes(outfixes);
-}
-
-static
-void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm,
- const Grey &grey) {
- if (!grey.roseMcClellanOutfix) {
- return;
- }
-
- DEBUG_PRINTF("merge combo\n");
-
- bool seen_dfa = false;
- u32 nfa_count = 0;
- for (const auto &outfix : tbi.outfixes) {
+ }
+ }
+
+ chunkedDfaMerge(dfas, dfa_mapping, outfixes,
+ MergeMcClellan(tbi.rm, tbi.cc.grey));
+ removeDeadOutfixes(outfixes);
+}
+
+static
+void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm,
+ const Grey &grey) {
+ if (!grey.roseMcClellanOutfix) {
+ return;
+ }
+
+ DEBUG_PRINTF("merge combo\n");
+
+ bool seen_dfa = false;
+ u32 nfa_count = 0;
+ for (const auto &outfix : tbi.outfixes) {
if (outfix.holder()) {
- DEBUG_PRINTF("nfa\n");
- nfa_count++;
+ DEBUG_PRINTF("nfa\n");
+ nfa_count++;
} else if (outfix.rdfa()) {
- DEBUG_PRINTF("dfa\n");
- seen_dfa = true;
- }
- }
-
- DEBUG_PRINTF("nfa %u dfas present %d\n", nfa_count,
- (int)seen_dfa);
- if (!nfa_count || (nfa_count == 1 && !seen_dfa)) {
- DEBUG_PRINTF("no combo merges possible\n");
- return;
- }
-
- /* key is index into outfix array as iterators, etc may be invalidated by
- * element addition. */
- size_t new_dfas = 0;
+ DEBUG_PRINTF("dfa\n");
+ seen_dfa = true;
+ }
+ }
+
+ DEBUG_PRINTF("nfa %u dfas present %d\n", nfa_count,
+ (int)seen_dfa);
+ if (!nfa_count || (nfa_count == 1 && !seen_dfa)) {
+ DEBUG_PRINTF("no combo merges possible\n");
+ return;
+ }
+
+ /* key is index into outfix array as iterators, etc may be invalidated by
+ * element addition. */
+ size_t new_dfas = 0;
unordered_map<raw_dfa *, size_t> dfa_mapping;
- vector<raw_dfa *> dfas;
-
- for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) {
+ vector<raw_dfa *> dfas;
+
+ for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) {
auto &outfix = *it;
assert(!outfix.is_dead());
@@ -2544,75 +2544,75 @@ void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm,
auto *rdfa = outfix.rdfa();
dfas.push_back(rdfa);
dfa_mapping[rdfa] = it - tbi.outfixes.begin();
- continue;
- }
-
+ continue;
+ }
+
if (!outfix.holder()) {
- continue;
- }
-
+ continue;
+ }
+
NGHolder *h = outfix.holder();
- assert(h->kind == NFA_OUTFIX);
- auto rdfa = buildMcClellan(*h, &rm, grey);
- if (rdfa) {
- // Transform this outfix into a DFA and add it to the merge set.
- dfa_mapping[rdfa.get()] = it - tbi.outfixes.begin();
- dfas.push_back(rdfa.get());
+ assert(h->kind == NFA_OUTFIX);
+ auto rdfa = buildMcClellan(*h, &rm, grey);
+ if (rdfa) {
+ // Transform this outfix into a DFA and add it to the merge set.
+ dfa_mapping[rdfa.get()] = it - tbi.outfixes.begin();
+ dfas.push_back(rdfa.get());
outfix.proto = move(rdfa);
- new_dfas++;
- }
- }
-
- DEBUG_PRINTF("constructed %zu new dfas\n", new_dfas);
-
- if (!new_dfas) {
- /* assumes normal dfas have already been fully merged */
- return;
- }
-
- chunkedDfaMerge(dfas, dfa_mapping, tbi.outfixes,
- MergeMcClellan(tbi.rm, tbi.cc.grey));
- removeDeadOutfixes(tbi.outfixes);
-}
-
-static
-void mergeOutfixHaigs(RoseBuildImpl &tbi, vector<raw_som_dfa *> &dfas,
- u32 limit) {
- if (dfas.size() < 2) {
- return;
- }
-
- vector<OutfixInfo> &outfixes = tbi.outfixes;
-
+ new_dfas++;
+ }
+ }
+
+ DEBUG_PRINTF("constructed %zu new dfas\n", new_dfas);
+
+ if (!new_dfas) {
+ /* assumes normal dfas have already been fully merged */
+ return;
+ }
+
+ chunkedDfaMerge(dfas, dfa_mapping, tbi.outfixes,
+ MergeMcClellan(tbi.rm, tbi.cc.grey));
+ removeDeadOutfixes(tbi.outfixes);
+}
+
+static
+void mergeOutfixHaigs(RoseBuildImpl &tbi, vector<raw_som_dfa *> &dfas,
+ u32 limit) {
+ if (dfas.size() < 2) {
+ return;
+ }
+
+ vector<OutfixInfo> &outfixes = tbi.outfixes;
+
unordered_map<raw_som_dfa *, size_t> dfa_mapping;
- for (size_t i = 0; i < outfixes.size(); i++) {
+ for (size_t i = 0; i < outfixes.size(); i++) {
auto *haig = outfixes[i].haig();
if (haig) {
dfa_mapping[haig] = i;
- }
- }
-
- chunkedDfaMerge(dfas, dfa_mapping, outfixes, MergeHaig(limit));
- removeDeadOutfixes(outfixes);
-}
-
-/**
- * This pass attempts to merge outfix engines together. At this point in time,
- * the engine type (NFA, DFA, Haig) has already been decided for each outfix
- * and outfixes can only merged with others of their same type. NFAs are merged
- * in a priority order based on common prefix length. The other types are
- * merged blindly. Engines are merged to the extent that they can still be
- * implemented efficiently.
- */
-void mergeOutfixes(RoseBuildImpl &tbi) {
- if (!tbi.cc.grey.mergeOutfixes) {
- return;
- }
-
- vector<NGHolder *> nfas;
- vector<raw_dfa *> dfas;
- vector<raw_som_dfa *> som_dfas;
-
+ }
+ }
+
+ chunkedDfaMerge(dfas, dfa_mapping, outfixes, MergeHaig(limit));
+ removeDeadOutfixes(outfixes);
+}
+
+/**
+ * This pass attempts to merge outfix engines together. At this point in time,
+ * the engine type (NFA, DFA, Haig) has already been decided for each outfix
+ * and outfixes can only merged with others of their same type. NFAs are merged
+ * in a priority order based on common prefix length. The other types are
+ * merged blindly. Engines are merged to the extent that they can still be
+ * implemented efficiently.
+ */
+void mergeOutfixes(RoseBuildImpl &tbi) {
+ if (!tbi.cc.grey.mergeOutfixes) {
+ return;
+ }
+
+ vector<NGHolder *> nfas;
+ vector<raw_dfa *> dfas;
+ vector<raw_som_dfa *> som_dfas;
+
for (auto &outfix : tbi.outfixes) {
if (outfix.rdfa()) {
dfas.push_back(outfix.rdfa());
@@ -2620,199 +2620,199 @@ void mergeOutfixes(RoseBuildImpl &tbi) {
nfas.push_back(outfix.holder());
} else if (outfix.haig()) {
som_dfas.push_back(outfix.haig());
- }
- }
-
- DEBUG_PRINTF("merging %zu dfas, %zu nfas\n",
- dfas.size(), nfas.size());
-
- mergeOutfixNfas(tbi, nfas);
- mergeOutfixDfas(tbi, dfas);
- mergeOutfixHaigs(tbi, som_dfas, 255);
- mergeOutfixHaigs(tbi, som_dfas, 8192);
- mergeOutfixCombo(tbi, tbi.rm, tbi.cc.grey);
-}
-
-static
-u32 allowedSquashDistance(const CharReach &cr, u32 min_width,
- const RoseBuildImpl &tbi,
- RoseVertex tv) {
- CharReach accept_cr;
- DEBUG_PRINTF("hello |cr|=%zu\n", cr.count());
-
- const RoseGraph &g = tbi.g;
-
- /* TODO: inspect further back in the pattern */
- for (u32 lit_id : g[tv].literals) {
+ }
+ }
+
+ DEBUG_PRINTF("merging %zu dfas, %zu nfas\n",
+ dfas.size(), nfas.size());
+
+ mergeOutfixNfas(tbi, nfas);
+ mergeOutfixDfas(tbi, dfas);
+ mergeOutfixHaigs(tbi, som_dfas, 255);
+ mergeOutfixHaigs(tbi, som_dfas, 8192);
+ mergeOutfixCombo(tbi, tbi.rm, tbi.cc.grey);
+}
+
+static
+u32 allowedSquashDistance(const CharReach &cr, u32 min_width,
+ const RoseBuildImpl &tbi,
+ RoseVertex tv) {
+ CharReach accept_cr;
+ DEBUG_PRINTF("hello |cr|=%zu\n", cr.count());
+
+ const RoseGraph &g = tbi.g;
+
+ /* TODO: inspect further back in the pattern */
+ for (u32 lit_id : g[tv].literals) {
const rose_literal_id &lit = tbi.literals.at(lit_id);
- if (lit.delay) {
- return 0; /* TODO: better */
- }
- if (lit.table != ROSE_FLOATING && lit.table != ROSE_EOD_ANCHORED) {
- return 0;
- }
- assert(!lit.s.empty());
- accept_cr |= *lit.s.rbegin();
- }
-
- DEBUG_PRINTF("|accept_cr|=%zu\n", accept_cr.count());
-
- if ((accept_cr & cr).any()) {
- DEBUG_PRINTF("no squash\n");
- return 0; /* the accept byte doesn't always kill the puffette. TODO:
- * maybe if we look further back we could find something that
- * would kill the puffette... */
- }
-
- DEBUG_PRINTF("allowed to squash %u\n", min_width);
- return min_width;
-}
-
-void mergePuffixes(RoseBuildImpl &tbi) {
- DEBUG_PRINTF("entry\n");
-
- if (!tbi.cc.grey.mergeSuffixes) {
- return;
- }
-
- RoseGraph &g = tbi.g;
-
- for (auto v : vertices_range(g)) {
- shared_ptr<NGHolder> h = g[v].suffix.graph;
- if (!h) {
- continue;
- }
- assert(!g[v].suffix.haig);
- assert(!g[v].eod_accept);
-
- assert(onlyOneTop(*h)); /* we should not have merged yet */
- bool fixed_depth = g[v].min_offset == g[v].max_offset;
-
- if (!isPuffable(*h, fixed_depth, tbi.rm, tbi.cc.grey)) {
- continue;
- }
-
- PureRepeat repeat;
- if (!isPureRepeat(*h, repeat)) {
- assert(0);
- continue;
- }
-
- if (repeat.bounds.min == depth(0)) {
- assert(0); // No vacuous puffs allowed.
- continue;
- }
-
- assert(repeat.bounds.min.is_finite() &&
- repeat.bounds.max.is_reachable());
- assert(repeat.bounds.max == repeat.bounds.min ||
- repeat.bounds.max.is_infinite());
-
- const bool unbounded = repeat.bounds.max.is_infinite();
- const set<ReportID> reports = all_reports(*h);
- assert(reports.size() == 1);
- ReportID report = *reports.begin();
-
- DEBUG_PRINTF("got puffette candidate %u:%s\n", report,
- repeat.bounds.str().c_str());
-
- raw_puff rp(repeat.bounds.min, unbounded, report, repeat.reach);
-
- u32 queue;
- u32 event;
- tbi.addChainTail(rp, &queue, &event);
- u32 squashDistance =
- allowedSquashDistance(repeat.reach, repeat.bounds.min, tbi, v);
-
+ if (lit.delay) {
+ return 0; /* TODO: better */
+ }
+ if (lit.table != ROSE_FLOATING && lit.table != ROSE_EOD_ANCHORED) {
+ return 0;
+ }
+ assert(!lit.s.empty());
+ accept_cr |= *lit.s.rbegin();
+ }
+
+ DEBUG_PRINTF("|accept_cr|=%zu\n", accept_cr.count());
+
+ if ((accept_cr & cr).any()) {
+ DEBUG_PRINTF("no squash\n");
+ return 0; /* the accept byte doesn't always kill the puffette. TODO:
+ * maybe if we look further back we could find something that
+ * would kill the puffette... */
+ }
+
+ DEBUG_PRINTF("allowed to squash %u\n", min_width);
+ return min_width;
+}
+
+void mergePuffixes(RoseBuildImpl &tbi) {
+ DEBUG_PRINTF("entry\n");
+
+ if (!tbi.cc.grey.mergeSuffixes) {
+ return;
+ }
+
+ RoseGraph &g = tbi.g;
+
+ for (auto v : vertices_range(g)) {
+ shared_ptr<NGHolder> h = g[v].suffix.graph;
+ if (!h) {
+ continue;
+ }
+ assert(!g[v].suffix.haig);
+ assert(!g[v].eod_accept);
+
+ assert(onlyOneTop(*h)); /* we should not have merged yet */
+ bool fixed_depth = g[v].min_offset == g[v].max_offset;
+
+ if (!isPuffable(*h, fixed_depth, tbi.rm, tbi.cc.grey)) {
+ continue;
+ }
+
+ PureRepeat repeat;
+ if (!isPureRepeat(*h, repeat)) {
+ assert(0);
+ continue;
+ }
+
+ if (repeat.bounds.min == depth(0)) {
+ assert(0); // No vacuous puffs allowed.
+ continue;
+ }
+
+ assert(repeat.bounds.min.is_finite() &&
+ repeat.bounds.max.is_reachable());
+ assert(repeat.bounds.max == repeat.bounds.min ||
+ repeat.bounds.max.is_infinite());
+
+ const bool unbounded = repeat.bounds.max.is_infinite();
+ const set<ReportID> reports = all_reports(*h);
+ assert(reports.size() == 1);
+ ReportID report = *reports.begin();
+
+ DEBUG_PRINTF("got puffette candidate %u:%s\n", report,
+ repeat.bounds.str().c_str());
+
+ raw_puff rp(repeat.bounds.min, unbounded, report, repeat.reach);
+
+ u32 queue;
+ u32 event;
+ tbi.addChainTail(rp, &queue, &event);
+ u32 squashDistance =
+ allowedSquashDistance(repeat.reach, repeat.bounds.min, tbi, v);
+
Report ir = makeMpvTrigger(event, squashDistance);
- ReportID id = tbi.rm.getInternalId(ir);
-
- DEBUG_PRINTF("puffette event q%u t%u\n", queue, event);
- g[v].suffix.reset();
- g[v].reports.insert(id);
- }
-}
-
-static
-void updateCastleSuffix(RoseGraph &g, const shared_ptr<CastleProto> &m,
- u32 top, const vector<RoseVertex> &verts) {
+ ReportID id = tbi.rm.getInternalId(ir);
+
+ DEBUG_PRINTF("puffette event q%u t%u\n", queue, event);
+ g[v].suffix.reset();
+ g[v].reports.insert(id);
+ }
+}
+
+static
+void updateCastleSuffix(RoseGraph &g, const shared_ptr<CastleProto> &m,
+ u32 top, const vector<RoseVertex> &verts) {
DEBUG_PRINTF("merged in as top %u of %p, updating %zu vertices\n", top,
m.get(), verts.size());
-
- for (auto v : verts) {
- assert(g[v].suffix.castle);
- g[v].suffix.castle = m;
- g[v].suffix.top = top;
- }
-}
-
-static
+
+ for (auto v : verts) {
+ assert(g[v].suffix.castle);
+ g[v].suffix.castle = m;
+ g[v].suffix.top = top;
+ }
+}
+
+static
void mergeCastleSuffixChunk(RoseGraph &g, const vector<CastleProto *> &castles,
const unordered_map<CastleProto *, vector<RoseVertex>> &eng_verts) {
- if (castles.size() <= 1) {
- return;
- }
-
+ if (castles.size() <= 1) {
+ return;
+ }
+
DEBUG_PRINTF("merging reach %s, %zu elements\n",
describeClass(castles[0]->reach()).c_str(), castles.size());
-
+
CastleProto *m = nullptr;
-
+
for (CastleProto *c : castles) {
- assert(c->repeats.size() == 1); // Not yet merged.
+ assert(c->repeats.size() == 1); // Not yet merged.
assert(g[eng_verts.at(c).front()].suffix.castle.get() == c);
if (!m) {
m = c;
- continue;
- }
-
+ continue;
+ }
+
u32 top = m->merge(c->repeats[0]);
if (top == CastleProto::max_occupancy) {
- // No room left to merge into 'm'. This one becomes the new 'm'.
- DEBUG_PRINTF("next mergee\n");
- m = c;
+ // No room left to merge into 'm'. This one becomes the new 'm'.
+ DEBUG_PRINTF("next mergee\n");
+ m = c;
continue;
- }
+ }
updateCastleSuffix(g, g[eng_verts.at(m).front()].suffix.castle, top,
eng_verts.at(c));
DEBUG_PRINTF("added to %p, top %u\n", m, top);
- }
-}
-
+ }
+}
+
void mergeCastleSuffixes(RoseBuildImpl &build) {
- DEBUG_PRINTF("entry\n");
-
+ DEBUG_PRINTF("entry\n");
+
if (!build.cc.grey.allowCastle || !build.cc.grey.mergeSuffixes) {
- return;
- }
-
+ return;
+ }
+
unordered_map<CastleProto *, vector<RoseVertex>> eng_verts;
map<CharReach, vector<CastleProto *>> by_reach;
-
+
RoseGraph &g = build.g;
-
- for (auto v : vertices_range(g)) {
- if (!g[v].suffix.castle) {
- continue;
- }
-
+
+ for (auto v : vertices_range(g)) {
+ if (!g[v].suffix.castle) {
+ continue;
+ }
+
CastleProto *c = g[v].suffix.castle.get();
-
- if (c->repeats.size() != 1) {
- // This code assumes it's the only place merging is being done.
- assert(0);
- continue;
- }
-
+
+ if (c->repeats.size() != 1) {
+ // This code assumes it's the only place merging is being done.
+ assert(0);
+ continue;
+ }
+
if (!contains(eng_verts, c)) {
- by_reach[c->reach()].push_back(c);
- }
+ by_reach[c->reach()].push_back(c);
+ }
eng_verts[c].push_back(v);
- }
-
+ }
+
for (auto &chunk : by_reach | map_values) {
mergeCastleSuffixChunk(g, chunk, eng_verts);
- }
-}
-
-} // namespace ue2
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_merge.h b/contrib/libs/hyperscan/src/rose/rose_build_merge.h
index c0f0d65c8b..6de6c7786a 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_merge.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_merge.h
@@ -1,70 +1,70 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
* \brief Rose Build: functions for reducing the number of engines in a Rose
* graph through merging or deduplicating engines.
- */
-
-#ifndef ROSE_BUILD_MERGE_H
-#define ROSE_BUILD_MERGE_H
-
-#include "rose_graph.h"
-
-#include <deque>
-#include <set>
-
-namespace ue2 {
-
-class NGHolder;
-class RoseBuildImpl;
-
-bool dedupeLeftfixes(RoseBuildImpl &tbi);
-void mergeLeftfixesVariableLag(RoseBuildImpl &tbi);
-void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi);
-void dedupeSuffixes(RoseBuildImpl &tbi);
-
-void mergeAcyclicSuffixes(RoseBuildImpl &tbi);
-void mergeSmallSuffixes(RoseBuildImpl &tbi);
-void mergeSmallLeftfixes(RoseBuildImpl &tbi);
-void mergeCastleLeftfixes(RoseBuildImpl &tbi);
-void mergeOutfixes(RoseBuildImpl &tbi);
-void mergePuffixes(RoseBuildImpl &tbi);
-void mergeCastleSuffixes(RoseBuildImpl &tbi);
-
-bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
- RoseVertex v);
-bool mergeableRoseVertices(const RoseBuildImpl &tbi,
- const std::set<RoseVertex> &v1,
- const std::set<RoseVertex> &v2);
-bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
- const std::deque<RoseVertex> &verts1);
-
-} // namespace ue2
-
-#endif // ROSE_BUILD_MERGE_H
+ */
+
+#ifndef ROSE_BUILD_MERGE_H
+#define ROSE_BUILD_MERGE_H
+
+#include "rose_graph.h"
+
+#include <deque>
+#include <set>
+
+namespace ue2 {
+
+class NGHolder;
+class RoseBuildImpl;
+
+bool dedupeLeftfixes(RoseBuildImpl &tbi);
+void mergeLeftfixesVariableLag(RoseBuildImpl &tbi);
+void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi);
+void dedupeSuffixes(RoseBuildImpl &tbi);
+
+void mergeAcyclicSuffixes(RoseBuildImpl &tbi);
+void mergeSmallSuffixes(RoseBuildImpl &tbi);
+void mergeSmallLeftfixes(RoseBuildImpl &tbi);
+void mergeCastleLeftfixes(RoseBuildImpl &tbi);
+void mergeOutfixes(RoseBuildImpl &tbi);
+void mergePuffixes(RoseBuildImpl &tbi);
+void mergeCastleSuffixes(RoseBuildImpl &tbi);
+
+bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
+ RoseVertex v);
+bool mergeableRoseVertices(const RoseBuildImpl &tbi,
+ const std::set<RoseVertex> &v1,
+ const std::set<RoseVertex> &v2);
+bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
+ const std::deque<RoseVertex> &verts1);
+
+} // namespace ue2
+
+#endif // ROSE_BUILD_MERGE_H
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_misc.cpp b/contrib/libs/hyperscan/src/rose/rose_build_misc.cpp
index ca7a131910..0b0e689c99 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_misc.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_misc.cpp
@@ -1,297 +1,297 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
#include "rose_build_misc.h"
-#include "rose_build_impl.h"
-
+#include "rose_build_impl.h"
+
#include "rose_build_resources.h"
#include "hwlm/hwlm_literal.h"
-#include "nfa/castlecompile.h"
-#include "nfa/goughcompile.h"
-#include "nfa/mcclellancompile_util.h"
-#include "nfa/nfa_api.h"
-#include "nfa/rdfa.h"
+#include "nfa/castlecompile.h"
+#include "nfa/goughcompile.h"
+#include "nfa/mcclellancompile_util.h"
+#include "nfa/nfa_api.h"
+#include "nfa/rdfa.h"
#include "nfa/tamaramacompile.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_limex.h"
-#include "nfagraph/ng_reports.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_width.h"
-#include "smallwrite/smallwrite_build.h"
-#include "util/alloc.h"
-#include "util/boundary_reports.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-#include "util/make_unique.h"
-#include "util/order_check.h"
-#include "util/report_manager.h"
-#include "util/ue2string.h"
-#include "util/verify_types.h"
-#include "ue2common.h"
-#include "grey.h"
-
-#include <boost/graph/breadth_first_search.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
-// just to get it out of the header
-RoseBuild::~RoseBuild() { }
-
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
+#include "smallwrite/smallwrite_build.h"
+#include "util/alloc.h"
+#include "util/boundary_reports.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+#include "util/make_unique.h"
+#include "util/order_check.h"
+#include "util/report_manager.h"
+#include "util/ue2string.h"
+#include "util/verify_types.h"
+#include "ue2common.h"
+#include "grey.h"
+
+#include <boost/graph/breadth_first_search.hpp>
+
+using namespace std;
+
+namespace ue2 {
+
+// just to get it out of the header
+RoseBuild::~RoseBuild() { }
+
RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in,
SomSlotManager &ssm_in,
SmallWriteBuild &smwr_in,
- const CompileContext &cc_in,
- const BoundaryReports &boundary_in)
- : cc(cc_in),
- root(add_vertex(g)),
- anchored_root(add_vertex(g)),
- hasSom(false),
- group_end(0),
- ematcher_region_size(0),
- eod_event_literal_id(MO_INVALID_IDX),
- max_rose_anchored_floating_overlap(0),
- rm(rm_in),
- ssm(ssm_in),
+ const CompileContext &cc_in,
+ const BoundaryReports &boundary_in)
+ : cc(cc_in),
+ root(add_vertex(g)),
+ anchored_root(add_vertex(g)),
+ hasSom(false),
+ group_end(0),
+ ematcher_region_size(0),
+ eod_event_literal_id(MO_INVALID_IDX),
+ max_rose_anchored_floating_overlap(0),
+ rm(rm_in),
+ ssm(ssm_in),
smwr(smwr_in),
- boundary(boundary_in),
- next_nfa_report(0) {
- // add root vertices to graph
- g[root].min_offset = 0;
- g[root].max_offset = 0;
-
- g[anchored_root].min_offset = 0;
- g[anchored_root].max_offset = 0;
-}
-
-RoseBuildImpl::~RoseBuildImpl() {
- // empty
-}
-
-bool RoseVertexProps::isBoring(void) const {
- return !suffix && !left;
-}
-
-bool RoseVertexProps::fixedOffset(void) const {
- assert(min_offset <= max_offset); /* ensure offsets calculated */
- return max_offset == min_offset && max_offset != ROSE_BOUND_INF;
-}
-
-bool RoseBuildImpl::isRootSuccessor(const RoseVertex &v) const {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (isAnyStart(u)) {
- return true;
- }
- }
- return false;
-}
-
-bool RoseBuildImpl::isNonRootSuccessor(const RoseVertex &v) const {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (!isAnyStart(u)) {
- return true;
- }
- }
- return false;
-}
-
-bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v) {
- for (const auto &e : out_edges_range(v, g)) {
- if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
- return true;
- }
- }
-
- return false;
-}
-
-bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v) {
- for (const auto &e : out_edges_range(v, g)) {
- if (g[e].history == ROSE_ROLE_HISTORY_LAST_BYTE) {
- return true;
- }
- }
-
- return false;
-}
-
-static
-bool isInTable(const RoseBuildImpl &tbi, RoseVertex v,
- rose_literal_table table) {
- const auto &lit_ids = tbi.g[v].literals;
- if (lit_ids.empty()) {
- return false; // special role with no literals
- }
-
- // All literals for a given vertex will be in the same table, so we need
- // only inspect the first one.
+ boundary(boundary_in),
+ next_nfa_report(0) {
+ // add root vertices to graph
+ g[root].min_offset = 0;
+ g[root].max_offset = 0;
+
+ g[anchored_root].min_offset = 0;
+ g[anchored_root].max_offset = 0;
+}
+
+RoseBuildImpl::~RoseBuildImpl() {
+ // empty
+}
+
+bool RoseVertexProps::isBoring(void) const {
+ return !suffix && !left;
+}
+
+bool RoseVertexProps::fixedOffset(void) const {
+ assert(min_offset <= max_offset); /* ensure offsets calculated */
+ return max_offset == min_offset && max_offset != ROSE_BOUND_INF;
+}
+
+bool RoseBuildImpl::isRootSuccessor(const RoseVertex &v) const {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (isAnyStart(u)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool RoseBuildImpl::isNonRootSuccessor(const RoseVertex &v) const {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (!isAnyStart(u)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v) {
+ for (const auto &e : out_edges_range(v, g)) {
+ if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v) {
+ for (const auto &e : out_edges_range(v, g)) {
+ if (g[e].history == ROSE_ROLE_HISTORY_LAST_BYTE) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static
+bool isInTable(const RoseBuildImpl &tbi, RoseVertex v,
+ rose_literal_table table) {
+ const auto &lit_ids = tbi.g[v].literals;
+ if (lit_ids.empty()) {
+ return false; // special role with no literals
+ }
+
+ // All literals for a given vertex will be in the same table, so we need
+ // only inspect the first one.
const auto lit_table = tbi.literals.at(*lit_ids.begin()).table;
-
- // Verify that all literals for this vertex are in the same table.
+
+ // Verify that all literals for this vertex are in the same table.
assert(all_of_in(lit_ids, [&](u32 lit_id) {
return tbi.literals.at(lit_id).table == lit_table;
}));
-
- return lit_table == table;
-}
-
-bool RoseBuildImpl::isAnchored(RoseVertex v) const {
- return isInTable(*this, v, ROSE_ANCHORED);
-}
-
-bool RoseBuildImpl::isFloating(RoseVertex v) const {
- return isInTable(*this, v, ROSE_FLOATING);
-}
-
-bool RoseBuildImpl::isInETable(RoseVertex v) const {
- return isInTable(*this, v, ROSE_EOD_ANCHORED);
-}
-
-bool RoseBuildImpl::hasLiteralInTable(RoseVertex v,
- enum rose_literal_table t) const {
- return isInTable(*this, v, t);
-}
-
-/* Indicates that the floating table (if it exists) will be only run
- conditionally based on matches from the anchored table. */
-bool RoseBuildImpl::hasNoFloatingRoots() const {
- for (auto v : adjacent_vertices_range(root, g)) {
- if (isFloating(v)) {
+
+ return lit_table == table;
+}
+
+bool RoseBuildImpl::isAnchored(RoseVertex v) const {
+ return isInTable(*this, v, ROSE_ANCHORED);
+}
+
+bool RoseBuildImpl::isFloating(RoseVertex v) const {
+ return isInTable(*this, v, ROSE_FLOATING);
+}
+
+bool RoseBuildImpl::isInETable(RoseVertex v) const {
+ return isInTable(*this, v, ROSE_EOD_ANCHORED);
+}
+
+bool RoseBuildImpl::hasLiteralInTable(RoseVertex v,
+ enum rose_literal_table t) const {
+ return isInTable(*this, v, t);
+}
+
+/* Indicates that the floating table (if it exists) will be only run
+ conditionally based on matches from the anchored table. */
+bool RoseBuildImpl::hasNoFloatingRoots() const {
+ for (auto v : adjacent_vertices_range(root, g)) {
+ if (isFloating(v)) {
DEBUG_PRINTF("direct floating root %zu\n", g[v].index);
- return false;
- }
- }
-
- /* need to check if the anchored_root has any literals which are too deep */
- for (auto v : adjacent_vertices_range(anchored_root, g)) {
- if (isFloating(v)) {
+ return false;
+ }
+ }
+
+ /* need to check if the anchored_root has any literals which are too deep */
+ for (auto v : adjacent_vertices_range(anchored_root, g)) {
+ if (isFloating(v)) {
DEBUG_PRINTF("indirect floating root %zu\n", g[v].index);
- return false;
- }
- }
-
- return true;
-}
-
-size_t RoseBuildImpl::maxLiteralLen(RoseVertex v) const {
- const auto &lit_ids = g[v].literals;
- assert(!lit_ids.empty());
-
- size_t maxlen = 0;
-
- for (const auto &lit_id : lit_ids) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+size_t RoseBuildImpl::maxLiteralLen(RoseVertex v) const {
+ const auto &lit_ids = g[v].literals;
+ assert(!lit_ids.empty());
+
+ size_t maxlen = 0;
+
+ for (const auto &lit_id : lit_ids) {
maxlen = max(maxlen, literals.at(lit_id).elength());
- }
-
- return maxlen;
-}
-
-size_t RoseBuildImpl::minLiteralLen(RoseVertex v) const {
- const auto &lit_ids = g[v].literals;
- assert(!lit_ids.empty());
-
- size_t minlen = ROSE_BOUND_INF;
-
- for (const auto &lit_id : lit_ids) {
+ }
+
+ return maxlen;
+}
+
+size_t RoseBuildImpl::minLiteralLen(RoseVertex v) const {
+ const auto &lit_ids = g[v].literals;
+ assert(!lit_ids.empty());
+
+ size_t minlen = ROSE_BOUND_INF;
+
+ for (const auto &lit_id : lit_ids) {
minlen = min(minlen, literals.at(lit_id).elength());
- }
-
- return minlen;
-}
-
-// RoseBuild factory
+ }
+
+ return minlen;
+}
+
+// RoseBuild factory
unique_ptr<RoseBuild> makeRoseBuilder(ReportManager &rm,
SomSlotManager &ssm,
SmallWriteBuild &smwr,
- const CompileContext &cc,
- const BoundaryReports &boundary) {
+ const CompileContext &cc,
+ const BoundaryReports &boundary) {
return ue2::make_unique<RoseBuildImpl>(rm, ssm, smwr, cc, boundary);
-}
-
-bool roseIsPureLiteral(const RoseEngine *t) {
- return t->runtimeImpl == ROSE_RUNTIME_PURE_LITERAL;
-}
-
-// Returns non-zero max overlap len if a suffix of the literal 'a' overlaps
-// with a prefix of the literal 'b' or 'a' can be contained in 'b'.
-size_t maxOverlap(const ue2_literal &a, const ue2_literal &b, u32 b_delay) {
- /* overly conservative if only part of the string is nocase */
- bool nocase = a.any_nocase() || b.any_nocase();
- DEBUG_PRINTF("max overlap %s %s+%u %d\n", dumpString(a).c_str(),
- dumpString(b).c_str(), b_delay, (int)nocase);
- size_t a_len = a.length();
- size_t b_len = b.length();
- const char *a_end = a.c_str() + a_len;
- const char *b_end = b.c_str() + b_len;
- if (b_delay >= a_len) {
- return b_len + b_delay;
- } else if (b_delay) {
- /* a can be a substring of b which overlaps some of the end dots
- * OR b can be a substring near the end of a */
- /* ignore overlap due to the final trailing dot as delayed literals
- * are delivered before undelayed */
- for (u32 j = b_delay - 1; j > 0; j--) {
- if (b_len + j >= a_len) {
- if (!cmp(a.c_str(), b_end + j - a_len, a_len - j, nocase)) {
- return b_len + j;
- }
- } else {
- if (!cmp(a_end - j - b_len, b.c_str(), b_len, nocase)) {
- return b_len + j;
- }
- }
- }
- }
-
- return maxStringOverlap(a.get_string(), b.get_string(), nocase);
-}
-
-// Returns non-zero max overlap len if a suffix of the literal ID 'a' overlaps
-// with a prefix of the literal ID 'b' or 'a' can be contained in 'b'.
-size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b) {
- assert(!a.delay);
- return maxOverlap(a.s, b.s, b.delay);
-}
-
-static
-const rose_literal_id &getOverlapLiteral(const RoseBuildImpl &tbi,
- u32 literal_id) {
+}
+
+bool roseIsPureLiteral(const RoseEngine *t) {
+ return t->runtimeImpl == ROSE_RUNTIME_PURE_LITERAL;
+}
+
+// Returns non-zero max overlap len if a suffix of the literal 'a' overlaps
+// with a prefix of the literal 'b' or 'a' can be contained in 'b'.
+size_t maxOverlap(const ue2_literal &a, const ue2_literal &b, u32 b_delay) {
+ /* overly conservative if only part of the string is nocase */
+ bool nocase = a.any_nocase() || b.any_nocase();
+ DEBUG_PRINTF("max overlap %s %s+%u %d\n", dumpString(a).c_str(),
+ dumpString(b).c_str(), b_delay, (int)nocase);
+ size_t a_len = a.length();
+ size_t b_len = b.length();
+ const char *a_end = a.c_str() + a_len;
+ const char *b_end = b.c_str() + b_len;
+ if (b_delay >= a_len) {
+ return b_len + b_delay;
+ } else if (b_delay) {
+ /* a can be a substring of b which overlaps some of the end dots
+ * OR b can be a substring near the end of a */
+ /* ignore overlap due to the final trailing dot as delayed literals
+ * are delivered before undelayed */
+ for (u32 j = b_delay - 1; j > 0; j--) {
+ if (b_len + j >= a_len) {
+ if (!cmp(a.c_str(), b_end + j - a_len, a_len - j, nocase)) {
+ return b_len + j;
+ }
+ } else {
+ if (!cmp(a_end - j - b_len, b.c_str(), b_len, nocase)) {
+ return b_len + j;
+ }
+ }
+ }
+ }
+
+ return maxStringOverlap(a.get_string(), b.get_string(), nocase);
+}
+
+// Returns non-zero max overlap len if a suffix of the literal ID 'a' overlaps
+// with a prefix of the literal ID 'b' or 'a' can be contained in 'b'.
+size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b) {
+ assert(!a.delay);
+ return maxOverlap(a.s, b.s, b.delay);
+}
+
+static
+const rose_literal_id &getOverlapLiteral(const RoseBuildImpl &tbi,
+ u32 literal_id) {
auto it = tbi.anchoredLitSuffix.find(literal_id);
- if (it != tbi.anchoredLitSuffix.end()) {
- return it->second;
- }
+ if (it != tbi.anchoredLitSuffix.end()) {
+ return it->second;
+ }
return tbi.literals.at(literal_id);
-}
-
+}
+
ue2_literal findNonOverlappingTail(const set<ue2_literal> &lits,
const ue2_literal &s) {
size_t max_overlap = 0;
@@ -309,236 +309,236 @@ ue2_literal findNonOverlappingTail(const set<ue2_literal> &lits,
return tail;
}
-size_t RoseBuildImpl::maxLiteralOverlap(RoseVertex u, RoseVertex v) const {
- size_t overlap = 0;
- for (auto u_lit_id : g[u].literals) {
- const rose_literal_id &ul = getOverlapLiteral(*this, u_lit_id);
- for (auto v_lit_id : g[v].literals) {
- const rose_literal_id &vl = getOverlapLiteral(*this, v_lit_id);
- overlap = max(overlap, maxOverlap(ul, vl));
- }
- }
- return overlap;
-}
-
-void RoseBuildImpl::removeVertices(const vector<RoseVertex> &dead) {
- for (auto v : dead) {
- assert(!isAnyStart(v));
+size_t RoseBuildImpl::maxLiteralOverlap(RoseVertex u, RoseVertex v) const {
+ size_t overlap = 0;
+ for (auto u_lit_id : g[u].literals) {
+ const rose_literal_id &ul = getOverlapLiteral(*this, u_lit_id);
+ for (auto v_lit_id : g[v].literals) {
+ const rose_literal_id &vl = getOverlapLiteral(*this, v_lit_id);
+ overlap = max(overlap, maxOverlap(ul, vl));
+ }
+ }
+ return overlap;
+}
+
+void RoseBuildImpl::removeVertices(const vector<RoseVertex> &dead) {
+ for (auto v : dead) {
+ assert(!isAnyStart(v));
DEBUG_PRINTF("removing vertex %zu\n", g[v].index);
- for (auto lit_id : g[v].literals) {
- literal_info[lit_id].vertices.erase(v);
- }
+ for (auto lit_id : g[v].literals) {
+ literal_info[lit_id].vertices.erase(v);
+ }
clear_vertex(v, g);
- remove_vertex(v, g);
- }
+ remove_vertex(v, g);
+ }
renumber_vertices(g);
-}
-
-// Find the maximum bound on the edges to this vertex's successors ignoring
-// those via infixes.
-u32 RoseBuildImpl::calcSuccMaxBound(RoseVertex u) const {
- u32 maxBound = 0;
- for (const auto &e : out_edges_range(u, g)) {
- RoseVertex v = target(e, g);
-
- if (g[v].left) {
- continue;
- }
-
- u32 thisBound = g[e].maxBound;
-
- if (thisBound == ROSE_BOUND_INF) {
- return ROSE_BOUND_INF;
- }
-
- if (!g[v].eod_accept) {
- // Add the length of the longest of our literals.
- thisBound += maxLiteralLen(v);
- }
-
- maxBound = max(maxBound, thisBound);
- }
-
- assert(maxBound <= ROSE_BOUND_INF);
- return maxBound;
-}
-
-u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, u32 delay,
- rose_literal_table table) {
+}
+
+// Find the maximum bound on the edges to this vertex's successors ignoring
+// those via infixes.
+u32 RoseBuildImpl::calcSuccMaxBound(RoseVertex u) const {
+ u32 maxBound = 0;
+ for (const auto &e : out_edges_range(u, g)) {
+ RoseVertex v = target(e, g);
+
+ if (g[v].left) {
+ continue;
+ }
+
+ u32 thisBound = g[e].maxBound;
+
+ if (thisBound == ROSE_BOUND_INF) {
+ return ROSE_BOUND_INF;
+ }
+
+ if (!g[v].eod_accept) {
+ // Add the length of the longest of our literals.
+ thisBound += maxLiteralLen(v);
+ }
+
+ maxBound = max(maxBound, thisBound);
+ }
+
+ assert(maxBound <= ROSE_BOUND_INF);
+ return maxBound;
+}
+
+u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, u32 delay,
+ rose_literal_table table) {
DEBUG_PRINTF("getting id for %s in table %d\n", dumpString(s).c_str(),
table);
- assert(table != ROSE_ANCHORED);
- rose_literal_id key(s, table, delay);
-
+ assert(table != ROSE_ANCHORED);
+ rose_literal_id key(s, table, delay);
+
auto m = literals.insert(key);
u32 id = m.first;
bool inserted = m.second;
-
- if (inserted) {
- literal_info.push_back(rose_literal_info());
- assert(literal_info.size() == id + 1);
-
- if (delay) {
- u32 undelayed_id = getLiteralId(s, 0, table);
- literal_info[id].undelayed_id = undelayed_id;
- literal_info[undelayed_id].delayed_ids.insert(id);
- } else {
- literal_info[id].undelayed_id = id;
- }
- }
- return id;
-}
-
-// Function that operates on a msk/cmp pair and a literal, as used in
-// hwlmLiteral, and zeroes msk elements that don't add any power to the
-// literal.
-void normaliseLiteralMask(const ue2_literal &s_in, vector<u8> &msk,
- vector<u8> &cmp) {
- assert(msk.size() == cmp.size());
- assert(msk.size() <= HWLM_MASKLEN);
-
- if (msk.empty()) {
- return;
- }
-
- // Work over a caseless copy if the string contains nocase chars. This will
- // ensure that we treat masks designed to handle mixed-sensitivity literals
- // correctly: these will be matched by the literal matcher in caseless
- // mode, with the mask used to narrow the matches.
- ue2_literal s(s_in);
- if (s.any_nocase()) {
- make_nocase(&s);
- }
-
- ue2_literal::const_reverse_iterator it = s.rbegin(), ite = s.rend();
- size_t i = msk.size();
- while (i-- != 0 && it != ite) {
- const CharReach &cr = *it;
- for (size_t c = cr.find_first(); c != CharReach::npos;
- c = cr.find_next(c)) {
- if (((u8)c & msk[i]) != cmp[i]) {
- goto skip;
- }
- }
-
- // If we didn't jump out of the loop to skip, then this mask position
- // doesn't further narrow the set of acceptable literals from those
- // accepted by s. So we can zero this element.
- msk[i] = 0;
- cmp[i] = 0;
- skip:
- ++it;
- }
-
- // Wipe out prefix zeroes.
- while (!msk.empty() && msk[0] == 0) {
- msk.erase(msk.begin());
- cmp.erase(cmp.begin());
- }
-}
-
-rose_literal_id::rose_literal_id(const ue2_literal &s_in,
- const vector<u8> &msk_in, const vector<u8> &cmp_in,
- rose_literal_table table_in, u32 delay_in)
- : s(s_in), msk(msk_in), cmp(cmp_in), table(table_in),
- delay(delay_in), distinctiveness(0) {
- assert(msk.size() == cmp.size());
- assert(msk.size() <= HWLM_MASKLEN);
- assert(delay <= MAX_DELAY);
-
- normaliseLiteralMask(s, msk, cmp);
-}
-
-u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, const vector<u8> &msk,
- const vector<u8> &cmp, u32 delay,
- rose_literal_table table) {
+
+ if (inserted) {
+ literal_info.push_back(rose_literal_info());
+ assert(literal_info.size() == id + 1);
+
+ if (delay) {
+ u32 undelayed_id = getLiteralId(s, 0, table);
+ literal_info[id].undelayed_id = undelayed_id;
+ literal_info[undelayed_id].delayed_ids.insert(id);
+ } else {
+ literal_info[id].undelayed_id = id;
+ }
+ }
+ return id;
+}
+
+// Function that operates on a msk/cmp pair and a literal, as used in
+// hwlmLiteral, and zeroes msk elements that don't add any power to the
+// literal.
+void normaliseLiteralMask(const ue2_literal &s_in, vector<u8> &msk,
+ vector<u8> &cmp) {
+ assert(msk.size() == cmp.size());
+ assert(msk.size() <= HWLM_MASKLEN);
+
+ if (msk.empty()) {
+ return;
+ }
+
+ // Work over a caseless copy if the string contains nocase chars. This will
+ // ensure that we treat masks designed to handle mixed-sensitivity literals
+ // correctly: these will be matched by the literal matcher in caseless
+ // mode, with the mask used to narrow the matches.
+ ue2_literal s(s_in);
+ if (s.any_nocase()) {
+ make_nocase(&s);
+ }
+
+ ue2_literal::const_reverse_iterator it = s.rbegin(), ite = s.rend();
+ size_t i = msk.size();
+ while (i-- != 0 && it != ite) {
+ const CharReach &cr = *it;
+ for (size_t c = cr.find_first(); c != CharReach::npos;
+ c = cr.find_next(c)) {
+ if (((u8)c & msk[i]) != cmp[i]) {
+ goto skip;
+ }
+ }
+
+ // If we didn't jump out of the loop to skip, then this mask position
+ // doesn't further narrow the set of acceptable literals from those
+ // accepted by s. So we can zero this element.
+ msk[i] = 0;
+ cmp[i] = 0;
+ skip:
+ ++it;
+ }
+
+ // Wipe out prefix zeroes.
+ while (!msk.empty() && msk[0] == 0) {
+ msk.erase(msk.begin());
+ cmp.erase(cmp.begin());
+ }
+}
+
+rose_literal_id::rose_literal_id(const ue2_literal &s_in,
+ const vector<u8> &msk_in, const vector<u8> &cmp_in,
+ rose_literal_table table_in, u32 delay_in)
+ : s(s_in), msk(msk_in), cmp(cmp_in), table(table_in),
+ delay(delay_in), distinctiveness(0) {
+ assert(msk.size() == cmp.size());
+ assert(msk.size() <= HWLM_MASKLEN);
+ assert(delay <= MAX_DELAY);
+
+ normaliseLiteralMask(s, msk, cmp);
+}
+
+u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, const vector<u8> &msk,
+ const vector<u8> &cmp, u32 delay,
+ rose_literal_table table) {
DEBUG_PRINTF("getting id for %s in table %d\n", dumpString(s).c_str(),
table);
- assert(table != ROSE_ANCHORED);
- rose_literal_id key(s, msk, cmp, table, delay);
-
- /* ue2_literals are always uppercased if nocase and must have an
- * alpha char */
-
+ assert(table != ROSE_ANCHORED);
+ rose_literal_id key(s, msk, cmp, table, delay);
+
+ /* ue2_literals are always uppercased if nocase and must have an
+ * alpha char */
+
auto m = literals.insert(key);
u32 id = m.first;
bool inserted = m.second;
-
- if (inserted) {
- literal_info.push_back(rose_literal_info());
- assert(literal_info.size() == id + 1);
-
- if (delay) {
- u32 undelayed_id = getLiteralId(s, msk, cmp, 0, table);
- literal_info[id].undelayed_id = undelayed_id;
- literal_info[undelayed_id].delayed_ids.insert(id);
- } else {
- literal_info[id].undelayed_id = id;
- }
- }
- return id;
-}
-
-u32 RoseBuildImpl::getNewLiteralId() {
- rose_literal_id key(ue2_literal(), ROSE_ANCHORED, 0);
+
+ if (inserted) {
+ literal_info.push_back(rose_literal_info());
+ assert(literal_info.size() == id + 1);
+
+ if (delay) {
+ u32 undelayed_id = getLiteralId(s, msk, cmp, 0, table);
+ literal_info[id].undelayed_id = undelayed_id;
+ literal_info[undelayed_id].delayed_ids.insert(id);
+ } else {
+ literal_info[id].undelayed_id = id;
+ }
+ }
+ return id;
+}
+
+u32 RoseBuildImpl::getNewLiteralId() {
+ rose_literal_id key(ue2_literal(), ROSE_ANCHORED, 0);
u32 numLiterals = verify_u32(literals.size());
- key.distinctiveness = numLiterals;
-
+ key.distinctiveness = numLiterals;
+
auto m = literals.insert(key);
assert(m.second);
u32 id = m.first;
-
- literal_info.push_back(rose_literal_info());
- assert(literal_info.size() == id + 1);
-
- literal_info[id].undelayed_id = id;
-
- return id;
-}
-
-bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b) {
- ORDER_CHECK(minBound);
- ORDER_CHECK(maxBound);
- ORDER_CHECK(history);
- return false;
-}
-
-#ifndef NDEBUG
+
+ literal_info.push_back(rose_literal_info());
+ assert(literal_info.size() == id + 1);
+
+ literal_info[id].undelayed_id = id;
+
+ return id;
+}
+
+bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b) {
+ ORDER_CHECK(minBound);
+ ORDER_CHECK(maxBound);
+ ORDER_CHECK(history);
+ return false;
+}
+
+#ifndef NDEBUG
bool roseHasTops(const RoseBuildImpl &build, RoseVertex v) {
const RoseGraph &g = build.g;
- assert(g[v].left);
-
- set<u32> graph_tops;
+ assert(g[v].left);
+
+ set<u32> graph_tops;
if (!build.isRootSuccessor(v)) {
for (const auto &e : in_edges_range(v, g)) {
graph_tops.insert(g[e].rose_top);
}
- }
-
- return is_subset_of(graph_tops, all_tops(g[v].left));
-}
-#endif
-
-u32 OutfixInfo::get_queue(QueueIndexFactory &qif) {
- if (queue == ~0U) {
- queue = qif.get_queue();
- }
-
- return queue;
-}
-
+ }
+
+ return is_subset_of(graph_tops, all_tops(g[v].left));
+}
+#endif
+
+u32 OutfixInfo::get_queue(QueueIndexFactory &qif) {
+ if (queue == ~0U) {
+ queue = qif.get_queue();
+ }
+
+ return queue;
+}
+
namespace {
class OutfixAllReports : public boost::static_visitor<set<ReportID>> {
public:
set<ReportID> operator()(const boost::blank &) const {
return set<ReportID>();
- }
+ }
template<class T>
set<ReportID> operator()(const unique_ptr<T> &x) const {
return all_reports(*x);
- }
-
+ }
+
set<ReportID> operator()(const MpvProto &mpv) const {
set<ReportID> reports;
for (const auto &puff : mpv.puffettes) {
@@ -548,160 +548,160 @@ public:
reports.insert(puff.report);
}
return reports;
- }
+ }
};
}
-
+
set<ReportID> all_reports(const OutfixInfo &outfix) {
auto reports = boost::apply_visitor(OutfixAllReports(), outfix.proto);
- assert(!reports.empty());
- return reports;
-}
-
-bool RoseSuffixInfo::operator==(const RoseSuffixInfo &b) const {
- return top == b.top && graph == b.graph && castle == b.castle &&
+ assert(!reports.empty());
+ return reports;
+}
+
+bool RoseSuffixInfo::operator==(const RoseSuffixInfo &b) const {
+ return top == b.top && graph == b.graph && castle == b.castle &&
rdfa == b.rdfa && haig == b.haig && tamarama == b.tamarama;
-}
-
-bool RoseSuffixInfo::operator<(const RoseSuffixInfo &b) const {
- const RoseSuffixInfo &a = *this;
- ORDER_CHECK(top);
- ORDER_CHECK(graph);
- ORDER_CHECK(castle);
- ORDER_CHECK(haig);
- ORDER_CHECK(rdfa);
+}
+
+bool RoseSuffixInfo::operator<(const RoseSuffixInfo &b) const {
+ const RoseSuffixInfo &a = *this;
+ ORDER_CHECK(top);
+ ORDER_CHECK(graph);
+ ORDER_CHECK(castle);
+ ORDER_CHECK(haig);
+ ORDER_CHECK(rdfa);
ORDER_CHECK(tamarama);
- assert(a.dfa_min_width == b.dfa_min_width);
- assert(a.dfa_max_width == b.dfa_max_width);
- return false;
-}
-
+ assert(a.dfa_min_width == b.dfa_min_width);
+ assert(a.dfa_max_width == b.dfa_max_width);
+ return false;
+}
+
size_t RoseSuffixInfo::hash() const {
return hash_all(top, graph, castle, rdfa, haig, tamarama);
}
-
-void RoseSuffixInfo::reset(void) {
- top = 0;
- graph.reset();
- castle.reset();
- rdfa.reset();
- haig.reset();
+
+void RoseSuffixInfo::reset(void) {
+ top = 0;
+ graph.reset();
+ castle.reset();
+ rdfa.reset();
+ haig.reset();
tamarama.reset();
dfa_min_width = depth(0);
- dfa_max_width = depth::infinity();
-}
-
-std::set<ReportID> all_reports(const suffix_id &s) {
- assert(s.graph() || s.castle() || s.haig() || s.dfa());
+ dfa_max_width = depth::infinity();
+}
+
+std::set<ReportID> all_reports(const suffix_id &s) {
+ assert(s.graph() || s.castle() || s.haig() || s.dfa());
if (s.tamarama()) {
return all_reports(*s.tamarama());
} else if (s.graph()) {
- return all_reports(*s.graph());
- } else if (s.castle()) {
- return all_reports(*s.castle());
- } else if (s.dfa()) {
- return all_reports(*s.dfa());
- } else {
- return all_reports(*s.haig());
- }
-}
-
-depth findMinWidth(const suffix_id &s) {
- assert(s.graph() || s.castle() || s.haig() || s.dfa());
- if (s.graph()) {
- return findMinWidth(*s.graph());
- } else if (s.castle()) {
- return findMinWidth(*s.castle());
- } else {
- return s.dfa_min_width;
- }
-}
-
-depth findMinWidth(const suffix_id &s, u32 top) {
- assert(s.graph() || s.castle() || s.haig() || s.dfa());
- if (s.graph()) {
- return findMinWidth(*s.graph(), top);
- } else if (s.castle()) {
- return findMinWidth(*s.castle(), top);
- } else {
- return s.dfa_min_width;
- }
-}
-
-depth findMaxWidth(const suffix_id &s) {
- assert(s.graph() || s.castle() || s.haig() || s.dfa());
- if (s.graph()) {
- return findMaxWidth(*s.graph());
- } else if (s.castle()) {
- return findMaxWidth(*s.castle());
- } else {
- return s.dfa_max_width;
- }
-}
-
-depth findMaxWidth(const suffix_id &s, u32 top) {
- assert(s.graph() || s.castle() || s.haig() || s.dfa());
- if (s.graph()) {
- return findMaxWidth(*s.graph(), top);
- } else if (s.castle()) {
- return findMaxWidth(*s.castle(), top);
- } else {
- return s.dfa_max_width;
- }
-}
-
-bool has_eod_accepts(const suffix_id &s) {
- assert(s.graph() || s.castle() || s.haig() || s.dfa());
- if (s.graph()) {
- /* ignore accept -> eod edge */
- return in_degree(s.graph()->acceptEod, *s.graph()) > 1;
- } else if (s.castle()) {
- return false;
- } else if (s.dfa()) {
- return has_eod_accepts(*s.dfa());
- } else {
- return has_eod_accepts(*s.haig());
- }
-}
-
-bool has_non_eod_accepts(const suffix_id &s) {
- assert(s.graph() || s.castle() || s.haig() || s.dfa());
- if (s.graph()) {
- return in_degree(s.graph()->accept, *s.graph());
- } else if (s.castle()) {
- return true;
- } else if (s.dfa()) {
- return has_non_eod_accepts(*s.dfa());
- } else {
- return has_non_eod_accepts(*s.haig());
- }
-}
-
-set<u32> all_tops(const suffix_id &s) {
- assert(s.graph() || s.castle() || s.haig() || s.dfa());
- if (s.graph()) {
+ return all_reports(*s.graph());
+ } else if (s.castle()) {
+ return all_reports(*s.castle());
+ } else if (s.dfa()) {
+ return all_reports(*s.dfa());
+ } else {
+ return all_reports(*s.haig());
+ }
+}
+
+depth findMinWidth(const suffix_id &s) {
+ assert(s.graph() || s.castle() || s.haig() || s.dfa());
+ if (s.graph()) {
+ return findMinWidth(*s.graph());
+ } else if (s.castle()) {
+ return findMinWidth(*s.castle());
+ } else {
+ return s.dfa_min_width;
+ }
+}
+
+depth findMinWidth(const suffix_id &s, u32 top) {
+ assert(s.graph() || s.castle() || s.haig() || s.dfa());
+ if (s.graph()) {
+ return findMinWidth(*s.graph(), top);
+ } else if (s.castle()) {
+ return findMinWidth(*s.castle(), top);
+ } else {
+ return s.dfa_min_width;
+ }
+}
+
+depth findMaxWidth(const suffix_id &s) {
+ assert(s.graph() || s.castle() || s.haig() || s.dfa());
+ if (s.graph()) {
+ return findMaxWidth(*s.graph());
+ } else if (s.castle()) {
+ return findMaxWidth(*s.castle());
+ } else {
+ return s.dfa_max_width;
+ }
+}
+
+depth findMaxWidth(const suffix_id &s, u32 top) {
+ assert(s.graph() || s.castle() || s.haig() || s.dfa());
+ if (s.graph()) {
+ return findMaxWidth(*s.graph(), top);
+ } else if (s.castle()) {
+ return findMaxWidth(*s.castle(), top);
+ } else {
+ return s.dfa_max_width;
+ }
+}
+
+bool has_eod_accepts(const suffix_id &s) {
+ assert(s.graph() || s.castle() || s.haig() || s.dfa());
+ if (s.graph()) {
+ /* ignore accept -> eod edge */
+ return in_degree(s.graph()->acceptEod, *s.graph()) > 1;
+ } else if (s.castle()) {
+ return false;
+ } else if (s.dfa()) {
+ return has_eod_accepts(*s.dfa());
+ } else {
+ return has_eod_accepts(*s.haig());
+ }
+}
+
+bool has_non_eod_accepts(const suffix_id &s) {
+ assert(s.graph() || s.castle() || s.haig() || s.dfa());
+ if (s.graph()) {
+ return in_degree(s.graph()->accept, *s.graph());
+ } else if (s.castle()) {
+ return true;
+ } else if (s.dfa()) {
+ return has_non_eod_accepts(*s.dfa());
+ } else {
+ return has_non_eod_accepts(*s.haig());
+ }
+}
+
+set<u32> all_tops(const suffix_id &s) {
+ assert(s.graph() || s.castle() || s.haig() || s.dfa());
+ if (s.graph()) {
flat_set<u32> tops = getTops(*s.graph());
assert(!tops.empty());
return {tops.begin(), tops.end()};
- }
-
- if (s.castle()) {
- return assoc_keys(s.castle()->repeats);
- }
-
- // Other types of suffix are not multi-top.
- return {0};
-}
-
-size_t suffix_id::hash() const {
+ }
+
+ if (s.castle()) {
+ return assoc_keys(s.castle()->repeats);
+ }
+
+ // Other types of suffix are not multi-top.
+ return {0};
+}
+
+size_t suffix_id::hash() const {
return hash_all(g, c, d, h, t);
-}
-
-bool isAnchored(const left_id &r) {
- assert(r.graph() || r.castle() || r.haig() || r.dfa());
- if (r.graph()) {
- return isAnchored(*r.graph());
- }
+}
+
+bool isAnchored(const left_id &r) {
+ assert(r.graph() || r.castle() || r.haig() || r.dfa());
+ if (r.graph()) {
+ return isAnchored(*r.graph());
+ }
if (r.dfa()) {
return r.dfa()->start_anchored == DEAD_STATE;
}
@@ -709,47 +709,47 @@ bool isAnchored(const left_id &r) {
return r.haig()->start_anchored == DEAD_STATE;
}
- // All other types are explicitly anchored.
- return true;
-}
-
-depth findMinWidth(const left_id &r) {
- assert(r.graph() || r.castle() || r.haig() || r.dfa());
- if (r.graph()) {
- return findMinWidth(*r.graph());
- } else if (r.castle()) {
- return findMinWidth(*r.castle());
- } else {
- return r.dfa_min_width;
- }
-}
-
-depth findMaxWidth(const left_id &r) {
- assert(r.graph() || r.castle() || r.haig() || r.dfa());
- if (r.graph()) {
- return findMaxWidth(*r.graph());
- } else if (r.castle()) {
- return findMaxWidth(*r.castle());
- } else {
- return r.dfa_max_width;
- }
-}
-
-set<u32> all_tops(const left_id &r) {
- assert(r.graph() || r.castle() || r.haig() || r.dfa());
- if (r.graph()) {
+ // All other types are explicitly anchored.
+ return true;
+}
+
+depth findMinWidth(const left_id &r) {
+ assert(r.graph() || r.castle() || r.haig() || r.dfa());
+ if (r.graph()) {
+ return findMinWidth(*r.graph());
+ } else if (r.castle()) {
+ return findMinWidth(*r.castle());
+ } else {
+ return r.dfa_min_width;
+ }
+}
+
+depth findMaxWidth(const left_id &r) {
+ assert(r.graph() || r.castle() || r.haig() || r.dfa());
+ if (r.graph()) {
+ return findMaxWidth(*r.graph());
+ } else if (r.castle()) {
+ return findMaxWidth(*r.castle());
+ } else {
+ return r.dfa_max_width;
+ }
+}
+
+set<u32> all_tops(const left_id &r) {
+ assert(r.graph() || r.castle() || r.haig() || r.dfa());
+ if (r.graph()) {
flat_set<u32> tops = getTops(*r.graph());
return {tops.begin(), tops.end()};
- }
-
- if (r.castle()) {
- return assoc_keys(r.castle()->repeats);
- }
-
- // Other types of rose are not multi-top.
- return {0};
-}
-
+ }
+
+ if (r.castle()) {
+ return assoc_keys(r.castle()->repeats);
+ }
+
+ // Other types of rose are not multi-top.
+ return {0};
+}
+
set<u32> all_reports(const left_id &left) {
assert(left.graph() || left.castle() || left.haig() || left.dfa());
if (left.graph()) {
@@ -763,142 +763,142 @@ set<u32> all_reports(const left_id &left) {
}
}
-u32 num_tops(const left_id &r) {
- return all_tops(r).size();
-}
-
-size_t left_id::hash() const {
+u32 num_tops(const left_id &r) {
+ return all_tops(r).size();
+}
+
+size_t left_id::hash() const {
return hash_all(g, c, d, h);
-}
-
-u64a findMaxOffset(const set<ReportID> &reports, const ReportManager &rm) {
- assert(!reports.empty());
- u64a maxOffset = 0;
- for (const auto &report_id : reports) {
- const Report &ir = rm.getReport(report_id);
- if (ir.hasBounds()) {
- maxOffset = max(maxOffset, ir.maxOffset);
- } else {
- return MAX_OFFSET;
- }
- }
- return maxOffset;
-}
-
+}
+
+u64a findMaxOffset(const set<ReportID> &reports, const ReportManager &rm) {
+ assert(!reports.empty());
+ u64a maxOffset = 0;
+ for (const auto &report_id : reports) {
+ const Report &ir = rm.getReport(report_id);
+ if (ir.hasBounds()) {
+ maxOffset = max(maxOffset, ir.maxOffset);
+ } else {
+ return MAX_OFFSET;
+ }
+ }
+ return maxOffset;
+}
+
size_t LeftEngInfo::hash() const {
return hash_all(graph, castle, dfa, haig, tamarama, lag, leftfix_report);
}
-void LeftEngInfo::reset(void) {
- graph.reset();
- castle.reset();
- dfa.reset();
- haig.reset();
+void LeftEngInfo::reset(void) {
+ graph.reset();
+ castle.reset();
+ dfa.reset();
+ haig.reset();
tamarama.reset();
- lag = 0;
- leftfix_report = MO_INVALID_IDX;
+ lag = 0;
+ leftfix_report = MO_INVALID_IDX;
dfa_min_width = depth(0);
- dfa_max_width = depth::infinity();
-}
-
-LeftEngInfo::operator bool() const {
- assert((int)!!castle + (int)!!dfa + (int)!!haig <= 1);
- assert(!castle || !graph);
- assert(!dfa || graph); /* dfas always have the graph as well */
- assert(!haig || graph);
- return graph || castle || dfa || haig;
-}
-
+ dfa_max_width = depth::infinity();
+}
+
+LeftEngInfo::operator bool() const {
+ assert((int)!!castle + (int)!!dfa + (int)!!haig <= 1);
+ assert(!castle || !graph);
+ assert(!dfa || graph); /* dfas always have the graph as well */
+ assert(!haig || graph);
+ return graph || castle || dfa || haig;
+}
+
u32 roseQuality(const RoseResources &res, const RoseEngine *t) {
- /* Rose is low quality if the atable is a Mcclellan 16 or has multiple DFAs
- */
+ /* Rose is low quality if the atable is a Mcclellan 16 or has multiple DFAs
+ */
if (res.has_anchored) {
if (res.has_anchored_multiple) {
- DEBUG_PRINTF("multiple atable engines\n");
- return 0;
- }
-
+ DEBUG_PRINTF("multiple atable engines\n");
+ return 0;
+ }
+
if (res.has_anchored_large) {
- DEBUG_PRINTF("m16 atable engine\n");
- return 0;
- }
- }
-
- /* if we always run multiple engines then we are slow */
- u32 always_run = 0;
-
+ DEBUG_PRINTF("m16 atable engine\n");
+ return 0;
+ }
+ }
+
+ /* if we always run multiple engines then we are slow */
+ u32 always_run = 0;
+
if (res.has_anchored) {
- always_run++;
- }
-
+ always_run++;
+ }
+
if (t->eagerIterOffset) {
/* eager prefixes are always run */
always_run++;
}
if (res.has_floating) {
- /* TODO: ignore conditional ftables, or ftables beyond smwr region */
- always_run++;
- }
-
- if (t->ematcherOffset) {
- always_run++;
- }
-
- /* ignore mpv outfixes as they are v good, mpv outfixes are before begin */
- if (t->outfixBeginQueue != t->outfixEndQueue) {
- /* TODO: ignore outfixes > smwr region */
- always_run++;
- }
-
- bool eod_prefix = false;
-
- const LeftNfaInfo *left = getLeftTable(t);
- for (u32 i = 0; i < t->activeLeftCount; i++) {
- if (left->eod_check) {
- eod_prefix = true;
- break;
- }
- }
-
- if (eod_prefix) {
- always_run++;
- DEBUG_PRINTF("eod prefixes are slow");
- return 0;
- }
-
- if (always_run > 1) {
- DEBUG_PRINTF("we always run %u engines\n", always_run);
- return 0;
- }
-
- return 1;
-}
-
+ /* TODO: ignore conditional ftables, or ftables beyond smwr region */
+ always_run++;
+ }
+
+ if (t->ematcherOffset) {
+ always_run++;
+ }
+
+ /* ignore mpv outfixes as they are v good, mpv outfixes are before begin */
+ if (t->outfixBeginQueue != t->outfixEndQueue) {
+ /* TODO: ignore outfixes > smwr region */
+ always_run++;
+ }
+
+ bool eod_prefix = false;
+
+ const LeftNfaInfo *left = getLeftTable(t);
+ for (u32 i = 0; i < t->activeLeftCount; i++) {
+ if (left->eod_check) {
+ eod_prefix = true;
+ break;
+ }
+ }
+
+ if (eod_prefix) {
+ always_run++;
+ DEBUG_PRINTF("eod prefixes are slow");
+ return 0;
+ }
+
+ if (always_run > 1) {
+ DEBUG_PRINTF("we always run %u engines\n", always_run);
+ return 0;
+ }
+
+ return 1;
+}
+
u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) {
const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
assert(!lit_vertices.empty());
-
+
u32 min_offset = UINT32_MAX;
for (const auto &v : lit_vertices) {
min_offset = min(min_offset, build.g[v].min_offset);
}
-
+
return min_offset;
}
-
+
u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) {
const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
assert(!lit_vertices.empty());
-
+
u32 max_offset = 0;
for (const auto &v : lit_vertices) {
max_offset = max(max_offset, build.g[v].max_offset);
}
-
+
return max_offset;
-}
-
+}
+
bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e) {
const auto &g = build.g;
const auto v = target(e, g);
@@ -928,70 +928,70 @@ bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e) {
return true;
}
-#ifndef NDEBUG
-/** \brief Returns true if all the graphs (NFA, DFA, Haig, etc) in this Rose
- * graph are implementable. */
-bool canImplementGraphs(const RoseBuildImpl &tbi) {
- const RoseGraph &g = tbi.g;
-
- // First, check the Rose leftfixes.
-
- for (auto v : vertices_range(g)) {
+#ifndef NDEBUG
+/** \brief Returns true if all the graphs (NFA, DFA, Haig, etc) in this Rose
+ * graph are implementable. */
+bool canImplementGraphs(const RoseBuildImpl &tbi) {
+ const RoseGraph &g = tbi.g;
+
+ // First, check the Rose leftfixes.
+
+ for (auto v : vertices_range(g)) {
DEBUG_PRINTF("leftfix: check vertex %zu\n", g[v].index);
-
- if (g[v].left.castle) {
- DEBUG_PRINTF("castle ok\n");
- continue;
- }
- if (g[v].left.dfa) {
- DEBUG_PRINTF("dfa ok\n");
- continue;
- }
- if (g[v].left.haig) {
- DEBUG_PRINTF("haig ok\n");
- continue;
- }
- if (g[v].left.graph) {
- assert(g[v].left.graph->kind
+
+ if (g[v].left.castle) {
+ DEBUG_PRINTF("castle ok\n");
+ continue;
+ }
+ if (g[v].left.dfa) {
+ DEBUG_PRINTF("dfa ok\n");
+ continue;
+ }
+ if (g[v].left.haig) {
+ DEBUG_PRINTF("haig ok\n");
+ continue;
+ }
+ if (g[v].left.graph) {
+ assert(g[v].left.graph->kind
== (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX));
- if (!isImplementableNFA(*g[v].left.graph, nullptr, tbi.cc)) {
+ if (!isImplementableNFA(*g[v].left.graph, nullptr, tbi.cc)) {
DEBUG_PRINTF("nfa prefix %zu failed (%zu vertices)\n",
g[v].index, num_vertices(*g[v].left.graph));
- return false;
- }
- }
- }
-
- // Suffix graphs.
-
- for (auto v : vertices_range(g)) {
+ return false;
+ }
+ }
+ }
+
+ // Suffix graphs.
+
+ for (auto v : vertices_range(g)) {
DEBUG_PRINTF("suffix: check vertex %zu\n", g[v].index);
-
- const RoseSuffixInfo &suffix = g[v].suffix;
- if (suffix.castle) {
- DEBUG_PRINTF("castle suffix ok\n");
- continue;
- }
- if (suffix.rdfa) {
- DEBUG_PRINTF("dfa suffix ok\n");
- continue;
- }
- if (suffix.haig) {
- DEBUG_PRINTF("haig suffix ok\n");
- continue;
- }
- if (suffix.graph) {
- assert(suffix.graph->kind == NFA_SUFFIX);
- if (!isImplementableNFA(*suffix.graph, &tbi.rm, tbi.cc)) {
+
+ const RoseSuffixInfo &suffix = g[v].suffix;
+ if (suffix.castle) {
+ DEBUG_PRINTF("castle suffix ok\n");
+ continue;
+ }
+ if (suffix.rdfa) {
+ DEBUG_PRINTF("dfa suffix ok\n");
+ continue;
+ }
+ if (suffix.haig) {
+ DEBUG_PRINTF("haig suffix ok\n");
+ continue;
+ }
+ if (suffix.graph) {
+ assert(suffix.graph->kind == NFA_SUFFIX);
+ if (!isImplementableNFA(*suffix.graph, &tbi.rm, tbi.cc)) {
DEBUG_PRINTF("nfa suffix %zu failed (%zu vertices)\n",
g[v].index, num_vertices(*suffix.graph));
- return false;
- }
- }
- }
-
- return true;
-}
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
/**
* \brief True if there is an engine with a top that is not triggered by a
@@ -1039,6 +1039,6 @@ bool hasOrphanedTops(const RoseBuildImpl &build) {
return false;
}
-#endif // NDEBUG
-
-} // namespace ue2
+#endif // NDEBUG
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.cpp b/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.cpp
index 3d2af35dc6..359550e118 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.cpp
@@ -1,180 +1,180 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_role_aliasing.h"
-
-#include "ue2common.h"
-#include "rose_build_impl.h"
-#include "rose_build_merge.h"
-#include "rose_build_util.h"
-#include "grey.h"
-#include "nfa/castlecompile.h"
-#include "nfa/goughcompile.h"
-#include "nfa/mcclellancompile_util.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_is_equal.h"
-#include "nfagraph/ng_limex.h"
-#include "nfagraph/ng_prune.h"
-#include "nfagraph/ng_uncalc_components.h"
-#include "nfagraph/ng_util.h"
-#include "util/bitutils.h"
-#include "util/compile_context.h"
-#include "util/container.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_role_aliasing.h"
+
+#include "ue2common.h"
+#include "rose_build_impl.h"
+#include "rose_build_merge.h"
+#include "rose_build_util.h"
+#include "grey.h"
+#include "nfa/castlecompile.h"
+#include "nfa/goughcompile.h"
+#include "nfa/mcclellancompile_util.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_is_equal.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_prune.h"
+#include "nfagraph/ng_uncalc_components.h"
+#include "nfagraph/ng_util.h"
+#include "util/bitutils.h"
+#include "util/compile_context.h"
+#include "util/container.h"
#include "util/flat_containers.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
#include "util/hash.h"
-#include "util/order_check.h"
-
-#include <algorithm>
-#include <numeric>
-#include <vector>
-#include <boost/graph/adjacency_iterator.hpp>
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_values;
-
-namespace ue2 {
-
+#include "util/order_check.h"
+
+#include <algorithm>
+#include <numeric>
+#include <vector>
+#include <boost/graph/adjacency_iterator.hpp>
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_values;
+
+namespace ue2 {
+
static constexpr size_t MERGE_GROUP_SIZE_MAX = 200;
-namespace {
-// Used for checking edge sets (both in- and out-) against each other.
-struct EdgeAndVertex {
- EdgeAndVertex(const RoseEdge &e, const RoseVertex v_,
- const RoseGraph &g) : v(v_), eprops(g[e]) {}
- virtual ~EdgeAndVertex() {}
-
- virtual bool operator<(const EdgeAndVertex &a) const {
- if (v != a.v) {
- return v < a.v;
- }
- if (eprops.minBound != a.eprops.minBound) {
- return eprops.minBound < a.eprops.minBound;
- }
- if (eprops.maxBound != a.eprops.maxBound) {
- return eprops.maxBound < a.eprops.maxBound;
- }
- if (eprops.rose_top != a.eprops.rose_top) {
- return eprops.rose_top < a.eprops.rose_top;
-
- }
- return eprops.history < a.eprops.history;
- }
-
- virtual bool operator==(const EdgeAndVertex &a) const {
- return v == a.v &&
- eprops.minBound == a.eprops.minBound &&
- eprops.maxBound == a.eprops.maxBound &&
- eprops.rose_top == a.eprops.rose_top &&
- eprops.history == a.eprops.history;
- }
-
-private:
- RoseVertex v;
- const RoseEdgeProps &eprops;
-};
-
-struct AliasOutEdge : EdgeAndVertex {
- AliasOutEdge(const RoseEdge &e, const RoseGraph &g) :
- EdgeAndVertex(e, target(e, g), g) {}
-};
-
-struct AliasInEdge : EdgeAndVertex {
- AliasInEdge(const RoseEdge &e, const RoseGraph &g) :
- EdgeAndVertex(e, source(e, g), g) {}
-};
-
-class CandidateSet {
-public:
+namespace {
+// Used for checking edge sets (both in- and out-) against each other.
+struct EdgeAndVertex {
+ EdgeAndVertex(const RoseEdge &e, const RoseVertex v_,
+ const RoseGraph &g) : v(v_), eprops(g[e]) {}
+ virtual ~EdgeAndVertex() {}
+
+ virtual bool operator<(const EdgeAndVertex &a) const {
+ if (v != a.v) {
+ return v < a.v;
+ }
+ if (eprops.minBound != a.eprops.minBound) {
+ return eprops.minBound < a.eprops.minBound;
+ }
+ if (eprops.maxBound != a.eprops.maxBound) {
+ return eprops.maxBound < a.eprops.maxBound;
+ }
+ if (eprops.rose_top != a.eprops.rose_top) {
+ return eprops.rose_top < a.eprops.rose_top;
+
+ }
+ return eprops.history < a.eprops.history;
+ }
+
+ virtual bool operator==(const EdgeAndVertex &a) const {
+ return v == a.v &&
+ eprops.minBound == a.eprops.minBound &&
+ eprops.maxBound == a.eprops.maxBound &&
+ eprops.rose_top == a.eprops.rose_top &&
+ eprops.history == a.eprops.history;
+ }
+
+private:
+ RoseVertex v;
+ const RoseEdgeProps &eprops;
+};
+
+struct AliasOutEdge : EdgeAndVertex {
+ AliasOutEdge(const RoseEdge &e, const RoseGraph &g) :
+ EdgeAndVertex(e, target(e, g), g) {}
+};
+
+struct AliasInEdge : EdgeAndVertex {
+ AliasInEdge(const RoseEdge &e, const RoseGraph &g) :
+ EdgeAndVertex(e, source(e, g), g) {}
+};
+
+class CandidateSet {
+public:
using key_type = RoseVertex;
using iterator = set<RoseVertex>::iterator;
using const_iterator = set<RoseVertex>::const_iterator;
-
- iterator begin() { return main_cont.begin(); }
- iterator end() { return main_cont.end(); }
+
+ iterator begin() { return main_cont.begin(); }
+ iterator end() { return main_cont.end(); }
const_iterator begin() const { return main_cont.begin(); }
const_iterator end() const { return main_cont.end(); }
-
- bool contains(RoseVertex a) const {
- return hash_cont.find(a) != hash_cont.end();
- }
-
- void insert(RoseVertex a) {
- main_cont.insert(a);
- hash_cont.insert(a);
- }
-
- void erase(iterator aa) {
- RoseVertex a = *aa;
- main_cont.erase(aa);
- hash_cont.erase(a);
- }
-
- void erase(RoseVertex a) {
- main_cont.erase(a);
- hash_cont.erase(a);
- }
-
- size_t size() const {
- assert(hash_cont.size() == main_cont.size());
- return main_cont.size();
- }
-
- bool empty() const {
- assert(hash_cont.size() == main_cont.size());
- return main_cont.empty();
- }
-
-private:
- /* if a vertex is worth storing, it is worth storing twice */
+
+ bool contains(RoseVertex a) const {
+ return hash_cont.find(a) != hash_cont.end();
+ }
+
+ void insert(RoseVertex a) {
+ main_cont.insert(a);
+ hash_cont.insert(a);
+ }
+
+ void erase(iterator aa) {
+ RoseVertex a = *aa;
+ main_cont.erase(aa);
+ hash_cont.erase(a);
+ }
+
+ void erase(RoseVertex a) {
+ main_cont.erase(a);
+ hash_cont.erase(a);
+ }
+
+ size_t size() const {
+ assert(hash_cont.size() == main_cont.size());
+ return main_cont.size();
+ }
+
+ bool empty() const {
+ assert(hash_cont.size() == main_cont.size());
+ return main_cont.empty();
+ }
+
+private:
+ /* if a vertex is worth storing, it is worth storing twice */
set<RoseVertex> main_cont; /* deterministic iterator */
unordered_set<RoseVertex> hash_cont; /* member checks */
-};
-
+};
+
struct RoseAliasingInfo {
RoseAliasingInfo(const RoseBuildImpl &build) {
const auto &g = build.g;
-
+
// Populate reverse leftfix map.
for (auto v : vertices_range(g)) {
if (g[v].left) {
rev_leftfix[g[v].left].insert(v);
}
}
-
+
// Populate reverse ghost vertex map.
for (const auto &m : build.ghost) {
rev_ghost[m.second].insert(m.first);
- }
- }
-
+ }
+ }
+
/** \brief Mapping from leftfix to vertices. */
unordered_map<left_id, set<RoseVertex>> rev_leftfix;
@@ -184,179 +184,179 @@ struct RoseAliasingInfo {
} // namespace
-// Check successor set: must lead to the same vertices via edges with the
-// same properties.
-static
-bool sameSuccessors(RoseVertex a, RoseVertex b, const RoseGraph &g) {
- if (out_degree(a, g) != out_degree(b, g)) {
- return false;
- }
-
- set<AliasOutEdge> succs_a, succs_b;
-
- for (const auto &e : out_edges_range(a, g)) {
- succs_a.insert(AliasOutEdge(e, g));
- }
-
- for (const auto &e : out_edges_range(b, g)) {
- succs_b.insert(AliasOutEdge(e, g));
- }
-
- return (succs_a == succs_b);
-}
-
-/* unlike LeftEngInfo::==, this does a deep check to see if the leftfixes are
- * equivalent rather than checking for pointer equality. */
-static
-bool hasEqualLeftfixes(RoseVertex a, RoseVertex b, const RoseGraph &g) {
- assert(g[a].left || g[b].left);
- if (!g[a].left || !g[b].left) {
- return false;
- }
- const LeftEngInfo &a_left = g[a].left;
- const LeftEngInfo &b_left = g[b].left;
-
- if (a_left.castle && b_left.castle) {
- return is_equal(*a_left.castle, a_left.leftfix_report,
- *b_left.castle, b_left.leftfix_report);
- }
-
- if (a_left.graph && b_left.graph) {
- /* non-castle engines have graphs */
- return is_equal(*a_left.graph, a_left.leftfix_report, *b_left.graph,
- b_left.leftfix_report);
- }
-
- /* graph <-> castle cases are not equal */
- return false;
-}
-
-// Check predecessor set: must come from the same vertices via edges with
-// the same properties.
-static
-bool samePredecessors(RoseVertex a, RoseVertex b, const RoseGraph &g) {
- if (in_degree(a, g) != in_degree(b, g)) {
- return false;
- }
-
- set<AliasInEdge> preds_a, preds_b;
-
- for (const auto &e : in_edges_range(a, g)) {
- preds_a.insert(AliasInEdge(e, g));
- }
-
- for (const auto &e : in_edges_range(b, g)) {
- preds_b.insert(AliasInEdge(e, g));
- }
-
- if (preds_a != preds_b) {
- return false;
- }
-
- if (g[a].left || g[b].left) {
- if (!hasEqualLeftfixes(a, b, g)) {
- return false;
- }
-
- for (const auto &e_a : in_edges_range(a, g)) {
+// Check successor set: must lead to the same vertices via edges with the
+// same properties.
+static
+bool sameSuccessors(RoseVertex a, RoseVertex b, const RoseGraph &g) {
+ if (out_degree(a, g) != out_degree(b, g)) {
+ return false;
+ }
+
+ set<AliasOutEdge> succs_a, succs_b;
+
+ for (const auto &e : out_edges_range(a, g)) {
+ succs_a.insert(AliasOutEdge(e, g));
+ }
+
+ for (const auto &e : out_edges_range(b, g)) {
+ succs_b.insert(AliasOutEdge(e, g));
+ }
+
+ return (succs_a == succs_b);
+}
+
+/* unlike LeftEngInfo::==, this does a deep check to see if the leftfixes are
+ * equivalent rather than checking for pointer equality. */
+static
+bool hasEqualLeftfixes(RoseVertex a, RoseVertex b, const RoseGraph &g) {
+ assert(g[a].left || g[b].left);
+ if (!g[a].left || !g[b].left) {
+ return false;
+ }
+ const LeftEngInfo &a_left = g[a].left;
+ const LeftEngInfo &b_left = g[b].left;
+
+ if (a_left.castle && b_left.castle) {
+ return is_equal(*a_left.castle, a_left.leftfix_report,
+ *b_left.castle, b_left.leftfix_report);
+ }
+
+ if (a_left.graph && b_left.graph) {
+ /* non-castle engines have graphs */
+ return is_equal(*a_left.graph, a_left.leftfix_report, *b_left.graph,
+ b_left.leftfix_report);
+ }
+
+ /* graph <-> castle cases are not equal */
+ return false;
+}
+
+// Check predecessor set: must come from the same vertices via edges with
+// the same properties.
+static
+bool samePredecessors(RoseVertex a, RoseVertex b, const RoseGraph &g) {
+ if (in_degree(a, g) != in_degree(b, g)) {
+ return false;
+ }
+
+ set<AliasInEdge> preds_a, preds_b;
+
+ for (const auto &e : in_edges_range(a, g)) {
+ preds_a.insert(AliasInEdge(e, g));
+ }
+
+ for (const auto &e : in_edges_range(b, g)) {
+ preds_b.insert(AliasInEdge(e, g));
+ }
+
+ if (preds_a != preds_b) {
+ return false;
+ }
+
+ if (g[a].left || g[b].left) {
+ if (!hasEqualLeftfixes(a, b, g)) {
+ return false;
+ }
+
+ for (const auto &e_a : in_edges_range(a, g)) {
RoseEdge e = edge(source(e_a, g), b, g);
if (!e || g[e].rose_top != g[e_a].rose_top) {
- DEBUG_PRINTF("bad tops\n");
- return false;
- }
- }
- }
-
- return true;
-}
-
-static
+ DEBUG_PRINTF("bad tops\n");
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+static
bool hasCommonSuccWithBadBounds(RoseVertex a, RoseVertex b,
const RoseGraph &g) {
- for (const auto &e_a : out_edges_range(a, g)) {
+ for (const auto &e_a : out_edges_range(a, g)) {
if (RoseEdge e = edge(b, target(e_a, g), g)) {
- if (g[e_a].maxBound < g[e].minBound
- || g[e].maxBound < g[e_a].minBound) {
- return true;
- }
- if (g[e_a].rose_top != g[e].rose_top) {
- // Can't trigger two tops on the same leftfix, we can't merge
- // this.
- return true;
- }
- }
- }
- return false;
-}
-
-static
+ if (g[e_a].maxBound < g[e].minBound
+ || g[e].maxBound < g[e_a].minBound) {
+ return true;
+ }
+ if (g[e_a].rose_top != g[e].rose_top) {
+ // Can't trigger two tops on the same leftfix, we can't merge
+ // this.
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+static
bool hasCommonPredWithBadBounds(RoseVertex a, RoseVertex b,
const RoseGraph &g) {
- for (const auto &e_a : in_edges_range(a, g)) {
+ for (const auto &e_a : in_edges_range(a, g)) {
if (RoseEdge e = edge(source(e_a, g), b, g)) {
- if (g[e_a].maxBound < g[e].minBound
- || g[e].maxBound < g[e_a].minBound) {
- return true;
- }
-
- // XXX: if we're merging two vertices with different roses, we
- // cannot allow them to share a pred, as we would be unable to
- // merge the (necessarily different) tops on the in-edges. This
- // could be relaxed if we made the tops mergeable (by making
- // edge_top a bitfield, for example).
- if (g[a].left != g[b].left) {
- return true;
- }
-
- }
- }
- return false;
-}
-
-static
+ if (g[e_a].maxBound < g[e].minBound
+ || g[e].maxBound < g[e_a].minBound) {
+ return true;
+ }
+
+ // XXX: if we're merging two vertices with different roses, we
+ // cannot allow them to share a pred, as we would be unable to
+ // merge the (necessarily different) tops on the in-edges. This
+ // could be relaxed if we made the tops mergeable (by making
+ // edge_top a bitfield, for example).
+ if (g[a].left != g[b].left) {
+ return true;
+ }
+
+ }
+ }
+ return false;
+}
+
+static
bool canMergeLiterals(RoseVertex a, RoseVertex b, const RoseBuildImpl &build) {
const auto &lits_a = build.g[a].literals;
const auto &lits_b = build.g[b].literals;
- assert(!lits_a.empty() && !lits_b.empty());
-
- // If both vertices have only pseudo-dotstar in-edges, we can merge
- // literals of different lengths and can avoid the check below.
+ assert(!lits_a.empty() && !lits_b.empty());
+
+ // If both vertices have only pseudo-dotstar in-edges, we can merge
+ // literals of different lengths and can avoid the check below.
if (build.hasOnlyPseudoStarInEdges(a) &&
build.hasOnlyPseudoStarInEdges(b)) {
- DEBUG_PRINTF("both have pseudo-dotstar in-edges\n");
- return true;
- }
-
- // Otherwise, all the literals involved must have the same length.
- for (u32 a_id : lits_a) {
+ DEBUG_PRINTF("both have pseudo-dotstar in-edges\n");
+ return true;
+ }
+
+ // Otherwise, all the literals involved must have the same length.
+ for (u32 a_id : lits_a) {
const rose_literal_id &la = build.literals.at(a_id);
- for (u32 b_id : lits_b) {
+ for (u32 b_id : lits_b) {
const rose_literal_id &lb = build.literals.at(b_id);
-
- if (la.elength() != lb.elength()) {
- DEBUG_PRINTF("bad merge %zu!=%zu '%s', '%s'\n", la.elength(),
- lb.elength(), la.s.c_str(), lb.s.c_str());
- return false;
- }
- }
- }
-
- return true;
-}
-
-static
+
+ if (la.elength() != lb.elength()) {
+ DEBUG_PRINTF("bad merge %zu!=%zu '%s', '%s'\n", la.elength(),
+ lb.elength(), la.s.c_str(), lb.s.c_str());
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+static
bool isAliasingCandidate(RoseVertex v, const RoseBuildImpl &build) {
const RoseVertexProps &props = build.g[v];
-
- // Must have literals.
- if (props.literals.empty()) {
- return false;
- }
-
+
+ // Must have literals.
+ if (props.literals.empty()) {
+ return false;
+ }
+
assert(*props.literals.begin() != MO_INVALID_IDX);
return true;
}
-
+
static
bool sameGhostProperties(const RoseBuildImpl &build,
const RoseAliasingInfo &rai, RoseVertex a,
@@ -374,8 +374,8 @@ bool sameGhostProperties(const RoseBuildImpl &build,
}
DEBUG_PRINTF("ghost mappings ok\n");
return true;
- }
-
+ }
+
// If they are ghost vertices, then they must have the same literals.
if (contains(rai.rev_ghost, a) || contains(rai.rev_ghost, b)) {
if (!contains(rai.rev_ghost, a) || !contains(rai.rev_ghost, b)) {
@@ -384,158 +384,158 @@ bool sameGhostProperties(const RoseBuildImpl &build,
}
return build.g[a].literals == build.g[b].literals;
}
-
- return true;
-}
-
-static
+
+ return true;
+}
+
+static
bool sameRoleProperties(const RoseBuildImpl &build, const RoseAliasingInfo &rai,
RoseVertex a, RoseVertex b) {
- const RoseGraph &g = build.g;
- const RoseVertexProps &aprops = g[a], &bprops = g[b];
-
+ const RoseGraph &g = build.g;
+ const RoseVertexProps &aprops = g[a], &bprops = g[b];
+
if (aprops.eod_accept != bprops.eod_accept) {
- return false;
- }
-
- // We don't want to merge a role with LAST_BYTE history with one without,
- // as a role that can only be triggered at EOD cannot safely precede
- // "ordinary" roles.
- if (hasLastByteHistorySucc(g, a) != hasLastByteHistorySucc(g, b)) {
- return false;
- }
-
- // We certainly don't want to merge root roles with non-root roles.
- /* TODO: explain */
- if (build.isRootSuccessor(a) != build.isRootSuccessor(b)) {
- return false;
- }
-
- if (aprops.som_adjust != bprops.som_adjust) {
- return false;
- }
-
+ return false;
+ }
+
+ // We don't want to merge a role with LAST_BYTE history with one without,
+ // as a role that can only be triggered at EOD cannot safely precede
+ // "ordinary" roles.
+ if (hasLastByteHistorySucc(g, a) != hasLastByteHistorySucc(g, b)) {
+ return false;
+ }
+
+ // We certainly don't want to merge root roles with non-root roles.
+ /* TODO: explain */
+ if (build.isRootSuccessor(a) != build.isRootSuccessor(b)) {
+ return false;
+ }
+
+ if (aprops.som_adjust != bprops.som_adjust) {
+ return false;
+ }
+
if (!sameGhostProperties(build, rai, a, b)) {
return false;
}
- /* "roses are mergeable" check are handled elsewhere */
-
- return true;
-}
-
+ /* "roses are mergeable" check are handled elsewhere */
+
+ return true;
+}
+
/* Checks compatibility of role properties if we require that two roles are
* right equiv. */
-static
-bool sameRightRoleProperties(const RoseBuildImpl &build, RoseVertex a,
- RoseVertex b) {
- const RoseGraph &g = build.g;
- const RoseVertexProps &aprops = g[a], &bprops = g[b];
-
- if (aprops.reports != bprops.reports) {
- return false;
- }
-
- if (hasAnchHistorySucc(g, a) != hasAnchHistorySucc(g, b)) {
- return false;
- }
-
- // If the history type is ANCH, then we need to be careful that we only
- // merge literals that occur at the same offsets.
- if (hasAnchHistorySucc(g, a) || hasAnchHistorySucc(g, b)) {
- if (aprops.min_offset != bprops.min_offset
- || aprops.max_offset != bprops.max_offset) {
- return false;
- }
- }
-
- if (aprops.suffix != bprops.suffix) {
- return false;
- }
-
- return true;
-}
-
-static
-void mergeEdgeAdd(RoseVertex u, RoseVertex v, const RoseEdge &from_edge,
- const RoseEdge *to_edge, RoseGraph &g) {
- const RoseEdgeProps &from_props = g[from_edge];
-
- if (!to_edge) {
+static
+bool sameRightRoleProperties(const RoseBuildImpl &build, RoseVertex a,
+ RoseVertex b) {
+ const RoseGraph &g = build.g;
+ const RoseVertexProps &aprops = g[a], &bprops = g[b];
+
+ if (aprops.reports != bprops.reports) {
+ return false;
+ }
+
+ if (hasAnchHistorySucc(g, a) != hasAnchHistorySucc(g, b)) {
+ return false;
+ }
+
+ // If the history type is ANCH, then we need to be careful that we only
+ // merge literals that occur at the same offsets.
+ if (hasAnchHistorySucc(g, a) || hasAnchHistorySucc(g, b)) {
+ if (aprops.min_offset != bprops.min_offset
+ || aprops.max_offset != bprops.max_offset) {
+ return false;
+ }
+ }
+
+ if (aprops.suffix != bprops.suffix) {
+ return false;
+ }
+
+ return true;
+}
+
+static
+void mergeEdgeAdd(RoseVertex u, RoseVertex v, const RoseEdge &from_edge,
+ const RoseEdge *to_edge, RoseGraph &g) {
+ const RoseEdgeProps &from_props = g[from_edge];
+
+ if (!to_edge) {
DEBUG_PRINTF("adding edge [%zu,%zu]\n", g[u].index, g[v].index);
- add_edge(u, v, from_props, g);
- } else {
- // union of the two edges.
+ add_edge(u, v, from_props, g);
+ } else {
+ // union of the two edges.
DEBUG_PRINTF("updating edge [%zu,%zu]\n", g[u].index, g[v].index);
- RoseEdgeProps &to_props = g[*to_edge];
- to_props.minBound = min(to_props.minBound, from_props.minBound);
- to_props.maxBound = max(to_props.maxBound, from_props.maxBound);
- assert(to_props.rose_top == from_props.rose_top);
- }
-}
-
-/* clone a's edges onto b */
-static
-void mergeEdges(RoseVertex a, RoseVertex b, RoseGraph &g) {
- // All the edges to or from b for quick lookup.
- typedef map<RoseVertex, RoseEdge> EdgeCache;
- EdgeCache b_edges;
-
- // Cache b's in-edges so we can look them up by source quickly.
- for (const auto &e : in_edges_range(b, g)) {
- RoseVertex u = source(e, g);
+ RoseEdgeProps &to_props = g[*to_edge];
+ to_props.minBound = min(to_props.minBound, from_props.minBound);
+ to_props.maxBound = max(to_props.maxBound, from_props.maxBound);
+ assert(to_props.rose_top == from_props.rose_top);
+ }
+}
+
+/* clone a's edges onto b */
+static
+void mergeEdges(RoseVertex a, RoseVertex b, RoseGraph &g) {
+ // All the edges to or from b for quick lookup.
+ typedef map<RoseVertex, RoseEdge> EdgeCache;
+ EdgeCache b_edges;
+
+ // Cache b's in-edges so we can look them up by source quickly.
+ for (const auto &e : in_edges_range(b, g)) {
+ RoseVertex u = source(e, g);
b_edges.emplace(u, e);
- }
-
- // Add a's in-edges to b, merging them in where b already has the new edge.
- // Once handled, the in-edges to a are removed.
- RoseGraph::in_edge_iterator ei, ee;
- tie(ei, ee) = in_edges(a, g);
- while (ei != ee) {
- RoseVertex u = source(*ei, g);
- EdgeCache::const_iterator it = b_edges.find(u);
- const RoseEdge *to_edge = (it == b_edges.end() ? nullptr : &it->second);
- mergeEdgeAdd(u, b, *ei, to_edge, g);
- remove_edge(*ei++, g);
- }
-
- // Cache b's out-edges so we can look them up by target quickly.
- b_edges.clear();
- for (const auto &e : out_edges_range(b, g)) {
- RoseVertex v = target(e, g);
+ }
+
+ // Add a's in-edges to b, merging them in where b already has the new edge.
+ // Once handled, the in-edges to a are removed.
+ RoseGraph::in_edge_iterator ei, ee;
+ tie(ei, ee) = in_edges(a, g);
+ while (ei != ee) {
+ RoseVertex u = source(*ei, g);
+ EdgeCache::const_iterator it = b_edges.find(u);
+ const RoseEdge *to_edge = (it == b_edges.end() ? nullptr : &it->second);
+ mergeEdgeAdd(u, b, *ei, to_edge, g);
+ remove_edge(*ei++, g);
+ }
+
+ // Cache b's out-edges so we can look them up by target quickly.
+ b_edges.clear();
+ for (const auto &e : out_edges_range(b, g)) {
+ RoseVertex v = target(e, g);
b_edges.emplace(v, e);
- }
-
- // Add a's out-edges to b, merging them in where b already has the new edge.
- // Once handled, the out-edges to a are removed.
- RoseGraph::out_edge_iterator oi, oe;
- tie(oi, oe) = out_edges(a, g);
- while (oi != oe) {
- RoseVertex v = target(*oi, g);
- EdgeCache::const_iterator it = b_edges.find(v);
- const RoseEdge *to_edge = (it == b_edges.end() ? nullptr : &it->second);
- mergeEdgeAdd(b, v, *oi, to_edge, g);
- remove_edge(*oi++, g);
- }
-
- // Vertex a should no longer have any in- or out-edges.
- assert(degree(a, g) == 0);
-}
-
-static
+ }
+
+ // Add a's out-edges to b, merging them in where b already has the new edge.
+ // Once handled, the out-edges to a are removed.
+ RoseGraph::out_edge_iterator oi, oe;
+ tie(oi, oe) = out_edges(a, g);
+ while (oi != oe) {
+ RoseVertex v = target(*oi, g);
+ EdgeCache::const_iterator it = b_edges.find(v);
+ const RoseEdge *to_edge = (it == b_edges.end() ? nullptr : &it->second);
+ mergeEdgeAdd(b, v, *oi, to_edge, g);
+ remove_edge(*oi++, g);
+ }
+
+ // Vertex a should no longer have any in- or out-edges.
+ assert(degree(a, g) == 0);
+}
+
+static
void mergeLiteralSets(RoseVertex a, RoseVertex b, RoseBuildImpl &build) {
RoseGraph &g = build.g;
- const auto &a_literals = g[a].literals;
- for (u32 lit_id : a_literals) {
+ const auto &a_literals = g[a].literals;
+ for (u32 lit_id : a_literals) {
auto &lit_vertices = build.literal_info[lit_id].vertices;
- lit_vertices.erase(a);
- lit_vertices.insert(b);
- }
-
- insert(&g[b].literals, a_literals);
-}
-
-static
+ lit_vertices.erase(a);
+ lit_vertices.insert(b);
+ }
+
+ insert(&g[b].literals, a_literals);
+}
+
+static
void updateAliasingInfo(RoseBuildImpl &build, RoseAliasingInfo &rai,
RoseVertex a, RoseVertex b) {
if (build.g[a].left) {
@@ -549,7 +549,7 @@ void updateAliasingInfo(RoseBuildImpl &build, RoseAliasingInfo &rai,
build.ghost.erase(a);
rai.rev_ghost[ghost].erase(a);
}
-
+
if (contains(rai.rev_ghost, a)) {
for (const auto &v : rai.rev_ghost[a]) {
build.ghost[v] = b;
@@ -565,21 +565,21 @@ void mergeCommon(RoseBuildImpl &build, RoseAliasingInfo &rai, RoseVertex a,
RoseVertex b) {
RoseGraph &g = build.g;
- assert(g[a].eod_accept == g[b].eod_accept);
- assert(g[a].left == g[b].left);
+ assert(g[a].eod_accept == g[b].eod_accept);
+ assert(g[a].left == g[b].left);
assert(!g[a].suffix || g[a].suffix == g[b].suffix);
-
- // In some situations (ghost roles etc), we can have different groups.
- assert(!g[a].groups && !g[b].groups); /* current structure means groups
- * haven't been assigned yet */
- g[b].groups |= g[a].groups;
-
+
+ // In some situations (ghost roles etc), we can have different groups.
+ assert(!g[a].groups && !g[b].groups); /* current structure means groups
+ * haven't been assigned yet */
+ g[b].groups |= g[a].groups;
+
mergeLiteralSets(a, b, build);
updateAliasingInfo(build, rai, a, b);
-
+
// Our min and max_offsets should be sane.
assert(g[b].min_offset <= g[b].max_offset);
-
+
// Safety check: we should not have created through a merge a vertex that
// has an out-edge with ANCH history but is not fixed-offset.
assert(!hasAnchHistorySucc(g, b) || g[b].fixedOffset());
@@ -599,14 +599,14 @@ void mergeVerticesLeft(RoseVertex a, RoseVertex b, RoseBuildImpl &build,
g[b].min_offset = max(g[a].min_offset, g[b].min_offset);
g[b].max_offset = min(g[a].max_offset, g[b].max_offset);
- if (!g[b].suffix) {
- g[b].suffix = g[a].suffix;
- }
-
- mergeEdges(a, b, g);
+ if (!g[b].suffix) {
+ g[b].suffix = g[a].suffix;
+ }
+
+ mergeEdges(a, b, g);
mergeCommon(build, rai, a, b);
-}
-
+}
+
/** \brief Merge role 'a' into 'b', right merge path. */
static
void mergeVerticesRight(RoseVertex a, RoseVertex b, RoseBuildImpl &build,
@@ -622,201 +622,201 @@ void mergeVerticesRight(RoseVertex a, RoseVertex b, RoseBuildImpl &build,
mergeCommon(build, rai, a, b);
}
-/**
- * Faster version of \ref mergeVertices for diamond merges, for which we know
- * that the in- and out-edge sets, reports and suffixes are identical.
- */
-static
+/**
+ * Faster version of \ref mergeVertices for diamond merges, for which we know
+ * that the in- and out-edge sets, reports and suffixes are identical.
+ */
+static
void mergeVerticesDiamond(RoseVertex a, RoseVertex b, RoseBuildImpl &build,
RoseAliasingInfo &rai) {
RoseGraph &g = build.g;
DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].index, g[b].index);
-
+
// For a diamond merge, most properties are already the same (with the
// notable exception of the literal set).
- assert(g[a].reports == g[b].reports);
- assert(g[a].suffix == g[b].suffix);
-
- g[b].min_offset = min(g[a].min_offset, g[b].min_offset);
- g[b].max_offset = max(g[a].max_offset, g[b].max_offset);
-
+ assert(g[a].reports == g[b].reports);
+ assert(g[a].suffix == g[b].suffix);
+
+ g[b].min_offset = min(g[a].min_offset, g[b].min_offset);
+ g[b].max_offset = max(g[a].max_offset, g[b].max_offset);
+
mergeCommon(build, rai, a, b);
-}
-
-static never_inline
+}
+
+static never_inline
void findCandidates(const RoseBuildImpl &build, CandidateSet *candidates) {
for (auto v : vertices_range(build.g)) {
if (isAliasingCandidate(v, build)) {
DEBUG_PRINTF("candidate %zu\n", build.g[v].index);
DEBUG_PRINTF("lits: %u\n", *build.g[v].literals.begin());
- candidates->insert(v);
- }
- }
-
+ candidates->insert(v);
+ }
+ }
+
assert(candidates->size() <= num_vertices(build.g));
- DEBUG_PRINTF("found %zu/%zu candidates\n", candidates->size(),
+ DEBUG_PRINTF("found %zu/%zu candidates\n", candidates->size(),
num_vertices(build.g));
-}
-
-static
-RoseVertex pickPred(const RoseVertex v, const RoseGraph &g,
+}
+
+static
+RoseVertex pickPred(const RoseVertex v, const RoseGraph &g,
const RoseBuildImpl &build) {
- RoseGraph::in_edge_iterator ei, ee;
- tie(ei, ee) = in_edges(v, g);
- if (ei == ee) {
- assert(0); // every candidate should have in-degree!
- return RoseGraph::null_vertex();
- }
-
- // Avoid roots if we have other options, since it doesn't matter to the
- // merge pass which predecessor we pick.
- RoseVertex u = source(*ei, g);
+ RoseGraph::in_edge_iterator ei, ee;
+ tie(ei, ee) = in_edges(v, g);
+ if (ei == ee) {
+ assert(0); // every candidate should have in-degree!
+ return RoseGraph::null_vertex();
+ }
+
+ // Avoid roots if we have other options, since it doesn't matter to the
+ // merge pass which predecessor we pick.
+ RoseVertex u = source(*ei, g);
while (build.isAnyStart(u) && ++ei != ee) {
- u = source(*ei, g);
- }
- return u;
-}
-
-template<>
-bool contains<>(const CandidateSet &container, const RoseVertex &key) {
- return container.contains(key);
-}
-
-// Simplified version of hasCommonPredWithBadBounds for diamond merges.
-static
-bool hasCommonPredWithDiffRoses(RoseVertex a, RoseVertex b,
- const RoseGraph &g) {
- if (!g[a].left || !g[b].left) {
- DEBUG_PRINTF("one of (a, b) doesn't have a prefix\n");
- return true;
- }
-
- // XXX: if we're merging two vertices with different leftfixes, we
- // cannot allow them to share a pred, as we would be unable to
- // merge the (necessarily different) tops on the in-edges. This
- // could be relaxed if we made the tops mergeable (by making
- // edge_top a bitfield, for example).
-
- const bool equal_roses = hasEqualLeftfixes(a, b, g);
-
- for (const auto &e_a : in_edges_range(a, g)) {
+ u = source(*ei, g);
+ }
+ return u;
+}
+
+template<>
+bool contains<>(const CandidateSet &container, const RoseVertex &key) {
+ return container.contains(key);
+}
+
+// Simplified version of hasCommonPredWithBadBounds for diamond merges.
+static
+bool hasCommonPredWithDiffRoses(RoseVertex a, RoseVertex b,
+ const RoseGraph &g) {
+ if (!g[a].left || !g[b].left) {
+ DEBUG_PRINTF("one of (a, b) doesn't have a prefix\n");
+ return true;
+ }
+
+ // XXX: if we're merging two vertices with different leftfixes, we
+ // cannot allow them to share a pred, as we would be unable to
+ // merge the (necessarily different) tops on the in-edges. This
+ // could be relaxed if we made the tops mergeable (by making
+ // edge_top a bitfield, for example).
+
+ const bool equal_roses = hasEqualLeftfixes(a, b, g);
+
+ for (const auto &e_a : in_edges_range(a, g)) {
if (RoseEdge e = edge(source(e_a, g), b, g)) {
- DEBUG_PRINTF("common pred, e_r=%d r_t %u,%u\n",
- (int)equal_roses, g[e].rose_top, g[e_a].rose_top);
- if (!equal_roses) {
- DEBUG_PRINTF("different roses\n");
- return true;
- }
- if (g[e].rose_top != g[e_a].rose_top) {
- DEBUG_PRINTF("bad tops\n");
- return true;
- }
- }
- }
- DEBUG_PRINTF("ok\n");
- return false;
-}
-
-static
+ DEBUG_PRINTF("common pred, e_r=%d r_t %u,%u\n",
+ (int)equal_roses, g[e].rose_top, g[e_a].rose_top);
+ if (!equal_roses) {
+ DEBUG_PRINTF("different roses\n");
+ return true;
+ }
+ if (g[e].rose_top != g[e_a].rose_top) {
+ DEBUG_PRINTF("bad tops\n");
+ return true;
+ }
+ }
+ }
+ DEBUG_PRINTF("ok\n");
+ return false;
+}
+
+static
void pruneReportIfUnused(const RoseBuildImpl &build, shared_ptr<NGHolder> h,
- const set<RoseVertex> &verts, ReportID report) {
- DEBUG_PRINTF("trying to prune %u from %p (v %zu)\n", report, h.get(),
- verts.size());
- for (RoseVertex v : verts) {
+ const set<RoseVertex> &verts, ReportID report) {
+ DEBUG_PRINTF("trying to prune %u from %p (v %zu)\n", report, h.get(),
+ verts.size());
+ for (RoseVertex v : verts) {
if (build.g[v].left.graph == h &&
build.g[v].left.leftfix_report == report) {
- DEBUG_PRINTF("report %u still in use\n", report);
- return;
- }
- }
-
- if (!verts.empty()) {
- // Report no longer in use, but graph h is still alive: we should prune
- // the report if we can do so without rendering the graph
- // unimplementable.
-
- DEBUG_PRINTF("report %u has been merged away, pruning\n", report);
+ DEBUG_PRINTF("report %u still in use\n", report);
+ return;
+ }
+ }
+
+ if (!verts.empty()) {
+ // Report no longer in use, but graph h is still alive: we should prune
+ // the report if we can do so without rendering the graph
+ // unimplementable.
+
+ DEBUG_PRINTF("report %u has been merged away, pruning\n", report);
assert(h->kind == (build.isRootSuccessor(*verts.begin()) ? NFA_PREFIX
: NFA_INFIX));
- unique_ptr<NGHolder> h_new = cloneHolder(*h);
- pruneReport(*h_new, report);
-
+ unique_ptr<NGHolder> h_new = cloneHolder(*h);
+ pruneReport(*h_new, report);
+
if (isImplementableNFA(*h_new, nullptr, build.cc)) {
- clear_graph(*h);
- cloneHolder(*h, *h_new);
- } else {
- DEBUG_PRINTF("prune produced unimplementable graph, "
- "leaving as-is\n");
- }
- }
-}
-
-/** \brief Remove any tops that don't lead to the given report from this
- * Castle. */
-static
-void pruneCastle(CastleProto &castle, ReportID report) {
- unordered_set<u32> dead; // tops to remove.
- for (const auto &m : castle.repeats) {
- if (!contains(m.second.reports, report)) {
- dead.insert(m.first);
- }
- }
-
- for (const auto &top : dead) {
- castle.erase(top);
- }
-
- assert(!castle.repeats.empty());
-}
-
-/** \brief Set all reports to the given one. */
-static
-void setReports(CastleProto &castle, ReportID report) {
+ clear_graph(*h);
+ cloneHolder(*h, *h_new);
+ } else {
+ DEBUG_PRINTF("prune produced unimplementable graph, "
+ "leaving as-is\n");
+ }
+ }
+}
+
+/** \brief Remove any tops that don't lead to the given report from this
+ * Castle. */
+static
+void pruneCastle(CastleProto &castle, ReportID report) {
+ unordered_set<u32> dead; // tops to remove.
+ for (const auto &m : castle.repeats) {
+ if (!contains(m.second.reports, report)) {
+ dead.insert(m.first);
+ }
+ }
+
+ for (const auto &top : dead) {
+ castle.erase(top);
+ }
+
+ assert(!castle.repeats.empty());
+}
+
+/** \brief Set all reports to the given one. */
+static
+void setReports(CastleProto &castle, ReportID report) {
castle.report_map.clear();
for (auto &e : castle.repeats) {
u32 top = e.first;
auto &repeat = e.second;
- repeat.reports.clear();
- repeat.reports.insert(report);
+ repeat.reports.clear();
+ repeat.reports.insert(report);
castle.report_map[report].insert(top);
- }
-}
-
-static
-void updateEdgeTops(RoseGraph &g, RoseVertex v, const map<u32, u32> &top_map) {
- for (const auto &e : in_edges_range(v, g)) {
- g[e].rose_top = top_map.at(g[e].rose_top);
- }
-}
-
-static
-void pruneUnusedTops(CastleProto &castle, const RoseGraph &g,
- const set<RoseVertex> &verts) {
+ }
+}
+
+static
+void updateEdgeTops(RoseGraph &g, RoseVertex v, const map<u32, u32> &top_map) {
+ for (const auto &e : in_edges_range(v, g)) {
+ g[e].rose_top = top_map.at(g[e].rose_top);
+ }
+}
+
+static
+void pruneUnusedTops(CastleProto &castle, const RoseGraph &g,
+ const set<RoseVertex> &verts) {
unordered_set<u32> used_tops;
- for (auto v : verts) {
- assert(g[v].left.castle.get() == &castle);
-
- for (const auto &e : in_edges_range(v, g)) {
- u32 top = g[e].rose_top;
- assert(contains(castle.repeats, top));
- used_tops.insert(top);
- }
- }
-
- DEBUG_PRINTF("castle has %zu tops, graph has %zu tops\n",
- castle.repeats.size(), used_tops.size());
-
- for (u32 top : assoc_keys(castle.repeats)) {
- if (!contains(used_tops, top)) {
- DEBUG_PRINTF("removing unused top %u\n", top);
- castle.erase(top);
- }
- }
-}
-
-static
-void pruneUnusedTops(NGHolder &h, const RoseGraph &g,
- const set<RoseVertex> &verts) {
+ for (auto v : verts) {
+ assert(g[v].left.castle.get() == &castle);
+
+ for (const auto &e : in_edges_range(v, g)) {
+ u32 top = g[e].rose_top;
+ assert(contains(castle.repeats, top));
+ used_tops.insert(top);
+ }
+ }
+
+ DEBUG_PRINTF("castle has %zu tops, graph has %zu tops\n",
+ castle.repeats.size(), used_tops.size());
+
+ for (u32 top : assoc_keys(castle.repeats)) {
+ if (!contains(used_tops, top)) {
+ DEBUG_PRINTF("removing unused top %u\n", top);
+ castle.erase(top);
+ }
+ }
+}
+
+static
+void pruneUnusedTops(NGHolder &h, const RoseGraph &g,
+ const set<RoseVertex> &verts) {
if (!is_triggered(h)) {
DEBUG_PRINTF("not triggered, no tops\n");
return;
@@ -824,21 +824,21 @@ void pruneUnusedTops(NGHolder &h, const RoseGraph &g,
assert(isCorrectlyTopped(h));
DEBUG_PRINTF("pruning unused tops\n");
flat_set<u32> used_tops;
- for (auto v : verts) {
- assert(g[v].left.graph.get() == &h);
-
- for (const auto &e : in_edges_range(v, g)) {
- u32 top = g[e].rose_top;
- used_tops.insert(top);
- }
- }
-
- vector<NFAEdge> dead;
- for (const auto &e : out_edges_range(h.start, h)) {
- NFAVertex v = target(e, h);
- if (v == h.startDs) {
- continue; // stylised edge, leave it alone.
- }
+ for (auto v : verts) {
+ assert(g[v].left.graph.get() == &h);
+
+ for (const auto &e : in_edges_range(v, g)) {
+ u32 top = g[e].rose_top;
+ used_tops.insert(top);
+ }
+ }
+
+ vector<NFAEdge> dead;
+ for (const auto &e : out_edges_range(h.start, h)) {
+ NFAVertex v = target(e, h);
+ if (v == h.startDs) {
+ continue; // stylised edge, leave it alone.
+ }
flat_set<u32> pruned_tops;
auto pt_inserter = inserter(pruned_tops, pruned_tops.end());
set_intersection(h[e].tops.begin(), h[e].tops.end(),
@@ -846,567 +846,567 @@ void pruneUnusedTops(NGHolder &h, const RoseGraph &g,
h[e].tops = std::move(pruned_tops);
if (h[e].tops.empty()) {
DEBUG_PRINTF("edge (start,%zu) has only unused tops\n", h[v].index);
- dead.push_back(e);
- }
- }
-
- if (dead.empty()) {
- return;
- }
-
- remove_edges(dead, h);
- pruneUseless(h);
- clearReports(h); // As we may have removed vacuous edges.
-}
-
-static
+ dead.push_back(e);
+ }
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
+ remove_edges(dead, h);
+ pruneUseless(h);
+ clearReports(h); // As we may have removed vacuous edges.
+}
+
+static
bool mergeSameCastle(RoseBuildImpl &build, RoseVertex a, RoseVertex b,
RoseAliasingInfo &rai) {
RoseGraph &g = build.g;
- LeftEngInfo &a_left = g[a].left;
- LeftEngInfo &b_left = g[b].left;
- CastleProto &castle = *a_left.castle;
-
- DEBUG_PRINTF("a report=%u, b report=%u\n", a_left.leftfix_report,
- b_left.leftfix_report);
-
- u32 merge_count = 0;
- for (const auto &c : castle.repeats) {
- DEBUG_PRINTF("top %u -> %s report %u\n", c.first,
- c.second.bounds.str().c_str(), *c.second.reports.begin());
- if (contains(c.second.reports, a_left.leftfix_report) ||
- contains(c.second.reports, b_left.leftfix_report)) {
- merge_count++;
- }
- }
-
- if (castle.repeats.size() + merge_count > castle.max_occupancy) {
- DEBUG_PRINTF("too big to merge\n");
- return false;
- }
-
+ LeftEngInfo &a_left = g[a].left;
+ LeftEngInfo &b_left = g[b].left;
+ CastleProto &castle = *a_left.castle;
+
+ DEBUG_PRINTF("a report=%u, b report=%u\n", a_left.leftfix_report,
+ b_left.leftfix_report);
+
+ u32 merge_count = 0;
+ for (const auto &c : castle.repeats) {
+ DEBUG_PRINTF("top %u -> %s report %u\n", c.first,
+ c.second.bounds.str().c_str(), *c.second.reports.begin());
+ if (contains(c.second.reports, a_left.leftfix_report) ||
+ contains(c.second.reports, b_left.leftfix_report)) {
+ merge_count++;
+ }
+ }
+
+ if (castle.repeats.size() + merge_count > castle.max_occupancy) {
+ DEBUG_PRINTF("too big to merge\n");
+ return false;
+ }
+
const ReportID new_report = build.getNewNfaReport();
- map<u32, u32> a_top_map, b_top_map;
-
- for (const auto &c : castle.repeats) {
- u32 old_top = c.first;
- if (contains(c.second.reports, a_left.leftfix_report)) {
- PureRepeat pr = c.second;
- pr.reports.clear();
- pr.reports.insert(new_report);
- u32 new_top = castle.merge(pr);
- assert(new_top < castle.max_occupancy);
- a_top_map[old_top] = new_top;
- } else if (contains(c.second.reports, b_left.leftfix_report)) {
- PureRepeat pr = c.second;
- pr.reports.clear();
- pr.reports.insert(new_report);
- u32 new_top = castle.merge(pr);
- assert(new_top < castle.max_occupancy);
- b_top_map[old_top] = new_top;
- }
- }
-
+ map<u32, u32> a_top_map, b_top_map;
+
+ for (const auto &c : castle.repeats) {
+ u32 old_top = c.first;
+ if (contains(c.second.reports, a_left.leftfix_report)) {
+ PureRepeat pr = c.second;
+ pr.reports.clear();
+ pr.reports.insert(new_report);
+ u32 new_top = castle.merge(pr);
+ assert(new_top < castle.max_occupancy);
+ a_top_map[old_top] = new_top;
+ } else if (contains(c.second.reports, b_left.leftfix_report)) {
+ PureRepeat pr = c.second;
+ pr.reports.clear();
+ pr.reports.insert(new_report);
+ u32 new_top = castle.merge(pr);
+ assert(new_top < castle.max_occupancy);
+ b_top_map[old_top] = new_top;
+ }
+ }
+
assert(contains(rai.rev_leftfix[b_left], b));
rai.rev_leftfix[b_left].erase(b);
rai.rev_leftfix[a_left].insert(b);
-
- a_left.leftfix_report = new_report;
- b_left.leftfix_report = new_report;
- assert(a_left == b_left);
-
- updateEdgeTops(g, a, a_top_map);
- updateEdgeTops(g, b, b_top_map);
-
+
+ a_left.leftfix_report = new_report;
+ b_left.leftfix_report = new_report;
+ assert(a_left == b_left);
+
+ updateEdgeTops(g, a, a_top_map);
+ updateEdgeTops(g, b, b_top_map);
+
pruneUnusedTops(castle, g, rai.rev_leftfix[a_left]);
- return true;
-}
-
-static
+ return true;
+}
+
+static
bool attemptRoseCastleMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a,
- RoseVertex b, bool trivialCasesOnly,
+ RoseVertex b, bool trivialCasesOnly,
RoseAliasingInfo &rai) {
RoseGraph &g = build.g;
- LeftEngInfo &a_left = g[a].left;
- LeftEngInfo &b_left = g[b].left;
- left_id a_left_id(a_left);
- left_id b_left_id(b_left);
- CastleProto &a_castle = *a_left_id.castle();
- CastleProto &b_castle = *b_left_id.castle();
-
- if (a_castle.reach() != b_castle.reach()) {
- DEBUG_PRINTF("different reach\n");
- return false;
- }
-
- DEBUG_PRINTF("a castle=%p, report=%u\n", &a_castle, a_left.leftfix_report);
- DEBUG_PRINTF("b castle=%p, report=%u\n", &b_castle, b_left.leftfix_report);
-
- if (&a_castle == &b_castle) {
- DEBUG_PRINTF("castles are the same\n");
+ LeftEngInfo &a_left = g[a].left;
+ LeftEngInfo &b_left = g[b].left;
+ left_id a_left_id(a_left);
+ left_id b_left_id(b_left);
+ CastleProto &a_castle = *a_left_id.castle();
+ CastleProto &b_castle = *b_left_id.castle();
+
+ if (a_castle.reach() != b_castle.reach()) {
+ DEBUG_PRINTF("different reach\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("a castle=%p, report=%u\n", &a_castle, a_left.leftfix_report);
+ DEBUG_PRINTF("b castle=%p, report=%u\n", &b_castle, b_left.leftfix_report);
+
+ if (&a_castle == &b_castle) {
+ DEBUG_PRINTF("castles are the same\n");
return mergeSameCastle(build, a, b, rai);
- }
-
- if (is_equal(a_castle, a_left.leftfix_report, b_castle,
- b_left.leftfix_report)) {
- DEBUG_PRINTF("castles are equiv with respect to reports\n");
+ }
+
+ if (is_equal(a_castle, a_left.leftfix_report, b_castle,
+ b_left.leftfix_report)) {
+ DEBUG_PRINTF("castles are equiv with respect to reports\n");
if (rai.rev_leftfix[a_left_id].size() == 1) {
- /* nobody else is using a_castle */
+ /* nobody else is using a_castle */
rai.rev_leftfix[b_left_id].erase(b);
rai.rev_leftfix[a_left_id].insert(b);
pruneUnusedTops(b_castle, g, rai.rev_leftfix[b_left_id]);
- b_left.castle = a_left.castle;
- b_left.leftfix_report = a_left.leftfix_report;
- DEBUG_PRINTF("OK -> only user of a_castle\n");
- return true;
- }
-
+ b_left.castle = a_left.castle;
+ b_left.leftfix_report = a_left.leftfix_report;
+ DEBUG_PRINTF("OK -> only user of a_castle\n");
+ return true;
+ }
+
if (rai.rev_leftfix[b_left_id].size() == 1) {
- /* nobody else is using b_castle */
+ /* nobody else is using b_castle */
rai.rev_leftfix[a_left_id].erase(a);
rai.rev_leftfix[b_left_id].insert(a);
pruneUnusedTops(a_castle, g, rai.rev_leftfix[a_left_id]);
- a_left.castle = b_left.castle;
- a_left.leftfix_report = b_left.leftfix_report;
- DEBUG_PRINTF("OK -> only user of b_castle\n");
- return true;
- }
-
- if (preds_same) {
- /* preds are the same anyway in diamond/left merges just need to
+ a_left.castle = b_left.castle;
+ a_left.leftfix_report = b_left.leftfix_report;
+ DEBUG_PRINTF("OK -> only user of b_castle\n");
+ return true;
+ }
+
+ if (preds_same) {
+ /* preds are the same anyway in diamond/left merges just need to
* check that all the literals in rev_leftfix[b_h] can handle a_h */
for (auto v : rai.rev_leftfix[b_left_id]) {
if (!mergeableRoseVertices(build, a, v)) {
- goto literal_mismatch_1;
- }
- }
-
+ goto literal_mismatch_1;
+ }
+ }
+
rai.rev_leftfix[a_left_id].erase(a);
rai.rev_leftfix[b_left_id].insert(a);
pruneUnusedTops(a_castle, g, rai.rev_leftfix[a_left_id]);
- a_left.castle = b_left.castle;
- a_left.leftfix_report = b_left.leftfix_report;
- DEBUG_PRINTF("OK -> same preds ???\n");
- return true;
- literal_mismatch_1:
- /* preds are the same anyway in diamond/left merges just need to
+ a_left.castle = b_left.castle;
+ a_left.leftfix_report = b_left.leftfix_report;
+ DEBUG_PRINTF("OK -> same preds ???\n");
+ return true;
+ literal_mismatch_1:
+ /* preds are the same anyway in diamond/left merges just need to
* check that all the literals in rev_leftfix[a_h] can handle b_h */
for (auto v : rai.rev_leftfix[a_left_id]) {
if (!mergeableRoseVertices(build, v, b)) {
- goto literal_mismatch_2;
- }
- }
-
+ goto literal_mismatch_2;
+ }
+ }
+
rai.rev_leftfix[b_left_id].erase(b);
rai.rev_leftfix[a_left_id].insert(b);
pruneUnusedTops(b_castle, g, rai.rev_leftfix[b_left_id]);
- b_left.castle = a_left.castle;
- b_left.leftfix_report = a_left.leftfix_report;
- DEBUG_PRINTF("OK -> same preds ???\n");
- return true;
- literal_mismatch_2:;
- }
- DEBUG_PRINTF("OK -> create new\n");
- /* we need to create a new graph as there may be other people
- * using b_left and it would be bad if a's preds started triggering it
- */
+ b_left.castle = a_left.castle;
+ b_left.leftfix_report = a_left.leftfix_report;
+ DEBUG_PRINTF("OK -> same preds ???\n");
+ return true;
+ literal_mismatch_2:;
+ }
+ DEBUG_PRINTF("OK -> create new\n");
+ /* we need to create a new graph as there may be other people
+ * using b_left and it would be bad if a's preds started triggering it
+ */
ReportID new_report = build.getNewNfaReport();
- shared_ptr<CastleProto> new_castle = make_shared<CastleProto>(a_castle);
- pruneCastle(*new_castle, a_left.leftfix_report);
- setReports(*new_castle, new_report);
-
+ shared_ptr<CastleProto> new_castle = make_shared<CastleProto>(a_castle);
+ pruneCastle(*new_castle, a_left.leftfix_report);
+ setReports(*new_castle, new_report);
+
rai.rev_leftfix[a_left_id].erase(a);
rai.rev_leftfix[b_left_id].erase(b);
pruneUnusedTops(*a_left.castle, g, rai.rev_leftfix[a_left_id]);
pruneUnusedTops(*b_left.castle, g, rai.rev_leftfix[b_left_id]);
-
- a_left.leftfix_report = new_report;
- b_left.leftfix_report = new_report;
- a_left.castle = new_castle;
- b_left.castle = new_castle;
-
- assert(a_left == b_left);
+
+ a_left.leftfix_report = new_report;
+ b_left.leftfix_report = new_report;
+ a_left.castle = new_castle;
+ b_left.castle = new_castle;
+
+ assert(a_left == b_left);
rai.rev_leftfix[a_left].insert(a);
rai.rev_leftfix[a_left].insert(b);
pruneUnusedTops(*new_castle, g, rai.rev_leftfix[a_left]);
- return true;
- }
-
- // Everything after this point requires more work, so we guard it with the
- // trivial cases argument..
- if (trivialCasesOnly) {
- return false;
- }
-
- // Only infixes. Prefixes require special care when doing non-trivial
- // merges.
+ return true;
+ }
+
+ // Everything after this point requires more work, so we guard it with the
+ // trivial cases argument..
+ if (trivialCasesOnly) {
+ return false;
+ }
+
+ // Only infixes. Prefixes require special care when doing non-trivial
+ // merges.
if (!build.isNonRootSuccessor(a) || !build.isNonRootSuccessor(b)) {
- return false;
- }
-
+ return false;
+ }
+
set<RoseVertex> &b_verts = rai.rev_leftfix[b_left_id];
- set<RoseVertex> aa;
- aa.insert(a);
-
+ set<RoseVertex> aa;
+ aa.insert(a);
+
if (!mergeableRoseVertices(build, aa, b_verts)) {
- DEBUG_PRINTF("vertices not mergeable\n");
- return false;
- }
-
+ DEBUG_PRINTF("vertices not mergeable\n");
+ return false;
+ }
+
if (!build.cc.grey.roseMultiTopRoses || !build.cc.grey.allowCastle) {
- return false;
- }
-
- DEBUG_PRINTF("merging into new castle\n");
-
- // Clone new castle with a's repeats in it, set to a new report.
+ return false;
+ }
+
+ DEBUG_PRINTF("merging into new castle\n");
+
+ // Clone new castle with a's repeats in it, set to a new report.
ReportID new_report = build.getNewNfaReport();
- shared_ptr<CastleProto> m_castle = make_shared<CastleProto>(a_castle);
- pruneCastle(*m_castle, a_left.leftfix_report);
- setReports(*m_castle, new_report);
-
- // Merge in the relevant repeats from b with the new report. Note that
- // we'll have to remap tops appropriately.
- map<u32, u32> b_top_map;
- for (const auto &e : in_edges_range(b, g)) {
- u32 top = g[e].rose_top;
- assert(contains(b_castle.repeats, top));
-
- PureRepeat pr = b_castle.repeats[top]; // mutable copy
- pr.reports.clear();
- pr.reports.insert(new_report);
-
- // We should be protected from merging common preds with tops leading
- // to completely different repeats by earlier checks, but just in
- // case...
+ shared_ptr<CastleProto> m_castle = make_shared<CastleProto>(a_castle);
+ pruneCastle(*m_castle, a_left.leftfix_report);
+ setReports(*m_castle, new_report);
+
+ // Merge in the relevant repeats from b with the new report. Note that
+ // we'll have to remap tops appropriately.
+ map<u32, u32> b_top_map;
+ for (const auto &e : in_edges_range(b, g)) {
+ u32 top = g[e].rose_top;
+ assert(contains(b_castle.repeats, top));
+
+ PureRepeat pr = b_castle.repeats[top]; // mutable copy
+ pr.reports.clear();
+ pr.reports.insert(new_report);
+
+ // We should be protected from merging common preds with tops leading
+ // to completely different repeats by earlier checks, but just in
+ // case...
if (RoseEdge a_edge = edge(source(e, g), a, g)) {
- u32 a_top = g[a_edge].rose_top;
- const PureRepeat &a_pr = m_castle->repeats[a_top]; // new report
- if (pr != a_pr) {
- DEBUG_PRINTF("merge failed, common pred with diff repeat\n");
- return false;
- }
- }
-
- u32 new_top = m_castle->merge(pr);
- if (new_top == CastleProto::max_occupancy) {
- DEBUG_PRINTF("merge failed\n");
- return false;
- }
- b_top_map[top] = new_top;
- }
-
- updateEdgeTops(g, b, b_top_map);
-
- DEBUG_PRINTF("merged into castle containing %zu repeats\n",
- m_castle->repeats.size());
-
+ u32 a_top = g[a_edge].rose_top;
+ const PureRepeat &a_pr = m_castle->repeats[a_top]; // new report
+ if (pr != a_pr) {
+ DEBUG_PRINTF("merge failed, common pred with diff repeat\n");
+ return false;
+ }
+ }
+
+ u32 new_top = m_castle->merge(pr);
+ if (new_top == CastleProto::max_occupancy) {
+ DEBUG_PRINTF("merge failed\n");
+ return false;
+ }
+ b_top_map[top] = new_top;
+ }
+
+ updateEdgeTops(g, b, b_top_map);
+
+ DEBUG_PRINTF("merged into castle containing %zu repeats\n",
+ m_castle->repeats.size());
+
rai.rev_leftfix[a_left_id].erase(a);
rai.rev_leftfix[b_left_id].erase(b);
pruneUnusedTops(*a_left.castle, g, rai.rev_leftfix[a_left_id]);
pruneUnusedTops(*b_left.castle, g, rai.rev_leftfix[b_left_id]);
-
- a_left.castle = m_castle;
- a_left.leftfix_report = new_report;
- b_left.castle = m_castle;
- b_left.leftfix_report = new_report;
-
- assert(a_left == b_left);
+
+ a_left.castle = m_castle;
+ a_left.leftfix_report = new_report;
+ b_left.castle = m_castle;
+ b_left.leftfix_report = new_report;
+
+ assert(a_left == b_left);
rai.rev_leftfix[a_left].insert(a);
rai.rev_leftfix[a_left].insert(b);
pruneUnusedTops(*m_castle, g, rai.rev_leftfix[a_left]);
- return true;
-}
-
-static
+ return true;
+}
+
+static
bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a,
- RoseVertex b, bool trivialCasesOnly,
+ RoseVertex b, bool trivialCasesOnly,
RoseAliasingInfo &rai) {
RoseGraph &g = build.g;
- LeftEngInfo &a_left = g[a].left;
- LeftEngInfo &b_left = g[b].left;
- left_id a_left_id(a_left);
- left_id b_left_id(b_left);
- shared_ptr<NGHolder> a_h = a_left.graph;
- shared_ptr<NGHolder> b_h = b_left.graph;
- assert(a_h && b_h);
+ LeftEngInfo &a_left = g[a].left;
+ LeftEngInfo &b_left = g[b].left;
+ left_id a_left_id(a_left);
+ left_id b_left_id(b_left);
+ shared_ptr<NGHolder> a_h = a_left.graph;
+ shared_ptr<NGHolder> b_h = b_left.graph;
+ assert(a_h && b_h);
assert(isImplementableNFA(*a_h, nullptr, build.cc));
assert(isImplementableNFA(*b_h, nullptr, build.cc));
-
- // If we only differ in reports, this is a very easy merge. Just use b's
- // report for both.
- /* Actually not so easy, there may be other poor suckers using a and/or b's
- * reports who will be surprised by this change */
- if (a_h == b_h) {
- DEBUG_PRINTF("OK -> same actual holder\n");
- ReportID a_oldreport = a_left.leftfix_report;
- ReportID b_oldreport = b_left.leftfix_report;
+
+ // If we only differ in reports, this is a very easy merge. Just use b's
+ // report for both.
+ /* Actually not so easy, there may be other poor suckers using a and/or b's
+ * reports who will be surprised by this change */
+ if (a_h == b_h) {
+ DEBUG_PRINTF("OK -> same actual holder\n");
+ ReportID a_oldreport = a_left.leftfix_report;
+ ReportID b_oldreport = b_left.leftfix_report;
ReportID new_report = build.getNewNfaReport();
- duplicateReport(*a_h, a_left.leftfix_report, new_report);
- duplicateReport(*b_h, b_left.leftfix_report, new_report);
- a_left.leftfix_report = new_report;
- b_left.leftfix_report = new_report;
+ duplicateReport(*a_h, a_left.leftfix_report, new_report);
+ duplicateReport(*b_h, b_left.leftfix_report, new_report);
+ a_left.leftfix_report = new_report;
+ b_left.leftfix_report = new_report;
pruneReportIfUnused(build, b_h, rai.rev_leftfix[b_left_id],
a_oldreport);
pruneReportIfUnused(build, b_h, rai.rev_leftfix[b_left_id],
b_oldreport);
pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]);
- assert(a_left == b_left);
- return true;
- }
-
- /* if it is the same graph, it is also fairly easy */
- if (is_equal(*a_h, a_left.leftfix_report, *b_h, b_left.leftfix_report)) {
+ assert(a_left == b_left);
+ return true;
+ }
+
+ /* if it is the same graph, it is also fairly easy */
+ if (is_equal(*a_h, a_left.leftfix_report, *b_h, b_left.leftfix_report)) {
if (rai.rev_leftfix[a_left_id].size() == 1) {
- /* nobody else is using a_h */
+ /* nobody else is using a_h */
rai.rev_leftfix[b_left_id].erase(b);
rai.rev_leftfix[a_left_id].insert(b);
- b_left.graph = a_h;
- b_left.leftfix_report = a_left.leftfix_report;
+ b_left.graph = a_h;
+ b_left.leftfix_report = a_left.leftfix_report;
pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]);
- DEBUG_PRINTF("OK -> only user of a_h\n");
- return true;
- }
-
+ DEBUG_PRINTF("OK -> only user of a_h\n");
+ return true;
+ }
+
if (rai.rev_leftfix[b_left_id].size() == 1) {
- /* nobody else is using b_h */
+ /* nobody else is using b_h */
rai.rev_leftfix[a_left_id].erase(a);
rai.rev_leftfix[b_left_id].insert(a);
- a_left.graph = b_h;
- a_left.leftfix_report = b_left.leftfix_report;
+ a_left.graph = b_h;
+ a_left.leftfix_report = b_left.leftfix_report;
pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]);
- DEBUG_PRINTF("OK -> only user of b_h\n");
- return true;
- }
-
- if (preds_same) {
- /* preds are the same anyway in diamond/left merges just need to
+ DEBUG_PRINTF("OK -> only user of b_h\n");
+ return true;
+ }
+
+ if (preds_same) {
+ /* preds are the same anyway in diamond/left merges just need to
* check that all the literals in rev_leftfix[b_h] can handle a_h */
for (auto v : rai.rev_leftfix[b_left_id]) {
if (!mergeableRoseVertices(build, a, v)) {
- goto literal_mismatch_1;
- }
- }
-
+ goto literal_mismatch_1;
+ }
+ }
+
rai.rev_leftfix[a_left_id].erase(a);
rai.rev_leftfix[b_left_id].insert(a);
- a_left.graph = b_h;
- a_left.leftfix_report = b_left.leftfix_report;
+ a_left.graph = b_h;
+ a_left.leftfix_report = b_left.leftfix_report;
pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]);
- DEBUG_PRINTF("OK -> same preds ???\n");
- return true;
- literal_mismatch_1:
- /* preds are the same anyway in diamond/left merges just need to
+ DEBUG_PRINTF("OK -> same preds ???\n");
+ return true;
+ literal_mismatch_1:
+ /* preds are the same anyway in diamond/left merges just need to
* check that all the literals in rev_leftfix[a_h] can handle b_h */
for (auto v : rai.rev_leftfix[a_left_id]) {
if (!mergeableRoseVertices(build, v, b)) {
- goto literal_mismatch_2;
- }
- }
-
+ goto literal_mismatch_2;
+ }
+ }
+
rai.rev_leftfix[b_left_id].erase(b);
rai.rev_leftfix[a_left_id].insert(b);
- b_left.graph = a_h;
- b_left.leftfix_report = a_left.leftfix_report;
+ b_left.graph = a_h;
+ b_left.leftfix_report = a_left.leftfix_report;
pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]);
- DEBUG_PRINTF("OK -> same preds ???\n");
- return true;
- literal_mismatch_2:;
- }
- DEBUG_PRINTF("OK -> create new\n");
- /* we need to create a new graph as there may be other people
- * using b_left and it would be bad if a's preds started triggering it
- */
+ DEBUG_PRINTF("OK -> same preds ???\n");
+ return true;
+ literal_mismatch_2:;
+ }
+ DEBUG_PRINTF("OK -> create new\n");
+ /* we need to create a new graph as there may be other people
+ * using b_left and it would be bad if a's preds started triggering it
+ */
ReportID new_report = build.getNewNfaReport();
- shared_ptr<NGHolder> new_graph = cloneHolder(*b_h);
- duplicateReport(*new_graph, b_left.leftfix_report, new_report);
+ shared_ptr<NGHolder> new_graph = cloneHolder(*b_h);
+ duplicateReport(*new_graph, b_left.leftfix_report, new_report);
pruneAllOtherReports(*new_graph, new_report);
-
+
if (!isImplementableNFA(*new_graph, nullptr, build.cc)) {
DEBUG_PRINTF("new graph not implementable\n");
return false;
}
-
+
rai.rev_leftfix[a_left_id].erase(a);
rai.rev_leftfix[b_left_id].erase(b);
pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]);
pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]);
- a_left.leftfix_report = new_report;
- b_left.leftfix_report = new_report;
- a_left.graph = new_graph;
- b_left.graph = new_graph;
-
+ a_left.leftfix_report = new_report;
+ b_left.leftfix_report = new_report;
+ a_left.graph = new_graph;
+ b_left.graph = new_graph;
+
rai.rev_leftfix[a_left].insert(a);
rai.rev_leftfix[a_left].insert(b);
pruneUnusedTops(*new_graph, g, rai.rev_leftfix[a_left]);
- return true;
- }
-
- // Everything after this point requires merging via the uncalc code, so we
- // guard it with the trivial cases arg.
- if (trivialCasesOnly) {
- return false;
- }
-
- // Only infixes. Prefixes require special care when doing non-trivial
- // merges.
+ return true;
+ }
+
+ // Everything after this point requires merging via the uncalc code, so we
+ // guard it with the trivial cases arg.
+ if (trivialCasesOnly) {
+ return false;
+ }
+
+ // Only infixes. Prefixes require special care when doing non-trivial
+ // merges.
if (!build.isNonRootSuccessor(a) || !build.isNonRootSuccessor(b)) {
- return false;
- }
-
- DEBUG_PRINTF("attempting merge of roses on vertices %zu and %zu\n",
+ return false;
+ }
+
+ DEBUG_PRINTF("attempting merge of roses on vertices %zu and %zu\n",
g[a].index, g[b].index);
-
+
set<RoseVertex> &b_verts = rai.rev_leftfix[b_left];
- set<RoseVertex> aa;
- aa.insert(a);
-
+ set<RoseVertex> aa;
+ aa.insert(a);
+
if (!mergeableRoseVertices(build, aa, b_verts)) {
- DEBUG_PRINTF("vertices not mergeable\n");
- return false;
- }
-
+ DEBUG_PRINTF("vertices not mergeable\n");
+ return false;
+ }
+
if (!build.cc.grey.roseMultiTopRoses) {
- return false;
- }
-
- // Clone a copy of a's NFA to operate on, and store a copy of its in-edge
- // properties.
-
- /* We need to allocate a new report id because */
- ReportID a_oldreport = a_left.leftfix_report;
- ReportID b_oldreport = b_left.leftfix_report;
+ return false;
+ }
+
+ // Clone a copy of a's NFA to operate on, and store a copy of its in-edge
+ // properties.
+
+ /* We need to allocate a new report id because */
+ ReportID a_oldreport = a_left.leftfix_report;
+ ReportID b_oldreport = b_left.leftfix_report;
ReportID new_report = build.getNewNfaReport();
- duplicateReport(*b_h, b_left.leftfix_report, new_report);
- b_left.leftfix_report = new_report;
+ duplicateReport(*b_h, b_left.leftfix_report, new_report);
+ b_left.leftfix_report = new_report;
pruneReportIfUnused(build, b_h, rai.rev_leftfix[b_left_id], b_oldreport);
-
- NGHolder victim;
- cloneHolder(victim, *a_h);
- duplicateReport(victim, a_left.leftfix_report, new_report);
- pruneAllOtherReports(victim, new_report);
-
- map<RoseVertex, RoseEdgeProps> a_props;
- for (const auto &e : in_edges_range(a, g)) {
- a_props[source(e, g)] = g[e];
- }
-
- DEBUG_PRINTF("victim %zu states\n", num_vertices(*a_h));
- DEBUG_PRINTF("winner %zu states\n", num_vertices(*b_h));
-
- if (!setDistinctRoseTops(g, victim, *b_h, deque<RoseVertex>(1, a))) {
+
+ NGHolder victim;
+ cloneHolder(victim, *a_h);
+ duplicateReport(victim, a_left.leftfix_report, new_report);
+ pruneAllOtherReports(victim, new_report);
+
+ map<RoseVertex, RoseEdgeProps> a_props;
+ for (const auto &e : in_edges_range(a, g)) {
+ a_props[source(e, g)] = g[e];
+ }
+
+ DEBUG_PRINTF("victim %zu states\n", num_vertices(*a_h));
+ DEBUG_PRINTF("winner %zu states\n", num_vertices(*b_h));
+
+ if (!setDistinctRoseTops(g, victim, *b_h, deque<RoseVertex>(1, a))) {
assert(roseHasTops(build, a));
assert(roseHasTops(build, b));
- return false;
- }
-
- assert(victim.kind == b_h->kind);
- assert(!generates_callbacks(*b_h));
-
+ return false;
+ }
+
+ assert(victim.kind == b_h->kind);
+ assert(!generates_callbacks(*b_h));
+
if (!mergeNfaPair(victim, *b_h, nullptr, build.cc)) {
- DEBUG_PRINTF("merge failed\n");
- // Restore in-edge properties.
- for (const auto &e : in_edges_range(a, g)) {
- g[e] = a_props[source(e, g)];
- }
+ DEBUG_PRINTF("merge failed\n");
+ // Restore in-edge properties.
+ for (const auto &e : in_edges_range(a, g)) {
+ g[e] = a_props[source(e, g)];
+ }
assert(roseHasTops(build, a));
assert(roseHasTops(build, b));
- return false;
- }
-
- DEBUG_PRINTF("merge succeeded -> %zu vertices\n", num_vertices(*b_h));
-
- // update A's rose data to point to the merged graph.
- a_left.graph = b_h;
- a_left.leftfix_report = new_report;
-
+ return false;
+ }
+
+ DEBUG_PRINTF("merge succeeded -> %zu vertices\n", num_vertices(*b_h));
+
+ // update A's rose data to point to the merged graph.
+ a_left.graph = b_h;
+ a_left.leftfix_report = new_report;
+
assert(contains(rai.rev_leftfix[a_left_id], a));
assert(contains(rai.rev_leftfix[b_left_id], b));
rai.rev_leftfix[a_left_id].erase(a);
rai.rev_leftfix[b_left_id].insert(a);
-
+
pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]);
pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]);
-
- // Prune A's report from its old prefix if it was only used by A.
+
+ // Prune A's report from its old prefix if it was only used by A.
pruneReportIfUnused(build, a_h, rai.rev_leftfix[a_left_id], a_oldreport);
-
+
reduceImplementableGraph(*b_h, SOM_NONE, nullptr, build.cc);
-
+
assert(roseHasTops(build, a));
assert(roseHasTops(build, b));
assert(isImplementableNFA(*b_h, nullptr, build.cc));
- return true;
-}
-
-// Called by the role aliasing pass: Attempt to merge rose a into b, updating
-// the two LeftEngInfo structures to be the same. Returns false if the merge
-// is not possible.
-static
+ return true;
+}
+
+// Called by the role aliasing pass: Attempt to merge rose a into b, updating
+// the two LeftEngInfo structures to be the same. Returns false if the merge
+// is not possible.
+static
bool attemptRoseMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a,
RoseVertex b, bool trivialCasesOnly,
RoseAliasingInfo &rai) {
- DEBUG_PRINTF("attempting rose merge, vertices a=%zu, b=%zu\n",
+ DEBUG_PRINTF("attempting rose merge, vertices a=%zu, b=%zu\n",
build.g[a].index, build.g[b].index);
- assert(a != b);
-
+ assert(a != b);
+
RoseGraph &g = build.g;
- LeftEngInfo &a_left = g[a].left;
- LeftEngInfo &b_left = g[b].left;
-
- // Trivial case.
- if (a_left == b_left) {
- DEBUG_PRINTF("roses are identical, no leftfix or already merged\n");
- return true;
- }
-
- const left_id a_left_id(a_left);
- const left_id b_left_id(b_left);
-
- /* Haig merges not supported at the moment */
- if (a_left.haig || b_left.haig) {
- return false;
- }
-
- /* dfa merges not supported at the moment (no multitop) */
- if (a_left.dfa || b_left.dfa) {
- return false;
- }
-
- // Only non-transients for the moment.
+ LeftEngInfo &a_left = g[a].left;
+ LeftEngInfo &b_left = g[b].left;
+
+ // Trivial case.
+ if (a_left == b_left) {
+ DEBUG_PRINTF("roses are identical, no leftfix or already merged\n");
+ return true;
+ }
+
+ const left_id a_left_id(a_left);
+ const left_id b_left_id(b_left);
+
+ /* Haig merges not supported at the moment */
+ if (a_left.haig || b_left.haig) {
+ return false;
+ }
+
+ /* dfa merges not supported at the moment (no multitop) */
+ if (a_left.dfa || b_left.dfa) {
+ return false;
+ }
+
+ // Only non-transients for the moment.
if (contains(build.transient, a_left_id) ||
contains(build.transient, b_left_id)) {
- return false;
- }
-
- /* It is not possible to merge roles with different lags as we can only
- * test the leftfix at one location relative to the literal match */
- if (a_left.lag != b_left.lag) {
- return false;
- }
-
+ return false;
+ }
+
+ /* It is not possible to merge roles with different lags as we can only
+ * test the leftfix at one location relative to the literal match */
+ if (a_left.lag != b_left.lag) {
+ return false;
+ }
+
assert(roseHasTops(build, a));
assert(roseHasTops(build, b));
-
- if (a_left_id.graph() && b_left_id.graph()) {
+
+ if (a_left_id.graph() && b_left_id.graph()) {
return attemptRoseGraphMerge(build, preds_same, a, b, trivialCasesOnly,
rai);
- }
-
- if (a_left_id.castle() && b_left_id.castle()) {
+ }
+
+ if (a_left_id.castle() && b_left_id.castle()) {
return attemptRoseCastleMerge(build, preds_same, a, b, trivialCasesOnly,
rai);
- }
-
- return false;
-}
-
+ }
+
+ return false;
+}
+
/**
* \brief Buckets that only contain one vertex are never going to lead to a
* merge.
*/
-static
+static
void removeSingletonBuckets(vector<vector<RoseVertex>> &buckets) {
auto it = remove_if(
begin(buckets), end(buckets),
@@ -1415,10 +1415,10 @@ void removeSingletonBuckets(vector<vector<RoseVertex>> &buckets) {
DEBUG_PRINTF("deleting %zu singleton buckets\n",
distance(it, end(buckets)));
buckets.erase(it, end(buckets));
- }
-}
-
-static
+ }
+}
+
+static
void buildInvBucketMap(const vector<vector<RoseVertex>> &buckets,
unordered_map<RoseVertex, size_t> &inv) {
inv.clear();
@@ -1429,7 +1429,7 @@ void buildInvBucketMap(const vector<vector<RoseVertex>> &buckets,
}
}
}
-
+
/**
* \brief Generic splitter that will use the given split function to partition
* the vector of buckets, then remove buckets with <= 1 entry.
@@ -1441,35 +1441,35 @@ void splitAndFilterBuckets(vector<vector<RoseVertex>> &buckets,
return;
}
- vector<vector<RoseVertex>> out;
-
+ vector<vector<RoseVertex>> out;
+
// Mapping from split key value to new bucket index.
using key_type = decltype(make_split_key(RoseGraph::null_vertex()));
unordered_map<key_type, size_t> dest_map;
dest_map.reserve(buckets.front().size());
- for (const auto &bucket : buckets) {
- assert(!bucket.empty());
+ for (const auto &bucket : buckets) {
+ assert(!bucket.empty());
dest_map.clear();
- for (RoseVertex v : bucket) {
+ for (RoseVertex v : bucket) {
auto p = dest_map.emplace(make_split_key(v), out.size());
if (p.second) { // New key, add a bucket.
out.emplace_back();
- }
+ }
auto out_bucket = p.first->second;
- out[out_bucket].push_back(v);
- }
- }
-
+ out[out_bucket].push_back(v);
+ }
+ }
+
if (out.size() == buckets.size()) {
return; // No new buckets created.
}
buckets = std::move(out);
removeSingletonBuckets(buckets);
-}
-
-static
+}
+
+static
void splitByReportSuffixBehaviour(const RoseGraph &g,
vector<vector<RoseVertex>> &buckets) {
// Split by report set and suffix info.
@@ -1495,74 +1495,74 @@ void splitByLiteralTable(const RoseBuildImpl &build,
}
static
-void splitByNeighbour(const RoseGraph &g, vector<vector<RoseVertex>> &buckets,
+void splitByNeighbour(const RoseGraph &g, vector<vector<RoseVertex>> &buckets,
unordered_map<RoseVertex, size_t> &inv, bool succ) {
- vector<vector<RoseVertex>> extras;
- map<size_t, vector<RoseVertex>> neighbours_by_bucket;
- set<RoseVertex> picked;
- vector<RoseVertex> leftovers;
-
- for (RoseVertex u : vertices_range(g)) {
- /* once split by v, stays split. also keeps iterator in buckets valid */
- extras.clear();
- neighbours_by_bucket.clear();
- if (succ) {
- /* forward pass */
- for (RoseVertex v : adjacent_vertices_range(u, g)) {
- auto it = inv.find(v);
- if (it != end(inv)) {
- neighbours_by_bucket[it->second].push_back(v);
- }
- }
- } else {
- /* backward pass */
- for (RoseVertex v : inv_adjacent_vertices_range(u, g)) {
- auto it = inv.find(v);
- if (it != end(inv)) {
- neighbours_by_bucket[it->second].push_back(v);
- }
- }
- }
- for (const auto &e : neighbours_by_bucket) {
- size_t old_key = e.first;
- if (buckets[old_key].size() == e.second.size()) {
- /* did not split */
- continue;
- }
- assert(!e.second.empty());
-
- picked.clear();
- picked.insert(begin(e.second), end(e.second));
-
- size_t new_key = buckets.size() + extras.size();
- leftovers.clear();
- for (RoseVertex v : buckets[old_key]) {
- if (contains(picked, v)) {
- inv[v] = new_key;
- } else {
- leftovers.push_back(v);
- }
- }
-
- assert(!leftovers.empty());
- assert(e.second.size() + leftovers.size()
- == buckets[old_key].size());
- extras.push_back(e.second);
- buckets[old_key].swap(leftovers);
- }
- insert(&buckets, buckets.end(), extras);
- }
+ vector<vector<RoseVertex>> extras;
+ map<size_t, vector<RoseVertex>> neighbours_by_bucket;
+ set<RoseVertex> picked;
+ vector<RoseVertex> leftovers;
+
+ for (RoseVertex u : vertices_range(g)) {
+ /* once split by v, stays split. also keeps iterator in buckets valid */
+ extras.clear();
+ neighbours_by_bucket.clear();
+ if (succ) {
+ /* forward pass */
+ for (RoseVertex v : adjacent_vertices_range(u, g)) {
+ auto it = inv.find(v);
+ if (it != end(inv)) {
+ neighbours_by_bucket[it->second].push_back(v);
+ }
+ }
+ } else {
+ /* backward pass */
+ for (RoseVertex v : inv_adjacent_vertices_range(u, g)) {
+ auto it = inv.find(v);
+ if (it != end(inv)) {
+ neighbours_by_bucket[it->second].push_back(v);
+ }
+ }
+ }
+ for (const auto &e : neighbours_by_bucket) {
+ size_t old_key = e.first;
+ if (buckets[old_key].size() == e.second.size()) {
+ /* did not split */
+ continue;
+ }
+ assert(!e.second.empty());
+
+ picked.clear();
+ picked.insert(begin(e.second), end(e.second));
+
+ size_t new_key = buckets.size() + extras.size();
+ leftovers.clear();
+ for (RoseVertex v : buckets[old_key]) {
+ if (contains(picked, v)) {
+ inv[v] = new_key;
+ } else {
+ leftovers.push_back(v);
+ }
+ }
+
+ assert(!leftovers.empty());
+ assert(e.second.size() + leftovers.size()
+ == buckets[old_key].size());
+ extras.push_back(e.second);
+ buckets[old_key].swap(leftovers);
+ }
+ insert(&buckets, buckets.end(), extras);
+ }
removeSingletonBuckets(buckets);
buildInvBucketMap(buckets, inv);
-}
-
-static
+}
+
+static
vector<vector<RoseVertex>>
splitDiamondMergeBuckets(CandidateSet &candidates, const RoseBuildImpl &build) {
- const RoseGraph &g = build.g;
-
- vector<vector<RoseVertex>> buckets(1);
+ const RoseGraph &g = build.g;
+
+ vector<vector<RoseVertex>> buckets(1);
buckets[0].reserve(candidates.size());
insert(&buckets[0], buckets[0].end(), candidates);
@@ -1572,8 +1572,8 @@ splitDiamondMergeBuckets(CandidateSet &candidates, const RoseBuildImpl &build) {
DEBUG_PRINTF("split by report/suffix, %zu buckets\n", buckets.size());
if (buckets.empty()) {
return buckets;
- }
-
+ }
+
splitByLiteralTable(build, buckets);
DEBUG_PRINTF("split by lit table, %zu buckets\n", buckets.size());
if (buckets.empty()) {
@@ -1584,130 +1584,130 @@ splitDiamondMergeBuckets(CandidateSet &candidates, const RoseBuildImpl &build) {
unordered_map<RoseVertex, size_t> inv;
buildInvBucketMap(buckets, inv);
- splitByNeighbour(g, buckets, inv, true);
+ splitByNeighbour(g, buckets, inv, true);
DEBUG_PRINTF("split by successor, %zu buckets\n", buckets.size());
if (buckets.empty()) {
return buckets;
}
- splitByNeighbour(g, buckets, inv, false);
+ splitByNeighbour(g, buckets, inv, false);
DEBUG_PRINTF("split by predecessor, %zu buckets\n", buckets.size());
-
- return buckets;
-}
-static never_inline
+ return buckets;
+}
+
+static never_inline
void diamondMergePass(CandidateSet &candidates, RoseBuildImpl &build,
- vector<RoseVertex> *dead, bool mergeRoses,
+ vector<RoseVertex> *dead, bool mergeRoses,
RoseAliasingInfo &rai) {
- DEBUG_PRINTF("begin\n");
+ DEBUG_PRINTF("begin\n");
RoseGraph &g = build.g;
-
- if (candidates.empty()) {
- return;
- }
-
- /* Vertices may only be diamond merged with others in the same bucket */
+
+ if (candidates.empty()) {
+ return;
+ }
+
+ /* Vertices may only be diamond merged with others in the same bucket */
auto cand_buckets = splitDiamondMergeBuckets(candidates, build);
-
- for (const vector<RoseVertex> &siblings : cand_buckets) {
- for (auto it = siblings.begin(); it != siblings.end();) {
- RoseVertex a = *it;
- ++it;
-
- assert(contains(candidates, a));
-
+
+ for (const vector<RoseVertex> &siblings : cand_buckets) {
+ for (auto it = siblings.begin(); it != siblings.end();) {
+ RoseVertex a = *it;
+ ++it;
+
+ assert(contains(candidates, a));
+
DEBUG_PRINTF("trying to merge %zu into somebody\n", g[a].index);
- for (auto jt = it; jt != siblings.end(); ++jt) {
- RoseVertex b = *jt;
- assert(contains(candidates, b));
-
+ for (auto jt = it; jt != siblings.end(); ++jt) {
+ RoseVertex b = *jt;
+ assert(contains(candidates, b));
+
if (!sameRoleProperties(build, rai, a, b)) {
- DEBUG_PRINTF("diff role prop\n");
- continue;
- }
-
- // Check "diamond" requirements: must have same right side
- // (successors, reports) and left side (predecessors).
- /* Note: bucketing does not check edge properties (bounds, tops)
- * so we still have to checks successors and predecessors. */
-
- if (!sameSuccessors(a, b, g)
+ DEBUG_PRINTF("diff role prop\n");
+ continue;
+ }
+
+ // Check "diamond" requirements: must have same right side
+ // (successors, reports) and left side (predecessors).
+ /* Note: bucketing does not check edge properties (bounds, tops)
+ * so we still have to checks successors and predecessors. */
+
+ if (!sameSuccessors(a, b, g)
|| !sameRightRoleProperties(build, a, b)
- || !samePredecessors(a, b, g)) {
- DEBUG_PRINTF("not diamond\n");
- continue;
- }
-
+ || !samePredecessors(a, b, g)) {
+ DEBUG_PRINTF("not diamond\n");
+ continue;
+ }
+
if (!canMergeLiterals(a, b, build)) {
- DEBUG_PRINTF("incompatible lits\n");
- continue;
- }
-
+ DEBUG_PRINTF("incompatible lits\n");
+ continue;
+ }
+
if (!attemptRoseMerge(build, true, a, b, !mergeRoses, rai)) {
- DEBUG_PRINTF("rose fail\n");
- continue;
- }
-
+ DEBUG_PRINTF("rose fail\n");
+ continue;
+ }
+
mergeVerticesDiamond(a, b, build, rai);
- dead->push_back(a);
- candidates.erase(a);
- break; // next a
- }
- }
- }
-
- DEBUG_PRINTF("%zu candidates remaining\n", candidates.size());
-}
-
-static
-vector<RoseVertex>::iterator findLeftMergeSibling(
- vector<RoseVertex>::iterator it,
- const vector<RoseVertex>::iterator &end,
- const RoseVertex a, const RoseBuildImpl &build,
+ dead->push_back(a);
+ candidates.erase(a);
+ break; // next a
+ }
+ }
+ }
+
+ DEBUG_PRINTF("%zu candidates remaining\n", candidates.size());
+}
+
+static
+vector<RoseVertex>::iterator findLeftMergeSibling(
+ vector<RoseVertex>::iterator it,
+ const vector<RoseVertex>::iterator &end,
+ const RoseVertex a, const RoseBuildImpl &build,
const RoseAliasingInfo &rai,
- const CandidateSet &candidates) {
- const RoseGraph &g = build.g;
-
- for (; it != end; ++it) {
- RoseVertex b = *it;
- if (a == b) {
- continue;
- }
-
- if (!contains(candidates, b)) {
- continue;
- }
-
+ const CandidateSet &candidates) {
+ const RoseGraph &g = build.g;
+
+ for (; it != end; ++it) {
+ RoseVertex b = *it;
+ if (a == b) {
+ continue;
+ }
+
+ if (!contains(candidates, b)) {
+ continue;
+ }
+
if (!sameRoleProperties(build, rai, a, b)) {
- continue;
- }
-
- // Check left-equivalence: must have same predecessors and same
- // literals.
-
- if (g[a].literals != g[b].literals) {
- continue;
- }
-
- if (!samePredecessors(a, b, g)) {
- continue;
- }
-
- if (hasCommonSuccWithBadBounds(a, b, g)) {
- continue;
- }
-
- if (g[a].suffix && g[b].suffix && g[a].suffix != g[b].suffix) {
- continue; /* we can only trigger one suffix */
- }
-
- return it;
- }
-
- return end;
-}
-
+ continue;
+ }
+
+ // Check left-equivalence: must have same predecessors and same
+ // literals.
+
+ if (g[a].literals != g[b].literals) {
+ continue;
+ }
+
+ if (!samePredecessors(a, b, g)) {
+ continue;
+ }
+
+ if (hasCommonSuccWithBadBounds(a, b, g)) {
+ continue;
+ }
+
+ if (g[a].suffix && g[b].suffix && g[a].suffix != g[b].suffix) {
+ continue; /* we can only trigger one suffix */
+ }
+
+ return it;
+ }
+
+ return end;
+}
+
static
void getLeftMergeSiblings(const RoseBuildImpl &build, RoseVertex a,
vector<RoseVertex> &siblings) {
@@ -1734,20 +1734,20 @@ void getLeftMergeSiblings(const RoseBuildImpl &build, RoseVertex a,
}
}
-static never_inline
+static never_inline
void leftMergePass(CandidateSet &candidates, RoseBuildImpl &build,
vector<RoseVertex> *dead, RoseAliasingInfo &rai) {
- DEBUG_PRINTF("begin (%zu)\n", candidates.size());
- vector<RoseVertex> siblings;
-
+ DEBUG_PRINTF("begin (%zu)\n", candidates.size());
+ vector<RoseVertex> siblings;
+
auto it = candidates.begin();
- while (it != candidates.end()) {
- RoseVertex a = *it;
- CandidateSet::iterator ait = it;
- ++it;
-
+ while (it != candidates.end()) {
+ RoseVertex a = *it;
+ CandidateSet::iterator ait = it;
+ ++it;
+
getLeftMergeSiblings(build, a, siblings);
-
+
auto jt = siblings.begin();
while (jt != siblings.end()) {
jt = findLeftMergeSibling(jt, siblings.end(), a, build, rai,
@@ -1763,98 +1763,98 @@ void leftMergePass(CandidateSet &candidates, RoseBuildImpl &build,
break; // consider next a
}
++jt;
- }
- }
-
- DEBUG_PRINTF("%zu candidates remaining\n", candidates.size());
+ }
+ }
+
+ DEBUG_PRINTF("%zu candidates remaining\n", candidates.size());
assert(!hasOrphanedTops(build));
-}
-
-// Can't merge vertices with different root predecessors.
-static
-bool safeRootPreds(RoseVertex a, RoseVertex b, const RoseGraph &g) {
- set<RoseVertex> a_roots, b_roots;
-
- for (auto u : inv_adjacent_vertices_range(a, g)) {
+}
+
+// Can't merge vertices with different root predecessors.
+static
+bool safeRootPreds(RoseVertex a, RoseVertex b, const RoseGraph &g) {
+ set<RoseVertex> a_roots, b_roots;
+
+ for (auto u : inv_adjacent_vertices_range(a, g)) {
if (!in_degree(u, g)) {
- a_roots.insert(u);
- }
- }
- for (auto u : inv_adjacent_vertices_range(b, g)) {
+ a_roots.insert(u);
+ }
+ }
+ for (auto u : inv_adjacent_vertices_range(b, g)) {
if (!in_degree(u, g)) {
- b_roots.insert(u);
- }
- }
-
- assert(a_roots.size() <= 1);
- assert(b_roots.size() <= 1);
-
- return a_roots == b_roots;
-}
-
-static never_inline
-vector<RoseVertex>::const_iterator findRightMergeSibling(
- vector<RoseVertex>::const_iterator it,
- const vector<RoseVertex>::const_iterator &end,
- const RoseVertex a, const RoseBuildImpl &build,
+ b_roots.insert(u);
+ }
+ }
+
+ assert(a_roots.size() <= 1);
+ assert(b_roots.size() <= 1);
+
+ return a_roots == b_roots;
+}
+
+static never_inline
+vector<RoseVertex>::const_iterator findRightMergeSibling(
+ vector<RoseVertex>::const_iterator it,
+ const vector<RoseVertex>::const_iterator &end,
+ const RoseVertex a, const RoseBuildImpl &build,
const RoseAliasingInfo &rai,
- const CandidateSet &candidates) {
- const RoseGraph &g = build.g;
-
- for (; it != end; ++it) {
- RoseVertex b = *it;
- if (a == b) {
- continue;
- }
-
- if (!contains(candidates, b)) {
- continue;
- }
-
+ const CandidateSet &candidates) {
+ const RoseGraph &g = build.g;
+
+ for (; it != end; ++it) {
+ RoseVertex b = *it;
+ if (a == b) {
+ continue;
+ }
+
+ if (!contains(candidates, b)) {
+ continue;
+ }
+
if (!sameRoleProperties(build, rai, a, b)) {
- continue;
- }
-
- // Check right-equivalence: must have same successors, reports and same
- // literals.
-
- if (g[a].literals != g[b].literals) {
- continue;
- }
-
- if (!sameSuccessors(a, b, g)
- || !sameRightRoleProperties(build, a, b)) {
- continue;
- }
-
- // An extra wrinkle: we cannot merge two vertices that are root
- // successors if their preds are different. (e.g. one is anchored and
- // one is not)
- if (!safeRootPreds(a, b, g)) {
- continue;
- }
-
- if (hasCommonPredWithBadBounds(a, b, g)) {
- continue;
- }
-
- if (hasCommonPredWithDiffRoses(a, b, g)) {
- continue;
- }
-
- return it;
- }
-
- return end;
-}
-
-static
+ continue;
+ }
+
+ // Check right-equivalence: must have same successors, reports and same
+ // literals.
+
+ if (g[a].literals != g[b].literals) {
+ continue;
+ }
+
+ if (!sameSuccessors(a, b, g)
+ || !sameRightRoleProperties(build, a, b)) {
+ continue;
+ }
+
+ // An extra wrinkle: we cannot merge two vertices that are root
+ // successors if their preds are different. (e.g. one is anchored and
+ // one is not)
+ if (!safeRootPreds(a, b, g)) {
+ continue;
+ }
+
+ if (hasCommonPredWithBadBounds(a, b, g)) {
+ continue;
+ }
+
+ if (hasCommonPredWithDiffRoses(a, b, g)) {
+ continue;
+ }
+
+ return it;
+ }
+
+ return end;
+}
+
+static
void splitByRightProps(const RoseGraph &g,
vector<vector<RoseVertex>> &buckets) {
// Successor vector used in make_split_key. We declare it here so we can
// reuse storage.
vector<RoseVertex> succ;
-
+
// Split by {successors, literals, reports}.
auto make_split_key = [&](RoseVertex v) {
succ.clear();
@@ -1863,48 +1863,48 @@ void splitByRightProps(const RoseGraph &g,
return hash_all(g[v].literals, g[v].reports, succ);
};
splitAndFilterBuckets(buckets, make_split_key);
-}
-
-static never_inline
+}
+
+static never_inline
vector<vector<RoseVertex>>
splitRightMergeBuckets(const CandidateSet &candidates,
const RoseBuildImpl &build) {
const RoseGraph &g = build.g;
-
+
vector<vector<RoseVertex>> buckets(1);
buckets[0].reserve(candidates.size());
insert(&buckets[0], buckets[0].end(), candidates);
-
+
DEBUG_PRINTF("at start, %zu candidates in 1 bucket\n", candidates.size());
-
+
splitByReportSuffixBehaviour(g, buckets);
DEBUG_PRINTF("split by report/suffix, %zu buckets\n", buckets.size());
if (buckets.empty()) {
return buckets;
- }
-
+ }
+
splitByRightProps(g, buckets);
DEBUG_PRINTF("split by right-merge properties, %zu buckets\n",
buckets.size());
if (buckets.empty()) {
return buckets;
- }
-
+ }
+
return buckets;
-}
-
-static never_inline
+}
+
+static never_inline
void rightMergePass(CandidateSet &candidates, RoseBuildImpl &build,
- vector<RoseVertex> *dead, bool mergeRoses,
+ vector<RoseVertex> *dead, bool mergeRoses,
RoseAliasingInfo &rai) {
- DEBUG_PRINTF("begin\n");
-
+ DEBUG_PRINTF("begin\n");
+
if (candidates.empty()) {
return;
}
-
+
auto buckets = splitRightMergeBuckets(candidates, build);
-
+
for (const auto &bucket : buckets) {
assert(!bucket.empty());
for (auto it = bucket.begin(); it != bucket.end(); it++) {
@@ -1922,116 +1922,116 @@ void rightMergePass(CandidateSet &candidates, RoseBuildImpl &build,
candidates.erase(a);
break; // consider next a
}
- }
- }
- }
-
- DEBUG_PRINTF("%zu candidates remaining\n", candidates.size());
+ }
+ }
+ }
+
+ DEBUG_PRINTF("%zu candidates remaining\n", candidates.size());
assert(!hasOrphanedTops(build));
-}
-
-/**
- * \brief True if the given vertex has no siblings for the purposes of a
- * diamond merge.
- *
- * This is the case if it has no successors with more than one predecessor
- * (itself), or no predecessors with more than one successor (itself).
- */
-static
-bool hasNoDiamondSiblings(const RoseGraph &g, RoseVertex v) {
- if (has_successor(v, g)) {
- bool only_succ = true;
- for (const auto &w : adjacent_vertices_range(v, g)) {
+}
+
+/**
+ * \brief True if the given vertex has no siblings for the purposes of a
+ * diamond merge.
+ *
+ * This is the case if it has no successors with more than one predecessor
+ * (itself), or no predecessors with more than one successor (itself).
+ */
+static
+bool hasNoDiamondSiblings(const RoseGraph &g, RoseVertex v) {
+ if (has_successor(v, g)) {
+ bool only_succ = true;
+ for (const auto &w : adjacent_vertices_range(v, g)) {
if (in_degree(w, g) > 1) {
- only_succ = false;
- break;
- }
- }
- if (only_succ) {
- return true;
- }
- }
-
- // Any candidate vertex will have a predecessor; the only vertices without
- // preds are the root vertices.
- assert(in_edges(v, g).first != in_edges(v, g).second);
-
- bool only_pred = true;
- for (const auto &u : inv_adjacent_vertices_range(v, g)) {
+ only_succ = false;
+ break;
+ }
+ }
+ if (only_succ) {
+ return true;
+ }
+ }
+
+ // Any candidate vertex will have a predecessor; the only vertices without
+ // preds are the root vertices.
+ assert(in_edges(v, g).first != in_edges(v, g).second);
+
+ bool only_pred = true;
+ for (const auto &u : inv_adjacent_vertices_range(v, g)) {
if (out_degree(u, g) > 1) {
- only_pred = false;
- break;
- }
- }
-
- return only_pred;
-}
-
-/**
- * \brief Filter out some merge candidates that are not mergeable by a diamond
- * merge.
- */
-static
-void filterDiamondCandidates(RoseGraph &g, CandidateSet &candidates) {
- DEBUG_PRINTF("%zu candidates enter\n", candidates.size());
-
- vector<RoseVertex> dead;
- for (const auto &v : candidates) {
- if (hasNoDiamondSiblings(g, v)) {
- dead.push_back(v);
- }
- }
-
- for (const auto &v : dead) {
- candidates.erase(v);
- }
-
- DEBUG_PRINTF("pruned %zu candidates, leaving %zu\n", dead.size(),
- candidates.size());
-}
-
-void aliasRoles(RoseBuildImpl &build, bool mergeRoses) {
- const CompileContext &cc = build.cc;
- RoseGraph &g = build.g;
+ only_pred = false;
+ break;
+ }
+ }
+
+ return only_pred;
+}
+
+/**
+ * \brief Filter out some merge candidates that are not mergeable by a diamond
+ * merge.
+ */
+static
+void filterDiamondCandidates(RoseGraph &g, CandidateSet &candidates) {
+ DEBUG_PRINTF("%zu candidates enter\n", candidates.size());
+
+ vector<RoseVertex> dead;
+ for (const auto &v : candidates) {
+ if (hasNoDiamondSiblings(g, v)) {
+ dead.push_back(v);
+ }
+ }
+
+ for (const auto &v : dead) {
+ candidates.erase(v);
+ }
+
+ DEBUG_PRINTF("pruned %zu candidates, leaving %zu\n", dead.size(),
+ candidates.size());
+}
+
+void aliasRoles(RoseBuildImpl &build, bool mergeRoses) {
+ const CompileContext &cc = build.cc;
+ RoseGraph &g = build.g;
assert(!hasOrphanedTops(build));
assert(canImplementGraphs(build));
-
- if (!cc.grey.roseRoleAliasing || !cc.grey.roseGraphReduction) {
- return;
- }
-
- DEBUG_PRINTF("doing role aliasing mr=%d\n", (int)mergeRoses);
-
+
+ if (!cc.grey.roseRoleAliasing || !cc.grey.roseGraphReduction) {
+ return;
+ }
+
+ DEBUG_PRINTF("doing role aliasing mr=%d\n", (int)mergeRoses);
+
RoseAliasingInfo rai(build);
- mergeRoses &= cc.grey.mergeRose & cc.grey.roseMergeRosesDuringAliasing;
-
+ mergeRoses &= cc.grey.mergeRose & cc.grey.roseMergeRosesDuringAliasing;
+
CandidateSet candidates;
- findCandidates(build, &candidates);
-
- DEBUG_PRINTF("candidates %zu\n", candidates.size());
-
- vector<RoseVertex> dead;
- size_t old_dead_size = 0;
- do {
- old_dead_size = dead.size();
+ findCandidates(build, &candidates);
+
+ DEBUG_PRINTF("candidates %zu\n", candidates.size());
+
+ vector<RoseVertex> dead;
+ size_t old_dead_size = 0;
+ do {
+ old_dead_size = dead.size();
leftMergePass(candidates, build, &dead, rai);
rightMergePass(candidates, build, &dead, mergeRoses, rai);
- } while (old_dead_size != dead.size());
-
- /* Diamond merge passes cannot create extra merges as they require the same
- * succ and preds before merging --> that if a succ/pred was ineligible due
- * to a merge to different pred/succ before a diamond merge, it will still
- * be afterwards. */
- filterDiamondCandidates(g, candidates);
+ } while (old_dead_size != dead.size());
+
+ /* Diamond merge passes cannot create extra merges as they require the same
+ * succ and preds before merging --> that if a succ/pred was ineligible due
+ * to a merge to different pred/succ before a diamond merge, it will still
+ * be afterwards. */
+ filterDiamondCandidates(g, candidates);
diamondMergePass(candidates, build, &dead, mergeRoses, rai);
-
- DEBUG_PRINTF("killed %zu vertices\n", dead.size());
- build.removeVertices(dead);
+
+ DEBUG_PRINTF("killed %zu vertices\n", dead.size());
+ build.removeVertices(dead);
assert(!hasOrphanedTops(build));
assert(canImplementGraphs(build));
-}
-
+}
+
namespace {
struct DupeLeafKey {
explicit DupeLeafKey(const RoseVertexProps &litv)
@@ -2332,4 +2332,4 @@ void uncalcLeaves(RoseBuildImpl &build) {
build.removeVertices(dead);
}
-} // namespace ue2
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.h b/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.h
index 33f0bf2ddd..4655f10d52 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.h
@@ -1,48 +1,48 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
#ifndef ROSE_BUILD_ROLE_ALIASING_H
#define ROSE_BUILD_ROLE_ALIASING_H
-
+
/** \file
* \brief Rose Build: functions for reducing the size of the Rose graph
* through merging roles (RoseVertices) together.
*/
-namespace ue2 {
-
-class RoseBuildImpl;
-
-void aliasRoles(RoseBuildImpl &build, bool mergeRoses);
-
+namespace ue2 {
+
+class RoseBuildImpl;
+
+void aliasRoles(RoseBuildImpl &build, bool mergeRoses);
+
void mergeDupeLeaves(RoseBuildImpl &build);
void uncalcLeaves(RoseBuildImpl &build);
-} // namespace ue2
-
-#endif
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_scatter.cpp b/contrib/libs/hyperscan/src/rose/rose_build_scatter.cpp
index 35d66df9d2..87085ae9a8 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_scatter.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_scatter.cpp
@@ -1,131 +1,131 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_scatter.h"
-#include "ue2common.h"
-#include "util/container.h"
-#include "util/multibit_build.h"
-
-#include <cstring> // memset
-#include <set>
-
-using namespace std;
-
-namespace ue2 {
-
-template<typename T>
-static
-void rebase(vector<T> *p, u32 adj) {
- for (typename vector<T>::iterator it = p->begin(); it != p->end(); ++it) {
- DEBUG_PRINTF("=%u+%u\n", it->offset, adj);
- it->offset += adj;
- }
-}
-
-static
-void rebase(scatter_plan_raw *raw, u32 adj) {
- rebase(&raw->p_u64a, adj);
- rebase(&raw->p_u32, adj);
- rebase(&raw->p_u16, adj);
- rebase(&raw->p_u8, adj);
-}
-
-static
-void merge_in(scatter_plan_raw *out, const scatter_plan_raw &in) {
- insert(&out->p_u64a, out->p_u64a.end(), in.p_u64a);
- insert(&out->p_u32, out->p_u32.end(), in.p_u32);
- insert(&out->p_u16, out->p_u16.end(), in.p_u16);
- insert(&out->p_u8, out->p_u8.end(), in.p_u8);
-}
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_scatter.h"
+#include "ue2common.h"
+#include "util/container.h"
+#include "util/multibit_build.h"
+
+#include <cstring> // memset
+#include <set>
+
+using namespace std;
+
+namespace ue2 {
+
+template<typename T>
+static
+void rebase(vector<T> *p, u32 adj) {
+ for (typename vector<T>::iterator it = p->begin(); it != p->end(); ++it) {
+ DEBUG_PRINTF("=%u+%u\n", it->offset, adj);
+ it->offset += adj;
+ }
+}
+
+static
+void rebase(scatter_plan_raw *raw, u32 adj) {
+ rebase(&raw->p_u64a, adj);
+ rebase(&raw->p_u32, adj);
+ rebase(&raw->p_u16, adj);
+ rebase(&raw->p_u8, adj);
+}
+
+static
+void merge_in(scatter_plan_raw *out, const scatter_plan_raw &in) {
+ insert(&out->p_u64a, out->p_u64a.end(), in.p_u64a);
+ insert(&out->p_u32, out->p_u32.end(), in.p_u32);
+ insert(&out->p_u16, out->p_u16.end(), in.p_u16);
+ insert(&out->p_u8, out->p_u8.end(), in.p_u8);
+}
+
scatter_plan_raw buildStateScatterPlan(u32 role_state_offset,
u32 role_state_count, u32 left_array_count, u32 left_prefix_count,
const RoseStateOffsets &stateOffsets, bool streaming,
u32 leaf_array_count, u32 outfix_begin, u32 outfix_end) {
scatter_plan_raw out;
- /* init role array */
- scatter_plan_raw spr_role;
- mmbBuildClearPlan(role_state_count, &spr_role);
- rebase(&spr_role, role_state_offset);
+ /* init role array */
+ scatter_plan_raw spr_role;
+ mmbBuildClearPlan(role_state_count, &spr_role);
+ rebase(&spr_role, role_state_offset);
merge_in(&out, spr_role);
-
- /* init rose array: turn on prefixes */
- u32 rose_array_offset = stateOffsets.activeLeftArray;
- scatter_plan_raw spr_rose;
- mmbBuildInitRangePlan(left_array_count, 0, left_prefix_count, &spr_rose);
- rebase(&spr_rose, rose_array_offset);
+
+ /* init rose array: turn on prefixes */
+ u32 rose_array_offset = stateOffsets.activeLeftArray;
+ scatter_plan_raw spr_rose;
+ mmbBuildInitRangePlan(left_array_count, 0, left_prefix_count, &spr_rose);
+ rebase(&spr_rose, rose_array_offset);
merge_in(&out, spr_rose);
-
- /* suffix/outfix array */
- scatter_plan_raw spr_leaf;
- if (streaming) {
- mmbBuildInitRangePlan(leaf_array_count, outfix_begin, outfix_end,
- &spr_leaf);
- } else {
- mmbBuildClearPlan(leaf_array_count, &spr_leaf);
- }
- rebase(&spr_leaf, stateOffsets.activeLeafArray);
+
+ /* suffix/outfix array */
+ scatter_plan_raw spr_leaf;
+ if (streaming) {
+ mmbBuildInitRangePlan(leaf_array_count, outfix_begin, outfix_end,
+ &spr_leaf);
+ } else {
+ mmbBuildClearPlan(leaf_array_count, &spr_leaf);
+ }
+ rebase(&spr_leaf, stateOffsets.activeLeafArray);
merge_in(&out, spr_leaf);
return out;
-}
-
-u32 aux_size(const scatter_plan_raw &raw) {
- u32 rv = 0;
-
- rv += byte_length(raw.p_u64a);
- rv += byte_length(raw.p_u32);
- rv += byte_length(raw.p_u16);
- rv += byte_length(raw.p_u8);
-
- return rv;
-}
-
-void write_out(scatter_full_plan *plan_out, void *aux_out,
- const scatter_plan_raw &raw, u32 aux_base_offset) {
- memset(plan_out, 0, sizeof(*plan_out));
-
-#define DO_CASE(t) \
- if (!raw.p_##t.empty()) { \
- plan_out->s_##t##_offset = aux_base_offset; \
- plan_out->s_##t##_count = raw.p_##t.size(); \
- assert(ISALIGNED_N((char *)aux_out + aux_base_offset, \
- alignof(scatter_unit_##t))); \
- memcpy((char *)aux_out + aux_base_offset, raw.p_##t.data(), \
- byte_length(raw.p_##t)); \
- aux_base_offset += byte_length(raw.p_##t); \
- }
-
- DO_CASE(u64a);
- DO_CASE(u32);
- DO_CASE(u16);
- DO_CASE(u8);
-}
-
-} // namespace ue2
+}
+
+u32 aux_size(const scatter_plan_raw &raw) {
+ u32 rv = 0;
+
+ rv += byte_length(raw.p_u64a);
+ rv += byte_length(raw.p_u32);
+ rv += byte_length(raw.p_u16);
+ rv += byte_length(raw.p_u8);
+
+ return rv;
+}
+
+void write_out(scatter_full_plan *plan_out, void *aux_out,
+ const scatter_plan_raw &raw, u32 aux_base_offset) {
+ memset(plan_out, 0, sizeof(*plan_out));
+
+#define DO_CASE(t) \
+ if (!raw.p_##t.empty()) { \
+ plan_out->s_##t##_offset = aux_base_offset; \
+ plan_out->s_##t##_count = raw.p_##t.size(); \
+ assert(ISALIGNED_N((char *)aux_out + aux_base_offset, \
+ alignof(scatter_unit_##t))); \
+ memcpy((char *)aux_out + aux_base_offset, raw.p_##t.data(), \
+ byte_length(raw.p_##t)); \
+ aux_base_offset += byte_length(raw.p_##t); \
+ }
+
+ DO_CASE(u64a);
+ DO_CASE(u32);
+ DO_CASE(u16);
+ DO_CASE(u8);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_scatter.h b/contrib/libs/hyperscan/src/rose/rose_build_scatter.h
index 7ce1c034ab..67a82b9937 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_scatter.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_scatter.h
@@ -1,60 +1,60 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_BUILD_SCATTER_H
-#define ROSE_BUILD_SCATTER_H
-
-#include "rose_internal.h"
-#include "util/scatter.h"
-
-#include <vector>
-
-namespace ue2 {
-
-class RoseBuildImpl;
-
-struct scatter_plan_raw {
- std::vector<scatter_unit_u64a> p_u64a;
- std::vector<scatter_unit_u32> p_u32;
- std::vector<scatter_unit_u16> p_u16;
- std::vector<scatter_unit_u8> p_u8;
-};
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_SCATTER_H
+#define ROSE_BUILD_SCATTER_H
+
+#include "rose_internal.h"
+#include "util/scatter.h"
+
+#include <vector>
+
+namespace ue2 {
+
+class RoseBuildImpl;
+
+struct scatter_plan_raw {
+ std::vector<scatter_unit_u64a> p_u64a;
+ std::vector<scatter_unit_u32> p_u32;
+ std::vector<scatter_unit_u16> p_u16;
+ std::vector<scatter_unit_u8> p_u8;
+};
+
scatter_plan_raw buildStateScatterPlan(u32 role_state_offset,
u32 role_state_count, u32 left_array_count, u32 left_prefix_count,
const RoseStateOffsets &stateOffsets, bool streaming,
u32 leaf_array_count, u32 outfix_begin, u32 outfix_end);
-
-u32 aux_size(const scatter_plan_raw &raw);
-
-void write_out(scatter_full_plan *plan_out, void *aux_out,
- const scatter_plan_raw &raw, u32 aux_base_offset);
-
-} // namespace ue2
-
-#endif
+
+u32 aux_size(const scatter_plan_raw &raw);
+
+void write_out(scatter_full_plan *plan_out, void *aux_out,
+ const scatter_plan_raw &raw, u32 aux_base_offset);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_util.h b/contrib/libs/hyperscan/src/rose/rose_build_util.h
index 2318bee856..81bb68459b 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_util.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_util.h
@@ -1,62 +1,62 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_BUILD_UTIL_H
-#define ROSE_BUILD_UTIL_H
-
-#include "rose_graph.h"
-#include "util/graph.h"
-
-#include <algorithm>
-
-namespace ue2 {
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_UTIL_H
+#define ROSE_BUILD_UTIL_H
+
+#include "rose_graph.h"
+#include "util/graph.h"
+
+#include <algorithm>
+
+namespace ue2 {
+
/** Max allowed width for transient graphs in block mode */
#define ROSE_BLOCK_TRANSIENT_MAX_WIDTH 255U
-
-/**
- * \brief Add two Rose depths together, coping correctly with infinity at
- * ROSE_BOUND_INF.
- */
-static inline
-u32 add_rose_depth(u32 a, u32 b) {
- assert(a <= ROSE_BOUND_INF);
- assert(b <= ROSE_BOUND_INF);
-
- if (a == ROSE_BOUND_INF || b == ROSE_BOUND_INF) {
- return ROSE_BOUND_INF;
- }
-
- u32 rv = a + b;
- assert(rv >= a && rv >= b);
- return rv;
-}
-
-} // namespace ue2
-
-#endif // ROSE_BUILD_UTIL_H
+
+/**
+ * \brief Add two Rose depths together, coping correctly with infinity at
+ * ROSE_BOUND_INF.
+ */
+static inline
+u32 add_rose_depth(u32 a, u32 b) {
+ assert(a <= ROSE_BOUND_INF);
+ assert(b <= ROSE_BOUND_INF);
+
+ if (a == ROSE_BOUND_INF || b == ROSE_BOUND_INF) {
+ return ROSE_BOUND_INF;
+ }
+
+ u32 rv = a + b;
+ assert(rv >= a && rv >= b);
+ return rv;
+}
+
+} // namespace ue2
+
+#endif // ROSE_BUILD_UTIL_H
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_width.cpp b/contrib/libs/hyperscan/src/rose/rose_build_width.cpp
index 422e77d479..182b62ee6f 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_width.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_width.cpp
@@ -1,255 +1,255 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_width.h"
-
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_dump.h"
-#include "nfagraph/ng_width.h"
-#include "rose_build_impl.h"
-#include "ue2common.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-
-#include <algorithm>
-
-using namespace std;
-
-namespace ue2 {
-
-static
-bool is_end_anchored(const RoseGraph &g, RoseVertex v) {
- for (auto w : adjacent_vertices_range(v, g)) {
- if (g[w].eod_accept) {
- return true;
- }
- }
-
- return false;
-}
-
-u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) {
- if (table != ROSE_FLOATING && table != ROSE_ANCHORED &&
- table != ROSE_EOD_ANCHORED) {
- /* handle other tables if ever required */
- assert(0);
- return 0;
- }
-
- const RoseGraph &g = tbi.g;
-
- vector<RoseVertex> table_verts;
-
- for (auto v : vertices_range(g)) {
- if (tbi.hasLiteralInTable(v, table)) {
- table_verts.push_back(v);
- }
- }
-
- set<RoseVertex> reachable;
- find_reachable(g, table_verts, &reachable);
-
- u32 minWidth = ROSE_BOUND_INF;
- for (auto v : reachable) {
- if (g[v].eod_accept) {
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_width.h"
+
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_dump.h"
+#include "nfagraph/ng_width.h"
+#include "rose_build_impl.h"
+#include "ue2common.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+
+#include <algorithm>
+
+using namespace std;
+
+namespace ue2 {
+
+static
+bool is_end_anchored(const RoseGraph &g, RoseVertex v) {
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (g[w].eod_accept) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) {
+ if (table != ROSE_FLOATING && table != ROSE_ANCHORED &&
+ table != ROSE_EOD_ANCHORED) {
+ /* handle other tables if ever required */
+ assert(0);
+ return 0;
+ }
+
+ const RoseGraph &g = tbi.g;
+
+ vector<RoseVertex> table_verts;
+
+ for (auto v : vertices_range(g)) {
+ if (tbi.hasLiteralInTable(v, table)) {
+ table_verts.push_back(v);
+ }
+ }
+
+ set<RoseVertex> reachable;
+ find_reachable(g, table_verts, &reachable);
+
+ u32 minWidth = ROSE_BOUND_INF;
+ for (auto v : reachable) {
+ if (g[v].eod_accept) {
DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].index);
- continue;
- }
-
- const u32 w = g[v].min_offset;
-
- if (!g[v].reports.empty()) {
+ continue;
+ }
+
+ const u32 w = g[v].min_offset;
+
+ if (!g[v].reports.empty()) {
DEBUG_PRINTF("%zu can fire report at offset %u\n", g[v].index, w);
- minWidth = min(minWidth, w);
- }
-
- if (is_end_anchored(g, v)) {
+ minWidth = min(minWidth, w);
+ }
+
+ if (is_end_anchored(g, v)) {
DEBUG_PRINTF("%zu can fire eod report at offset %u\n", g[v].index,
w);
- minWidth = min(minWidth, w);
- }
-
- if (g[v].suffix) {
- depth suffix_width = findMinWidth(g[v].suffix, g[v].suffix.top);
- assert(suffix_width.is_reachable());
- DEBUG_PRINTF("%zu has suffix with top %u (width %s), can fire "
- "report at %u\n",
+ minWidth = min(minWidth, w);
+ }
+
+ if (g[v].suffix) {
+ depth suffix_width = findMinWidth(g[v].suffix, g[v].suffix.top);
+ assert(suffix_width.is_reachable());
+ DEBUG_PRINTF("%zu has suffix with top %u (width %s), can fire "
+ "report at %u\n",
g[v].index, g[v].suffix.top, suffix_width.str().c_str(),
- w + suffix_width);
- minWidth = min(minWidth, w + suffix_width);
- }
- }
-
- /* TODO: take into account the chain relationship between the mpv and other
- * engines */
- DEBUG_PRINTF("min width %u\n", minWidth);
- return minWidth;
-}
-
-u32 findMaxBAWidth(const RoseBuildImpl &tbi) {
- const RoseGraph &g = tbi.g;
- if (!isLeafNode(tbi.root, g)) {
- DEBUG_PRINTF("floating literal -> no max width\n");
- return ROSE_BOUND_INF;
- }
-
- u64a maxWidth = 0;
-
- for (const auto &outfix : tbi.outfixes) {
- maxWidth = max(maxWidth, (u64a)outfix.maxBAWidth);
- if (maxWidth >= ROSE_BOUND_INF) {
- DEBUG_PRINTF("outfix with no max ba width\n");
- return ROSE_BOUND_INF;
- }
- }
-
- // Everyone's anchored, so the max width can be taken from the max
- // max_offset on our vertices (so long as all accepts are EOD).
- for (auto v : vertices_range(g)) {
- if (!g[v].reports.empty() && !g[v].eod_accept) {
- DEBUG_PRINTF("accept not at eod\n");
- return ROSE_BOUND_INF;
- }
-
- if (g[v].reports.empty() && !g[v].suffix) {
- continue;
- }
-
- assert(g[v].eod_accept || g[v].suffix);
-
- u64a w = g[v].max_offset;
-
- if (g[v].suffix) {
- if (has_non_eod_accepts(g[v].suffix)) {
- return ROSE_BOUND_INF;
- }
- depth suffix_width = findMaxWidth(g[v].suffix, g[v].suffix.top);
- DEBUG_PRINTF("suffix max width for top %u is %s\n", g[v].suffix.top,
- suffix_width.str().c_str());
- assert(suffix_width.is_reachable());
- if (!suffix_width.is_finite()) {
- DEBUG_PRINTF("suffix too wide\n");
- return ROSE_BOUND_INF;
- }
-
- w += suffix_width;
- }
-
- maxWidth = max(maxWidth, w);
- if (maxWidth >= ROSE_BOUND_INF) {
- DEBUG_PRINTF("too wide\n");
- return ROSE_BOUND_INF;
- }
- }
-
- DEBUG_PRINTF("max ba width %llu\n", maxWidth);
- assert(maxWidth < ROSE_BOUND_INF);
- return maxWidth;
-}
-
-u32 findMaxBAWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) {
- const RoseGraph &g = tbi.g;
- if (!isLeafNode(tbi.root, g) && table == ROSE_FLOATING) {
- DEBUG_PRINTF("floating literal -> no max width\n");
- return ROSE_BOUND_INF;
- }
-
- if (table != ROSE_FLOATING && table != ROSE_ANCHORED) {
- /* handle other tables if ever required */
- assert(0);
- return ROSE_BOUND_INF;
- }
-
- DEBUG_PRINTF("looking for a max ba width for %s\n",
- table == ROSE_FLOATING ? "floating" : "anchored");
-
- vector<RoseVertex> table_verts;
-
- for (auto v : vertices_range(g)) {
- if ((table == ROSE_FLOATING && tbi.isFloating(v))
- || (table == ROSE_ANCHORED && tbi.isAnchored(v))) {
- table_verts.push_back(v);
- }
- }
-
- set<RoseVertex> reachable;
- find_reachable(g, table_verts, &reachable);
-
- u64a maxWidth = 0;
- // Everyone's anchored, so the max width can be taken from the max
- // max_offset on our vertices (so long as all accepts are ACCEPT_EOD).
- for (auto v : reachable) {
+ w + suffix_width);
+ minWidth = min(minWidth, w + suffix_width);
+ }
+ }
+
+ /* TODO: take into account the chain relationship between the mpv and other
+ * engines */
+ DEBUG_PRINTF("min width %u\n", minWidth);
+ return minWidth;
+}
+
+u32 findMaxBAWidth(const RoseBuildImpl &tbi) {
+ const RoseGraph &g = tbi.g;
+ if (!isLeafNode(tbi.root, g)) {
+ DEBUG_PRINTF("floating literal -> no max width\n");
+ return ROSE_BOUND_INF;
+ }
+
+ u64a maxWidth = 0;
+
+ for (const auto &outfix : tbi.outfixes) {
+ maxWidth = max(maxWidth, (u64a)outfix.maxBAWidth);
+ if (maxWidth >= ROSE_BOUND_INF) {
+ DEBUG_PRINTF("outfix with no max ba width\n");
+ return ROSE_BOUND_INF;
+ }
+ }
+
+ // Everyone's anchored, so the max width can be taken from the max
+ // max_offset on our vertices (so long as all accepts are EOD).
+ for (auto v : vertices_range(g)) {
+ if (!g[v].reports.empty() && !g[v].eod_accept) {
+ DEBUG_PRINTF("accept not at eod\n");
+ return ROSE_BOUND_INF;
+ }
+
+ if (g[v].reports.empty() && !g[v].suffix) {
+ continue;
+ }
+
+ assert(g[v].eod_accept || g[v].suffix);
+
+ u64a w = g[v].max_offset;
+
+ if (g[v].suffix) {
+ if (has_non_eod_accepts(g[v].suffix)) {
+ return ROSE_BOUND_INF;
+ }
+ depth suffix_width = findMaxWidth(g[v].suffix, g[v].suffix.top);
+ DEBUG_PRINTF("suffix max width for top %u is %s\n", g[v].suffix.top,
+ suffix_width.str().c_str());
+ assert(suffix_width.is_reachable());
+ if (!suffix_width.is_finite()) {
+ DEBUG_PRINTF("suffix too wide\n");
+ return ROSE_BOUND_INF;
+ }
+
+ w += suffix_width;
+ }
+
+ maxWidth = max(maxWidth, w);
+ if (maxWidth >= ROSE_BOUND_INF) {
+ DEBUG_PRINTF("too wide\n");
+ return ROSE_BOUND_INF;
+ }
+ }
+
+ DEBUG_PRINTF("max ba width %llu\n", maxWidth);
+ assert(maxWidth < ROSE_BOUND_INF);
+ return maxWidth;
+}
+
+u32 findMaxBAWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) {
+ const RoseGraph &g = tbi.g;
+ if (!isLeafNode(tbi.root, g) && table == ROSE_FLOATING) {
+ DEBUG_PRINTF("floating literal -> no max width\n");
+ return ROSE_BOUND_INF;
+ }
+
+ if (table != ROSE_FLOATING && table != ROSE_ANCHORED) {
+ /* handle other tables if ever required */
+ assert(0);
+ return ROSE_BOUND_INF;
+ }
+
+ DEBUG_PRINTF("looking for a max ba width for %s\n",
+ table == ROSE_FLOATING ? "floating" : "anchored");
+
+ vector<RoseVertex> table_verts;
+
+ for (auto v : vertices_range(g)) {
+ if ((table == ROSE_FLOATING && tbi.isFloating(v))
+ || (table == ROSE_ANCHORED && tbi.isAnchored(v))) {
+ table_verts.push_back(v);
+ }
+ }
+
+ set<RoseVertex> reachable;
+ find_reachable(g, table_verts, &reachable);
+
+ u64a maxWidth = 0;
+ // Everyone's anchored, so the max width can be taken from the max
+ // max_offset on our vertices (so long as all accepts are ACCEPT_EOD).
+ for (auto v : reachable) {
DEBUG_PRINTF("inspecting vert %zu\n", g[v].index);
-
- if (g[v].eod_accept) {
+
+ if (g[v].eod_accept) {
DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].index);
- continue;
- }
-
- if (!g[v].reports.empty()) {
- DEBUG_PRINTF("accept not at eod\n");
- return ROSE_BOUND_INF;
- }
-
- u64a w = g[v].max_offset;
-
- u64a follow_max = tbi.calcSuccMaxBound(v); /* may have a long bound to
- accept_eod node */
-
- if (g[v].suffix) {
- if (has_non_eod_accepts(g[v].suffix)) {
- DEBUG_PRINTF("has accept\n");
- return ROSE_BOUND_INF;
- }
- depth suffix_width = findMaxWidth(g[v].suffix);
- DEBUG_PRINTF("suffix max width %s\n", suffix_width.str().c_str());
- assert(suffix_width.is_reachable());
- if (!suffix_width.is_finite()) {
- DEBUG_PRINTF("suffix too wide\n");
- return ROSE_BOUND_INF;
- }
- follow_max = max(follow_max, (u64a)suffix_width);
- }
-
- w += follow_max;
-
- DEBUG_PRINTF("w %llu\n", w);
-
- maxWidth = max(maxWidth, w);
- if (maxWidth >= ROSE_BOUND_INF) {
- DEBUG_PRINTF("too wide\n");
- return ROSE_BOUND_INF;
- }
- }
-
- DEBUG_PRINTF("max ba width %llu\n", maxWidth);
- assert(maxWidth < ROSE_BOUND_INF);
- return maxWidth;
-}
-
-} // namespace ue2
+ continue;
+ }
+
+ if (!g[v].reports.empty()) {
+ DEBUG_PRINTF("accept not at eod\n");
+ return ROSE_BOUND_INF;
+ }
+
+ u64a w = g[v].max_offset;
+
+ u64a follow_max = tbi.calcSuccMaxBound(v); /* may have a long bound to
+ accept_eod node */
+
+ if (g[v].suffix) {
+ if (has_non_eod_accepts(g[v].suffix)) {
+ DEBUG_PRINTF("has accept\n");
+ return ROSE_BOUND_INF;
+ }
+ depth suffix_width = findMaxWidth(g[v].suffix);
+ DEBUG_PRINTF("suffix max width %s\n", suffix_width.str().c_str());
+ assert(suffix_width.is_reachable());
+ if (!suffix_width.is_finite()) {
+ DEBUG_PRINTF("suffix too wide\n");
+ return ROSE_BOUND_INF;
+ }
+ follow_max = max(follow_max, (u64a)suffix_width);
+ }
+
+ w += follow_max;
+
+ DEBUG_PRINTF("w %llu\n", w);
+
+ maxWidth = max(maxWidth, w);
+ if (maxWidth >= ROSE_BOUND_INF) {
+ DEBUG_PRINTF("too wide\n");
+ return ROSE_BOUND_INF;
+ }
+ }
+
+ DEBUG_PRINTF("max ba width %llu\n", maxWidth);
+ assert(maxWidth < ROSE_BOUND_INF);
+ return maxWidth;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_width.h b/contrib/libs/hyperscan/src/rose/rose_build_width.h
index 1cbc1f1e12..a395b62daf 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_width.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_width.h
@@ -1,66 +1,66 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_BUILD_WIDTH_H
-#define ROSE_BUILD_WIDTH_H
-
-#include "rose_build_impl.h"
-#include "ue2common.h"
-
-namespace ue2 {
-
-class RoseBuildImpl;
-
-/* returns a lower bound on the minimum number of bytes required for match to be
- * raised up to the user which requires the given literal table to be used
- *
- * returns ROSE_BOUND_INF if the table can never produce matches */
-u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table);
-
-/* returns an upper bound on the maximum length of a buffer that can result in
- * matches. If there are any patterns which are not bianchored (start and end
- * anchored), then there is no such limit and ROSE_BOUND_INF is returned.
- */
-u32 findMaxBAWidth(const RoseBuildImpl &tbi);
-
-/* returns an upper bound on the maximum length of a buffer that can result in
- * matches and requires that the given table to be used. If there are any
- * patterns which are not bianchored (start and end anchored), then there is no
- * such limit and ROSE_BOUND_INF is returned.
- */
-u32 findMaxBAWidth(const RoseBuildImpl &tbi, enum rose_literal_table table);
-
-/**
- * Note: there is no function for determining the min width of the whole rose
- * as this is more easily done by the NG layer which has access to the full
- * nfagraphs before they are chopped into little pieces.
- */
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_WIDTH_H
+#define ROSE_BUILD_WIDTH_H
+
+#include "rose_build_impl.h"
+#include "ue2common.h"
+
+namespace ue2 {
+
+class RoseBuildImpl;
+
+/* returns a lower bound on the minimum number of bytes required for match to be
+ * raised up to the user which requires the given literal table to be used
+ *
+ * returns ROSE_BOUND_INF if the table can never produce matches */
+u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table);
+
+/* returns an upper bound on the maximum length of a buffer that can result in
+ * matches. If there are any patterns which are not bianchored (start and end
+ * anchored), then there is no such limit and ROSE_BOUND_INF is returned.
+ */
+u32 findMaxBAWidth(const RoseBuildImpl &tbi);
+
+/* returns an upper bound on the maximum length of a buffer that can result in
+ * matches and requires that the given table to be used. If there are any
+ * patterns which are not bianchored (start and end anchored), then there is no
+ * such limit and ROSE_BOUND_INF is returned.
+ */
+u32 findMaxBAWidth(const RoseBuildImpl &tbi, enum rose_literal_table table);
+
+/**
+ * Note: there is no function for determining the min width of the whole rose
+ * as this is more easily done by the NG layer which has access to the full
+ * nfagraphs before they are chopped into little pieces.
+ */
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/rose_common.h b/contrib/libs/hyperscan/src/rose/rose_common.h
index 16347b1985..34678b8fcc 100644
--- a/contrib/libs/hyperscan/src/rose/rose_common.h
+++ b/contrib/libs/hyperscan/src/rose/rose_common.h
@@ -1,46 +1,46 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_COMMON_H
-#define ROSE_COMMON_H
-
-// Common defs available to build-time clients as well as runtime.
-
-#define ROSE_BOUND_INF (~0U)
-#define MAX_MASK2_WIDTH 32
-
-// Max block width to use the combined small-block matcher on, instead of
-// running the floating and anchored tables.
-#define ROSE_SMALL_BLOCK_LEN 32
-
-/** \brief Length in bytes of a reach bitvector, used by the lookaround code. */
-#define REACH_BITVECTOR_LEN 32
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_COMMON_H
+#define ROSE_COMMON_H
+
+// Common defs available to build-time clients as well as runtime.
+
+#define ROSE_BOUND_INF (~0U)
+#define MAX_MASK2_WIDTH 32
+
+// Max block width to use the combined small-block matcher on, instead of
+// running the floating and anchored tables.
+#define ROSE_SMALL_BLOCK_LEN 32
+
+/** \brief Length in bytes of a reach bitvector, used by the lookaround code. */
+#define REACH_BITVECTOR_LEN 32
+
/** \brief Length in bytes of a reach bitvector for multi-path lookaround. */
#define MULTI_REACH_BITVECTOR_LEN 256
@@ -53,4 +53,4 @@
/** \brief Value used to represent an invalid Rose program offset. */
#define ROSE_INVALID_PROG_OFFSET 0
-#endif // ROSE_COMMON_H
+#endif // ROSE_COMMON_H
diff --git a/contrib/libs/hyperscan/src/rose/rose_graph.h b/contrib/libs/hyperscan/src/rose/rose_graph.h
index d526463934..b5bf1985d8 100644
--- a/contrib/libs/hyperscan/src/rose/rose_graph.h
+++ b/contrib/libs/hyperscan/src/rose/rose_graph.h
@@ -1,69 +1,69 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief BGL graph structures used internally by the Rose build process.
- *
- * BGL graph structures used internally by the build-time portion of Rose. The
- * graph used for input is in rose_in_graph.h since it's part of the RoseBuild
- * external API.
- */
-
-#ifndef ROSE_GRAPH_H
-#define ROSE_GRAPH_H
-
-#include "ue2common.h"
-#include "rose_build.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief BGL graph structures used internally by the Rose build process.
+ *
+ * BGL graph structures used internally by the build-time portion of Rose. The
+ * graph used for input is in rose_in_graph.h since it's part of the RoseBuild
+ * external API.
+ */
+
+#ifndef ROSE_GRAPH_H
+#define ROSE_GRAPH_H
+
+#include "ue2common.h"
+#include "rose_build.h"
#include "rose_internal.h"
-#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
-#include "util/depth.h"
+#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
+#include "util/depth.h"
#include "util/flat_containers.h"
#include "util/ue2_graph.h"
-
-#include <memory>
-#include <set>
-
-namespace ue2 {
-
-struct CastleProto;
-struct raw_dfa;
-struct raw_som_dfa;
+
+#include <memory>
+#include <set>
+
+namespace ue2 {
+
+struct CastleProto;
+struct raw_dfa;
+struct raw_som_dfa;
struct TamaProto;
-
-/** \brief Table type for a literal. */
-enum rose_literal_table {
- ROSE_ANCHORED, //!< literals anchored to start
- ROSE_FLOATING, //!< general floating literals
- ROSE_EOD_ANCHORED, //!< literals that match near EOD
- ROSE_ANCHORED_SMALL_BLOCK, //!< anchored literals for small block table
- ROSE_EVENT //!< "literal-like" events, such as EOD
-};
-
+
+/** \brief Table type for a literal. */
+enum rose_literal_table {
+ ROSE_ANCHORED, //!< literals anchored to start
+ ROSE_FLOATING, //!< general floating literals
+ ROSE_EOD_ANCHORED, //!< literals that match near EOD
+ ROSE_ANCHORED_SMALL_BLOCK, //!< anchored literals for small block table
+ ROSE_EVENT //!< "literal-like" events, such as EOD
+};
+
/** \brief Edge history types. */
enum RoseRoleHistory {
ROSE_ROLE_HISTORY_NONE, //!< no special history
@@ -72,159 +72,159 @@ enum RoseRoleHistory {
ROSE_ROLE_HISTORY_INVALID //!< history not yet assigned
};
-#include "util/order_check.h"
-
-/** \brief Provides information about the (pre|in)fix engine to the left of a
- * role. */
-struct LeftEngInfo {
- std::shared_ptr<NGHolder> graph;
- std::shared_ptr<CastleProto> castle;
- std::shared_ptr<raw_dfa> dfa;
- std::shared_ptr<raw_som_dfa> haig;
+#include "util/order_check.h"
+
+/** \brief Provides information about the (pre|in)fix engine to the left of a
+ * role. */
+struct LeftEngInfo {
+ std::shared_ptr<NGHolder> graph;
+ std::shared_ptr<CastleProto> castle;
+ std::shared_ptr<raw_dfa> dfa;
+ std::shared_ptr<raw_som_dfa> haig;
std::shared_ptr<TamaProto> tamarama;
- u32 lag = 0U;
- ReportID leftfix_report = MO_INVALID_IDX;
+ u32 lag = 0U;
+ ReportID leftfix_report = MO_INVALID_IDX;
depth dfa_min_width{0};
- depth dfa_max_width = depth::infinity();
-
- bool operator==(const LeftEngInfo &other) const {
- return other.graph == graph
- && other.castle == castle
- && other.dfa == dfa
- && other.haig == haig
+ depth dfa_max_width = depth::infinity();
+
+ bool operator==(const LeftEngInfo &other) const {
+ return other.graph == graph
+ && other.castle == castle
+ && other.dfa == dfa
+ && other.haig == haig
&& other.tamarama == tamarama
- && other.lag == lag
- && other.leftfix_report == leftfix_report;
- }
- bool operator!=(const LeftEngInfo &other) const {
- return !(*this == other);
- }
- bool operator<(const LeftEngInfo &b) const {
- const LeftEngInfo &a = *this;
- ORDER_CHECK(graph);
- ORDER_CHECK(castle);
- ORDER_CHECK(dfa);
- ORDER_CHECK(haig);
+ && other.lag == lag
+ && other.leftfix_report == leftfix_report;
+ }
+ bool operator!=(const LeftEngInfo &other) const {
+ return !(*this == other);
+ }
+ bool operator<(const LeftEngInfo &b) const {
+ const LeftEngInfo &a = *this;
+ ORDER_CHECK(graph);
+ ORDER_CHECK(castle);
+ ORDER_CHECK(dfa);
+ ORDER_CHECK(haig);
ORDER_CHECK(tamarama);
- ORDER_CHECK(lag);
- ORDER_CHECK(leftfix_report);
- return false;
- }
+ ORDER_CHECK(lag);
+ ORDER_CHECK(leftfix_report);
+ return false;
+ }
size_t hash() const;
- void reset(void);
+ void reset(void);
explicit operator bool() const;
- bool tracksSom() const { return !!haig; }
-};
-
-/** \brief Provides information about the suffix engine to the right of a
- * role. */
-struct RoseSuffixInfo {
- u32 top = 0;
- std::shared_ptr<NGHolder> graph; /* if triggers a trailing nfa */
- std::shared_ptr<CastleProto> castle;
- std::shared_ptr<raw_som_dfa> haig;
- std::shared_ptr<raw_dfa> rdfa;
+ bool tracksSom() const { return !!haig; }
+};
+
+/** \brief Provides information about the suffix engine to the right of a
+ * role. */
+struct RoseSuffixInfo {
+ u32 top = 0;
+ std::shared_ptr<NGHolder> graph; /* if triggers a trailing nfa */
+ std::shared_ptr<CastleProto> castle;
+ std::shared_ptr<raw_som_dfa> haig;
+ std::shared_ptr<raw_dfa> rdfa;
std::shared_ptr<TamaProto> tamarama;
depth dfa_min_width{0};
- depth dfa_max_width = depth::infinity();
-
- bool operator==(const RoseSuffixInfo &b) const;
- bool operator!=(const RoseSuffixInfo &b) const { return !(*this == b); }
- bool operator<(const RoseSuffixInfo &b) const;
+ depth dfa_max_width = depth::infinity();
+
+ bool operator==(const RoseSuffixInfo &b) const;
+ bool operator!=(const RoseSuffixInfo &b) const { return !(*this == b); }
+ bool operator<(const RoseSuffixInfo &b) const;
size_t hash() const;
- void reset(void);
+ void reset(void);
explicit operator bool() const { return graph || castle || haig || rdfa || tamarama; }
-};
-
-/** \brief Properties attached to each Rose graph vertex. */
-struct RoseVertexProps {
- /** \brief Unique dense vertex index. Used for BGL algorithms. */
+};
+
+/** \brief Properties attached to each Rose graph vertex. */
+struct RoseVertexProps {
+ /** \brief Unique dense vertex index. Used for BGL algorithms. */
size_t index = ~size_t{0};
-
- /** \brief IDs of literals in the Rose literal map. */
- flat_set<u32> literals;
-
- /**
- * \brief If true, this vertex is a virtual vertex for firing reports at
- * EOD. These vertices must have reports and have no associated literals.
- */
- bool eod_accept = false;
-
- /** \brief Report IDs to fire. */
- flat_set<ReportID> reports;
-
- /** \brief Bitmask of groups that this role sets. */
- rose_group groups = 0;
-
- /** \brief Minimum role (end of literal) offset depth in bytes. */
- u32 min_offset = ~u32{0};
-
- /** \brief Maximum role (end of literal) offset depth in bytes */
- u32 max_offset = 0;
-
- /** \brief SOM for the role is offset from end match offset */
- u32 som_adjust = 0;
-
- /** \brief Prefix/infix engine to the left of this role. */
- LeftEngInfo left;
-
- /**
- * \brief Suffix engine to the right of this role.
- *
- * Note: information about triggered infixes is associated with the left of
- * the destination role.
- */
- RoseSuffixInfo suffix;
-
- bool isBoring(void) const;
- bool fixedOffset(void) const;
-};
-
-/** \brief Properties attached to each Rose graph edge. */
-/* bounds are distance from end of prev to start of the next */
-struct RoseEdgeProps {
+
+ /** \brief IDs of literals in the Rose literal map. */
+ flat_set<u32> literals;
+
+ /**
+ * \brief If true, this vertex is a virtual vertex for firing reports at
+ * EOD. These vertices must have reports and have no associated literals.
+ */
+ bool eod_accept = false;
+
+ /** \brief Report IDs to fire. */
+ flat_set<ReportID> reports;
+
+ /** \brief Bitmask of groups that this role sets. */
+ rose_group groups = 0;
+
+ /** \brief Minimum role (end of literal) offset depth in bytes. */
+ u32 min_offset = ~u32{0};
+
+ /** \brief Maximum role (end of literal) offset depth in bytes */
+ u32 max_offset = 0;
+
+ /** \brief SOM for the role is offset from end match offset */
+ u32 som_adjust = 0;
+
+ /** \brief Prefix/infix engine to the left of this role. */
+ LeftEngInfo left;
+
+ /**
+ * \brief Suffix engine to the right of this role.
+ *
+ * Note: information about triggered infixes is associated with the left of
+ * the destination role.
+ */
+ RoseSuffixInfo suffix;
+
+ bool isBoring(void) const;
+ bool fixedOffset(void) const;
+};
+
+/** \brief Properties attached to each Rose graph edge. */
+/* bounds are distance from end of prev to start of the next */
+struct RoseEdgeProps {
/** \brief Unique dense vertex index. Used for BGL algorithms. */
size_t index = ~size_t{0};
- /**
- * \brief Minimum distance from the end of the source role's match to the
- * start of the target role's match.
- *
- * Not used when the target has a left engine (as the engine represents
- * bounds).
- */
- u32 minBound = 0;
-
- /**
- * \brief Maximum distance from the end of the source role's match to the
- * start of the target role's match.
- *
- * Not used when the target has a left engine (as the engine represents
- * bounds).
- */
- u32 maxBound = 0;
-
- /** \brief Which top to trigger on the target role's left engine. */
- u32 rose_top = 0;
-
- /** \brief True if the rose_top can clear all other previous tops. */
- u8 rose_cancel_prev_top = false;
-
- /** \brief History required by this edge. */
- RoseRoleHistory history = ROSE_ROLE_HISTORY_INVALID;
-};
-
-bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b);
-
-/**
- * \brief Core Rose graph structure.
- */
+ /**
+ * \brief Minimum distance from the end of the source role's match to the
+ * start of the target role's match.
+ *
+ * Not used when the target has a left engine (as the engine represents
+ * bounds).
+ */
+ u32 minBound = 0;
+
+ /**
+ * \brief Maximum distance from the end of the source role's match to the
+ * start of the target role's match.
+ *
+ * Not used when the target has a left engine (as the engine represents
+ * bounds).
+ */
+ u32 maxBound = 0;
+
+ /** \brief Which top to trigger on the target role's left engine. */
+ u32 rose_top = 0;
+
+ /** \brief True if the rose_top can clear all other previous tops. */
+ u8 rose_cancel_prev_top = false;
+
+ /** \brief History required by this edge. */
+ RoseRoleHistory history = ROSE_ROLE_HISTORY_INVALID;
+};
+
+bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b);
+
+/**
+ * \brief Core Rose graph structure.
+ */
struct RoseGraph : public ue2_graph<RoseGraph, RoseVertexProps, RoseEdgeProps> {
friend class RoseBuildImpl; /* to allow index renumbering */
};
-using RoseVertex = RoseGraph::vertex_descriptor;
-using RoseEdge = RoseGraph::edge_descriptor;
-
-} // namespace ue2
-
-#endif // ROSE_GRAPH_H
+using RoseVertex = RoseGraph::vertex_descriptor;
+using RoseEdge = RoseGraph::edge_descriptor;
+
+} // namespace ue2
+
+#endif // ROSE_GRAPH_H
diff --git a/contrib/libs/hyperscan/src/rose/rose_in_dump.h b/contrib/libs/hyperscan/src/rose/rose_in_dump.h
index 8fe43fa1e8..22c9fefd49 100644
--- a/contrib/libs/hyperscan/src/rose/rose_in_dump.h
+++ b/contrib/libs/hyperscan/src/rose/rose_in_dump.h
@@ -1,49 +1,49 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_IN_DUMP_H
-#define ROSE_IN_DUMP_H
-
-#include "rose_in_graph.h"
-
-namespace ue2 {
-
-struct Grey;
-
-#ifdef DUMP_SUPPORT
-void dumpPreRoseGraph(const RoseInGraph &ig, const Grey &grey,
- const char *filename = nullptr);
-#else
-static UNUSED
-void dumpPreRoseGraph(const RoseInGraph &, const Grey &,
- const char * = nullptr) { }
-#endif
-
-}
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_IN_DUMP_H
+#define ROSE_IN_DUMP_H
+
+#include "rose_in_graph.h"
+
+namespace ue2 {
+
+struct Grey;
+
+#ifdef DUMP_SUPPORT
+void dumpPreRoseGraph(const RoseInGraph &ig, const Grey &grey,
+ const char *filename = nullptr);
+#else
+static UNUSED
+void dumpPreRoseGraph(const RoseInGraph &, const Grey &,
+ const char * = nullptr) { }
+#endif
+
+}
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/rose_in_graph.h b/contrib/libs/hyperscan/src/rose/rose_in_graph.h
index f99928f147..da0ea08da1 100644
--- a/contrib/libs/hyperscan/src/rose/rose_in_graph.h
+++ b/contrib/libs/hyperscan/src/rose/rose_in_graph.h
@@ -1,197 +1,197 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
* \brief Rose Input Graph: Used for ng_violet -> rose_build_add communication.
- *
- * The input graph MUST be a DAG.
- * There MUST be exactly 1 START or ANCHORED_START vertex.
- * The edges MUST be of the form START->LITERAL, LITERAL->LITERAL,
- * LITERAL->ACCEPT or LITERAL->ACCEPT_EOD.
- * Every non START/ANCHORED_START vertex MUST have an in-edge.
- * Every non ACCEPT/ACCEPT_EOD vertex MUST have an out-edge.
- *
- * Edges are either a graph or have bounds associated with them.
- * Graphs on edges to accepts use their internal report ids.
- */
-
-#ifndef ROSE_IN_GRAPH_H
-#define ROSE_IN_GRAPH_H
-
-#include "ue2common.h"
-#include "rose/rose_common.h"
+ *
+ * The input graph MUST be a DAG.
+ * There MUST be exactly 1 START or ANCHORED_START vertex.
+ * The edges MUST be of the form START->LITERAL, LITERAL->LITERAL,
+ * LITERAL->ACCEPT or LITERAL->ACCEPT_EOD.
+ * Every non START/ANCHORED_START vertex MUST have an in-edge.
+ * Every non ACCEPT/ACCEPT_EOD vertex MUST have an out-edge.
+ *
+ * Edges are either a graph or have bounds associated with them.
+ * Graphs on edges to accepts use their internal report ids.
+ */
+
+#ifndef ROSE_IN_GRAPH_H
+#define ROSE_IN_GRAPH_H
+
+#include "ue2common.h"
+#include "rose/rose_common.h"
#include "util/flat_containers.h"
#include "util/ue2_graph.h"
-#include "util/ue2string.h"
-
-#include <memory>
-
-namespace ue2 {
-
-class NGHolder;
-struct raw_som_dfa;
+#include "util/ue2string.h"
+
+#include <memory>
+
+namespace ue2 {
+
+class NGHolder;
+struct raw_som_dfa;
struct raw_dfa;
-
-enum RoseInVertexType {
- RIV_LITERAL,
- RIV_START,
- RIV_ANCHORED_START,
- RIV_ACCEPT,
- RIV_ACCEPT_EOD
-};
-
-struct RoseInVertexProps {
- RoseInVertexProps()
- : type(RIV_LITERAL), delay(0), min_offset(0),
- max_offset(ROSE_BOUND_INF) {}
-
-private:
- template <class ReportContainer>
- RoseInVertexProps(RoseInVertexType type_in, const ue2_literal &s_in,
- const ReportContainer &reports_in, u32 min_offset_in,
- u32 max_offset_in)
- : type(type_in), s(s_in), delay(0),
- reports(begin(reports_in), end(reports_in)),
- min_offset(min_offset_in), max_offset(max_offset_in) {}
-
- // Constructor for a vertex with no reports.
- RoseInVertexProps(RoseInVertexType type_in, const ue2_literal &s_in,
- u32 min_offset_in, u32 max_offset_in)
- : type(type_in), s(s_in), delay(0), min_offset(min_offset_in),
- max_offset(max_offset_in) {}
-
-public:
- static RoseInVertexProps makeLiteral(const ue2_literal &lit) {
- DEBUG_PRINTF("making literal %s\n", dumpString(lit).c_str());
- return RoseInVertexProps(RIV_LITERAL, lit, 0, ROSE_BOUND_INF);
- }
-
- template <class ReportContainer>
- static RoseInVertexProps makeAccept(const ReportContainer &rep) {
- DEBUG_PRINTF("making accept for %zu reports\n", rep.size());
- return RoseInVertexProps(RIV_ACCEPT, ue2_literal(), rep, 0,
- ROSE_BOUND_INF);
- }
-
- template <class ReportContainer>
- static RoseInVertexProps makeAcceptEod(const ReportContainer &rep) {
- DEBUG_PRINTF("making accept-eod for %zu reports\n", rep.size());
- return RoseInVertexProps(RIV_ACCEPT_EOD, ue2_literal(), rep, 0,
- ROSE_BOUND_INF);
- }
-
+
+enum RoseInVertexType {
+ RIV_LITERAL,
+ RIV_START,
+ RIV_ANCHORED_START,
+ RIV_ACCEPT,
+ RIV_ACCEPT_EOD
+};
+
+struct RoseInVertexProps {
+ RoseInVertexProps()
+ : type(RIV_LITERAL), delay(0), min_offset(0),
+ max_offset(ROSE_BOUND_INF) {}
+
+private:
+ template <class ReportContainer>
+ RoseInVertexProps(RoseInVertexType type_in, const ue2_literal &s_in,
+ const ReportContainer &reports_in, u32 min_offset_in,
+ u32 max_offset_in)
+ : type(type_in), s(s_in), delay(0),
+ reports(begin(reports_in), end(reports_in)),
+ min_offset(min_offset_in), max_offset(max_offset_in) {}
+
+ // Constructor for a vertex with no reports.
+ RoseInVertexProps(RoseInVertexType type_in, const ue2_literal &s_in,
+ u32 min_offset_in, u32 max_offset_in)
+ : type(type_in), s(s_in), delay(0), min_offset(min_offset_in),
+ max_offset(max_offset_in) {}
+
+public:
+ static RoseInVertexProps makeLiteral(const ue2_literal &lit) {
+ DEBUG_PRINTF("making literal %s\n", dumpString(lit).c_str());
+ return RoseInVertexProps(RIV_LITERAL, lit, 0, ROSE_BOUND_INF);
+ }
+
+ template <class ReportContainer>
+ static RoseInVertexProps makeAccept(const ReportContainer &rep) {
+ DEBUG_PRINTF("making accept for %zu reports\n", rep.size());
+ return RoseInVertexProps(RIV_ACCEPT, ue2_literal(), rep, 0,
+ ROSE_BOUND_INF);
+ }
+
+ template <class ReportContainer>
+ static RoseInVertexProps makeAcceptEod(const ReportContainer &rep) {
+ DEBUG_PRINTF("making accept-eod for %zu reports\n", rep.size());
+ return RoseInVertexProps(RIV_ACCEPT_EOD, ue2_literal(), rep, 0,
+ ROSE_BOUND_INF);
+ }
+
/* for when there is a suffix graph which handles the reports */
static RoseInVertexProps makeAcceptEod() {
return RoseInVertexProps(RIV_ACCEPT_EOD, ue2_literal(), 0,
ROSE_BOUND_INF);
}
- static RoseInVertexProps makeStart(bool anchored) {
- DEBUG_PRINTF("making %s\n", anchored ? "anchored start" : "start");
- if (anchored) {
- return RoseInVertexProps(RIV_ANCHORED_START, ue2_literal(), 0, 0);
- } else {
- return RoseInVertexProps(RIV_START, ue2_literal(), 0,
- ROSE_BOUND_INF);
- }
- }
-
- RoseInVertexType type; /* polymorphic vertices are probably a bad idea */
- ue2_literal s; /**< for RIV_LITERAL */
- u32 delay; /**< for RIV_LITERAL, delay applied to literal. */
- flat_set<ReportID> reports; /**< for RIV_ACCEPT/RIV_ACCEPT_EOD */
- u32 min_offset; /**< Minimum offset at which this vertex can match. */
- u32 max_offset; /**< Maximum offset at which this vertex can match. */
+ static RoseInVertexProps makeStart(bool anchored) {
+ DEBUG_PRINTF("making %s\n", anchored ? "anchored start" : "start");
+ if (anchored) {
+ return RoseInVertexProps(RIV_ANCHORED_START, ue2_literal(), 0, 0);
+ } else {
+ return RoseInVertexProps(RIV_START, ue2_literal(), 0,
+ ROSE_BOUND_INF);
+ }
+ }
+
+ RoseInVertexType type; /* polymorphic vertices are probably a bad idea */
+ ue2_literal s; /**< for RIV_LITERAL */
+ u32 delay; /**< for RIV_LITERAL, delay applied to literal. */
+ flat_set<ReportID> reports; /**< for RIV_ACCEPT/RIV_ACCEPT_EOD */
+ u32 min_offset; /**< Minimum offset at which this vertex can match. */
+ u32 max_offset; /**< Maximum offset at which this vertex can match. */
size_t index = 0; /**< \brief Unique vertex index. */
-};
-
-struct RoseInEdgeProps {
- RoseInEdgeProps()
- : minBound(0), maxBound(0), graph(), haig(), graph_lag(0) {}
-
- RoseInEdgeProps(u32 min_in, u32 max_in)
- : minBound(min_in), maxBound(max_in), graph(), graph_lag(0) {
- assert(minBound <= maxBound);
- assert(minBound != ROSE_BOUND_INF);
- }
-
- /* haig rosefixes (prefix/infix) require their corresponding holders */
- RoseInEdgeProps(std::shared_ptr<NGHolder> g, std::shared_ptr<raw_som_dfa> h,
- u32 lag)
- : minBound(0), maxBound(ROSE_BOUND_INF), graph(g), haig(h),
- graph_lag(lag) {
- assert(graph);
- assert(haig);
- }
-
- /* haig suffixes do not require their corresponding holders */
- explicit RoseInEdgeProps(std::shared_ptr<raw_som_dfa> h)
- : minBound(0), maxBound(ROSE_BOUND_INF), haig(h), graph_lag(0) {
- assert(haig);
- }
-
- RoseInEdgeProps(std::shared_ptr<NGHolder> g, u32 lag)
- : minBound(0), maxBound(ROSE_BOUND_INF), graph(g), graph_lag(lag) {
- assert(graph);
- }
-
- /** \brief Minimum bound on 'dot' repeat between literals. ie pred end ->
- * succ begin. */
- u32 minBound;
-
- /** \brief Maximum bound on 'dot' repeat between literals. */
- u32 maxBound;
-
+};
+
+struct RoseInEdgeProps {
+ RoseInEdgeProps()
+ : minBound(0), maxBound(0), graph(), haig(), graph_lag(0) {}
+
+ RoseInEdgeProps(u32 min_in, u32 max_in)
+ : minBound(min_in), maxBound(max_in), graph(), graph_lag(0) {
+ assert(minBound <= maxBound);
+ assert(minBound != ROSE_BOUND_INF);
+ }
+
+ /* haig rosefixes (prefix/infix) require their corresponding holders */
+ RoseInEdgeProps(std::shared_ptr<NGHolder> g, std::shared_ptr<raw_som_dfa> h,
+ u32 lag)
+ : minBound(0), maxBound(ROSE_BOUND_INF), graph(g), haig(h),
+ graph_lag(lag) {
+ assert(graph);
+ assert(haig);
+ }
+
+ /* haig suffixes do not require their corresponding holders */
+ explicit RoseInEdgeProps(std::shared_ptr<raw_som_dfa> h)
+ : minBound(0), maxBound(ROSE_BOUND_INF), haig(h), graph_lag(0) {
+ assert(haig);
+ }
+
+ RoseInEdgeProps(std::shared_ptr<NGHolder> g, u32 lag)
+ : minBound(0), maxBound(ROSE_BOUND_INF), graph(g), graph_lag(lag) {
+ assert(graph);
+ }
+
+ /** \brief Minimum bound on 'dot' repeat between literals. ie pred end ->
+ * succ begin. */
+ u32 minBound;
+
+ /** \brief Maximum bound on 'dot' repeat between literals. */
+ u32 maxBound;
+
/** \brief Graph on edge. Graph is end to (end - lag). */
- std::shared_ptr<NGHolder> graph;
-
+ std::shared_ptr<NGHolder> graph;
+
/** \brief DFA version of graph, if we have already determinised. */
std::shared_ptr<raw_dfa> dfa;
- /** \brief Haig version of graph, if required. */
- std::shared_ptr<raw_som_dfa> haig;
-
+ /** \brief Haig version of graph, if required. */
+ std::shared_ptr<raw_som_dfa> haig;
+
/**
* \brief Distance behind the match offset for the literal in the target
* vertex that the leftfix needs to be checked at.
*/
- u32 graph_lag;
+ u32 graph_lag;
/** \brief Unique edge index. */
size_t index = 0;
};
-
+
struct RoseInGraph
: public ue2_graph<RoseInGraph, RoseInVertexProps, RoseInEdgeProps> {
-};
-typedef RoseInGraph::vertex_descriptor RoseInVertex;
-typedef RoseInGraph::edge_descriptor RoseInEdge;
-
-} // namespace ue2
-
-#endif
+};
+typedef RoseInGraph::vertex_descriptor RoseInVertex;
+typedef RoseInGraph::edge_descriptor RoseInEdge;
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/rose_in_util.cpp b/contrib/libs/hyperscan/src/rose/rose_in_util.cpp
index 0de66411b2..cb531017e3 100644
--- a/contrib/libs/hyperscan/src/rose/rose_in_util.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_in_util.cpp
@@ -1,251 +1,251 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_in_util.h"
-
-#include "rose_build_util.h"
-#include "nfa/goughcompile.h"
-#include "nfagraph/ng_depth.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_width.h"
-#include "util/container.h"
-#include "util/graph_range.h"
-#include "util/make_unique.h"
-
-#include <vector>
-
-#include <boost/graph/copy.hpp>
-#include <boost/graph/reverse_graph.hpp>
-#include <boost/graph/topological_sort.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
-/* Returns a topological ordering of the vertices in g. That is the starts are
- * at the front and all the predecessors of a vertex occur earlier in the list
- * than the vertex. */
-vector<RoseInVertex> topo_order(const RoseInGraph &g) {
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_in_util.h"
+
+#include "rose_build_util.h"
+#include "nfa/goughcompile.h"
+#include "nfagraph/ng_depth.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
+#include "util/container.h"
+#include "util/graph_range.h"
+#include "util/make_unique.h"
+
+#include <vector>
+
+#include <boost/graph/copy.hpp>
+#include <boost/graph/reverse_graph.hpp>
+#include <boost/graph/topological_sort.hpp>
+
+using namespace std;
+
+namespace ue2 {
+
+/* Returns a topological ordering of the vertices in g. That is the starts are
+ * at the front and all the predecessors of a vertex occur earlier in the list
+ * than the vertex. */
+vector<RoseInVertex> topo_order(const RoseInGraph &g) {
assert(hasCorrectlyNumberedVertices(g));
- vector<RoseInVertex> v_order;
+ vector<RoseInVertex> v_order;
v_order.reserve(num_vertices(g));
-
+
boost::topological_sort(g, back_inserter(v_order));
-
- reverse(v_order.begin(), v_order.end()); /* put starts at the front */
-
- return v_order;
-}
-
-namespace {
-struct RoseEdgeCopier {
- typedef unordered_map<const NGHolder *, shared_ptr<NGHolder>> GraphMap;
- typedef unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> HaigMap;
-
- RoseEdgeCopier(const RoseInGraph &g1, RoseInGraph &g2,
- const GraphMap &graph_map_in, const HaigMap &haig_map_in)
- : ig(g1), out(g2), graph_map(graph_map_in), haig_map(haig_map_in) {}
-
- void operator()(const RoseInEdge &e1, RoseInEdge &e2) {
- // Clone all properties.
- put(boost::edge_all, out, e2, get(boost::edge_all, ig, e1));
- // Substitute in cloned graphs.
- if (ig[e1].graph) {
- out[e2].graph = graph_map.at(ig[e1].graph.get());
- }
- if (ig[e1].haig) {
- out[e2].haig = haig_map.at(ig[e1].haig.get());
- }
- }
-
-private:
- const RoseInGraph &ig;
- RoseInGraph &out;
- const GraphMap &graph_map;
- const HaigMap &haig_map;
-};
-}
-
-unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig) {
+
+ reverse(v_order.begin(), v_order.end()); /* put starts at the front */
+
+ return v_order;
+}
+
+namespace {
+struct RoseEdgeCopier {
+ typedef unordered_map<const NGHolder *, shared_ptr<NGHolder>> GraphMap;
+ typedef unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> HaigMap;
+
+ RoseEdgeCopier(const RoseInGraph &g1, RoseInGraph &g2,
+ const GraphMap &graph_map_in, const HaigMap &haig_map_in)
+ : ig(g1), out(g2), graph_map(graph_map_in), haig_map(haig_map_in) {}
+
+ void operator()(const RoseInEdge &e1, RoseInEdge &e2) {
+ // Clone all properties.
+ put(boost::edge_all, out, e2, get(boost::edge_all, ig, e1));
+ // Substitute in cloned graphs.
+ if (ig[e1].graph) {
+ out[e2].graph = graph_map.at(ig[e1].graph.get());
+ }
+ if (ig[e1].haig) {
+ out[e2].haig = haig_map.at(ig[e1].haig.get());
+ }
+ }
+
+private:
+ const RoseInGraph &ig;
+ RoseInGraph &out;
+ const GraphMap &graph_map;
+ const HaigMap &haig_map;
+};
+}
+
+unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig) {
assert(hasCorrectlyNumberedVertices(ig));
unique_ptr<RoseInGraph> out = std::make_unique<RoseInGraph>();
-
- unordered_map<const NGHolder *, shared_ptr<NGHolder>> graph_map;
- unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> haig_map;
-
- for (const auto &e : edges_range(ig)) {
- const RoseInEdgeProps &ep = ig[e];
- if (ep.graph && !contains(graph_map, ep.graph.get())) {
- graph_map[ep.graph.get()] = cloneHolder(*ep.graph);
- }
- if (ep.haig && !contains(haig_map, ep.haig.get())) {
- haig_map[ep.haig.get()] = make_shared<raw_som_dfa>(*ep.haig);
- }
- }
-
- copy_graph(ig, *out,
+
+ unordered_map<const NGHolder *, shared_ptr<NGHolder>> graph_map;
+ unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> haig_map;
+
+ for (const auto &e : edges_range(ig)) {
+ const RoseInEdgeProps &ep = ig[e];
+ if (ep.graph && !contains(graph_map, ep.graph.get())) {
+ graph_map[ep.graph.get()] = cloneHolder(*ep.graph);
+ }
+ if (ep.haig && !contains(haig_map, ep.haig.get())) {
+ haig_map[ep.haig.get()] = make_shared<raw_som_dfa>(*ep.haig);
+ }
+ }
+
+ copy_graph(ig, *out,
boost::edge_copy(RoseEdgeCopier(ig, *out, graph_map, haig_map)));
- return out;
-}
-
-void calcVertexOffsets(RoseInGraph &g) {
- vector<RoseInVertex> v_order = topo_order(g);
-
- for (RoseInVertex v : v_order) {
- if (g[v].type == RIV_START) {
- g[v].min_offset = 0;
- g[v].max_offset = ROSE_BOUND_INF;
- continue;
- } else if (g[v].type == RIV_ANCHORED_START) {
- g[v].min_offset = 0;
- g[v].max_offset = 0;
- continue;
- }
-
- DEBUG_PRINTF("vertex '%s'\n", dumpString(g[v].s).c_str());
-
- // Min and max predecessor depths.
- u32 min_d = ROSE_BOUND_INF;
- u32 max_d = 0;
-
- for (const auto &e : in_edges_range(v, g)) {
- RoseInVertex u = source(e, g);
- u32 e_min = g[u].min_offset;
- u32 e_max = g[u].max_offset;
-
- DEBUG_PRINTF("in-edge from u with offsets [%u,%u]\n", e_min, e_max);
-
- if (g[e].graph) {
- const NGHolder &h = *g[e].graph;
- depth g_min_width = findMinWidth(h);
- depth g_max_width =
- isAnchored(h) ? findMaxWidth(h) : depth::infinity();
- u32 graph_lag = g[e].graph_lag;
-
- DEBUG_PRINTF("edge has graph, depths [%s,%s] and lag %u\n",
- g_min_width.str().c_str(),
- g_max_width.str().c_str(), graph_lag);
- g_min_width += graph_lag;
- g_max_width += graph_lag;
- e_min = add_rose_depth(e_min, g_min_width);
- if (g_max_width.is_finite()) {
- e_max = add_rose_depth(e_max, g_max_width);
- } else {
- e_max = ROSE_BOUND_INF;
- }
- } else {
- DEBUG_PRINTF("edge has bounds [%u,%u]\n", g[e].minBound,
- g[e].maxBound);
- e_min = add_rose_depth(e_min, g[e].minBound);
- e_max = add_rose_depth(e_max, g[e].maxBound);
- if (g[v].type == RIV_LITERAL) {
- u32 len = g[v].s.length();
- DEBUG_PRINTF("lit len %u\n", len);
- e_min = add_rose_depth(e_min, len);
- e_max = add_rose_depth(e_max, len);
- }
- }
-
- min_d = min(min_d, e_min);
- max_d = max(max_d, e_max);
- }
-
- DEBUG_PRINTF("vertex depths [%u,%u]\n", min_d, max_d);
-
- assert(max_d >= min_d);
- g[v].min_offset = min_d;
- g[v].max_offset = max_d;
- }
-
- // It's possible that we may have literal delays assigned to vertices here
- // as well. If so, these need to be added to the min/max offsets.
- for (RoseInVertex v : v_order) {
- const u32 delay = g[v].delay;
- g[v].min_offset = add_rose_depth(g[v].min_offset, delay);
- g[v].max_offset = add_rose_depth(g[v].max_offset, delay);
- }
-}
-
-nfa_kind whatRoseIsThis(const RoseInGraph &in, const RoseInEdge &e) {
- RoseInVertex u = source(e, in);
- RoseInVertex v = target(e, in);
-
- bool start = in[u].type == RIV_START || in[u].type == RIV_ANCHORED_START;
- bool end = in[v].type == RIV_ACCEPT || in[v].type == RIV_ACCEPT_EOD;
-
- if (start && !end) {
- return NFA_PREFIX;
- } else if (!start && end) {
- return NFA_SUFFIX;
- } else if (!start && !end) {
- return NFA_INFIX;
- } else {
- assert(in[v].type == RIV_ACCEPT_EOD);
- return NFA_OUTFIX;
- }
-}
-
-void pruneUseless(RoseInGraph &g) {
- DEBUG_PRINTF("pruning useless vertices\n");
-
- set<RoseInVertex> dead;
- RoseInVertex dummy_start
- = add_vertex(RoseInVertexProps::makeStart(true), g);
- RoseInVertex dummy_end
- = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), g);
- dead.insert(dummy_start);
- dead.insert(dummy_end);
- for (auto v : vertices_range(g)) {
- if (v == dummy_start || v == dummy_end) {
- continue;
- }
- switch (g[v].type) {
- case RIV_ANCHORED_START:
- case RIV_START:
- add_edge(dummy_start, v, g);
- break;
- case RIV_ACCEPT:
- case RIV_ACCEPT_EOD:
- add_edge(v, dummy_end, g);
- break;
- default:
- break;
- }
- }
-
- find_unreachable(g, vector<RoseInVertex>(1, dummy_start), &dead);
- find_unreachable(boost::reverse_graph<RoseInGraph, RoseInGraph &>(g),
- vector<RoseInVertex>(1, dummy_end), &dead);
-
- for (auto v : dead) {
- clear_vertex(v, g);
- remove_vertex(v, g);
- }
-}
-
-}
+ return out;
+}
+
+void calcVertexOffsets(RoseInGraph &g) {
+ vector<RoseInVertex> v_order = topo_order(g);
+
+ for (RoseInVertex v : v_order) {
+ if (g[v].type == RIV_START) {
+ g[v].min_offset = 0;
+ g[v].max_offset = ROSE_BOUND_INF;
+ continue;
+ } else if (g[v].type == RIV_ANCHORED_START) {
+ g[v].min_offset = 0;
+ g[v].max_offset = 0;
+ continue;
+ }
+
+ DEBUG_PRINTF("vertex '%s'\n", dumpString(g[v].s).c_str());
+
+ // Min and max predecessor depths.
+ u32 min_d = ROSE_BOUND_INF;
+ u32 max_d = 0;
+
+ for (const auto &e : in_edges_range(v, g)) {
+ RoseInVertex u = source(e, g);
+ u32 e_min = g[u].min_offset;
+ u32 e_max = g[u].max_offset;
+
+ DEBUG_PRINTF("in-edge from u with offsets [%u,%u]\n", e_min, e_max);
+
+ if (g[e].graph) {
+ const NGHolder &h = *g[e].graph;
+ depth g_min_width = findMinWidth(h);
+ depth g_max_width =
+ isAnchored(h) ? findMaxWidth(h) : depth::infinity();
+ u32 graph_lag = g[e].graph_lag;
+
+ DEBUG_PRINTF("edge has graph, depths [%s,%s] and lag %u\n",
+ g_min_width.str().c_str(),
+ g_max_width.str().c_str(), graph_lag);
+ g_min_width += graph_lag;
+ g_max_width += graph_lag;
+ e_min = add_rose_depth(e_min, g_min_width);
+ if (g_max_width.is_finite()) {
+ e_max = add_rose_depth(e_max, g_max_width);
+ } else {
+ e_max = ROSE_BOUND_INF;
+ }
+ } else {
+ DEBUG_PRINTF("edge has bounds [%u,%u]\n", g[e].minBound,
+ g[e].maxBound);
+ e_min = add_rose_depth(e_min, g[e].minBound);
+ e_max = add_rose_depth(e_max, g[e].maxBound);
+ if (g[v].type == RIV_LITERAL) {
+ u32 len = g[v].s.length();
+ DEBUG_PRINTF("lit len %u\n", len);
+ e_min = add_rose_depth(e_min, len);
+ e_max = add_rose_depth(e_max, len);
+ }
+ }
+
+ min_d = min(min_d, e_min);
+ max_d = max(max_d, e_max);
+ }
+
+ DEBUG_PRINTF("vertex depths [%u,%u]\n", min_d, max_d);
+
+ assert(max_d >= min_d);
+ g[v].min_offset = min_d;
+ g[v].max_offset = max_d;
+ }
+
+ // It's possible that we may have literal delays assigned to vertices here
+ // as well. If so, these need to be added to the min/max offsets.
+ for (RoseInVertex v : v_order) {
+ const u32 delay = g[v].delay;
+ g[v].min_offset = add_rose_depth(g[v].min_offset, delay);
+ g[v].max_offset = add_rose_depth(g[v].max_offset, delay);
+ }
+}
+
+nfa_kind whatRoseIsThis(const RoseInGraph &in, const RoseInEdge &e) {
+ RoseInVertex u = source(e, in);
+ RoseInVertex v = target(e, in);
+
+ bool start = in[u].type == RIV_START || in[u].type == RIV_ANCHORED_START;
+ bool end = in[v].type == RIV_ACCEPT || in[v].type == RIV_ACCEPT_EOD;
+
+ if (start && !end) {
+ return NFA_PREFIX;
+ } else if (!start && end) {
+ return NFA_SUFFIX;
+ } else if (!start && !end) {
+ return NFA_INFIX;
+ } else {
+ assert(in[v].type == RIV_ACCEPT_EOD);
+ return NFA_OUTFIX;
+ }
+}
+
+void pruneUseless(RoseInGraph &g) {
+ DEBUG_PRINTF("pruning useless vertices\n");
+
+ set<RoseInVertex> dead;
+ RoseInVertex dummy_start
+ = add_vertex(RoseInVertexProps::makeStart(true), g);
+ RoseInVertex dummy_end
+ = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), g);
+ dead.insert(dummy_start);
+ dead.insert(dummy_end);
+ for (auto v : vertices_range(g)) {
+ if (v == dummy_start || v == dummy_end) {
+ continue;
+ }
+ switch (g[v].type) {
+ case RIV_ANCHORED_START:
+ case RIV_START:
+ add_edge(dummy_start, v, g);
+ break;
+ case RIV_ACCEPT:
+ case RIV_ACCEPT_EOD:
+ add_edge(v, dummy_end, g);
+ break;
+ default:
+ break;
+ }
+ }
+
+ find_unreachable(g, vector<RoseInVertex>(1, dummy_start), &dead);
+ find_unreachable(boost::reverse_graph<RoseInGraph, RoseInGraph &>(g),
+ vector<RoseInVertex>(1, dummy_end), &dead);
+
+ for (auto v : dead) {
+ clear_vertex(v, g);
+ remove_vertex(v, g);
+ }
+}
+
+}
diff --git a/contrib/libs/hyperscan/src/rose/rose_in_util.h b/contrib/libs/hyperscan/src/rose/rose_in_util.h
index e6aaa042de..1f3c4ef78a 100644
--- a/contrib/libs/hyperscan/src/rose/rose_in_util.h
+++ b/contrib/libs/hyperscan/src/rose/rose_in_util.h
@@ -1,56 +1,56 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_IN_UTIL_H
-#define ROSE_IN_UTIL_H
-
-#include "rose_in_graph.h"
-#include "nfa/nfa_kind.h"
-
-#include <vector>
-
-namespace ue2 {
-
-/* Returns a topological ordering of the vertices in g. That is the starts are
- * at the front and all the predecessors of a vertex occur earlier in the list
- * than the vertex. */
-std::vector<RoseInVertex> topo_order(const RoseInGraph &g);
-
-std::unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig);
-void calcVertexOffsets(RoseInGraph &ig);
-enum nfa_kind whatRoseIsThis(const RoseInGraph &in, const RoseInEdge &e);
-void pruneUseless(RoseInGraph &g);
-
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_IN_UTIL_H
+#define ROSE_IN_UTIL_H
+
+#include "rose_in_graph.h"
+#include "nfa/nfa_kind.h"
+
+#include <vector>
+
+namespace ue2 {
+
+/* Returns a topological ordering of the vertices in g. That is the starts are
+ * at the front and all the predecessors of a vertex occur earlier in the list
+ * than the vertex. */
+std::vector<RoseInVertex> topo_order(const RoseInGraph &g);
+
+std::unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig);
+void calcVertexOffsets(RoseInGraph &ig);
+enum nfa_kind whatRoseIsThis(const RoseInGraph &in, const RoseInEdge &e);
+void pruneUseless(RoseInGraph &g);
+
inline
bool is_any_accept(RoseInVertex v, const RoseInGraph &g) {
return g[v].type == RIV_ACCEPT || g[v].type == RIV_ACCEPT_EOD;
-}
-
}
-#endif
+}
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/rose_internal.h b/contrib/libs/hyperscan/src/rose/rose_internal.h
index dbbb43ee92..7bd6779c3d 100644
--- a/contrib/libs/hyperscan/src/rose/rose_internal.h
+++ b/contrib/libs/hyperscan/src/rose/rose_internal.h
@@ -1,207 +1,207 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose data structures.
- */
-
-#ifndef ROSE_INTERNAL_H
-#define ROSE_INTERNAL_H
-
-#include "ue2common.h"
-#include "rose_common.h"
-#include "util/scatter.h"
-
-#define ROSE_OFFSET_INVALID 0xffffffff
-
-// Group constants
-typedef u64a rose_group;
-
-// Delayed literal stuff
-#define DELAY_BITS 5
-#define DELAY_SLOT_COUNT (1U << DELAY_BITS)
-#define MAX_DELAY (DELAY_SLOT_COUNT - 1)
-#define DELAY_MASK (DELAY_SLOT_COUNT - 1)
-
-/* Allocation of Rose literal ids
- *
- * The rose literal id space is segmented:
- *
- * ---- 0
- * | | 'Normal' undelayed literals in either e or f tables
- * | |
- * | |
- * | |
- * ---- anchored_base_id
- * | | literals from the a table
- * | |
- * ---- delay_base_id
- * | | Delayed version of normal literals
- * | |
- * ---- literalCount
- */
-
-/* Rose Literal Sources
- *
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose data structures.
+ */
+
+#ifndef ROSE_INTERNAL_H
+#define ROSE_INTERNAL_H
+
+#include "ue2common.h"
+#include "rose_common.h"
+#include "util/scatter.h"
+
+#define ROSE_OFFSET_INVALID 0xffffffff
+
+// Group constants
+typedef u64a rose_group;
+
+// Delayed literal stuff
+#define DELAY_BITS 5
+#define DELAY_SLOT_COUNT (1U << DELAY_BITS)
+#define MAX_DELAY (DELAY_SLOT_COUNT - 1)
+#define DELAY_MASK (DELAY_SLOT_COUNT - 1)
+
+/* Allocation of Rose literal ids
+ *
+ * The rose literal id space is segmented:
+ *
+ * ---- 0
+ * | | 'Normal' undelayed literals in either e or f tables
+ * | |
+ * | |
+ * | |
+ * ---- anchored_base_id
+ * | | literals from the a table
+ * | |
+ * ---- delay_base_id
+ * | | Delayed version of normal literals
+ * | |
+ * ---- literalCount
+ */
+
+/* Rose Literal Sources
+ *
* Rose currently gets events (mainly roseProcessMatch calls) from a number of
* sources:
- * 1) The floating table
- * 2) The anchored table
- * 3) Delayed literals
+ * 1) The floating table
+ * 2) The anchored table
+ * 3) Delayed literals
* 4) Suffix NFAs
* 5) Literal masks
* 5) End anchored table
* 6) Prefix / Infix nfas
- *
- * Care is required to ensure that events appear to come into Rose in order
- * (or sufficiently ordered for Rose to cope). Generally the progress of the
- * floating table is considered the canonical position in the buffer.
- *
- * Anchored table:
- * The anchored table is run before the floating table as nothing in it can
- * depend on a floating literal. Order is achieved by two measures:
- * a) user matches^1 are logged and held until the floating matcher passes that
- * point;
- * b) any floating role with an anchored predecessor has a history relationship
- * to enforce the ordering.
- *
- * Delayed literals:
- * Delayed literal ordering is handled by delivering any pending delayed
- * literals before processing any floating match.
- *
- * Suffix:
- * Suffixes are always pure terminal roles. Prior to raising a match^2, pending
- * NFA queues are run to the current point (floating or delayed literal) as
- * appropriate.
- *
+ *
+ * Care is required to ensure that events appear to come into Rose in order
+ * (or sufficiently ordered for Rose to cope). Generally the progress of the
+ * floating table is considered the canonical position in the buffer.
+ *
+ * Anchored table:
+ * The anchored table is run before the floating table as nothing in it can
+ * depend on a floating literal. Order is achieved by two measures:
+ * a) user matches^1 are logged and held until the floating matcher passes that
+ * point;
+ * b) any floating role with an anchored predecessor has a history relationship
+ * to enforce the ordering.
+ *
+ * Delayed literals:
+ * Delayed literal ordering is handled by delivering any pending delayed
+ * literals before processing any floating match.
+ *
+ * Suffix:
+ * Suffixes are always pure terminal roles. Prior to raising a match^2, pending
+ * NFA queues are run to the current point (floating or delayed literal) as
+ * appropriate.
+ *
* Literal Masks:
- * These are triggered from either floating literals or delayed literals and
- * inspect the data behind them. Matches are raised at the same location as the
- * trigger literal so there are no ordering issues. Masks are always pure
- * terminal roles.
- *
- * Lookaround:
- * These are tests run on receipt of a role that "look around" the match,
- * checking characters at nearby offsets against reachability masks. Each role
- * can have a list of these lookaround offset/reach pairs, ordered in offset
- * order, and any failure will prevent the role from being switched on. Offsets
- * are relative to the byte after a literal match, and can be negative.
- *
- * Prefix / Infix:
- * TODO: remember / discuss
- *
- * End anchored table:
- * All user matches occur at the last byte. We do this last, so no problems
- * (yippee)
- *
- * ^1 User matches which occur before any possible match from the other tables
- * are not delayed.
- * ^2 Queues may also be run to the current location if a queue is full and
- * needs to be emptied.
- * ^3 There is no need to catch up at the end of a block scan as it contains no
- * terminals.
- */
-
-struct RoseCountingMiracle {
- char shufti; /** 1: count shufti class; 0: count a single character */
- u8 count; /** minimum number of occurrences for the counting
- * miracle char to kill the leftfix. */
- u8 c; /** character to look for if not shufti */
- u8 poison; /** character not in the shufti mask */
- m128 lo; /** shufti lo mask */
- m128 hi; /** shufti hi mask */
-};
-
-struct LeftNfaInfo {
- u32 maxQueueLen;
- u32 maxLag; // maximum of successor roles' lag
- u32 lagIndex; // iff lag != 0, index into leftfixLagTable
- u32 stopTable; // stop table index, or ROSE_OFFSET_INVALID
- u8 transient; /**< 0 if not transient, else max width of transient prefix */
- char infix; /* TODO: make flags */
+ * These are triggered from either floating literals or delayed literals and
+ * inspect the data behind them. Matches are raised at the same location as the
+ * trigger literal so there are no ordering issues. Masks are always pure
+ * terminal roles.
+ *
+ * Lookaround:
+ * These are tests run on receipt of a role that "look around" the match,
+ * checking characters at nearby offsets against reachability masks. Each role
+ * can have a list of these lookaround offset/reach pairs, ordered in offset
+ * order, and any failure will prevent the role from being switched on. Offsets
+ * are relative to the byte after a literal match, and can be negative.
+ *
+ * Prefix / Infix:
+ * TODO: remember / discuss
+ *
+ * End anchored table:
+ * All user matches occur at the last byte. We do this last, so no problems
+ * (yippee)
+ *
+ * ^1 User matches which occur before any possible match from the other tables
+ * are not delayed.
+ * ^2 Queues may also be run to the current location if a queue is full and
+ * needs to be emptied.
+ * ^3 There is no need to catch up at the end of a block scan as it contains no
+ * terminals.
+ */
+
+struct RoseCountingMiracle {
+ char shufti; /** 1: count shufti class; 0: count a single character */
+ u8 count; /** minimum number of occurrences for the counting
+ * miracle char to kill the leftfix. */
+ u8 c; /** character to look for if not shufti */
+ u8 poison; /** character not in the shufti mask */
+ m128 lo; /** shufti lo mask */
+ m128 hi; /** shufti hi mask */
+};
+
+struct LeftNfaInfo {
+ u32 maxQueueLen;
+ u32 maxLag; // maximum of successor roles' lag
+ u32 lagIndex; // iff lag != 0, index into leftfixLagTable
+ u32 stopTable; // stop table index, or ROSE_OFFSET_INVALID
+ u8 transient; /**< 0 if not transient, else max width of transient prefix */
+ char infix; /* TODO: make flags */
char eager; /**< nfa should be run eagerly to first match or death */
- char eod_check; /**< nfa is used by the event eod literal */
- u32 countingMiracleOffset; /** if not 0, offset to RoseCountingMiracle. */
- rose_group squash_mask; /* & mask applied when rose nfa dies */
-};
-
-struct NfaInfo {
- u32 nfaOffset;
- u32 stateOffset;
- u32 fullStateOffset; /* offset in scratch, relative to ??? */
- u32 ekeyListOffset; /* suffix, relative to base of rose, 0 if no ekeys */
- u8 no_retrigger; /* TODO */
- u8 in_sbmatcher; /**< this outfix should not be run in small-block
- * execution, as it will be handled by the sbmatcher
- * HWLM table. */
- u8 eod; /* suffix is triggered by the etable --> can only produce eod
- * matches */
-};
-
-#define MAX_STORED_LEFTFIX_LAG 127 /* max leftfix lag that we can store in one
- * whole byte (OWB) (streaming only). Other
- * values in OWB are reserved for zombie
- * status */
-#define OWB_ZOMBIE_ALWAYS_YES 128 /* nfa will always answer yes to any rose
- * prefix checks */
-
+ char eod_check; /**< nfa is used by the event eod literal */
+ u32 countingMiracleOffset; /** if not 0, offset to RoseCountingMiracle. */
+ rose_group squash_mask; /* & mask applied when rose nfa dies */
+};
+
+struct NfaInfo {
+ u32 nfaOffset;
+ u32 stateOffset;
+ u32 fullStateOffset; /* offset in scratch, relative to ??? */
+ u32 ekeyListOffset; /* suffix, relative to base of rose, 0 if no ekeys */
+ u8 no_retrigger; /* TODO */
+ u8 in_sbmatcher; /**< this outfix should not be run in small-block
+ * execution, as it will be handled by the sbmatcher
+ * HWLM table. */
+ u8 eod; /* suffix is triggered by the etable --> can only produce eod
+ * matches */
+};
+
+#define MAX_STORED_LEFTFIX_LAG 127 /* max leftfix lag that we can store in one
+ * whole byte (OWB) (streaming only). Other
+ * values in OWB are reserved for zombie
+ * status */
+#define OWB_ZOMBIE_ALWAYS_YES 128 /* nfa will always answer yes to any rose
+ * prefix checks */
+
/* offset of the status flags in the stream state. */
#define ROSE_STATE_OFFSET_STATUS_FLAGS 0
-
+
/* offset of role mmbit in stream state (just after the status flag byte). */
#define ROSE_STATE_OFFSET_ROLE_MMBIT sizeof(u8)
-
-/**
- * \brief Rose state offsets.
- *
- * Stores pre-calculated offsets (in bytes) to MOST of the state structures
- * used by Rose, relative to the start of stream state.
- *
- * State not covered by this structure includes:
- *
+
+/**
+ * \brief Rose state offsets.
+ *
+ * Stores pre-calculated offsets (in bytes) to MOST of the state structures
+ * used by Rose, relative to the start of stream state.
+ *
+ * State not covered by this structure includes:
+ *
* -# the first byte, containing the status bitmask
- * -# the role state multibit
- */
-struct RoseStateOffsets {
- /** History buffer.
- *
+ * -# the role state multibit
+ */
+struct RoseStateOffsets {
+ /** History buffer.
+ *
* Max size of history is RoseEngine::historyRequired. */
- u32 history;
-
+ u32 history;
+
/** Exhausted multibit.
- *
+ *
* entry per exhaustible key (used by Highlander mode). If a bit is set,
- * reports with that ekey should not be delivered to the user. */
- u32 exhausted;
-
+ * reports with that ekey should not be delivered to the user. */
+ u32 exhausted;
+
/** size in bytes of exhausted multibit */
u32 exhausted_size;
-
+
/** Logical multibit.
*
* entry per logical key(operand/operator) (used by Logical Combination). */
@@ -218,46 +218,46 @@ struct RoseStateOffsets {
/** size in bytes of combination multibit */
u32 combVec_size;
- /** Multibit for active suffix/outfix engines. */
- u32 activeLeafArray;
-
+ /** Multibit for active suffix/outfix engines. */
+ u32 activeLeafArray;
+
/** Size of multibit for active suffix/outfix engines in bytes. */
u32 activeLeafArray_size;
/** Multibit for active leftfix (prefix/infix) engines. */
- u32 activeLeftArray;
-
+ u32 activeLeftArray;
+
/** Size of multibit for active leftfix (prefix/infix) engines in bytes. */
- u32 activeLeftArray_size;
-
- /** Table of lag information (stored as one byte per engine) for active
- * Rose leftfix engines. */
- u32 leftfixLagTable;
-
- /** State for anchored matchers (McClellan DFAs). */
- u32 anchorState;
-
- /** Packed Rose groups value. */
- u32 groups;
-
- /** Size of packed Rose groups value, in bytes. */
- u32 groups_size;
-
+ u32 activeLeftArray_size;
+
+ /** Table of lag information (stored as one byte per engine) for active
+ * Rose leftfix engines. */
+ u32 leftfixLagTable;
+
+ /** State for anchored matchers (McClellan DFAs). */
+ u32 anchorState;
+
+ /** Packed Rose groups value. */
+ u32 groups;
+
+ /** Size of packed Rose groups value, in bytes. */
+ u32 groups_size;
+
/** State for long literal support. */
u32 longLitState;
-
+
/** Size of the long literal state. */
u32 longLitState_size;
- /** Packed SOM location slots. */
- u32 somLocation;
-
- /** Multibit guarding SOM location slots. */
- u32 somValid;
-
- /** Multibit guarding SOM location slots. */
- u32 somWritable;
-
+ /** Packed SOM location slots. */
+ u32 somLocation;
+
+ /** Multibit guarding SOM location slots. */
+ u32 somValid;
+
+ /** Multibit guarding SOM location slots. */
+ u32 somWritable;
+
/** Size of each of the somValid and somWritable multibits, in bytes. */
u32 somMultibit_size;
@@ -265,11 +265,11 @@ struct RoseStateOffsets {
* The NFA state region extends to end. */
u32 nfaStateBegin;
- /** Total size of Rose state, in bytes. */
- u32 end;
-};
-
-struct RoseBoundaryReports {
+ /** Total size of Rose state, in bytes. */
+ u32 end;
+};
+
+struct RoseBoundaryReports {
/** \brief 0 if no reports list, otherwise offset of program to run to
* deliver reports at EOD. */
u32 reportEodOffset;
@@ -281,35 +281,35 @@ struct RoseBoundaryReports {
/** \brief 0 if no reports list, otherwise offset of program to run to
* deliver reports if EOD is at offset 0. Superset of other programs. */
u32 reportZeroEodOffset;
-};
-
-/* NFA Queue Assignment
- *
- * --- 0
- * (|) chained mpv (if present)
- * #
- * --- outfixBeginQueue -
- * | outfixes. enabled at offset 0.
- * |
- * #
- * --- outfixEndQueue -
- * | suffixes. enabled by rose roles.
- * |
- * #
- * --- leftfixBeginQueue -
- * | prefixes
- * |
- * #
- * --- ?
- * | infixes
- * |
- * #
- */
-
-#define ROSE_RUNTIME_FULL_ROSE 0
-#define ROSE_RUNTIME_PURE_LITERAL 1
-#define ROSE_RUNTIME_SINGLE_OUTFIX 2
-
+};
+
+/* NFA Queue Assignment
+ *
+ * --- 0
+ * (|) chained mpv (if present)
+ * #
+ * --- outfixBeginQueue -
+ * | outfixes. enabled at offset 0.
+ * |
+ * #
+ * --- outfixEndQueue -
+ * | suffixes. enabled by rose roles.
+ * |
+ * #
+ * --- leftfixBeginQueue -
+ * | prefixes
+ * |
+ * #
+ * --- ?
+ * | infixes
+ * |
+ * #
+ */
+
+#define ROSE_RUNTIME_FULL_ROSE 0
+#define ROSE_RUNTIME_PURE_LITERAL 1
+#define ROSE_RUNTIME_SINGLE_OUTFIX 2
+
/**
* \brief Runtime structure header for Rose.
*
@@ -323,67 +323,67 @@ struct RoseBoundaryReports {
* -# array of NFA offsets, one per queue
* -# array of state offsets, one per queue (+)
*
- * (+) stateOffset array note: Offsets in the array are either into the stream
- * state (normal case) or into the tstate region of scratch (for transient rose
- * nfas). Rose nfa info table can distinguish the cases.
- */
-struct RoseEngine {
+ * (+) stateOffset array note: Offsets in the array are either into the stream
+ * state (normal case) or into the tstate region of scratch (for transient rose
+ * nfas). Rose nfa info table can distinguish the cases.
+ */
+struct RoseEngine {
u8 pureLiteral; /* Indicator of pure literal API */
- u8 noFloatingRoots; /* only need to run the anchored table if something
- * matched in the anchored table */
- u8 requiresEodCheck; /* stuff happens at eod time */
- u8 hasOutfixesInSmallBlock; /**< has at least one outfix that must run even
- in small block scans. */
- u8 runtimeImpl; /**< can we just run the floating table or a single outfix?
- * or do we need a full rose? */
- u8 mpvTriggeredByLeaf; /**< need to check (suf|out)fixes for mpv trigger */
- u8 canExhaust; /**< every pattern has an exhaustion key */
- u8 hasSom; /**< has at least one pattern which tracks SOM. */
- u8 somHorizon; /**< width in bytes of SOM offset storage (governed by
- SOM precision) */
- u32 mode; /**< scanning mode, one of HS_MODE_{BLOCK,STREAM,VECTORED} */
- u32 historyRequired; /**< max amount of history required for streaming */
- u32 ekeyCount; /**< number of exhaustion keys */
+ u8 noFloatingRoots; /* only need to run the anchored table if something
+ * matched in the anchored table */
+ u8 requiresEodCheck; /* stuff happens at eod time */
+ u8 hasOutfixesInSmallBlock; /**< has at least one outfix that must run even
+ in small block scans. */
+ u8 runtimeImpl; /**< can we just run the floating table or a single outfix?
+ * or do we need a full rose? */
+ u8 mpvTriggeredByLeaf; /**< need to check (suf|out)fixes for mpv trigger */
+ u8 canExhaust; /**< every pattern has an exhaustion key */
+ u8 hasSom; /**< has at least one pattern which tracks SOM. */
+ u8 somHorizon; /**< width in bytes of SOM offset storage (governed by
+ SOM precision) */
+ u32 mode; /**< scanning mode, one of HS_MODE_{BLOCK,STREAM,VECTORED} */
+ u32 historyRequired; /**< max amount of history required for streaming */
+ u32 ekeyCount; /**< number of exhaustion keys */
u32 lkeyCount; /**< number of logical keys */
u32 lopCount; /**< number of logical ops */
u32 ckeyCount; /**< number of combination keys */
u32 logicalTreeOffset; /**< offset to mapping from lkey to LogicalOp */
u32 combInfoMapOffset; /**< offset to mapping from ckey to combInfo */
- u32 dkeyCount; /**< number of dedupe keys */
+ u32 dkeyCount; /**< number of dedupe keys */
u32 dkeyLogSize; /**< size of fatbit for storing dkey log (bytes) */
- u32 invDkeyOffset; /**< offset to table mapping from dkeys to the external
- * report ids */
- u32 somLocationCount; /**< number of som locations required */
+ u32 invDkeyOffset; /**< offset to table mapping from dkeys to the external
+ * report ids */
+ u32 somLocationCount; /**< number of som locations required */
u32 somLocationFatbitSize; /**< size of SOM location fatbit (bytes) */
- u32 rolesWithStateCount; // number of roles with entries in state bitset
- u32 stateSize; /* size of the state bitset
- * WARNING: not the size of the rose state */
- u32 anchorStateSize; /* size of the state for the anchor dfas */
- u32 tStateSize; /* total size of the state for transient rose nfas */
- u32 scratchStateSize; /**< uncompressed state req'd for NFAs in scratch;
- * used for sizing scratch only. */
- u32 smallWriteOffset; /**< offset of small-write matcher */
- u32 amatcherOffset; // offset of the anchored literal matcher (bytes)
- u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes)
- u32 fmatcherOffset; // offset of the floating literal matcher (bytes)
+ u32 rolesWithStateCount; // number of roles with entries in state bitset
+ u32 stateSize; /* size of the state bitset
+ * WARNING: not the size of the rose state */
+ u32 anchorStateSize; /* size of the state for the anchor dfas */
+ u32 tStateSize; /* total size of the state for transient rose nfas */
+ u32 scratchStateSize; /**< uncompressed state req'd for NFAs in scratch;
+ * used for sizing scratch only. */
+ u32 smallWriteOffset; /**< offset of small-write matcher */
+ u32 amatcherOffset; // offset of the anchored literal matcher (bytes)
+ u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes)
+ u32 fmatcherOffset; // offset of the floating literal matcher (bytes)
u32 drmatcherOffset; // offset of the delayed rebuild table (bytes)
- u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes)
+ u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes)
u32 longLitTableOffset; // offset of the long literal table
- u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern
- * involved with the anchored table to produce a full
- * match. */
- u32 fmatcherMinWidth; /**< minimum number of bytes required for a pattern
- * involved with the floating table to produce a full
- * match. */
- u32 eodmatcherMinWidth; /**< minimum number of bytes required for a pattern
- * involved with the eod table to produce a full
- * match. */
- u32 amatcherMaxBiAnchoredWidth; /**< maximum number of bytes that can still
- * produce a match for a pattern involved
- * with the anchored table. */
- u32 fmatcherMaxBiAnchoredWidth; /**< maximum number of bytes that can still
- * produce a match for a pattern involved
- * with the anchored table. */
+ u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern
+ * involved with the anchored table to produce a full
+ * match. */
+ u32 fmatcherMinWidth; /**< minimum number of bytes required for a pattern
+ * involved with the floating table to produce a full
+ * match. */
+ u32 eodmatcherMinWidth; /**< minimum number of bytes required for a pattern
+ * involved with the eod table to produce a full
+ * match. */
+ u32 amatcherMaxBiAnchoredWidth; /**< maximum number of bytes that can still
+ * produce a match for a pattern involved
+ * with the anchored table. */
+ u32 fmatcherMaxBiAnchoredWidth; /**< maximum number of bytes that can still
+ * produce a match for a pattern involved
+ * with the anchored table. */
/**
* \brief Offset of u32 array of program offsets for reports used by
@@ -407,11 +407,11 @@ struct RoseEngine {
*/
u32 anchoredProgramOffset;
- u32 activeArrayCount; //number of nfas tracked in the active array
- u32 activeLeftCount; //number of nfas tracked in the active rose array
- u32 queueCount; /**< number of nfa queues */
+ u32 activeArrayCount; //number of nfas tracked in the active array
+ u32 activeLeftCount; //number of nfas tracked in the active rose array
+ u32 queueCount; /**< number of nfa queues */
u32 activeQueueArraySize; //!< size of fatbit for active queues (bytes)
-
+
u32 eagerIterOffset; /**< offset to sparse iter for eager prefixes or 0 if
* none */
@@ -422,72 +422,72 @@ struct RoseEngine {
/** \brief Size of the handled keys fatbit in scratch (bytes). */
u32 handledKeyFatbitSize;
- u32 leftOffset;
- u32 roseCount;
-
+ u32 leftOffset;
+ u32 roseCount;
+
u32 eodProgramOffset; //!< EOD program, otherwise 0.
u32 flushCombProgramOffset; /**< FlushCombination program, otherwise 0 */
u32 lastFlushCombProgramOffset; /**< LastFlushCombination program,
* otherwise 0 */
-
- u32 lastByteHistoryIterOffset; // if non-zero
-
- /** \brief Minimum number of bytes required to match. */
- u32 minWidth;
-
- /** \brief Minimum number of bytes required to match, excluding boundary
- * reports. */
- u32 minWidthExcludingBoundaries;
-
- u32 maxBiAnchoredWidth; /* ROSE_BOUND_INF if any non bianchored patterns
- * present */
- u32 anchoredDistance; // region to run the anchored table over
- u32 anchoredMinDistance; /* start of region to run anchored table over */
- u32 floatingDistance; /* end of region to run the floating table over
- ROSE_BOUND_INF if not bounded */
- u32 floatingMinDistance; /* start of region to run floating table over */
- u32 smallBlockDistance; /* end of region to run the floating table over
- ROSE_BOUND_INF if not bounded */
- u32 floatingMinLiteralMatchOffset; /* the minimum offset that we can get a
- * 'valid' match from the floating
- * table */
- u32 nfaInfoOffset; /* offset to the nfa info offset array */
- rose_group initialGroups;
+
+ u32 lastByteHistoryIterOffset; // if non-zero
+
+ /** \brief Minimum number of bytes required to match. */
+ u32 minWidth;
+
+ /** \brief Minimum number of bytes required to match, excluding boundary
+ * reports. */
+ u32 minWidthExcludingBoundaries;
+
+ u32 maxBiAnchoredWidth; /* ROSE_BOUND_INF if any non bianchored patterns
+ * present */
+ u32 anchoredDistance; // region to run the anchored table over
+ u32 anchoredMinDistance; /* start of region to run anchored table over */
+ u32 floatingDistance; /* end of region to run the floating table over
+ ROSE_BOUND_INF if not bounded */
+ u32 floatingMinDistance; /* start of region to run floating table over */
+ u32 smallBlockDistance; /* end of region to run the floating table over
+ ROSE_BOUND_INF if not bounded */
+ u32 floatingMinLiteralMatchOffset; /* the minimum offset that we can get a
+ * 'valid' match from the floating
+ * table */
+ u32 nfaInfoOffset; /* offset to the nfa info offset array */
+ rose_group initialGroups;
rose_group floating_group_mask; /* groups that are used by the ftable */
- u32 size; // (bytes)
- u32 delay_count; /* number of delayed literal ids. */
+ u32 size; // (bytes)
+ u32 delay_count; /* number of delayed literal ids. */
u32 delay_fatbit_size; //!< size of each delay fatbit in scratch (bytes)
- u32 anchored_count; /* number of anchored literal ids */
+ u32 anchored_count; /* number of anchored literal ids */
u32 anchored_fatbit_size; //!< size of each anch fatbit in scratch (bytes)
- u32 maxFloatingDelayedMatch; /* max offset that a delayed literal can
- * usefully be reported */
- u32 delayRebuildLength; /* length of the history region which needs to be
- * rescanned when we are doing a delayed literal
- * rebuild scan. */
- struct RoseStateOffsets stateOffsets;
- struct RoseBoundaryReports boundary;
- u32 totalNumLiterals; /* total number of literals including dr */
- u32 asize; /* size of the atable */
- u32 outfixBeginQueue; /* first outfix queue */
- u32 outfixEndQueue; /* one past the last outfix queue */
- u32 leftfixBeginQueue; /* first prefix/infix queue */
- u32 initMpvNfa; /* (allegedly chained) mpv to force on at init */
- u32 rosePrefixCount; /* number of rose prefixes */
- u32 activeLeftIterOffset; /* mmbit_sparse_iter over non-transient roses */
- u32 ematcherRegionSize; /* max region size to pass to ematcher */
- u32 somRevCount; /**< number of som reverse nfas */
- u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */
+ u32 maxFloatingDelayedMatch; /* max offset that a delayed literal can
+ * usefully be reported */
+ u32 delayRebuildLength; /* length of the history region which needs to be
+ * rescanned when we are doing a delayed literal
+ * rebuild scan. */
+ struct RoseStateOffsets stateOffsets;
+ struct RoseBoundaryReports boundary;
+ u32 totalNumLiterals; /* total number of literals including dr */
+ u32 asize; /* size of the atable */
+ u32 outfixBeginQueue; /* first outfix queue */
+ u32 outfixEndQueue; /* one past the last outfix queue */
+ u32 leftfixBeginQueue; /* first prefix/infix queue */
+ u32 initMpvNfa; /* (allegedly chained) mpv to force on at init */
+ u32 rosePrefixCount; /* number of rose prefixes */
+ u32 activeLeftIterOffset; /* mmbit_sparse_iter over non-transient roses */
+ u32 ematcherRegionSize; /* max region size to pass to ematcher */
+ u32 somRevCount; /**< number of som reverse nfas */
+ u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */
u32 longLitStreamState; // size in bytes
-
- struct scatter_full_plan state_init;
-};
-
-struct ALIGN_CL_DIRECTIVE anchored_matcher_info {
- u32 next_offset; /* relative to this, 0 for end */
- u32 state_offset; /* relative to anchorState */
- u32 anchoredMinDistance; /* start of region to run anchored table over */
-};
-
+
+ struct scatter_full_plan state_init;
+};
+
+struct ALIGN_CL_DIRECTIVE anchored_matcher_info {
+ u32 next_offset; /* relative to this, 0 for end */
+ u32 state_offset; /* relative to anchorState */
+ u32 anchoredMinDistance; /* start of region to run anchored table over */
+};
+
/**
* \brief Long literal subtable for a particular mode (caseful or nocase).
*/
@@ -554,106 +554,106 @@ struct RoseLongLitHashEntry {
u32 str_len;
};
-static really_inline
-const struct anchored_matcher_info *getALiteralMatcher(
- const struct RoseEngine *t) {
- if (!t->amatcherOffset) {
- return NULL;
- }
-
- const char *lt = (const char *)t + t->amatcherOffset;
- assert(ISALIGNED_CL(lt));
- return (const struct anchored_matcher_info *)lt;
-}
-
-struct HWLM;
-
-static really_inline
-const struct HWLM *getFLiteralMatcher(const struct RoseEngine *t) {
- if (!t->fmatcherOffset) {
- return NULL;
- }
-
- const char *lt = (const char *)t + t->fmatcherOffset;
- assert(ISALIGNED_CL(lt));
- return (const struct HWLM *)lt;
-}
-
-static really_inline
-const void *getSBLiteralMatcher(const struct RoseEngine *t) {
- if (!t->sbmatcherOffset) {
- return NULL;
- }
-
- const char *matcher = (const char *)t + t->sbmatcherOffset;
- assert(ISALIGNED_N(matcher, 8));
- return matcher;
-}
-
-static really_inline
-const struct LeftNfaInfo *getLeftTable(const struct RoseEngine *t) {
- const struct LeftNfaInfo *r
- = (const struct LeftNfaInfo *)((const char *)t + t->leftOffset);
- assert(ISALIGNED_N(r, 4));
- return r;
-}
-
-struct mmbit_sparse_iter; // forward decl
-
-static really_inline
-const struct mmbit_sparse_iter *getActiveLeftIter(const struct RoseEngine *t) {
- assert(t->activeLeftIterOffset);
- const struct mmbit_sparse_iter *it = (const struct mmbit_sparse_iter *)
- ((const char *)t + t->activeLeftIterOffset);
- assert(ISALIGNED_N(it, 4));
- return it;
-}
-
-static really_inline
-const struct NfaInfo *getNfaInfoByQueue(const struct RoseEngine *t, u32 qi) {
- const struct NfaInfo *infos
- = (const struct NfaInfo *)((const char *)t + t->nfaInfoOffset);
- assert(ISALIGNED_N(infos, sizeof(u32)));
-
- return &infos[qi];
-}
-
-static really_inline
-const struct NFA *getNfaByInfo(const struct RoseEngine *t,
- const struct NfaInfo *info) {
- return (const struct NFA *)((const char *)t + info->nfaOffset);
-}
-
-static really_inline
-const struct NFA *getNfaByQueue(const struct RoseEngine *t, u32 qi) {
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
- return getNfaByInfo(t, info);
-}
-
-static really_inline
-u32 queueToLeftIndex(const struct RoseEngine *t, u32 qi) {
- assert(qi >= t->leftfixBeginQueue);
- return qi - t->leftfixBeginQueue;
-}
-
-static really_inline
-const struct LeftNfaInfo *getLeftInfoByQueue(const struct RoseEngine *t,
- u32 qi) {
- const struct LeftNfaInfo *infos = getLeftTable(t);
- return &infos[queueToLeftIndex(t, qi)];
-}
-
-struct SmallWriteEngine;
-
-static really_inline
-const struct SmallWriteEngine *getSmallWrite(const struct RoseEngine *t) {
- if (!t->smallWriteOffset) {
- return NULL;
- }
-
- const struct SmallWriteEngine *smwr =
- (const struct SmallWriteEngine *)((const char *)t + t->smallWriteOffset);
- return smwr;
-}
-
-#endif // ROSE_INTERNAL_H
+static really_inline
+const struct anchored_matcher_info *getALiteralMatcher(
+ const struct RoseEngine *t) {
+ if (!t->amatcherOffset) {
+ return NULL;
+ }
+
+ const char *lt = (const char *)t + t->amatcherOffset;
+ assert(ISALIGNED_CL(lt));
+ return (const struct anchored_matcher_info *)lt;
+}
+
+struct HWLM;
+
+static really_inline
+const struct HWLM *getFLiteralMatcher(const struct RoseEngine *t) {
+ if (!t->fmatcherOffset) {
+ return NULL;
+ }
+
+ const char *lt = (const char *)t + t->fmatcherOffset;
+ assert(ISALIGNED_CL(lt));
+ return (const struct HWLM *)lt;
+}
+
+static really_inline
+const void *getSBLiteralMatcher(const struct RoseEngine *t) {
+ if (!t->sbmatcherOffset) {
+ return NULL;
+ }
+
+ const char *matcher = (const char *)t + t->sbmatcherOffset;
+ assert(ISALIGNED_N(matcher, 8));
+ return matcher;
+}
+
+static really_inline
+const struct LeftNfaInfo *getLeftTable(const struct RoseEngine *t) {
+ const struct LeftNfaInfo *r
+ = (const struct LeftNfaInfo *)((const char *)t + t->leftOffset);
+ assert(ISALIGNED_N(r, 4));
+ return r;
+}
+
+struct mmbit_sparse_iter; // forward decl
+
+static really_inline
+const struct mmbit_sparse_iter *getActiveLeftIter(const struct RoseEngine *t) {
+ assert(t->activeLeftIterOffset);
+ const struct mmbit_sparse_iter *it = (const struct mmbit_sparse_iter *)
+ ((const char *)t + t->activeLeftIterOffset);
+ assert(ISALIGNED_N(it, 4));
+ return it;
+}
+
+static really_inline
+const struct NfaInfo *getNfaInfoByQueue(const struct RoseEngine *t, u32 qi) {
+ const struct NfaInfo *infos
+ = (const struct NfaInfo *)((const char *)t + t->nfaInfoOffset);
+ assert(ISALIGNED_N(infos, sizeof(u32)));
+
+ return &infos[qi];
+}
+
+static really_inline
+const struct NFA *getNfaByInfo(const struct RoseEngine *t,
+ const struct NfaInfo *info) {
+ return (const struct NFA *)((const char *)t + info->nfaOffset);
+}
+
+static really_inline
+const struct NFA *getNfaByQueue(const struct RoseEngine *t, u32 qi) {
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+ return getNfaByInfo(t, info);
+}
+
+static really_inline
+u32 queueToLeftIndex(const struct RoseEngine *t, u32 qi) {
+ assert(qi >= t->leftfixBeginQueue);
+ return qi - t->leftfixBeginQueue;
+}
+
+static really_inline
+const struct LeftNfaInfo *getLeftInfoByQueue(const struct RoseEngine *t,
+ u32 qi) {
+ const struct LeftNfaInfo *infos = getLeftTable(t);
+ return &infos[queueToLeftIndex(t, qi)];
+}
+
+struct SmallWriteEngine;
+
+static really_inline
+const struct SmallWriteEngine *getSmallWrite(const struct RoseEngine *t) {
+ if (!t->smallWriteOffset) {
+ return NULL;
+ }
+
+ const struct SmallWriteEngine *smwr =
+ (const struct SmallWriteEngine *)((const char *)t + t->smallWriteOffset);
+ return smwr;
+}
+
+#endif // ROSE_INTERNAL_H
diff --git a/contrib/libs/hyperscan/src/rose/rose_types.h b/contrib/libs/hyperscan/src/rose/rose_types.h
index 5e22191aec..9dcef1cef0 100644
--- a/contrib/libs/hyperscan/src/rose/rose_types.h
+++ b/contrib/libs/hyperscan/src/rose/rose_types.h
@@ -1,42 +1,42 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/** \file
* \brief Rose runtime types (callbacks, etc).
*/
-#ifndef ROSE_TYPES_H
-#define ROSE_TYPES_H
-
-#include "ue2common.h"
-
+#ifndef ROSE_TYPES_H
+#define ROSE_TYPES_H
+
+#include "ue2common.h"
+
struct hs_scratch;
-
+
/**
* \brief Continue without checking for exhaustion.
*
@@ -65,7 +65,7 @@ typedef int (*RoseCallback)(u64a offset, ReportID id,
*
* \see RoseCallback
*/
-typedef int (*RoseCallbackSom)(u64a from_offset, u64a to_offset, ReportID id,
+typedef int (*RoseCallbackSom)(u64a from_offset, u64a to_offset, ReportID id,
struct hs_scratch *scratch);
-
-#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/runtime.h b/contrib/libs/hyperscan/src/rose/runtime.h
index 36508c0d67..5fbb2b7416 100644
--- a/contrib/libs/hyperscan/src/rose/runtime.h
+++ b/contrib/libs/hyperscan/src/rose/runtime.h
@@ -1,44 +1,44 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Runtime functions shared between various Rose runtime code.
- */
-
-#ifndef ROSE_RUNTIME_H
-#define ROSE_RUNTIME_H
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Runtime functions shared between various Rose runtime code.
+ */
+
+#ifndef ROSE_RUNTIME_H
+#define ROSE_RUNTIME_H
+
#include "rose_internal.h"
-#include "scratch.h"
-#include "util/partial_store.h"
-
-/*
- * ROSE STATE LAYOUT:
+#include "scratch.h"
+#include "util/partial_store.h"
+
+/*
+ * ROSE STATE LAYOUT:
*
* - runtime status byte (halt status, delay rebuild dirty, etc)
* - rose state multibit
@@ -51,83 +51,83 @@
* - exhausted bitvector
* - som slots, som multibit arrays
* - nfa stream state (for each nfa)
- */
-
-#define rose_inline really_inline
-
+ */
+
+#define rose_inline really_inline
+
/* Maximum offset that we will eagerly run prefixes to. Beyond this point, eager
* prefixes are always run in exactly the same way as normal prefixes. */
#define EAGER_STOP_OFFSET 64
-
-
-static really_inline
-const void *getByOffset(const struct RoseEngine *t, u32 offset) {
- assert(offset < t->size);
- return (const u8 *)t + offset;
-}
-
-static really_inline
+
+
+static really_inline
+const void *getByOffset(const struct RoseEngine *t, u32 offset) {
+ assert(offset < t->size);
+ return (const u8 *)t + offset;
+}
+
+static really_inline
void *getRoleState(char *state) {
return state + ROSE_STATE_OFFSET_ROLE_MMBIT;
-}
-
-/** \brief Fetch the active array for suffix nfas. */
-static really_inline
+}
+
+/** \brief Fetch the active array for suffix nfas. */
+static really_inline
u8 *getActiveLeafArray(const struct RoseEngine *t, char *state) {
return (u8 *)(state + t->stateOffsets.activeLeafArray);
-}
-
-/** \brief Fetch the active array for rose nfas. */
-static really_inline
+}
+
+/** \brief Fetch the active array for rose nfas. */
+static really_inline
u8 *getActiveLeftArray(const struct RoseEngine *t, char *state) {
return (u8 *)(state + t->stateOffsets.activeLeftArray);
-}
-
-static really_inline
+}
+
+static really_inline
rose_group loadGroups(const struct RoseEngine *t, const char *state) {
- return partial_load_u64a(state + t->stateOffsets.groups,
- t->stateOffsets.groups_size);
-
-}
-
-static really_inline
+ return partial_load_u64a(state + t->stateOffsets.groups,
+ t->stateOffsets.groups_size);
+
+}
+
+static really_inline
void storeGroups(const struct RoseEngine *t, char *state, rose_group groups) {
- partial_store_u64a(state + t->stateOffsets.groups, groups,
- t->stateOffsets.groups_size);
-}
-
-static really_inline
+ partial_store_u64a(state + t->stateOffsets.groups, groups,
+ t->stateOffsets.groups_size);
+}
+
+static really_inline
u8 *getLongLitState(const struct RoseEngine *t, char *state) {
return (u8 *)(state + t->stateOffsets.longLitState);
-}
-
-static really_inline
+}
+
+static really_inline
u8 *getLeftfixLagTable(const struct RoseEngine *t, char *state) {
return (u8 *)(state + t->stateOffsets.leftfixLagTable);
-}
-
-static really_inline
+}
+
+static really_inline
const u8 *getLeftfixLagTableConst(const struct RoseEngine *t,
const char *state) {
return (const u8 *)(state + t->stateOffsets.leftfixLagTable);
-}
-
-static really_inline
-u32 has_chained_nfas(const struct RoseEngine *t) {
- return t->outfixBeginQueue;
-}
-
-static really_inline
-void updateLastMatchOffset(struct RoseContext *tctxt, u64a offset) {
- DEBUG_PRINTF("match @%llu, last match @%llu\n", offset,
- tctxt->lastMatchOffset);
-
- assert(offset >= tctxt->minMatchOffset);
- assert(offset >= tctxt->lastMatchOffset);
- tctxt->lastMatchOffset = offset;
-}
-
-static really_inline
+}
+
+static really_inline
+u32 has_chained_nfas(const struct RoseEngine *t) {
+ return t->outfixBeginQueue;
+}
+
+static really_inline
+void updateLastMatchOffset(struct RoseContext *tctxt, u64a offset) {
+ DEBUG_PRINTF("match @%llu, last match @%llu\n", offset,
+ tctxt->lastMatchOffset);
+
+ assert(offset >= tctxt->minMatchOffset);
+ assert(offset >= tctxt->lastMatchOffset);
+ tctxt->lastMatchOffset = offset;
+}
+
+static really_inline
void updateLastCombMatchOffset(struct RoseContext *tctxt, u64a offset) {
DEBUG_PRINTF("match @%llu, last match @%llu\n", offset,
tctxt->lastCombMatchOffset);
@@ -137,24 +137,24 @@ void updateLastCombMatchOffset(struct RoseContext *tctxt, u64a offset) {
}
static really_inline
-void updateMinMatchOffset(struct RoseContext *tctxt, u64a offset) {
- DEBUG_PRINTF("min match now @%llu, was @%llu\n", offset,
- tctxt->minMatchOffset);
-
- assert(offset >= tctxt->minMatchOffset);
- assert(offset >= tctxt->minNonMpvMatchOffset);
- tctxt->minMatchOffset = offset;
- tctxt->minNonMpvMatchOffset = offset;
-}
-
-static really_inline
-void updateMinMatchOffsetFromMpv(struct RoseContext *tctxt, u64a offset) {
- DEBUG_PRINTF("min match now @%llu, was @%llu\n", offset,
- tctxt->minMatchOffset);
-
- assert(offset >= tctxt->minMatchOffset);
- assert(tctxt->minNonMpvMatchOffset >= tctxt->minMatchOffset);
- tctxt->minMatchOffset = offset;
- tctxt->minNonMpvMatchOffset = MAX(tctxt->minNonMpvMatchOffset, offset);
-}
-#endif
+void updateMinMatchOffset(struct RoseContext *tctxt, u64a offset) {
+ DEBUG_PRINTF("min match now @%llu, was @%llu\n", offset,
+ tctxt->minMatchOffset);
+
+ assert(offset >= tctxt->minMatchOffset);
+ assert(offset >= tctxt->minNonMpvMatchOffset);
+ tctxt->minMatchOffset = offset;
+ tctxt->minNonMpvMatchOffset = offset;
+}
+
+static really_inline
+void updateMinMatchOffsetFromMpv(struct RoseContext *tctxt, u64a offset) {
+ DEBUG_PRINTF("min match now @%llu, was @%llu\n", offset,
+ tctxt->minMatchOffset);
+
+ assert(offset >= tctxt->minMatchOffset);
+ assert(tctxt->minNonMpvMatchOffset >= tctxt->minMatchOffset);
+ tctxt->minMatchOffset = offset;
+ tctxt->minNonMpvMatchOffset = MAX(tctxt->minNonMpvMatchOffset, offset);
+}
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/stream.c b/contrib/libs/hyperscan/src/rose/stream.c
index 3fa8e2bd46..26268dd574 100644
--- a/contrib/libs/hyperscan/src/rose/stream.c
+++ b/contrib/libs/hyperscan/src/rose/stream.c
@@ -1,433 +1,433 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "catchup.h"
-#include "counting_miracle.h"
-#include "infix.h"
-#include "match.h"
-#include "miracle.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "catchup.h"
+#include "counting_miracle.h"
+#include "infix.h"
+#include "match.h"
+#include "miracle.h"
#include "program_runtime.h"
#include "rose.h"
#include "rose_internal.h"
#include "stream_long_lit.h"
-#include "hwlm/hwlm.h"
-#include "nfa/mcclellan.h"
-#include "nfa/nfa_api.h"
-#include "nfa/nfa_api_queue.h"
-#include "nfa/nfa_internal.h"
-#include "util/fatbit.h"
-
-static rose_inline
-void runAnchoredTableStream(const struct RoseEngine *t, const void *atable,
- size_t alen, u64a offset,
- struct hs_scratch *scratch) {
+#include "hwlm/hwlm.h"
+#include "nfa/mcclellan.h"
+#include "nfa/nfa_api.h"
+#include "nfa/nfa_api_queue.h"
+#include "nfa/nfa_internal.h"
+#include "util/fatbit.h"
+
+static rose_inline
+void runAnchoredTableStream(const struct RoseEngine *t, const void *atable,
+ size_t alen, u64a offset,
+ struct hs_scratch *scratch) {
char *state_base = scratch->core_info.state + t->stateOffsets.anchorState;
- const struct anchored_matcher_info *curr = atable;
-
- do {
- DEBUG_PRINTF("--anchored nfa (+%u) no %u so %u\n",
- curr->anchoredMinDistance, curr->next_offset,
- curr->state_offset);
- const struct NFA *nfa
- = (const struct NFA *)((const char *)curr + sizeof(*curr));
- assert(ISALIGNED_CL(nfa));
- assert(isMcClellanType(nfa->type));
-
- char *state = state_base + curr->state_offset;
-
- char start = 0;
- size_t adj = 0;
-
- if (offset <= curr->anchoredMinDistance) {
- adj = curr->anchoredMinDistance - offset;
- if (adj >= alen) {
- goto next_nfa;
- }
-
- start = 1;
- } else {
- // (No state decompress necessary.)
- if (nfa->type == MCCLELLAN_NFA_8) {
- if (!*(u8 *)state) {
- goto next_nfa;
- }
- } else {
+ const struct anchored_matcher_info *curr = atable;
+
+ do {
+ DEBUG_PRINTF("--anchored nfa (+%u) no %u so %u\n",
+ curr->anchoredMinDistance, curr->next_offset,
+ curr->state_offset);
+ const struct NFA *nfa
+ = (const struct NFA *)((const char *)curr + sizeof(*curr));
+ assert(ISALIGNED_CL(nfa));
+ assert(isMcClellanType(nfa->type));
+
+ char *state = state_base + curr->state_offset;
+
+ char start = 0;
+ size_t adj = 0;
+
+ if (offset <= curr->anchoredMinDistance) {
+ adj = curr->anchoredMinDistance - offset;
+ if (adj >= alen) {
+ goto next_nfa;
+ }
+
+ start = 1;
+ } else {
+ // (No state decompress necessary.)
+ if (nfa->type == MCCLELLAN_NFA_8) {
+ if (!*(u8 *)state) {
+ goto next_nfa;
+ }
+ } else {
if (!unaligned_load_u16(state)) {
- goto next_nfa;
- }
- }
- }
-
- if (nfa->type == MCCLELLAN_NFA_8) {
- nfaExecMcClellan8_SimpStream(nfa, state, scratch->core_info.buf,
- start, adj, alen, roseAnchoredCallback,
+ goto next_nfa;
+ }
+ }
+ }
+
+ if (nfa->type == MCCLELLAN_NFA_8) {
+ nfaExecMcClellan8_SimpStream(nfa, state, scratch->core_info.buf,
+ start, adj, alen, roseAnchoredCallback,
scratch);
- } else {
- nfaExecMcClellan16_SimpStream(nfa, state, scratch->core_info.buf,
+ } else {
+ nfaExecMcClellan16_SimpStream(nfa, state, scratch->core_info.buf,
start, adj, alen,
roseAnchoredCallback, scratch);
- }
-
- next_nfa:
- if (!curr->next_offset) {
- break;
- }
-
- curr = (const void *)((const char *)curr + curr->next_offset);
- } while (1);
-}
-
-
-static really_inline
-void saveStreamState(const struct NFA *nfa, struct mq *q, s64a loc) {
- DEBUG_PRINTF("offset=%llu, length=%zu, hlength=%zu, loc=%lld\n",
- q->offset, q->length, q->hlength, loc);
- nfaQueueCompressState(nfa, q, loc);
-}
-
-static really_inline
-u8 getByteBefore(const struct core_info *ci, s64a sp) {
- if (sp > 0) { // in main buffer
- assert(sp <= (s64a)ci->len);
- return ci->buf[sp - 1];
- }
- // in history buffer
- assert(-sp < (s64a)ci->hlen);
- return ci->hbuf[ci->hlen + sp - 1];
-}
-
-/** \brief Return value for \ref roseScanForMiracles. */
-enum MiracleAction {
- MIRACLE_DEAD, //!< kill off this engine
- MIRACLE_SAVED, //!< engine has been caught up and state saved
- MIRACLE_CONTINUE //!< continue running and catch up engine
-};
-
-static really_inline
+ }
+
+ next_nfa:
+ if (!curr->next_offset) {
+ break;
+ }
+
+ curr = (const void *)((const char *)curr + curr->next_offset);
+ } while (1);
+}
+
+
+static really_inline
+void saveStreamState(const struct NFA *nfa, struct mq *q, s64a loc) {
+ DEBUG_PRINTF("offset=%llu, length=%zu, hlength=%zu, loc=%lld\n",
+ q->offset, q->length, q->hlength, loc);
+ nfaQueueCompressState(nfa, q, loc);
+}
+
+static really_inline
+u8 getByteBefore(const struct core_info *ci, s64a sp) {
+ if (sp > 0) { // in main buffer
+ assert(sp <= (s64a)ci->len);
+ return ci->buf[sp - 1];
+ }
+ // in history buffer
+ assert(-sp < (s64a)ci->hlen);
+ return ci->hbuf[ci->hlen + sp - 1];
+}
+
+/** \brief Return value for \ref roseScanForMiracles. */
+enum MiracleAction {
+ MIRACLE_DEAD, //!< kill off this engine
+ MIRACLE_SAVED, //!< engine has been caught up and state saved
+ MIRACLE_CONTINUE //!< continue running and catch up engine
+};
+
+static really_inline
enum MiracleAction roseScanForMiracles(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch, u32 qi,
- const struct LeftNfaInfo *left,
- const struct NFA *nfa) {
- struct core_info *ci = &scratch->core_info;
- const u32 qCount = t->queueCount;
- struct mq *q = scratch->queues + qi;
-
- const char q_active = fatbit_isset(scratch->aqa, qCount, qi);
- DEBUG_PRINTF("q_active=%d\n", q_active);
-
- const s64a begin_loc = q_active ? q_cur_loc(q) : 0;
- const s64a end_loc = ci->len;
-
- s64a miracle_loc;
- if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) {
- goto found_miracle;
- }
-
- if (roseCountingMiracleOccurs(t, left, ci, begin_loc, end_loc,
- &miracle_loc)) {
- goto found_miracle;
- }
-
- DEBUG_PRINTF("no miracle\n");
- return MIRACLE_CONTINUE;
-
-found_miracle:
- DEBUG_PRINTF("miracle at %lld\n", miracle_loc);
-
- if (left->infix) {
- if (!q_active) {
- DEBUG_PRINTF("killing infix\n");
- return MIRACLE_DEAD;
- }
-
- DEBUG_PRINTF("skip q forward, %lld to %lld\n", begin_loc, miracle_loc);
- q_skip_forward_to(q, miracle_loc);
- if (q_last_type(q) == MQE_START) {
- DEBUG_PRINTF("miracle caused infix to die\n");
- return MIRACLE_DEAD;
- }
-
- DEBUG_PRINTF("re-init infix state\n");
- assert(q->items[q->cur].type == MQE_START);
- q->items[q->cur].location = miracle_loc;
- nfaQueueInitState(q->nfa, q);
- } else {
- if (miracle_loc > end_loc - t->historyRequired) {
+ struct hs_scratch *scratch, u32 qi,
+ const struct LeftNfaInfo *left,
+ const struct NFA *nfa) {
+ struct core_info *ci = &scratch->core_info;
+ const u32 qCount = t->queueCount;
+ struct mq *q = scratch->queues + qi;
+
+ const char q_active = fatbit_isset(scratch->aqa, qCount, qi);
+ DEBUG_PRINTF("q_active=%d\n", q_active);
+
+ const s64a begin_loc = q_active ? q_cur_loc(q) : 0;
+ const s64a end_loc = ci->len;
+
+ s64a miracle_loc;
+ if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) {
+ goto found_miracle;
+ }
+
+ if (roseCountingMiracleOccurs(t, left, ci, begin_loc, end_loc,
+ &miracle_loc)) {
+ goto found_miracle;
+ }
+
+ DEBUG_PRINTF("no miracle\n");
+ return MIRACLE_CONTINUE;
+
+found_miracle:
+ DEBUG_PRINTF("miracle at %lld\n", miracle_loc);
+
+ if (left->infix) {
+ if (!q_active) {
+ DEBUG_PRINTF("killing infix\n");
+ return MIRACLE_DEAD;
+ }
+
+ DEBUG_PRINTF("skip q forward, %lld to %lld\n", begin_loc, miracle_loc);
+ q_skip_forward_to(q, miracle_loc);
+ if (q_last_type(q) == MQE_START) {
+ DEBUG_PRINTF("miracle caused infix to die\n");
+ return MIRACLE_DEAD;
+ }
+
+ DEBUG_PRINTF("re-init infix state\n");
+ assert(q->items[q->cur].type == MQE_START);
+ q->items[q->cur].location = miracle_loc;
+ nfaQueueInitState(q->nfa, q);
+ } else {
+ if (miracle_loc > end_loc - t->historyRequired) {
char *streamState = state + getNfaInfoByQueue(t, qi)->stateOffset;
- u64a offset = ci->buf_offset + miracle_loc;
- u8 key = offset ? getByteBefore(ci, miracle_loc) : 0;
- DEBUG_PRINTF("init state, key=0x%02x, offset=%llu\n", key, offset);
- if (!nfaInitCompressedState(nfa, offset, streamState, key)) {
- return MIRACLE_DEAD;
- }
- storeRoseDelay(t, state, left, (s64a)ci->len - miracle_loc);
- return MIRACLE_SAVED;
- }
-
- DEBUG_PRINTF("re-init prefix (skip %lld->%lld)\n", begin_loc,
- miracle_loc);
- if (!q_active) {
- fatbit_set(scratch->aqa, qCount, qi);
+ u64a offset = ci->buf_offset + miracle_loc;
+ u8 key = offset ? getByteBefore(ci, miracle_loc) : 0;
+ DEBUG_PRINTF("init state, key=0x%02x, offset=%llu\n", key, offset);
+ if (!nfaInitCompressedState(nfa, offset, streamState, key)) {
+ return MIRACLE_DEAD;
+ }
+ storeRoseDelay(t, state, left, (s64a)ci->len - miracle_loc);
+ return MIRACLE_SAVED;
+ }
+
+ DEBUG_PRINTF("re-init prefix (skip %lld->%lld)\n", begin_loc,
+ miracle_loc);
+ if (!q_active) {
+ fatbit_set(scratch->aqa, qCount, qi);
initRoseQueue(t, qi, left, scratch);
- }
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, miracle_loc);
- pushQueueAt(q, 1, MQE_TOP, miracle_loc);
- nfaQueueInitState(q->nfa, q);
- }
-
- return MIRACLE_CONTINUE;
-}
-
-
-static really_inline
+ }
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, miracle_loc);
+ pushQueueAt(q, 1, MQE_TOP, miracle_loc);
+ nfaQueueInitState(q->nfa, q);
+ }
+
+ return MIRACLE_CONTINUE;
+}
+
+
+static really_inline
char roseCatchUpLeftfix(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch, u32 qi,
- const struct LeftNfaInfo *left) {
- assert(!left->transient); // active roses only
-
- struct core_info *ci = &scratch->core_info;
- const u32 qCount = t->queueCount;
- struct mq *q = scratch->queues + qi;
- const struct NFA *nfa = getNfaByQueue(t, qi);
-
- if (nfaSupportsZombie(nfa)
- && ci->buf_offset /* prefix can be alive with no q */
- && !fatbit_isset(scratch->aqa, qCount, qi)
- && isZombie(t, state, left)) {
- DEBUG_PRINTF("yawn - zombie\n");
- return 1;
- }
-
- if (left->stopTable) {
- enum MiracleAction mrv =
- roseScanForMiracles(t, state, scratch, qi, left, nfa);
- switch (mrv) {
- case MIRACLE_DEAD:
- return 0;
- case MIRACLE_SAVED:
- return 1;
- default:
- assert(mrv == MIRACLE_CONTINUE);
- break;
- }
- }
-
- if (!fatbit_set(scratch->aqa, qCount, qi)) {
+ struct hs_scratch *scratch, u32 qi,
+ const struct LeftNfaInfo *left) {
+ assert(!left->transient); // active roses only
+
+ struct core_info *ci = &scratch->core_info;
+ const u32 qCount = t->queueCount;
+ struct mq *q = scratch->queues + qi;
+ const struct NFA *nfa = getNfaByQueue(t, qi);
+
+ if (nfaSupportsZombie(nfa)
+ && ci->buf_offset /* prefix can be alive with no q */
+ && !fatbit_isset(scratch->aqa, qCount, qi)
+ && isZombie(t, state, left)) {
+ DEBUG_PRINTF("yawn - zombie\n");
+ return 1;
+ }
+
+ if (left->stopTable) {
+ enum MiracleAction mrv =
+ roseScanForMiracles(t, state, scratch, qi, left, nfa);
+ switch (mrv) {
+ case MIRACLE_DEAD:
+ return 0;
+ case MIRACLE_SAVED:
+ return 1;
+ default:
+ assert(mrv == MIRACLE_CONTINUE);
+ break;
+ }
+ }
+
+ if (!fatbit_set(scratch->aqa, qCount, qi)) {
initRoseQueue(t, qi, left, scratch);
-
- s32 sp;
- if (ci->buf_offset) {
- sp = -(s32)loadRoseDelay(t, state, left);
- } else {
- sp = 0;
- }
-
- DEBUG_PRINTF("ci->len=%zu, sp=%d, historyRequired=%u\n", ci->len, sp,
- t->historyRequired);
-
- if ( ci->len - sp + 1 < t->historyRequired) {
- // we'll end up safely in the history region.
- DEBUG_PRINTF("safely in history, skipping\n");
- storeRoseDelay(t, state, left, (s64a)ci->len - sp);
- return 1;
- }
-
- pushQueueAt(q, 0, MQE_START, sp);
- if (left->infix || ci->buf_offset + sp > 0) {
- loadStreamState(nfa, q, sp);
- } else {
- pushQueueAt(q, 1, MQE_TOP, sp);
- nfaQueueInitState(nfa, q);
- }
- } else {
- DEBUG_PRINTF("queue already active\n");
- if (q->end - q->cur == 1 && q_cur_type(q) == MQE_START) {
- DEBUG_PRINTF("empty queue, start loc=%lld\n", q_cur_loc(q));
- s64a last_loc = q_cur_loc(q);
- if (ci->len - last_loc + 1 < t->historyRequired) {
- // we'll end up safely in the history region.
- DEBUG_PRINTF("safely in history, saving state and skipping\n");
- saveStreamState(nfa, q, last_loc);
- storeRoseDelay(t, state, left, (s64a)ci->len - last_loc);
- return 1;
- }
- }
- }
-
- // Determine whether the byte before last_loc will be in the history
- // buffer on the next stream write.
- s64a last_loc = q_last_loc(q);
- s64a leftovers = ci->len - last_loc;
- if (leftovers + 1 >= t->historyRequired) {
- u32 catchup_offset = left->maxLag ? left->maxLag - 1 : 0;
- last_loc = (s64a)ci->len - catchup_offset;
- }
-
- if (left->infix) {
- if (infixTooOld(q, last_loc)) {
- DEBUG_PRINTF("infix died of old age\n");
- return 0;
- }
+
+ s32 sp;
+ if (ci->buf_offset) {
+ sp = -(s32)loadRoseDelay(t, state, left);
+ } else {
+ sp = 0;
+ }
+
+ DEBUG_PRINTF("ci->len=%zu, sp=%d, historyRequired=%u\n", ci->len, sp,
+ t->historyRequired);
+
+ if ( ci->len - sp + 1 < t->historyRequired) {
+ // we'll end up safely in the history region.
+ DEBUG_PRINTF("safely in history, skipping\n");
+ storeRoseDelay(t, state, left, (s64a)ci->len - sp);
+ return 1;
+ }
+
+ pushQueueAt(q, 0, MQE_START, sp);
+ if (left->infix || ci->buf_offset + sp > 0) {
+ loadStreamState(nfa, q, sp);
+ } else {
+ pushQueueAt(q, 1, MQE_TOP, sp);
+ nfaQueueInitState(nfa, q);
+ }
+ } else {
+ DEBUG_PRINTF("queue already active\n");
+ if (q->end - q->cur == 1 && q_cur_type(q) == MQE_START) {
+ DEBUG_PRINTF("empty queue, start loc=%lld\n", q_cur_loc(q));
+ s64a last_loc = q_cur_loc(q);
+ if (ci->len - last_loc + 1 < t->historyRequired) {
+ // we'll end up safely in the history region.
+ DEBUG_PRINTF("safely in history, saving state and skipping\n");
+ saveStreamState(nfa, q, last_loc);
+ storeRoseDelay(t, state, left, (s64a)ci->len - last_loc);
+ return 1;
+ }
+ }
+ }
+
+ // Determine whether the byte before last_loc will be in the history
+ // buffer on the next stream write.
+ s64a last_loc = q_last_loc(q);
+ s64a leftovers = ci->len - last_loc;
+ if (leftovers + 1 >= t->historyRequired) {
+ u32 catchup_offset = left->maxLag ? left->maxLag - 1 : 0;
+ last_loc = (s64a)ci->len - catchup_offset;
+ }
+
+ if (left->infix) {
+ if (infixTooOld(q, last_loc)) {
+ DEBUG_PRINTF("infix died of old age\n");
+ return 0;
+ }
reduceInfixQueue(q, last_loc, left->maxQueueLen, q->nfa->maxWidth);
- }
-
- DEBUG_PRINTF("end scan at %lld\n", last_loc);
- pushQueueNoMerge(q, MQE_END, last_loc);
-
-#ifdef DEBUG
- debugQueue(q);
-#endif
-
- char rv = nfaQueueExecRose(nfa, q, MO_INVALID_IDX);
- if (!rv) { /* nfa is dead */
- DEBUG_PRINTF("died catching up to stream boundary\n");
- return 0;
- } else {
- DEBUG_PRINTF("alive, saving stream state\n");
- if (nfaSupportsZombie(nfa) &&
- nfaGetZombieStatus(nfa, q, last_loc) == NFA_ZOMBIE_ALWAYS_YES) {
- DEBUG_PRINTF("not so fast - zombie\n");
- setAsZombie(t, state, left);
- } else {
- saveStreamState(nfa, q, last_loc);
- storeRoseDelay(t, state, left, (s64a)ci->len - last_loc);
- }
- }
-
- return 1;
-}
-
-static rose_inline
+ }
+
+ DEBUG_PRINTF("end scan at %lld\n", last_loc);
+ pushQueueNoMerge(q, MQE_END, last_loc);
+
+#ifdef DEBUG
+ debugQueue(q);
+#endif
+
+ char rv = nfaQueueExecRose(nfa, q, MO_INVALID_IDX);
+ if (!rv) { /* nfa is dead */
+ DEBUG_PRINTF("died catching up to stream boundary\n");
+ return 0;
+ } else {
+ DEBUG_PRINTF("alive, saving stream state\n");
+ if (nfaSupportsZombie(nfa) &&
+ nfaGetZombieStatus(nfa, q, last_loc) == NFA_ZOMBIE_ALWAYS_YES) {
+ DEBUG_PRINTF("not so fast - zombie\n");
+ setAsZombie(t, state, left);
+ } else {
+ saveStreamState(nfa, q, last_loc);
+ storeRoseDelay(t, state, left, (s64a)ci->len - last_loc);
+ }
+ }
+
+ return 1;
+}
+
+static rose_inline
void roseCatchUpLeftfixes(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch) {
- if (!t->activeLeftIterOffset) {
- // No sparse iter, no non-transient roses.
- return;
- }
-
- // As per UE-1629, we catch up leftfix engines to:
- // * current position (last location in the queue, or last location we
- // executed to if the queue is empty) if that position (and the byte
- // before so we can decompress the stream state) will be in the history
- // buffer on the next stream write; OR
- // * (stream_boundary - max_delay) other
-
- u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into
- * left_table */
- const u32 arCount = t->activeLeftCount;
- const struct LeftNfaInfo *left_table = getLeftTable(t);
- const struct mmbit_sparse_iter *it = getActiveLeftIter(t);
-
+ struct hs_scratch *scratch) {
+ if (!t->activeLeftIterOffset) {
+ // No sparse iter, no non-transient roses.
+ return;
+ }
+
+ // As per UE-1629, we catch up leftfix engines to:
+ // * current position (last location in the queue, or last location we
+ // executed to if the queue is empty) if that position (and the byte
+ // before so we can decompress the stream state) will be in the history
+ // buffer on the next stream write; OR
+ // * (stream_boundary - max_delay) other
+
+ u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into
+ * left_table */
+ const u32 arCount = t->activeLeftCount;
+ const struct LeftNfaInfo *left_table = getLeftTable(t);
+ const struct mmbit_sparse_iter *it = getActiveLeftIter(t);
+
struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
- u32 idx = 0;
+ u32 idx = 0;
u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state);
- for (; ri != MMB_INVALID;
+ for (; ri != MMB_INVALID;
ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) {
- const struct LeftNfaInfo *left = left_table + ri;
- u32 qi = ri + t->leftfixBeginQueue;
- DEBUG_PRINTF("leftfix %u of %u, maxLag=%u, infix=%d\n", ri, arCount,
- left->maxLag, (int)left->infix);
- if (!roseCatchUpLeftfix(t, state, scratch, qi, left)) {
- DEBUG_PRINTF("removing rose %u from active list\n", ri);
- DEBUG_PRINTF("groups old=%016llx mask=%016llx\n",
- scratch->tctxt.groups, left->squash_mask);
- scratch->tctxt.groups &= left->squash_mask;
- mmbit_unset(ara, arCount, ri);
- }
- }
-}
-
-// Saves out stream state for all our active suffix NFAs.
-static rose_inline
+ const struct LeftNfaInfo *left = left_table + ri;
+ u32 qi = ri + t->leftfixBeginQueue;
+ DEBUG_PRINTF("leftfix %u of %u, maxLag=%u, infix=%d\n", ri, arCount,
+ left->maxLag, (int)left->infix);
+ if (!roseCatchUpLeftfix(t, state, scratch, qi, left)) {
+ DEBUG_PRINTF("removing rose %u from active list\n", ri);
+ DEBUG_PRINTF("groups old=%016llx mask=%016llx\n",
+ scratch->tctxt.groups, left->squash_mask);
+ scratch->tctxt.groups &= left->squash_mask;
+ mmbit_unset(ara, arCount, ri);
+ }
+ }
+}
+
+// Saves out stream state for all our active suffix NFAs.
+static rose_inline
void roseSaveNfaStreamState(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch) {
- struct mq *queues = scratch->queues;
- u8 *aa = getActiveLeafArray(t, state);
- u32 aaCount = t->activeArrayCount;
-
- if (scratch->tctxt.mpv_inactive) {
- DEBUG_PRINTF("mpv is dead as a doornail\n");
- /* mpv if it exists is queue 0 */
- mmbit_unset(aa, aaCount, 0);
- }
-
- for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID;
- qi = mmbit_iterate(aa, aaCount, qi)) {
- DEBUG_PRINTF("saving stream state for qi=%u\n", qi);
-
- struct mq *q = queues + qi;
-
- // If it's active, it should have an active queue (as we should have
- // done some work!)
- assert(fatbit_isset(scratch->aqa, t->queueCount, qi));
-
- const struct NFA *nfa = getNfaByQueue(t, qi);
- saveStreamState(nfa, q, q_cur_loc(q));
- }
-}
-
-static rose_inline
+ struct hs_scratch *scratch) {
+ struct mq *queues = scratch->queues;
+ u8 *aa = getActiveLeafArray(t, state);
+ u32 aaCount = t->activeArrayCount;
+
+ if (scratch->tctxt.mpv_inactive) {
+ DEBUG_PRINTF("mpv is dead as a doornail\n");
+ /* mpv if it exists is queue 0 */
+ mmbit_unset(aa, aaCount, 0);
+ }
+
+ for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID;
+ qi = mmbit_iterate(aa, aaCount, qi)) {
+ DEBUG_PRINTF("saving stream state for qi=%u\n", qi);
+
+ struct mq *q = queues + qi;
+
+ // If it's active, it should have an active queue (as we should have
+ // done some work!)
+ assert(fatbit_isset(scratch->aqa, t->queueCount, qi));
+
+ const struct NFA *nfa = getNfaByQueue(t, qi);
+ saveStreamState(nfa, q, q_cur_loc(q));
+ }
+}
+
+static rose_inline
void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch, size_t length,
+ struct hs_scratch *scratch, size_t length,
u64a offset) {
- struct RoseContext *tctxt = &scratch->tctxt;
-
+ struct RoseContext *tctxt = &scratch->tctxt;
+
if (roseCatchUpTo(t, scratch, length + scratch->core_info.buf_offset) ==
HWLM_TERMINATE_MATCHING) {
- return; /* dead; no need to clean up state. */
- }
- roseSaveNfaStreamState(t, state, scratch);
- roseCatchUpLeftfixes(t, state, scratch);
+ return; /* dead; no need to clean up state. */
+ }
+ roseSaveNfaStreamState(t, state, scratch);
+ roseCatchUpLeftfixes(t, state, scratch);
roseFlushLastByteHistory(t, scratch, offset + length);
- tctxt->lastEndOffset = offset + length;
- storeGroups(t, state, tctxt->groups);
+ tctxt->lastEndOffset = offset + length;
+ storeGroups(t, state, tctxt->groups);
storeLongLiteralState(t, state, scratch);
-}
-
-static really_inline
+}
+
+static really_inline
void do_rebuild(const struct RoseEngine *t, struct hs_scratch *scratch) {
assert(t->drmatcherOffset);
- assert(!can_stop_matching(scratch));
+ assert(!can_stop_matching(scratch));
const struct HWLM *hwlm = getByOffset(t, t->drmatcherOffset);
- size_t len = MIN(scratch->core_info.hlen, t->delayRebuildLength);
- const u8 *buf = scratch->core_info.hbuf + scratch->core_info.hlen - len;
- DEBUG_PRINTF("BEGIN FLOATING REBUILD over %zu bytes\n", len);
-
+ size_t len = MIN(scratch->core_info.hlen, t->delayRebuildLength);
+ const u8 *buf = scratch->core_info.hbuf + scratch->core_info.hlen - len;
+ DEBUG_PRINTF("BEGIN FLOATING REBUILD over %zu bytes\n", len);
+
scratch->core_info.status &= ~STATUS_DELAY_DIRTY;
hwlmExec(hwlm, buf, len, 0, roseDelayRebuildCallback, scratch,
- scratch->tctxt.groups);
- assert(!can_stop_matching(scratch));
-}
-
+ scratch->tctxt.groups);
+ assert(!can_stop_matching(scratch));
+}
+
static rose_inline
void runEagerPrefixesStream(const struct RoseEngine *t,
struct hs_scratch *scratch) {
@@ -545,72 +545,72 @@ int can_never_match(const struct RoseEngine *t, char *state,
void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
DEBUG_PRINTF("OH HAI [%llu, %llu)\n", scratch->core_info.buf_offset,
scratch->core_info.buf_offset + (u64a)scratch->core_info.len);
- assert(t);
- assert(scratch->core_info.hbuf);
- assert(scratch->core_info.buf);
-
+ assert(t);
+ assert(scratch->core_info.hbuf);
+ assert(scratch->core_info.buf);
+
// We should not have been called if we've already been told to terminate
// matching.
assert(!told_to_stop_matching(scratch));
- assert(mmbit_sparse_iter_state_size(t->rolesWithStateCount)
- < MAX_SPARSE_ITER_STATES);
-
- size_t length = scratch->core_info.len;
- u64a offset = scratch->core_info.buf_offset;
-
- // We may have a maximum width (for engines constructed entirely
- // of bi-anchored patterns). If this write would result in us progressing
- // beyond this point, we cannot possibly match.
- if (t->maxBiAnchoredWidth != ROSE_BOUND_INF
- && offset + length > t->maxBiAnchoredWidth) {
- DEBUG_PRINTF("bailing, write would progress beyond maxBAWidth\n");
- return;
- }
-
+ assert(mmbit_sparse_iter_state_size(t->rolesWithStateCount)
+ < MAX_SPARSE_ITER_STATES);
+
+ size_t length = scratch->core_info.len;
+ u64a offset = scratch->core_info.buf_offset;
+
+ // We may have a maximum width (for engines constructed entirely
+ // of bi-anchored patterns). If this write would result in us progressing
+ // beyond this point, we cannot possibly match.
+ if (t->maxBiAnchoredWidth != ROSE_BOUND_INF
+ && offset + length > t->maxBiAnchoredWidth) {
+ DEBUG_PRINTF("bailing, write would progress beyond maxBAWidth\n");
+ return;
+ }
+
char *state = scratch->core_info.state;
-
- struct RoseContext *tctxt = &scratch->tctxt;
- tctxt->mpv_inactive = 0;
- tctxt->groups = loadGroups(t, state);
- tctxt->lit_offset_adjust = offset + 1; // index after last byte
- tctxt->delayLastEndOffset = offset;
- tctxt->lastEndOffset = offset;
- tctxt->filledDelayedSlots = 0;
- tctxt->lastMatchOffset = 0;
+
+ struct RoseContext *tctxt = &scratch->tctxt;
+ tctxt->mpv_inactive = 0;
+ tctxt->groups = loadGroups(t, state);
+ tctxt->lit_offset_adjust = offset + 1; // index after last byte
+ tctxt->delayLastEndOffset = offset;
+ tctxt->lastEndOffset = offset;
+ tctxt->filledDelayedSlots = 0;
+ tctxt->lastMatchOffset = 0;
tctxt->lastCombMatchOffset = offset;
- tctxt->minMatchOffset = offset;
- tctxt->minNonMpvMatchOffset = offset;
- tctxt->next_mpv_offset = 0;
-
+ tctxt->minMatchOffset = offset;
+ tctxt->minNonMpvMatchOffset = offset;
+ tctxt->next_mpv_offset = 0;
+
DEBUG_PRINTF("BEGIN: history len=%zu, buffer len=%zu groups=%016llx\n",
scratch->core_info.hlen, scratch->core_info.len, tctxt->groups);
-
- fatbit_clear(scratch->aqa);
- scratch->al_log_sum = 0;
- scratch->catchup_pq.qm_size = 0;
-
- if (t->outfixBeginQueue != t->outfixEndQueue) {
- streamInitSufPQ(t, state, scratch);
- }
-
+
+ fatbit_clear(scratch->aqa);
+ scratch->al_log_sum = 0;
+ scratch->catchup_pq.qm_size = 0;
+
+ if (t->outfixBeginQueue != t->outfixEndQueue) {
+ streamInitSufPQ(t, state, scratch);
+ }
+
runEagerPrefixesStream(t, scratch);
-
- u32 alen = t->anchoredDistance > offset ?
- MIN(length + offset, t->anchoredDistance) - offset : 0;
-
- const struct anchored_matcher_info *atable = getALiteralMatcher(t);
- if (atable && alen) {
- DEBUG_PRINTF("BEGIN ANCHORED %zu/%u\n", scratch->core_info.hlen, alen);
- runAnchoredTableStream(t, atable, alen, offset, scratch);
-
- if (can_stop_matching(scratch)) {
- goto exit;
- }
- }
-
- const struct HWLM *ftable = getFLiteralMatcher(t);
- if (ftable) {
+
+ u32 alen = t->anchoredDistance > offset ?
+ MIN(length + offset, t->anchoredDistance) - offset : 0;
+
+ const struct anchored_matcher_info *atable = getALiteralMatcher(t);
+ if (atable && alen) {
+ DEBUG_PRINTF("BEGIN ANCHORED %zu/%u\n", scratch->core_info.hlen, alen);
+ runAnchoredTableStream(t, atable, alen, offset, scratch);
+
+ if (can_stop_matching(scratch)) {
+ goto exit;
+ }
+ }
+
+ const struct HWLM *ftable = getFLiteralMatcher(t);
+ if (ftable) {
// Load in long literal table state and set up "fake history" buffers
// (ll_buf, etc, used by the CHECK_LONG_LIT instruction). Note that this
// must be done here in order to ensure that it happens before any path
@@ -618,63 +618,63 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
loadLongLiteralState(t, state, scratch);
if (t->noFloatingRoots && !roseHasInFlightMatches(t, state, scratch)) {
- DEBUG_PRINTF("skip FLOATING: no inflight matches\n");
- goto flush_delay_and_exit;
- }
-
- size_t flen = length;
- if (t->floatingDistance != ROSE_BOUND_INF) {
- flen = t->floatingDistance > offset ?
- MIN(t->floatingDistance, length + offset) - offset : 0;
- }
-
- size_t hlength = scratch->core_info.hlen;
-
+ DEBUG_PRINTF("skip FLOATING: no inflight matches\n");
+ goto flush_delay_and_exit;
+ }
+
+ size_t flen = length;
+ if (t->floatingDistance != ROSE_BOUND_INF) {
+ flen = t->floatingDistance > offset ?
+ MIN(t->floatingDistance, length + offset) - offset : 0;
+ }
+
+ size_t hlength = scratch->core_info.hlen;
+
char rebuild = hlength &&
(scratch->core_info.status & STATUS_DELAY_DIRTY) &&
(t->maxFloatingDelayedMatch == ROSE_BOUND_INF ||
offset < t->maxFloatingDelayedMatch);
- DEBUG_PRINTF("**rebuild %hhd status %hhu mfdm %u, offset %llu\n",
+ DEBUG_PRINTF("**rebuild %hhd status %hhu mfdm %u, offset %llu\n",
rebuild, scratch->core_info.status,
t->maxFloatingDelayedMatch, offset);
-
+
if (rebuild) { /* rebuild floating delayed match stuff */
do_rebuild(t, scratch);
}
- if (!flen) {
- goto flush_delay_and_exit;
- }
-
- if (flen + offset <= t->floatingMinDistance) {
- DEBUG_PRINTF("skip FLOATING: before floating min\n");
- goto flush_delay_and_exit;
- }
-
- size_t start = 0;
- if (offset < t->floatingMinDistance) {
- // This scan crosses the floating min distance, so we can use that
- // to set HWLM's "start" offset.
- start = t->floatingMinDistance - offset;
- }
- DEBUG_PRINTF("start=%zu\n", start);
-
- DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length);
+ if (!flen) {
+ goto flush_delay_and_exit;
+ }
+
+ if (flen + offset <= t->floatingMinDistance) {
+ DEBUG_PRINTF("skip FLOATING: before floating min\n");
+ goto flush_delay_and_exit;
+ }
+
+ size_t start = 0;
+ if (offset < t->floatingMinDistance) {
+ // This scan crosses the floating min distance, so we can use that
+ // to set HWLM's "start" offset.
+ start = t->floatingMinDistance - offset;
+ }
+ DEBUG_PRINTF("start=%zu\n", start);
+
+ DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length);
hwlmExecStreaming(ftable, flen, start, roseFloatingCallback, scratch,
tctxt->groups & t->floating_group_mask);
- }
-
-flush_delay_and_exit:
- DEBUG_PRINTF("flushing floating\n");
+ }
+
+flush_delay_and_exit:
+ DEBUG_PRINTF("flushing floating\n");
if (cleanUpDelayed(t, scratch, length, offset) == HWLM_TERMINATE_MATCHING) {
- return;
- }
-
-exit:
- DEBUG_PRINTF("CLEAN UP TIME\n");
- if (!can_stop_matching(scratch)) {
+ return;
+ }
+
+exit:
+ DEBUG_PRINTF("CLEAN UP TIME\n");
+ if (!can_stop_matching(scratch)) {
ensureStreamNeatAndTidy(t, state, scratch, length, offset);
- }
+ }
if (!told_to_stop_matching(scratch)
&& can_never_match(t, state, scratch, length, offset)) {
@@ -685,8 +685,8 @@ exit:
DEBUG_PRINTF("DONE STREAMING SCAN, status = %u\n",
scratch->core_info.status);
- return;
-}
+ return;
+}
static rose_inline
void roseStreamInitEod(const struct RoseEngine *t, u64a offset,
diff --git a/contrib/libs/hyperscan/src/runtime.c b/contrib/libs/hyperscan/src/runtime.c
index c5b16ab606..a3659348c5 100644
--- a/contrib/libs/hyperscan/src/runtime.c
+++ b/contrib/libs/hyperscan/src/runtime.c
@@ -1,194 +1,194 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Runtime functions.
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "allocator.h"
-#include "hs_compile.h" /* for HS_MODE_* flags */
-#include "hs_runtime.h"
-#include "hs_internal.h"
-#include "hwlm/hwlm.h"
-#include "nfa/mcclellan.h"
-#include "nfa/nfa_api.h"
-#include "nfa/nfa_api_util.h"
-#include "nfa/nfa_internal.h"
-#include "nfa/nfa_rev_api.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Runtime functions.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "allocator.h"
+#include "hs_compile.h" /* for HS_MODE_* flags */
+#include "hs_runtime.h"
+#include "hs_internal.h"
+#include "hwlm/hwlm.h"
+#include "nfa/mcclellan.h"
+#include "nfa/nfa_api.h"
+#include "nfa/nfa_api_util.h"
+#include "nfa/nfa_internal.h"
+#include "nfa/nfa_rev_api.h"
#include "nfa/sheng.h"
-#include "smallwrite/smallwrite_internal.h"
-#include "rose/rose.h"
-#include "rose/runtime.h"
-#include "database.h"
+#include "smallwrite/smallwrite_internal.h"
+#include "rose/rose.h"
+#include "rose/runtime.h"
+#include "database.h"
#include "report.h"
-#include "scratch.h"
-#include "som/som_runtime.h"
-#include "som/som_stream.h"
-#include "state.h"
+#include "scratch.h"
+#include "som/som_runtime.h"
+#include "som/som_stream.h"
+#include "state.h"
#include "stream_compress.h"
-#include "ue2common.h"
-#include "util/exhaust.h"
-#include "util/multibit.h"
-
-static really_inline
-void prefetch_data(const char *data, unsigned length) {
- __builtin_prefetch(data);
- __builtin_prefetch(data + length/2);
- __builtin_prefetch(data + length - 24);
-}
-
-/** dummy event handler for use when user does not provide one */
-static
+#include "ue2common.h"
+#include "util/exhaust.h"
+#include "util/multibit.h"
+
+static really_inline
+void prefetch_data(const char *data, unsigned length) {
+ __builtin_prefetch(data);
+ __builtin_prefetch(data + length/2);
+ __builtin_prefetch(data + length - 24);
+}
+
+/** dummy event handler for use when user does not provide one */
+static
int HS_CDECL null_onEvent(UNUSED unsigned id, UNUSED unsigned long long from,
UNUSED unsigned long long to, UNUSED unsigned flags,
UNUSED void *ctxt) {
- return 0;
-}
-
-static really_inline
-u32 getHistoryAmount(const struct RoseEngine *t, u64a offset) {
- return MIN(t->historyRequired, offset);
-}
-
-static really_inline
-u8 *getHistory(char *state, const struct RoseEngine *t, u64a offset) {
- return (u8 *)state + t->stateOffsets.history + t->historyRequired
- - MIN(t->historyRequired, offset);
-}
-
-/** \brief Sanity checks for scratch space.
- *
- * Although more at home in scratch.c, it is located here to be closer to its
- * callers.
- */
-static really_inline
-char validScratch(const struct RoseEngine *t, const struct hs_scratch *s) {
- if (!ISALIGNED_CL(s)) {
- DEBUG_PRINTF("bad alignment %p\n", s);
- return 0;
- }
-
- if (s->magic != SCRATCH_MAGIC) {
- DEBUG_PRINTF("bad magic 0x%x\n", s->magic);
- return 0;
- }
-
- if (t->mode == HS_MODE_BLOCK && t->stateOffsets.end > s->bStateSize) {
- DEBUG_PRINTF("bad state size\n");
- return 0;
- }
-
- if (t->queueCount > s->queueCount) {
- DEBUG_PRINTF("bad queue count\n");
- return 0;
- }
-
- /* TODO: add quick rose sanity checks */
-
- return 1;
-}
-
-static really_inline
-void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose,
- char *state, match_event_handler onEvent, void *userCtx,
- const char *data, size_t length, const u8 *history,
+ return 0;
+}
+
+static really_inline
+u32 getHistoryAmount(const struct RoseEngine *t, u64a offset) {
+ return MIN(t->historyRequired, offset);
+}
+
+static really_inline
+u8 *getHistory(char *state, const struct RoseEngine *t, u64a offset) {
+ return (u8 *)state + t->stateOffsets.history + t->historyRequired
+ - MIN(t->historyRequired, offset);
+}
+
+/** \brief Sanity checks for scratch space.
+ *
+ * Although more at home in scratch.c, it is located here to be closer to its
+ * callers.
+ */
+static really_inline
+char validScratch(const struct RoseEngine *t, const struct hs_scratch *s) {
+ if (!ISALIGNED_CL(s)) {
+ DEBUG_PRINTF("bad alignment %p\n", s);
+ return 0;
+ }
+
+ if (s->magic != SCRATCH_MAGIC) {
+ DEBUG_PRINTF("bad magic 0x%x\n", s->magic);
+ return 0;
+ }
+
+ if (t->mode == HS_MODE_BLOCK && t->stateOffsets.end > s->bStateSize) {
+ DEBUG_PRINTF("bad state size\n");
+ return 0;
+ }
+
+ if (t->queueCount > s->queueCount) {
+ DEBUG_PRINTF("bad queue count\n");
+ return 0;
+ }
+
+ /* TODO: add quick rose sanity checks */
+
+ return 1;
+}
+
+static really_inline
+void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose,
+ char *state, match_event_handler onEvent, void *userCtx,
+ const char *data, size_t length, const u8 *history,
size_t hlen, u64a offset, u8 status,
UNUSED unsigned int flags) {
- assert(rose);
- s->core_info.userContext = userCtx;
- s->core_info.userCallback = onEvent ? onEvent : null_onEvent;
- s->core_info.rose = rose;
- s->core_info.state = state; /* required for chained queues + evec */
-
- s->core_info.exhaustionVector = state + rose->stateOffsets.exhausted;
+ assert(rose);
+ s->core_info.userContext = userCtx;
+ s->core_info.userCallback = onEvent ? onEvent : null_onEvent;
+ s->core_info.rose = rose;
+ s->core_info.state = state; /* required for chained queues + evec */
+
+ s->core_info.exhaustionVector = state + rose->stateOffsets.exhausted;
s->core_info.status = status;
- s->core_info.buf = (const u8 *)data;
- s->core_info.len = length;
- s->core_info.hbuf = history;
- s->core_info.hlen = hlen;
- s->core_info.buf_offset = offset;
-
- /* and some stuff not actually in core info */
- s->som_set_now_offset = ~0ULL;
- s->deduper.current_report_offset = ~0ULL;
- s->deduper.som_log_dirty = 1; /* som logs have not been cleared */
+ s->core_info.buf = (const u8 *)data;
+ s->core_info.len = length;
+ s->core_info.hbuf = history;
+ s->core_info.hlen = hlen;
+ s->core_info.buf_offset = offset;
+
+ /* and some stuff not actually in core info */
+ s->som_set_now_offset = ~0ULL;
+ s->deduper.current_report_offset = ~0ULL;
+ s->deduper.som_log_dirty = 1; /* som logs have not been cleared */
s->fdr_conf = NULL;
-
+
// Rose program execution (used for some report paths) depends on these
// values being initialised.
s->tctxt.lastMatchOffset = 0;
s->tctxt.minMatchOffset = offset;
s->tctxt.minNonMpvMatchOffset = offset;
-}
-
+}
+
#define STATUS_VALID_BITS \
(STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_DELAY_DIRTY | STATUS_ERROR)
-
+
/** \brief Retrieve status bitmask from stream state. */
-static really_inline
+static really_inline
u8 getStreamStatus(const char *state) {
u8 status = *(const u8 *)(state + ROSE_STATE_OFFSET_STATUS_FLAGS);
assert((status & ~STATUS_VALID_BITS) == 0);
return status;
-}
-
+}
+
/** \brief Store status bitmask to stream state. */
-static really_inline
+static really_inline
void setStreamStatus(char *state, u8 status) {
assert((status & ~STATUS_VALID_BITS) == 0);
*(u8 *)(state + ROSE_STATE_OFFSET_STATUS_FLAGS) = status;
-}
-
-/** \brief Initialise SOM state. Used in both block and streaming mode. */
-static really_inline
+}
+
+/** \brief Initialise SOM state. Used in both block and streaming mode. */
+static really_inline
void initSomState(const struct RoseEngine *rose, char *state) {
- assert(rose && state);
- const u32 somCount = rose->somLocationCount;
+ assert(rose && state);
+ const u32 somCount = rose->somLocationCount;
mmbit_clear((u8 *)state + rose->stateOffsets.somValid, somCount);
mmbit_clear((u8 *)state + rose->stateOffsets.somWritable, somCount);
-}
-
-static really_inline
-void rawBlockExec(const struct RoseEngine *rose, struct hs_scratch *scratch) {
- assert(rose);
- assert(scratch);
-
+}
+
+static really_inline
+void rawBlockExec(const struct RoseEngine *rose, struct hs_scratch *scratch) {
+ assert(rose);
+ assert(scratch);
+
initSomState(rose, scratch->core_info.state);
-
- DEBUG_PRINTF("blockmode scan len=%zu\n", scratch->core_info.len);
-
+
+ DEBUG_PRINTF("blockmode scan len=%zu\n", scratch->core_info.len);
+
roseBlockExec(rose, scratch);
-}
-
-static really_inline
+}
+
+static really_inline
void pureLiteralInitScratch(struct hs_scratch *scratch, u64a offset) {
// Some init has already been done.
assert(offset == scratch->core_info.buf_offset);
@@ -201,160 +201,160 @@ void pureLiteralInitScratch(struct hs_scratch *scratch, u64a offset) {
}
static really_inline
-void pureLiteralBlockExec(const struct RoseEngine *rose,
- struct hs_scratch *scratch) {
- assert(rose);
- assert(scratch);
-
- const struct HWLM *ftable = getFLiteralMatcher(rose);
+void pureLiteralBlockExec(const struct RoseEngine *rose,
+ struct hs_scratch *scratch) {
+ assert(rose);
+ assert(scratch);
+
+ const struct HWLM *ftable = getFLiteralMatcher(rose);
initSomState(rose, scratch->core_info.state);
- const u8 *buffer = scratch->core_info.buf;
- size_t length = scratch->core_info.len;
- DEBUG_PRINTF("rose engine %d\n", rose->runtimeImpl);
-
+ const u8 *buffer = scratch->core_info.buf;
+ size_t length = scratch->core_info.len;
+ DEBUG_PRINTF("rose engine %d\n", rose->runtimeImpl);
+
pureLiteralInitScratch(scratch, 0);
scratch->tctxt.groups = rose->initialGroups;
hwlmExec(ftable, buffer, length, 0, roseCallback, scratch,
rose->initialGroups & rose->floating_group_mask);
-}
-
-static really_inline
+}
+
+static really_inline
void initOutfixQueue(struct mq *q, u32 qi, const struct RoseEngine *t,
struct hs_scratch *scratch) {
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
- q->nfa = getNfaByInfo(t, info);
- q->end = 0;
- q->cur = 0;
- q->state = scratch->fullState + info->fullStateOffset;
- q->streamState = (char *)scratch->core_info.state + info->stateOffset;
- q->offset = scratch->core_info.buf_offset;
- q->buffer = scratch->core_info.buf;
- q->length = scratch->core_info.len;
- q->history = scratch->core_info.hbuf;
- q->hlength = scratch->core_info.hlen;
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+ q->nfa = getNfaByInfo(t, info);
+ q->end = 0;
+ q->cur = 0;
+ q->state = scratch->fullState + info->fullStateOffset;
+ q->streamState = (char *)scratch->core_info.state + info->stateOffset;
+ q->offset = scratch->core_info.buf_offset;
+ q->buffer = scratch->core_info.buf;
+ q->length = scratch->core_info.len;
+ q->history = scratch->core_info.hbuf;
+ q->hlength = scratch->core_info.hlen;
q->cb = roseReportAdaptor;
- q->context = scratch;
- q->report_current = 0;
-
- DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
- "state=%u\n", qi, q->offset, info->fullStateOffset,
- info->stateOffset, *(u32 *)q->state);
-}
-
-static never_inline
-void soleOutfixBlockExec(const struct RoseEngine *t,
- struct hs_scratch *scratch) {
- assert(t);
- assert(scratch);
-
+ q->context = scratch;
+ q->report_current = 0;
+
+ DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
+ "state=%u\n", qi, q->offset, info->fullStateOffset,
+ info->stateOffset, *(u32 *)q->state);
+}
+
+static never_inline
+void soleOutfixBlockExec(const struct RoseEngine *t,
+ struct hs_scratch *scratch) {
+ assert(t);
+ assert(scratch);
+
initSomState(t, scratch->core_info.state);
- assert(t->outfixEndQueue == 1);
- assert(!t->amatcherOffset);
- assert(!t->ematcherOffset);
- assert(!t->fmatcherOffset);
-
- const struct NFA *nfa = getNfaByQueue(t, 0);
-
- size_t len = nfaRevAccelCheck(nfa, scratch->core_info.buf,
- scratch->core_info.len);
- if (!len) {
- return;
- }
-
- struct mq *q = scratch->queues;
+ assert(t->outfixEndQueue == 1);
+ assert(!t->amatcherOffset);
+ assert(!t->ematcherOffset);
+ assert(!t->fmatcherOffset);
+
+ const struct NFA *nfa = getNfaByQueue(t, 0);
+
+ size_t len = nfaRevAccelCheck(nfa, scratch->core_info.buf,
+ scratch->core_info.len);
+ if (!len) {
+ return;
+ }
+
+ struct mq *q = scratch->queues;
initOutfixQueue(q, 0, t, scratch);
- q->length = len; /* adjust for rev_accel */
- nfaQueueInitState(nfa, q);
- pushQueueAt(q, 0, MQE_START, 0);
- pushQueueAt(q, 1, MQE_TOP, 0);
- pushQueueAt(q, 2, MQE_END, scratch->core_info.len);
-
- char rv = nfaQueueExec(q->nfa, q, scratch->core_info.len);
-
- if (rv && nfaAcceptsEod(nfa) && len == scratch->core_info.len) {
+ q->length = len; /* adjust for rev_accel */
+ nfaQueueInitState(nfa, q);
+ pushQueueAt(q, 0, MQE_START, 0);
+ pushQueueAt(q, 1, MQE_TOP, 0);
+ pushQueueAt(q, 2, MQE_END, scratch->core_info.len);
+
+ char rv = nfaQueueExec(q->nfa, q, scratch->core_info.len);
+
+ if (rv && nfaAcceptsEod(nfa) && len == scratch->core_info.len) {
nfaCheckFinalState(nfa, q->state, q->streamState, q->length, q->cb,
scratch);
- }
-}
-
-static rose_inline
-void runSmallWriteEngine(const struct SmallWriteEngine *smwr,
- struct hs_scratch *scratch) {
- assert(smwr);
- assert(scratch);
-
- const u8 *buffer = scratch->core_info.buf;
- size_t length = scratch->core_info.len;
-
- DEBUG_PRINTF("USING SMALL WRITE\n");
-
- if (length <= smwr->start_offset) {
- DEBUG_PRINTF("too short\n");
- return;
- }
-
- const struct NFA *nfa = getSmwrNfa(smwr);
-
- size_t local_alen = length - smwr->start_offset;
- const u8 *local_buffer = buffer + smwr->start_offset;
-
+ }
+}
+
+static rose_inline
+void runSmallWriteEngine(const struct SmallWriteEngine *smwr,
+ struct hs_scratch *scratch) {
+ assert(smwr);
+ assert(scratch);
+
+ const u8 *buffer = scratch->core_info.buf;
+ size_t length = scratch->core_info.len;
+
+ DEBUG_PRINTF("USING SMALL WRITE\n");
+
+ if (length <= smwr->start_offset) {
+ DEBUG_PRINTF("too short\n");
+ return;
+ }
+
+ const struct NFA *nfa = getSmwrNfa(smwr);
+
+ size_t local_alen = length - smwr->start_offset;
+ const u8 *local_buffer = buffer + smwr->start_offset;
+
assert(isDfaType(nfa->type));
- if (nfa->type == MCCLELLAN_NFA_8) {
- nfaExecMcClellan8_B(nfa, smwr->start_offset, local_buffer,
+ if (nfa->type == MCCLELLAN_NFA_8) {
+ nfaExecMcClellan8_B(nfa, smwr->start_offset, local_buffer,
local_alen, roseReportAdaptor, scratch);
} else if (nfa->type == MCCLELLAN_NFA_16) {
nfaExecMcClellan16_B(nfa, smwr->start_offset, local_buffer,
local_alen, roseReportAdaptor, scratch);
- } else {
+ } else {
nfaExecSheng_B(nfa, smwr->start_offset, local_buffer,
local_alen, roseReportAdaptor, scratch);
- }
-}
-
-HS_PUBLIC_API
+ }
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data,
unsigned length, unsigned flags,
hs_scratch_t *scratch, match_event_handler onEvent,
void *userCtx) {
- if (unlikely(!scratch || !data)) {
- return HS_INVALID;
- }
-
- hs_error_t err = validDatabase(db);
- if (unlikely(err != HS_SUCCESS)) {
- return err;
- }
-
- const struct RoseEngine *rose = hs_get_bytecode(db);
- if (unlikely(!ISALIGNED_16(rose))) {
- return HS_INVALID;
- }
-
- if (unlikely(rose->mode != HS_MODE_BLOCK)) {
- return HS_DB_MODE_ERROR;
- }
-
- if (unlikely(!validScratch(rose, scratch))) {
- return HS_INVALID;
- }
-
+ if (unlikely(!scratch || !data)) {
+ return HS_INVALID;
+ }
+
+ hs_error_t err = validDatabase(db);
+ if (unlikely(err != HS_SUCCESS)) {
+ return err;
+ }
+
+ const struct RoseEngine *rose = hs_get_bytecode(db);
+ if (unlikely(!ISALIGNED_16(rose))) {
+ return HS_INVALID;
+ }
+
+ if (unlikely(rose->mode != HS_MODE_BLOCK)) {
+ return HS_DB_MODE_ERROR;
+ }
+
+ if (unlikely(!validScratch(rose, scratch))) {
+ return HS_INVALID;
+ }
+
if (unlikely(markScratchInUse(scratch))) {
return HS_SCRATCH_IN_USE;
}
- if (rose->minWidth > length) {
- DEBUG_PRINTF("minwidth=%u > length=%u\n", rose->minWidth, length);
+ if (rose->minWidth > length) {
+ DEBUG_PRINTF("minwidth=%u > length=%u\n", rose->minWidth, length);
unmarkScratchInUse(scratch);
- return HS_SUCCESS;
- }
-
- prefetch_data(data, length);
-
- /* populate core info in scratch */
- populateCoreInfo(scratch, rose, scratch->bstate, onEvent, userCtx, data,
+ return HS_SUCCESS;
+ }
+
+ prefetch_data(data, length);
+
+ /* populate core info in scratch */
+ populateCoreInfo(scratch, rose, scratch->bstate, onEvent, userCtx, data,
length, NULL, 0, 0, 0, flags);
-
+
clearEvec(rose, scratch->core_info.exhaustionVector);
if (rose->ckeyCount) {
scratch->core_info.logicalVector = scratch->bstate +
@@ -365,90 +365,90 @@ hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data,
clearLvec(rose, scratch->core_info.logicalVector,
scratch->core_info.combVector);
}
-
- if (!length) {
- if (rose->boundary.reportZeroEodOffset) {
+
+ if (!length) {
+ if (rose->boundary.reportZeroEodOffset) {
roseRunBoundaryProgram(rose, rose->boundary.reportZeroEodOffset, 0,
scratch);
- }
- goto set_retval;
- }
-
- if (rose->boundary.reportZeroOffset) {
+ }
+ goto set_retval;
+ }
+
+ if (rose->boundary.reportZeroOffset) {
int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset,
0, scratch);
if (rv == MO_HALT_MATCHING) {
goto set_retval;
}
- }
-
- if (rose->minWidthExcludingBoundaries > length) {
- DEBUG_PRINTF("minWidthExcludingBoundaries=%u > length=%u\n",
- rose->minWidthExcludingBoundaries, length);
- goto done_scan;
- }
-
- // Similarly, we may have a maximum width (for engines constructed entirely
- // of bi-anchored patterns).
- if (rose->maxBiAnchoredWidth != ROSE_BOUND_INF
- && length > rose->maxBiAnchoredWidth) {
- DEBUG_PRINTF("block len=%u longer than maxBAWidth=%u\n", length,
- rose->maxBiAnchoredWidth);
- goto done_scan;
- }
-
- // Is this a small write case?
- if (rose->smallWriteOffset) {
- const struct SmallWriteEngine *smwr = getSmallWrite(rose);
- assert(smwr);
-
- // Apply the small write engine if and only if the block (buffer) is
- // small enough. Otherwise, we allow rose &co to deal with it.
- if (length < smwr->largestBuffer) {
- DEBUG_PRINTF("Attempting small write of block %u bytes long.\n",
- length);
- runSmallWriteEngine(smwr, scratch);
- goto done_scan;
- }
- }
-
- switch (rose->runtimeImpl) {
- default:
- assert(0);
- case ROSE_RUNTIME_FULL_ROSE:
- rawBlockExec(rose, scratch);
- break;
- case ROSE_RUNTIME_PURE_LITERAL:
- pureLiteralBlockExec(rose, scratch);
- break;
- case ROSE_RUNTIME_SINGLE_OUTFIX:
- soleOutfixBlockExec(rose, scratch);
- break;
- }
-
-done_scan:
+ }
+
+ if (rose->minWidthExcludingBoundaries > length) {
+ DEBUG_PRINTF("minWidthExcludingBoundaries=%u > length=%u\n",
+ rose->minWidthExcludingBoundaries, length);
+ goto done_scan;
+ }
+
+ // Similarly, we may have a maximum width (for engines constructed entirely
+ // of bi-anchored patterns).
+ if (rose->maxBiAnchoredWidth != ROSE_BOUND_INF
+ && length > rose->maxBiAnchoredWidth) {
+ DEBUG_PRINTF("block len=%u longer than maxBAWidth=%u\n", length,
+ rose->maxBiAnchoredWidth);
+ goto done_scan;
+ }
+
+ // Is this a small write case?
+ if (rose->smallWriteOffset) {
+ const struct SmallWriteEngine *smwr = getSmallWrite(rose);
+ assert(smwr);
+
+ // Apply the small write engine if and only if the block (buffer) is
+ // small enough. Otherwise, we allow rose &co to deal with it.
+ if (length < smwr->largestBuffer) {
+ DEBUG_PRINTF("Attempting small write of block %u bytes long.\n",
+ length);
+ runSmallWriteEngine(smwr, scratch);
+ goto done_scan;
+ }
+ }
+
+ switch (rose->runtimeImpl) {
+ default:
+ assert(0);
+ case ROSE_RUNTIME_FULL_ROSE:
+ rawBlockExec(rose, scratch);
+ break;
+ case ROSE_RUNTIME_PURE_LITERAL:
+ pureLiteralBlockExec(rose, scratch);
+ break;
+ case ROSE_RUNTIME_SINGLE_OUTFIX:
+ soleOutfixBlockExec(rose, scratch);
+ break;
+ }
+
+done_scan:
if (unlikely(internal_matching_error(scratch))) {
unmarkScratchInUse(scratch);
return HS_UNKNOWN_ERROR;
} else if (told_to_stop_matching(scratch)) {
unmarkScratchInUse(scratch);
- return HS_SCAN_TERMINATED;
- }
-
- if (rose->hasSom) {
- int halt = flushStoredSomMatches(scratch, ~0ULL);
- if (halt) {
+ return HS_SCAN_TERMINATED;
+ }
+
+ if (rose->hasSom) {
+ int halt = flushStoredSomMatches(scratch, ~0ULL);
+ if (halt) {
unmarkScratchInUse(scratch);
- return HS_SCAN_TERMINATED;
- }
- }
-
- if (rose->boundary.reportEodOffset) {
+ return HS_SCAN_TERMINATED;
+ }
+ }
+
+ if (rose->boundary.reportEodOffset) {
roseRunBoundaryProgram(rose, rose->boundary.reportEodOffset, length,
scratch);
- }
-
-set_retval:
+ }
+
+set_retval:
if (unlikely(internal_matching_error(scratch))) {
unmarkScratchInUse(scratch);
return HS_UNKNOWN_ERROR;
@@ -466,48 +466,48 @@ set_retval:
}
}
- DEBUG_PRINTF("done. told_to_stop_matching=%d\n",
- told_to_stop_matching(scratch));
+ DEBUG_PRINTF("done. told_to_stop_matching=%d\n",
+ told_to_stop_matching(scratch));
hs_error_t rv = told_to_stop_matching(scratch) ? HS_SCAN_TERMINATED
: HS_SUCCESS;
unmarkScratchInUse(scratch);
return rv;
-}
-
-static really_inline
-void maintainHistoryBuffer(const struct RoseEngine *rose, char *state,
- const char *buffer, size_t length) {
- if (!rose->historyRequired) {
- return;
- }
-
- // Hopefully few of our users are scanning no data.
- if (unlikely(length == 0)) {
- DEBUG_PRINTF("zero-byte scan\n");
- return;
- }
-
- char *his_state = state + rose->stateOffsets.history;
-
- if (length < rose->historyRequired) {
- size_t shortfall = rose->historyRequired - length;
- memmove(his_state, his_state + rose->historyRequired - shortfall,
- shortfall);
- }
- size_t amount = MIN(rose->historyRequired, length);
-
- memcpy(his_state + rose->historyRequired - amount, buffer + length - amount,
- amount);
-#ifdef DEBUG_HISTORY
- printf("History [%u] : ", rose->historyRequired);
- for (size_t i = 0; i < rose->historyRequired; i++) {
- printf(" %02hhx", his_state[i]);
- }
- printf("\n");
-#endif
-}
-
-static really_inline
+}
+
+static really_inline
+void maintainHistoryBuffer(const struct RoseEngine *rose, char *state,
+ const char *buffer, size_t length) {
+ if (!rose->historyRequired) {
+ return;
+ }
+
+ // Hopefully few of our users are scanning no data.
+ if (unlikely(length == 0)) {
+ DEBUG_PRINTF("zero-byte scan\n");
+ return;
+ }
+
+ char *his_state = state + rose->stateOffsets.history;
+
+ if (length < rose->historyRequired) {
+ size_t shortfall = rose->historyRequired - length;
+ memmove(his_state, his_state + rose->historyRequired - shortfall,
+ shortfall);
+ }
+ size_t amount = MIN(rose->historyRequired, length);
+
+ memcpy(his_state + rose->historyRequired - amount, buffer + length - amount,
+ amount);
+#ifdef DEBUG_HISTORY
+ printf("History [%u] : ", rose->historyRequired);
+ for (size_t i = 0; i < rose->historyRequired; i++) {
+ printf(" %02hhx", his_state[i]);
+ }
+ printf("\n");
+#endif
+}
+
+static really_inline
void init_stream(struct hs_stream *s, const struct RoseEngine *rose,
char init_history) {
char *state = getMultiState(s);
@@ -522,131 +522,131 @@ void init_stream(struct hs_stream *s, const struct RoseEngine *rose,
memset(hist_end - 16, 0x5a, 16);
}
- s->rose = rose;
- s->offset = 0;
-
+ s->rose = rose;
+ s->offset = 0;
+
setStreamStatus(state, 0);
- roseInitState(rose, state);
-
+ roseInitState(rose, state);
+
clearEvec(rose, state + rose->stateOffsets.exhausted);
if (rose->ckeyCount) {
clearLvec(rose, state + rose->stateOffsets.logicalVec,
state + rose->stateOffsets.combVec);
}
-
- // SOM state multibit structures.
- initSomState(rose, state);
-}
-
-HS_PUBLIC_API
+
+ // SOM state multibit structures.
+ initSomState(rose, state);
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_open_stream(const hs_database_t *db,
UNUSED unsigned flags,
hs_stream_t **stream) {
- if (unlikely(!stream)) {
- return HS_INVALID;
- }
-
- *stream = NULL;
-
- hs_error_t err = validDatabase(db);
- if (unlikely(err != HS_SUCCESS)) {
- return err;
- }
-
- const struct RoseEngine *rose = hs_get_bytecode(db);
- if (unlikely(!ISALIGNED_16(rose))) {
- return HS_INVALID;
- }
-
- if (unlikely(rose->mode != HS_MODE_STREAM)) {
- return HS_DB_MODE_ERROR;
- }
-
- size_t stateSize = rose->stateOffsets.end;
- struct hs_stream *s = hs_stream_alloc(sizeof(struct hs_stream) + stateSize);
- if (unlikely(!s)) {
- return HS_NOMEM;
- }
-
+ if (unlikely(!stream)) {
+ return HS_INVALID;
+ }
+
+ *stream = NULL;
+
+ hs_error_t err = validDatabase(db);
+ if (unlikely(err != HS_SUCCESS)) {
+ return err;
+ }
+
+ const struct RoseEngine *rose = hs_get_bytecode(db);
+ if (unlikely(!ISALIGNED_16(rose))) {
+ return HS_INVALID;
+ }
+
+ if (unlikely(rose->mode != HS_MODE_STREAM)) {
+ return HS_DB_MODE_ERROR;
+ }
+
+ size_t stateSize = rose->stateOffsets.end;
+ struct hs_stream *s = hs_stream_alloc(sizeof(struct hs_stream) + stateSize);
+ if (unlikely(!s)) {
+ return HS_NOMEM;
+ }
+
init_stream(s, rose, 1);
-
- *stream = s;
- return HS_SUCCESS;
-}
-
-
-static really_inline
-void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
- const struct RoseEngine *rose = id->rose;
-
+
+ *stream = s;
+ return HS_SUCCESS;
+}
+
+
+static really_inline
+void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
+ const struct RoseEngine *rose = id->rose;
+
if (can_stop_matching(scratch)) {
- DEBUG_PRINTF("stream already broken\n");
- return;
- }
-
- if (isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
- DEBUG_PRINTF("stream exhausted\n");
- return;
- }
-
+ DEBUG_PRINTF("stream already broken\n");
+ return;
+ }
+
+ if (isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
+ DEBUG_PRINTF("stream exhausted\n");
+ return;
+ }
+
roseStreamEodExec(rose, id->offset, scratch);
-}
-
-static never_inline
-void soleOutfixEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
- const struct RoseEngine *t = id->rose;
-
+}
+
+static never_inline
+void soleOutfixEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
+ const struct RoseEngine *t = id->rose;
+
if (can_stop_matching(scratch)) {
- DEBUG_PRINTF("stream already broken\n");
- return;
- }
-
- if (isAllExhausted(t, scratch->core_info.exhaustionVector)) {
- DEBUG_PRINTF("stream exhausted\n");
- return;
- }
-
- assert(t->outfixEndQueue == 1);
- assert(!t->amatcherOffset);
- assert(!t->ematcherOffset);
- assert(!t->fmatcherOffset);
-
- const struct NFA *nfa = getNfaByQueue(t, 0);
-
- struct mq *q = scratch->queues;
+ DEBUG_PRINTF("stream already broken\n");
+ return;
+ }
+
+ if (isAllExhausted(t, scratch->core_info.exhaustionVector)) {
+ DEBUG_PRINTF("stream exhausted\n");
+ return;
+ }
+
+ assert(t->outfixEndQueue == 1);
+ assert(!t->amatcherOffset);
+ assert(!t->ematcherOffset);
+ assert(!t->fmatcherOffset);
+
+ const struct NFA *nfa = getNfaByQueue(t, 0);
+
+ struct mq *q = scratch->queues;
initOutfixQueue(q, 0, t, scratch);
- if (!scratch->core_info.buf_offset) {
- DEBUG_PRINTF("buf_offset is zero\n");
- return; /* no vacuous engines */
- }
-
- nfaExpandState(nfa, q->state, q->streamState, q->offset,
- queue_prev_byte(q, 0));
-
- assert(nfaAcceptsEod(nfa));
- nfaCheckFinalState(nfa, q->state, q->streamState, q->offset, q->cb,
+ if (!scratch->core_info.buf_offset) {
+ DEBUG_PRINTF("buf_offset is zero\n");
+ return; /* no vacuous engines */
+ }
+
+ nfaExpandState(nfa, q->state, q->streamState, q->offset,
+ queue_prev_byte(q, 0));
+
+ assert(nfaAcceptsEod(nfa));
+ nfaCheckFinalState(nfa, q->state, q->streamState, q->offset, q->cb,
scratch);
-}
-
-static really_inline
-void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
- match_event_handler onEvent, void *context) {
- DEBUG_PRINTF("--- report eod matches at offset %llu\n", id->offset);
- assert(onEvent);
-
- const struct RoseEngine *rose = id->rose;
- char *state = getMultiState(id);
+}
+
+static really_inline
+void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
+ match_event_handler onEvent, void *context) {
+ DEBUG_PRINTF("--- report eod matches at offset %llu\n", id->offset);
+ assert(onEvent);
+
+ const struct RoseEngine *rose = id->rose;
+ char *state = getMultiState(id);
u8 status = getStreamStatus(state);
-
+
if (status & (STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_ERROR)) {
- DEBUG_PRINTF("stream is broken, just freeing storage\n");
- return;
- }
-
- populateCoreInfo(scratch, rose, state, onEvent, context, NULL, 0,
- getHistory(state, rose, id->offset),
+ DEBUG_PRINTF("stream is broken, just freeing storage\n");
+ return;
+ }
+
+ populateCoreInfo(scratch, rose, state, onEvent, context, NULL, 0,
+ getHistory(state, rose, id->offset),
getHistoryAmount(rose, id->offset), id->offset, status, 0);
-
+
if (rose->ckeyCount) {
scratch->core_info.logicalVector = state +
rose->stateOffsets.logicalVec;
@@ -656,49 +656,49 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
}
}
- if (rose->somLocationCount) {
- loadSomFromStream(scratch, id->offset);
- }
-
- if (!id->offset) {
- if (rose->boundary.reportZeroEodOffset) {
+ if (rose->somLocationCount) {
+ loadSomFromStream(scratch, id->offset);
+ }
+
+ if (!id->offset) {
+ if (rose->boundary.reportZeroEodOffset) {
int rv = roseRunBoundaryProgram(
rose, rose->boundary.reportZeroEodOffset, 0, scratch);
if (rv == MO_HALT_MATCHING) {
return;
}
- }
- } else {
- if (rose->boundary.reportEodOffset) {
+ }
+ } else {
+ if (rose->boundary.reportEodOffset) {
int rv = roseRunBoundaryProgram(
rose, rose->boundary.reportEodOffset, id->offset, scratch);
if (rv == MO_HALT_MATCHING) {
return;
}
- }
-
- if (rose->requiresEodCheck) {
- switch (rose->runtimeImpl) {
- default:
- case ROSE_RUNTIME_PURE_LITERAL:
- assert(0);
- case ROSE_RUNTIME_FULL_ROSE:
- rawEodExec(id, scratch);
- break;
- case ROSE_RUNTIME_SINGLE_OUTFIX:
- soleOutfixEodExec(id, scratch);
- break;
- }
- }
- }
-
- if (rose->hasSom && !told_to_stop_matching(scratch)) {
- int halt = flushStoredSomMatches(scratch, ~0ULL);
- if (halt) {
- DEBUG_PRINTF("told to stop matching\n");
+ }
+
+ if (rose->requiresEodCheck) {
+ switch (rose->runtimeImpl) {
+ default:
+ case ROSE_RUNTIME_PURE_LITERAL:
+ assert(0);
+ case ROSE_RUNTIME_FULL_ROSE:
+ rawEodExec(id, scratch);
+ break;
+ case ROSE_RUNTIME_SINGLE_OUTFIX:
+ soleOutfixEodExec(id, scratch);
+ break;
+ }
+ }
+ }
+
+ if (rose->hasSom && !told_to_stop_matching(scratch)) {
+ int halt = flushStoredSomMatches(scratch, ~0ULL);
+ if (halt) {
+ DEBUG_PRINTF("told to stop matching\n");
scratch->core_info.status |= STATUS_TERMINATED;
- }
- }
+ }
+ }
if (rose->lastFlushCombProgramOffset && !told_to_stop_matching(scratch)) {
if (roseRunLastFlushCombProgram(rose, scratch, id->offset)
@@ -707,203 +707,203 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
scratch->core_info.status |= STATUS_TERMINATED;
}
}
-}
-
-HS_PUBLIC_API
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_copy_stream(hs_stream_t **to_id,
const hs_stream_t *from_id) {
- if (!to_id) {
- return HS_INVALID;
- }
-
- *to_id = NULL;
-
- if (!from_id || !from_id->rose) {
- return HS_INVALID;
- }
-
- const struct RoseEngine *rose = from_id->rose;
- size_t stateSize = sizeof(struct hs_stream) + rose->stateOffsets.end;
-
- struct hs_stream *s = hs_stream_alloc(stateSize);
- if (!s) {
- return HS_NOMEM;
- }
-
- memcpy(s, from_id, stateSize);
-
- *to_id = s;
-
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+ if (!to_id) {
+ return HS_INVALID;
+ }
+
+ *to_id = NULL;
+
+ if (!from_id || !from_id->rose) {
+ return HS_INVALID;
+ }
+
+ const struct RoseEngine *rose = from_id->rose;
+ size_t stateSize = sizeof(struct hs_stream) + rose->stateOffsets.end;
+
+ struct hs_stream *s = hs_stream_alloc(stateSize);
+ if (!s) {
+ return HS_NOMEM;
+ }
+
+ memcpy(s, from_id, stateSize);
+
+ *to_id = s;
+
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id,
const hs_stream_t *from_id,
hs_scratch_t *scratch,
match_event_handler onEvent,
void *context) {
- if (!from_id || !from_id->rose) {
- return HS_INVALID;
- }
-
- if (!to_id || to_id->rose != from_id->rose) {
- return HS_INVALID;
- }
-
- if (to_id == from_id) {
- return HS_INVALID;
- }
-
- if (onEvent) {
- if (!scratch || !validScratch(to_id->rose, scratch)) {
- return HS_INVALID;
- }
+ if (!from_id || !from_id->rose) {
+ return HS_INVALID;
+ }
+
+ if (!to_id || to_id->rose != from_id->rose) {
+ return HS_INVALID;
+ }
+
+ if (to_id == from_id) {
+ return HS_INVALID;
+ }
+
+ if (onEvent) {
+ if (!scratch || !validScratch(to_id->rose, scratch)) {
+ return HS_INVALID;
+ }
if (unlikely(markScratchInUse(scratch))) {
return HS_SCRATCH_IN_USE;
}
- report_eod_matches(to_id, scratch, onEvent, context);
+ report_eod_matches(to_id, scratch, onEvent, context);
if (unlikely(internal_matching_error(scratch))) {
unmarkScratchInUse(scratch);
return HS_UNKNOWN_ERROR;
}
unmarkScratchInUse(scratch);
- }
-
- size_t stateSize
- = sizeof(struct hs_stream) + from_id->rose->stateOffsets.end;
-
- memcpy(to_id, from_id, stateSize);
-
- return HS_SUCCESS;
-}
-
-static really_inline
-void rawStreamExec(struct hs_stream *stream_state, struct hs_scratch *scratch) {
- assert(stream_state);
- assert(scratch);
+ }
+
+ size_t stateSize
+ = sizeof(struct hs_stream) + from_id->rose->stateOffsets.end;
+
+ memcpy(to_id, from_id, stateSize);
+
+ return HS_SUCCESS;
+}
+
+static really_inline
+void rawStreamExec(struct hs_stream *stream_state, struct hs_scratch *scratch) {
+ assert(stream_state);
+ assert(scratch);
assert(!can_stop_matching(scratch));
-
- DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n",
- stream_state->offset, scratch->core_info.len);
-
- const struct RoseEngine *rose = stream_state->rose;
- assert(rose);
+
+ DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n",
+ stream_state->offset, scratch->core_info.len);
+
+ const struct RoseEngine *rose = stream_state->rose;
+ assert(rose);
roseStreamExec(rose, scratch);
-
- if (!told_to_stop_matching(scratch) &&
- isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
- DEBUG_PRINTF("stream exhausted\n");
+
+ if (!told_to_stop_matching(scratch) &&
+ isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
+ DEBUG_PRINTF("stream exhausted\n");
scratch->core_info.status |= STATUS_EXHAUSTED;
- }
-}
-
-static really_inline
-void pureLiteralStreamExec(struct hs_stream *stream_state,
- struct hs_scratch *scratch) {
- assert(stream_state);
- assert(scratch);
+ }
+}
+
+static really_inline
+void pureLiteralStreamExec(struct hs_stream *stream_state,
+ struct hs_scratch *scratch) {
+ assert(stream_state);
+ assert(scratch);
assert(!can_stop_matching(scratch));
-
- const struct RoseEngine *rose = stream_state->rose;
- const struct HWLM *ftable = getFLiteralMatcher(rose);
-
- size_t len2 = scratch->core_info.len;
-
- DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n",
- stream_state->offset, scratch->core_info.len);
-
+
+ const struct RoseEngine *rose = stream_state->rose;
+ const struct HWLM *ftable = getFLiteralMatcher(rose);
+
+ size_t len2 = scratch->core_info.len;
+
+ DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n",
+ stream_state->offset, scratch->core_info.len);
+
pureLiteralInitScratch(scratch, stream_state->offset);
scratch->tctxt.groups = loadGroups(rose, scratch->core_info.state);
- // Pure literal cases don't have floatingMinDistance set, so we always
- // start the match region at zero.
- const size_t start = 0;
-
+ // Pure literal cases don't have floatingMinDistance set, so we always
+ // start the match region at zero.
+ const size_t start = 0;
+
hwlmExecStreaming(ftable, len2, start, roseCallback, scratch,
rose->initialGroups & rose->floating_group_mask);
-
- if (!told_to_stop_matching(scratch) &&
- isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
- DEBUG_PRINTF("stream exhausted\n");
+
+ if (!told_to_stop_matching(scratch) &&
+ isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
+ DEBUG_PRINTF("stream exhausted\n");
scratch->core_info.status |= STATUS_EXHAUSTED;
- }
-}
-
-static never_inline
-void soleOutfixStreamExec(struct hs_stream *stream_state,
- struct hs_scratch *scratch) {
- assert(stream_state);
- assert(scratch);
+ }
+}
+
+static never_inline
+void soleOutfixStreamExec(struct hs_stream *stream_state,
+ struct hs_scratch *scratch) {
+ assert(stream_state);
+ assert(scratch);
assert(!can_stop_matching(scratch));
-
- const struct RoseEngine *t = stream_state->rose;
- assert(t->outfixEndQueue == 1);
- assert(!t->amatcherOffset);
- assert(!t->ematcherOffset);
- assert(!t->fmatcherOffset);
-
- const struct NFA *nfa = getNfaByQueue(t, 0);
-
- struct mq *q = scratch->queues;
+
+ const struct RoseEngine *t = stream_state->rose;
+ assert(t->outfixEndQueue == 1);
+ assert(!t->amatcherOffset);
+ assert(!t->ematcherOffset);
+ assert(!t->fmatcherOffset);
+
+ const struct NFA *nfa = getNfaByQueue(t, 0);
+
+ struct mq *q = scratch->queues;
initOutfixQueue(q, 0, t, scratch);
- if (!scratch->core_info.buf_offset) {
- nfaQueueInitState(nfa, q);
- pushQueueAt(q, 0, MQE_START, 0);
- pushQueueAt(q, 1, MQE_TOP, 0);
- pushQueueAt(q, 2, MQE_END, scratch->core_info.len);
- } else {
- nfaExpandState(nfa, q->state, q->streamState, q->offset,
- queue_prev_byte(q, 0));
- pushQueueAt(q, 0, MQE_START, 0);
- pushQueueAt(q, 1, MQE_END, scratch->core_info.len);
- }
-
- if (nfaQueueExec(q->nfa, q, scratch->core_info.len)) {
- nfaQueueCompressState(nfa, q, scratch->core_info.len);
- } else if (!told_to_stop_matching(scratch)) {
+ if (!scratch->core_info.buf_offset) {
+ nfaQueueInitState(nfa, q);
+ pushQueueAt(q, 0, MQE_START, 0);
+ pushQueueAt(q, 1, MQE_TOP, 0);
+ pushQueueAt(q, 2, MQE_END, scratch->core_info.len);
+ } else {
+ nfaExpandState(nfa, q->state, q->streamState, q->offset,
+ queue_prev_byte(q, 0));
+ pushQueueAt(q, 0, MQE_START, 0);
+ pushQueueAt(q, 1, MQE_END, scratch->core_info.len);
+ }
+
+ if (nfaQueueExec(q->nfa, q, scratch->core_info.len)) {
+ nfaQueueCompressState(nfa, q, scratch->core_info.len);
+ } else if (!told_to_stop_matching(scratch)) {
scratch->core_info.status |= STATUS_EXHAUSTED;
- }
-}
-
-static inline
-hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
- unsigned length, UNUSED unsigned flags,
- hs_scratch_t *scratch,
- match_event_handler onEvent, void *context) {
+ }
+}
+
+static inline
+hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
+ unsigned length, UNUSED unsigned flags,
+ hs_scratch_t *scratch,
+ match_event_handler onEvent, void *context) {
assert(id);
assert(scratch);
if (unlikely(!data)) {
- return HS_INVALID;
- }
-
- const struct RoseEngine *rose = id->rose;
- char *state = getMultiState(id);
-
+ return HS_INVALID;
+ }
+
+ const struct RoseEngine *rose = id->rose;
+ char *state = getMultiState(id);
+
u8 status = getStreamStatus(state);
if (status & (STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_ERROR)) {
- DEBUG_PRINTF("stream is broken, halting scan\n");
+ DEBUG_PRINTF("stream is broken, halting scan\n");
if (status & STATUS_ERROR) {
return HS_UNKNOWN_ERROR;
} else if (status & STATUS_TERMINATED) {
- return HS_SCAN_TERMINATED;
- } else {
- return HS_SUCCESS;
- }
- }
-
- // We avoid doing any work if the user has given us zero bytes of data to
- // scan. Arguably we should define some semantics for how we treat vacuous
- // cases here.
- if (unlikely(length == 0)) {
- DEBUG_PRINTF("zero length block\n");
- return HS_SUCCESS;
- }
-
- u32 historyAmount = getHistoryAmount(rose, id->offset);
- populateCoreInfo(scratch, rose, state, onEvent, context, data, length,
- getHistory(state, rose, id->offset), historyAmount,
+ return HS_SCAN_TERMINATED;
+ } else {
+ return HS_SUCCESS;
+ }
+ }
+
+ // We avoid doing any work if the user has given us zero bytes of data to
+ // scan. Arguably we should define some semantics for how we treat vacuous
+ // cases here.
+ if (unlikely(length == 0)) {
+ DEBUG_PRINTF("zero length block\n");
+ return HS_SUCCESS;
+ }
+
+ u32 historyAmount = getHistoryAmount(rose, id->offset);
+ populateCoreInfo(scratch, rose, state, onEvent, context, data, length,
+ getHistory(state, rose, id->offset), historyAmount,
id->offset, status, flags);
if (rose->ckeyCount) {
scratch->core_info.logicalVector = state +
@@ -913,17 +913,17 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
scratch->tctxt.lastCombMatchOffset = id->offset;
}
}
- assert(scratch->core_info.hlen <= id->offset
- && scratch->core_info.hlen <= rose->historyRequired);
-
- prefetch_data(data, length);
-
- if (rose->somLocationCount) {
- loadSomFromStream(scratch, id->offset);
- }
-
- if (!id->offset && rose->boundary.reportZeroOffset) {
- DEBUG_PRINTF("zero reports\n");
+ assert(scratch->core_info.hlen <= id->offset
+ && scratch->core_info.hlen <= rose->historyRequired);
+
+ prefetch_data(data, length);
+
+ if (rose->somLocationCount) {
+ loadSomFromStream(scratch, id->offset);
+ }
+
+ if (!id->offset && rose->boundary.reportZeroOffset) {
+ DEBUG_PRINTF("zero reports\n");
int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset,
0, scratch);
if (rv == MO_HALT_MATCHING) {
@@ -936,47 +936,47 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
return HS_SUCCESS;
}
}
- }
-
- switch (rose->runtimeImpl) {
- default:
- assert(0);
- case ROSE_RUNTIME_FULL_ROSE:
- rawStreamExec(id, scratch);
- break;
- case ROSE_RUNTIME_PURE_LITERAL:
- pureLiteralStreamExec(id, scratch);
- break;
- case ROSE_RUNTIME_SINGLE_OUTFIX:
- soleOutfixStreamExec(id, scratch);
- }
-
- if (rose->hasSom && !told_to_stop_matching(scratch)) {
- int halt = flushStoredSomMatches(scratch, ~0ULL);
- if (halt) {
+ }
+
+ switch (rose->runtimeImpl) {
+ default:
+ assert(0);
+ case ROSE_RUNTIME_FULL_ROSE:
+ rawStreamExec(id, scratch);
+ break;
+ case ROSE_RUNTIME_PURE_LITERAL:
+ pureLiteralStreamExec(id, scratch);
+ break;
+ case ROSE_RUNTIME_SINGLE_OUTFIX:
+ soleOutfixStreamExec(id, scratch);
+ }
+
+ if (rose->hasSom && !told_to_stop_matching(scratch)) {
+ int halt = flushStoredSomMatches(scratch, ~0ULL);
+ if (halt) {
scratch->core_info.status |= STATUS_TERMINATED;
- }
- }
-
+ }
+ }
+
setStreamStatus(state, scratch->core_info.status);
if (unlikely(internal_matching_error(scratch))) {
return HS_UNKNOWN_ERROR;
} else if (likely(!can_stop_matching(scratch))) {
maintainHistoryBuffer(rose, state, data, length);
- id->offset += length; /* maintain offset */
-
- if (rose->somLocationCount) {
- storeSomToStream(scratch, id->offset);
- }
- } else if (told_to_stop_matching(scratch)) {
- return HS_SCAN_TERMINATED;
- }
-
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+ id->offset += length; /* maintain offset */
+
+ if (rose->somLocationCount) {
+ storeSomToStream(scratch, id->offset);
+ }
+ } else if (told_to_stop_matching(scratch)) {
+ return HS_SCAN_TERMINATED;
+ }
+
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_scan_stream(hs_stream_t *id, const char *data,
unsigned length, unsigned flags,
hs_scratch_t *scratch,
@@ -993,115 +993,115 @@ hs_error_t HS_CDECL hs_scan_stream(hs_stream_t *id, const char *data,
onEvent, context);
unmarkScratchInUse(scratch);
return rv;
-}
-
-HS_PUBLIC_API
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
match_event_handler onEvent,
void *context) {
- if (!id) {
- return HS_INVALID;
- }
-
- if (onEvent) {
- if (!scratch || !validScratch(id->rose, scratch)) {
- return HS_INVALID;
- }
+ if (!id) {
+ return HS_INVALID;
+ }
+
+ if (onEvent) {
+ if (!scratch || !validScratch(id->rose, scratch)) {
+ return HS_INVALID;
+ }
if (unlikely(markScratchInUse(scratch))) {
return HS_SCRATCH_IN_USE;
}
- report_eod_matches(id, scratch, onEvent, context);
+ report_eod_matches(id, scratch, onEvent, context);
if (unlikely(internal_matching_error(scratch))) {
unmarkScratchInUse(scratch);
return HS_UNKNOWN_ERROR;
}
unmarkScratchInUse(scratch);
- }
-
- hs_stream_free(id);
-
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+ }
+
+ hs_stream_free(id);
+
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags,
hs_scratch_t *scratch,
match_event_handler onEvent,
void *context) {
- if (!id) {
- return HS_INVALID;
- }
-
- if (onEvent) {
- if (!scratch || !validScratch(id->rose, scratch)) {
- return HS_INVALID;
- }
+ if (!id) {
+ return HS_INVALID;
+ }
+
+ if (onEvent) {
+ if (!scratch || !validScratch(id->rose, scratch)) {
+ return HS_INVALID;
+ }
if (unlikely(markScratchInUse(scratch))) {
return HS_SCRATCH_IN_USE;
}
- report_eod_matches(id, scratch, onEvent, context);
+ report_eod_matches(id, scratch, onEvent, context);
if (unlikely(internal_matching_error(scratch))) {
unmarkScratchInUse(scratch);
return HS_UNKNOWN_ERROR;
}
unmarkScratchInUse(scratch);
- }
-
+ }
+
// history already initialised
init_stream(id, id->rose, 0);
-
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_stream_size(const hs_database_t *db,
size_t *stream_size) {
- if (!stream_size) {
- return HS_INVALID;
- }
-
- hs_error_t ret = validDatabase(db);
- if (ret != HS_SUCCESS) {
- return ret;
- }
-
- const struct RoseEngine *rose = hs_get_bytecode(db);
- if (!ISALIGNED_16(rose)) {
- return HS_INVALID;
- }
-
- if (rose->mode != HS_MODE_STREAM) {
- return HS_DB_MODE_ERROR;
- }
-
- u32 base_stream_size = rose->stateOffsets.end;
-
- // stream state plus the hs_stream struct itself
- *stream_size = base_stream_size + sizeof(struct hs_stream);
-
- return HS_SUCCESS;
-}
-
-#if defined(DEBUG) || defined(DUMP_SUPPORT)
-#include "util/compare.h"
-// A debugging crutch: print a hex-escaped version of the match for our
-// perusal.
-static UNUSED
-void dumpData(const char *data, size_t len) {
- DEBUG_PRINTF("BUFFER:");
- for (size_t i = 0; i < len; i++) {
- u8 c = data[i];
- if (ourisprint(c) && c != '\'') {
- printf("%c", c);
- } else {
- printf("\\x%02x", c);
- }
- }
- printf("\n");
-}
-#endif
-
-HS_PUBLIC_API
+ if (!stream_size) {
+ return HS_INVALID;
+ }
+
+ hs_error_t ret = validDatabase(db);
+ if (ret != HS_SUCCESS) {
+ return ret;
+ }
+
+ const struct RoseEngine *rose = hs_get_bytecode(db);
+ if (!ISALIGNED_16(rose)) {
+ return HS_INVALID;
+ }
+
+ if (rose->mode != HS_MODE_STREAM) {
+ return HS_DB_MODE_ERROR;
+ }
+
+ u32 base_stream_size = rose->stateOffsets.end;
+
+ // stream state plus the hs_stream struct itself
+ *stream_size = base_stream_size + sizeof(struct hs_stream);
+
+ return HS_SUCCESS;
+}
+
+#if defined(DEBUG) || defined(DUMP_SUPPORT)
+#include "util/compare.h"
+// A debugging crutch: print a hex-escaped version of the match for our
+// perusal.
+static UNUSED
+void dumpData(const char *data, size_t len) {
+ DEBUG_PRINTF("BUFFER:");
+ for (size_t i = 0; i < len; i++) {
+ u8 c = data[i];
+ if (ourisprint(c) && c != '\'') {
+ printf("%c", c);
+ } else {
+ printf("\\x%02x", c);
+ }
+ }
+ printf("\n");
+}
+#endif
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db,
const char * const * data,
const unsigned int *length,
@@ -1109,68 +1109,68 @@ hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db,
UNUSED unsigned int flags,
hs_scratch_t *scratch,
match_event_handler onEvent, void *context) {
- if (unlikely(!scratch || !data || !length)) {
- return HS_INVALID;
- }
-
- hs_error_t err = validDatabase(db);
- if (unlikely(err != HS_SUCCESS)) {
- return err;
- }
-
- const struct RoseEngine *rose = hs_get_bytecode(db);
- if (unlikely(!ISALIGNED_16(rose))) {
- return HS_INVALID;
- }
-
- if (unlikely(rose->mode != HS_MODE_VECTORED)) {
- return HS_DB_MODE_ERROR;
- }
-
- if (unlikely(!validScratch(rose, scratch))) {
- return HS_INVALID;
- }
-
+ if (unlikely(!scratch || !data || !length)) {
+ return HS_INVALID;
+ }
+
+ hs_error_t err = validDatabase(db);
+ if (unlikely(err != HS_SUCCESS)) {
+ return err;
+ }
+
+ const struct RoseEngine *rose = hs_get_bytecode(db);
+ if (unlikely(!ISALIGNED_16(rose))) {
+ return HS_INVALID;
+ }
+
+ if (unlikely(rose->mode != HS_MODE_VECTORED)) {
+ return HS_DB_MODE_ERROR;
+ }
+
+ if (unlikely(!validScratch(rose, scratch))) {
+ return HS_INVALID;
+ }
+
if (unlikely(markScratchInUse(scratch))) {
return HS_SCRATCH_IN_USE;
}
- hs_stream_t *id = (hs_stream_t *)(scratch->bstate);
-
+ hs_stream_t *id = (hs_stream_t *)(scratch->bstate);
+
init_stream(id, rose, 1); /* open stream */
-
- for (u32 i = 0; i < count; i++) {
- DEBUG_PRINTF("block %u/%u offset=%llu len=%u\n", i, count, id->offset,
- length[i]);
-#ifdef DEBUG
- dumpData(data[i], length[i]);
-#endif
- hs_error_t ret
- = hs_scan_stream_internal(id, data[i], length[i], 0, scratch,
- onEvent, context);
- if (ret != HS_SUCCESS) {
+
+ for (u32 i = 0; i < count; i++) {
+ DEBUG_PRINTF("block %u/%u offset=%llu len=%u\n", i, count, id->offset,
+ length[i]);
+#ifdef DEBUG
+ dumpData(data[i], length[i]);
+#endif
+ hs_error_t ret
+ = hs_scan_stream_internal(id, data[i], length[i], 0, scratch,
+ onEvent, context);
+ if (ret != HS_SUCCESS) {
unmarkScratchInUse(scratch);
- return ret;
- }
- }
-
- /* close stream */
- if (onEvent) {
- report_eod_matches(id, scratch, onEvent, context);
-
+ return ret;
+ }
+ }
+
+ /* close stream */
+ if (onEvent) {
+ report_eod_matches(id, scratch, onEvent, context);
+
if (unlikely(internal_matching_error(scratch))) {
unmarkScratchInUse(scratch);
return HS_UNKNOWN_ERROR;
} else if (told_to_stop_matching(scratch)) {
unmarkScratchInUse(scratch);
- return HS_SCAN_TERMINATED;
- }
- }
-
+ return HS_SCAN_TERMINATED;
+ }
+ }
+
unmarkScratchInUse(scratch);
- return HS_SUCCESS;
-}
+ return HS_SUCCESS;
+}
HS_PUBLIC_API
hs_error_t HS_CDECL hs_compress_stream(const hs_stream_t *stream, char *buf,
diff --git a/contrib/libs/hyperscan/src/scratch.c b/contrib/libs/hyperscan/src/scratch.c
index f3e7856743..25991e2bba 100644
--- a/contrib/libs/hyperscan/src/scratch.c
+++ b/contrib/libs/hyperscan/src/scratch.c
@@ -1,49 +1,49 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Functions for allocating and manipulating scratch space.
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "allocator.h"
-#include "hs_internal.h"
-#include "hs_runtime.h"
-#include "scratch.h"
-#include "state.h"
-#include "ue2common.h"
-#include "database.h"
-#include "nfa/nfa_api_queue.h"
-#include "rose/rose_internal.h"
-#include "util/fatbit.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Functions for allocating and manipulating scratch space.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "allocator.h"
+#include "hs_internal.h"
+#include "hs_runtime.h"
+#include "scratch.h"
+#include "state.h"
+#include "ue2common.h"
+#include "database.h"
+#include "nfa/nfa_api_queue.h"
+#include "rose/rose_internal.h"
+#include "util/fatbit.h"
+
/**
* Determine the space required for a correctly aligned array of fatbit
* structure, laid out as:
@@ -68,93 +68,93 @@ size_t fatbit_array_size(u32 num_entries, u32 fatbit_len) {
return ROUNDUP_N(len, 8); // Round up for potential padding.
}
-/** Used by hs_alloc_scratch and hs_clone_scratch to allocate a complete
- * scratch region from a prototype structure. */
-static
-hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
- u32 queueCount = proto->queueCount;
+/** Used by hs_alloc_scratch and hs_clone_scratch to allocate a complete
+ * scratch region from a prototype structure. */
+static
+hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
+ u32 queueCount = proto->queueCount;
u32 activeQueueArraySize = proto->activeQueueArraySize;
u32 deduperCount = proto->deduper.dkey_count;
u32 deduperLogSize = proto->deduper.log_size;
- u32 bStateSize = proto->bStateSize;
- u32 tStateSize = proto->tStateSize;
- u32 fullStateSize = proto->fullStateSize;
- u32 anchored_literal_region_len = proto->anchored_literal_region_len;
+ u32 bStateSize = proto->bStateSize;
+ u32 tStateSize = proto->tStateSize;
+ u32 fullStateSize = proto->fullStateSize;
+ u32 anchored_literal_region_len = proto->anchored_literal_region_len;
u32 anchored_literal_fatbit_size = proto->anchored_literal_fatbit_size;
-
- u32 som_store_size = proto->som_store_count * sizeof(u64a);
- u32 som_attempted_store_size = proto->som_store_count * sizeof(u64a);
+
+ u32 som_store_size = proto->som_store_count * sizeof(u64a);
+ u32 som_attempted_store_size = proto->som_store_count * sizeof(u64a);
u32 som_now_size = proto->som_fatbit_size;
u32 som_attempted_size = proto->som_fatbit_size;
-
- struct hs_scratch *s;
- struct hs_scratch *s_tmp;
- size_t queue_size = queueCount * sizeof(struct mq);
- size_t qmpq_size = queueCount * sizeof(struct queue_match);
-
+
+ struct hs_scratch *s;
+ struct hs_scratch *s_tmp;
+ size_t queue_size = queueCount * sizeof(struct mq);
+ size_t qmpq_size = queueCount * sizeof(struct queue_match);
+
assert(anchored_literal_region_len < 8 * sizeof(s->al_log_sum));
-
+
size_t anchored_literal_region_size = fatbit_array_size(
anchored_literal_region_len, proto->anchored_literal_fatbit_size);
size_t delay_region_size =
fatbit_array_size(DELAY_SLOT_COUNT, proto->delay_fatbit_size);
-
- // the size is all the allocated stuff, not including the struct itself
- size_t size = queue_size + 63
- + bStateSize + tStateSize
- + fullStateSize + 63 /* cacheline padding */
+
+ // the size is all the allocated stuff, not including the struct itself
+ size_t size = queue_size + 63
+ + bStateSize + tStateSize
+ + fullStateSize + 63 /* cacheline padding */
+ proto->handledKeyFatbitSize /* handled roles */
+ activeQueueArraySize /* active queue array */
+ 2 * deduperLogSize /* need odd and even logs */
+ 2 * deduperLogSize /* ditto som logs */
- + 2 * sizeof(u64a) * deduperCount /* start offsets for som */
+ + 2 * sizeof(u64a) * deduperCount /* start offsets for som */
+ anchored_literal_region_size + qmpq_size
+ delay_region_size
- + som_store_size
- + som_now_size
- + som_attempted_size
+ + som_store_size
+ + som_now_size
+ + som_attempted_size
+ som_attempted_store_size + 15;
-
- /* the struct plus the allocated stuff plus padding for cacheline
- * alignment */
- const size_t alloc_size = sizeof(struct hs_scratch) + size + 256;
- s_tmp = hs_scratch_alloc(alloc_size);
- hs_error_t err = hs_check_alloc(s_tmp);
- if (err != HS_SUCCESS) {
- hs_scratch_free(s_tmp);
- *scratch = NULL;
- return err;
- }
-
- memset(s_tmp, 0, alloc_size);
- s = ROUNDUP_PTR(s_tmp, 64);
- DEBUG_PRINTF("allocated %zu bytes at %p but realigning to %p\n", alloc_size, s_tmp, s);
- DEBUG_PRINTF("sizeof %zu\n", sizeof(struct hs_scratch));
- *s = *proto;
-
- s->magic = SCRATCH_MAGIC;
+
+ /* the struct plus the allocated stuff plus padding for cacheline
+ * alignment */
+ const size_t alloc_size = sizeof(struct hs_scratch) + size + 256;
+ s_tmp = hs_scratch_alloc(alloc_size);
+ hs_error_t err = hs_check_alloc(s_tmp);
+ if (err != HS_SUCCESS) {
+ hs_scratch_free(s_tmp);
+ *scratch = NULL;
+ return err;
+ }
+
+ memset(s_tmp, 0, alloc_size);
+ s = ROUNDUP_PTR(s_tmp, 64);
+ DEBUG_PRINTF("allocated %zu bytes at %p but realigning to %p\n", alloc_size, s_tmp, s);
+ DEBUG_PRINTF("sizeof %zu\n", sizeof(struct hs_scratch));
+ *s = *proto;
+
+ s->magic = SCRATCH_MAGIC;
s->in_use = 0;
- s->scratchSize = alloc_size;
- s->scratch_alloc = (char *)s_tmp;
+ s->scratchSize = alloc_size;
+ s->scratch_alloc = (char *)s_tmp;
s->fdr_conf = NULL;
-
- // each of these is at an offset from the previous
- char *current = (char *)s + sizeof(*s);
-
- // align current so that the following arrays are naturally aligned: this
- // is accounted for in the padding allocated
- current = ROUNDUP_PTR(current, 8);
-
- s->queues = (struct mq *)current;
- current += queue_size;
-
- assert(ISALIGNED_N(current, 8));
- s->som_store = (u64a *)current;
- current += som_store_size;
-
- s->som_attempted_store = (u64a *)current;
- current += som_attempted_store_size;
-
+
+ // each of these is at an offset from the previous
+ char *current = (char *)s + sizeof(*s);
+
+ // align current so that the following arrays are naturally aligned: this
+ // is accounted for in the padding allocated
+ current = ROUNDUP_PTR(current, 8);
+
+ s->queues = (struct mq *)current;
+ current += queue_size;
+
+ assert(ISALIGNED_N(current, 8));
+ s->som_store = (u64a *)current;
+ current += som_store_size;
+
+ s->som_attempted_store = (u64a *)current;
+ current += som_attempted_store_size;
+
current = ROUNDUP_PTR(current, alignof(struct fatbit *));
s->delay_slots = (struct fatbit **)current;
current += sizeof(struct fatbit *) * DELAY_SLOT_COUNT;
@@ -163,281 +163,281 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
s->delay_slots[i] = (struct fatbit *)current;
assert(ISALIGNED(s->delay_slots[i]));
current += proto->delay_fatbit_size;
- }
-
+ }
+
current = ROUNDUP_PTR(current, alignof(struct fatbit *));
s->al_log = (struct fatbit **)current;
current += sizeof(struct fatbit *) * anchored_literal_region_len;
current = ROUNDUP_PTR(current, alignof(struct fatbit));
- for (u32 i = 0; i < anchored_literal_region_len; i++) {
+ for (u32 i = 0; i < anchored_literal_region_len; i++) {
s->al_log[i] = (struct fatbit *)current;
assert(ISALIGNED(s->al_log[i]));
current += anchored_literal_fatbit_size;
- }
-
- current = ROUNDUP_PTR(current, 8);
- s->catchup_pq.qm = (struct queue_match *)current;
- current += qmpq_size;
-
- s->bstate = (char *)current;
- s->bStateSize = bStateSize;
- current += bStateSize;
-
- s->tstate = (char *)current;
- s->tStateSize = tStateSize;
- current += tStateSize;
-
- current = ROUNDUP_PTR(current, 64);
-
- assert(ISALIGNED_N(current, 8));
- s->deduper.som_start_log[0] = (u64a *)current;
- current += sizeof(u64a) * deduperCount;
-
- s->deduper.som_start_log[1] = (u64a *)current;
- current += sizeof(u64a) * deduperCount;
-
- assert(ISALIGNED_N(current, 8));
- s->aqa = (struct fatbit *)current;
+ }
+
+ current = ROUNDUP_PTR(current, 8);
+ s->catchup_pq.qm = (struct queue_match *)current;
+ current += qmpq_size;
+
+ s->bstate = (char *)current;
+ s->bStateSize = bStateSize;
+ current += bStateSize;
+
+ s->tstate = (char *)current;
+ s->tStateSize = tStateSize;
+ current += tStateSize;
+
+ current = ROUNDUP_PTR(current, 64);
+
+ assert(ISALIGNED_N(current, 8));
+ s->deduper.som_start_log[0] = (u64a *)current;
+ current += sizeof(u64a) * deduperCount;
+
+ s->deduper.som_start_log[1] = (u64a *)current;
+ current += sizeof(u64a) * deduperCount;
+
+ assert(ISALIGNED_N(current, 8));
+ s->aqa = (struct fatbit *)current;
current += activeQueueArraySize;
-
- s->handled_roles = (struct fatbit *)current;
+
+ s->handled_roles = (struct fatbit *)current;
current += proto->handledKeyFatbitSize;
-
- s->deduper.log[0] = (struct fatbit *)current;
+
+ s->deduper.log[0] = (struct fatbit *)current;
current += deduperLogSize;
-
- s->deduper.log[1] = (struct fatbit *)current;
+
+ s->deduper.log[1] = (struct fatbit *)current;
current += deduperLogSize;
-
- s->deduper.som_log[0] = (struct fatbit *)current;
+
+ s->deduper.som_log[0] = (struct fatbit *)current;
current += deduperLogSize;
-
- s->deduper.som_log[1] = (struct fatbit *)current;
+
+ s->deduper.som_log[1] = (struct fatbit *)current;
current += deduperLogSize;
-
- s->som_set_now = (struct fatbit *)current;
- current += som_now_size;
-
- s->som_attempted_set = (struct fatbit *)current;
- current += som_attempted_size;
-
- current = ROUNDUP_PTR(current, 64);
- assert(ISALIGNED_CL(current));
- s->fullState = (char *)current;
- s->fullStateSize = fullStateSize;
- current += fullStateSize;
-
- *scratch = s;
-
- // Don't get too big for your boots
- assert((size_t)(current - (char *)s) <= alloc_size);
-
- // Init q->scratch ptr for every queue.
- for (struct mq *qi = s->queues; qi != s->queues + queueCount; ++qi) {
- qi->scratch = s;
- }
-
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+
+ s->som_set_now = (struct fatbit *)current;
+ current += som_now_size;
+
+ s->som_attempted_set = (struct fatbit *)current;
+ current += som_attempted_size;
+
+ current = ROUNDUP_PTR(current, 64);
+ assert(ISALIGNED_CL(current));
+ s->fullState = (char *)current;
+ s->fullStateSize = fullStateSize;
+ current += fullStateSize;
+
+ *scratch = s;
+
+ // Don't get too big for your boots
+ assert((size_t)(current - (char *)s) <= alloc_size);
+
+ // Init q->scratch ptr for every queue.
+ for (struct mq *qi = s->queues; qi != s->queues + queueCount; ++qi) {
+ qi->scratch = s;
+ }
+
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db,
hs_scratch_t **scratch) {
- if (!db || !scratch) {
- return HS_INVALID;
- }
-
- /* We need to do some real sanity checks on the database as some users mmap
- * in old deserialised databases, so this is the first real opportunity we
- * have to make sure it is sane.
- */
- hs_error_t rv = dbIsValid(db);
- if (rv != HS_SUCCESS) {
- return rv;
- }
-
- /* We can also sanity-check the scratch parameter: if it points to an
- * existing scratch area, that scratch should have valid magic bits. */
- if (*scratch != NULL) {
- /* has to be aligned before we can do anything with it */
- if (!ISALIGNED_CL(*scratch)) {
- return HS_INVALID;
- }
- if ((*scratch)->magic != SCRATCH_MAGIC) {
- return HS_INVALID;
- }
+ if (!db || !scratch) {
+ return HS_INVALID;
+ }
+
+ /* We need to do some real sanity checks on the database as some users mmap
+ * in old deserialised databases, so this is the first real opportunity we
+ * have to make sure it is sane.
+ */
+ hs_error_t rv = dbIsValid(db);
+ if (rv != HS_SUCCESS) {
+ return rv;
+ }
+
+ /* We can also sanity-check the scratch parameter: if it points to an
+ * existing scratch area, that scratch should have valid magic bits. */
+ if (*scratch != NULL) {
+ /* has to be aligned before we can do anything with it */
+ if (!ISALIGNED_CL(*scratch)) {
+ return HS_INVALID;
+ }
+ if ((*scratch)->magic != SCRATCH_MAGIC) {
+ return HS_INVALID;
+ }
if (markScratchInUse(*scratch)) {
return HS_SCRATCH_IN_USE;
}
- }
-
- const struct RoseEngine *rose = hs_get_bytecode(db);
- int resize = 0;
-
- hs_scratch_t *proto;
- hs_scratch_t *proto_tmp = hs_scratch_alloc(sizeof(struct hs_scratch) + 256);
- hs_error_t proto_ret = hs_check_alloc(proto_tmp);
- if (proto_ret != HS_SUCCESS) {
- hs_scratch_free(proto_tmp);
+ }
+
+ const struct RoseEngine *rose = hs_get_bytecode(db);
+ int resize = 0;
+
+ hs_scratch_t *proto;
+ hs_scratch_t *proto_tmp = hs_scratch_alloc(sizeof(struct hs_scratch) + 256);
+ hs_error_t proto_ret = hs_check_alloc(proto_tmp);
+ if (proto_ret != HS_SUCCESS) {
+ hs_scratch_free(proto_tmp);
if (*scratch) {
hs_scratch_free((*scratch)->scratch_alloc);
}
- *scratch = NULL;
- return proto_ret;
- }
-
- proto = ROUNDUP_PTR(proto_tmp, 64);
-
- if (*scratch) {
- *proto = **scratch;
- } else {
- memset(proto, 0, sizeof(*proto));
- resize = 1;
- }
- proto->scratch_alloc = (char *)proto_tmp;
-
- if (rose->anchoredDistance > proto->anchored_literal_region_len) {
- resize = 1;
- proto->anchored_literal_region_len = rose->anchoredDistance;
- }
-
+ *scratch = NULL;
+ return proto_ret;
+ }
+
+ proto = ROUNDUP_PTR(proto_tmp, 64);
+
+ if (*scratch) {
+ *proto = **scratch;
+ } else {
+ memset(proto, 0, sizeof(*proto));
+ resize = 1;
+ }
+ proto->scratch_alloc = (char *)proto_tmp;
+
+ if (rose->anchoredDistance > proto->anchored_literal_region_len) {
+ resize = 1;
+ proto->anchored_literal_region_len = rose->anchoredDistance;
+ }
+
if (rose->anchored_fatbit_size > proto->anchored_literal_fatbit_size) {
- resize = 1;
+ resize = 1;
proto->anchored_literal_fatbit_size = rose->anchored_fatbit_size;
- }
-
+ }
+
if (rose->delay_fatbit_size > proto->delay_fatbit_size) {
- resize = 1;
+ resize = 1;
proto->delay_fatbit_size = rose->delay_fatbit_size;
- }
-
+ }
+
if (rose->handledKeyFatbitSize > proto->handledKeyFatbitSize) {
- resize = 1;
+ resize = 1;
proto->handledKeyFatbitSize = rose->handledKeyFatbitSize;
- }
-
- if (rose->tStateSize > proto->tStateSize) {
- resize = 1;
- proto->tStateSize = rose->tStateSize;
- }
-
- u32 som_store_count = rose->somLocationCount;
- if (som_store_count > proto->som_store_count) {
- resize = 1;
- proto->som_store_count = som_store_count;
- }
-
+ }
+
+ if (rose->tStateSize > proto->tStateSize) {
+ resize = 1;
+ proto->tStateSize = rose->tStateSize;
+ }
+
+ u32 som_store_count = rose->somLocationCount;
+ if (som_store_count > proto->som_store_count) {
+ resize = 1;
+ proto->som_store_count = som_store_count;
+ }
+
if (rose->somLocationFatbitSize > proto->som_fatbit_size) {
resize = 1;
proto->som_fatbit_size = rose->somLocationFatbitSize;
}
- u32 queueCount = rose->queueCount;
- if (queueCount > proto->queueCount) {
- resize = 1;
- proto->queueCount = queueCount;
- }
-
+ u32 queueCount = rose->queueCount;
+ if (queueCount > proto->queueCount) {
+ resize = 1;
+ proto->queueCount = queueCount;
+ }
+
if (rose->activeQueueArraySize > proto->activeQueueArraySize) {
resize = 1;
proto->activeQueueArraySize = rose->activeQueueArraySize;
}
- u32 bStateSize = 0;
- if (rose->mode == HS_MODE_BLOCK) {
- bStateSize = rose->stateOffsets.end;
- } else if (rose->mode == HS_MODE_VECTORED) {
- /* vectoring database require a full stream state (inc header) */
- bStateSize = sizeof(struct hs_stream) + rose->stateOffsets.end;
- }
-
- if (bStateSize > proto->bStateSize) {
- resize = 1;
- proto->bStateSize = bStateSize;
- }
-
- u32 fullStateSize = rose->scratchStateSize;
- if (fullStateSize > proto->fullStateSize) {
- resize = 1;
- proto->fullStateSize = fullStateSize;
- }
-
+ u32 bStateSize = 0;
+ if (rose->mode == HS_MODE_BLOCK) {
+ bStateSize = rose->stateOffsets.end;
+ } else if (rose->mode == HS_MODE_VECTORED) {
+ /* vectoring database require a full stream state (inc header) */
+ bStateSize = sizeof(struct hs_stream) + rose->stateOffsets.end;
+ }
+
+ if (bStateSize > proto->bStateSize) {
+ resize = 1;
+ proto->bStateSize = bStateSize;
+ }
+
+ u32 fullStateSize = rose->scratchStateSize;
+ if (fullStateSize > proto->fullStateSize) {
+ resize = 1;
+ proto->fullStateSize = fullStateSize;
+ }
+
if (rose->dkeyCount > proto->deduper.dkey_count) {
- resize = 1;
+ resize = 1;
proto->deduper.dkey_count = rose->dkeyCount;
proto->deduper.log_size = rose->dkeyLogSize;
- }
-
- if (resize) {
- if (*scratch) {
- hs_scratch_free((*scratch)->scratch_alloc);
- }
-
- hs_error_t alloc_ret = alloc_scratch(proto, scratch);
- hs_scratch_free(proto_tmp); /* kill off temp used for sizing */
- if (alloc_ret != HS_SUCCESS) {
- *scratch = NULL;
- return alloc_ret;
- }
- } else {
- hs_scratch_free(proto_tmp); /* kill off temp used for sizing */
+ }
+
+ if (resize) {
+ if (*scratch) {
+ hs_scratch_free((*scratch)->scratch_alloc);
+ }
+
+ hs_error_t alloc_ret = alloc_scratch(proto, scratch);
+ hs_scratch_free(proto_tmp); /* kill off temp used for sizing */
+ if (alloc_ret != HS_SUCCESS) {
+ *scratch = NULL;
+ return alloc_ret;
+ }
+ } else {
+ hs_scratch_free(proto_tmp); /* kill off temp used for sizing */
unmarkScratchInUse(*scratch);
- }
-
+ }
+
assert(!(*scratch)->in_use);
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_clone_scratch(const hs_scratch_t *src,
hs_scratch_t **dest) {
- if (!dest || !src || !ISALIGNED_CL(src) || src->magic != SCRATCH_MAGIC) {
- return HS_INVALID;
- }
-
- *dest = NULL;
- hs_error_t ret = alloc_scratch(src, dest);
- if (ret != HS_SUCCESS) {
- *dest = NULL;
- return ret;
- }
-
+ if (!dest || !src || !ISALIGNED_CL(src) || src->magic != SCRATCH_MAGIC) {
+ return HS_INVALID;
+ }
+
+ *dest = NULL;
+ hs_error_t ret = alloc_scratch(src, dest);
+ if (ret != HS_SUCCESS) {
+ *dest = NULL;
+ return ret;
+ }
+
assert(!(*dest)->in_use);
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_free_scratch(hs_scratch_t *scratch) {
- if (scratch) {
- /* has to be aligned before we can do anything with it */
- if (!ISALIGNED_CL(scratch)) {
- return HS_INVALID;
- }
- if (scratch->magic != SCRATCH_MAGIC) {
- return HS_INVALID;
- }
+ if (scratch) {
+ /* has to be aligned before we can do anything with it */
+ if (!ISALIGNED_CL(scratch)) {
+ return HS_INVALID;
+ }
+ if (scratch->magic != SCRATCH_MAGIC) {
+ return HS_INVALID;
+ }
if (markScratchInUse(scratch)) {
return HS_SCRATCH_IN_USE;
}
- scratch->magic = 0;
- assert(scratch->scratch_alloc);
- DEBUG_PRINTF("scratch %p is really at %p : freeing\n", scratch,
- scratch->scratch_alloc);
- hs_scratch_free(scratch->scratch_alloc);
- }
-
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+ scratch->magic = 0;
+ assert(scratch->scratch_alloc);
+ DEBUG_PRINTF("scratch %p is really at %p : freeing\n", scratch,
+ scratch->scratch_alloc);
+ hs_scratch_free(scratch->scratch_alloc);
+ }
+
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_scratch_size(const hs_scratch_t *scratch, size_t *size) {
- if (!size || !scratch || !ISALIGNED_CL(scratch) ||
- scratch->magic != SCRATCH_MAGIC) {
- return HS_INVALID;
- }
-
- *size = scratch->scratchSize;
-
- return HS_SUCCESS;
-}
+ if (!size || !scratch || !ISALIGNED_CL(scratch) ||
+ scratch->magic != SCRATCH_MAGIC) {
+ return HS_INVALID;
+ }
+
+ *size = scratch->scratchSize;
+
+ return HS_SUCCESS;
+}
diff --git a/contrib/libs/hyperscan/src/scratch.h b/contrib/libs/hyperscan/src/scratch.h
index 00d47d2465..1256f7aba8 100644
--- a/contrib/libs/hyperscan/src/scratch.h
+++ b/contrib/libs/hyperscan/src/scratch.h
@@ -1,137 +1,137 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Scratch and associated data structures.
- *
- * This header gets pulled into many places (many deep, slow to compile
- * places). Try to keep the included headers under control.
- */
-
-#ifndef SCRATCH_H_DA6D4FC06FF410
-#define SCRATCH_H_DA6D4FC06FF410
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Scratch and associated data structures.
+ *
+ * This header gets pulled into many places (many deep, slow to compile
+ * places). Try to keep the included headers under control.
+ */
+
+#ifndef SCRATCH_H_DA6D4FC06FF410
+#define SCRATCH_H_DA6D4FC06FF410
+
#include "hs_common.h"
-#include "ue2common.h"
-#include "rose/rose_types.h"
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-UNUSED static const u32 SCRATCH_MAGIC = 0x544F4259;
-
-struct fatbit;
-struct hs_scratch;
-struct RoseEngine;
-struct mq;
-
-struct queue_match {
- /** \brief used to store the current location of an (suf|out)fix match in
- * the current buffer.
- *
- * As (suf|out)fixes always run in the main buffer and never in history
- * this number will always be positive (matches at 0 belong to previous
- * write). Hence we can get away with a size_t rather than the usual s64a
- * for a location. */
- size_t loc;
-
- u32 queue; /**< queue index. */
-};
-
-struct catchup_pq {
- struct queue_match *qm;
- u32 qm_size; /**< current size of the priority queue */
-};
-
+#include "ue2common.h"
+#include "rose/rose_types.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+UNUSED static const u32 SCRATCH_MAGIC = 0x544F4259;
+
+struct fatbit;
+struct hs_scratch;
+struct RoseEngine;
+struct mq;
+
+struct queue_match {
+ /** \brief used to store the current location of an (suf|out)fix match in
+ * the current buffer.
+ *
+ * As (suf|out)fixes always run in the main buffer and never in history
+ * this number will always be positive (matches at 0 belong to previous
+ * write). Hence we can get away with a size_t rather than the usual s64a
+ * for a location. */
+ size_t loc;
+
+ u32 queue; /**< queue index. */
+};
+
+struct catchup_pq {
+ struct queue_match *qm;
+ u32 qm_size; /**< current size of the priority queue */
+};
+
/** \brief Status flag: user requested termination. */
#define STATUS_TERMINATED (1U << 0)
-
+
/** \brief Status flag: it has been determined that it is not possible for this
* stream to raise any more matches.
*
* This may be because all its exhaustion keys are on or for other reasons
* (anchored sections not matching). */
#define STATUS_EXHAUSTED (1U << 1)
-
+
/** \brief Status flag: Rose requires rebuild as delay literal matched in
* history. */
#define STATUS_DELAY_DIRTY (1U << 2)
-
+
/** \brief Status flag: Unexpected Rose program error. */
#define STATUS_ERROR (1U << 3)
-/** \brief Core information about the current scan, used everywhere. */
-struct core_info {
- void *userContext; /**< user-supplied context */
-
- /** \brief user-supplied match callback */
+/** \brief Core information about the current scan, used everywhere. */
+struct core_info {
+ void *userContext; /**< user-supplied context */
+
+ /** \brief user-supplied match callback */
int (HS_CDECL *userCallback)(unsigned int id, unsigned long long from,
unsigned long long to, unsigned int flags,
void *ctx);
-
- const struct RoseEngine *rose;
- char *state; /**< full stream state */
- char *exhaustionVector; /**< pointer to evec for this stream */
+
+ const struct RoseEngine *rose;
+ char *state; /**< full stream state */
+ char *exhaustionVector; /**< pointer to evec for this stream */
char *logicalVector; /**< pointer to lvec for this stream */
char *combVector; /**< pointer to cvec for this stream */
- const u8 *buf; /**< main scan buffer */
- size_t len; /**< length of main scan buffer in bytes */
- const u8 *hbuf; /**< history buffer */
- size_t hlen; /**< length of history buffer in bytes. */
- u64a buf_offset; /**< stream offset, for the base of the buffer */
+ const u8 *buf; /**< main scan buffer */
+ size_t len; /**< length of main scan buffer in bytes */
+ const u8 *hbuf; /**< history buffer */
+ size_t hlen; /**< length of history buffer in bytes. */
+ u64a buf_offset; /**< stream offset, for the base of the buffer */
u8 status; /**< stream status bitmask, using STATUS_ flags above */
-};
-
-/** \brief Rose state information. */
-struct RoseContext {
- u8 mpv_inactive;
- u64a groups;
- u64a lit_offset_adjust; /**< offset to add to matches coming from hwlm */
- u64a delayLastEndOffset; /**< end of the last match from FDR used by delay
- * code */
- u64a lastEndOffset; /**< end of the last match from FDR/anchored DFAs used
- * by history code. anchored DFA matches update this
- * when they are inserted into the literal match
- * stream */
- u64a lastMatchOffset; /**< last match offset report up out of rose;
- * used _only_ for debugging, asserts */
+};
+
+/** \brief Rose state information. */
+struct RoseContext {
+ u8 mpv_inactive;
+ u64a groups;
+ u64a lit_offset_adjust; /**< offset to add to matches coming from hwlm */
+ u64a delayLastEndOffset; /**< end of the last match from FDR used by delay
+ * code */
+ u64a lastEndOffset; /**< end of the last match from FDR/anchored DFAs used
+ * by history code. anchored DFA matches update this
+ * when they are inserted into the literal match
+ * stream */
+ u64a lastMatchOffset; /**< last match offset report up out of rose;
+ * used _only_ for debugging, asserts */
u64a lastCombMatchOffset; /**< last match offset of active combinations */
- u64a minMatchOffset; /**< the earliest offset that we are still allowed to
- * report */
- u64a minNonMpvMatchOffset; /**< the earliest offset that non-mpv engines are
- * still allowed to report */
- u64a next_mpv_offset; /**< earliest offset that the MPV can next report a
- * match, cleared if top events arrive */
- u32 filledDelayedSlots;
- u32 curr_qi; /**< currently executing main queue index during
- * \ref nfaQueueExec */
+ u64a minMatchOffset; /**< the earliest offset that we are still allowed to
+ * report */
+ u64a minNonMpvMatchOffset; /**< the earliest offset that non-mpv engines are
+ * still allowed to report */
+ u64a next_mpv_offset; /**< earliest offset that the MPV can next report a
+ * match, cleared if top events arrive */
+ u32 filledDelayedSlots;
+ u32 curr_qi; /**< currently executing main queue index during
+ * \ref nfaQueueExec */
/**
* \brief Buffer for caseful long literal support, used in streaming mode
@@ -152,57 +152,57 @@ struct RoseContext {
/** \brief Length in bytes of the string pointed to by ll_buf_nocase. */
size_t ll_len_nocase;
-};
-
-struct match_deduper {
- struct fatbit *log[2]; /**< even, odd logs */
+};
+
+struct match_deduper {
+ struct fatbit *log[2]; /**< even, odd logs */
struct fatbit *som_log[2]; /**< even, odd fatbit logs for som */
- u64a *som_start_log[2]; /**< even, odd start offset logs for som */
+ u64a *som_start_log[2]; /**< even, odd start offset logs for som */
u32 dkey_count;
- u32 log_size;
- u64a current_report_offset;
- u8 som_log_dirty;
-};
-
-/** \brief Hyperscan scratch region header.
- *
- * NOTE: there is no requirement that scratch is 16-byte aligned, as it is
- * allocated by a malloc equivalent, possibly supplied by the user.
- */
-struct ALIGN_CL_DIRECTIVE hs_scratch {
- u32 magic;
+ u32 log_size;
+ u64a current_report_offset;
+ u8 som_log_dirty;
+};
+
+/** \brief Hyperscan scratch region header.
+ *
+ * NOTE: there is no requirement that scratch is 16-byte aligned, as it is
+ * allocated by a malloc equivalent, possibly supplied by the user.
+ */
+struct ALIGN_CL_DIRECTIVE hs_scratch {
+ u32 magic;
u8 in_use; /**< non-zero when being used by an API call. */
- u32 queueCount;
+ u32 queueCount;
u32 activeQueueArraySize; /**< size of active queue array fatbit in bytes */
- u32 bStateSize; /**< sizeof block mode states */
- u32 tStateSize; /**< sizeof transient rose states */
- u32 fullStateSize; /**< size of uncompressed nfa state */
- struct RoseContext tctxt;
- char *bstate; /**< block mode states */
- char *tstate; /**< state for transient roses */
- char *fullState; /**< uncompressed NFA state */
- struct mq *queues;
- struct fatbit *aqa; /**< active queue array; fatbit of queues that are valid
- * & active */
+ u32 bStateSize; /**< sizeof block mode states */
+ u32 tStateSize; /**< sizeof transient rose states */
+ u32 fullStateSize; /**< size of uncompressed nfa state */
+ struct RoseContext tctxt;
+ char *bstate; /**< block mode states */
+ char *tstate; /**< state for transient roses */
+ char *fullState; /**< uncompressed NFA state */
+ struct mq *queues;
+ struct fatbit *aqa; /**< active queue array; fatbit of queues that are valid
+ * & active */
struct fatbit **delay_slots;
struct fatbit **al_log;
- u64a al_log_sum;
- struct catchup_pq catchup_pq;
- struct core_info core_info;
- struct match_deduper deduper;
- u32 anchored_literal_region_len;
+ u64a al_log_sum;
+ struct catchup_pq catchup_pq;
+ struct core_info core_info;
+ struct match_deduper deduper;
+ u32 anchored_literal_region_len;
u32 anchored_literal_fatbit_size; /**< size of each anch fatbit in bytes */
struct fatbit *handled_roles; /**< fatbit of ROLES (not states) already
- * handled by this literal */
- u64a *som_store; /**< array of som locations */
- u64a *som_attempted_store; /**< array of som locations for fail stores */
- struct fatbit *som_set_now; /**< fatbit, true if the som location was set
- * based on a match at the current offset */
- struct fatbit *som_attempted_set; /**< fatbit, true if the som location
- * would have been set at the current offset if the
- * location had been writable */
- u64a som_set_now_offset; /**< offset at which som_set_now represents */
- u32 som_store_count;
+ * handled by this literal */
+ u64a *som_store; /**< array of som locations */
+ u64a *som_attempted_store; /**< array of som locations for fail stores */
+ struct fatbit *som_set_now; /**< fatbit, true if the som location was set
+ * based on a match at the current offset */
+ struct fatbit *som_attempted_set; /**< fatbit, true if the som location
+ * would have been set at the current offset if the
+ * location had been writable */
+ u64a som_set_now_offset; /**< offset at which som_set_now represents */
+ u32 som_store_count;
u32 som_fatbit_size; /**< size of som location fatbit structures in bytes */
u32 handledKeyFatbitSize; /**< size of handled_keys fatbit in bytes */
u32 delay_fatbit_size; /**< size of each delay fatbit in bytes */
@@ -211,30 +211,30 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
u64a *fdr_conf; /**< FDR confirm value */
u8 fdr_conf_offset; /**< offset where FDR/Teddy front end matches
* in buffer */
-};
-
+};
+
/* array of fatbit ptr; TODO: why not an array of fatbits? */
-static really_inline
+static really_inline
struct fatbit **getAnchoredLiteralLog(struct hs_scratch *scratch) {
return scratch->al_log;
-}
-
-static really_inline
+}
+
+static really_inline
struct fatbit **getDelaySlots(struct hs_scratch *scratch) {
return scratch->delay_slots;
-}
-
-static really_inline
+}
+
+static really_inline
char told_to_stop_matching(const struct hs_scratch *scratch) {
return scratch->core_info.status & STATUS_TERMINATED;
-}
-
-static really_inline
+}
+
+static really_inline
char can_stop_matching(const struct hs_scratch *scratch) {
return scratch->core_info.status &
(STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_ERROR);
-}
-
+}
+
static really_inline
char internal_matching_error(const struct hs_scratch *scratch) {
return scratch->core_info.status & STATUS_ERROR;
@@ -245,7 +245,7 @@ char internal_matching_error(const struct hs_scratch *scratch) {
*
* Returns non-zero if it was already in use, zero otherwise.
*/
-static really_inline
+static really_inline
char markScratchInUse(struct hs_scratch *scratch) {
DEBUG_PRINTF("marking scratch as in use\n");
assert(scratch && scratch->magic == SCRATCH_MAGIC);
@@ -255,22 +255,22 @@ char markScratchInUse(struct hs_scratch *scratch) {
}
scratch->in_use = 1;
return 0;
-}
-
+}
+
/**
* \brief Mark scratch as no longer in use.
*/
-static really_inline
+static really_inline
void unmarkScratchInUse(struct hs_scratch *scratch) {
DEBUG_PRINTF("marking scratch as not in use\n");
assert(scratch && scratch->magic == SCRATCH_MAGIC);
assert(scratch->in_use == 1);
scratch->in_use = 0;
-}
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* SCRATCH_H_DA6D4FC06FF410 */
-
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* SCRATCH_H_DA6D4FC06FF410 */
+
diff --git a/contrib/libs/hyperscan/src/smallwrite/smallwrite_build.cpp b/contrib/libs/hyperscan/src/smallwrite/smallwrite_build.cpp
index 2e4ec74a2e..d993137632 100644
--- a/contrib/libs/hyperscan/src/smallwrite/smallwrite_build.cpp
+++ b/contrib/libs/hyperscan/src/smallwrite/smallwrite_build.cpp
@@ -1,80 +1,80 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
* \brief Small-write engine build code.
*/
-#include "smallwrite/smallwrite_build.h"
-
-#include "grey.h"
-#include "ue2common.h"
+#include "smallwrite/smallwrite_build.h"
+
+#include "grey.h"
+#include "ue2common.h"
#include "compiler/compiler.h"
#include "nfa/dfa_min.h"
-#include "nfa/mcclellancompile.h"
-#include "nfa/mcclellancompile_util.h"
-#include "nfa/nfa_internal.h"
-#include "nfa/rdfa_merge.h"
+#include "nfa/mcclellancompile.h"
+#include "nfa/mcclellancompile_util.h"
+#include "nfa/nfa_internal.h"
+#include "nfa/rdfa_merge.h"
#include "nfa/shengcompile.h"
-#include "nfagraph/ng.h"
+#include "nfagraph/ng.h"
#include "nfagraph/ng_depth.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_mcclellan.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_mcclellan.h"
#include "nfagraph/ng_reports.h"
#include "nfagraph/ng_prune.h"
-#include "nfagraph/ng_util.h"
-#include "smallwrite/smallwrite_internal.h"
-#include "util/alloc.h"
+#include "nfagraph/ng_util.h"
+#include "smallwrite/smallwrite_internal.h"
+#include "util/alloc.h"
#include "util/bytecode_ptr.h"
-#include "util/charreach.h"
+#include "util/charreach.h"
#include "util/compare.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/make_unique.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/make_unique.h"
#include "util/ue2_graph.h"
-#include "util/ue2string.h"
-#include "util/verify_types.h"
-
-#include <map>
-#include <set>
-#include <vector>
-#include <utility>
-
+#include "util/ue2string.h"
+#include "util/verify_types.h"
+
+#include <map>
+#include <set>
+#include <vector>
+#include <utility>
+
#include <boost/graph/breadth_first_search.hpp>
-using namespace std;
-
-namespace ue2 {
-
-#define DFA_MERGE_MAX_STATES 8000
+using namespace std;
+
+namespace ue2 {
+
+#define DFA_MERGE_MAX_STATES 8000
#define MAX_TRIE_VERTICES 8000
-
+
struct LitTrieVertexProps {
LitTrieVertexProps() = default;
explicit LitTrieVertexProps(u8 c_in) : c(c_in) {}
@@ -116,46 +116,46 @@ std::set<ReportID> all_reports(const LitTrie &trie) {
using LitTrieVertex = LitTrie::vertex_descriptor;
using LitTrieEdge = LitTrie::edge_descriptor;
-namespace { // unnamed
-
-// Concrete impl class
-class SmallWriteBuildImpl : public SmallWriteBuild {
-public:
+namespace { // unnamed
+
+// Concrete impl class
+class SmallWriteBuildImpl : public SmallWriteBuild {
+public:
SmallWriteBuildImpl(size_t num_patterns, const ReportManager &rm,
const CompileContext &cc);
-
- // Construct a runtime implementation.
+
+ // Construct a runtime implementation.
bytecode_ptr<SmallWriteEngine> build(u32 roseQuality) override;
-
+
void add(const NGHolder &g, const ExpressionInfo &expr) override;
- void add(const ue2_literal &literal, ReportID r) override;
-
+ void add(const ue2_literal &literal, ReportID r) override;
+
set<ReportID> all_reports() const override;
-
- const ReportManager &rm;
- const CompileContext &cc;
-
+
+ const ReportManager &rm;
+ const CompileContext &cc;
+
vector<unique_ptr<raw_dfa>> dfas;
LitTrie lit_trie;
LitTrie lit_trie_nocase;
size_t num_literals = 0;
- bool poisoned;
-};
-
-} // namespace
-
+ bool poisoned;
+};
+
+} // namespace
+
SmallWriteBuild::~SmallWriteBuild() = default;
-
+
SmallWriteBuildImpl::SmallWriteBuildImpl(size_t num_patterns,
const ReportManager &rm_in,
- const CompileContext &cc_in)
- : rm(rm_in), cc(cc_in),
- /* small write is block mode only */
+ const CompileContext &cc_in)
+ : rm(rm_in), cc(cc_in),
+ /* small write is block mode only */
poisoned(!cc.grey.allowSmallWrite
|| cc.streaming
|| num_patterns > cc.grey.smallWriteMaxPatterns) {
-}
-
+}
+
/**
* \brief Remove any reports from the given vertex that cannot match within
* max_depth due to their constraints.
@@ -259,24 +259,24 @@ bool mergeDfas(vector<unique_ptr<raw_dfa>> &dfas, const ReportManager &rm,
}
void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) {
- // If the graph is poisoned (i.e. we can't build a SmallWrite version),
- // we don't even try.
- if (poisoned) {
- return;
- }
-
+ // If the graph is poisoned (i.e. we can't build a SmallWrite version),
+ // we don't even try.
+ if (poisoned) {
+ return;
+ }
+
if (expr.som) {
DEBUG_PRINTF("no SOM support in small-write engine\n");
- poisoned = true;
- return;
- }
-
+ poisoned = true;
+ return;
+ }
+
if (isVacuous(g)) {
DEBUG_PRINTF("no vacuous graph support in small-write engine\n");
poisoned = true;
return;
}
-
+
if (any_of_in(::ue2::all_reports(g), [&](ReportID id) {
return rm.getReport(id).minLength > 0;
})) {
@@ -287,45 +287,45 @@ void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) {
DEBUG_PRINTF("g=%p\n", &g);
- // make a copy of the graph so that we can modify it for our purposes
+ // make a copy of the graph so that we can modify it for our purposes
unique_ptr<NGHolder> h = cloneHolder(g);
-
+
pruneOverlong(*h, depth(cc.grey.smallWriteLargestBuffer), rm);
-
+
reduceGraph(*h, SOM_NONE, expr.utf8, cc);
if (can_never_match(*h)) {
DEBUG_PRINTF("graph can never match in small block\n");
- return;
- }
-
- // Now we can actually build the McClellan DFA
- assert(h->kind == NFA_OUTFIX);
- auto r = buildMcClellan(*h, &rm, cc.grey);
-
- // If we couldn't build a McClellan DFA for this portion, we won't be able
- // build a smwr which represents the pattern set
- if (!r) {
- DEBUG_PRINTF("failed to determinise\n");
- poisoned = true;
- return;
- }
-
+ return;
+ }
+
+ // Now we can actually build the McClellan DFA
+ assert(h->kind == NFA_OUTFIX);
+ auto r = buildMcClellan(*h, &rm, cc.grey);
+
+ // If we couldn't build a McClellan DFA for this portion, we won't be able
+ // build a smwr which represents the pattern set
+ if (!r) {
+ DEBUG_PRINTF("failed to determinise\n");
+ poisoned = true;
+ return;
+ }
+
if (clear_deeper_reports(*r, cc.grey.smallWriteLargestBuffer)) {
minimize_hopcroft(*r, cc.grey);
}
-
+
dfas.push_back(std::move(r));
if (dfas.size() >= cc.grey.smallWriteMergeBatchSize) {
if (!mergeDfas(dfas, rm, cc)) {
dfas.clear();
- poisoned = true;
- return;
- }
- }
-}
-
+ poisoned = true;
+ return;
+ }
+ }
+}
+
static
bool add_to_trie(const ue2_literal &literal, ReportID report, LitTrie &trie) {
auto u = trie.root;
@@ -351,19 +351,19 @@ bool add_to_trie(const ue2_literal &literal, ReportID report, LitTrie &trie) {
return num_vertices(trie) <= MAX_TRIE_VERTICES;
}
-void SmallWriteBuildImpl::add(const ue2_literal &literal, ReportID r) {
- // If the graph is poisoned (i.e. we can't build a SmallWrite version),
- // we don't even try.
- if (poisoned) {
+void SmallWriteBuildImpl::add(const ue2_literal &literal, ReportID r) {
+ // If the graph is poisoned (i.e. we can't build a SmallWrite version),
+ // we don't even try.
+ if (poisoned) {
DEBUG_PRINTF("poisoned\n");
- return;
- }
-
- if (literal.length() > cc.grey.smallWriteLargestBuffer) {
+ return;
+ }
+
+ if (literal.length() > cc.grey.smallWriteLargestBuffer) {
DEBUG_PRINTF("exceeded length limit\n");
- return; /* too long */
- }
-
+ return; /* too long */
+ }
+
if (++num_literals > cc.grey.smallWriteMaxLiterals) {
DEBUG_PRINTF("exceeded literal limit\n");
poisoned = true;
@@ -375,8 +375,8 @@ void SmallWriteBuildImpl::add(const ue2_literal &literal, ReportID r) {
DEBUG_PRINTF("trie add failed\n");
poisoned = true;
}
-}
-
+}
+
namespace {
/**
@@ -419,7 +419,7 @@ struct ACVisitor : public boost::default_bfs_visitor {
DEBUG_PRINTF("no failure edge\n");
return LitTrie::null_vertex();
- }
+ }
void tree_edge(LitTrieEdge e, const LitTrie &trie) {
auto u = source(e, trie);
@@ -448,8 +448,8 @@ private:
unordered_map<LitTrieVertex, LitTrieVertex> &failure_map;
vector<LitTrieVertex> &ordering; //!< BFS ordering for vertices.
};
-}
-
+}
+
static UNUSED
bool isSaneTrie(const LitTrie &trie) {
CharReach seen;
@@ -464,7 +464,7 @@ bool isSaneTrie(const LitTrie &trie) {
}
return true;
}
-
+
/**
* \brief Turn the given literal trie into an AC automaton by adding additional
* edges and reports.
@@ -497,14 +497,14 @@ void buildAutomaton(LitTrie &trie,
add_edge(v, w, trie);
}
}
- }
+ }
}
-
+
static
vector<u32> findDistFromRoot(const LitTrie &trie) {
vector<u32> dist(num_vertices(trie), UINT32_MAX);
dist[trie[trie.root].index] = 0;
-
+
// BFS to find dist from root.
breadth_first_search(
trie, trie.root,
@@ -512,7 +512,7 @@ vector<u32> findDistFromRoot(const LitTrie &trie) {
make_iterator_property_map(dist.begin(),
get(&LitTrieVertexProps::index, trie)),
boost::on_tree_edge()))));
-
+
return dist;
}
@@ -526,9 +526,9 @@ vector<u32> findDistToAccept(const LitTrie &trie) {
if (!trie[v].reports.empty()) {
q.push_back(v);
dist[trie[v].index] = 0;
- }
- }
-
+ }
+ }
+
// Custom BFS, since we have a pile of sources.
while (!q.empty()) {
auto v = q.front();
@@ -542,11 +542,11 @@ vector<u32> findDistToAccept(const LitTrie &trie) {
u_dist = d + 1;
}
}
- }
-
+ }
+
return dist;
}
-
+
/**
* \brief Prune all vertices from the trie that do not lie on a path from root
* to accept of length <= max_depth.
@@ -554,7 +554,7 @@ vector<u32> findDistToAccept(const LitTrie &trie) {
static
void pruneTrie(LitTrie &trie, u32 max_depth) {
DEBUG_PRINTF("pruning trie to %u\n", max_depth);
-
+
auto dist_from_root = findDistFromRoot(trie);
auto dist_to_accept = findDistToAccept(trie);
@@ -575,30 +575,30 @@ void pruneTrie(LitTrie &trie, u32 max_depth) {
clear_vertex(v, trie);
dead.push_back(v);
}
- }
-
+ }
+
if (dead.empty()) {
return;
- }
-
+ }
+
for (auto v : dead) {
remove_vertex(v, trie);
}
-
+
DEBUG_PRINTF("%zu vertices remain\n", num_vertices(trie));
-
+
renumber_edges(trie);
renumber_vertices(trie);
}
-
+
static
vector<CharReach> getAlphabet(const LitTrie &trie, bool nocase) {
vector<CharReach> esets = {CharReach::dot()};
for (auto v : vertices_range(trie)) {
if (v == trie.root) {
continue;
- }
-
+ }
+
CharReach cr;
if (nocase) {
cr.set(mytoupper(trie[v].c));
@@ -618,13 +618,13 @@ vector<CharReach> getAlphabet(const LitTrie &trie, bool nocase) {
esets.push_back(t);
}
}
- }
-
+ }
+
// For deterministic compiles.
sort(esets.begin(), esets.end());
return esets;
}
-
+
static
u16 buildAlphabet(const LitTrie &trie, bool nocase,
array<u16, ALPHABET_SIZE> &alpha,
@@ -639,17 +639,17 @@ u16 buildAlphabet(const LitTrie &trie, bool nocase,
}
unalpha[i] = leader;
i++;
- }
-
+ }
+
for (u16 j = N_CHARS; j < ALPHABET_SIZE; j++, i++) {
alpha[j] = i;
unalpha[i] = j;
}
-
+
DEBUG_PRINTF("alphabet size %u\n", i);
return i;
}
-
+
/**
* \brief Calculate state mapping, from vertex in trie to state index in BFS
* ordering.
@@ -666,8 +666,8 @@ makeStateMap(const LitTrie &trie, const vector<LitTrieVertex> &ordering) {
}
assert(state_ids.size() == num_vertices(trie));
return state_ids;
-}
-
+}
+
/** \brief Construct a raw_dfa from a literal trie. */
static
unique_ptr<raw_dfa> buildDfa(LitTrie &trie, bool nocase) {
@@ -739,49 +739,49 @@ unique_ptr<raw_dfa> buildDfa(LitTrie &trie, bool nocase) {
return rdfa;
}
-#define MAX_GOOD_ACCEL_DEPTH 4
-
-static
-bool is_slow(const raw_dfa &rdfa, const set<dstate_id_t> &accel,
- u32 roseQuality) {
- /* we consider a dfa as slow if there is no way to quickly get into an accel
- * state/dead state. In these cases, it is more likely that we will be
- * running at our unaccelerated dfa speeds so the small write engine is only
- * competitive over a small region where start up costs are dominant. */
-
- if (roseQuality) {
- return true;
- }
-
- set<dstate_id_t> visited;
- set<dstate_id_t> next;
- set<dstate_id_t> curr;
- curr.insert(rdfa.start_anchored);
-
- u32 ialpha_size = rdfa.getImplAlphaSize();
-
- for (u32 i = 0; i < MAX_GOOD_ACCEL_DEPTH; i++) {
- next.clear();
- for (dstate_id_t s : curr) {
- if (contains(visited, s)) {
- continue;
- }
- visited.insert(s);
- if (s == DEAD_STATE || contains(accel, s)) {
- return false;
- }
-
- for (size_t j = 0; j < ialpha_size; j++) {
- next.insert(rdfa.states[s].next[j]);
- }
- }
- curr.swap(next);
- }
-
- return true;
-}
-
-static
+#define MAX_GOOD_ACCEL_DEPTH 4
+
+static
+bool is_slow(const raw_dfa &rdfa, const set<dstate_id_t> &accel,
+ u32 roseQuality) {
+ /* we consider a dfa as slow if there is no way to quickly get into an accel
+ * state/dead state. In these cases, it is more likely that we will be
+ * running at our unaccelerated dfa speeds so the small write engine is only
+ * competitive over a small region where start up costs are dominant. */
+
+ if (roseQuality) {
+ return true;
+ }
+
+ set<dstate_id_t> visited;
+ set<dstate_id_t> next;
+ set<dstate_id_t> curr;
+ curr.insert(rdfa.start_anchored);
+
+ u32 ialpha_size = rdfa.getImplAlphaSize();
+
+ for (u32 i = 0; i < MAX_GOOD_ACCEL_DEPTH; i++) {
+ next.clear();
+ for (dstate_id_t s : curr) {
+ if (contains(visited, s)) {
+ continue;
+ }
+ visited.insert(s);
+ if (s == DEAD_STATE || contains(accel, s)) {
+ return false;
+ }
+
+ for (size_t j = 0; j < ialpha_size; j++) {
+ next.insert(rdfa.states[s].next[j]);
+ }
+ }
+ curr.swap(next);
+ }
+
+ return true;
+}
+
+static
bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, const CompileContext &cc,
const ReportManager &rm, bool has_non_literals,
set<dstate_id_t> &accel_states) {
@@ -812,73 +812,73 @@ bytecode_ptr<NFA> prepEngine(raw_dfa &rdfa, u32 roseQuality,
const CompileContext &cc, const ReportManager &rm,
bool has_non_literals, u32 *start_offset,
u32 *small_region) {
- *start_offset = remove_leading_dots(rdfa);
-
- // Unleash the McClellan!
- set<dstate_id_t> accel_states;
-
+ *start_offset = remove_leading_dots(rdfa);
+
+ // Unleash the McClellan!
+ set<dstate_id_t> accel_states;
+
auto nfa = getDfa(rdfa, cc, rm, has_non_literals, accel_states);
- if (!nfa) {
+ if (!nfa) {
DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n");
- return nullptr;
- }
-
- if (is_slow(rdfa, accel_states, roseQuality)) {
+ return nullptr;
+ }
+
+ if (is_slow(rdfa, accel_states, roseQuality)) {
DEBUG_PRINTF("is slow\n");
- *small_region = cc.grey.smallWriteLargestBufferBad;
- if (*small_region <= *start_offset) {
- return nullptr;
- }
+ *small_region = cc.grey.smallWriteLargestBufferBad;
+ if (*small_region <= *start_offset) {
+ return nullptr;
+ }
if (clear_deeper_reports(rdfa, *small_region - *start_offset)) {
minimize_hopcroft(rdfa, cc.grey);
if (rdfa.start_anchored == DEAD_STATE) {
DEBUG_PRINTF("all patterns pruned out\n");
return nullptr;
}
-
+
nfa = getDfa(rdfa, cc, rm, has_non_literals, accel_states);
if (!nfa) {
DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n");
assert(0); /* able to build orig dfa but not the trimmed? */
return nullptr;
}
- }
- } else {
- *small_region = cc.grey.smallWriteLargestBuffer;
- }
-
+ }
+ } else {
+ *small_region = cc.grey.smallWriteLargestBuffer;
+ }
+
assert(isDfaType(nfa->type));
- if (nfa->length > cc.grey.limitSmallWriteOutfixSize
- || nfa->length > cc.grey.limitDFASize) {
- DEBUG_PRINTF("smallwrite outfix size too large\n");
- return nullptr; /* this is just a soft failure - don't build smwr */
- }
-
- nfa->queueIndex = 0; /* dummy, small write API does not use queue */
- return nfa;
-}
-
-// SmallWriteBuild factory
+ if (nfa->length > cc.grey.limitSmallWriteOutfixSize
+ || nfa->length > cc.grey.limitDFASize) {
+ DEBUG_PRINTF("smallwrite outfix size too large\n");
+ return nullptr; /* this is just a soft failure - don't build smwr */
+ }
+
+ nfa->queueIndex = 0; /* dummy, small write API does not use queue */
+ return nfa;
+}
+
+// SmallWriteBuild factory
unique_ptr<SmallWriteBuild> makeSmallWriteBuilder(size_t num_patterns,
const ReportManager &rm,
- const CompileContext &cc) {
+ const CompileContext &cc) {
return ue2::make_unique<SmallWriteBuildImpl>(num_patterns, rm, cc);
-}
-
+}
+
bytecode_ptr<SmallWriteEngine> SmallWriteBuildImpl::build(u32 roseQuality) {
const bool has_literals = !is_empty(lit_trie) || !is_empty(lit_trie_nocase);
const bool has_non_literals = !dfas.empty();
if (dfas.empty() && !has_literals) {
- DEBUG_PRINTF("no smallwrite engine\n");
- poisoned = true;
- return nullptr;
- }
-
- if (poisoned) {
- DEBUG_PRINTF("some pattern could not be made into a smallwrite dfa\n");
- return nullptr;
- }
-
+ DEBUG_PRINTF("no smallwrite engine\n");
+ poisoned = true;
+ return nullptr;
+ }
+
+ if (poisoned) {
+ DEBUG_PRINTF("some pattern could not be made into a smallwrite dfa\n");
+ return nullptr;
+ }
+
// We happen to know that if the rose is high quality, we're going to limit
// depth further.
if (roseQuality) {
@@ -904,9 +904,9 @@ bytecode_ptr<SmallWriteEngine> SmallWriteBuildImpl::build(u32 roseQuality) {
if (dfas.empty()) {
DEBUG_PRINTF("no dfa, pruned everything away\n");
- return nullptr;
- }
-
+ return nullptr;
+ }
+
if (!mergeDfas(dfas, rm, cc)) {
dfas.clear();
return nullptr;
@@ -916,34 +916,34 @@ bytecode_ptr<SmallWriteEngine> SmallWriteBuildImpl::build(u32 roseQuality) {
auto rdfa = std::move(dfas.front());
dfas.clear();
- DEBUG_PRINTF("building rdfa %p\n", rdfa.get());
-
- u32 start_offset;
- u32 small_region;
+ DEBUG_PRINTF("building rdfa %p\n", rdfa.get());
+
+ u32 start_offset;
+ u32 small_region;
auto nfa = prepEngine(*rdfa, roseQuality, cc, rm, has_non_literals,
&start_offset, &small_region);
- if (!nfa) {
- DEBUG_PRINTF("some smallwrite outfix could not be prepped\n");
- /* just skip the smallwrite optimization */
- poisoned = true;
- return nullptr;
- }
-
- u32 size = sizeof(SmallWriteEngine) + nfa->length;
+ if (!nfa) {
+ DEBUG_PRINTF("some smallwrite outfix could not be prepped\n");
+ /* just skip the smallwrite optimization */
+ poisoned = true;
+ return nullptr;
+ }
+
+ u32 size = sizeof(SmallWriteEngine) + nfa->length;
auto smwr = make_zeroed_bytecode_ptr<SmallWriteEngine>(size);
-
- smwr->size = size;
- smwr->start_offset = start_offset;
- smwr->largestBuffer = small_region;
-
- /* copy in nfa after the smwr */
- assert(ISALIGNED_CL(smwr.get() + 1));
- memcpy(smwr.get() + 1, nfa.get(), nfa->length);
-
- DEBUG_PRINTF("smallwrite done %p\n", smwr.get());
- return smwr;
-}
-
+
+ smwr->size = size;
+ smwr->start_offset = start_offset;
+ smwr->largestBuffer = small_region;
+
+ /* copy in nfa after the smwr */
+ assert(ISALIGNED_CL(smwr.get() + 1));
+ memcpy(smwr.get() + 1, nfa.get(), nfa->length);
+
+ DEBUG_PRINTF("smallwrite done %p\n", smwr.get());
+ return smwr;
+}
+
set<ReportID> SmallWriteBuildImpl::all_reports() const {
set<ReportID> reports;
if (poisoned) {
@@ -958,6 +958,6 @@ set<ReportID> SmallWriteBuildImpl::all_reports() const {
insert(&reports, ::ue2::all_reports(lit_trie_nocase));
return reports;
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/smallwrite/smallwrite_build.h b/contrib/libs/hyperscan/src/smallwrite/smallwrite_build.h
index 552e7dc1a4..648b13db79 100644
--- a/contrib/libs/hyperscan/src/smallwrite/smallwrite_build.h
+++ b/contrib/libs/hyperscan/src/smallwrite/smallwrite_build.h
@@ -1,80 +1,80 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef SMWR_BUILD_H
-#define SMWR_BUILD_H
-
-/**
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SMWR_BUILD_H
+#define SMWR_BUILD_H
+
+/**
* \file
* \brief Small-write engine build interface.
*
* Everything you ever needed to feed literals in and get a SmallWriteEngine
* out. This header should be everything needed by the rest of UE2.
- */
-
-#include "ue2common.h"
+ */
+
+#include "ue2common.h"
#include "util/bytecode_ptr.h"
#include "util/noncopyable.h"
-
+
#include <memory>
#include <set>
-
-struct SmallWriteEngine;
-
-namespace ue2 {
-
-struct CompileContext;
-struct ue2_literal;
+
+struct SmallWriteEngine;
+
+namespace ue2 {
+
+struct CompileContext;
+struct ue2_literal;
class ExpressionInfo;
class NGHolder;
class ReportManager;
-
+
/**
* Abstract interface intended for callers from elsewhere in the tree, real
* underlying implementation is SmallWriteBuildImpl in smwr_build_impl.h.
*/
class SmallWriteBuild : noncopyable {
-public:
- virtual ~SmallWriteBuild();
-
+public:
+ virtual ~SmallWriteBuild();
+
virtual bytecode_ptr<SmallWriteEngine> build(u32 roseQuality) = 0;
-
+
virtual void add(const NGHolder &g, const ExpressionInfo &expr) = 0;
- virtual void add(const ue2_literal &literal, ReportID r) = 0;
+ virtual void add(const ue2_literal &literal, ReportID r) = 0;
virtual std::set<ReportID> all_reports() const = 0;
-};
-
+};
+
/** \brief Construct a usable SmallWrite builder. */
std::unique_ptr<SmallWriteBuild>
makeSmallWriteBuilder(size_t num_patterns, const ReportManager &rm,
const CompileContext &cc);
-
-} // namespace ue2
-
-#endif // SMWR_BUILD_H
+
+} // namespace ue2
+
+#endif // SMWR_BUILD_H
diff --git a/contrib/libs/hyperscan/src/smallwrite/smallwrite_internal.h b/contrib/libs/hyperscan/src/smallwrite/smallwrite_internal.h
index 5980632881..8f350dbeaa 100644
--- a/contrib/libs/hyperscan/src/smallwrite/smallwrite_internal.h
+++ b/contrib/libs/hyperscan/src/smallwrite/smallwrite_internal.h
@@ -1,53 +1,53 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef SMALLWRITE_INTERNAL_H
-#define SMALLWRITE_INTERNAL_H
-
-#include "ue2common.h"
-
-// Runtime structure header for SmallWrite.
-struct ALIGN_CL_DIRECTIVE SmallWriteEngine {
- u32 largestBuffer; /**< largest buffer that can be considered small write */
- u32 start_offset; /**< where to start scanning in the buffer. */
- u32 size; /**< size of the small write engine in bytes (including the nfa) */
-};
-
-struct NFA;
-
-static really_inline
-const struct NFA *getSmwrNfa(const struct SmallWriteEngine *smwr) {
- assert(smwr);
- const struct NFA *n
- = (const struct NFA *)((const char *)smwr + sizeof(*smwr));
- assert(ISALIGNED_CL(n));
- return n;
-}
-
-#endif // SMALLWRITE_INTERNAL_H
-
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SMALLWRITE_INTERNAL_H
+#define SMALLWRITE_INTERNAL_H
+
+#include "ue2common.h"
+
+// Runtime structure header for SmallWrite.
+struct ALIGN_CL_DIRECTIVE SmallWriteEngine {
+ u32 largestBuffer; /**< largest buffer that can be considered small write */
+ u32 start_offset; /**< where to start scanning in the buffer. */
+ u32 size; /**< size of the small write engine in bytes (including the nfa) */
+};
+
+struct NFA;
+
+static really_inline
+const struct NFA *getSmwrNfa(const struct SmallWriteEngine *smwr) {
+ assert(smwr);
+ const struct NFA *n
+ = (const struct NFA *)((const char *)smwr + sizeof(*smwr));
+ assert(ISALIGNED_CL(n));
+ return n;
+}
+
+#endif // SMALLWRITE_INTERNAL_H
+
diff --git a/contrib/libs/hyperscan/src/som/slot_manager.cpp b/contrib/libs/hyperscan/src/som/slot_manager.cpp
index 3c9309a5ee..d97e8fc1d7 100644
--- a/contrib/libs/hyperscan/src/som/slot_manager.cpp
+++ b/contrib/libs/hyperscan/src/som/slot_manager.cpp
@@ -1,255 +1,255 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief SOM Slot Manager.
- */
-
-#include "slot_manager.h"
-
-#include "slot_manager_internal.h"
-#include "ue2common.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_is_equal.h"
-#include "nfagraph/ng_som_util.h"
-#include "nfagraph/ng_region.h"
-#include "util/charreach.h"
+ * \brief SOM Slot Manager.
+ */
+
+#include "slot_manager.h"
+
+#include "slot_manager_internal.h"
+#include "ue2common.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_is_equal.h"
+#include "nfagraph/ng_som_util.h"
+#include "nfagraph/ng_region.h"
+#include "util/charreach.h"
#include "util/hash.h"
-#include "util/make_unique.h"
-#include "util/dump_charclass.h"
-#include "util/verify_types.h"
-
-#include <cassert>
-#include <deque>
-#include <utility>
-
-using namespace std;
-
-namespace ue2 {
-
-/** \brief Define this to disable the cache and have everyone get their own
- * SOM slot. */
-//#define NO_SLOT_CACHING
-
-SlotCacheEntry::SlotCacheEntry(const NGHolder &prefix_in,
- const CharReach &escapes_in, u32 parent_in,
- bool is_reset_in, u32 slot_in)
- : prefix(cloneHolder(prefix_in)), escapes(escapes_in),
- parent_slot(parent_in), is_reset(is_reset_in), slot(slot_in) {}
-
-size_t SlotEntryHasher::operator()(const SlotCacheEntry &e) const {
- assert(e.prefix);
-
+#include "util/make_unique.h"
+#include "util/dump_charclass.h"
+#include "util/verify_types.h"
+
+#include <cassert>
+#include <deque>
+#include <utility>
+
+using namespace std;
+
+namespace ue2 {
+
+/** \brief Define this to disable the cache and have everyone get their own
+ * SOM slot. */
+//#define NO_SLOT_CACHING
+
+SlotCacheEntry::SlotCacheEntry(const NGHolder &prefix_in,
+ const CharReach &escapes_in, u32 parent_in,
+ bool is_reset_in, u32 slot_in)
+ : prefix(cloneHolder(prefix_in)), escapes(escapes_in),
+ parent_slot(parent_in), is_reset(is_reset_in), slot(slot_in) {}
+
+size_t SlotEntryHasher::operator()(const SlotCacheEntry &e) const {
+ assert(e.prefix);
+
size_t v = hash_all(hash_holder(*e.prefix), e.parent_slot,
e.is_reset, e.escapes);
-
- DEBUG_PRINTF("%zu vertices, parent_slot=%u, escapes=%s, is_reset=%d "
- "hashes to %zx\n", num_vertices(*e.prefix), e.parent_slot,
- describeClass(e.escapes, 10, CC_OUT_TEXT).c_str(),
- (int)e.is_reset, v);
- return v;
-}
-
-bool SlotEntryEqual::operator()(const SlotCacheEntry &a,
- const SlotCacheEntry &b) const {
- assert(a.prefix);
- assert(b.prefix);
- return a.parent_slot == b.parent_slot
- && a.is_reset == b.is_reset
- && a.escapes == b.escapes
- && is_equal(*a.prefix, *b.prefix);
- // NOTE: slot not compared.
-}
-
-void SlotCache::insert(const NGHolder &prefix, const CharReach &escapes,
- u32 parent_slot, bool is_reset, u32 slot) {
- store.emplace(prefix, escapes, parent_slot, is_reset, slot);
-}
-
-const SlotCacheEntry *SlotCache::find(const NGHolder &prefix,
- const CharReach &escapes, u32 parent_slot,
- bool is_reset) {
- SlotCacheEntry entry(prefix, escapes, parent_slot, is_reset,
- 0 /* unused for searching with SlotEntryEqual */);
- CacheStore::const_iterator it = store.find(entry);
- if (it != store.end()) {
- return &(*it);
- }
- return nullptr;
-}
-
-SomSlotManager::SomSlotManager(u8 p)
- : nextSomSlot(0), cache(ue2::make_unique<SlotCache>()), historyRequired(0),
- precision(p) {}
-
-SomSlotManager::~SomSlotManager() { }
-
-u32 SomSlotManager::getSomSlot(const NGHolder &prefix,
- const CharReach &escapes, bool is_reset,
- u32 parent_slot) {
- assert(parent_slot == NO_PARENT || parent_slot < nextSomSlot);
-
- DEBUG_PRINTF("prefix with %zu vertices, parent_slot=%u\n",
- num_vertices(prefix), parent_slot);
- DEBUG_PRINTF("nextSomSlot=%u\n", nextSomSlot);
-
-#ifdef NO_SLOT_CACHING
- return nextSomSlot++;
-#endif
-
- const SlotCacheEntry *entry =
- cache->find(prefix, escapes, parent_slot, is_reset);
- if (entry) {
- DEBUG_PRINTF("cache hit: slot %u\n", entry->slot);
- return entry->slot;
- }
-
- DEBUG_PRINTF("cache miss: handing out new slot %u\n", nextSomSlot);
- cache->insert(prefix, escapes, parent_slot, is_reset, nextSomSlot);
- return nextSomSlot++;
-}
-
-u32 SomSlotManager::getInitialResetSomSlot(const NGHolder &prefix,
- const NGHolder &g,
+
+ DEBUG_PRINTF("%zu vertices, parent_slot=%u, escapes=%s, is_reset=%d "
+ "hashes to %zx\n", num_vertices(*e.prefix), e.parent_slot,
+ describeClass(e.escapes, 10, CC_OUT_TEXT).c_str(),
+ (int)e.is_reset, v);
+ return v;
+}
+
+bool SlotEntryEqual::operator()(const SlotCacheEntry &a,
+ const SlotCacheEntry &b) const {
+ assert(a.prefix);
+ assert(b.prefix);
+ return a.parent_slot == b.parent_slot
+ && a.is_reset == b.is_reset
+ && a.escapes == b.escapes
+ && is_equal(*a.prefix, *b.prefix);
+ // NOTE: slot not compared.
+}
+
+void SlotCache::insert(const NGHolder &prefix, const CharReach &escapes,
+ u32 parent_slot, bool is_reset, u32 slot) {
+ store.emplace(prefix, escapes, parent_slot, is_reset, slot);
+}
+
+const SlotCacheEntry *SlotCache::find(const NGHolder &prefix,
+ const CharReach &escapes, u32 parent_slot,
+ bool is_reset) {
+ SlotCacheEntry entry(prefix, escapes, parent_slot, is_reset,
+ 0 /* unused for searching with SlotEntryEqual */);
+ CacheStore::const_iterator it = store.find(entry);
+ if (it != store.end()) {
+ return &(*it);
+ }
+ return nullptr;
+}
+
+SomSlotManager::SomSlotManager(u8 p)
+ : nextSomSlot(0), cache(ue2::make_unique<SlotCache>()), historyRequired(0),
+ precision(p) {}
+
+SomSlotManager::~SomSlotManager() { }
+
+u32 SomSlotManager::getSomSlot(const NGHolder &prefix,
+ const CharReach &escapes, bool is_reset,
+ u32 parent_slot) {
+ assert(parent_slot == NO_PARENT || parent_slot < nextSomSlot);
+
+ DEBUG_PRINTF("prefix with %zu vertices, parent_slot=%u\n",
+ num_vertices(prefix), parent_slot);
+ DEBUG_PRINTF("nextSomSlot=%u\n", nextSomSlot);
+
+#ifdef NO_SLOT_CACHING
+ return nextSomSlot++;
+#endif
+
+ const SlotCacheEntry *entry =
+ cache->find(prefix, escapes, parent_slot, is_reset);
+ if (entry) {
+ DEBUG_PRINTF("cache hit: slot %u\n", entry->slot);
+ return entry->slot;
+ }
+
+ DEBUG_PRINTF("cache miss: handing out new slot %u\n", nextSomSlot);
+ cache->insert(prefix, escapes, parent_slot, is_reset, nextSomSlot);
+ return nextSomSlot++;
+}
+
+u32 SomSlotManager::getInitialResetSomSlot(const NGHolder &prefix,
+ const NGHolder &g,
const unordered_map<NFAVertex, u32> &region_map,
- u32 last_sent_region, bool *prefix_already_implemented) {
- DEBUG_PRINTF("getting initial reset; last sent region %u\n",
- last_sent_region);
- assert(last_sent_region);
- assert(!hasBigCycles(prefix));
- *prefix_already_implemented = false;
-
-#ifdef NO_SLOT_CACHING
- return nextSomSlot++;
-#endif
-
- shared_ptr<const NGHolder> pp = cloneHolder(prefix);
- assert(hash_holder(*pp) == hash_holder(prefix));
-
- auto hs_it = cache->initial_prefixes.find(pp);
- if (hs_it != cache->initial_prefixes.end()) {
- DEBUG_PRINTF("pulling from cache\n");
- pp = *hs_it;
- } else {
- DEBUG_PRINTF("storing in cache entry %zu, hash=%llu\n",
- cache->initial_prefixes.size(), hash_holder(*pp));
- cache->initial_prefixes.insert(pp);
- }
-
- // Clone a copy of g (and its region map) that we will be able to store
- // later on.
- shared_ptr<NGHolder> gg = make_shared<NGHolder>();
+ u32 last_sent_region, bool *prefix_already_implemented) {
+ DEBUG_PRINTF("getting initial reset; last sent region %u\n",
+ last_sent_region);
+ assert(last_sent_region);
+ assert(!hasBigCycles(prefix));
+ *prefix_already_implemented = false;
+
+#ifdef NO_SLOT_CACHING
+ return nextSomSlot++;
+#endif
+
+ shared_ptr<const NGHolder> pp = cloneHolder(prefix);
+ assert(hash_holder(*pp) == hash_holder(prefix));
+
+ auto hs_it = cache->initial_prefixes.find(pp);
+ if (hs_it != cache->initial_prefixes.end()) {
+ DEBUG_PRINTF("pulling from cache\n");
+ pp = *hs_it;
+ } else {
+ DEBUG_PRINTF("storing in cache entry %zu, hash=%llu\n",
+ cache->initial_prefixes.size(), hash_holder(*pp));
+ cache->initial_prefixes.insert(pp);
+ }
+
+ // Clone a copy of g (and its region map) that we will be able to store
+ // later on.
+ shared_ptr<NGHolder> gg = make_shared<NGHolder>();
unordered_map<NFAVertex, NFAVertex> orig_to_copy;
- cloneHolder(*gg, g, &orig_to_copy);
+ cloneHolder(*gg, g, &orig_to_copy);
unordered_map<NFAVertex, u32> gg_region_map;
- for (const auto &m : region_map) {
- assert(contains(region_map, m.first));
- gg_region_map.emplace(orig_to_copy.at(m.first), m.second);
- }
-
- u32 first_bad_region = ~0U;
- UNUSED bool rv = sentClearsTail(g, region_map, *pp, last_sent_region,
- &first_bad_region);
- assert(!rv || first_bad_region == ~0U);
-
- InitialResetInfo *ir = nullptr;
-
- for (auto &reset : cache->initial_resets) {
- /* is this prefix already in our list? */
- auto has_prefix_func =
- [&pp](const InitialResetEntry &e) { return e.sent == pp; };
- bool already_seen_prefix =
- find_if(reset.entries.begin(), reset.entries.end(),
- has_prefix_func) != reset.entries.end();
-
- for (auto &e : reset.entries) {
- u32 temp = 0;
- /* we don't need to test against sentinels which are identical to
- * our current one as races don't matter and we know it clears
- * sufficiently. */
- if (e.sent != pp &&
- !sentClearsTail(g, region_map, *e.sent, last_sent_region - 1,
- &temp) &&
- (temp < first_bad_region || first_bad_region == ~0U)) {
- goto try_next;
- }
-
- /* if we have already seen the prefix it must be fine */
- if (!already_seen_prefix &&
- !sentClearsTail(*e.body, e.body_regions, prefix,
- e.sent_region - 1, &temp) &&
- (temp < e.first_bad_region || e.first_bad_region == ~0U)) {
- goto try_next;
- }
- }
- DEBUG_PRINTF("sharing\n");
- if (already_seen_prefix) {
- /* if we have already created this prefix using this som slot, we
- * can avoid creating another copy of the prefix. */
- *prefix_already_implemented = true;
- }
- ir = &reset;
- goto found;
- try_next:;
- }
-
- cache->initial_resets.emplace_back(nextSomSlot++);
- ir = &cache->initial_resets.back();
-
-found:
- ir->entries.emplace_back(pp, gg, gg_region_map, last_sent_region,
- first_bad_region);
- return ir->slot;
-}
-
-u32 SomSlotManager::getPrivateSomSlot(void) {
- return nextSomSlot++;
-}
-
-void SomSlotManager::rollbackSomTo(u32 num) {
- assert(nextSomSlot >= num);
- nextSomSlot = num;
-}
-
-u32 SomSlotManager::numSomSlots() const {
- return nextSomSlot;
-}
-
+ for (const auto &m : region_map) {
+ assert(contains(region_map, m.first));
+ gg_region_map.emplace(orig_to_copy.at(m.first), m.second);
+ }
+
+ u32 first_bad_region = ~0U;
+ UNUSED bool rv = sentClearsTail(g, region_map, *pp, last_sent_region,
+ &first_bad_region);
+ assert(!rv || first_bad_region == ~0U);
+
+ InitialResetInfo *ir = nullptr;
+
+ for (auto &reset : cache->initial_resets) {
+ /* is this prefix already in our list? */
+ auto has_prefix_func =
+ [&pp](const InitialResetEntry &e) { return e.sent == pp; };
+ bool already_seen_prefix =
+ find_if(reset.entries.begin(), reset.entries.end(),
+ has_prefix_func) != reset.entries.end();
+
+ for (auto &e : reset.entries) {
+ u32 temp = 0;
+ /* we don't need to test against sentinels which are identical to
+ * our current one as races don't matter and we know it clears
+ * sufficiently. */
+ if (e.sent != pp &&
+ !sentClearsTail(g, region_map, *e.sent, last_sent_region - 1,
+ &temp) &&
+ (temp < first_bad_region || first_bad_region == ~0U)) {
+ goto try_next;
+ }
+
+ /* if we have already seen the prefix it must be fine */
+ if (!already_seen_prefix &&
+ !sentClearsTail(*e.body, e.body_regions, prefix,
+ e.sent_region - 1, &temp) &&
+ (temp < e.first_bad_region || e.first_bad_region == ~0U)) {
+ goto try_next;
+ }
+ }
+ DEBUG_PRINTF("sharing\n");
+ if (already_seen_prefix) {
+ /* if we have already created this prefix using this som slot, we
+ * can avoid creating another copy of the prefix. */
+ *prefix_already_implemented = true;
+ }
+ ir = &reset;
+ goto found;
+ try_next:;
+ }
+
+ cache->initial_resets.emplace_back(nextSomSlot++);
+ ir = &cache->initial_resets.back();
+
+found:
+ ir->entries.emplace_back(pp, gg, gg_region_map, last_sent_region,
+ first_bad_region);
+ return ir->slot;
+}
+
+u32 SomSlotManager::getPrivateSomSlot(void) {
+ return nextSomSlot++;
+}
+
+void SomSlotManager::rollbackSomTo(u32 num) {
+ assert(nextSomSlot >= num);
+ nextSomSlot = num;
+}
+
+u32 SomSlotManager::numSomSlots() const {
+ return nextSomSlot;
+}
+
u32 SomSlotManager::addRevNfa(bytecode_ptr<NFA> nfa, u32 maxWidth) {
- u32 rv = verify_u32(rev_nfas.size());
- rev_nfas.push_back(move(nfa));
-
- // A rev nfa commits us to having enough history around to handle its
- // max width.
- historyRequired = max(historyRequired, maxWidth);
-
- return rv;
-}
-
-} // namespace ue2
+ u32 rv = verify_u32(rev_nfas.size());
+ rev_nfas.push_back(move(nfa));
+
+ // A rev nfa commits us to having enough history around to handle its
+ // max width.
+ historyRequired = max(historyRequired, maxWidth);
+
+ return rv;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/som/slot_manager.h b/contrib/libs/hyperscan/src/som/slot_manager.h
index 2283af33c1..e5b2d794c0 100644
--- a/contrib/libs/hyperscan/src/som/slot_manager.h
+++ b/contrib/libs/hyperscan/src/som/slot_manager.h
@@ -1,118 +1,118 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief SOM Slot Manager.
- */
-
-#ifndef SLOT_MANAGER_H
-#define SLOT_MANAGER_H
-
-#include "ue2common.h"
+ * \brief SOM Slot Manager.
+ */
+
+#ifndef SLOT_MANAGER_H
+#define SLOT_MANAGER_H
+
+#include "ue2common.h"
#include "nfagraph/ng_holder.h"
#include "util/bytecode_ptr.h"
#include "util/noncopyable.h"
-
-#include <deque>
-#include <memory>
+
+#include <deque>
+#include <memory>
#include <unordered_map>
-
-struct NFA;
-
-namespace ue2 {
-
-class CharReach;
-class NGHolder;
-struct Grey;
-struct SlotCache;
-
-/** \brief SOM slot manager. Used to hand out SOM slots and track their
- * relationships during SOM construction. Also stores reverse NFAs used for
- * SOM. */
+
+struct NFA;
+
+namespace ue2 {
+
+class CharReach;
+class NGHolder;
+struct Grey;
+struct SlotCache;
+
+/** \brief SOM slot manager. Used to hand out SOM slots and track their
+ * relationships during SOM construction. Also stores reverse NFAs used for
+ * SOM. */
class SomSlotManager : noncopyable {
-public:
- explicit SomSlotManager(u8 precision);
- ~SomSlotManager();
-
- /** \brief Sentinel value used to specify that a slot has no parent. */
- static constexpr u32 NO_PARENT = ~0;
-
- u32 getSomSlot(const NGHolder &prefix, const CharReach &escapes,
- bool is_reset, u32 parent_slot);
-
- /** prefix must be acting as a resetting sentinel and should be a dag (if
- * not how are we establish som?) */
- u32 getInitialResetSomSlot(const NGHolder &prefix, const NGHolder &g,
+public:
+ explicit SomSlotManager(u8 precision);
+ ~SomSlotManager();
+
+ /** \brief Sentinel value used to specify that a slot has no parent. */
+ static constexpr u32 NO_PARENT = ~0;
+
+ u32 getSomSlot(const NGHolder &prefix, const CharReach &escapes,
+ bool is_reset, u32 parent_slot);
+
+ /** prefix must be acting as a resetting sentinel and should be a dag (if
+ * not how are we establish som?) */
+ u32 getInitialResetSomSlot(const NGHolder &prefix, const NGHolder &g,
const std::unordered_map<NFAVertex, u32> &region_map,
- u32 last_sent_region,
- bool *prefix_already_implemented);
-
- u32 getPrivateSomSlot(void);
-
- void rollbackSomTo(u32 num);
-
- u32 numSomSlots() const;
-
+ u32 last_sent_region,
+ bool *prefix_already_implemented);
+
+ u32 getPrivateSomSlot(void);
+
+ void rollbackSomTo(u32 num);
+
+ u32 numSomSlots() const;
+
const std::deque<bytecode_ptr<NFA>> &getRevNfas() const {
- return rev_nfas;
- }
-
+ return rev_nfas;
+ }
+
u32 addRevNfa(bytecode_ptr<NFA> nfa, u32 maxWidth);
-
- u32 somHistoryRequired() const { return historyRequired; }
-
- u32 somPrecision() const { return precision; }
-
- void somPrecision(u32 p) {
- precision = p;
- }
-
-private:
- u32 nextSomSlot;
- std::unique_ptr<SlotCache> cache;
-
- /** \brief Reverse NFAs used for SOM support. */
+
+ u32 somHistoryRequired() const { return historyRequired; }
+
+ u32 somPrecision() const { return precision; }
+
+ void somPrecision(u32 p) {
+ precision = p;
+ }
+
+private:
+ u32 nextSomSlot;
+ std::unique_ptr<SlotCache> cache;
+
+ /** \brief Reverse NFAs used for SOM support. */
std::deque<bytecode_ptr<NFA>> rev_nfas;
-
- /** \brief In streaming mode, the amount of history we've committed to
- * using for SOM rev NFAs. */
- u32 historyRequired;
-
- /** \brief Number of bytes of SOM precision requested by the user, zero if
- * not in SOM mode. */
- u32 precision;
-
-#ifdef DUMP_SUPPORT
- friend void dumpSomSlotManager(const SomSlotManager &ssm, const Grey &grey);
-#endif
-};
-
-} // namespace ue2
-
-#endif
+
+ /** \brief In streaming mode, the amount of history we've committed to
+ * using for SOM rev NFAs. */
+ u32 historyRequired;
+
+ /** \brief Number of bytes of SOM precision requested by the user, zero if
+ * not in SOM mode. */
+ u32 precision;
+
+#ifdef DUMP_SUPPORT
+ friend void dumpSomSlotManager(const SomSlotManager &ssm, const Grey &grey);
+#endif
+};
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/som/slot_manager_dump.h b/contrib/libs/hyperscan/src/som/slot_manager_dump.h
index 0c6d942982..3a0371e215 100644
--- a/contrib/libs/hyperscan/src/som/slot_manager_dump.h
+++ b/contrib/libs/hyperscan/src/som/slot_manager_dump.h
@@ -1,51 +1,51 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef SLOT_MANAGER_DUMP_H
-#define SLOT_MANAGER_DUMP_H
-
-namespace ue2 {
-
-class SomSlotManager;
-struct Grey;
-
-#ifdef DUMP_SUPPORT
-
-void dumpSomSlotManager(const SomSlotManager &ssm, const Grey &grey);
-
-#else
-
-static inline UNUSED
-void dumpSomSlotManager(const SomSlotManager &, const Grey &) {
-}
-
-#endif
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SLOT_MANAGER_DUMP_H
+#define SLOT_MANAGER_DUMP_H
+
+namespace ue2 {
+
+class SomSlotManager;
+struct Grey;
+
+#ifdef DUMP_SUPPORT
+
+void dumpSomSlotManager(const SomSlotManager &ssm, const Grey &grey);
+
+#else
+
+static inline UNUSED
+void dumpSomSlotManager(const SomSlotManager &, const Grey &) {
+}
+
+#endif
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/som/slot_manager_internal.h b/contrib/libs/hyperscan/src/som/slot_manager_internal.h
index 3ee863d978..7e1fecc7e6 100644
--- a/contrib/libs/hyperscan/src/som/slot_manager_internal.h
+++ b/contrib/libs/hyperscan/src/som/slot_manager_internal.h
@@ -1,107 +1,107 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef SLOT_MANAGER_INTERNAL_H
-#define SLOT_MANAGER_INTERNAL_H
-
-#include "nfagraph/ng.h"
-#include "nfagraph/ng_is_equal.h"
-#include "util/charreach.h"
-#include "ue2common.h"
-
-#include <memory>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SLOT_MANAGER_INTERNAL_H
+#define SLOT_MANAGER_INTERNAL_H
+
+#include "nfagraph/ng.h"
+#include "nfagraph/ng_is_equal.h"
+#include "util/charreach.h"
+#include "ue2common.h"
+
+#include <memory>
#include <unordered_map>
#include <unordered_set>
-#include <vector>
-
-namespace ue2 {
-
-struct InitialResetEntry {
- InitialResetEntry(std::shared_ptr<const NGHolder> sent_in,
- std::shared_ptr<const NGHolder> body_in,
+#include <vector>
+
+namespace ue2 {
+
+struct InitialResetEntry {
+ InitialResetEntry(std::shared_ptr<const NGHolder> sent_in,
+ std::shared_ptr<const NGHolder> body_in,
const std::unordered_map<NFAVertex, u32> &body_regions_in,
- u32 sent_region_in, u32 first_bad_region_in)
- : sent(sent_in), body(body_in), body_regions(body_regions_in),
- sent_region(sent_region_in), first_bad_region(first_bad_region_in) {}
-
- std::shared_ptr<const NGHolder> sent;
- std::shared_ptr<const NGHolder> body;
+ u32 sent_region_in, u32 first_bad_region_in)
+ : sent(sent_in), body(body_in), body_regions(body_regions_in),
+ sent_region(sent_region_in), first_bad_region(first_bad_region_in) {}
+
+ std::shared_ptr<const NGHolder> sent;
+ std::shared_ptr<const NGHolder> body;
std::unordered_map<NFAVertex, u32> body_regions;
- u32 sent_region;
- u32 first_bad_region; /* ~0U if it must cover the whole g */
-};
-
-struct InitialResetInfo {
- explicit InitialResetInfo(u32 slot_in) : slot(slot_in) {}
-
- std::vector<InitialResetEntry> entries;
- u32 slot;
-};
-
-struct SlotCacheEntry {
- // We store our own copy of the prefix so we control its lifetime. A
- // pointer is used so that this entry can be placed in STL containers, as
- // NGHolder is not copy-constructible.
- SlotCacheEntry(const NGHolder &prefix_in, const CharReach &escapes_in,
- u32 parent_in, bool is_reset_in, u32 slot_in);
-
- std::unique_ptr<const NGHolder> prefix;
- CharReach escapes;
- u32 parent_slot;
- bool is_reset;
- u32 slot;
-};
-
-struct SlotEntryHasher {
- size_t operator()(const SlotCacheEntry &e) const;
-};
-
-struct SlotEntryEqual {
- bool operator()(const SlotCacheEntry &a, const SlotCacheEntry &b) const;
-};
-
-struct SlotCache {
+ u32 sent_region;
+ u32 first_bad_region; /* ~0U if it must cover the whole g */
+};
+
+struct InitialResetInfo {
+ explicit InitialResetInfo(u32 slot_in) : slot(slot_in) {}
+
+ std::vector<InitialResetEntry> entries;
+ u32 slot;
+};
+
+struct SlotCacheEntry {
+ // We store our own copy of the prefix so we control its lifetime. A
+ // pointer is used so that this entry can be placed in STL containers, as
+ // NGHolder is not copy-constructible.
+ SlotCacheEntry(const NGHolder &prefix_in, const CharReach &escapes_in,
+ u32 parent_in, bool is_reset_in, u32 slot_in);
+
+ std::unique_ptr<const NGHolder> prefix;
+ CharReach escapes;
+ u32 parent_slot;
+ bool is_reset;
+ u32 slot;
+};
+
+struct SlotEntryHasher {
+ size_t operator()(const SlotCacheEntry &e) const;
+};
+
+struct SlotEntryEqual {
+ bool operator()(const SlotCacheEntry &a, const SlotCacheEntry &b) const;
+};
+
+struct SlotCache {
typedef std::unordered_set<SlotCacheEntry, SlotEntryHasher,
- SlotEntryEqual> CacheStore;
-
- void insert(const NGHolder &prefix, const CharReach &escapes,
- u32 parent_slot, bool is_reset, u32 slot);
-
- const SlotCacheEntry *find(const NGHolder &prefix, const CharReach &escapes,
- u32 parent_slot, bool is_reset);
-
- CacheStore store;
-
+ SlotEntryEqual> CacheStore;
+
+ void insert(const NGHolder &prefix, const CharReach &escapes,
+ u32 parent_slot, bool is_reset, u32 slot);
+
+ const SlotCacheEntry *find(const NGHolder &prefix, const CharReach &escapes,
+ u32 parent_slot, bool is_reset);
+
+ CacheStore store;
+
std::unordered_set<std::shared_ptr<const NGHolder>, NGHolderHasher,
NGHolderEqual> initial_prefixes;
- std::vector<InitialResetInfo> initial_resets;
-};
-
-} // namespace ue2
-
-#endif
+ std::vector<InitialResetInfo> initial_resets;
+};
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/som/som.h b/contrib/libs/hyperscan/src/som/som.h
index 6050003cda..e759cf0a25 100644
--- a/contrib/libs/hyperscan/src/som/som.h
+++ b/contrib/libs/hyperscan/src/som/som.h
@@ -1,47 +1,47 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Common SOM definitions.
- */
-
-#ifndef UE2_SOM_H
-#define UE2_SOM_H
-
+ * \brief Common SOM definitions.
+ */
+
+#ifndef UE2_SOM_H
+#define UE2_SOM_H
+
namespace ue2 {
-/** \brief Enumeration specifying a start of match behaviour. */
-enum som_type {
- SOM_NONE, //!< No SOM required
- SOM_LEFT //!< Exact leftmost SOM
-};
-
+/** \brief Enumeration specifying a start of match behaviour. */
+enum som_type {
+ SOM_NONE, //!< No SOM required
+ SOM_LEFT //!< Exact leftmost SOM
+};
+
} // namespace ue2
-#endif // UE2_SOM_H
+#endif // UE2_SOM_H
diff --git a/contrib/libs/hyperscan/src/som/som_runtime.c b/contrib/libs/hyperscan/src/som/som_runtime.c
index 4f7f3f2342..1a868efc97 100644
--- a/contrib/libs/hyperscan/src/som/som_runtime.c
+++ b/contrib/libs/hyperscan/src/som/som_runtime.c
@@ -1,535 +1,535 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief SOM runtime code.
- *
- *
- * Runtime code for SOM handling called by the Rose callback adaptors.
- *
- * Note:
- * Races between escapes making a som loc writeable and attempts to write to it
- * at the same to_offset are always resolved as if the escape arrived first
- * and then the request to write to that location.
- */
-
-#include "hs_internal.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief SOM runtime code.
+ *
+ *
+ * Runtime code for SOM handling called by the Rose callback adaptors.
+ *
+ * Note:
+ * Races between escapes making a som loc writeable and attempts to write to it
+ * at the same to_offset are always resolved as if the escape arrived first
+ * and then the request to write to that location.
+ */
+
+#include "hs_internal.h"
#include "som_operation.h"
-#include "som_runtime.h"
-#include "scratch.h"
-#include "ue2common.h"
-#include "rose/rose_internal.h"
-#include "nfa/nfa_api.h"
-#include "nfa/nfa_internal.h"
-#include "util/fatbit.h"
-#include "util/multibit.h"
-
-static really_inline
-void setSomLoc(struct fatbit *som_set_now, u64a *som_store, u32 som_store_count,
+#include "som_runtime.h"
+#include "scratch.h"
+#include "ue2common.h"
+#include "rose/rose_internal.h"
+#include "nfa/nfa_api.h"
+#include "nfa/nfa_internal.h"
+#include "util/fatbit.h"
+#include "util/multibit.h"
+
+static really_inline
+void setSomLoc(struct fatbit *som_set_now, u64a *som_store, u32 som_store_count,
const struct som_operation *ri, u64a to_offset) {
- /* validity handled by callers */
- assert(to_offset >= ri->aux.somDistance);
- u64a start_offset = to_offset - ri->aux.somDistance;
- u32 som_loc = ri->onmatch;
-
- /* resolve any races for matches at this point in favour of the earliest som
- */
- if (!fatbit_set(som_set_now, som_store_count, som_loc)) {
- som_store[som_loc] = start_offset;
- } else {
- LIMIT_TO_AT_MOST(&som_store[som_loc], start_offset);
- }
-
- DEBUG_PRINTF("som_store[%u] set to %llu\n", som_loc, som_store[som_loc]);
-}
-
-static really_inline
-char ok_and_mark_if_write(u8 *som_store_valid, struct fatbit *som_set_now,
- u8 *som_store_writable, u32 som_store_count,
- u32 loc) {
- return !mmbit_set(som_store_valid, som_store_count, loc) /* unwritten */
- || fatbit_isset(som_set_now, som_store_count, loc) /* write here, need
- * to resolve race */
- || mmbit_isset(som_store_writable, som_store_count, loc); /* writable */
-}
-
-static really_inline
-char ok_and_mark_if_unset(u8 *som_store_valid, struct fatbit *som_set_now,
- u32 som_store_count, u32 loc) {
- return !mmbit_set(som_store_valid, som_store_count, loc) /* unwritten */
- || fatbit_isset(som_set_now, som_store_count, loc); /* write here, need
- * to resolve race */
-}
-
-static
+ /* validity handled by callers */
+ assert(to_offset >= ri->aux.somDistance);
+ u64a start_offset = to_offset - ri->aux.somDistance;
+ u32 som_loc = ri->onmatch;
+
+ /* resolve any races for matches at this point in favour of the earliest som
+ */
+ if (!fatbit_set(som_set_now, som_store_count, som_loc)) {
+ som_store[som_loc] = start_offset;
+ } else {
+ LIMIT_TO_AT_MOST(&som_store[som_loc], start_offset);
+ }
+
+ DEBUG_PRINTF("som_store[%u] set to %llu\n", som_loc, som_store[som_loc]);
+}
+
+static really_inline
+char ok_and_mark_if_write(u8 *som_store_valid, struct fatbit *som_set_now,
+ u8 *som_store_writable, u32 som_store_count,
+ u32 loc) {
+ return !mmbit_set(som_store_valid, som_store_count, loc) /* unwritten */
+ || fatbit_isset(som_set_now, som_store_count, loc) /* write here, need
+ * to resolve race */
+ || mmbit_isset(som_store_writable, som_store_count, loc); /* writable */
+}
+
+static really_inline
+char ok_and_mark_if_unset(u8 *som_store_valid, struct fatbit *som_set_now,
+ u32 som_store_count, u32 loc) {
+ return !mmbit_set(som_store_valid, som_store_count, loc) /* unwritten */
+ || fatbit_isset(som_set_now, som_store_count, loc); /* write here, need
+ * to resolve race */
+}
+
+static
int somRevCallback(UNUSED u64a start, u64a end, ReportID id, void *ctx) {
DEBUG_PRINTF("offset=%llu, id=%u\n", end, id);
-
- // We use the id to store the offset adjustment (for assertions like a
- // leading \b or multiline mode).
- assert(id <= 1);
- u64a *from_offset = ctx;
+
+ // We use the id to store the offset adjustment (for assertions like a
+ // leading \b or multiline mode).
+ assert(id <= 1);
+ u64a *from_offset = ctx;
LIMIT_TO_AT_MOST(from_offset, end + id);
- return 1; // continue matching.
-}
-
-static really_inline
-const struct NFA *getSomRevNFA(const struct RoseEngine *t, u32 i) {
- assert(t->somRevOffsetOffset);
- const u32 *rev_offsets
- = (const u32 *)((const u8 *)t + t->somRevOffsetOffset);
- u32 nfa_offset = rev_offsets[i];
- assert(nfa_offset && nfa_offset < t->size);
- const struct NFA *n = (const struct NFA *)(((const u8 *)t + nfa_offset));
- assert(ISALIGNED(n));
-
- return n;
-}
-
-static
+ return 1; // continue matching.
+}
+
+static really_inline
+const struct NFA *getSomRevNFA(const struct RoseEngine *t, u32 i) {
+ assert(t->somRevOffsetOffset);
+ const u32 *rev_offsets
+ = (const u32 *)((const u8 *)t + t->somRevOffsetOffset);
+ u32 nfa_offset = rev_offsets[i];
+ assert(nfa_offset && nfa_offset < t->size);
+ const struct NFA *n = (const struct NFA *)(((const u8 *)t + nfa_offset));
+ assert(ISALIGNED(n));
+
+ return n;
+}
+
+static
void runRevNfa(struct hs_scratch *scratch, const struct som_operation *ri,
- const u64a to_offset, u64a *from_offset) {
- struct core_info *ci = &scratch->core_info;
-
- DEBUG_PRINTF("buf has %zu bytes total, history has %zu\n",
- ci->len, ci->hlen);
-
- u32 nfa_idx = ri->aux.revNfaIndex;
- DEBUG_PRINTF("run rev nfa %u from to_offset=%llu\n", nfa_idx, to_offset);
- const struct NFA *nfa = getSomRevNFA(ci->rose, nfa_idx);
-
- assert(nfa->maxWidth); // No inf width rev NFAs.
-
- size_t buf_bytes = to_offset - ci->buf_offset;
- size_t history_bytes = ci->hlen;
-
- DEBUG_PRINTF("nfa min/max widths [%u,%u], %zu in buffer, %zu in history\n",
- nfa->minWidth, nfa->maxWidth, buf_bytes, history_bytes);
- assert(nfa->minWidth <= buf_bytes + history_bytes);
-
- const u8 *buf = ci->buf;
- const u8 *hbuf = ci->hbuf;
-
- // Work out if we need to scan any history as well.
- if (history_bytes && buf_bytes < nfa->maxWidth) {
- assert(hbuf);
- size_t remainder = nfa->maxWidth - buf_bytes;
- if (remainder < history_bytes) {
- hbuf += history_bytes - remainder;
- history_bytes = remainder;
- }
- }
-
- DEBUG_PRINTF("scanning %zu from buffer and %zu from history\n", buf_bytes,
- history_bytes);
-
- *from_offset = to_offset;
-
- nfaBlockExecReverse(nfa, to_offset, buf, buf_bytes, hbuf, history_bytes,
+ const u64a to_offset, u64a *from_offset) {
+ struct core_info *ci = &scratch->core_info;
+
+ DEBUG_PRINTF("buf has %zu bytes total, history has %zu\n",
+ ci->len, ci->hlen);
+
+ u32 nfa_idx = ri->aux.revNfaIndex;
+ DEBUG_PRINTF("run rev nfa %u from to_offset=%llu\n", nfa_idx, to_offset);
+ const struct NFA *nfa = getSomRevNFA(ci->rose, nfa_idx);
+
+ assert(nfa->maxWidth); // No inf width rev NFAs.
+
+ size_t buf_bytes = to_offset - ci->buf_offset;
+ size_t history_bytes = ci->hlen;
+
+ DEBUG_PRINTF("nfa min/max widths [%u,%u], %zu in buffer, %zu in history\n",
+ nfa->minWidth, nfa->maxWidth, buf_bytes, history_bytes);
+ assert(nfa->minWidth <= buf_bytes + history_bytes);
+
+ const u8 *buf = ci->buf;
+ const u8 *hbuf = ci->hbuf;
+
+ // Work out if we need to scan any history as well.
+ if (history_bytes && buf_bytes < nfa->maxWidth) {
+ assert(hbuf);
+ size_t remainder = nfa->maxWidth - buf_bytes;
+ if (remainder < history_bytes) {
+ hbuf += history_bytes - remainder;
+ history_bytes = remainder;
+ }
+ }
+
+ DEBUG_PRINTF("scanning %zu from buffer and %zu from history\n", buf_bytes,
+ history_bytes);
+
+ *from_offset = to_offset;
+
+ nfaBlockExecReverse(nfa, to_offset, buf, buf_bytes, hbuf, history_bytes,
somRevCallback, from_offset);
-
- assert(*from_offset <= to_offset);
-}
-
-static really_inline
-void setSomLocRevNfa(struct hs_scratch *scratch, struct fatbit *som_set_now,
- u64a *som_store, u32 som_store_count,
+
+ assert(*from_offset <= to_offset);
+}
+
+static really_inline
+void setSomLocRevNfa(struct hs_scratch *scratch, struct fatbit *som_set_now,
+ u64a *som_store, u32 som_store_count,
const struct som_operation *ri, u64a to_offset) {
- /* validity handled by callers */
- u64a from_offset = 0;
- runRevNfa(scratch, ri, to_offset, &from_offset);
-
- u32 som_loc = ri->onmatch;
-
- /* resolve any races for matches at this point in favour of the earliest som
- */
- if (!fatbit_set(som_set_now, som_store_count, som_loc)) {
- som_store[som_loc] = from_offset;
- } else {
- LIMIT_TO_AT_MOST(&som_store[som_loc], from_offset);
- }
-
- DEBUG_PRINTF("som_store[%u] set to %llu\n", som_loc, som_store[som_loc]);
-}
-
-void handleSomInternal(struct hs_scratch *scratch,
+ /* validity handled by callers */
+ u64a from_offset = 0;
+ runRevNfa(scratch, ri, to_offset, &from_offset);
+
+ u32 som_loc = ri->onmatch;
+
+ /* resolve any races for matches at this point in favour of the earliest som
+ */
+ if (!fatbit_set(som_set_now, som_store_count, som_loc)) {
+ som_store[som_loc] = from_offset;
+ } else {
+ LIMIT_TO_AT_MOST(&som_store[som_loc], from_offset);
+ }
+
+ DEBUG_PRINTF("som_store[%u] set to %llu\n", som_loc, som_store[som_loc]);
+}
+
+void handleSomInternal(struct hs_scratch *scratch,
const struct som_operation *ri, const u64a to_offset) {
- assert(scratch);
- assert(ri);
- DEBUG_PRINTF("-->som action required at %llu\n", to_offset);
-
- // SOM handling at scan time operates on data held in scratch. In
- // streaming mode, this data is read from / written out to stream state at
- // stream write boundaries.
-
- struct core_info *ci = &scratch->core_info;
- const struct RoseEngine *rose = ci->rose;
- assert(rose->hasSom);
-
- const u32 som_store_count = rose->somLocationCount;
- u8 *som_store_valid = (u8 *)ci->state + rose->stateOffsets.somValid;
- u8 *som_store_writable = (u8 *)ci->state + rose->stateOffsets.somWritable;
- struct fatbit *som_set_now = scratch->som_set_now;
- struct fatbit *som_attempted_set = scratch->som_attempted_set;
- u64a *som_store = scratch->som_store;
- u64a *som_failed_store = scratch->som_attempted_store;
-
- if (to_offset != scratch->som_set_now_offset) {
- assert(scratch->som_set_now_offset == ~0ULL
- || to_offset > scratch->som_set_now_offset);
- DEBUG_PRINTF("setting som_set_now_offset=%llu\n", to_offset);
- fatbit_clear(som_set_now);
- fatbit_clear(som_attempted_set);
- scratch->som_set_now_offset = to_offset;
- }
-
- switch (ri->type) {
+ assert(scratch);
+ assert(ri);
+ DEBUG_PRINTF("-->som action required at %llu\n", to_offset);
+
+ // SOM handling at scan time operates on data held in scratch. In
+ // streaming mode, this data is read from / written out to stream state at
+ // stream write boundaries.
+
+ struct core_info *ci = &scratch->core_info;
+ const struct RoseEngine *rose = ci->rose;
+ assert(rose->hasSom);
+
+ const u32 som_store_count = rose->somLocationCount;
+ u8 *som_store_valid = (u8 *)ci->state + rose->stateOffsets.somValid;
+ u8 *som_store_writable = (u8 *)ci->state + rose->stateOffsets.somWritable;
+ struct fatbit *som_set_now = scratch->som_set_now;
+ struct fatbit *som_attempted_set = scratch->som_attempted_set;
+ u64a *som_store = scratch->som_store;
+ u64a *som_failed_store = scratch->som_attempted_store;
+
+ if (to_offset != scratch->som_set_now_offset) {
+ assert(scratch->som_set_now_offset == ~0ULL
+ || to_offset > scratch->som_set_now_offset);
+ DEBUG_PRINTF("setting som_set_now_offset=%llu\n", to_offset);
+ fatbit_clear(som_set_now);
+ fatbit_clear(som_attempted_set);
+ scratch->som_set_now_offset = to_offset;
+ }
+
+ switch (ri->type) {
case SOM_INTERNAL_LOC_SET:
DEBUG_PRINTF("SOM_INTERNAL_LOC_SET\n");
- mmbit_set(som_store_valid, som_store_count, ri->onmatch);
- setSomLoc(som_set_now, som_store, som_store_count, ri, to_offset);
- return;
+ mmbit_set(som_store_valid, som_store_count, ri->onmatch);
+ setSomLoc(som_set_now, som_store, som_store_count, ri, to_offset);
+ return;
case SOM_INTERNAL_LOC_SET_IF_UNSET:
DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_IF_UNSET\n");
- if (ok_and_mark_if_unset(som_store_valid, som_set_now, som_store_count,
- ri->onmatch)) {
- setSomLoc(som_set_now, som_store, som_store_count, ri, to_offset);
- }
- return;
+ if (ok_and_mark_if_unset(som_store_valid, som_set_now, som_store_count,
+ ri->onmatch)) {
+ setSomLoc(som_set_now, som_store, som_store_count, ri, to_offset);
+ }
+ return;
case SOM_INTERNAL_LOC_SET_IF_WRITABLE: {
- u32 slot = ri->onmatch;
+ u32 slot = ri->onmatch;
DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_IF_WRITABLE\n");
- if (ok_and_mark_if_write(som_store_valid, som_set_now,
- som_store_writable, som_store_count, slot)) {
- setSomLoc(som_set_now, som_store, som_store_count, ri, to_offset);
- mmbit_unset(som_store_writable, som_store_count, slot);
- } else {
- /* not writable, stash as an attempted write in case we are
- * racing our escape. */
- DEBUG_PRINTF("not writable, stashing attempt\n");
- assert(to_offset >= ri->aux.somDistance);
- u64a start_offset = to_offset - ri->aux.somDistance;
-
- if (!fatbit_set(som_attempted_set, som_store_count, slot)) {
- som_failed_store[slot] = start_offset;
- } else {
- LIMIT_TO_AT_MOST(&som_failed_store[slot], start_offset);
- }
- DEBUG_PRINTF("som_failed_store[%u] = %llu\n", slot,
- som_failed_store[slot]);
- }
- return;
- }
+ if (ok_and_mark_if_write(som_store_valid, som_set_now,
+ som_store_writable, som_store_count, slot)) {
+ setSomLoc(som_set_now, som_store, som_store_count, ri, to_offset);
+ mmbit_unset(som_store_writable, som_store_count, slot);
+ } else {
+ /* not writable, stash as an attempted write in case we are
+ * racing our escape. */
+ DEBUG_PRINTF("not writable, stashing attempt\n");
+ assert(to_offset >= ri->aux.somDistance);
+ u64a start_offset = to_offset - ri->aux.somDistance;
+
+ if (!fatbit_set(som_attempted_set, som_store_count, slot)) {
+ som_failed_store[slot] = start_offset;
+ } else {
+ LIMIT_TO_AT_MOST(&som_failed_store[slot], start_offset);
+ }
+ DEBUG_PRINTF("som_failed_store[%u] = %llu\n", slot,
+ som_failed_store[slot]);
+ }
+ return;
+ }
case SOM_INTERNAL_LOC_SET_REV_NFA:
DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_REV_NFA\n");
- mmbit_set(som_store_valid, som_store_count, ri->onmatch);
- setSomLocRevNfa(scratch, som_set_now, som_store, som_store_count, ri,
- to_offset);
- return;
+ mmbit_set(som_store_valid, som_store_count, ri->onmatch);
+ setSomLocRevNfa(scratch, som_set_now, som_store, som_store_count, ri,
+ to_offset);
+ return;
case SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET:
DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET\n");
- if (ok_and_mark_if_unset(som_store_valid, som_set_now, som_store_count,
- ri->onmatch)) {
- setSomLocRevNfa(scratch, som_set_now, som_store, som_store_count,
- ri, to_offset);
- }
- return;
+ if (ok_and_mark_if_unset(som_store_valid, som_set_now, som_store_count,
+ ri->onmatch)) {
+ setSomLocRevNfa(scratch, som_set_now, som_store, som_store_count,
+ ri, to_offset);
+ }
+ return;
case SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE: {
- u32 slot = ri->onmatch;
+ u32 slot = ri->onmatch;
DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_IF_WRITABLE\n");
- if (ok_and_mark_if_write(som_store_valid, som_set_now,
- som_store_writable, som_store_count, slot)) {
- setSomLocRevNfa(scratch, som_set_now, som_store, som_store_count,
- ri, to_offset);
- mmbit_unset(som_store_writable, som_store_count, slot);
- } else {
- /* not writable, stash as an attempted write in case we are
- * racing our escape. */
- DEBUG_PRINTF("not writable, stashing attempt\n");
-
- u64a from_offset = 0;
- runRevNfa(scratch, ri, to_offset, &from_offset);
-
- if (!fatbit_set(som_attempted_set, som_store_count, slot)) {
- som_failed_store[slot] = from_offset;
- } else {
- LIMIT_TO_AT_MOST(&som_failed_store[slot], from_offset);
- }
- DEBUG_PRINTF("som_failed_store[%u] = %llu\n", slot,
- som_failed_store[slot]);
- }
- return;
- }
+ if (ok_and_mark_if_write(som_store_valid, som_set_now,
+ som_store_writable, som_store_count, slot)) {
+ setSomLocRevNfa(scratch, som_set_now, som_store, som_store_count,
+ ri, to_offset);
+ mmbit_unset(som_store_writable, som_store_count, slot);
+ } else {
+ /* not writable, stash as an attempted write in case we are
+ * racing our escape. */
+ DEBUG_PRINTF("not writable, stashing attempt\n");
+
+ u64a from_offset = 0;
+ runRevNfa(scratch, ri, to_offset, &from_offset);
+
+ if (!fatbit_set(som_attempted_set, som_store_count, slot)) {
+ som_failed_store[slot] = from_offset;
+ } else {
+ LIMIT_TO_AT_MOST(&som_failed_store[slot], from_offset);
+ }
+ DEBUG_PRINTF("som_failed_store[%u] = %llu\n", slot,
+ som_failed_store[slot]);
+ }
+ return;
+ }
case SOM_INTERNAL_LOC_COPY: {
- u32 slot_in = ri->aux.somDistance;
- u32 slot_out = ri->onmatch;
+ u32 slot_in = ri->aux.somDistance;
+ u32 slot_out = ri->onmatch;
DEBUG_PRINTF("SOM_INTERNAL_LOC_COPY S[%u] = S[%u]\n", slot_out,
- slot_in);
- assert(mmbit_isset(som_store_valid, som_store_count, slot_in));
- mmbit_set(som_store_valid, som_store_count, slot_out);
- fatbit_set(som_set_now, som_store_count, slot_out);
- som_store[slot_out] = som_store[slot_in];
-
- return;
- }
+ slot_in);
+ assert(mmbit_isset(som_store_valid, som_store_count, slot_in));
+ mmbit_set(som_store_valid, som_store_count, slot_out);
+ fatbit_set(som_set_now, som_store_count, slot_out);
+ som_store[slot_out] = som_store[slot_in];
+
+ return;
+ }
case SOM_INTERNAL_LOC_COPY_IF_WRITABLE: {
- u32 slot_in = ri->aux.somDistance;
- u32 slot_out = ri->onmatch;
+ u32 slot_in = ri->aux.somDistance;
+ u32 slot_out = ri->onmatch;
DEBUG_PRINTF("SOM_INTERNAL_LOC_COPY_IF_WRITABLE S[%u] = S[%u]\n",
- slot_out, slot_in);
- assert(mmbit_isset(som_store_valid, som_store_count, slot_in));
- if (ok_and_mark_if_write(som_store_valid, som_set_now,
- som_store_writable, som_store_count,
- slot_out)) {
- DEBUG_PRINTF("copy, set som_store[%u]=%llu\n", slot_out,
- som_store[slot_in]);
- som_store[slot_out] = som_store[slot_in];
- fatbit_set(som_set_now, som_store_count, slot_out);
- mmbit_unset(som_store_writable, som_store_count, slot_out);
- } else {
- /* not writable, stash as an attempted write in case we are
- * racing our escape */
- DEBUG_PRINTF("not writable, stashing attempt\n");
- fatbit_set(som_attempted_set, som_store_count, slot_out);
- som_failed_store[slot_out] = som_store[slot_in];
- DEBUG_PRINTF("som_failed_store[%u] = %llu\n", slot_out,
- som_failed_store[slot_out]);
- }
- return;
- }
+ slot_out, slot_in);
+ assert(mmbit_isset(som_store_valid, som_store_count, slot_in));
+ if (ok_and_mark_if_write(som_store_valid, som_set_now,
+ som_store_writable, som_store_count,
+ slot_out)) {
+ DEBUG_PRINTF("copy, set som_store[%u]=%llu\n", slot_out,
+ som_store[slot_in]);
+ som_store[slot_out] = som_store[slot_in];
+ fatbit_set(som_set_now, som_store_count, slot_out);
+ mmbit_unset(som_store_writable, som_store_count, slot_out);
+ } else {
+ /* not writable, stash as an attempted write in case we are
+ * racing our escape */
+ DEBUG_PRINTF("not writable, stashing attempt\n");
+ fatbit_set(som_attempted_set, som_store_count, slot_out);
+ som_failed_store[slot_out] = som_store[slot_in];
+ DEBUG_PRINTF("som_failed_store[%u] = %llu\n", slot_out,
+ som_failed_store[slot_out]);
+ }
+ return;
+ }
case SOM_INTERNAL_LOC_MAKE_WRITABLE: {
- u32 slot = ri->onmatch;
+ u32 slot = ri->onmatch;
DEBUG_PRINTF("SOM_INTERNAL_LOC_MAKE_WRITABLE\n");
- /* if just written to the loc, ignore the racing escape */
- if (fatbit_isset(som_set_now, som_store_count, slot)) {
- DEBUG_PRINTF("just written\n");
- return;
- }
- if (fatbit_isset(som_attempted_set, som_store_count, slot)) {
- /* writes were waiting for an escape to arrive */
- DEBUG_PRINTF("setting som_store[%u] = %llu from "
- "som_failed_store[%u]\n", slot, som_failed_store[slot],
- slot);
- som_store[slot] = som_failed_store[slot];
- fatbit_set(som_set_now, som_store_count, slot);
- return;
- }
- mmbit_set(som_store_writable, som_store_count, slot);
- return;
- }
- default:
- DEBUG_PRINTF("unknown report type!\n");
- break;
- }
-
+ /* if just written to the loc, ignore the racing escape */
+ if (fatbit_isset(som_set_now, som_store_count, slot)) {
+ DEBUG_PRINTF("just written\n");
+ return;
+ }
+ if (fatbit_isset(som_attempted_set, som_store_count, slot)) {
+ /* writes were waiting for an escape to arrive */
+ DEBUG_PRINTF("setting som_store[%u] = %llu from "
+ "som_failed_store[%u]\n", slot, som_failed_store[slot],
+ slot);
+ som_store[slot] = som_failed_store[slot];
+ fatbit_set(som_set_now, som_store_count, slot);
+ return;
+ }
+ mmbit_set(som_store_writable, som_store_count, slot);
+ return;
+ }
+ default:
+ DEBUG_PRINTF("unknown report type!\n");
+ break;
+ }
+
// All valid som_operation types should be handled and returned above.
- assert(0);
- return;
-}
-
-// Returns the SOM offset.
-u64a handleSomExternal(struct hs_scratch *scratch,
+ assert(0);
+ return;
+}
+
+// Returns the SOM offset.
+u64a handleSomExternal(struct hs_scratch *scratch,
const struct som_operation *ri,
- const u64a to_offset) {
- assert(scratch);
- assert(ri);
-
- // SOM handling at scan time operates on data held in scratch. In
- // streaming mode, this data is read from / written out to stream state at
- // stream write boundaries.
-
- struct core_info *ci = &scratch->core_info;
- const struct RoseEngine *rose = ci->rose;
- assert(rose->hasSom);
-
- switch (ri->type) {
+ const u64a to_offset) {
+ assert(scratch);
+ assert(ri);
+
+ // SOM handling at scan time operates on data held in scratch. In
+ // streaming mode, this data is read from / written out to stream state at
+ // stream write boundaries.
+
+ struct core_info *ci = &scratch->core_info;
+ const struct RoseEngine *rose = ci->rose;
+ assert(rose->hasSom);
+
+ switch (ri->type) {
case SOM_EXTERNAL_CALLBACK_REL:
DEBUG_PRINTF("SOM_EXTERNAL_CALLBACK_REL: som is %llu chars back\n",
- ri->aux.somDistance);
- assert(to_offset >= ri->aux.somDistance);
- return to_offset - ri->aux.somDistance;
+ ri->aux.somDistance);
+ assert(to_offset >= ri->aux.somDistance);
+ return to_offset - ri->aux.somDistance;
case SOM_EXTERNAL_CALLBACK_ABS:
DEBUG_PRINTF("SOM_EXTERNAL_CALLBACK_ABS: som is at %llu\n",
- ri->aux.somDistance);
- assert(to_offset >= ri->aux.somDistance);
- return ri->aux.somDistance;
+ ri->aux.somDistance);
+ assert(to_offset >= ri->aux.somDistance);
+ return ri->aux.somDistance;
case SOM_EXTERNAL_CALLBACK_STORED: {
- const u64a *som_store = scratch->som_store;
- u32 slot = ri->aux.somDistance;
+ const u64a *som_store = scratch->som_store;
+ u32 slot = ri->aux.somDistance;
DEBUG_PRINTF("SOM_EXTERNAL_CALLBACK_STORED: <- som_store[%u]=%llu\n",
- slot, som_store[slot]);
-
- UNUSED const u32 som_store_count = rose->somLocationCount;
- UNUSED const u8 *som_store_valid = (u8 *)ci->state
- + rose->stateOffsets.somValid;
-
- assert(mmbit_isset(som_store_valid, som_store_count, slot));
- return som_store[slot];
- }
+ slot, som_store[slot]);
+
+ UNUSED const u32 som_store_count = rose->somLocationCount;
+ UNUSED const u8 *som_store_valid = (u8 *)ci->state
+ + rose->stateOffsets.somValid;
+
+ assert(mmbit_isset(som_store_valid, som_store_count, slot));
+ return som_store[slot];
+ }
case SOM_EXTERNAL_CALLBACK_REV_NFA: {
DEBUG_PRINTF("SOM_EXTERNAL_CALLBACK_REV_NFA\n");
- u64a from_offset = 0;
- runRevNfa(scratch, ri, to_offset, &from_offset);
- return from_offset;
- }
- default:
- DEBUG_PRINTF("unknown report type!\n");
- break;
- }
-
+ u64a from_offset = 0;
+ runRevNfa(scratch, ri, to_offset, &from_offset);
+ return from_offset;
+ }
+ default:
+ DEBUG_PRINTF("unknown report type!\n");
+ break;
+ }
+
// All valid som_operation types should be handled and returned above.
- assert(0);
- return 0;
-}
-
-void setSomFromSomAware(struct hs_scratch *scratch,
+ assert(0);
+ return 0;
+}
+
+void setSomFromSomAware(struct hs_scratch *scratch,
const struct som_operation *ri, u64a from_offset,
- u64a to_offset) {
- assert(scratch);
- assert(ri);
- assert(to_offset);
+ u64a to_offset) {
+ assert(scratch);
+ assert(ri);
+ assert(to_offset);
assert(ri->type == SOM_INTERNAL_LOC_SET_FROM
|| ri->type == SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE);
-
- struct core_info *ci = &scratch->core_info;
- const struct RoseEngine *rose = ci->rose;
- assert(rose->hasSom);
-
- const u32 som_store_count = rose->somLocationCount;
- u8 *som_store_valid = (u8 *)ci->state + rose->stateOffsets.somValid;
- u8 *som_store_writable = (u8 *)ci->state + rose->stateOffsets.somWritable;
- struct fatbit *som_set_now = scratch->som_set_now;
- struct fatbit *som_attempted_set = scratch->som_attempted_set;
- u64a *som_store = scratch->som_store;
- u64a *som_failed_store = scratch->som_attempted_store;
-
- if (to_offset != scratch->som_set_now_offset) {
- DEBUG_PRINTF("setting som_set_now_offset=%llu\n", to_offset);
- fatbit_clear(som_set_now);
- fatbit_clear(som_attempted_set);
- scratch->som_set_now_offset = to_offset;
- }
-
+
+ struct core_info *ci = &scratch->core_info;
+ const struct RoseEngine *rose = ci->rose;
+ assert(rose->hasSom);
+
+ const u32 som_store_count = rose->somLocationCount;
+ u8 *som_store_valid = (u8 *)ci->state + rose->stateOffsets.somValid;
+ u8 *som_store_writable = (u8 *)ci->state + rose->stateOffsets.somWritable;
+ struct fatbit *som_set_now = scratch->som_set_now;
+ struct fatbit *som_attempted_set = scratch->som_attempted_set;
+ u64a *som_store = scratch->som_store;
+ u64a *som_failed_store = scratch->som_attempted_store;
+
+ if (to_offset != scratch->som_set_now_offset) {
+ DEBUG_PRINTF("setting som_set_now_offset=%llu\n", to_offset);
+ fatbit_clear(som_set_now);
+ fatbit_clear(som_attempted_set);
+ scratch->som_set_now_offset = to_offset;
+ }
+
if (ri->type == SOM_INTERNAL_LOC_SET_FROM) {
DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_FROM\n");
- mmbit_set(som_store_valid, som_store_count, ri->onmatch);
- setSomLoc(som_set_now, som_store, som_store_count, ri, from_offset);
- } else {
+ mmbit_set(som_store_valid, som_store_count, ri->onmatch);
+ setSomLoc(som_set_now, som_store, som_store_count, ri, from_offset);
+ } else {
DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE\n");
- if (ok_and_mark_if_write(som_store_valid, som_set_now,
- som_store_writable, som_store_count,
- ri->onmatch)) {
- setSomLoc(som_set_now, som_store, som_store_count, ri, from_offset);
- mmbit_unset(som_store_writable, som_store_count, ri->onmatch);
- } else {
- /* not writable, stash as an attempted write in case we are
- * racing our escape. */
- DEBUG_PRINTF("not writable, stashing attempt\n");
- assert(to_offset >= ri->aux.somDistance);
- u32 som_loc = ri->onmatch;
-
- if (!fatbit_set(som_attempted_set, som_store_count, ri->onmatch)) {
- som_failed_store[som_loc] = from_offset;
- } else {
- LIMIT_TO_AT_MOST(&som_failed_store[som_loc], from_offset);
- }
- DEBUG_PRINTF("som_failed_store[%u] = %llu\n", som_loc,
- som_failed_store[som_loc]);
- }
- }
-}
-
-static really_inline
-int clearSomLog(struct hs_scratch *scratch, u64a offset, struct fatbit *log,
- const u64a *starts) {
- DEBUG_PRINTF("at %llu\n", offset);
- struct core_info *ci = &scratch->core_info;
- const struct RoseEngine *rose = ci->rose;
- const u32 dkeyCount = rose->dkeyCount;
- const u32 *dkey_to_report = (const u32 *)
- ((const char *)rose + rose->invDkeyOffset);
- u32 flags = 0;
-#ifndef RELEASE_BUILD
- if (scratch->deduper.current_report_offset != offset) {
- flags |= HS_MATCH_FLAG_ADJUSTED;
- }
-#endif
-
- for (u32 it = fatbit_iterate(log, dkeyCount, MMB_INVALID);
- it != MMB_INVALID; it = fatbit_iterate(log, dkeyCount, it)) {
- u64a from_offset = starts[it];
- u32 onmatch = dkey_to_report[it];
- int halt = ci->userCallback(onmatch, from_offset, offset, flags,
- ci->userContext);
- if (halt) {
+ if (ok_and_mark_if_write(som_store_valid, som_set_now,
+ som_store_writable, som_store_count,
+ ri->onmatch)) {
+ setSomLoc(som_set_now, som_store, som_store_count, ri, from_offset);
+ mmbit_unset(som_store_writable, som_store_count, ri->onmatch);
+ } else {
+ /* not writable, stash as an attempted write in case we are
+ * racing our escape. */
+ DEBUG_PRINTF("not writable, stashing attempt\n");
+ assert(to_offset >= ri->aux.somDistance);
+ u32 som_loc = ri->onmatch;
+
+ if (!fatbit_set(som_attempted_set, som_store_count, ri->onmatch)) {
+ som_failed_store[som_loc] = from_offset;
+ } else {
+ LIMIT_TO_AT_MOST(&som_failed_store[som_loc], from_offset);
+ }
+ DEBUG_PRINTF("som_failed_store[%u] = %llu\n", som_loc,
+ som_failed_store[som_loc]);
+ }
+ }
+}
+
+static really_inline
+int clearSomLog(struct hs_scratch *scratch, u64a offset, struct fatbit *log,
+ const u64a *starts) {
+ DEBUG_PRINTF("at %llu\n", offset);
+ struct core_info *ci = &scratch->core_info;
+ const struct RoseEngine *rose = ci->rose;
+ const u32 dkeyCount = rose->dkeyCount;
+ const u32 *dkey_to_report = (const u32 *)
+ ((const char *)rose + rose->invDkeyOffset);
+ u32 flags = 0;
+#ifndef RELEASE_BUILD
+ if (scratch->deduper.current_report_offset != offset) {
+ flags |= HS_MATCH_FLAG_ADJUSTED;
+ }
+#endif
+
+ for (u32 it = fatbit_iterate(log, dkeyCount, MMB_INVALID);
+ it != MMB_INVALID; it = fatbit_iterate(log, dkeyCount, it)) {
+ u64a from_offset = starts[it];
+ u32 onmatch = dkey_to_report[it];
+ int halt = ci->userCallback(onmatch, from_offset, offset, flags,
+ ci->userContext);
+ if (halt) {
ci->status |= STATUS_TERMINATED;
- return 1;
- }
- }
- fatbit_clear(log);
- return 0;
-}
-
-int flushStoredSomMatches_i(struct hs_scratch *scratch, u64a offset) {
- DEBUG_PRINTF("flush som matches\n");
- int halt = 0;
-
- assert(!told_to_stop_matching(scratch));
-
- if (scratch->deduper.current_report_offset == ~0ULL) {
- /* no matches recorded yet; just need to clear the logs */
- fatbit_clear(scratch->deduper.som_log[0]);
- fatbit_clear(scratch->deduper.som_log[1]);
- scratch->deduper.som_log_dirty = 0;
- return 0;
- }
-
- /* fire any reports from the logs and clear them */
- if (offset == scratch->deduper.current_report_offset + 1) {
- struct fatbit *done_log = scratch->deduper.som_log[offset % 2];
- u64a *done_starts = scratch->deduper.som_start_log[offset % 2];
-
- halt = clearSomLog(scratch, scratch->deduper.current_report_offset - 1,
- done_log, done_starts);
- scratch->deduper.som_log_dirty >>= 1;
- } else {
- /* need to report both logs */
- u64a f_offset = scratch->deduper.current_report_offset - 1;
- u64a s_offset = scratch->deduper.current_report_offset;
- struct fatbit *first_log = scratch->deduper.som_log[f_offset % 2];
- u64a *first_starts = scratch->deduper.som_start_log[f_offset % 2];
- struct fatbit *second_log = scratch->deduper.som_log[s_offset % 2];
- u64a *second_starts = scratch->deduper.som_start_log[s_offset % 2];
-
- halt = clearSomLog(scratch, f_offset, first_log, first_starts) ||
- clearSomLog(scratch, s_offset, second_log, second_starts);
- scratch->deduper.som_log_dirty = 0;
- }
-
- return halt;
-}
+ return 1;
+ }
+ }
+ fatbit_clear(log);
+ return 0;
+}
+
+int flushStoredSomMatches_i(struct hs_scratch *scratch, u64a offset) {
+ DEBUG_PRINTF("flush som matches\n");
+ int halt = 0;
+
+ assert(!told_to_stop_matching(scratch));
+
+ if (scratch->deduper.current_report_offset == ~0ULL) {
+ /* no matches recorded yet; just need to clear the logs */
+ fatbit_clear(scratch->deduper.som_log[0]);
+ fatbit_clear(scratch->deduper.som_log[1]);
+ scratch->deduper.som_log_dirty = 0;
+ return 0;
+ }
+
+ /* fire any reports from the logs and clear them */
+ if (offset == scratch->deduper.current_report_offset + 1) {
+ struct fatbit *done_log = scratch->deduper.som_log[offset % 2];
+ u64a *done_starts = scratch->deduper.som_start_log[offset % 2];
+
+ halt = clearSomLog(scratch, scratch->deduper.current_report_offset - 1,
+ done_log, done_starts);
+ scratch->deduper.som_log_dirty >>= 1;
+ } else {
+ /* need to report both logs */
+ u64a f_offset = scratch->deduper.current_report_offset - 1;
+ u64a s_offset = scratch->deduper.current_report_offset;
+ struct fatbit *first_log = scratch->deduper.som_log[f_offset % 2];
+ u64a *first_starts = scratch->deduper.som_start_log[f_offset % 2];
+ struct fatbit *second_log = scratch->deduper.som_log[s_offset % 2];
+ u64a *second_starts = scratch->deduper.som_start_log[s_offset % 2];
+
+ halt = clearSomLog(scratch, f_offset, first_log, first_starts) ||
+ clearSomLog(scratch, s_offset, second_log, second_starts);
+ scratch->deduper.som_log_dirty = 0;
+ }
+
+ return halt;
+}
diff --git a/contrib/libs/hyperscan/src/som/som_runtime.h b/contrib/libs/hyperscan/src/som/som_runtime.h
index 7b3e2448a1..30c7ace8cb 100644
--- a/contrib/libs/hyperscan/src/som/som_runtime.h
+++ b/contrib/libs/hyperscan/src/som/som_runtime.h
@@ -1,67 +1,67 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief SOM runtime code.
- *
- * Runtime code for SOM handling called by the Rose callback adaptors.
- */
-
-#ifndef SOM_RUNTIME_H
-#define SOM_RUNTIME_H
-
-#include "scratch.h"
-#include "ue2common.h"
-
+ * \brief SOM runtime code.
+ *
+ * Runtime code for SOM handling called by the Rose callback adaptors.
+ */
+
+#ifndef SOM_RUNTIME_H
+#define SOM_RUNTIME_H
+
+#include "scratch.h"
+#include "ue2common.h"
+
struct som_operation;
-
-void handleSomInternal(struct hs_scratch *scratch,
+
+void handleSomInternal(struct hs_scratch *scratch,
const struct som_operation *ri, const u64a to_offset);
-
-// Returns the from_offset.
-u64a handleSomExternal(struct hs_scratch *scratch,
+
+// Returns the from_offset.
+u64a handleSomExternal(struct hs_scratch *scratch,
const struct som_operation *ri, const u64a to_offset);
-
-void setSomFromSomAware(struct hs_scratch *scratch,
+
+void setSomFromSomAware(struct hs_scratch *scratch,
const struct som_operation *ri, u64a from_offset,
- u64a to_offset);
-
-int flushStoredSomMatches_i(struct hs_scratch *scratch, u64a offset);
-
-static really_inline
-int flushStoredSomMatches(struct hs_scratch *scratch, u64a offset) {
- if (scratch->deduper.som_log_dirty) {
- return flushStoredSomMatches_i(scratch, offset);
- } else {
- return 0;
- }
-}
-
-#endif // SOM_RUNTIME_H
-
+ u64a to_offset);
+
+int flushStoredSomMatches_i(struct hs_scratch *scratch, u64a offset);
+
+static really_inline
+int flushStoredSomMatches(struct hs_scratch *scratch, u64a offset) {
+ if (scratch->deduper.som_log_dirty) {
+ return flushStoredSomMatches_i(scratch, offset);
+ } else {
+ return 0;
+ }
+}
+
+#endif // SOM_RUNTIME_H
+
diff --git a/contrib/libs/hyperscan/src/som/som_stream.c b/contrib/libs/hyperscan/src/som/som_stream.c
index aab5121708..93ab709edd 100644
--- a/contrib/libs/hyperscan/src/som/som_stream.c
+++ b/contrib/libs/hyperscan/src/som/som_stream.c
@@ -1,174 +1,174 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief SOM streaming runtime code.
- *
- * Code in this file handles storing and loading SOM slot information from
- * stream state.
- */
-
-#include "scratch.h"
-#include "som_stream.h"
-#include "rose/rose_internal.h"
-#include "util/multibit.h"
-
-// Sentinel values stored in stream state and used to represent an SOM distance
-// that is too far in the past to be stored in the available space in stream
-// state.
-
-#define SOM_SENTINEL_LARGE (~0ull)
-#define SOM_SENTINEL_MEDIUM (~0u)
-#define SOM_SENTINEL_SMALL ((u16)~0u)
-
-static really_inline
-void storeSomValue(void *stream_som_store, u64a som_value,
- u64a stream_offset, u8 som_size) {
- // Special case for sentinel value.
- if (som_value == SOM_SENTINEL_LARGE) {
- switch (som_size) {
- case 2:
- *(u16 *)stream_som_store = SOM_SENTINEL_SMALL;
- break;
- case 4:
- *(u32 *)stream_som_store = SOM_SENTINEL_MEDIUM;
- break;
- case 8:
- *(u64a *)stream_som_store = SOM_SENTINEL_LARGE;
- break;
- default:
- break;
- }
- return;
- }
-
- assert(som_value <= stream_offset);
- u64a rel_offset = stream_offset - som_value;
- DEBUG_PRINTF("rel_offset=%llu\n", rel_offset);
-
- switch (som_size) {
- case 2:
- rel_offset = MIN(rel_offset, SOM_SENTINEL_SMALL);
- assert(ISALIGNED_N(stream_som_store, alignof(u16)));
- *(u16 *)stream_som_store = rel_offset;
- break;
- case 4:
- rel_offset = MIN(rel_offset, SOM_SENTINEL_MEDIUM);
- assert(ISALIGNED_N(stream_som_store, alignof(u32)));
- *(u32 *)stream_som_store = rel_offset;
- break;
- case 8:
- assert(ISALIGNED_N(stream_som_store, alignof(u64a)));
- *(u64a *)stream_som_store = rel_offset;
- break;
- default:
- assert(0);
- break;
- }
-}
-
-void storeSomToStream(struct hs_scratch *scratch, const u64a offset) {
- assert(scratch);
- DEBUG_PRINTF("stream offset %llu\n", offset);
-
- struct core_info *ci = &scratch->core_info;
- const struct RoseEngine *rose = ci->rose;
-
- const u32 som_store_count = rose->somLocationCount;
- assert(som_store_count); // Caller should ensure that we have work to do.
-
- u8 *som_store_valid = (u8 *)ci->state + rose->stateOffsets.somValid;
- char *stream_som_store = ci->state + rose->stateOffsets.somLocation;
- const u64a *som_store = scratch->som_store;
- const u8 som_size = rose->somHorizon;
-
- for (u32 i = mmbit_iterate(som_store_valid, som_store_count, MMB_INVALID);
- i != MMB_INVALID;
- i = mmbit_iterate(som_store_valid, som_store_count, i)) {
- DEBUG_PRINTF("storing %llu in %u\n", som_store[i], i);
- storeSomValue(stream_som_store + (i * som_size), som_store[i],
- offset, som_size);
- }
-}
-
-static really_inline
-u64a loadSomValue(const void *stream_som_store, u64a stream_offset,
- u8 som_size) {
- u64a rel_offset;
- switch (som_size) {
- case 2:
- assert(ISALIGNED_N(stream_som_store, alignof(u16)));
- rel_offset = *(const u16 *)stream_som_store;
- if (rel_offset == SOM_SENTINEL_SMALL) {
- return SOM_SENTINEL_LARGE;
- }
- break;
- case 4:
- assert(ISALIGNED_N(stream_som_store, alignof(u32)));
- rel_offset = *(const u32 *)stream_som_store;
- if (rel_offset == SOM_SENTINEL_MEDIUM) {
- return SOM_SENTINEL_LARGE;
- }
- break;
- case 8:
- assert(ISALIGNED_N(stream_som_store, alignof(u64a)));
- rel_offset = *(const u64a *)stream_som_store;
- break;
- default:
- assert(0);
- rel_offset = 0;
- break;
- }
-
- DEBUG_PRINTF("rel_offset=%llu\n", rel_offset);
- return stream_offset - rel_offset;
-}
-
-void loadSomFromStream(struct hs_scratch *scratch, const u64a offset) {
- assert(scratch);
- DEBUG_PRINTF("stream offset %llu\n", offset);
-
- struct core_info *ci = &scratch->core_info;
- const struct RoseEngine *rose = ci->rose;
-
- const u32 som_store_count = rose->somLocationCount;
- assert(som_store_count); // Caller should ensure that we have work to do.
-
- const u8 *som_store_valid = (u8 *)ci->state + rose->stateOffsets.somValid;
- const char *stream_som_store = ci->state + rose->stateOffsets.somLocation;
- u64a *som_store = scratch->som_store;
- const u8 som_size = rose->somHorizon;
-
- for (u32 i = mmbit_iterate(som_store_valid, som_store_count, MMB_INVALID);
- i != MMB_INVALID;
- i = mmbit_iterate(som_store_valid, som_store_count, i)) {
- som_store[i] = loadSomValue(stream_som_store + (i*som_size), offset,
- som_size);
- DEBUG_PRINTF("loaded %llu from %u\n", som_store[i], i);
- }
-}
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief SOM streaming runtime code.
+ *
+ * Code in this file handles storing and loading SOM slot information from
+ * stream state.
+ */
+
+#include "scratch.h"
+#include "som_stream.h"
+#include "rose/rose_internal.h"
+#include "util/multibit.h"
+
+// Sentinel values stored in stream state and used to represent an SOM distance
+// that is too far in the past to be stored in the available space in stream
+// state.
+
+#define SOM_SENTINEL_LARGE (~0ull)
+#define SOM_SENTINEL_MEDIUM (~0u)
+#define SOM_SENTINEL_SMALL ((u16)~0u)
+
+static really_inline
+void storeSomValue(void *stream_som_store, u64a som_value,
+ u64a stream_offset, u8 som_size) {
+ // Special case for sentinel value.
+ if (som_value == SOM_SENTINEL_LARGE) {
+ switch (som_size) {
+ case 2:
+ *(u16 *)stream_som_store = SOM_SENTINEL_SMALL;
+ break;
+ case 4:
+ *(u32 *)stream_som_store = SOM_SENTINEL_MEDIUM;
+ break;
+ case 8:
+ *(u64a *)stream_som_store = SOM_SENTINEL_LARGE;
+ break;
+ default:
+ break;
+ }
+ return;
+ }
+
+ assert(som_value <= stream_offset);
+ u64a rel_offset = stream_offset - som_value;
+ DEBUG_PRINTF("rel_offset=%llu\n", rel_offset);
+
+ switch (som_size) {
+ case 2:
+ rel_offset = MIN(rel_offset, SOM_SENTINEL_SMALL);
+ assert(ISALIGNED_N(stream_som_store, alignof(u16)));
+ *(u16 *)stream_som_store = rel_offset;
+ break;
+ case 4:
+ rel_offset = MIN(rel_offset, SOM_SENTINEL_MEDIUM);
+ assert(ISALIGNED_N(stream_som_store, alignof(u32)));
+ *(u32 *)stream_som_store = rel_offset;
+ break;
+ case 8:
+ assert(ISALIGNED_N(stream_som_store, alignof(u64a)));
+ *(u64a *)stream_som_store = rel_offset;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+void storeSomToStream(struct hs_scratch *scratch, const u64a offset) {
+ assert(scratch);
+ DEBUG_PRINTF("stream offset %llu\n", offset);
+
+ struct core_info *ci = &scratch->core_info;
+ const struct RoseEngine *rose = ci->rose;
+
+ const u32 som_store_count = rose->somLocationCount;
+ assert(som_store_count); // Caller should ensure that we have work to do.
+
+ u8 *som_store_valid = (u8 *)ci->state + rose->stateOffsets.somValid;
+ char *stream_som_store = ci->state + rose->stateOffsets.somLocation;
+ const u64a *som_store = scratch->som_store;
+ const u8 som_size = rose->somHorizon;
+
+ for (u32 i = mmbit_iterate(som_store_valid, som_store_count, MMB_INVALID);
+ i != MMB_INVALID;
+ i = mmbit_iterate(som_store_valid, som_store_count, i)) {
+ DEBUG_PRINTF("storing %llu in %u\n", som_store[i], i);
+ storeSomValue(stream_som_store + (i * som_size), som_store[i],
+ offset, som_size);
+ }
+}
+
+static really_inline
+u64a loadSomValue(const void *stream_som_store, u64a stream_offset,
+ u8 som_size) {
+ u64a rel_offset;
+ switch (som_size) {
+ case 2:
+ assert(ISALIGNED_N(stream_som_store, alignof(u16)));
+ rel_offset = *(const u16 *)stream_som_store;
+ if (rel_offset == SOM_SENTINEL_SMALL) {
+ return SOM_SENTINEL_LARGE;
+ }
+ break;
+ case 4:
+ assert(ISALIGNED_N(stream_som_store, alignof(u32)));
+ rel_offset = *(const u32 *)stream_som_store;
+ if (rel_offset == SOM_SENTINEL_MEDIUM) {
+ return SOM_SENTINEL_LARGE;
+ }
+ break;
+ case 8:
+ assert(ISALIGNED_N(stream_som_store, alignof(u64a)));
+ rel_offset = *(const u64a *)stream_som_store;
+ break;
+ default:
+ assert(0);
+ rel_offset = 0;
+ break;
+ }
+
+ DEBUG_PRINTF("rel_offset=%llu\n", rel_offset);
+ return stream_offset - rel_offset;
+}
+
+void loadSomFromStream(struct hs_scratch *scratch, const u64a offset) {
+ assert(scratch);
+ DEBUG_PRINTF("stream offset %llu\n", offset);
+
+ struct core_info *ci = &scratch->core_info;
+ const struct RoseEngine *rose = ci->rose;
+
+ const u32 som_store_count = rose->somLocationCount;
+ assert(som_store_count); // Caller should ensure that we have work to do.
+
+ const u8 *som_store_valid = (u8 *)ci->state + rose->stateOffsets.somValid;
+ const char *stream_som_store = ci->state + rose->stateOffsets.somLocation;
+ u64a *som_store = scratch->som_store;
+ const u8 som_size = rose->somHorizon;
+
+ for (u32 i = mmbit_iterate(som_store_valid, som_store_count, MMB_INVALID);
+ i != MMB_INVALID;
+ i = mmbit_iterate(som_store_valid, som_store_count, i)) {
+ som_store[i] = loadSomValue(stream_som_store + (i*som_size), offset,
+ som_size);
+ DEBUG_PRINTF("loaded %llu from %u\n", som_store[i], i);
+ }
+}
diff --git a/contrib/libs/hyperscan/src/som/som_stream.h b/contrib/libs/hyperscan/src/som/som_stream.h
index ffc69bab60..8b62264d19 100644
--- a/contrib/libs/hyperscan/src/som/som_stream.h
+++ b/contrib/libs/hyperscan/src/som/som_stream.h
@@ -1,48 +1,48 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief SOM streaming runtime code.
- */
-
-#ifndef SOM_STREAM_H
-#define SOM_STREAM_H
-
-#include "ue2common.h"
-
-struct hs_scratch;
-
-/** \brief Write all SOM slot information from scratch out to stream state
- * (given the current stream offset). */
-void storeSomToStream(struct hs_scratch *scratch, const u64a offset);
-
-/** \brief Read all SOM slot information from stream state into scratch (given
- * the current stream offset). */
-void loadSomFromStream(struct hs_scratch *scratch, const u64a offset);
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief SOM streaming runtime code.
+ */
+
+#ifndef SOM_STREAM_H
+#define SOM_STREAM_H
+
+#include "ue2common.h"
+
+struct hs_scratch;
+
+/** \brief Write all SOM slot information from scratch out to stream state
+ * (given the current stream offset). */
+void storeSomToStream(struct hs_scratch *scratch, const u64a offset);
+
+/** \brief Read all SOM slot information from stream state into scratch (given
+ * the current stream offset). */
+void loadSomFromStream(struct hs_scratch *scratch, const u64a offset);
+
+#endif
diff --git a/contrib/libs/hyperscan/src/state.h b/contrib/libs/hyperscan/src/state.h
index 7c1857b1ab..9ade59db4b 100644
--- a/contrib/libs/hyperscan/src/state.h
+++ b/contrib/libs/hyperscan/src/state.h
@@ -1,69 +1,69 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Stream state data structures.
- */
-
-#ifndef STATE_H
-#define STATE_H
-
-#include "hs_runtime.h" /* match_event_handler */
-#include "ue2common.h"
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-struct RoseEngine;
-
-/** \brief Stream context: allocated for each stream.
- *
- * struct hs_stream is followed in memory by the main Rose state: history,
- * exhaustion, individual states, etc. The RoseEngine has the offsets required
- * to correctly index into the main state structure. The offsets used by the
- * RoseEngine are based on the end of the hs_stream struct as its size may
- * vary from platform to platform.
- */
-struct hs_stream {
- /** \brief The RoseEngine that this stream is matching against. */
- const struct RoseEngine *rose;
-
- /** \brief The current stream offset. */
- u64a offset;
-};
-
-#define getMultiState(hs_s) ((char *)(hs_s) + sizeof(*(hs_s)))
-#define getMultiStateConst(hs_s) ((const char *)(hs_s) + sizeof(*(hs_s)))
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Stream state data structures.
+ */
+
+#ifndef STATE_H
+#define STATE_H
+
+#include "hs_runtime.h" /* match_event_handler */
+#include "ue2common.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+struct RoseEngine;
+
+/** \brief Stream context: allocated for each stream.
+ *
+ * struct hs_stream is followed in memory by the main Rose state: history,
+ * exhaustion, individual states, etc. The RoseEngine has the offsets required
+ * to correctly index into the main state structure. The offsets used by the
+ * RoseEngine are based on the end of the hs_stream struct as its size may
+ * vary from platform to platform.
+ */
+struct hs_stream {
+ /** \brief The RoseEngine that this stream is matching against. */
+ const struct RoseEngine *rose;
+
+ /** \brief The current stream offset. */
+ u64a offset;
+};
+
+#define getMultiState(hs_s) ((char *)(hs_s) + sizeof(*(hs_s)))
+#define getMultiStateConst(hs_s) ((const char *)(hs_s) + sizeof(*(hs_s)))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/ue2common.h b/contrib/libs/hyperscan/src/ue2common.h
index 12ff56e75b..5705af7be4 100644
--- a/contrib/libs/hyperscan/src/ue2common.h
+++ b/contrib/libs/hyperscan/src/ue2common.h
@@ -1,203 +1,203 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Core UE2 global types, defines, utilities.
- *
- * NOTE WELL: this file is included into both C and C++ source code, so
- * be sure to remain compatible with both.
- */
-
-#ifndef UE2COMMON_H
-#define UE2COMMON_H
-
-#include "config.h"
-
-/* standard types used across ue2 */
-
-// We use the size_t type all over the place, usually defined in stddef.h.
-#include <stddef.h>
-// stdint.h for things like uintptr_t and friends
-#include <stdint.h>
-
-/* ick */
-#if defined(_WIN32)
-#define ALIGN_ATTR(x) __declspec(align(x))
-#else
-#define ALIGN_ATTR(x) __attribute__((aligned((x))))
-#endif
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Core UE2 global types, defines, utilities.
+ *
+ * NOTE WELL: this file is included into both C and C++ source code, so
+ * be sure to remain compatible with both.
+ */
+
+#ifndef UE2COMMON_H
+#define UE2COMMON_H
+
+#include "config.h"
+
+/* standard types used across ue2 */
+
+// We use the size_t type all over the place, usually defined in stddef.h.
+#include <stddef.h>
+// stdint.h for things like uintptr_t and friends
+#include <stdint.h>
+
+/* ick */
+#if defined(_WIN32)
+#define ALIGN_ATTR(x) __declspec(align(x))
+#else
+#define ALIGN_ATTR(x) __attribute__((aligned((x))))
+#endif
+
#define ALIGN_DIRECTIVE ALIGN_ATTR(16)
#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32)
#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64)
-
-typedef signed char s8;
-typedef unsigned char u8;
-typedef signed short s16;
-typedef unsigned short u16;
-typedef unsigned int u32;
-typedef signed int s32;
-
-/* We append the 'a' for aligned, since these aren't common, garden variety
- * 64 bit values. The alignment is necessary for structs on some platforms,
- * so we don't end up performing accidental unaligned accesses. */
+
+typedef signed char s8;
+typedef unsigned char u8;
+typedef signed short s16;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef signed int s32;
+
+/* We append the 'a' for aligned, since these aren't common, garden variety
+ * 64 bit values. The alignment is necessary for structs on some platforms,
+ * so we don't end up performing accidental unaligned accesses. */
#if defined(_WIN32) && ! defined(_WIN64)
typedef unsigned long long ALIGN_ATTR(4) u64a;
typedef signed long long ALIGN_ATTR(4) s64a;
#else
-typedef unsigned long long ALIGN_ATTR(8) u64a;
-typedef signed long long ALIGN_ATTR(8) s64a;
-#endif
-
-/* get the SIMD types */
-#include "util/simd_types.h"
-
-/** \brief Report identifier, used for internal IDs and external IDs (those
- * reported on match). */
-typedef u32 ReportID;
-
-/* Shorthand for attribute to mark a function as part of our public API.
- * Functions without this attribute will be hidden. */
-#if !defined(_WIN32)
+typedef unsigned long long ALIGN_ATTR(8) u64a;
+typedef signed long long ALIGN_ATTR(8) s64a;
+#endif
+
+/* get the SIMD types */
+#include "util/simd_types.h"
+
+/** \brief Report identifier, used for internal IDs and external IDs (those
+ * reported on match). */
+typedef u32 ReportID;
+
+/* Shorthand for attribute to mark a function as part of our public API.
+ * Functions without this attribute will be hidden. */
+#if !defined(_WIN32)
#define HS_PUBLIC_API __attribute__((visibility("default")))
-#else
-// TODO: dllexport defines for windows
-#define HS_PUBLIC_API
-#endif
-
-#define ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0]))
-
-/** \brief Shorthand for the attribute to shut gcc about unused parameters */
-#if !defined(_WIN32)
-#define UNUSED __attribute__ ((unused))
-#else
-#define UNUSED
-#endif
-
-/* really_inline forces inlining always */
-#if !defined(_WIN32)
-#if defined(HS_OPTIMIZE)
-#define really_inline inline __attribute__ ((always_inline, unused))
-#else
-#define really_inline __attribute__ ((unused))
-#endif
-
-/** no, seriously, inline it, even if building in debug mode */
-#define really_really_inline inline __attribute__ ((always_inline, unused))
-#define never_inline __attribute__ ((noinline))
-#define alignof __alignof
-#define HAVE_TYPEOF 1
-
-#else // ms windows
-#define really_inline __forceinline
-#define really_really_inline __forceinline
-#define never_inline
-#define __builtin_prefetch(...) do {} while(0)
-#if defined(__cplusplus)
-#define __typeof__ decltype
-#define HAVE_TYPEOF 1
-#else // C
-/* msvc doesn't have decltype or typeof in C */
-#define inline __inline
-#define alignof __alignof
-#endif
-#endif
-
-
-// We use C99-style "restrict".
-#ifdef _WIN32
-#ifdef __cplusplus
-#define restrict
-#else
-#define restrict __restrict
-#endif
-#else
-#define restrict __restrict
-#endif
-
-
-// Align to 16-byte boundary
-#define ROUNDUP_16(a) (((a) + 0xf) & ~0xf)
-#define ROUNDDOWN_16(a) ((a) & ~0xf)
-
-// Align to N-byte boundary
-#define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
-#define ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
-
-// Align to a cacheline - assumed to be 64 bytes
-#define ROUNDUP_CL(a) ROUNDUP_N(a, 64)
-
-// Align ptr to next N-byte boundary
-#if defined(HAVE_TYPEOF)
-#define ROUNDUP_PTR(ptr, n) (__typeof__(ptr))(ROUNDUP_N((uintptr_t)(ptr), (n)))
-#define ROUNDDOWN_PTR(ptr, n) (__typeof__(ptr))(ROUNDDOWN_N((uintptr_t)(ptr), (n)))
-#else
-#define ROUNDUP_PTR(ptr, n) (void*)(ROUNDUP_N((uintptr_t)(ptr), (n)))
-#define ROUNDDOWN_PTR(ptr, n) (void*)(ROUNDDOWN_N((uintptr_t)(ptr), (n)))
-#endif
-
-#define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n) - 1)) == 0)
-#define ISALIGNED_16(ptr) ISALIGNED_N((ptr), 16)
-#define ISALIGNED_CL(ptr) ISALIGNED_N((ptr), 64)
-#if defined(HAVE_TYPEOF)
-#define ISALIGNED(ptr) ISALIGNED_N((ptr), alignof(__typeof__(*(ptr))))
-#else
-/* we should probably avoid using this test in C */
-#define ISALIGNED(ptr) (1)
-#endif
-#define N_CHARS 256
-
-// Maximum offset representable in the 'unsigned long long' we use to return
-// offset values.
-#define MAX_OFFSET 0xffffffffffffffffULL
-
-#if !defined(MIN)
- #define MIN(a,b) ((a) < (b) ? (a) : (b))
-#endif
-#if !defined(MAX)
- #define MAX(a,b) ((a) > (b) ? (a) : (b))
-#endif
-
-#define LIMIT_TO_AT_MOST(a, b) (*(a) = MIN(*(a),(b)))
-#define ENSURE_AT_LEAST(a, b) (*(a) = MAX(*(a),(b)))
-
-#ifndef _WIN32
-#ifndef likely
- #define likely(x) __builtin_expect(!!(x), 1)
-#endif
-#ifndef unlikely
- #define unlikely(x) __builtin_expect(!!(x), 0)
-#endif
-#else
+#else
+// TODO: dllexport defines for windows
+#define HS_PUBLIC_API
+#endif
+
+#define ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0]))
+
+/** \brief Shorthand for the attribute to shut gcc about unused parameters */
+#if !defined(_WIN32)
+#define UNUSED __attribute__ ((unused))
+#else
+#define UNUSED
+#endif
+
+/* really_inline forces inlining always */
+#if !defined(_WIN32)
+#if defined(HS_OPTIMIZE)
+#define really_inline inline __attribute__ ((always_inline, unused))
+#else
+#define really_inline __attribute__ ((unused))
+#endif
+
+/** no, seriously, inline it, even if building in debug mode */
+#define really_really_inline inline __attribute__ ((always_inline, unused))
+#define never_inline __attribute__ ((noinline))
+#define alignof __alignof
+#define HAVE_TYPEOF 1
+
+#else // ms windows
+#define really_inline __forceinline
+#define really_really_inline __forceinline
+#define never_inline
+#define __builtin_prefetch(...) do {} while(0)
+#if defined(__cplusplus)
+#define __typeof__ decltype
+#define HAVE_TYPEOF 1
+#else // C
+/* msvc doesn't have decltype or typeof in C */
+#define inline __inline
+#define alignof __alignof
+#endif
+#endif
+
+
+// We use C99-style "restrict".
+#ifdef _WIN32
+#ifdef __cplusplus
+#define restrict
+#else
+#define restrict __restrict
+#endif
+#else
+#define restrict __restrict
+#endif
+
+
+// Align to 16-byte boundary
+#define ROUNDUP_16(a) (((a) + 0xf) & ~0xf)
+#define ROUNDDOWN_16(a) ((a) & ~0xf)
+
+// Align to N-byte boundary
+#define ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1))
+#define ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
+
+// Align to a cacheline - assumed to be 64 bytes
+#define ROUNDUP_CL(a) ROUNDUP_N(a, 64)
+
+// Align ptr to next N-byte boundary
+#if defined(HAVE_TYPEOF)
+#define ROUNDUP_PTR(ptr, n) (__typeof__(ptr))(ROUNDUP_N((uintptr_t)(ptr), (n)))
+#define ROUNDDOWN_PTR(ptr, n) (__typeof__(ptr))(ROUNDDOWN_N((uintptr_t)(ptr), (n)))
+#else
+#define ROUNDUP_PTR(ptr, n) (void*)(ROUNDUP_N((uintptr_t)(ptr), (n)))
+#define ROUNDDOWN_PTR(ptr, n) (void*)(ROUNDDOWN_N((uintptr_t)(ptr), (n)))
+#endif
+
+#define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n) - 1)) == 0)
+#define ISALIGNED_16(ptr) ISALIGNED_N((ptr), 16)
+#define ISALIGNED_CL(ptr) ISALIGNED_N((ptr), 64)
+#if defined(HAVE_TYPEOF)
+#define ISALIGNED(ptr) ISALIGNED_N((ptr), alignof(__typeof__(*(ptr))))
+#else
+/* we should probably avoid using this test in C */
+#define ISALIGNED(ptr) (1)
+#endif
+#define N_CHARS 256
+
+// Maximum offset representable in the 'unsigned long long' we use to return
+// offset values.
+#define MAX_OFFSET 0xffffffffffffffffULL
+
+#if !defined(MIN)
+ #define MIN(a,b) ((a) < (b) ? (a) : (b))
+#endif
+#if !defined(MAX)
+ #define MAX(a,b) ((a) > (b) ? (a) : (b))
+#endif
+
+#define LIMIT_TO_AT_MOST(a, b) (*(a) = MIN(*(a),(b)))
+#define ENSURE_AT_LEAST(a, b) (*(a) = MAX(*(a),(b)))
+
+#ifndef _WIN32
+#ifndef likely
+ #define likely(x) __builtin_expect(!!(x), 1)
+#endif
+#ifndef unlikely
+ #define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
+#else
#define likely(x) (x)
#define unlikely(x) (x)
-#endif
-
+#endif
+
#if !defined(RELEASE_BUILD) || defined(DEBUG)
#ifdef _WIN32
#define PATH_SEP '\\'
@@ -207,25 +207,25 @@ typedef u32 ReportID;
#endif
#if defined(DEBUG) && !defined(DEBUG_PRINTF)
-#include <string.h>
-#include <stdio.h>
-#define DEBUG_PRINTF(format, ...) printf("%s:%s:%d:" format, \
+#include <string.h>
+#include <stdio.h>
+#define DEBUG_PRINTF(format, ...) printf("%s:%s:%d:" format, \
strrchr(__FILE__, PATH_SEP) + 1, \
__func__, __LINE__, ## __VA_ARGS__)
-#elif !defined(DEBUG_PRINTF)
-#define DEBUG_PRINTF(format, ...) do { } while(0)
-#endif
-
-#if !defined(RELEASE_BUILD)
-#include <string.h>
-#include <stdio.h>
-#define ADEBUG_PRINTF(format, ...) printf("!%s:%s:%d:" format, \
+#elif !defined(DEBUG_PRINTF)
+#define DEBUG_PRINTF(format, ...) do { } while(0)
+#endif
+
+#if !defined(RELEASE_BUILD)
+#include <string.h>
+#include <stdio.h>
+#define ADEBUG_PRINTF(format, ...) printf("!%s:%s:%d:" format, \
strrchr(__FILE__, PATH_SEP) + 1, \
__func__, __LINE__, ## __VA_ARGS__)
-#else
-#define ADEBUG_PRINTF(format, ...) do { } while(0)
-#endif
-
-#include <assert.h>
-
-#endif
+#else
+#define ADEBUG_PRINTF(format, ...) do { } while(0)
+#endif
+
+#include <assert.h>
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/alloc.cpp b/contrib/libs/hyperscan/src/util/alloc.cpp
index bd0812d0d3..ace26ed5d2 100644
--- a/contrib/libs/hyperscan/src/util/alloc.cpp
+++ b/contrib/libs/hyperscan/src/util/alloc.cpp
@@ -1,133 +1,133 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Aligned memory alloc/free.
- */
-#include "ue2common.h"
-#include "alloc.h"
-
-#include <cstdlib>
-#include <cstring>
-
-namespace ue2 {
-
-// This is one of the simplest ways to catch failure where we aren't using an
-// aligned_(zmalloc|_free) pair - it will force death if the wrong free is used.
-// We use this whenever assertions are switched on.
-#if !defined(NDEBUG)
-#define HACK_OFFSET 64
-#else
-#define HACK_OFFSET 0
-#endif
-
-/* get us a posix_memalign from somewhere */
-#if !defined(HAVE_POSIX_MEMALIGN)
-# if defined(HAVE_MEMALIGN)
- #define posix_memalign(A, B, C) ((*A = (void *)memalign(B, C)) == nullptr)
-# elif defined(HAVE__ALIGNED_MALLOC)
- /* on Windows */
- #include <malloc.h>
- #define posix_memalign(A, B, C) ((*A = (void *)_aligned_malloc(C, B)) == nullptr)
-# else
- #error no posix_memalign or memalign aligned malloc
-# endif
-#endif
-
-void *aligned_malloc_internal(size_t size, size_t align) {
- void *mem;
-#if !defined(_WIN32)
- int rv = posix_memalign(&mem, align, size);
- if (rv != 0) {
- DEBUG_PRINTF("posix_memalign returned %d when asked for %zu bytes\n",
- rv, size);
- return nullptr;
- }
-#else
- if (nullptr == (mem = _aligned_malloc(size, align))) {
- DEBUG_PRINTF("_aligned_malloc failed when asked for %zu bytes\n",
- size);
- return nullptr;
- }
-#endif
-
- assert(mem);
- return mem;
-}
-
-void aligned_free_internal(void *ptr) {
- if (!ptr) {
- return;
- }
-
-#if defined(_WIN32)
- _aligned_free(ptr);
-#else
- free(ptr);
-#endif
-}
-
-/** \brief 64-byte aligned, zeroed malloc.
- *
- * Pointers should be freed with \ref aligned_free. If we are unable to
- * allocate the requested number of bytes, this function will throw
- * std::bad_alloc. */
-void *aligned_zmalloc(size_t size) {
- // Really huge allocations are probably an indication that we've
- // done something wrong.
- assert(size < 1024 * 1024 * 1024); // 1GB
-
- const size_t alloc_size = size + HACK_OFFSET;
-
- void *mem = aligned_malloc_internal(alloc_size, 64);
- if (!mem) {
- DEBUG_PRINTF("unable to allocate %zu bytes\n", alloc_size);
- throw std::bad_alloc();
- }
-
- DEBUG_PRINTF("alloced %p reporting %p\n", mem, (char *)mem + HACK_OFFSET);
- assert(ISALIGNED_N(mem, 64));
-
- memset(mem, 0, alloc_size);
- return (void *)((char *)mem + HACK_OFFSET);
-}
-
-/** \brief Free a pointer allocated with \ref aligned_zmalloc. */
-void aligned_free(void *ptr) {
- if (!ptr) {
- return;
- }
-
- void *addr = (void *)((char *)ptr - HACK_OFFSET);
- DEBUG_PRINTF("asked to free %p freeing %p\n", ptr, addr);
-
- assert(ISALIGNED_N(addr, 64));
- aligned_free_internal(addr);
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Aligned memory alloc/free.
+ */
+#include "ue2common.h"
+#include "alloc.h"
+
+#include <cstdlib>
+#include <cstring>
+
+namespace ue2 {
+
+// This is one of the simplest ways to catch failure where we aren't using an
+// aligned_(zmalloc|_free) pair - it will force death if the wrong free is used.
+// We use this whenever assertions are switched on.
+#if !defined(NDEBUG)
+#define HACK_OFFSET 64
+#else
+#define HACK_OFFSET 0
+#endif
+
+/* get us a posix_memalign from somewhere */
+#if !defined(HAVE_POSIX_MEMALIGN)
+# if defined(HAVE_MEMALIGN)
+ #define posix_memalign(A, B, C) ((*A = (void *)memalign(B, C)) == nullptr)
+# elif defined(HAVE__ALIGNED_MALLOC)
+ /* on Windows */
+ #include <malloc.h>
+ #define posix_memalign(A, B, C) ((*A = (void *)_aligned_malloc(C, B)) == nullptr)
+# else
+ #error no posix_memalign or memalign aligned malloc
+# endif
+#endif
+
+void *aligned_malloc_internal(size_t size, size_t align) {
+ void *mem;
+#if !defined(_WIN32)
+ int rv = posix_memalign(&mem, align, size);
+ if (rv != 0) {
+ DEBUG_PRINTF("posix_memalign returned %d when asked for %zu bytes\n",
+ rv, size);
+ return nullptr;
+ }
+#else
+ if (nullptr == (mem = _aligned_malloc(size, align))) {
+ DEBUG_PRINTF("_aligned_malloc failed when asked for %zu bytes\n",
+ size);
+ return nullptr;
+ }
+#endif
+
+ assert(mem);
+ return mem;
+}
+
+void aligned_free_internal(void *ptr) {
+ if (!ptr) {
+ return;
+ }
+
+#if defined(_WIN32)
+ _aligned_free(ptr);
+#else
+ free(ptr);
+#endif
+}
+
+/** \brief 64-byte aligned, zeroed malloc.
+ *
+ * Pointers should be freed with \ref aligned_free. If we are unable to
+ * allocate the requested number of bytes, this function will throw
+ * std::bad_alloc. */
+void *aligned_zmalloc(size_t size) {
+ // Really huge allocations are probably an indication that we've
+ // done something wrong.
+ assert(size < 1024 * 1024 * 1024); // 1GB
+
+ const size_t alloc_size = size + HACK_OFFSET;
+
+ void *mem = aligned_malloc_internal(alloc_size, 64);
+ if (!mem) {
+ DEBUG_PRINTF("unable to allocate %zu bytes\n", alloc_size);
+ throw std::bad_alloc();
+ }
+
+ DEBUG_PRINTF("alloced %p reporting %p\n", mem, (char *)mem + HACK_OFFSET);
+ assert(ISALIGNED_N(mem, 64));
+
+ memset(mem, 0, alloc_size);
+ return (void *)((char *)mem + HACK_OFFSET);
+}
+
+/** \brief Free a pointer allocated with \ref aligned_zmalloc. */
+void aligned_free(void *ptr) {
+ if (!ptr) {
+ return;
+ }
+
+ void *addr = (void *)((char *)ptr - HACK_OFFSET);
+ DEBUG_PRINTF("asked to free %p freeing %p\n", ptr, addr);
+
+ assert(ISALIGNED_N(addr, 64));
+ aligned_free_internal(addr);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/util/alloc.h b/contrib/libs/hyperscan/src/util/alloc.h
index 6651437169..de20c8d028 100644
--- a/contrib/libs/hyperscan/src/util/alloc.h
+++ b/contrib/libs/hyperscan/src/util/alloc.h
@@ -1,101 +1,101 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
- * \brief Aligned memory alloc/free.
- */
-
-#ifndef UTIL_ALLOC_H
-#define UTIL_ALLOC_H
-
-#include <cassert>
-#include <cstddef> // size_t
-#include <limits>
-#include <memory>
-#include <new> // std::bad_alloc
-
-namespace ue2 {
-
-/** \brief 64-byte aligned, zeroed malloc.
- *
- * Pointers should be freed with \ref aligned_free. If we are unable to
- * allocate the requested number of bytes, this function will throw
- * std::bad_alloc. */
-void *aligned_zmalloc(size_t size);
-
-/** \brief Free a pointer allocated with \ref aligned_zmalloc. */
-void aligned_free(void *ptr);
-
-/** \brief Internal use only, used by AlignedAllocator. */
-void *aligned_malloc_internal(size_t size, size_t align);
-
-/** \brief Internal use only, used by AlignedAllocator. */
-void aligned_free_internal(void *ptr);
-
-/** \brief Aligned allocator class for use with STL containers. Ensures that
- * your objects are aligned to N bytes. */
+ * \brief Aligned memory alloc/free.
+ */
+
+#ifndef UTIL_ALLOC_H
+#define UTIL_ALLOC_H
+
+#include <cassert>
+#include <cstddef> // size_t
+#include <limits>
+#include <memory>
+#include <new> // std::bad_alloc
+
+namespace ue2 {
+
+/** \brief 64-byte aligned, zeroed malloc.
+ *
+ * Pointers should be freed with \ref aligned_free. If we are unable to
+ * allocate the requested number of bytes, this function will throw
+ * std::bad_alloc. */
+void *aligned_zmalloc(size_t size);
+
+/** \brief Free a pointer allocated with \ref aligned_zmalloc. */
+void aligned_free(void *ptr);
+
+/** \brief Internal use only, used by AlignedAllocator. */
+void *aligned_malloc_internal(size_t size, size_t align);
+
+/** \brief Internal use only, used by AlignedAllocator. */
+void aligned_free_internal(void *ptr);
+
+/** \brief Aligned allocator class for use with STL containers. Ensures that
+ * your objects are aligned to N bytes. */
template <class T, std::size_t N>
class AlignedAllocator {
-public:
+public:
using value_type = T;
-
+
AlignedAllocator() noexcept {}
-
+
template <class U, std::size_t N2>
AlignedAllocator(const AlignedAllocator<U, N2> &) noexcept {}
-
+
template <class U> struct rebind {
using other = AlignedAllocator<U, N>;
};
-
+
T *allocate(std::size_t size) const {
size_t alloc_size = size * sizeof(T);
return static_cast<T *>(aligned_malloc_internal(alloc_size, N));
- }
-
+ }
+
void deallocate(T *x, std::size_t) const noexcept {
aligned_free_internal(x);
- }
+ }
};
-
+
template <class T, class U, std::size_t N, std::size_t N2>
bool operator==(const AlignedAllocator<T, N> &,
const AlignedAllocator<U, N2> &) {
return true;
}
-
+
template <class T, class U, std::size_t N, std::size_t N2>
bool operator!=(const AlignedAllocator<T, N> &a,
const AlignedAllocator<U, N2> &b) {
return !(a == b);
}
-
-} // namespace ue2
-
-#endif
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/bitfield.h b/contrib/libs/hyperscan/src/util/bitfield.h
index f8c2a3336c..a580da7b60 100644
--- a/contrib/libs/hyperscan/src/util/bitfield.h
+++ b/contrib/libs/hyperscan/src/util/bitfield.h
@@ -1,318 +1,318 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Fast bitset class with find_first and find_next operations.
- */
-
-#ifndef BITFIELD_H
-#define BITFIELD_H
-
-#include "ue2common.h"
-#include "popcount.h"
-#include "util/bitutils.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Fast bitset class with find_first and find_next operations.
+ */
+
+#ifndef BITFIELD_H
+#define BITFIELD_H
+
+#include "ue2common.h"
+#include "popcount.h"
+#include "util/bitutils.h"
#include "util/hash.h"
-
-#include <array>
-#include <cassert>
-
-#include <boost/dynamic_bitset.hpp>
-
-namespace ue2 {
-
-/**
- * \brief Templated bitset class with find_first and find_next operations.
- *
- * This is a simple (but hopefully fast) class to replace our use of
- * std::bitset<>.
- *
- * Note: underlying storage is allocated as an array of 64-bit blocks. All
- * mutating operations MUST ensure that the trailer (the bits between
- * requested_size and the end of the array) is filled with zeroes; there's a
- * clear_trailer member function for this.
- */
-template<size_t requested_size>
-class bitfield {
-public:
- /// Empty constructor, zero initializes all bits.
- bitfield() : bits{{0}} {
- assert(none());
- }
-
- bitfield(const boost::dynamic_bitset<> &a) : bits{{0}} {
- assert(a.size() == requested_size);
- assert(none());
- for (auto i = a.find_first(); i != a.npos; i = a.find_next(i)) {
- set(i);
- }
- }
-
- /// Complete bitset equality.
- bool operator==(const bitfield &a) const {
- return bits == a.bits;
- }
-
- /// Inequality.
- bool operator!=(const bitfield &a) const {
- return bits != a.bits;
- }
-
- /// Ordering.
- bool operator<(const bitfield &a) const {
- return bits < a.bits;
- }
-
- /// Set all bits.
- void setall() {
- for (auto &e : bits) {
- e = all_ones;
- }
- clear_trailer();
- }
-
- /// Set all bits (alias for bitset::setall, to match dynamic_bitset).
- void set() {
- setall();
- }
-
- /// Clear all bits.
- void clear() {
- for (auto &e : bits) {
- e = 0;
- }
- }
-
- /// Clear all bits (alias for bitset::clear).
- void reset() {
- clear();
- }
-
- /// Clear bit N.
- void clear(size_t n) {
- assert(n < size());
- bits[getword(n)] &= ~maskbit(n);
- }
-
- /// Set bit N.
- void set(size_t n) {
- assert(n < size());
- bits[getword(n)] |= maskbit(n);
- }
-
- /// Test bit N.
- bool test(size_t n) const {
- assert(n < size());
- return bits[getword(n)] & maskbit(n);
- }
-
- /// Flip bit N.
- void flip(size_t n) {
- assert(n < size());
- bits[getword(n)] ^= maskbit(n);
- }
-
- /// Flip all bits.
- void flip() {
- for (auto &e : bits) {
- e = ~e;
- }
- clear_trailer();
- }
-
- /// Switch on the bit in the range [from, to], inclusive.
- void set_range(size_t from, size_t to) {
- assert(from <= to);
- assert(to < requested_size);
-
- if (from / block_size == to / block_size) {
- // Small case, our indices are in the same block.
- block_type block = all_ones << (from % block_size);
- if (to % block_size != block_size - 1) {
- block &= maskbit(to + 1) - 1;
- }
- bits[from / block_size] |= block;
- return;
- }
-
- // Large case, work in block units. Write a partial mask, then a
- // run of all-ones blocks, then a partial mask at the end.
- size_t i = from;
- if (i % block_size) {
- block_type block = all_ones << (i % block_size);
- bits[i / block_size] |= block;
- i = ROUNDUP_N(i, block_size);
- }
-
- for (; i + block_size <= to + 1; i += block_size) {
- bits[i / block_size] = all_ones;
- }
-
- if (i <= to) {
- assert(to - i + 1 < block_size);
- bits[i / block_size] |= (maskbit(to + 1) - 1);
- }
- }
-
- /// Returns total number of bits.
- static constexpr size_t size() {
- return requested_size;
- }
-
- /// Returns number of bits set on.
- size_t count() const {
- static_assert(block_size == 64, "adjust popcount for block_type");
- size_t sum = 0;
+
+#include <array>
+#include <cassert>
+
+#include <boost/dynamic_bitset.hpp>
+
+namespace ue2 {
+
+/**
+ * \brief Templated bitset class with find_first and find_next operations.
+ *
+ * This is a simple (but hopefully fast) class to replace our use of
+ * std::bitset<>.
+ *
+ * Note: underlying storage is allocated as an array of 64-bit blocks. All
+ * mutating operations MUST ensure that the trailer (the bits between
+ * requested_size and the end of the array) is filled with zeroes; there's a
+ * clear_trailer member function for this.
+ */
+template<size_t requested_size>
+class bitfield {
+public:
+ /// Empty constructor, zero initializes all bits.
+ bitfield() : bits{{0}} {
+ assert(none());
+ }
+
+ bitfield(const boost::dynamic_bitset<> &a) : bits{{0}} {
+ assert(a.size() == requested_size);
+ assert(none());
+ for (auto i = a.find_first(); i != a.npos; i = a.find_next(i)) {
+ set(i);
+ }
+ }
+
+ /// Complete bitset equality.
+ bool operator==(const bitfield &a) const {
+ return bits == a.bits;
+ }
+
+ /// Inequality.
+ bool operator!=(const bitfield &a) const {
+ return bits != a.bits;
+ }
+
+ /// Ordering.
+ bool operator<(const bitfield &a) const {
+ return bits < a.bits;
+ }
+
+ /// Set all bits.
+ void setall() {
+ for (auto &e : bits) {
+ e = all_ones;
+ }
+ clear_trailer();
+ }
+
+ /// Set all bits (alias for bitset::setall, to match dynamic_bitset).
+ void set() {
+ setall();
+ }
+
+ /// Clear all bits.
+ void clear() {
+ for (auto &e : bits) {
+ e = 0;
+ }
+ }
+
+ /// Clear all bits (alias for bitset::clear).
+ void reset() {
+ clear();
+ }
+
+ /// Clear bit N.
+ void clear(size_t n) {
+ assert(n < size());
+ bits[getword(n)] &= ~maskbit(n);
+ }
+
+ /// Set bit N.
+ void set(size_t n) {
+ assert(n < size());
+ bits[getword(n)] |= maskbit(n);
+ }
+
+ /// Test bit N.
+ bool test(size_t n) const {
+ assert(n < size());
+ return bits[getword(n)] & maskbit(n);
+ }
+
+ /// Flip bit N.
+ void flip(size_t n) {
+ assert(n < size());
+ bits[getword(n)] ^= maskbit(n);
+ }
+
+ /// Flip all bits.
+ void flip() {
+ for (auto &e : bits) {
+ e = ~e;
+ }
+ clear_trailer();
+ }
+
+ /// Switch on the bit in the range [from, to], inclusive.
+ void set_range(size_t from, size_t to) {
+ assert(from <= to);
+ assert(to < requested_size);
+
+ if (from / block_size == to / block_size) {
+ // Small case, our indices are in the same block.
+ block_type block = all_ones << (from % block_size);
+ if (to % block_size != block_size - 1) {
+ block &= maskbit(to + 1) - 1;
+ }
+ bits[from / block_size] |= block;
+ return;
+ }
+
+ // Large case, work in block units. Write a partial mask, then a
+ // run of all-ones blocks, then a partial mask at the end.
+ size_t i = from;
+ if (i % block_size) {
+ block_type block = all_ones << (i % block_size);
+ bits[i / block_size] |= block;
+ i = ROUNDUP_N(i, block_size);
+ }
+
+ for (; i + block_size <= to + 1; i += block_size) {
+ bits[i / block_size] = all_ones;
+ }
+
+ if (i <= to) {
+ assert(to - i + 1 < block_size);
+ bits[i / block_size] |= (maskbit(to + 1) - 1);
+ }
+ }
+
+ /// Returns total number of bits.
+ static constexpr size_t size() {
+ return requested_size;
+ }
+
+ /// Returns number of bits set on.
+ size_t count() const {
+ static_assert(block_size == 64, "adjust popcount for block_type");
+ size_t sum = 0;
size_t i = 0;
for (; i + 4 <= num_blocks; i += 4) {
sum += popcount64(bits[i]);
sum += popcount64(bits[i + 1]);
sum += popcount64(bits[i + 2]);
sum += popcount64(bits[i + 3]);
- }
+ }
for (; i < num_blocks; i++) {
sum += popcount64(bits[i]);
}
- assert(sum <= size());
- return sum;
- }
-
- /// Are no bits set?
- bool none() const {
- for (const auto &e : bits) {
- if (e != 0) {
- return false;
- }
- }
- return true;
- }
-
- /// Is any bit set?
- bool any() const {
- return !none();
- }
-
- /// Are all bits set?
- bool all() const {
- for (size_t i = 0; i < bits.size() - 1; i++) {
- if (bits[i] != all_ones) {
- return false;
- }
- }
- size_t rem = requested_size % block_size;
- block_type exp = rem ? ((block_type{1} << rem) - 1) : all_ones;
- return *bits.rbegin() == exp;
- }
-
- /// Returns first bit set, or bitfield::npos if none set.
- size_t find_first() const {
- for (size_t i = 0; i < bits.size(); i++) {
- if (bits[i] != 0) {
- return (i * block_size) + word_ctz(i);
- }
- }
- return npos;
- }
-
- // Returns last bit set, or bitfield::npos if none set.
- size_t find_last() const {
- for (int i = bits.size() - 1; i >= 0; i--) {
- if (bits[i]) {
- static_assert(block_size == 64, "adjust clz for block_type");
- return (i * block_size) + block_size - 1 - clz64(bits[i]);
- }
- }
- return npos;
- }
-
- /// Returns next bit set, or bitfield::npos if none set after 'last'.
- size_t find_next(size_t last) const {
- if (last >= size()) {
- return npos;
- }
-
- // check current word.
- size_t i = getword(last);
- block_type lastword = bits[i];
-
- if ((last % block_size) != (block_size - 1)) {
- lastword &= (all_ones << ((last % block_size) + 1));
-
- if (lastword) {
- static_assert(block_size == 64, "adjust ctz for block_type");
- return (i * block_size) + ctz64(lastword);
- }
- }
-
- // check the rest.
- for (i++; i < bits.size(); i++) {
- if (bits[i]) {
- return (i * block_size) + word_ctz(i);
- }
- }
-
- return npos;
- }
-
- size_t find_nth(size_t n) const {
- assert(n < npos);
-
- static_assert(block_size == 64, "adjust for block_type");
-
- size_t sum = 0;
- for (size_t i = 0; i < bits.size(); i++) {
- block_type block = bits[i];
- size_t aftersum = sum + popcount64(block);
- if (aftersum > n) { // Block contains the nth bit.
- for (; sum < n; sum++) {
- assert(block);
- block &= (block - 1);
- }
- assert(block);
- size_t bit = (i * block_size) + ctz64(block);
- assert(test(bit));
- return bit;
- }
- sum = aftersum;
- }
-
- assert(count() < n + 1);
- return npos;
- }
-
- /// Bitwise OR.
+ assert(sum <= size());
+ return sum;
+ }
+
+ /// Are no bits set?
+ bool none() const {
+ for (const auto &e : bits) {
+ if (e != 0) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /// Is any bit set?
+ bool any() const {
+ return !none();
+ }
+
+ /// Are all bits set?
+ bool all() const {
+ for (size_t i = 0; i < bits.size() - 1; i++) {
+ if (bits[i] != all_ones) {
+ return false;
+ }
+ }
+ size_t rem = requested_size % block_size;
+ block_type exp = rem ? ((block_type{1} << rem) - 1) : all_ones;
+ return *bits.rbegin() == exp;
+ }
+
+ /// Returns first bit set, or bitfield::npos if none set.
+ size_t find_first() const {
+ for (size_t i = 0; i < bits.size(); i++) {
+ if (bits[i] != 0) {
+ return (i * block_size) + word_ctz(i);
+ }
+ }
+ return npos;
+ }
+
+ // Returns last bit set, or bitfield::npos if none set.
+ size_t find_last() const {
+ for (int i = bits.size() - 1; i >= 0; i--) {
+ if (bits[i]) {
+ static_assert(block_size == 64, "adjust clz for block_type");
+ return (i * block_size) + block_size - 1 - clz64(bits[i]);
+ }
+ }
+ return npos;
+ }
+
+ /// Returns next bit set, or bitfield::npos if none set after 'last'.
+ size_t find_next(size_t last) const {
+ if (last >= size()) {
+ return npos;
+ }
+
+ // check current word.
+ size_t i = getword(last);
+ block_type lastword = bits[i];
+
+ if ((last % block_size) != (block_size - 1)) {
+ lastword &= (all_ones << ((last % block_size) + 1));
+
+ if (lastword) {
+ static_assert(block_size == 64, "adjust ctz for block_type");
+ return (i * block_size) + ctz64(lastword);
+ }
+ }
+
+ // check the rest.
+ for (i++; i < bits.size(); i++) {
+ if (bits[i]) {
+ return (i * block_size) + word_ctz(i);
+ }
+ }
+
+ return npos;
+ }
+
+ size_t find_nth(size_t n) const {
+ assert(n < npos);
+
+ static_assert(block_size == 64, "adjust for block_type");
+
+ size_t sum = 0;
+ for (size_t i = 0; i < bits.size(); i++) {
+ block_type block = bits[i];
+ size_t aftersum = sum + popcount64(block);
+ if (aftersum > n) { // Block contains the nth bit.
+ for (; sum < n; sum++) {
+ assert(block);
+ block &= (block - 1);
+ }
+ assert(block);
+ size_t bit = (i * block_size) + ctz64(block);
+ assert(test(bit));
+ return bit;
+ }
+ sum = aftersum;
+ }
+
+ assert(count() < n + 1);
+ return npos;
+ }
+
+ /// Bitwise OR.
bitfield operator|(const bitfield &a) const {
bitfield b = a;
b |= *this;
return b;
- }
-
- /// Bitwise OR-equals.
- void operator|=(const bitfield &a) {
+ }
+
+ /// Bitwise OR-equals.
+ void operator|=(const bitfield &a) {
size_t i = 0;
for (; i + 4 <= num_blocks; i += 4) {
bits[i] |= a.bits[i];
@@ -321,19 +321,19 @@ public:
bits[i + 3] |= a.bits[i + 3];
}
for (; i < num_blocks; i++) {
- bits[i] |= a.bits[i];
- }
- }
-
- /// Bitwise AND.
+ bits[i] |= a.bits[i];
+ }
+ }
+
+ /// Bitwise AND.
bitfield operator&(const bitfield &a) const {
bitfield b = a;
b &= *this;
return b;
- }
-
- /// Bitwise AND-equals.
- void operator&=(const bitfield &a) {
+ }
+
+ /// Bitwise AND-equals.
+ void operator&=(const bitfield &a) {
size_t i = 0;
for (; i + 4 <= num_blocks; i += 4) {
bits[i] &= a.bits[i];
@@ -342,17 +342,17 @@ public:
bits[i + 3] &= a.bits[i + 3];
}
for (; i < num_blocks; i++) {
- bits[i] &= a.bits[i];
- }
- }
-
- /// Bitwise XOR.
+ bits[i] &= a.bits[i];
+ }
+ }
+
+ /// Bitwise XOR.
bitfield operator^(bitfield a) const {
a ^= *this;
return a;
- }
-
- /// Bitwise XOR-equals.
+ }
+
+ /// Bitwise XOR-equals.
void operator^=(bitfield a) {
size_t i = 0;
for (; i + 4 <= num_blocks; i += 4) {
@@ -362,77 +362,77 @@ public:
bits[i + 3] ^= a.bits[i + 3];
}
for (; i < num_blocks; i++) {
- bits[i] ^= a.bits[i];
- }
- }
-
- /// Bitwise complement.
- bitfield operator~(void) const {
- bitfield cr(*this);
- cr.flip();
- return cr;
- }
-
- /// Simple hash.
- size_t hash() const {
+ bits[i] ^= a.bits[i];
+ }
+ }
+
+ /// Bitwise complement.
+ bitfield operator~(void) const {
+ bitfield cr(*this);
+ cr.flip();
+ return cr;
+ }
+
+ /// Simple hash.
+ size_t hash() const {
return ue2_hasher()(bits);
- }
-
- /// Sentinel value meaning "no more bits", used by find_first and
- /// find_next.
- static constexpr size_t npos = requested_size;
-
-private:
- /// Underlying block type.
- using block_type = u64a;
-
- /// A block filled with on bits.
- static constexpr block_type all_ones = ~block_type{0};
-
- /// Size of a block.
- static constexpr size_t block_size = sizeof(block_type) * 8;
-
- static size_t getword(size_t n) {
- return n / block_size;
- }
-
- static block_type maskbit(size_t n) {
- return (block_type{1} << (n % block_size));
- }
-
- size_t word_ctz(size_t n) const {
- static_assert(block_size == 64, "adjust ctz call for block type");
- return ctz64(bits[n]);
- }
-
- /// Ensures that bits between our requested size and the end of storage are
- /// zero.
- void clear_trailer() {
- size_t final_bits = requested_size % block_size;
- if (final_bits) {
- bits.back() &= ((block_type{1} << final_bits) - 1);
- }
- }
-
- /// Size of storage array of blocks.
- static constexpr size_t num_blocks =
- (requested_size + block_size - 1) / block_size;
-
- /// Underlying storage.
- std::array<block_type, num_blocks> bits;
-};
-
+ }
+
+ /// Sentinel value meaning "no more bits", used by find_first and
+ /// find_next.
+ static constexpr size_t npos = requested_size;
+
+private:
+ /// Underlying block type.
+ using block_type = u64a;
+
+ /// A block filled with on bits.
+ static constexpr block_type all_ones = ~block_type{0};
+
+ /// Size of a block.
+ static constexpr size_t block_size = sizeof(block_type) * 8;
+
+ static size_t getword(size_t n) {
+ return n / block_size;
+ }
+
+ static block_type maskbit(size_t n) {
+ return (block_type{1} << (n % block_size));
+ }
+
+ size_t word_ctz(size_t n) const {
+ static_assert(block_size == 64, "adjust ctz call for block type");
+ return ctz64(bits[n]);
+ }
+
+ /// Ensures that bits between our requested size and the end of storage are
+ /// zero.
+ void clear_trailer() {
+ size_t final_bits = requested_size % block_size;
+ if (final_bits) {
+ bits.back() &= ((block_type{1} << final_bits) - 1);
+ }
+ }
+
+ /// Size of storage array of blocks.
+ static constexpr size_t num_blocks =
+ (requested_size + block_size - 1) / block_size;
+
+ /// Underlying storage.
+ std::array<block_type, num_blocks> bits;
+};
+
} // namespace ue2
namespace std {
-template<size_t requested_size>
+template<size_t requested_size>
struct hash<ue2::bitfield<requested_size>> {
size_t operator()(const ue2::bitfield<requested_size> &b) const {
return b.hash();
}
};
-
+
} // namespace std
-
-#endif // BITFIELD_H
+
+#endif // BITFIELD_H
diff --git a/contrib/libs/hyperscan/src/util/bitutils.h b/contrib/libs/hyperscan/src/util/bitutils.h
index 0bb468d9dd..c545ee1872 100644
--- a/contrib/libs/hyperscan/src/util/bitutils.h
+++ b/contrib/libs/hyperscan/src/util/bitutils.h
@@ -1,67 +1,67 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Bit-twiddling primitives (ctz, compress etc)
- */
-
-#ifndef BITUTILS_H
-#define BITUTILS_H
-
-#include "ue2common.h"
-#include "popcount.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Bit-twiddling primitives (ctz, compress etc)
+ */
+
+#ifndef BITUTILS_H
+#define BITUTILS_H
+
+#include "ue2common.h"
+#include "popcount.h"
#include "util/arch.h"
#include "util/intrinsics.h"
-
-#define CASE_BIT 0x20
-#define CASE_CLEAR 0xdf
-#define DOUBLE_CASE_CLEAR 0xdfdf
+
+#define CASE_BIT 0x20
+#define CASE_CLEAR 0xdf
+#define DOUBLE_CASE_CLEAR 0xdfdf
#define OCTO_CASE_CLEAR 0xdfdfdfdfdfdfdfdfULL
-
-static really_inline
-u32 clz32(u32 x) {
- assert(x); // behaviour not defined for x == 0
-#if defined(_WIN32)
- unsigned long r;
- _BitScanReverse(&r, x);
- return 31 - r;
-#else
- return (u32)__builtin_clz(x);
-#endif
-}
-
-static really_inline
-u32 clz64(u64a x) {
- assert(x); // behaviour not defined for x == 0
+
+static really_inline
+u32 clz32(u32 x) {
+ assert(x); // behaviour not defined for x == 0
+#if defined(_WIN32)
+ unsigned long r;
+ _BitScanReverse(&r, x);
+ return 31 - r;
+#else
+ return (u32)__builtin_clz(x);
+#endif
+}
+
+static really_inline
+u32 clz64(u64a x) {
+ assert(x); // behaviour not defined for x == 0
#if defined(_WIN64)
- unsigned long r;
- _BitScanReverse64(&r, x);
- return 63 - r;
+ unsigned long r;
+ _BitScanReverse64(&r, x);
+ return 63 - r;
#elif defined(_WIN32)
unsigned long x1 = (u32)x;
unsigned long x2 = (u32)(x >> 32);
@@ -72,31 +72,31 @@ u32 clz64(u64a x) {
}
_BitScanReverse(&r, (u32)x1);
return (u32)(63 - r);
-#else
- return (u32)__builtin_clzll(x);
-#endif
-}
-
-// CTZ (count trailing zero) implementations.
-static really_inline
-u32 ctz32(u32 x) {
- assert(x); // behaviour not defined for x == 0
-#if defined(_WIN32)
- unsigned long r;
- _BitScanForward(&r, x);
- return r;
-#else
- return (u32)__builtin_ctz(x);
-#endif
-}
-
-static really_inline
-u32 ctz64(u64a x) {
- assert(x); // behaviour not defined for x == 0
+#else
+ return (u32)__builtin_clzll(x);
+#endif
+}
+
+// CTZ (count trailing zero) implementations.
+static really_inline
+u32 ctz32(u32 x) {
+ assert(x); // behaviour not defined for x == 0
+#if defined(_WIN32)
+ unsigned long r;
+ _BitScanForward(&r, x);
+ return r;
+#else
+ return (u32)__builtin_ctz(x);
+#endif
+}
+
+static really_inline
+u32 ctz64(u64a x) {
+ assert(x); // behaviour not defined for x == 0
#if defined(_WIN64)
- unsigned long r;
- _BitScanForward64(&r, x);
- return r;
+ unsigned long r;
+ _BitScanForward64(&r, x);
+ return r;
#elif defined(_WIN32)
unsigned long r;
if (_BitScanForward(&r, (u32)x)) {
@@ -104,328 +104,328 @@ u32 ctz64(u64a x) {
}
_BitScanForward(&r, x >> 32);
return (u32)(r + 32);
-#else
- return (u32)__builtin_ctzll(x);
-#endif
-}
-
-static really_inline
-u32 lg2(u32 x) {
- if (!x) {
- return 0;
- }
- return 31 - clz32(x);
-}
-
-static really_inline
-u64a lg2_64(u64a x) {
- if (!x) {
- return 0;
- }
- return 63 - clz64(x);
-}
-
-static really_inline
-u32 findAndClearLSB_32(u32 *v) {
- assert(*v != 0); // behaviour not defined in this case
-#ifndef NO_ASM
- u32 val = *v, offset;
- __asm__ ("bsf %1, %0\n"
- "btr %0, %1\n"
- : "=r" (offset), "=r" (val)
- : "1" (val));
- *v = val;
-#else
- u32 val = *v;
- u32 offset = ctz32(val);
- *v = val & (val - 1);
-#endif
-
- assert(offset < 32);
- return offset;
-}
-
-static really_inline
-u32 findAndClearLSB_64(u64a *v) {
- assert(*v != 0); // behaviour not defined in this case
-
-#ifdef ARCH_64_BIT
-#if defined(ARCH_X86_64) && !defined(NO_ASM)
- u64a val = *v, offset;
- __asm__ ("bsfq %1, %0\n"
- "btrq %0, %1\n"
- : "=r" (offset), "=r" (val)
- : "1" (val));
- *v = val;
-#else
- // generic variant using gcc's builtin on 64-bit
- u64a val = *v, offset;
- offset = ctz64(val);
- *v = val & (val - 1);
-#endif // ARCH_X86_64
-#else
- // fall back to doing things with two 32-bit cases, since gcc-4.1 doesn't
- // inline calls to __builtin_ctzll
+#else
+ return (u32)__builtin_ctzll(x);
+#endif
+}
+
+static really_inline
+u32 lg2(u32 x) {
+ if (!x) {
+ return 0;
+ }
+ return 31 - clz32(x);
+}
+
+static really_inline
+u64a lg2_64(u64a x) {
+ if (!x) {
+ return 0;
+ }
+ return 63 - clz64(x);
+}
+
+static really_inline
+u32 findAndClearLSB_32(u32 *v) {
+ assert(*v != 0); // behaviour not defined in this case
+#ifndef NO_ASM
+ u32 val = *v, offset;
+ __asm__ ("bsf %1, %0\n"
+ "btr %0, %1\n"
+ : "=r" (offset), "=r" (val)
+ : "1" (val));
+ *v = val;
+#else
+ u32 val = *v;
+ u32 offset = ctz32(val);
+ *v = val & (val - 1);
+#endif
+
+ assert(offset < 32);
+ return offset;
+}
+
+static really_inline
+u32 findAndClearLSB_64(u64a *v) {
+ assert(*v != 0); // behaviour not defined in this case
+
+#ifdef ARCH_64_BIT
+#if defined(ARCH_X86_64) && !defined(NO_ASM)
+ u64a val = *v, offset;
+ __asm__ ("bsfq %1, %0\n"
+ "btrq %0, %1\n"
+ : "=r" (offset), "=r" (val)
+ : "1" (val));
+ *v = val;
+#else
+ // generic variant using gcc's builtin on 64-bit
+ u64a val = *v, offset;
+ offset = ctz64(val);
+ *v = val & (val - 1);
+#endif // ARCH_X86_64
+#else
+ // fall back to doing things with two 32-bit cases, since gcc-4.1 doesn't
+ // inline calls to __builtin_ctzll
u32 v1 = (u32)*v;
u32 v2 = (u32)(*v >> 32);
- u32 offset;
- if (v1) {
- offset = findAndClearLSB_32(&v1);
- *v = (u64a)v1 | ((u64a)v2 << 32);
- } else {
- offset = findAndClearLSB_32(&v2) + 32;
- *v = (u64a)v2 << 32;
- }
-#endif
-
- assert(offset < 64);
- return (u32)offset;
-}
-
-static really_inline
-u32 findAndClearMSB_32(u32 *v) {
- assert(*v != 0); // behaviour not defined in this case
-#ifndef NO_ASM
- u32 val = *v, offset;
- __asm__ ("bsr %1, %0\n"
- "btr %0, %1\n"
- : "=r" (offset), "=r" (val)
- : "1" (val));
- *v = val;
-#else
- u32 val = *v;
- u32 offset = 31 - clz32(val);
- *v = val & ~(1 << offset);
-#endif
- assert(offset < 32);
- return offset;
-}
-
-static really_inline
-u32 findAndClearMSB_64(u64a *v) {
- assert(*v != 0); // behaviour not defined in this case
-
-#ifdef ARCH_64_BIT
-#if defined(ARCH_X86_64) && !defined(NO_ASM)
- u64a val = *v, offset;
- __asm__ ("bsrq %1, %0\n"
- "btrq %0, %1\n"
- : "=r" (offset), "=r" (val)
- : "1" (val));
- *v = val;
-#else
- // generic variant using gcc's builtin on 64-bit
- u64a val = *v, offset;
- offset = 63 - clz64(val);
- *v = val & ~(1ULL << offset);
-#endif // ARCH_X86_64
-#else
- // fall back to doing things with two 32-bit cases, since gcc-4.1 doesn't
- // inline calls to __builtin_ctzll
+ u32 offset;
+ if (v1) {
+ offset = findAndClearLSB_32(&v1);
+ *v = (u64a)v1 | ((u64a)v2 << 32);
+ } else {
+ offset = findAndClearLSB_32(&v2) + 32;
+ *v = (u64a)v2 << 32;
+ }
+#endif
+
+ assert(offset < 64);
+ return (u32)offset;
+}
+
+static really_inline
+u32 findAndClearMSB_32(u32 *v) {
+ assert(*v != 0); // behaviour not defined in this case
+#ifndef NO_ASM
+ u32 val = *v, offset;
+ __asm__ ("bsr %1, %0\n"
+ "btr %0, %1\n"
+ : "=r" (offset), "=r" (val)
+ : "1" (val));
+ *v = val;
+#else
+ u32 val = *v;
+ u32 offset = 31 - clz32(val);
+ *v = val & ~(1 << offset);
+#endif
+ assert(offset < 32);
+ return offset;
+}
+
+static really_inline
+u32 findAndClearMSB_64(u64a *v) {
+ assert(*v != 0); // behaviour not defined in this case
+
+#ifdef ARCH_64_BIT
+#if defined(ARCH_X86_64) && !defined(NO_ASM)
+ u64a val = *v, offset;
+ __asm__ ("bsrq %1, %0\n"
+ "btrq %0, %1\n"
+ : "=r" (offset), "=r" (val)
+ : "1" (val));
+ *v = val;
+#else
+ // generic variant using gcc's builtin on 64-bit
+ u64a val = *v, offset;
+ offset = 63 - clz64(val);
+ *v = val & ~(1ULL << offset);
+#endif // ARCH_X86_64
+#else
+ // fall back to doing things with two 32-bit cases, since gcc-4.1 doesn't
+ // inline calls to __builtin_ctzll
u32 v1 = (u32)*v;
- u32 v2 = (*v >> 32);
- u32 offset;
- if (v2) {
- offset = findAndClearMSB_32(&v2) + 32;
- *v = ((u64a)v2 << 32) | (u64a)v1;
- } else {
- offset = findAndClearMSB_32(&v1);
- *v = (u64a)v1;
- }
-#endif
-
- assert(offset < 64);
- return (u32)offset;
-}
-
-static really_inline
-u32 compress32(u32 x, u32 m) {
+ u32 v2 = (*v >> 32);
+ u32 offset;
+ if (v2) {
+ offset = findAndClearMSB_32(&v2) + 32;
+ *v = ((u64a)v2 << 32) | (u64a)v1;
+ } else {
+ offset = findAndClearMSB_32(&v1);
+ *v = (u64a)v1;
+ }
+#endif
+
+ assert(offset < 64);
+ return (u32)offset;
+}
+
+static really_inline
+u32 compress32(u32 x, u32 m) {
#if defined(HAVE_BMI2)
- // BMI2 has a single instruction for this operation.
- return _pext_u32(x, m);
-#else
-
- // Return zero quickly on trivial cases
- if ((x & m) == 0) {
- return 0;
- }
-
- u32 mk, mp, mv, t;
-
- x &= m; // clear irrelevant bits
-
- mk = ~m << 1; // we will count 0's to right
- for (u32 i = 0; i < 5; i++) {
- mp = mk ^ (mk << 1);
- mp ^= mp << 2;
- mp ^= mp << 4;
- mp ^= mp << 8;
- mp ^= mp << 16;
-
- mv = mp & m; // bits to move
- m = (m ^ mv) | (mv >> (1 << i)); // compress m
- t = x & mv;
- x = (x ^ t) | (t >> (1 << i)); // compress x
- mk = mk & ~mp;
- }
-
- return x;
-#endif
-}
-
-static really_inline
-u64a compress64(u64a x, u64a m) {
+ // BMI2 has a single instruction for this operation.
+ return _pext_u32(x, m);
+#else
+
+ // Return zero quickly on trivial cases
+ if ((x & m) == 0) {
+ return 0;
+ }
+
+ u32 mk, mp, mv, t;
+
+ x &= m; // clear irrelevant bits
+
+ mk = ~m << 1; // we will count 0's to right
+ for (u32 i = 0; i < 5; i++) {
+ mp = mk ^ (mk << 1);
+ mp ^= mp << 2;
+ mp ^= mp << 4;
+ mp ^= mp << 8;
+ mp ^= mp << 16;
+
+ mv = mp & m; // bits to move
+ m = (m ^ mv) | (mv >> (1 << i)); // compress m
+ t = x & mv;
+ x = (x ^ t) | (t >> (1 << i)); // compress x
+ mk = mk & ~mp;
+ }
+
+ return x;
+#endif
+}
+
+static really_inline
+u64a compress64(u64a x, u64a m) {
#if defined(ARCH_X86_64) && defined(HAVE_BMI2)
- // BMI2 has a single instruction for this operation.
- return _pext_u64(x, m);
-#else
-
- // Return zero quickly on trivial cases
- if ((x & m) == 0) {
- return 0;
- }
-
- u64a mk, mp, mv, t;
-
- x &= m; // clear irrelevant bits
-
- mk = ~m << 1; // we will count 0's to right
- for (u32 i = 0; i < 6; i++) {
- mp = mk ^ (mk << 1);
- mp ^= mp << 2;
- mp ^= mp << 4;
- mp ^= mp << 8;
- mp ^= mp << 16;
- mp ^= mp << 32;
-
- mv = mp & m; // bits to move
- m = (m ^ mv) | (mv >> (1 << i)); // compress m
- t = x & mv;
- x = (x ^ t) | (t >> (1 << i)); // compress x
- mk = mk & ~mp;
- }
-
- return x;
-#endif
-}
-
-static really_inline
-u32 expand32(u32 x, u32 m) {
+ // BMI2 has a single instruction for this operation.
+ return _pext_u64(x, m);
+#else
+
+ // Return zero quickly on trivial cases
+ if ((x & m) == 0) {
+ return 0;
+ }
+
+ u64a mk, mp, mv, t;
+
+ x &= m; // clear irrelevant bits
+
+ mk = ~m << 1; // we will count 0's to right
+ for (u32 i = 0; i < 6; i++) {
+ mp = mk ^ (mk << 1);
+ mp ^= mp << 2;
+ mp ^= mp << 4;
+ mp ^= mp << 8;
+ mp ^= mp << 16;
+ mp ^= mp << 32;
+
+ mv = mp & m; // bits to move
+ m = (m ^ mv) | (mv >> (1 << i)); // compress m
+ t = x & mv;
+ x = (x ^ t) | (t >> (1 << i)); // compress x
+ mk = mk & ~mp;
+ }
+
+ return x;
+#endif
+}
+
+static really_inline
+u32 expand32(u32 x, u32 m) {
#if defined(HAVE_BMI2)
- // BMI2 has a single instruction for this operation.
- return _pdep_u32(x, m);
-#else
-
- // Return zero quickly on trivial cases
- if (!x || !m) {
- return 0;
- }
-
- u32 m0, mk, mp, mv, t;
- u32 array[5];
-
- m0 = m; // save original mask
- mk = ~m << 1; // we will count 0's to right
-
- for (int i = 0; i < 5; i++) {
- mp = mk ^ (mk << 1); // parallel suffix
- mp = mp ^ (mp << 2);
- mp = mp ^ (mp << 4);
- mp = mp ^ (mp << 8);
- mp = mp ^ (mp << 16);
- mv = mp & m; // bits to move
- array[i] = mv;
- m = (m ^ mv) | (mv >> (1 << i)); // compress m
- mk = mk & ~mp;
- }
-
- for (int i = 4; i >= 0; i--) {
- mv = array[i];
- t = x << (1 << i);
- x = (x & ~mv) | (t & mv);
- }
-
- return x & m0; // clear out extraneous bits
-#endif
-}
-
-static really_inline
-u64a expand64(u64a x, u64a m) {
+ // BMI2 has a single instruction for this operation.
+ return _pdep_u32(x, m);
+#else
+
+ // Return zero quickly on trivial cases
+ if (!x || !m) {
+ return 0;
+ }
+
+ u32 m0, mk, mp, mv, t;
+ u32 array[5];
+
+ m0 = m; // save original mask
+ mk = ~m << 1; // we will count 0's to right
+
+ for (int i = 0; i < 5; i++) {
+ mp = mk ^ (mk << 1); // parallel suffix
+ mp = mp ^ (mp << 2);
+ mp = mp ^ (mp << 4);
+ mp = mp ^ (mp << 8);
+ mp = mp ^ (mp << 16);
+ mv = mp & m; // bits to move
+ array[i] = mv;
+ m = (m ^ mv) | (mv >> (1 << i)); // compress m
+ mk = mk & ~mp;
+ }
+
+ for (int i = 4; i >= 0; i--) {
+ mv = array[i];
+ t = x << (1 << i);
+ x = (x & ~mv) | (t & mv);
+ }
+
+ return x & m0; // clear out extraneous bits
+#endif
+}
+
+static really_inline
+u64a expand64(u64a x, u64a m) {
#if defined(ARCH_X86_64) && defined(HAVE_BMI2)
- // BMI2 has a single instruction for this operation.
- return _pdep_u64(x, m);
-#else
-
- // Return zero quickly on trivial cases
- if (!x || !m) {
- return 0;
- }
-
- u64a m0, mk, mp, mv, t;
- u64a array[6];
-
- m0 = m; // save original mask
- mk = ~m << 1; // we will count 0's to right
-
- for (int i = 0; i < 6; i++) {
- mp = mk ^ (mk << 1); // parallel suffix
- mp = mp ^ (mp << 2);
- mp = mp ^ (mp << 4);
- mp = mp ^ (mp << 8);
- mp = mp ^ (mp << 16);
- mp = mp ^ (mp << 32);
- mv = mp & m; // bits to move
- array[i] = mv;
- m = (m ^ mv) | (mv >> (1 << i)); // compress m
- mk = mk & ~mp;
- }
-
- for (int i = 5; i >= 0; i--) {
- mv = array[i];
- t = x << (1 << i);
- x = (x & ~mv) | (t & mv);
- }
-
- return x & m0; // clear out extraneous bits
-#endif
-}
-
-
-/* returns the first set bit after begin (if not ~0U). If no bit is set after
- * begin returns ~0U
- */
-static really_inline
-u32 bf64_iterate(u64a bitfield, u32 begin) {
- if (begin != ~0U) {
- /* switch off all bits at or below begin. Note: not legal to shift by
- * by size of the datatype or larger. */
- assert(begin <= 63);
- bitfield &= ~((2ULL << begin) - 1);
- }
-
- if (!bitfield) {
- return ~0U;
- }
-
- return ctz64(bitfield);
-}
-
-static really_inline
-char bf64_set(u64a *bitfield, u32 i) {
- assert(i < 64);
- u64a mask = 1ULL << i;
- char was_set = !!(*bitfield & mask);
- *bitfield |= mask;
-
- return was_set;
-}
-
-static really_inline
-void bf64_unset(u64a *bitfield, u32 i) {
- assert(i < 64);
- *bitfield &= ~(1ULL << i);
-}
-
+ // BMI2 has a single instruction for this operation.
+ return _pdep_u64(x, m);
+#else
+
+ // Return zero quickly on trivial cases
+ if (!x || !m) {
+ return 0;
+ }
+
+ u64a m0, mk, mp, mv, t;
+ u64a array[6];
+
+ m0 = m; // save original mask
+ mk = ~m << 1; // we will count 0's to right
+
+ for (int i = 0; i < 6; i++) {
+ mp = mk ^ (mk << 1); // parallel suffix
+ mp = mp ^ (mp << 2);
+ mp = mp ^ (mp << 4);
+ mp = mp ^ (mp << 8);
+ mp = mp ^ (mp << 16);
+ mp = mp ^ (mp << 32);
+ mv = mp & m; // bits to move
+ array[i] = mv;
+ m = (m ^ mv) | (mv >> (1 << i)); // compress m
+ mk = mk & ~mp;
+ }
+
+ for (int i = 5; i >= 0; i--) {
+ mv = array[i];
+ t = x << (1 << i);
+ x = (x & ~mv) | (t & mv);
+ }
+
+ return x & m0; // clear out extraneous bits
+#endif
+}
+
+
+/* returns the first set bit after begin (if not ~0U). If no bit is set after
+ * begin returns ~0U
+ */
+static really_inline
+u32 bf64_iterate(u64a bitfield, u32 begin) {
+ if (begin != ~0U) {
+ /* switch off all bits at or below begin. Note: not legal to shift by
+ * by size of the datatype or larger. */
+ assert(begin <= 63);
+ bitfield &= ~((2ULL << begin) - 1);
+ }
+
+ if (!bitfield) {
+ return ~0U;
+ }
+
+ return ctz64(bitfield);
+}
+
+static really_inline
+char bf64_set(u64a *bitfield, u32 i) {
+ assert(i < 64);
+ u64a mask = 1ULL << i;
+ char was_set = !!(*bitfield & mask);
+ *bitfield |= mask;
+
+ return was_set;
+}
+
+static really_inline
+void bf64_unset(u64a *bitfield, u32 i) {
+ assert(i < 64);
+ *bitfield &= ~(1ULL << i);
+}
+
static really_inline
u32 rank_in_mask32(u32 mask, u32 bit) {
assert(bit < sizeof(u32) * 8);
@@ -489,4 +489,4 @@ u64a pdep64(u64a x, u64a mask) {
}
#endif
-#endif // BITUTILS_H
+#endif // BITUTILS_H
diff --git a/contrib/libs/hyperscan/src/util/boundary_reports.h b/contrib/libs/hyperscan/src/util/boundary_reports.h
index 61ce42b043..b2bb1c9b0a 100644
--- a/contrib/libs/hyperscan/src/util/boundary_reports.h
+++ b/contrib/libs/hyperscan/src/util/boundary_reports.h
@@ -1,51 +1,51 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef BOUNDARY_REPORTS_H
-#define BOUNDARY_REPORTS_H
-
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef BOUNDARY_REPORTS_H
+#define BOUNDARY_REPORTS_H
+
+#include "ue2common.h"
#include "util/noncopyable.h"
-
-#include <set>
-
-namespace ue2 {
-
+
+#include <set>
+
+namespace ue2 {
+
struct BoundaryReports : noncopyable {
- std::set<ReportID> report_at_0; /* set of internal reports to fire
- * unconditionally at offset 0 */
- std::set<ReportID> report_at_0_eod; /* set of internal reports to fire
- * unconditionally at offset 0 if it is
- * eod */
- std::set<ReportID> report_at_eod; /* set of internal reports to fire
- * unconditionally at eod */
-};
-
-} // namespace ue2
-
-#endif
+ std::set<ReportID> report_at_0; /* set of internal reports to fire
+ * unconditionally at offset 0 */
+ std::set<ReportID> report_at_0_eod; /* set of internal reports to fire
+ * unconditionally at offset 0 if it is
+ * eod */
+ std::set<ReportID> report_at_eod; /* set of internal reports to fire
+ * unconditionally at eod */
+};
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/charreach.cpp b/contrib/libs/hyperscan/src/util/charreach.cpp
index 28379c2759..9116b719db 100644
--- a/contrib/libs/hyperscan/src/util/charreach.cpp
+++ b/contrib/libs/hyperscan/src/util/charreach.cpp
@@ -1,149 +1,149 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Class for representing character reachability.
- *
- * This is a simple (but hopefully fast) class for representing 8-bit character
- * reachability, along with a bunch of useful operations.
- */
-#include "ue2common.h"
-#include "charreach.h"
-#include "charreach_util.h"
-#include "compare.h"
-#include "unicode_def.h"
-
-#include <cassert>
-#include <string>
-
-namespace ue2 {
-
-/// Switch on the bits corresponding to the characters in \a s.
-void CharReach::set(const std::string &s) {
- for (const auto &c : s) {
- set(c);
- }
-}
-
-/// Do we only contain bits representing alpha characters?
-bool CharReach::isAlpha() const {
- if (none()) {
- return false;
- }
- for (size_t i = find_first(); i != npos; i = find_next(i)) {
- if (!ourisalpha((char)i)) {
- return false;
- }
- }
- return true;
-}
-
-/// Do we represent an uppercase/lowercase pair?
-bool CharReach::isCaselessChar() const {
- if (count() != 2) {
- return false;
- }
- size_t first = find_first();
- size_t second = find_next(first);
- assert(first != npos && second != npos);
- return (char)first == mytoupper((char)second);
-}
-
-/// Do we represent a cheapskate caseless set?
-bool CharReach::isBit5Insensitive() const {
- for (size_t i = find_first(); i != npos; i = find_next(i)) {
- if (!test((char)i ^ 0x20)) {
- return false;
- }
- }
- return true;
-}
-
-/// Return a string containing the characters that are switched on.
-std::string CharReach::to_string() const {
- std::string s;
- for (size_t i = find_first(); i != npos; i = find_next(i)) {
- s += (char)i;
- }
- return s;
-}
-
-/** \brief True iff there is a non-empty intersection between \a and \a b */
-bool overlaps(const CharReach &a, const CharReach &b) {
- return (a & b).any();
-}
-
-/** \brief True iff \a small is a subset of \a big. */
-bool isSubsetOf(const CharReach &small, const CharReach &big) {
- return small.isSubsetOf(big);
-}
-
-/// True if this character class is a subset of \a other.
-bool CharReach::isSubsetOf(const CharReach &other) const {
- return (bits & other.bits) == bits;
-}
-
-void make_caseless(CharReach *cr) {
- for (char c = 'A'; c <= 'Z'; c++) {
- if (cr->test(c) || cr->test(mytolower(c))) {
- cr->set(c);
- cr->set(mytolower(c));
- }
- }
-}
-
-bool isutf8ascii(const CharReach &cr) {
- return (cr & ~CharReach(0x0, 0x7f)).none();
-}
-
-bool isutf8start(const CharReach &cr) {
- return (cr & CharReach(0x0, UTF_CONT_MAX)).none();
-}
-
-void fill_bitvector(const CharReach &cr, u8 *bits) {
- assert(bits);
- std::fill_n(bits, 32, 0);
- for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
- bits[i / 8U] |= (u8)1U << (i % 8U);
- }
-}
-
-void make_and_cmp_mask(const CharReach &cr, u8 *and_mask, u8 *cmp_mask) {
- u8 lo = 0xff;
- u8 hi = 0;
-
- for (size_t c = cr.find_first(); c != cr.npos; c = cr.find_next(c)) {
- hi |= (u8)c;
- lo &= (u8)c;
- }
-
- *and_mask = ~(lo ^ hi);
- *cmp_mask = lo;
-}
-
-} // namespace ue2
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Class for representing character reachability.
+ *
+ * This is a simple (but hopefully fast) class for representing 8-bit character
+ * reachability, along with a bunch of useful operations.
+ */
+#include "ue2common.h"
+#include "charreach.h"
+#include "charreach_util.h"
+#include "compare.h"
+#include "unicode_def.h"
+
+#include <cassert>
+#include <string>
+
+namespace ue2 {
+
+/// Switch on the bits corresponding to the characters in \a s.
+void CharReach::set(const std::string &s) {
+ for (const auto &c : s) {
+ set(c);
+ }
+}
+
+/// Do we only contain bits representing alpha characters?
+bool CharReach::isAlpha() const {
+ if (none()) {
+ return false;
+ }
+ for (size_t i = find_first(); i != npos; i = find_next(i)) {
+ if (!ourisalpha((char)i)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/// Do we represent an uppercase/lowercase pair?
+bool CharReach::isCaselessChar() const {
+ if (count() != 2) {
+ return false;
+ }
+ size_t first = find_first();
+ size_t second = find_next(first);
+ assert(first != npos && second != npos);
+ return (char)first == mytoupper((char)second);
+}
+
+/// Do we represent a cheapskate caseless set?
+bool CharReach::isBit5Insensitive() const {
+ for (size_t i = find_first(); i != npos; i = find_next(i)) {
+ if (!test((char)i ^ 0x20)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/// Return a string containing the characters that are switched on.
+std::string CharReach::to_string() const {
+ std::string s;
+ for (size_t i = find_first(); i != npos; i = find_next(i)) {
+ s += (char)i;
+ }
+ return s;
+}
+
+/** \brief True iff there is a non-empty intersection between \a and \a b */
+bool overlaps(const CharReach &a, const CharReach &b) {
+ return (a & b).any();
+}
+
+/** \brief True iff \a small is a subset of \a big. */
+bool isSubsetOf(const CharReach &small, const CharReach &big) {
+ return small.isSubsetOf(big);
+}
+
+/// True if this character class is a subset of \a other.
+bool CharReach::isSubsetOf(const CharReach &other) const {
+ return (bits & other.bits) == bits;
+}
+
+void make_caseless(CharReach *cr) {
+ for (char c = 'A'; c <= 'Z'; c++) {
+ if (cr->test(c) || cr->test(mytolower(c))) {
+ cr->set(c);
+ cr->set(mytolower(c));
+ }
+ }
+}
+
+bool isutf8ascii(const CharReach &cr) {
+ return (cr & ~CharReach(0x0, 0x7f)).none();
+}
+
+bool isutf8start(const CharReach &cr) {
+ return (cr & CharReach(0x0, UTF_CONT_MAX)).none();
+}
+
+void fill_bitvector(const CharReach &cr, u8 *bits) {
+ assert(bits);
+ std::fill_n(bits, 32, 0);
+ for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
+ bits[i / 8U] |= (u8)1U << (i % 8U);
+ }
+}
+
+void make_and_cmp_mask(const CharReach &cr, u8 *and_mask, u8 *cmp_mask) {
+ u8 lo = 0xff;
+ u8 hi = 0;
+
+ for (size_t c = cr.find_first(); c != cr.npos; c = cr.find_next(c)) {
+ hi |= (u8)c;
+ lo &= (u8)c;
+ }
+
+ *and_mask = ~(lo ^ hi);
+ *cmp_mask = lo;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/util/charreach.h b/contrib/libs/hyperscan/src/util/charreach.h
index f25e940769..f6d3a2af3e 100644
--- a/contrib/libs/hyperscan/src/util/charreach.h
+++ b/contrib/libs/hyperscan/src/util/charreach.h
@@ -1,203 +1,203 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Class for representing character reachability.
- *
- * This is a simple (but hopefully fast) class for representing 8-bit character
- * reachability, along with a bunch of useful operations.
- */
-
-#ifndef NG_CHARREACH_H
-#define NG_CHARREACH_H
-
-#include "ue2common.h"
-#include "util/bitfield.h"
-
-#include <string>
-
-namespace ue2 {
-
-class CharReach {
-private:
- /// Underlying storage.
- ue2::bitfield<256> bits;
-
-public:
- static constexpr size_t npos = decltype(bits)::npos; //!< One past the max value.
-
- /// Empty constructor.
- CharReach() {}
-
- /// Constructor for a character class containing a single char.
- explicit CharReach(unsigned char c) { set(c); }
-
- /// Constructor for a character class representing a contiguous range of
- /// chars, inclusive.
- CharReach(unsigned char from, unsigned char to) { setRange(from, to); }
-
- /// Constructor for a character class based on the set of chars in a
- /// string.
- explicit CharReach(const std::string &str) { set(str); }
-
- /// Returns total capacity.
- static constexpr size_t size() { return npos; }
-
- /// Returns a CharReach with complete reachability (a "dot").
- static CharReach dot() { return CharReach(0, 255); }
-
- /// Complete bitset equality.
- bool operator==(const CharReach &a) const { return bits == a.bits; }
-
- /// Inequality.
- bool operator!=(const CharReach &a) const { return bits != a.bits; }
-
- /// Ordering.
- bool operator<(const CharReach &a) const { return bits < a.bits; }
-
- /// Set all bits.
- void setall() { bits.setall(); }
-
- /// Clear all bits.
- void clear() { bits.clear(); }
-
- /// Clear bit N.
- void clear(unsigned char n) { bits.clear(n); }
-
- /// Set bit N.
- void set(unsigned char n) { bits.set(n); }
-
- /// Test bit N.
- bool test(unsigned char n) const { return bits.test(n); }
-
- /// Flip bit N.
- void flip(unsigned char n) { bits.flip(n); }
-
- /// Flip all bits.
- void flip() { bits.flip(); }
-
- // Switch on the bit in the range (from, to), inclusive.
- void setRange(unsigned char from, unsigned char to) {
- bits.set_range(from, to);
- }
-
- // Switch on the bits corresponding to the characters in \a s.
- void set(const std::string &s);
-
- /// Returns number of bits set on.
- size_t count() const { return bits.count(); }
-
- /// Are no bits set?
- bool none() const { return bits.none(); }
-
- /// Is any bit set?
- bool any() const { return bits.any(); }
-
- /// Are all bits set?
- bool all() const { return bits.all(); }
-
- /// Returns first bit set, or CharReach::npos if none set.
- size_t find_first() const { return bits.find_first(); }
-
- /// Returns last bit set, or CharReach::npos if none set.
- size_t find_last() const { return bits.find_last(); }
-
- /// Returns next bit set, or CharReach::npos if none set after n.
- size_t find_next(size_t last) const { return bits.find_next(last); }
-
- /// Returns (zero-based) N'th bit set, or CharReach::npos if fewer than
- /// N + 1 bits are on.
- size_t find_nth(size_t n) const { return bits.find_nth(n); }
-
- /// Bitwise OR.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Class for representing character reachability.
+ *
+ * This is a simple (but hopefully fast) class for representing 8-bit character
+ * reachability, along with a bunch of useful operations.
+ */
+
+#ifndef NG_CHARREACH_H
+#define NG_CHARREACH_H
+
+#include "ue2common.h"
+#include "util/bitfield.h"
+
+#include <string>
+
+namespace ue2 {
+
+class CharReach {
+private:
+ /// Underlying storage.
+ ue2::bitfield<256> bits;
+
+public:
+ static constexpr size_t npos = decltype(bits)::npos; //!< One past the max value.
+
+ /// Empty constructor.
+ CharReach() {}
+
+ /// Constructor for a character class containing a single char.
+ explicit CharReach(unsigned char c) { set(c); }
+
+ /// Constructor for a character class representing a contiguous range of
+ /// chars, inclusive.
+ CharReach(unsigned char from, unsigned char to) { setRange(from, to); }
+
+ /// Constructor for a character class based on the set of chars in a
+ /// string.
+ explicit CharReach(const std::string &str) { set(str); }
+
+ /// Returns total capacity.
+ static constexpr size_t size() { return npos; }
+
+ /// Returns a CharReach with complete reachability (a "dot").
+ static CharReach dot() { return CharReach(0, 255); }
+
+ /// Complete bitset equality.
+ bool operator==(const CharReach &a) const { return bits == a.bits; }
+
+ /// Inequality.
+ bool operator!=(const CharReach &a) const { return bits != a.bits; }
+
+ /// Ordering.
+ bool operator<(const CharReach &a) const { return bits < a.bits; }
+
+ /// Set all bits.
+ void setall() { bits.setall(); }
+
+ /// Clear all bits.
+ void clear() { bits.clear(); }
+
+ /// Clear bit N.
+ void clear(unsigned char n) { bits.clear(n); }
+
+ /// Set bit N.
+ void set(unsigned char n) { bits.set(n); }
+
+ /// Test bit N.
+ bool test(unsigned char n) const { return bits.test(n); }
+
+ /// Flip bit N.
+ void flip(unsigned char n) { bits.flip(n); }
+
+ /// Flip all bits.
+ void flip() { bits.flip(); }
+
+ // Switch on the bit in the range (from, to), inclusive.
+ void setRange(unsigned char from, unsigned char to) {
+ bits.set_range(from, to);
+ }
+
+ // Switch on the bits corresponding to the characters in \a s.
+ void set(const std::string &s);
+
+ /// Returns number of bits set on.
+ size_t count() const { return bits.count(); }
+
+ /// Are no bits set?
+ bool none() const { return bits.none(); }
+
+ /// Is any bit set?
+ bool any() const { return bits.any(); }
+
+ /// Are all bits set?
+ bool all() const { return bits.all(); }
+
+ /// Returns first bit set, or CharReach::npos if none set.
+ size_t find_first() const { return bits.find_first(); }
+
+ /// Returns last bit set, or CharReach::npos if none set.
+ size_t find_last() const { return bits.find_last(); }
+
+ /// Returns next bit set, or CharReach::npos if none set after n.
+ size_t find_next(size_t last) const { return bits.find_next(last); }
+
+ /// Returns (zero-based) N'th bit set, or CharReach::npos if fewer than
+ /// N + 1 bits are on.
+ size_t find_nth(size_t n) const { return bits.find_nth(n); }
+
+ /// Bitwise OR.
CharReach operator|(const CharReach &a) const {
CharReach cr(*this);
cr.bits |= a.bits;
return cr;
}
-
- /// Bitwise OR-equals.
+
+ /// Bitwise OR-equals.
void operator|=(const CharReach &a) { bits |= a.bits; }
-
- /// Bitwise AND.
+
+ /// Bitwise AND.
CharReach operator&(const CharReach &a) const {
CharReach cr(*this);
cr.bits &= a.bits;
return cr;
}
-
- /// Bitwise AND-equals.
+
+ /// Bitwise AND-equals.
void operator&=(const CharReach &a) { bits &= a.bits; }
-
- /// Bitwise XOR.
+
+ /// Bitwise XOR.
CharReach operator^(const CharReach &a) const {
CharReach cr(*this);
cr.bits ^= a.bits;
return cr;
}
-
- /// Bitwise complement.
+
+ /// Bitwise complement.
CharReach operator~(void) const {
CharReach cr(*this);
cr.flip();
return cr;
}
-
- /// Do we only contain bits representing alpha characters?
- bool isAlpha() const;
-
- /// Do we represent an uppercase/lowercase pair?
- bool isCaselessChar() const;
-
- /// Do we represent a cheapskate caseless set?
- bool isBit5Insensitive() const;
-
- /// Return a string containing the characters that are switched on.
- std::string to_string() const;
-
- /// Hash of enabled bits.
- size_t hash() const { return bits.hash(); }
-
- /// True if this character class is a subset of \a other.
- bool isSubsetOf(const CharReach &other) const;
-};
-
-/** \brief True iff there is a non-empty intersection between \a and \a b */
-bool overlaps(const CharReach &a, const CharReach &b);
-
-/** \brief True iff \a small is a subset of \a big. */
-bool isSubsetOf(const CharReach &small, const CharReach &big);
-
-bool isutf8ascii(const CharReach &cr);
-bool isutf8start(const CharReach &cr);
-
-} // namespace ue2
-
+
+ /// Do we only contain bits representing alpha characters?
+ bool isAlpha() const;
+
+ /// Do we represent an uppercase/lowercase pair?
+ bool isCaselessChar() const;
+
+ /// Do we represent a cheapskate caseless set?
+ bool isBit5Insensitive() const;
+
+ /// Return a string containing the characters that are switched on.
+ std::string to_string() const;
+
+ /// Hash of enabled bits.
+ size_t hash() const { return bits.hash(); }
+
+ /// True if this character class is a subset of \a other.
+ bool isSubsetOf(const CharReach &other) const;
+};
+
+/** \brief True iff there is a non-empty intersection between \a and \a b */
+bool overlaps(const CharReach &a, const CharReach &b);
+
+/** \brief True iff \a small is a subset of \a big. */
+bool isSubsetOf(const CharReach &small, const CharReach &big);
+
+bool isutf8ascii(const CharReach &cr);
+bool isutf8start(const CharReach &cr);
+
+} // namespace ue2
+
namespace std {
template<>
@@ -209,4 +209,4 @@ struct hash<ue2::CharReach> {
} // namespace std
-#endif // NG_CHARREACH_H
+#endif // NG_CHARREACH_H
diff --git a/contrib/libs/hyperscan/src/util/charreach_util.h b/contrib/libs/hyperscan/src/util/charreach_util.h
index 0f2fbbb70c..f0dc4227b0 100644
--- a/contrib/libs/hyperscan/src/util/charreach_util.h
+++ b/contrib/libs/hyperscan/src/util/charreach_util.h
@@ -1,57 +1,57 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef CHARREACH_UTIL_H
-#define CHARREACH_UTIL_H
-
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CHARREACH_UTIL_H
+#define CHARREACH_UTIL_H
+
#include "ue2common.h"
-
-namespace ue2 {
-
+
+namespace ue2 {
+
class CharReach;
-
-void make_caseless(CharReach *cr);
-
-/**
- * \brief Fill a bitvector with the contents of the given CharReach.
- *
- * \a bits should point at an array of 32 bytes.
- */
-void fill_bitvector(const CharReach &cr, u8 *bits);
-
-/**
- * \brief Generate and and compare masks for checking the char reach.
- *
- * Any character c in cr will be result in (c & and_mask) == cmp_mask being true.
- * Note: characters not in cr may also pass the and/cmp checks.
- */
-void make_and_cmp_mask(const CharReach &cr, u8 *and_mask, u8 *cmp_mask);
-
-} // namespace ue2
-
-#endif
+
+void make_caseless(CharReach *cr);
+
+/**
+ * \brief Fill a bitvector with the contents of the given CharReach.
+ *
+ * \a bits should point at an array of 32 bytes.
+ */
+void fill_bitvector(const CharReach &cr, u8 *bits);
+
+/**
+ * \brief Generate and and compare masks for checking the char reach.
+ *
+ * Any character c in cr will be result in (c & and_mask) == cmp_mask being true.
+ * Note: characters not in cr may also pass the and/cmp checks.
+ */
+void make_and_cmp_mask(const CharReach &cr, u8 *and_mask, u8 *cmp_mask);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/compare.h b/contrib/libs/hyperscan/src/util/compare.h
index 8717e5c1ee..eaa717a4c2 100644
--- a/contrib/libs/hyperscan/src/util/compare.h
+++ b/contrib/libs/hyperscan/src/util/compare.h
@@ -1,183 +1,183 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef COMPARE_H
-#define COMPARE_H
-
-#include "unaligned.h"
-#include "ue2common.h"
-
-/* Our own definitions of tolower, toupper and isalpha are provided to prevent
- * us from going out to libc for these tests. */
-
-static really_inline
-char myisupper(const char c) {
- return ((c >= 'A') && (c <= 'Z'));
-}
-
-static really_inline
-char myislower(const char c) {
- return ((c >= 'a') && (c <= 'z'));
-}
-
-static really_inline
-char mytolower(const char c) {
- if (myisupper(c)) {
- return c + 0x20;
- }
- return c;
-}
-
-static really_inline
-char mytoupper(const char c) {
- if (myislower(c)) {
- return c - 0x20;
- }
- return c;
-}
-
-/* this is a slightly warped definition of `alpha'. What we really
- * mean is: does this character have different uppercase and lowercase forms?
- */
-static really_inline char ourisalpha(const char c) {
- return mytolower(c) != mytoupper(c);
-}
-
-static really_inline char ourisprint(const char c) {
- return c >= 0x20 && c <= 0x7e;
-}
-
-// Paul Hsieh's SWAR toupper; used because it doesn't
-// matter whether we go toupper or tolower. We should
-// probably change the other one
-static really_inline
-u32 theirtoupper32(const u32 x) {
- u32 b = 0x80808080ul | x;
- u32 c = b - 0x61616161ul;
- u32 d = ~(b - 0x7b7b7b7bul);
- u32 e = (c & d) & (~x & 0x80808080ul);
- return x - (e >> 2);
-}
-
-// 64-bit variant.
-static really_inline
-u64a theirtoupper64(const u64a x) {
- u64a b = 0x8080808080808080ull | x;
- u64a c = b - 0x6161616161616161ull;
- u64a d = ~(b - 0x7b7b7b7b7b7b7b7bull);
- u64a e = (c & d) & (~x & 0x8080808080808080ull);
- u64a v = x - (e >> 2);
- return v;
-}
-
-static really_inline
-int cmpNocaseNaive(const u8 *p1, const u8 *p2, size_t len) {
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef COMPARE_H
+#define COMPARE_H
+
+#include "unaligned.h"
+#include "ue2common.h"
+
+/* Our own definitions of tolower, toupper and isalpha are provided to prevent
+ * us from going out to libc for these tests. */
+
+static really_inline
+char myisupper(const char c) {
+ return ((c >= 'A') && (c <= 'Z'));
+}
+
+static really_inline
+char myislower(const char c) {
+ return ((c >= 'a') && (c <= 'z'));
+}
+
+static really_inline
+char mytolower(const char c) {
+ if (myisupper(c)) {
+ return c + 0x20;
+ }
+ return c;
+}
+
+static really_inline
+char mytoupper(const char c) {
+ if (myislower(c)) {
+ return c - 0x20;
+ }
+ return c;
+}
+
+/* this is a slightly warped definition of `alpha'. What we really
+ * mean is: does this character have different uppercase and lowercase forms?
+ */
+static really_inline char ourisalpha(const char c) {
+ return mytolower(c) != mytoupper(c);
+}
+
+static really_inline char ourisprint(const char c) {
+ return c >= 0x20 && c <= 0x7e;
+}
+
+// Paul Hsieh's SWAR toupper; used because it doesn't
+// matter whether we go toupper or tolower. We should
+// probably change the other one
+static really_inline
+u32 theirtoupper32(const u32 x) {
+ u32 b = 0x80808080ul | x;
+ u32 c = b - 0x61616161ul;
+ u32 d = ~(b - 0x7b7b7b7bul);
+ u32 e = (c & d) & (~x & 0x80808080ul);
+ return x - (e >> 2);
+}
+
+// 64-bit variant.
+static really_inline
+u64a theirtoupper64(const u64a x) {
+ u64a b = 0x8080808080808080ull | x;
+ u64a c = b - 0x6161616161616161ull;
+ u64a d = ~(b - 0x7b7b7b7b7b7b7b7bull);
+ u64a e = (c & d) & (~x & 0x8080808080808080ull);
+ u64a v = x - (e >> 2);
+ return v;
+}
+
+static really_inline
+int cmpNocaseNaive(const u8 *p1, const u8 *p2, size_t len) {
const u8 *pEnd = p1 + len;
- for (; p1 < pEnd; p1++, p2++) {
+ for (; p1 < pEnd; p1++, p2++) {
assert(!ourisalpha(*p2) || myisupper(*p2)); // Already upper-case.
if ((u8)mytoupper(*p1) != *p2) {
- return 1;
- }
- }
- return 0;
-}
-
-static really_inline
-int cmpCaseNaive(const u8 *p1, const u8 *p2, size_t len) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static really_inline
+int cmpCaseNaive(const u8 *p1, const u8 *p2, size_t len) {
const u8 *pEnd = p1 + len;
- for (; p1 < pEnd; p1++, p2++) {
- if (*p1 != *p2) {
- return 1;
- }
- }
- return 0;
-}
-
-#ifdef ARCH_64_BIT
-# define CMP_T u64a
-# define ULOAD(x) unaligned_load_u64a(x)
-# define TOUPPER(x) theirtoupper64(x)
-#else
-# define CMP_T u32
-# define ULOAD(x) unaligned_load_u32(x)
-# define TOUPPER(x) theirtoupper32(x)
-#endif
-
-#define CMP_SIZE sizeof(CMP_T)
-
+ for (; p1 < pEnd; p1++, p2++) {
+ if (*p1 != *p2) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+#ifdef ARCH_64_BIT
+# define CMP_T u64a
+# define ULOAD(x) unaligned_load_u64a(x)
+# define TOUPPER(x) theirtoupper64(x)
+#else
+# define CMP_T u32
+# define ULOAD(x) unaligned_load_u32(x)
+# define TOUPPER(x) theirtoupper32(x)
+#endif
+
+#define CMP_SIZE sizeof(CMP_T)
+
/**
* \brief Compare two strings, optionally caselessly.
*
* Note: If nocase is true, p2 is assumed to be already upper-case.
*/
-#if defined(ARCH_IA32)
-static UNUSED never_inline
-#else
-static really_inline
-#endif
-int cmpForward(const u8 *p1, const u8 *p2, size_t len, char nocase) {
- if (len < CMP_SIZE) {
- return nocase ? cmpNocaseNaive(p1, p2, len)
- : cmpCaseNaive(p1, p2, len);
- }
-
- const u8 *p1_end = p1 + len - CMP_SIZE;
- const u8 *p2_end = p2 + len - CMP_SIZE;
-
- if (nocase) { // Case-insensitive version.
- for (; p1 < p1_end; p1 += CMP_SIZE, p2 += CMP_SIZE) {
+#if defined(ARCH_IA32)
+static UNUSED never_inline
+#else
+static really_inline
+#endif
+int cmpForward(const u8 *p1, const u8 *p2, size_t len, char nocase) {
+ if (len < CMP_SIZE) {
+ return nocase ? cmpNocaseNaive(p1, p2, len)
+ : cmpCaseNaive(p1, p2, len);
+ }
+
+ const u8 *p1_end = p1 + len - CMP_SIZE;
+ const u8 *p2_end = p2 + len - CMP_SIZE;
+
+ if (nocase) { // Case-insensitive version.
+ for (; p1 < p1_end; p1 += CMP_SIZE, p2 += CMP_SIZE) {
assert(ULOAD(p2) == TOUPPER(ULOAD(p2))); // Already upper-case.
if (TOUPPER(ULOAD(p1)) != ULOAD(p2)) {
- return 1;
- }
- }
+ return 1;
+ }
+ }
assert(ULOAD(p2_end) == TOUPPER(ULOAD(p2_end))); // Already upper-case.
if (TOUPPER(ULOAD(p1_end)) != ULOAD(p2_end)) {
- return 1;
- }
- } else { // Case-sensitive version.
- for (; p1 < p1_end; p1 += CMP_SIZE, p2 += CMP_SIZE) {
- if (ULOAD(p1) != ULOAD(p2)) {
- return 1;
- }
- }
- if (ULOAD(p1_end) != ULOAD(p2_end)) {
- return 1;
- }
- }
-
- return 0;
-}
-
-#undef CMP_T
-#undef ULOAD
-#undef TOUPPER
-#undef CMP_SIZE
-
-#endif
-
+ return 1;
+ }
+ } else { // Case-sensitive version.
+ for (; p1 < p1_end; p1 += CMP_SIZE, p2 += CMP_SIZE) {
+ if (ULOAD(p1) != ULOAD(p2)) {
+ return 1;
+ }
+ }
+ if (ULOAD(p1_end) != ULOAD(p2_end)) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+#undef CMP_T
+#undef ULOAD
+#undef TOUPPER
+#undef CMP_SIZE
+
+#endif
+
diff --git a/contrib/libs/hyperscan/src/util/compile_context.cpp b/contrib/libs/hyperscan/src/util/compile_context.cpp
index b8a957db7c..d18f645389 100644
--- a/contrib/libs/hyperscan/src/util/compile_context.cpp
+++ b/contrib/libs/hyperscan/src/util/compile_context.cpp
@@ -1,46 +1,46 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Global compile context, describes compile environment.
- */
-#include "compile_context.h"
-#include "grey.h"
-
-namespace ue2 {
-
-CompileContext::CompileContext(bool in_isStreaming, bool in_isVectored,
- const target_t &in_target_info,
- const Grey &in_grey)
- : streaming(in_isStreaming || in_isVectored),
- vectored(in_isVectored),
- target_info(in_target_info),
- grey(in_grey) {
-}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Global compile context, describes compile environment.
+ */
+#include "compile_context.h"
+#include "grey.h"
+
+namespace ue2 {
+
+CompileContext::CompileContext(bool in_isStreaming, bool in_isVectored,
+ const target_t &in_target_info,
+ const Grey &in_grey)
+ : streaming(in_isStreaming || in_isVectored),
+ vectored(in_isVectored),
+ target_info(in_target_info),
+ grey(in_grey) {
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/util/compile_context.h b/contrib/libs/hyperscan/src/util/compile_context.h
index 211f95a411..481b1d3760 100644
--- a/contrib/libs/hyperscan/src/util/compile_context.h
+++ b/contrib/libs/hyperscan/src/util/compile_context.h
@@ -1,59 +1,59 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Global compile context, describes compile environment.
- */
-
-#ifndef COMPILE_CONTEXT_H
-#define COMPILE_CONTEXT_H
-
-#include "target_info.h"
-#include "grey.h"
-
-namespace ue2 {
-
-/** \brief Structure for describing the compile environment: grey box settings,
- * target arch, mode flags, etc. */
-struct CompileContext {
- CompileContext(bool isStreaming, bool isVectored,
- const target_t &target_info, const Grey &grey);
-
- const bool streaming; /* streaming or vectored mode */
- const bool vectored;
-
- /** \brief Target platform info. */
- const target_t target_info;
-
- /** \brief Greybox structure, allows tuning of all sorts of behaviour. */
- const Grey grey;
-};
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Global compile context, describes compile environment.
+ */
+
+#ifndef COMPILE_CONTEXT_H
+#define COMPILE_CONTEXT_H
+
+#include "target_info.h"
+#include "grey.h"
+
+namespace ue2 {
+
+/** \brief Structure for describing the compile environment: grey box settings,
+ * target arch, mode flags, etc. */
+struct CompileContext {
+ CompileContext(bool isStreaming, bool isVectored,
+ const target_t &target_info, const Grey &grey);
+
+ const bool streaming; /* streaming or vectored mode */
+ const bool vectored;
+
+ /** \brief Target platform info. */
+ const target_t target_info;
+
+ /** \brief Greybox structure, allows tuning of all sorts of behaviour. */
+ const Grey grey;
+};
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/compile_error.cpp b/contrib/libs/hyperscan/src/util/compile_error.cpp
index 8a916837c5..6519c1bb2f 100644
--- a/contrib/libs/hyperscan/src/util/compile_error.cpp
+++ b/contrib/libs/hyperscan/src/util/compile_error.cpp
@@ -1,59 +1,59 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "compile_error.h"
-
-using namespace std;
-
-namespace ue2 {
-
-CompileError::CompileError(const string &why)
- : reason(why), hasIndex(false), index(0) {
- assert(!why.empty());
- assert(*why.rbegin() == '.');
-}
-
-CompileError::CompileError(unsigned int idx, const string &why)
- : reason(why), hasIndex(true), index(idx) {
- assert(!why.empty());
- assert(*why.rbegin() == '.');
-}
-
-void CompileError::setExpressionIndex(u32 expr_index) {
- hasIndex = true;
- index = expr_index;
-}
-
-CompileError::~CompileError() {}
-
-ResourceLimitError::ResourceLimitError()
- : CompileError("Resource limit exceeded.") {}
-
-ResourceLimitError::~ResourceLimitError() {}
-
-} // namespace ue2
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "compile_error.h"
+
+using namespace std;
+
+namespace ue2 {
+
+CompileError::CompileError(const string &why)
+ : reason(why), hasIndex(false), index(0) {
+ assert(!why.empty());
+ assert(*why.rbegin() == '.');
+}
+
+CompileError::CompileError(unsigned int idx, const string &why)
+ : reason(why), hasIndex(true), index(idx) {
+ assert(!why.empty());
+ assert(*why.rbegin() == '.');
+}
+
+void CompileError::setExpressionIndex(u32 expr_index) {
+ hasIndex = true;
+ index = expr_index;
+}
+
+CompileError::~CompileError() {}
+
+ResourceLimitError::ResourceLimitError()
+ : CompileError("Resource limit exceeded.") {}
+
+ResourceLimitError::~ResourceLimitError() {}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/util/compile_error.h b/contrib/libs/hyperscan/src/util/compile_error.h
index 87e156a5f9..a38220526b 100644
--- a/contrib/libs/hyperscan/src/util/compile_error.h
+++ b/contrib/libs/hyperscan/src/util/compile_error.h
@@ -1,68 +1,68 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef UTIL_COMPILE_ERROR_H
-#define UTIL_COMPILE_ERROR_H
-
-#include <cassert>
-#include <stdexcept>
-#include <string>
-
-#include "ue2common.h"
-
-namespace ue2 {
-
-/** \brief Error thrown by the compiler, can reference a specific expression
- * index. */
-class CompileError {
-public:
- // Note: 'why' should describe why the error occurred and end with a
- // full stop, but no line break.
- explicit CompileError(const std::string &why);
- CompileError(u32 index, const std::string &why);
-
- virtual ~CompileError();
-
- void setExpressionIndex(u32 index);
-
- std::string reason; //!< Reason for the error
- bool hasIndex; //!< Does it reference a specific expression?
- u32 index; //!< The index of the expression referred to
-};
-
-/** \brief Error thrown by the compiler when an arbitrary resource limit (as
- * specified in the grey box) is exceeded. */
-class ResourceLimitError : public CompileError {
-public:
- ResourceLimitError();
- ~ResourceLimitError() override;
-};
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef UTIL_COMPILE_ERROR_H
+#define UTIL_COMPILE_ERROR_H
+
+#include <cassert>
+#include <stdexcept>
+#include <string>
+
+#include "ue2common.h"
+
+namespace ue2 {
+
+/** \brief Error thrown by the compiler, can reference a specific expression
+ * index. */
+class CompileError {
+public:
+ // Note: 'why' should describe why the error occurred and end with a
+ // full stop, but no line break.
+ explicit CompileError(const std::string &why);
+ CompileError(u32 index, const std::string &why);
+
+ virtual ~CompileError();
+
+ void setExpressionIndex(u32 index);
+
+ std::string reason; //!< Reason for the error
+ bool hasIndex; //!< Does it reference a specific expression?
+ u32 index; //!< The index of the expression referred to
+};
+
+/** \brief Error thrown by the compiler when an arbitrary resource limit (as
+ * specified in the grey box) is exceeded. */
+class ResourceLimitError : public CompileError {
+public:
+ ResourceLimitError();
+ ~ResourceLimitError() override;
+};
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/container.h b/contrib/libs/hyperscan/src/util/container.h
index 83aa318b6c..68f60e99ee 100644
--- a/contrib/libs/hyperscan/src/util/container.h
+++ b/contrib/libs/hyperscan/src/util/container.h
@@ -1,94 +1,94 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Convenience template functions for containers.
- */
-
-#ifndef UTIL_CONTAINER_H
-#define UTIL_CONTAINER_H
-
-#include "ue2common.h"
-
-#include <algorithm>
-#include <cassert>
-#include <cstring>
-#include <set>
-#include <type_traits>
-#include <utility>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Convenience template functions for containers.
+ */
+
+#ifndef UTIL_CONTAINER_H
+#define UTIL_CONTAINER_H
+
+#include "ue2common.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstring>
+#include <set>
+#include <type_traits>
+#include <utility>
#include <vector>
-
-namespace ue2 {
-
-// Existence check for associative containers.
-template<typename C>
-bool contains(const C &container, const typename C::key_type &key) {
- return container.find(key) != container.end();
-}
-
-template<typename C, typename It>
-bool contains_any_of(const C &container, const std::pair<It, It> &range) {
- return std::find_first_of(range.first, range.second, container.begin(),
- container.end()) != range.second;
-}
-
-template<typename C, typename It>
-void insert(C *container, const std::pair<It, It> &range) {
- container->insert(range.first, range.second);
-}
-
-template<typename C, typename It>
-void insert(C *container, typename C::iterator pos,
- const std::pair<It, It> &range) {
- container->insert(pos, range.first, range.second);
-}
-
-template<typename C, typename D>
-void insert(C *container, const D &donor) {
- container->insert(donor.begin(), donor.end());
-}
-
-template<typename C, typename D>
-void insert(C *container, typename C::iterator pos, const D &donor) {
- container->insert(pos, donor.begin(), donor.end());
-}
-
-/**
+
+namespace ue2 {
+
+// Existence check for associative containers.
+template<typename C>
+bool contains(const C &container, const typename C::key_type &key) {
+ return container.find(key) != container.end();
+}
+
+template<typename C, typename It>
+bool contains_any_of(const C &container, const std::pair<It, It> &range) {
+ return std::find_first_of(range.first, range.second, container.begin(),
+ container.end()) != range.second;
+}
+
+template<typename C, typename It>
+void insert(C *container, const std::pair<It, It> &range) {
+ container->insert(range.first, range.second);
+}
+
+template<typename C, typename It>
+void insert(C *container, typename C::iterator pos,
+ const std::pair<It, It> &range) {
+ container->insert(pos, range.first, range.second);
+}
+
+template<typename C, typename D>
+void insert(C *container, const D &donor) {
+ container->insert(donor.begin(), donor.end());
+}
+
+template<typename C, typename D>
+void insert(C *container, typename C::iterator pos, const D &donor) {
+ container->insert(pos, donor.begin(), donor.end());
+}
+
+/**
* \brief Constructs a vector from a range bounded by the given pair of
* iterators.
*/
-template <typename It>
-auto make_vector_from(const std::pair<It, It> &range)
- -> std::vector<decltype(*range.first)> {
- using T = decltype(*range.first);
- return std::vector<T>(range.first, range.second);
-}
-
+template <typename It>
+auto make_vector_from(const std::pair<It, It> &range)
+ -> std::vector<decltype(*range.first)> {
+ using T = decltype(*range.first);
+ return std::vector<T>(range.first, range.second);
+}
+
/** \brief Sort a sequence container and remove duplicates. */
template <typename C, typename Compare = std::less<typename C::value_type>>
void sort_and_unique(C &container, Compare comp = Compare()) {
@@ -97,111 +97,111 @@ void sort_and_unique(C &container, Compare comp = Compare()) {
std::end(container));
}
-/** \brief Returns a set containing the keys in the given associative
- * container. */
-template <typename C>
-std::set<typename C::key_type> assoc_keys(const C &container) {
- std::set<typename C::key_type> keys;
- for (const auto &elem : container) {
- keys.insert(elem.first);
- }
- return keys;
-}
-
-/**
- * \brief Return the length in bytes of the given vector of (POD) objects.
- */
+/** \brief Returns a set containing the keys in the given associative
+ * container. */
+template <typename C>
+std::set<typename C::key_type> assoc_keys(const C &container) {
+ std::set<typename C::key_type> keys;
+ for (const auto &elem : container) {
+ keys.insert(elem.first);
+ }
+ return keys;
+}
+
+/**
+ * \brief Return the length in bytes of the given vector of (POD) objects.
+ */
template <typename T, typename Alloc>
typename std::vector<T, Alloc>::size_type
byte_length(const std::vector<T, Alloc> &vec) {
- static_assert(std::is_pod<T>::value, "should be pod");
- return vec.size() * sizeof(T);
-}
-
-/**
- * \brief Copy the given vector of POD objects to the given location in memory.
- * It is safe to give this function an empty vector.
- */
+ static_assert(std::is_pod<T>::value, "should be pod");
+ return vec.size() * sizeof(T);
+}
+
+/**
+ * \brief Copy the given vector of POD objects to the given location in memory.
+ * It is safe to give this function an empty vector.
+ */
template<typename T, typename Alloc>
void *copy_bytes(void *dest, const std::vector<T, Alloc> &vec) {
- static_assert(std::is_pod<T>::value, "should be pod");
- assert(dest);
-
- // Since we're generally using this function to write into the bytecode,
- // dest should be appropriately aligned for T.
- assert(ISALIGNED_N(dest, alignof(T)));
-
- if (vec.empty()) {
- return dest; // Protect memcpy against null pointers.
- }
- assert(vec.data() != nullptr);
- return std::memcpy(dest, vec.data(), byte_length(vec));
-}
-
-template<typename OrderedContainer1, typename OrderedContainer2>
-bool is_subset_of(const OrderedContainer1 &small, const OrderedContainer2 &big) {
- static_assert(std::is_same<typename OrderedContainer1::value_type,
- typename OrderedContainer2::value_type>::value,
- "Both containers should have the same value_type");
- auto sit = small.begin();
- auto bit = big.begin();
- if (small.size() > big.size()) {
- return false;
- }
-
- while (sit != small.end()) {
- if (bit == big.end()) {
- return false;
- }
-
- if (*sit == *bit) {
- ++sit;
- ++bit;
- continue;
- }
- if (*bit < *sit) {
- ++bit;
- continue;
- }
-
- return false;
- }
- return true;
-}
-
-template<typename OrderedContainer1, typename OrderedContainer2>
-bool has_intersection(const OrderedContainer1 &a, const OrderedContainer2 &b) {
- static_assert(std::is_same<typename OrderedContainer1::value_type,
- typename OrderedContainer2::value_type>::value,
- "Both containers should have the same value_type");
- auto ait = a.begin();
- auto bit = b.begin();
- while (ait != a.end() && bit != b.end()) {
- if (*ait == *bit) {
- return true;
- }
-
- if (*ait < *bit) {
- ++ait;
- } else {
- ++bit;
- }
- }
-
- return false;
-}
-
-/**
- * \brief Erase the elements (by value) in the donor container from the given
- * container.
- */
-template<typename C, typename D>
-void erase_all(C *container, const D &donor) {
- for (const auto &elem : donor) {
- container->erase(elem);
- }
-}
-
+ static_assert(std::is_pod<T>::value, "should be pod");
+ assert(dest);
+
+ // Since we're generally using this function to write into the bytecode,
+ // dest should be appropriately aligned for T.
+ assert(ISALIGNED_N(dest, alignof(T)));
+
+ if (vec.empty()) {
+ return dest; // Protect memcpy against null pointers.
+ }
+ assert(vec.data() != nullptr);
+ return std::memcpy(dest, vec.data(), byte_length(vec));
+}
+
+template<typename OrderedContainer1, typename OrderedContainer2>
+bool is_subset_of(const OrderedContainer1 &small, const OrderedContainer2 &big) {
+ static_assert(std::is_same<typename OrderedContainer1::value_type,
+ typename OrderedContainer2::value_type>::value,
+ "Both containers should have the same value_type");
+ auto sit = small.begin();
+ auto bit = big.begin();
+ if (small.size() > big.size()) {
+ return false;
+ }
+
+ while (sit != small.end()) {
+ if (bit == big.end()) {
+ return false;
+ }
+
+ if (*sit == *bit) {
+ ++sit;
+ ++bit;
+ continue;
+ }
+ if (*bit < *sit) {
+ ++bit;
+ continue;
+ }
+
+ return false;
+ }
+ return true;
+}
+
+template<typename OrderedContainer1, typename OrderedContainer2>
+bool has_intersection(const OrderedContainer1 &a, const OrderedContainer2 &b) {
+ static_assert(std::is_same<typename OrderedContainer1::value_type,
+ typename OrderedContainer2::value_type>::value,
+ "Both containers should have the same value_type");
+ auto ait = a.begin();
+ auto bit = b.begin();
+ while (ait != a.end() && bit != b.end()) {
+ if (*ait == *bit) {
+ return true;
+ }
+
+ if (*ait < *bit) {
+ ++ait;
+ } else {
+ ++bit;
+ }
+ }
+
+ return false;
+}
+
+/**
+ * \brief Erase the elements (by value) in the donor container from the given
+ * container.
+ */
+template<typename C, typename D>
+void erase_all(C *container, const D &donor) {
+ for (const auto &elem : donor) {
+ container->erase(elem);
+ }
+}
+
template<typename C, typename Pred>
bool any_of_in(const C &c, Pred p) {
@@ -213,33 +213,33 @@ bool all_of_in(const C &c, Pred p) {
return std::all_of(c.begin(), c.end(), std::move(p));
}
-} // namespace ue2
-
-#ifdef DUMP_SUPPORT
-
-#include <sstream>
-#include <string>
-
-namespace ue2 {
-
-/**
- * \brief Dump a container of stream-printable objects into a comma-separated
- * list in a string.
- */
-template<class C>
-std::string as_string_list(const C &c) {
- std::ostringstream oss;
- for (auto it = c.begin(); it != c.end(); ++it) {
- if (it != c.begin()) {
- oss << ", ";
- }
- oss << *it;
- }
- return oss.str();
-}
-
-} // namespace ue2
-
-#endif // DUMP_SUPPORT
-
-#endif // UTIL_CONTAINER_H
+} // namespace ue2
+
+#ifdef DUMP_SUPPORT
+
+#include <sstream>
+#include <string>
+
+namespace ue2 {
+
+/**
+ * \brief Dump a container of stream-printable objects into a comma-separated
+ * list in a string.
+ */
+template<class C>
+std::string as_string_list(const C &c) {
+ std::ostringstream oss;
+ for (auto it = c.begin(); it != c.end(); ++it) {
+ if (it != c.begin()) {
+ oss << ", ";
+ }
+ oss << *it;
+ }
+ return oss.str();
+}
+
+} // namespace ue2
+
+#endif // DUMP_SUPPORT
+
+#endif // UTIL_CONTAINER_H
diff --git a/contrib/libs/hyperscan/src/util/cpuid_flags.c b/contrib/libs/hyperscan/src/util/cpuid_flags.c
index 84f6077d32..c00ce58e2d 100644
--- a/contrib/libs/hyperscan/src/util/cpuid_flags.c
+++ b/contrib/libs/hyperscan/src/util/cpuid_flags.c
@@ -1,50 +1,50 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "cpuid_flags.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpuid_flags.h"
#include "cpuid_inline.h"
-#include "ue2common.h"
-#include "hs_compile.h" // for HS_MODE_ flags
-#include "hs_internal.h"
+#include "ue2common.h"
+#include "hs_compile.h" // for HS_MODE_ flags
+#include "hs_internal.h"
#include "util/arch.h"
-
+
#if !defined(_WIN32) && !defined(CPUID_H_)
-#include <cpuid.h>
-#endif
-
-u64a cpuid_flags(void) {
- u64a cap = 0;
-
+#include <cpuid.h>
+#endif
+
+u64a cpuid_flags(void) {
+ u64a cap = 0;
+
if (check_avx2()) {
DEBUG_PRINTF("AVX2 enabled\n");
- cap |= HS_CPU_FEATURES_AVX2;
- }
-
+ cap |= HS_CPU_FEATURES_AVX2;
+ }
+
if (check_avx512()) {
DEBUG_PRINTF("AVX512 enabled\n");
cap |= HS_CPU_FEATURES_AVX512;
@@ -56,9 +56,9 @@ u64a cpuid_flags(void) {
}
#if !defined(FAT_RUNTIME) && !defined(HAVE_AVX2)
- cap &= ~HS_CPU_FEATURES_AVX2;
-#endif
-
+ cap &= ~HS_CPU_FEATURES_AVX2;
+#endif
+
#if (!defined(FAT_RUNTIME) && !defined(HAVE_AVX512)) || \
(defined(FAT_RUNTIME) && !defined(BUILD_AVX512))
cap &= ~HS_CPU_FEATURES_AVX512;
@@ -69,108 +69,108 @@ u64a cpuid_flags(void) {
cap &= ~HS_CPU_FEATURES_AVX512VBMI;
#endif
- return cap;
-}
-
-struct family_id {
- u32 full_family;
- u32 full_model;
- u32 tune;
-};
-
-/* from table 35-1 of the Intel 64 and IA32 Arch. Software Developer's Manual
- * and "Intel Architecture and Processor Identification With CPUID Model and
- * Family Numbers" */
-static const struct family_id known_microarch[] = {
- { 0x6, 0x37, HS_TUNE_FAMILY_SLM }, /* baytrail */
+ return cap;
+}
+
+struct family_id {
+ u32 full_family;
+ u32 full_model;
+ u32 tune;
+};
+
+/* from table 35-1 of the Intel 64 and IA32 Arch. Software Developer's Manual
+ * and "Intel Architecture and Processor Identification With CPUID Model and
+ * Family Numbers" */
+static const struct family_id known_microarch[] = {
+ { 0x6, 0x37, HS_TUNE_FAMILY_SLM }, /* baytrail */
{ 0x6, 0x4A, HS_TUNE_FAMILY_SLM }, /* silvermont */
{ 0x6, 0x4C, HS_TUNE_FAMILY_SLM }, /* silvermont */
- { 0x6, 0x4D, HS_TUNE_FAMILY_SLM }, /* avoton, rangley */
+ { 0x6, 0x4D, HS_TUNE_FAMILY_SLM }, /* avoton, rangley */
{ 0x6, 0x5A, HS_TUNE_FAMILY_SLM }, /* silvermont */
{ 0x6, 0x5D, HS_TUNE_FAMILY_SLM }, /* silvermont */
-
+
{ 0x6, 0x5C, HS_TUNE_FAMILY_GLM }, /* goldmont */
{ 0x6, 0x5F, HS_TUNE_FAMILY_GLM }, /* denverton */
- { 0x6, 0x3C, HS_TUNE_FAMILY_HSW }, /* haswell */
- { 0x6, 0x45, HS_TUNE_FAMILY_HSW }, /* haswell */
- { 0x6, 0x46, HS_TUNE_FAMILY_HSW }, /* haswell */
+ { 0x6, 0x3C, HS_TUNE_FAMILY_HSW }, /* haswell */
+ { 0x6, 0x45, HS_TUNE_FAMILY_HSW }, /* haswell */
+ { 0x6, 0x46, HS_TUNE_FAMILY_HSW }, /* haswell */
{ 0x6, 0x3F, HS_TUNE_FAMILY_HSW }, /* haswell Xeon */
-
+
{ 0x6, 0x3E, HS_TUNE_FAMILY_IVB }, /* ivybridge Xeon */
- { 0x6, 0x3A, HS_TUNE_FAMILY_IVB }, /* ivybridge */
-
- { 0x6, 0x2A, HS_TUNE_FAMILY_SNB }, /* sandybridge */
+ { 0x6, 0x3A, HS_TUNE_FAMILY_IVB }, /* ivybridge */
+
+ { 0x6, 0x2A, HS_TUNE_FAMILY_SNB }, /* sandybridge */
{ 0x6, 0x2D, HS_TUNE_FAMILY_SNB }, /* sandybridge Xeon */
-
- { 0x6, 0x3D, HS_TUNE_FAMILY_BDW }, /* broadwell Core-M */
+
+ { 0x6, 0x3D, HS_TUNE_FAMILY_BDW }, /* broadwell Core-M */
{ 0x6, 0x47, HS_TUNE_FAMILY_BDW }, /* broadwell */
- { 0x6, 0x4F, HS_TUNE_FAMILY_BDW }, /* broadwell xeon */
- { 0x6, 0x56, HS_TUNE_FAMILY_BDW }, /* broadwell xeon-d */
-
+ { 0x6, 0x4F, HS_TUNE_FAMILY_BDW }, /* broadwell xeon */
+ { 0x6, 0x56, HS_TUNE_FAMILY_BDW }, /* broadwell xeon-d */
+
{ 0x6, 0x4E, HS_TUNE_FAMILY_SKL }, /* Skylake Mobile */
{ 0x6, 0x5E, HS_TUNE_FAMILY_SKL }, /* Skylake Core/E3 Xeon */
{ 0x6, 0x55, HS_TUNE_FAMILY_SKX }, /* Skylake Xeon */
-
+
{ 0x6, 0x8E, HS_TUNE_FAMILY_SKL }, /* Kabylake Mobile */
{ 0x6, 0x9E, HS_TUNE_FAMILY_SKL }, /* Kabylake desktop */
-
+
{ 0x6, 0x7D, HS_TUNE_FAMILY_ICL }, /* Icelake */
{ 0x6, 0x7E, HS_TUNE_FAMILY_ICL }, /* Icelake */
{ 0x6, 0x6A, HS_TUNE_FAMILY_ICX }, /* Icelake Xeon-D */
{ 0x6, 0x6C, HS_TUNE_FAMILY_ICX }, /* Icelake Xeon */
-};
-
-#ifdef DUMP_SUPPORT
-static UNUSED
-const char *dumpTune(u32 tune) {
-#define T_CASE(x) case x: return #x;
- switch (tune) {
- T_CASE(HS_TUNE_FAMILY_SLM);
+};
+
+#ifdef DUMP_SUPPORT
+static UNUSED
+const char *dumpTune(u32 tune) {
+#define T_CASE(x) case x: return #x;
+ switch (tune) {
+ T_CASE(HS_TUNE_FAMILY_SLM);
T_CASE(HS_TUNE_FAMILY_GLM);
- T_CASE(HS_TUNE_FAMILY_HSW);
- T_CASE(HS_TUNE_FAMILY_SNB);
- T_CASE(HS_TUNE_FAMILY_IVB);
- T_CASE(HS_TUNE_FAMILY_BDW);
+ T_CASE(HS_TUNE_FAMILY_HSW);
+ T_CASE(HS_TUNE_FAMILY_SNB);
+ T_CASE(HS_TUNE_FAMILY_IVB);
+ T_CASE(HS_TUNE_FAMILY_BDW);
T_CASE(HS_TUNE_FAMILY_SKL);
T_CASE(HS_TUNE_FAMILY_SKX);
T_CASE(HS_TUNE_FAMILY_ICL);
T_CASE(HS_TUNE_FAMILY_ICX);
- }
-#undef T_CASE
- return "unknown";
-}
-#endif
-
-u32 cpuid_tune(void) {
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(1, 0, &eax, &ebx, &ecx, &edx);
-
- u32 family = (eax >> 8) & 0xf;
- u32 model = 0;
-
- if (family == 0x6 || family == 0xf) {
- model = ((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0);
- } else {
- model = (eax >> 4) & 0xf;
- }
-
- DEBUG_PRINTF("family = %xh model = %xh\n", family, model);
- for (u32 i = 0; i < ARRAY_LENGTH(known_microarch); i++) {
- if (family != known_microarch[i].full_family) {
- continue;
- }
-
- if (model != known_microarch[i].full_model) {
- continue;
- }
-
- u32 tune = known_microarch[i].tune;
- DEBUG_PRINTF("found tune flag %s\n", dumpTune(tune) );
- return tune;
- }
-
- return HS_TUNE_FAMILY_GENERIC;
-}
+ }
+#undef T_CASE
+ return "unknown";
+}
+#endif
+
+u32 cpuid_tune(void) {
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(1, 0, &eax, &ebx, &ecx, &edx);
+
+ u32 family = (eax >> 8) & 0xf;
+ u32 model = 0;
+
+ if (family == 0x6 || family == 0xf) {
+ model = ((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0);
+ } else {
+ model = (eax >> 4) & 0xf;
+ }
+
+ DEBUG_PRINTF("family = %xh model = %xh\n", family, model);
+ for (u32 i = 0; i < ARRAY_LENGTH(known_microarch); i++) {
+ if (family != known_microarch[i].full_family) {
+ continue;
+ }
+
+ if (model != known_microarch[i].full_model) {
+ continue;
+ }
+
+ u32 tune = known_microarch[i].tune;
+ DEBUG_PRINTF("found tune flag %s\n", dumpTune(tune) );
+ return tune;
+ }
+
+ return HS_TUNE_FAMILY_GENERIC;
+}
diff --git a/contrib/libs/hyperscan/src/util/cpuid_flags.h b/contrib/libs/hyperscan/src/util/cpuid_flags.h
index c7ab2522d9..527c6d52f3 100644
--- a/contrib/libs/hyperscan/src/util/cpuid_flags.h
+++ b/contrib/libs/hyperscan/src/util/cpuid_flags.h
@@ -1,55 +1,55 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
#ifndef UTIL_CPUID_H_
#define UTIL_CPUID_H_
-
-#include "ue2common.h"
-
+
+#include "ue2common.h"
+
#if !defined(_WIN32) && !defined(CPUID_H_)
#include <cpuid.h>
/* system header doesn't have a header guard */
#define CPUID_H_
#endif
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-/* returns HS_CPU_FEATURES_* mask. */
-u64a cpuid_flags(void);
-
-u32 cpuid_tune(void);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* returns HS_CPU_FEATURES_* mask. */
+u64a cpuid_flags(void);
+
+u32 cpuid_tune(void);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
#endif /* UTIL_CPUID_H_ */
-
+
diff --git a/contrib/libs/hyperscan/src/util/depth.cpp b/contrib/libs/hyperscan/src/util/depth.cpp
index fdfc66546a..475458763a 100644
--- a/contrib/libs/hyperscan/src/util/depth.cpp
+++ b/contrib/libs/hyperscan/src/util/depth.cpp
@@ -1,91 +1,91 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Data types used to represent depth quantities.
- */
-#include "depth.h"
-#include "ue2common.h"
-
-#include <algorithm> // std::min, std::max
-
-namespace ue2 {
-
-DepthMinMax unionDepthMinMax(const DepthMinMax &a, const DepthMinMax &b) {
- DepthMinMax rv;
-
- if (a.min.is_unreachable()) {
- rv.min = b.min;
- } else if (b.min.is_unreachable()) {
- rv.min = a.min;
- } else {
- rv.min = std::min(a.min, b.min);
- }
-
- if (a.max.is_infinite() || b.max.is_infinite()) {
- rv.max = depth::infinity();
- } else if (a.max.is_unreachable()) {
- rv.max = b.max;
- } else if (b.max.is_unreachable()) {
- rv.max = a.max;
- } else {
- rv.max = std::max(a.max, b.max);
- }
-
- return rv;
-}
-
-} // namespace ue2
-
-#ifdef DUMP_SUPPORT
-
-#include <sstream>
-#include <string>
-
-namespace ue2 {
-
-std::string depth::str() const {
- if (is_unreachable()) {
- return "unr";
- } else if (is_infinite()) {
- return "inf";
- }
- std::ostringstream oss;
- oss << val;
- return oss.str();
-}
-
-std::string DepthMinMax::str() const {
- std::ostringstream oss;
- oss << "[" << min.str() << "," << max.str() << "]";
- return oss.str();
-}
-
-} // namespace ue2
-
-#endif // DUMP_SUPPORT
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Data types used to represent depth quantities.
+ */
+#include "depth.h"
+#include "ue2common.h"
+
+#include <algorithm> // std::min, std::max
+
+namespace ue2 {
+
+DepthMinMax unionDepthMinMax(const DepthMinMax &a, const DepthMinMax &b) {
+ DepthMinMax rv;
+
+ if (a.min.is_unreachable()) {
+ rv.min = b.min;
+ } else if (b.min.is_unreachable()) {
+ rv.min = a.min;
+ } else {
+ rv.min = std::min(a.min, b.min);
+ }
+
+ if (a.max.is_infinite() || b.max.is_infinite()) {
+ rv.max = depth::infinity();
+ } else if (a.max.is_unreachable()) {
+ rv.max = b.max;
+ } else if (b.max.is_unreachable()) {
+ rv.max = a.max;
+ } else {
+ rv.max = std::max(a.max, b.max);
+ }
+
+ return rv;
+}
+
+} // namespace ue2
+
+#ifdef DUMP_SUPPORT
+
+#include <sstream>
+#include <string>
+
+namespace ue2 {
+
+std::string depth::str() const {
+ if (is_unreachable()) {
+ return "unr";
+ } else if (is_infinite()) {
+ return "inf";
+ }
+ std::ostringstream oss;
+ oss << val;
+ return oss.str();
+}
+
+std::string DepthMinMax::str() const {
+ std::ostringstream oss;
+ oss << "[" << min.str() << "," << max.str() << "]";
+ return oss.str();
+}
+
+} // namespace ue2
+
+#endif // DUMP_SUPPORT
diff --git a/contrib/libs/hyperscan/src/util/depth.h b/contrib/libs/hyperscan/src/util/depth.h
index ad06184647..5305c6f1b3 100644
--- a/contrib/libs/hyperscan/src/util/depth.h
+++ b/contrib/libs/hyperscan/src/util/depth.h
@@ -1,198 +1,198 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Data types used to represent depth quantities.
- */
-
-#ifndef DEPTH_H
-#define DEPTH_H
-
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Data types used to represent depth quantities.
+ */
+
+#ifndef DEPTH_H
+#define DEPTH_H
+
+#include "ue2common.h"
#include "util/hash.h"
#include "util/operators.h"
-
-#ifdef DUMP_SUPPORT
-#include <string>
-#endif
-
-namespace ue2 {
-
-/**
- * \brief Exception thrown if a depth operation overflows.
- */
-struct DepthOverflowError {};
-
-/**
- * \brief Type used to represent depth information; value is either a count,
- * or the special values "infinity" and "unreachable".
- */
+
+#ifdef DUMP_SUPPORT
+#include <string>
+#endif
+
+namespace ue2 {
+
+/**
+ * \brief Exception thrown if a depth operation overflows.
+ */
+struct DepthOverflowError {};
+
+/**
+ * \brief Type used to represent depth information; value is either a count,
+ * or the special values "infinity" and "unreachable".
+ */
class depth : totally_ordered<depth> {
-public:
+public:
/** \brief The default depth is special value "unreachable". */
depth() = default;
-
+
explicit depth(u32 v) : val(v) {
- if (v > max_value()) {
- DEBUG_PRINTF("depth %u too large to represent!\n", v);
- throw DepthOverflowError();
- }
- }
-
- static depth unreachable() {
- depth d;
- d.val = val_unreachable;
- return d;
- }
-
- static depth infinity() {
- depth d;
- d.val = val_infinity;
- return d;
- }
-
- /** \brief Returns the max finite value representable as a depth. */
- static constexpr u32 max_value() { return val_infinity - 1; }
-
- bool is_finite() const { return val < val_infinity; }
- bool is_infinite() const { return val == val_infinity; }
- bool is_unreachable() const { return val == val_unreachable; }
- bool is_reachable() const { return !is_unreachable(); }
-
- /** \brief Convert a finite depth to an integer. */
- operator u32() const {
- if (!is_finite()) {
- throw DepthOverflowError();
- }
- return val;
- }
-
- bool operator<(const depth &d) const { return val < d.val; }
- bool operator==(const depth &d) const { return val == d.val; }
-
- // The following comparison operators exist for use against integer types
- // that are bigger than what we can safely convert to depth (such as those
- // in extparam).
-
- bool operator<(u64a d) const {
- if (!is_finite()) {
- return false;
- }
- return val < d;
- }
- bool operator<=(u64a d) const {
- if (!is_finite()) {
- return false;
- }
- return val <= d;
- }
- bool operator==(u64a d) const {
- if (!is_finite()) {
- return false;
- }
- return val == d;
- }
- bool operator>(u64a d) const { return !(*this <= d); }
- bool operator>=(u64a d) const { return !(*this < d); }
- bool operator!=(u64a d) const { return !(*this == d); }
-
- depth operator+(const depth &d) const {
- if (is_unreachable() || d.is_unreachable()) {
- return unreachable();
- }
- if (is_infinite() || d.is_infinite()) {
- return infinity();
- }
-
- u64a rv = val + d.val;
- if (rv >= val_infinity) {
- DEBUG_PRINTF("depth %llu too large to represent!\n", rv);
- throw DepthOverflowError();
- }
-
- return depth((u32)rv);
- }
-
- depth &operator+=(const depth &d) {
- depth rv = *this + d;
- *this = rv;
- return *this;
- }
-
- depth operator-(const depth &d) const {
- if (!d.is_finite()) {
- throw DepthOverflowError();
- }
-
- if (is_unreachable()) {
- return unreachable();
- }
- if (is_infinite()) {
- return infinity();
- }
-
- if (val < d.val) {
- throw DepthOverflowError();
- }
-
- u32 rv = val - d.val;
- return depth(rv);
- }
-
- depth &operator-=(const depth &d) {
- depth rv = *this - d;
- *this = rv;
- return *this;
- }
-
- depth operator+(s32 d) const {
- if (is_unreachable()) {
- return unreachable();
- }
- if (is_infinite()) {
- return infinity();
- }
-
- s64a rv = val + d;
- if (rv < 0 || (u64a)rv >= val_infinity) {
- DEBUG_PRINTF("depth %lld too large to represent!\n", rv);
- throw DepthOverflowError();
- }
-
- return depth((u32)rv);
- }
-
- depth operator+=(s32 d) {
- depth rv = *this + d;
- *this = rv;
- return *this;
- }
-
+ if (v > max_value()) {
+ DEBUG_PRINTF("depth %u too large to represent!\n", v);
+ throw DepthOverflowError();
+ }
+ }
+
+ static depth unreachable() {
+ depth d;
+ d.val = val_unreachable;
+ return d;
+ }
+
+ static depth infinity() {
+ depth d;
+ d.val = val_infinity;
+ return d;
+ }
+
+ /** \brief Returns the max finite value representable as a depth. */
+ static constexpr u32 max_value() { return val_infinity - 1; }
+
+ bool is_finite() const { return val < val_infinity; }
+ bool is_infinite() const { return val == val_infinity; }
+ bool is_unreachable() const { return val == val_unreachable; }
+ bool is_reachable() const { return !is_unreachable(); }
+
+ /** \brief Convert a finite depth to an integer. */
+ operator u32() const {
+ if (!is_finite()) {
+ throw DepthOverflowError();
+ }
+ return val;
+ }
+
+ bool operator<(const depth &d) const { return val < d.val; }
+ bool operator==(const depth &d) const { return val == d.val; }
+
+ // The following comparison operators exist for use against integer types
+ // that are bigger than what we can safely convert to depth (such as those
+ // in extparam).
+
+ bool operator<(u64a d) const {
+ if (!is_finite()) {
+ return false;
+ }
+ return val < d;
+ }
+ bool operator<=(u64a d) const {
+ if (!is_finite()) {
+ return false;
+ }
+ return val <= d;
+ }
+ bool operator==(u64a d) const {
+ if (!is_finite()) {
+ return false;
+ }
+ return val == d;
+ }
+ bool operator>(u64a d) const { return !(*this <= d); }
+ bool operator>=(u64a d) const { return !(*this < d); }
+ bool operator!=(u64a d) const { return !(*this == d); }
+
+ depth operator+(const depth &d) const {
+ if (is_unreachable() || d.is_unreachable()) {
+ return unreachable();
+ }
+ if (is_infinite() || d.is_infinite()) {
+ return infinity();
+ }
+
+ u64a rv = val + d.val;
+ if (rv >= val_infinity) {
+ DEBUG_PRINTF("depth %llu too large to represent!\n", rv);
+ throw DepthOverflowError();
+ }
+
+ return depth((u32)rv);
+ }
+
+ depth &operator+=(const depth &d) {
+ depth rv = *this + d;
+ *this = rv;
+ return *this;
+ }
+
+ depth operator-(const depth &d) const {
+ if (!d.is_finite()) {
+ throw DepthOverflowError();
+ }
+
+ if (is_unreachable()) {
+ return unreachable();
+ }
+ if (is_infinite()) {
+ return infinity();
+ }
+
+ if (val < d.val) {
+ throw DepthOverflowError();
+ }
+
+ u32 rv = val - d.val;
+ return depth(rv);
+ }
+
+ depth &operator-=(const depth &d) {
+ depth rv = *this - d;
+ *this = rv;
+ return *this;
+ }
+
+ depth operator+(s32 d) const {
+ if (is_unreachable()) {
+ return unreachable();
+ }
+ if (is_infinite()) {
+ return infinity();
+ }
+
+ s64a rv = val + d;
+ if (rv < 0 || (u64a)rv >= val_infinity) {
+ DEBUG_PRINTF("depth %lld too large to represent!\n", rv);
+ throw DepthOverflowError();
+ }
+
+ return depth((u32)rv);
+ }
+
+ depth operator+=(s32 d) {
+ depth rv = *this + d;
+ *this = rv;
+ return *this;
+ }
+
depth operator-(s32 d) const {
if (is_unreachable()) {
return unreachable();
@@ -216,57 +216,57 @@ public:
return *this;
}
-#ifdef DUMP_SUPPORT
- /** \brief Render as a string, useful for debugging. */
- std::string str() const;
-#endif
-
+#ifdef DUMP_SUPPORT
+ /** \brief Render as a string, useful for debugging. */
+ std::string str() const;
+#endif
+
size_t hash() const {
return val;
- }
-
-private:
- static constexpr u32 val_infinity = (1u << 31) - 1;
- static constexpr u32 val_unreachable = 1u << 31;
-
+ }
+
+private:
+ static constexpr u32 val_infinity = (1u << 31) - 1;
+ static constexpr u32 val_unreachable = 1u << 31;
+
u32 val = val_unreachable;
-};
-
-/**
- * \brief Encapsulates a min/max pair.
- */
+};
+
+/**
+ * \brief Encapsulates a min/max pair.
+ */
struct DepthMinMax : totally_ordered<DepthMinMax> {
depth min{depth::infinity()};
depth max{0};
-
+
DepthMinMax() = default;
- DepthMinMax(const depth &mn, const depth &mx) : min(mn), max(mx) {}
-
- bool operator<(const DepthMinMax &b) const {
- if (min != b.min) {
- return min < b.min;
- }
- return max < b.max;
- }
-
- bool operator==(const DepthMinMax &b) const {
- return min == b.min && max == b.max;
- }
-
-#ifdef DUMP_SUPPORT
- /** \brief Render as a string, useful for debugging. */
- std::string str() const;
-#endif
-
-};
-
-/**
- * \brief Merge two DepthMinMax values together to produce their union.
- */
-DepthMinMax unionDepthMinMax(const DepthMinMax &a, const DepthMinMax &b);
-
-} // namespace ue2
-
+ DepthMinMax(const depth &mn, const depth &mx) : min(mn), max(mx) {}
+
+ bool operator<(const DepthMinMax &b) const {
+ if (min != b.min) {
+ return min < b.min;
+ }
+ return max < b.max;
+ }
+
+ bool operator==(const DepthMinMax &b) const {
+ return min == b.min && max == b.max;
+ }
+
+#ifdef DUMP_SUPPORT
+ /** \brief Render as a string, useful for debugging. */
+ std::string str() const;
+#endif
+
+};
+
+/**
+ * \brief Merge two DepthMinMax values together to produce their union.
+ */
+DepthMinMax unionDepthMinMax(const DepthMinMax &a, const DepthMinMax &b);
+
+} // namespace ue2
+
namespace std {
template<>
@@ -285,4 +285,4 @@ struct hash<ue2::DepthMinMax> {
} // namespace
-#endif // DEPTH_H
+#endif // DEPTH_H
diff --git a/contrib/libs/hyperscan/src/util/determinise.h b/contrib/libs/hyperscan/src/util/determinise.h
index 0beeeef0a8..102a197441 100644
--- a/contrib/libs/hyperscan/src/util/determinise.h
+++ b/contrib/libs/hyperscan/src/util/determinise.h
@@ -1,205 +1,205 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief DFA determinisation code.
- */
-
-#ifndef DETERMINISE_H
-#define DETERMINISE_H
-
-#include "nfagraph/ng_holder.h"
-#include "charreach.h"
-#include "container.h"
-#include "ue2common.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief DFA determinisation code.
+ */
+
+#ifndef DETERMINISE_H
+#define DETERMINISE_H
+
+#include "nfagraph/ng_holder.h"
+#include "charreach.h"
+#include "container.h"
+#include "ue2common.h"
+
#include <algorithm>
-#include <array>
+#include <array>
#include <queue>
-#include <vector>
-
-namespace ue2 {
-
-/* Automaton details:
- *
- * const vector<StateSet> initial()
- * returns initial states to start determinising from. StateSets in the
- * initial() vector will given consecutive ids starting from 1, in the order
- * that they appear.
- *
- * void reports(StateSet s, flat_set<ReportID> *out)
- * fills out with any reports that need to be raised for stateset.
- *
- * void reportsEod(StateSet s, flat_set<ReportID> *out)
- * fills out with any reports that need to be raised for stateset at EOD.
- *
- * void transition(const StateSet &in, StateSet *next)
- * fills the next array such next[i] is the stateset that in transitions to
- * on seeing symbol i (i is in the compressed alphabet of the automaton).
- *
- * u16 alphasize
- * size of the compressed alphabet
- */
-
-/** \brief determinises some sort of nfa
- * \param n the automaton to determinise
- * \param dstates_out output dfa states
- * \param state_limit limit on the number of dfa states to construct
- * \param statesets_out a mapping from DFA state to the set of NFA states in
- * the automaton
+#include <vector>
+
+namespace ue2 {
+
+/* Automaton details:
+ *
+ * const vector<StateSet> initial()
+ * returns initial states to start determinising from. StateSets in the
+ * initial() vector will given consecutive ids starting from 1, in the order
+ * that they appear.
+ *
+ * void reports(StateSet s, flat_set<ReportID> *out)
+ * fills out with any reports that need to be raised for stateset.
+ *
+ * void reportsEod(StateSet s, flat_set<ReportID> *out)
+ * fills out with any reports that need to be raised for stateset at EOD.
+ *
+ * void transition(const StateSet &in, StateSet *next)
+ * fills the next array such next[i] is the stateset that in transitions to
+ * on seeing symbol i (i is in the compressed alphabet of the automaton).
+ *
+ * u16 alphasize
+ * size of the compressed alphabet
+ */
+
+/** \brief determinises some sort of nfa
+ * \param n the automaton to determinise
+ * \param dstates_out output dfa states
+ * \param state_limit limit on the number of dfa states to construct
+ * \param statesets_out a mapping from DFA state to the set of NFA states in
+ * the automaton
* \return true on success, false if state limit exceeded
- */
-template<class Auto, class ds>
-never_inline
+ */
+template<class Auto, class ds>
+never_inline
bool determinise(Auto &n, std::vector<ds> &dstates, size_t state_limit,
- std::vector<typename Auto::StateSet> *statesets_out = nullptr) {
- DEBUG_PRINTF("the determinator\n");
+ std::vector<typename Auto::StateSet> *statesets_out = nullptr) {
+ DEBUG_PRINTF("the determinator\n");
using StateSet = typename Auto::StateSet;
typename Auto::StateMap dstate_ids;
-
- const size_t alphabet_size = n.alphasize;
-
+
+ const size_t alphabet_size = n.alphasize;
+
dstates.clear();
dstates.reserve(state_limit);
-
+
dstate_ids.emplace(n.dead, DEAD_STATE);
- dstates.push_back(ds(alphabet_size));
- std::fill_n(dstates[0].next.begin(), alphabet_size, DEAD_STATE);
-
+ dstates.push_back(ds(alphabet_size));
+ std::fill_n(dstates[0].next.begin(), alphabet_size, DEAD_STATE);
+
std::queue<std::pair<StateSet, dstate_id_t>> q;
q.emplace(n.dead, DEAD_STATE);
-
- const std::vector<StateSet> &init = n.initial();
- for (u32 i = 0; i < init.size(); i++) {
+
+ const std::vector<StateSet> &init = n.initial();
+ for (u32 i = 0; i < init.size(); i++) {
q.emplace(init[i], dstates.size());
- assert(!contains(dstate_ids, init[i]));
+ assert(!contains(dstate_ids, init[i]));
dstate_ids.emplace(init[i], dstates.size());
- dstates.push_back(ds(alphabet_size));
- }
-
- std::vector<StateSet> succs(alphabet_size, n.dead);
-
+ dstates.push_back(ds(alphabet_size));
+ }
+
+ std::vector<StateSet> succs(alphabet_size, n.dead);
+
while (!q.empty()) {
auto m = std::move(q.front());
q.pop();
StateSet &curr = m.first;
dstate_id_t curr_id = m.second;
- DEBUG_PRINTF("curr: %hu\n", curr_id);
-
- /* fill in accepts */
- n.reports(curr, dstates[curr_id].reports);
- n.reportsEod(curr, dstates[curr_id].reports_eod);
-
- if (!dstates[curr_id].reports.empty()) {
- DEBUG_PRINTF("curr: %hu: is accept\n", curr_id);
- }
-
- if (!dstates[curr_id].reports.empty()) {
- /* only external reports set ekeys */
- if (n.canPrune(dstates[curr_id].reports)) {
- /* we only transition to dead on characters, TOPs leave us
- * alone */
- std::fill_n(dstates[curr_id].next.begin(), alphabet_size,
- DEAD_STATE);
- dstates[curr_id].next[n.alpha[TOP]] = curr_id;
- continue;
- }
- }
-
- /* fill in successor states */
- n.transition(curr, &succs[0]);
- for (symbol_t s = 0; s < n.alphasize; s++) {
- dstate_id_t succ_id;
- if (s && succs[s] == succs[s - 1]) {
- succ_id = dstates[curr_id].next[s - 1];
- } else {
+ DEBUG_PRINTF("curr: %hu\n", curr_id);
+
+ /* fill in accepts */
+ n.reports(curr, dstates[curr_id].reports);
+ n.reportsEod(curr, dstates[curr_id].reports_eod);
+
+ if (!dstates[curr_id].reports.empty()) {
+ DEBUG_PRINTF("curr: %hu: is accept\n", curr_id);
+ }
+
+ if (!dstates[curr_id].reports.empty()) {
+ /* only external reports set ekeys */
+ if (n.canPrune(dstates[curr_id].reports)) {
+ /* we only transition to dead on characters, TOPs leave us
+ * alone */
+ std::fill_n(dstates[curr_id].next.begin(), alphabet_size,
+ DEAD_STATE);
+ dstates[curr_id].next[n.alpha[TOP]] = curr_id;
+ continue;
+ }
+ }
+
+ /* fill in successor states */
+ n.transition(curr, &succs[0]);
+ for (symbol_t s = 0; s < n.alphasize; s++) {
+ dstate_id_t succ_id;
+ if (s && succs[s] == succs[s - 1]) {
+ succ_id = dstates[curr_id].next[s - 1];
+ } else {
auto p = dstate_ids.find(succs[s]);
if (p != dstate_ids.end()) { // succ[s] is already present
succ_id = p->second;
- if (succ_id > curr_id && !dstates[succ_id].daddy
- && n.unalpha[s] < N_CHARS) {
- dstates[succ_id].daddy = curr_id;
- }
- } else {
+ if (succ_id > curr_id && !dstates[succ_id].daddy
+ && n.unalpha[s] < N_CHARS) {
+ dstates[succ_id].daddy = curr_id;
+ }
+ } else {
succ_id = dstate_ids.size();
dstate_ids.emplace(succs[s], succ_id);
- dstates.push_back(ds(alphabet_size));
- dstates.back().daddy = n.unalpha[s] < N_CHARS ? curr_id : 0;
+ dstates.push_back(ds(alphabet_size));
+ dstates.back().daddy = n.unalpha[s] < N_CHARS ? curr_id : 0;
q.emplace(succs[s], succ_id);
- }
-
- DEBUG_PRINTF("-->%hu on %02hx\n", succ_id, n.unalpha[s]);
- }
-
- if (succ_id >= state_limit) {
+ }
+
+ DEBUG_PRINTF("-->%hu on %02hx\n", succ_id, n.unalpha[s]);
+ }
+
+ if (succ_id >= state_limit) {
DEBUG_PRINTF("succ_id %hu >= state_limit %zu\n",
- succ_id, state_limit);
+ succ_id, state_limit);
dstates.clear();
return false;
- }
-
- dstates[curr_id].next[s] = succ_id;
- }
- }
-
+ }
+
+ dstates[curr_id].next[s] = succ_id;
+ }
+ }
+
// The dstates vector will persist in the raw_dfa.
dstates.shrink_to_fit();
- if (statesets_out) {
+ if (statesets_out) {
auto &statesets = *statesets_out;
statesets.resize(dstate_ids.size());
for (auto &m : dstate_ids) {
statesets[m.second] = std::move(m.first);
}
- }
+ }
- DEBUG_PRINTF("ok\n");
+ DEBUG_PRINTF("ok\n");
return true;
-}
-
-static inline
-std::vector<CharReach> populateCR(const NGHolder &g,
- const std::vector<NFAVertex> &v_by_index,
- const std::array<u16, ALPHABET_SIZE> &alpha) {
- std::vector<CharReach> cr_by_index(v_by_index.size());
-
- for (size_t i = 0; i < v_by_index.size(); i++) {
- const CharReach &cr = g[v_by_index[i]].char_reach;
- CharReach &cr_out = cr_by_index[i];
- for (size_t s = cr.find_first(); s != cr.npos; s = cr.find_next(s)) {
- cr_out.set(alpha[s]);
- }
- }
-
- return cr_by_index;
-}
-
-} // namespace ue2
-
-#endif
+}
+
+static inline
+std::vector<CharReach> populateCR(const NGHolder &g,
+ const std::vector<NFAVertex> &v_by_index,
+ const std::array<u16, ALPHABET_SIZE> &alpha) {
+ std::vector<CharReach> cr_by_index(v_by_index.size());
+
+ for (size_t i = 0; i < v_by_index.size(); i++) {
+ const CharReach &cr = g[v_by_index[i]].char_reach;
+ CharReach &cr_out = cr_by_index[i];
+ for (size_t s = cr.find_first(); s != cr.npos; s = cr.find_next(s)) {
+ cr_out.set(alpha[s]);
+ }
+ }
+
+ return cr_by_index;
+}
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/dump_charclass.h b/contrib/libs/hyperscan/src/util/dump_charclass.h
index aa6b3b4d56..999641340a 100644
--- a/contrib/libs/hyperscan/src/util/dump_charclass.h
+++ b/contrib/libs/hyperscan/src/util/dump_charclass.h
@@ -1,61 +1,61 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Dump code for character classes (expressed as CharReach objects).
- */
-
-#ifndef DUMP_CHARCLASS_H
-#define DUMP_CHARCLASS_H
-
-#include "ue2common.h"
-
-#include <cstdio>
-#include <ostream>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Dump code for character classes (expressed as CharReach objects).
+ */
+
+#ifndef DUMP_CHARCLASS_H
+#define DUMP_CHARCLASS_H
+
+#include "ue2common.h"
+
+#include <cstdio>
+#include <ostream>
#include <sstream>
-#include <string>
+#include <string>
#include <vector>
-
-namespace ue2 {
-
-enum cc_output_t {
- CC_OUT_TEXT, //!< unescaped text output
- CC_OUT_DOT //!< escaped DOT label output
-};
-
-class CharReach;
-
+
+namespace ue2 {
+
+enum cc_output_t {
+ CC_OUT_TEXT, //!< unescaped text output
+ CC_OUT_DOT //!< escaped DOT label output
+};
+
+class CharReach;
+
void describeClass(std::ostream &os, const CharReach &cr, size_t maxLength = 16,
enum cc_output_t out_type = CC_OUT_TEXT);
-
-std::string describeClass(const CharReach &cr, size_t maxLength = 16,
- enum cc_output_t out_type = CC_OUT_TEXT);
-
+
+std::string describeClass(const CharReach &cr, size_t maxLength = 16,
+ enum cc_output_t out_type = CC_OUT_TEXT);
+
template<typename Container>
std::string describeClasses(const Container &container,
size_t maxClassLength = 16,
@@ -67,9 +67,9 @@ std::string describeClasses(const Container &container,
return oss.str();
}
-void describeClass(FILE *f, const CharReach &cr, size_t maxLength,
- enum cc_output_t out_type);
-
-} // namespace ue2
-
-#endif // DUMP_CHARCLASS_H
+void describeClass(FILE *f, const CharReach &cr, size_t maxLength,
+ enum cc_output_t out_type);
+
+} // namespace ue2
+
+#endif // DUMP_CHARCLASS_H
diff --git a/contrib/libs/hyperscan/src/util/dump_mask.cpp b/contrib/libs/hyperscan/src/util/dump_mask.cpp
index bc704805db..445f79b3af 100644
--- a/contrib/libs/hyperscan/src/util/dump_mask.cpp
+++ b/contrib/libs/hyperscan/src/util/dump_mask.cpp
@@ -1,63 +1,63 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Dump code for bitmasks.
- *
- * Note that these functions are only emitted in builds with DUMP_SUPPORT.
- */
-
-#include "config.h"
-
-#ifdef DUMP_SUPPORT
-
-#include "ue2common.h"
-#include "util/dump_mask.h"
-
-#include <string>
-
-namespace ue2 {
-
-std::string dumpMask(const u8 *mask, size_t len) {
- std::string s;
- s.reserve(len + len / 8);
-
- for (size_t i = 0; i < len; i++) {
- if ((i % 8) == 0 && i != 0) {
- s.push_back(' ');
- }
-
- s.push_back((mask[i / 8] >> (i % 8)) & 0x1 ? '1' : '0');
- }
-
- return s;
-}
-
-} // namespace ue2
-
-#endif // DUMP_SUPPORT
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Dump code for bitmasks.
+ *
+ * Note that these functions are only emitted in builds with DUMP_SUPPORT.
+ */
+
+#include "config.h"
+
+#ifdef DUMP_SUPPORT
+
+#include "ue2common.h"
+#include "util/dump_mask.h"
+
+#include <string>
+
+namespace ue2 {
+
+std::string dumpMask(const u8 *mask, size_t len) {
+ std::string s;
+ s.reserve(len + len / 8);
+
+ for (size_t i = 0; i < len; i++) {
+ if ((i % 8) == 0 && i != 0) {
+ s.push_back(' ');
+ }
+
+ s.push_back((mask[i / 8] >> (i % 8)) & 0x1 ? '1' : '0');
+ }
+
+ return s;
+}
+
+} // namespace ue2
+
+#endif // DUMP_SUPPORT
diff --git a/contrib/libs/hyperscan/src/util/dump_mask.h b/contrib/libs/hyperscan/src/util/dump_mask.h
index 791f2e4f88..04792ba7cc 100644
--- a/contrib/libs/hyperscan/src/util/dump_mask.h
+++ b/contrib/libs/hyperscan/src/util/dump_mask.h
@@ -1,55 +1,55 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Dump code for bitmasks.
- *
- * Note that these functions are only emitted in builds with DUMP_SUPPORT.
- */
-
-#ifndef DUMP_MASK_H
-#define DUMP_MASK_H
-
-#ifdef DUMP_SUPPORT
-
-#include "ue2common.h"
-#include <string>
-
-namespace ue2 {
-
-/**
- * Returns a representation of the given mask in binary, as a string of 1s and
- * 0s.
- */
-std::string dumpMask(const u8 *mask, size_t len);
-
-} // namespace ue2
-
-#endif // DUMP_SUPPORT
-
-#endif // DUMP_MASK_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Dump code for bitmasks.
+ *
+ * Note that these functions are only emitted in builds with DUMP_SUPPORT.
+ */
+
+#ifndef DUMP_MASK_H
+#define DUMP_MASK_H
+
+#ifdef DUMP_SUPPORT
+
+#include "ue2common.h"
+#include <string>
+
+namespace ue2 {
+
+/**
+ * Returns a representation of the given mask in binary, as a string of 1s and
+ * 0s.
+ */
+std::string dumpMask(const u8 *mask, size_t len);
+
+} // namespace ue2
+
+#endif // DUMP_SUPPORT
+
+#endif // DUMP_MASK_H
diff --git a/contrib/libs/hyperscan/src/util/exhaust.h b/contrib/libs/hyperscan/src/util/exhaust.h
index 3f1eab4a0a..d6f2ac06d9 100644
--- a/contrib/libs/hyperscan/src/util/exhaust.h
+++ b/contrib/libs/hyperscan/src/util/exhaust.h
@@ -1,41 +1,41 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Inline functions for manipulating exhaustion vector.
- */
-
-#ifndef EXHAUST_H
-#define EXHAUST_H
-
-#include "ue2common.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Inline functions for manipulating exhaustion vector.
+ */
+
+#ifndef EXHAUST_H
+#define EXHAUST_H
+
+#include "ue2common.h"
+
/** Index meaning a given exhaustion key is invalid. */
#define INVALID_EKEY (~(u32)0)
-
-#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/fatbit.h b/contrib/libs/hyperscan/src/util/fatbit.h
index b8860d7734..3c65db1a59 100644
--- a/contrib/libs/hyperscan/src/util/fatbit.h
+++ b/contrib/libs/hyperscan/src/util/fatbit.h
@@ -1,93 +1,93 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef FATBIT_H
-#define FATBIT_H
-
-/** \file
- * \brief Multibit: fast bitset structure for use in scratch.
- * Uses more space than mmbit, to avoid partial words for hopefully a taddy more
- * performance.
- *
- * API is also trimmed down.
- */
-
-#include "multibit.h"
-#include "ue2common.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FATBIT_H
+#define FATBIT_H
+
+/** \file
+ * \brief Multibit: fast bitset structure for use in scratch.
+ * Uses more space than mmbit, to avoid partial words for hopefully a taddy more
+ * performance.
+ *
+ * API is also trimmed down.
+ */
+
+#include "multibit.h"
+#include "ue2common.h"
+
#ifdef __cplusplus
extern "C" {
#endif
-#define MIN_FAT_SIZE 32
-
-struct fatbit {
- union {
- u64a flat[MIN_FAT_SIZE / sizeof(u64a)];
- u8 raw[MIN_FAT_SIZE];
- } fb_int;
- u64a tail[];
-};
-
-static really_inline
-void fatbit_clear(struct fatbit *bits) {
- assert(ISALIGNED(bits));
- memset(bits, 0, sizeof(struct fatbit));
-}
-
-static really_inline
-char fatbit_set(struct fatbit *bits, u32 total_bits, u32 key) {
+#define MIN_FAT_SIZE 32
+
+struct fatbit {
+ union {
+ u64a flat[MIN_FAT_SIZE / sizeof(u64a)];
+ u8 raw[MIN_FAT_SIZE];
+ } fb_int;
+ u64a tail[];
+};
+
+static really_inline
+void fatbit_clear(struct fatbit *bits) {
+ assert(ISALIGNED(bits));
+ memset(bits, 0, sizeof(struct fatbit));
+}
+
+static really_inline
+char fatbit_set(struct fatbit *bits, u32 total_bits, u32 key) {
assert(ISALIGNED(bits));
- return mmbit_set(bits->fb_int.raw, total_bits, key);
-}
-
-static really_inline
-void fatbit_unset(struct fatbit *bits, u32 total_bits, u32 key) {
+ return mmbit_set(bits->fb_int.raw, total_bits, key);
+}
+
+static really_inline
+void fatbit_unset(struct fatbit *bits, u32 total_bits, u32 key) {
assert(ISALIGNED(bits));
- mmbit_unset(bits->fb_int.raw, total_bits, key);
-}
-
-static really_inline
-char fatbit_isset(const struct fatbit *bits, u32 total_bits, u32 key) {
+ mmbit_unset(bits->fb_int.raw, total_bits, key);
+}
+
+static really_inline
+char fatbit_isset(const struct fatbit *bits, u32 total_bits, u32 key) {
assert(ISALIGNED(bits));
- return mmbit_isset(bits->fb_int.raw, total_bits, key);
-}
-
-static really_inline
-u32 fatbit_iterate(const struct fatbit *bits, u32 total_bits, u32 it_in) {
+ return mmbit_isset(bits->fb_int.raw, total_bits, key);
+}
+
+static really_inline
+u32 fatbit_iterate(const struct fatbit *bits, u32 total_bits, u32 it_in) {
assert(ISALIGNED(bits));
- /* TODO: iterate_flat could be specialised as we don't have to worry about
- * partial blocks. */
- return mmbit_iterate(bits->fb_int.raw, total_bits, it_in);
-}
-
+ /* TODO: iterate_flat could be specialised as we don't have to worry about
+ * partial blocks. */
+ return mmbit_iterate(bits->fb_int.raw, total_bits, it_in);
+}
+
#ifdef __cplusplus
} // extern "C"
#endif
-
-#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/graph.h b/contrib/libs/hyperscan/src/util/graph.h
index 15f5694cab..3e18dae552 100644
--- a/contrib/libs/hyperscan/src/util/graph.h
+++ b/contrib/libs/hyperscan/src/util/graph.h
@@ -1,151 +1,151 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Functions for graph manipulation that aren't in the base BGL toolkit.
- */
-
-#ifndef UTIL_GRAPH_H
-#define UTIL_GRAPH_H
-
-#include "container.h"
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Functions for graph manipulation that aren't in the base BGL toolkit.
+ */
+
+#ifndef UTIL_GRAPH_H
+#define UTIL_GRAPH_H
+
+#include "container.h"
+#include "ue2common.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
+#include "util/graph_range.h"
#include "util/unordered.h"
-
-#include <boost/graph/depth_first_search.hpp>
+
+#include <boost/graph/depth_first_search.hpp>
#include <boost/graph/strong_components.hpp>
#include <boost/range/adaptor/map.hpp>
-
+
#include <algorithm>
#include <map>
#include <set>
#include <utility>
#include <vector>
-namespace ue2 {
-
-/** \brief True if the given vertex has no out-edges. */
-template<class Graph>
-bool isLeafNode(const typename Graph::vertex_descriptor& v, const Graph& g) {
+namespace ue2 {
+
+/** \brief True if the given vertex has no out-edges. */
+template<class Graph>
+bool isLeafNode(const typename Graph::vertex_descriptor& v, const Graph& g) {
return out_degree(v, g) == 0;
-}
-
-/** \brief True if vertex \a v has an edge to itself. */
-template<class Graph>
-bool hasSelfLoop(const typename Graph::vertex_descriptor &v, const Graph &g) {
- return edge(v, v, g).second;
-}
-
-/** \brief True if any vertex in [it, end) has an edge to itself. */
-template<class Graph, class Iterator>
-bool anySelfLoop(const Graph &g, Iterator it, const Iterator &end) {
- for (; it != end; ++it) {
- if (hasSelfLoop(*it, g)) {
- return true;
- }
- }
-
- return false;
-}
-
-/** \brief Returns the out-degree of vertex \a v, ignoring self-loops. */
-template<class Graph>
-size_t proper_out_degree(const typename Graph::vertex_descriptor &v,
- const Graph &g) {
- return out_degree(v, g) - (edge(v, v, g).second ? 1 : 0);
-}
-
-/** \brief Returns the in-degree of vertex \a v, ignoring self-loops. */
-template<class Graph>
-size_t proper_in_degree(const typename Graph::vertex_descriptor &v,
- const Graph &g) {
- return in_degree(v, g) - (edge(v, v, g).second ? 1 : 0);
-}
-
-/** \brief True if vertex \a v has at least one successor. */
-template<class Graph>
-bool has_successor(const typename Graph::vertex_descriptor &v, const Graph &g) {
+}
+
+/** \brief True if vertex \a v has an edge to itself. */
+template<class Graph>
+bool hasSelfLoop(const typename Graph::vertex_descriptor &v, const Graph &g) {
+ return edge(v, v, g).second;
+}
+
+/** \brief True if any vertex in [it, end) has an edge to itself. */
+template<class Graph, class Iterator>
+bool anySelfLoop(const Graph &g, Iterator it, const Iterator &end) {
+ for (; it != end; ++it) {
+ if (hasSelfLoop(*it, g)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/** \brief Returns the out-degree of vertex \a v, ignoring self-loops. */
+template<class Graph>
+size_t proper_out_degree(const typename Graph::vertex_descriptor &v,
+ const Graph &g) {
+ return out_degree(v, g) - (edge(v, v, g).second ? 1 : 0);
+}
+
+/** \brief Returns the in-degree of vertex \a v, ignoring self-loops. */
+template<class Graph>
+size_t proper_in_degree(const typename Graph::vertex_descriptor &v,
+ const Graph &g) {
+ return in_degree(v, g) - (edge(v, v, g).second ? 1 : 0);
+}
+
+/** \brief True if vertex \a v has at least one successor. */
+template<class Graph>
+bool has_successor(const typename Graph::vertex_descriptor &v, const Graph &g) {
return out_degree(v, g) > 0;
-}
-
-/** \brief True if vertex \a v has at least one successor other than itself. */
-template<class Graph>
-bool has_proper_successor(const typename Graph::vertex_descriptor &v,
- const Graph &g) {
- typename Graph::adjacency_iterator ai, ae;
- std::tie(ai, ae) = adjacent_vertices(v, g);
- if (ai == ae) {
- return false;
- }
- if (*ai == v) {
- ++ai; // skip self-loop
- }
-
- return ai != ae;
-}
-
-/** \brief Find the set of vertices that are reachable from the vertices in \a
- * sources. */
-template<class Graph, class SourceCont, class OutCont>
-void find_reachable(const Graph &g, const SourceCont &sources, OutCont *out) {
- using vertex_descriptor = typename Graph::vertex_descriptor;
+}
+
+/** \brief True if vertex \a v has at least one successor other than itself. */
+template<class Graph>
+bool has_proper_successor(const typename Graph::vertex_descriptor &v,
+ const Graph &g) {
+ typename Graph::adjacency_iterator ai, ae;
+ std::tie(ai, ae) = adjacent_vertices(v, g);
+ if (ai == ae) {
+ return false;
+ }
+ if (*ai == v) {
+ ++ai; // skip self-loop
+ }
+
+ return ai != ae;
+}
+
+/** \brief Find the set of vertices that are reachable from the vertices in \a
+ * sources. */
+template<class Graph, class SourceCont, class OutCont>
+void find_reachable(const Graph &g, const SourceCont &sources, OutCont *out) {
+ using vertex_descriptor = typename Graph::vertex_descriptor;
std::unordered_map<vertex_descriptor, boost::default_color_type> colours;
-
- for (auto v : sources) {
- boost::depth_first_visit(g, v,
- boost::make_dfs_visitor(boost::null_visitor()),
- boost::make_assoc_property_map(colours));
- }
-
- for (const auto &e : colours) {
- out->insert(e.first);
- }
-}
-
-/** \brief Find the set of vertices that are NOT reachable from the vertices in
- * \a sources. */
-template<class Graph, class SourceCont, class OutCont>
-void find_unreachable(const Graph &g, const SourceCont &sources, OutCont *out) {
- using vertex_descriptor = typename Graph::vertex_descriptor;
+
+ for (auto v : sources) {
+ boost::depth_first_visit(g, v,
+ boost::make_dfs_visitor(boost::null_visitor()),
+ boost::make_assoc_property_map(colours));
+ }
+
+ for (const auto &e : colours) {
+ out->insert(e.first);
+ }
+}
+
+/** \brief Find the set of vertices that are NOT reachable from the vertices in
+ * \a sources. */
+template<class Graph, class SourceCont, class OutCont>
+void find_unreachable(const Graph &g, const SourceCont &sources, OutCont *out) {
+ using vertex_descriptor = typename Graph::vertex_descriptor;
std::unordered_set<vertex_descriptor> reachable;
-
- find_reachable(g, sources, &reachable);
-
- for (const auto &v : vertices_range(g)) {
- if (!contains(reachable, v)) {
- out->insert(v);
- }
- }
-}
-
-template <class Graph>
+
+ find_reachable(g, sources, &reachable);
+
+ for (const auto &v : vertices_range(g)) {
+ if (!contains(reachable, v)) {
+ out->insert(v);
+ }
+ }
+}
+
+template <class Graph>
flat_set<typename Graph::vertex_descriptor>
find_vertices_in_cycles(const Graph &g) {
using vertex_descriptor = typename Graph::vertex_descriptor;
@@ -182,46 +182,46 @@ find_vertices_in_cycles(const Graph &g) {
}
template <class Graph>
-bool has_parallel_edge(const Graph &g) {
- using vertex_descriptor = typename Graph::vertex_descriptor;
+bool has_parallel_edge(const Graph &g) {
+ using vertex_descriptor = typename Graph::vertex_descriptor;
ue2_unordered_set<std::pair<vertex_descriptor, vertex_descriptor>> seen;
- for (const auto &e : edges_range(g)) {
- auto u = source(e, g);
- auto v = target(e, g);
- if (!seen.emplace(u, v).second) {
- return true;
- }
- }
- return false;
-}
-
-struct found_back_edge {};
-struct detect_back_edges : public boost::default_dfs_visitor {
- explicit detect_back_edges(bool ignore_self_in)
- : ignore_self(ignore_self_in) {}
- template <class Graph>
- void back_edge(const typename Graph::edge_descriptor &e,
- const Graph &g) const {
- if (ignore_self && source(e, g) == target(e, g)) {
- return;
- }
- throw found_back_edge();
- }
- bool ignore_self;
-};
-
-template <class Graph>
-bool is_dag(const Graph &g, bool ignore_self_loops = false) {
- try {
- depth_first_search(g, visitor(detect_back_edges(ignore_self_loops)));
- } catch (const found_back_edge &) {
- return false;
- }
-
- return true;
-}
-
+ for (const auto &e : edges_range(g)) {
+ auto u = source(e, g);
+ auto v = target(e, g);
+ if (!seen.emplace(u, v).second) {
+ return true;
+ }
+ }
+ return false;
+}
+
+struct found_back_edge {};
+struct detect_back_edges : public boost::default_dfs_visitor {
+ explicit detect_back_edges(bool ignore_self_in)
+ : ignore_self(ignore_self_in) {}
+ template <class Graph>
+ void back_edge(const typename Graph::edge_descriptor &e,
+ const Graph &g) const {
+ if (ignore_self && source(e, g) == target(e, g)) {
+ return;
+ }
+ throw found_back_edge();
+ }
+ bool ignore_self;
+};
+
+template <class Graph>
+bool is_dag(const Graph &g, bool ignore_self_loops = false) {
+ try {
+ depth_first_search(g, visitor(detect_back_edges(ignore_self_loops)));
+ } catch (const found_back_edge &) {
+ return false;
+ }
+
+ return true;
+}
+
template<typename Cont>
class vertex_recorder : public boost::default_dfs_visitor {
public:
@@ -261,28 +261,28 @@ make_vertex_index_bitset_recorder(Bitset &o) {
return vertex_index_bitset_recorder<Bitset>(o);
}
-template <class Graph>
-std::pair<typename Graph::edge_descriptor, bool>
-add_edge_if_not_present(typename Graph::vertex_descriptor u,
- typename Graph::vertex_descriptor v, Graph &g) {
- std::pair<typename Graph::edge_descriptor, bool> e = edge(u, v, g);
- if (!e.second) {
- e = add_edge(u, v, g);
- }
- return e;
-}
-
-template <class Graph>
-std::pair<typename Graph::edge_descriptor, bool> add_edge_if_not_present(
- typename Graph::vertex_descriptor u, typename Graph::vertex_descriptor v,
- const typename Graph::edge_property_type &prop, Graph &g) {
- std::pair<typename Graph::edge_descriptor, bool> e = edge(u, v, g);
- if (!e.second) {
- e = add_edge(u, v, prop, g);
- }
- return e;
-}
-
+template <class Graph>
+std::pair<typename Graph::edge_descriptor, bool>
+add_edge_if_not_present(typename Graph::vertex_descriptor u,
+ typename Graph::vertex_descriptor v, Graph &g) {
+ std::pair<typename Graph::edge_descriptor, bool> e = edge(u, v, g);
+ if (!e.second) {
+ e = add_edge(u, v, g);
+ }
+ return e;
+}
+
+template <class Graph>
+std::pair<typename Graph::edge_descriptor, bool> add_edge_if_not_present(
+ typename Graph::vertex_descriptor u, typename Graph::vertex_descriptor v,
+ const typename Graph::edge_property_type &prop, Graph &g) {
+ std::pair<typename Graph::edge_descriptor, bool> e = edge(u, v, g);
+ if (!e.second) {
+ e = add_edge(u, v, prop, g);
+ }
+ return e;
+}
+
#ifndef NDEBUG
template <class Graph>
@@ -317,6 +317,6 @@ bool hasCorrectlyNumberedEdges(const Graph &g) {
#endif
-} // namespace ue2
-
-#endif // UTIL_GRAPH_H
+} // namespace ue2
+
+#endif // UTIL_GRAPH_H
diff --git a/contrib/libs/hyperscan/src/util/graph_range.h b/contrib/libs/hyperscan/src/util/graph_range.h
index 05b8f76896..3df06911a7 100644
--- a/contrib/libs/hyperscan/src/util/graph_range.h
+++ b/contrib/libs/hyperscan/src/util/graph_range.h
@@ -1,111 +1,111 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Convenience functions allowing range-based-for over BGL graphs.
- *
- * Normally with the BGL in C++98 you need to do this to iterate over graph
- * elements:
- *
- * Graph:out_edge_iterator ei, ee;
- * for (tie(ei, ee) = out_edges(v, g); ei != ee; ++ei) {
- * do_thing_with_edge(*ei, g);
- * }
- *
- * But now, with C++11 range-based-for and these functions, you can do this
- * instead:
- *
- * for (auto e : out_edges_range(v, g)) {
- * do_thing_with_edge(e, g);
- * }
- *
- * This syntax is much more compact and keeps the iterator vars from cluttering
- * the outer scope.
- */
-
-#ifndef UTIL_GRAPH_RANGE_H
-#define UTIL_GRAPH_RANGE_H
-
-#include <boost/range/iterator_range.hpp>
-
-namespace ue2 {
-
-/** Adapts a pair of iterators into a range. */
-template <class Iter>
-inline boost::iterator_range<Iter> pair_range(const std::pair<Iter, Iter> &p) {
- return boost::make_iterator_range(p.first, p.second);
-}
-
-/** vertices(g) */
-template <class Graph>
-inline auto vertices_range(const Graph &g)
- -> decltype(pair_range(vertices(g))) {
- return pair_range(vertices(g));
-}
-
-/** edges(g) */
-template <class Graph>
-inline auto edges_range(const Graph &g) -> decltype(pair_range(edges(g))) {
- return pair_range(edges(g));
-}
-
-/** out_edges(v, g) */
-template <class Graph>
-inline auto out_edges_range(const typename Graph::vertex_descriptor &v,
- const Graph &g)
- -> decltype(pair_range(out_edges(v, g))) {
- return pair_range(out_edges(v, g));
-}
-
-/** in_edges(v, g) */
-template <class Graph>
-inline auto in_edges_range(const typename Graph::vertex_descriptor &v,
- const Graph &g)
- -> decltype(pair_range(in_edges(v, g))) {
- return pair_range(in_edges(v, g));
-}
-
-/** adjacent_vertices(v, g) */
-template <class Graph>
-inline auto adjacent_vertices_range(const typename Graph::vertex_descriptor &v,
- const Graph &g)
- -> decltype(pair_range(adjacent_vertices(v, g))) {
- return pair_range(adjacent_vertices(v, g));
-}
-
-/** inv_adjacent_vertices(v, g) */
-template <class Graph>
-inline auto inv_adjacent_vertices_range(
- const typename Graph::vertex_descriptor &v, const Graph &g)
- -> decltype(pair_range(inv_adjacent_vertices(v, g))) {
- return pair_range(inv_adjacent_vertices(v, g));
-}
-
-} // namespace ue2
-
-#endif // UTIL_GRAPH_RANGE_H
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Convenience functions allowing range-based-for over BGL graphs.
+ *
+ * Normally with the BGL in C++98 you need to do this to iterate over graph
+ * elements:
+ *
+ * Graph:out_edge_iterator ei, ee;
+ * for (tie(ei, ee) = out_edges(v, g); ei != ee; ++ei) {
+ * do_thing_with_edge(*ei, g);
+ * }
+ *
+ * But now, with C++11 range-based-for and these functions, you can do this
+ * instead:
+ *
+ * for (auto e : out_edges_range(v, g)) {
+ * do_thing_with_edge(e, g);
+ * }
+ *
+ * This syntax is much more compact and keeps the iterator vars from cluttering
+ * the outer scope.
+ */
+
+#ifndef UTIL_GRAPH_RANGE_H
+#define UTIL_GRAPH_RANGE_H
+
+#include <boost/range/iterator_range.hpp>
+
+namespace ue2 {
+
+/** Adapts a pair of iterators into a range. */
+template <class Iter>
+inline boost::iterator_range<Iter> pair_range(const std::pair<Iter, Iter> &p) {
+ return boost::make_iterator_range(p.first, p.second);
+}
+
+/** vertices(g) */
+template <class Graph>
+inline auto vertices_range(const Graph &g)
+ -> decltype(pair_range(vertices(g))) {
+ return pair_range(vertices(g));
+}
+
+/** edges(g) */
+template <class Graph>
+inline auto edges_range(const Graph &g) -> decltype(pair_range(edges(g))) {
+ return pair_range(edges(g));
+}
+
+/** out_edges(v, g) */
+template <class Graph>
+inline auto out_edges_range(const typename Graph::vertex_descriptor &v,
+ const Graph &g)
+ -> decltype(pair_range(out_edges(v, g))) {
+ return pair_range(out_edges(v, g));
+}
+
+/** in_edges(v, g) */
+template <class Graph>
+inline auto in_edges_range(const typename Graph::vertex_descriptor &v,
+ const Graph &g)
+ -> decltype(pair_range(in_edges(v, g))) {
+ return pair_range(in_edges(v, g));
+}
+
+/** adjacent_vertices(v, g) */
+template <class Graph>
+inline auto adjacent_vertices_range(const typename Graph::vertex_descriptor &v,
+ const Graph &g)
+ -> decltype(pair_range(adjacent_vertices(v, g))) {
+ return pair_range(adjacent_vertices(v, g));
+}
+
+/** inv_adjacent_vertices(v, g) */
+template <class Graph>
+inline auto inv_adjacent_vertices_range(
+ const typename Graph::vertex_descriptor &v, const Graph &g)
+ -> decltype(pair_range(inv_adjacent_vertices(v, g))) {
+ return pair_range(inv_adjacent_vertices(v, g));
+}
+
+} // namespace ue2
+
+#endif // UTIL_GRAPH_RANGE_H
diff --git a/contrib/libs/hyperscan/src/util/join.h b/contrib/libs/hyperscan/src/util/join.h
index 2073ef7a5a..7d5a30c39a 100644
--- a/contrib/libs/hyperscan/src/util/join.h
+++ b/contrib/libs/hyperscan/src/util/join.h
@@ -1,40 +1,40 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef JOIN_H
-
-#define JOIN(x, y) JOIN_AGAIN(x, y)
-#define JOIN_AGAIN(x, y) x ## y
-
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef JOIN_H
+
+#define JOIN(x, y) JOIN_AGAIN(x, y)
+#define JOIN_AGAIN(x, y) x ## y
+
#define JOIN3(x, y, z) JOIN_AGAIN3(x, y, z)
#define JOIN_AGAIN3(x, y, z) x ## y ## z
#define JOIN4(w, x, y, z) JOIN_AGAIN4(w, x, y, z)
#define JOIN_AGAIN4(w, x, y, z) w ## x ## y ## z
-#endif
+#endif
diff --git a/contrib/libs/hyperscan/src/util/make_unique.h b/contrib/libs/hyperscan/src/util/make_unique.h
index 86de7cecc1..651e8c5cf9 100644
--- a/contrib/libs/hyperscan/src/util/make_unique.h
+++ b/contrib/libs/hyperscan/src/util/make_unique.h
@@ -1,49 +1,49 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef UTIL_MAKE_UNIQUE_H
-#define UTIL_MAKE_UNIQUE_H
-
-#if (defined(_WIN32) || defined(_WIN64)) && (_MSC_VER > 1700)
-// VC++ 2013 onwards has make_unique in the STL
-#define USE_STD
-#include <memory>
-#else
-#include <boost/smart_ptr/make_unique.hpp>
-#endif
-
-namespace ue2 {
-#if defined(USE_STD)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef UTIL_MAKE_UNIQUE_H
+#define UTIL_MAKE_UNIQUE_H
+
+#if (defined(_WIN32) || defined(_WIN64)) && (_MSC_VER > 1700)
+// VC++ 2013 onwards has make_unique in the STL
+#define USE_STD
+#include <memory>
+#else
+#include <boost/smart_ptr/make_unique.hpp>
+#endif
+
+namespace ue2 {
+#if defined(USE_STD)
using std::make_unique;
-#else
+#else
using boost::make_unique;
-#endif
-}
-
-#undef USE_STD
-#endif // UTIL_MAKE_UNIQUE_H
+#endif
+}
+
+#undef USE_STD
+#endif // UTIL_MAKE_UNIQUE_H
diff --git a/contrib/libs/hyperscan/src/util/masked_move.c b/contrib/libs/hyperscan/src/util/masked_move.c
index 8172d443c4..001cd49f28 100644
--- a/contrib/libs/hyperscan/src/util/masked_move.c
+++ b/contrib/libs/hyperscan/src/util/masked_move.c
@@ -1,91 +1,91 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#include "ue2common.h"
-#include "masked_move.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "ue2common.h"
+#include "masked_move.h"
#include "util/arch.h"
-
+
#if defined(HAVE_AVX2)
-/* masks for masked moves */
-
-/* magic mask for maskload (vmmaskmovq) - described in UE-2424 */
+/* masks for masked moves */
+
+/* magic mask for maskload (vmmaskmovq) - described in UE-2424 */
const ALIGN_CL_DIRECTIVE u32 mm_mask_mask[16] = {
- 0x00000000U,
- 0x00000000U,
- 0x00000000U,
- 0x00000000U,
- 0x00000000U,
- 0x00000000U,
- 0x00000000U,
- 0x00000000U,
- 0xff000000U,
- 0xfe000000U,
- 0xfc000000U,
- 0xf8000000U,
- 0xf0000000U,
- 0xe0000000U,
- 0xc0000000U,
- 0x80000000U,
-};
-
-const u32 mm_shuffle_end[32][8] = {
- { 0x03020100U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
- { 0x02010080U, 0x80808003U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
- { 0x01008080U, 0x80800302U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
- { 0x00808080U, 0x80030201U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
- { 0x80808080U, 0x03020100U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
- { 0x80808080U, 0x02010080U, 0x80808003U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
- { 0x80808080U, 0x01008080U, 0x80800302U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
- { 0x80808080U, 0x00808080U, 0x80030201U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
- { 0x80808080U, 0x80808080U, 0x03020100U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
- { 0x80808080U, 0x80808080U, 0x02010080U, 0x80808003U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
- { 0x80808080U, 0x80808080U, 0x01008080U, 0x80800302U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
- { 0x80808080U, 0x80808080U, 0x00808080U, 0x80030201U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x03020100U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x02010080U, 0x80808003U, 0x80808080U, 0x80808080U, 0x80808080U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x01008080U, 0x80800302U, 0x80808080U, 0x80808080U, 0x80808080U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x00808080U, 0x80030201U, 0x80808080U, 0x80808080U, 0x80808080U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x03020100U, 0x80808080U, 0x80808080U, 0x80808080U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x02010080U, 0x80808003U, 0x80808080U, 0x80808080U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x01008080U, 0x80800302U, 0x80808080U, 0x80808080U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x00808080U, 0x80030201U, 0x80808080U, 0x80808080U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x03020100U, 0x80808080U, 0x80808080U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x02010080U, 0x80808003U, 0x80808080U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x01008080U, 0x80800302U, 0x80808080U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x00808080U, 0x80030201U, 0x80808080U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x03020100U, 0x80808080U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x02010080U, 0x80808003U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x01008080U, 0x80800302U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x00808080U, 0x80030201U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x03020100U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x02010080U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x01008080U, },
- { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x00808080U, },
-};
-#endif // AVX2
+ 0x00000000U,
+ 0x00000000U,
+ 0x00000000U,
+ 0x00000000U,
+ 0x00000000U,
+ 0x00000000U,
+ 0x00000000U,
+ 0x00000000U,
+ 0xff000000U,
+ 0xfe000000U,
+ 0xfc000000U,
+ 0xf8000000U,
+ 0xf0000000U,
+ 0xe0000000U,
+ 0xc0000000U,
+ 0x80000000U,
+};
+
+const u32 mm_shuffle_end[32][8] = {
+ { 0x03020100U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
+ { 0x02010080U, 0x80808003U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
+ { 0x01008080U, 0x80800302U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
+ { 0x00808080U, 0x80030201U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
+ { 0x80808080U, 0x03020100U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
+ { 0x80808080U, 0x02010080U, 0x80808003U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
+ { 0x80808080U, 0x01008080U, 0x80800302U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
+ { 0x80808080U, 0x00808080U, 0x80030201U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
+ { 0x80808080U, 0x80808080U, 0x03020100U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
+ { 0x80808080U, 0x80808080U, 0x02010080U, 0x80808003U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
+ { 0x80808080U, 0x80808080U, 0x01008080U, 0x80800302U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
+ { 0x80808080U, 0x80808080U, 0x00808080U, 0x80030201U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x03020100U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x02010080U, 0x80808003U, 0x80808080U, 0x80808080U, 0x80808080U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x01008080U, 0x80800302U, 0x80808080U, 0x80808080U, 0x80808080U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x00808080U, 0x80030201U, 0x80808080U, 0x80808080U, 0x80808080U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x03020100U, 0x80808080U, 0x80808080U, 0x80808080U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x02010080U, 0x80808003U, 0x80808080U, 0x80808080U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x01008080U, 0x80800302U, 0x80808080U, 0x80808080U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x00808080U, 0x80030201U, 0x80808080U, 0x80808080U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x03020100U, 0x80808080U, 0x80808080U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x02010080U, 0x80808003U, 0x80808080U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x01008080U, 0x80800302U, 0x80808080U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x00808080U, 0x80030201U, 0x80808080U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x03020100U, 0x80808080U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x02010080U, 0x80808003U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x01008080U, 0x80800302U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x00808080U, 0x80030201U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x03020100U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x02010080U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x01008080U, },
+ { 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x80808080U, 0x00808080U, },
+};
+#endif // AVX2
diff --git a/contrib/libs/hyperscan/src/util/masked_move.h b/contrib/libs/hyperscan/src/util/masked_move.h
index 2db31e6009..4c877ca9e5 100644
--- a/contrib/libs/hyperscan/src/util/masked_move.h
+++ b/contrib/libs/hyperscan/src/util/masked_move.h
@@ -1,82 +1,82 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef MASKED_MOVE_H
-#define MASKED_MOVE_H
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef MASKED_MOVE_H
+#define MASKED_MOVE_H
+
#include "arch.h"
-
+
#if defined(HAVE_AVX2)
-#include "unaligned.h"
-#include "simd_utils.h"
-
+#include "unaligned.h"
+#include "simd_utils.h"
+
#ifdef __cplusplus
extern "C" {
#endif
-extern const u32 mm_mask_mask[16];
-extern const u32 mm_shuffle_end[32][8];
+extern const u32 mm_mask_mask[16];
+extern const u32 mm_shuffle_end[32][8];
#ifdef __cplusplus
}
#endif
-
-/* load mask for len bytes from start of buffer */
-static really_inline m256
-_get_mm_mask_end(u32 len) {
- assert(len <= 32);
- const u8 *masky = (const u8 *)mm_mask_mask;
- m256 mask = load256(masky + 32);
- mask = _mm256_sll_epi32(mask, _mm_cvtsi32_si128(8 - (len >> 2)));
- return mask;
-}
-
-/*
- * masked_move256_len: Will load len bytes from *buf into m256
- * _______________________________
- * |0<----len---->| 32|
- * -------------------------------
- */
-static really_inline m256
-masked_move256_len(const u8 *buf, const u32 len) {
- assert(len >= 4);
-
- m256 lmask = _get_mm_mask_end(len);
-
- u32 end = unaligned_load_u32(buf + len - 4);
- m256 preshufend = _mm256_broadcastq_epi64(_mm_cvtsi32_si128(end));
- m256 v = _mm256_maskload_epi32((const int *)buf, lmask);
+
+/* load mask for len bytes from start of buffer */
+static really_inline m256
+_get_mm_mask_end(u32 len) {
+ assert(len <= 32);
+ const u8 *masky = (const u8 *)mm_mask_mask;
+ m256 mask = load256(masky + 32);
+ mask = _mm256_sll_epi32(mask, _mm_cvtsi32_si128(8 - (len >> 2)));
+ return mask;
+}
+
+/*
+ * masked_move256_len: Will load len bytes from *buf into m256
+ * _______________________________
+ * |0<----len---->| 32|
+ * -------------------------------
+ */
+static really_inline m256
+masked_move256_len(const u8 *buf, const u32 len) {
+ assert(len >= 4);
+
+ m256 lmask = _get_mm_mask_end(len);
+
+ u32 end = unaligned_load_u32(buf + len - 4);
+ m256 preshufend = _mm256_broadcastq_epi64(_mm_cvtsi32_si128(end));
+ m256 v = _mm256_maskload_epi32((const int *)buf, lmask);
m256 shufend = pshufb_m256(preshufend,
loadu256(&mm_shuffle_end[len - 4]));
- m256 target = or256(v, shufend);
-
- return target;
-}
-
-#endif /* AVX2 */
-#endif /* MASKED_MOVE_H */
-
+ m256 target = or256(v, shufend);
+
+ return target;
+}
+
+#endif /* AVX2 */
+#endif /* MASKED_MOVE_H */
+
diff --git a/contrib/libs/hyperscan/src/util/multibit.c b/contrib/libs/hyperscan/src/util/multibit.c
index 9a648528d3..de192d7dd7 100644
--- a/contrib/libs/hyperscan/src/util/multibit.c
+++ b/contrib/libs/hyperscan/src/util/multibit.c
@@ -1,140 +1,140 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Multibit: lookup tables and support code.
- *
- * This C file contains the constant tables used by multibit, so we don't end
- * up creating copies of them for every unit that uses it.
- */
-
-#include "multibit.h"
-#include "ue2common.h"
-
-const u8 mmbit_keyshift_lut[32] = {
- 30, 30, 24, 24, 24, 24, 24, 24, 18, 18, 18,
- 18, 18, 18, 12, 12, 12, 12, 12, 12, 6, 6,
- 6, 6, 6, 6, 0, 0, 0, 0, 0, 0
-};
-
-// The only actually valid values of ks are as shown in the LUT above, but a
-// division is just too expensive.
-const u8 mmbit_maxlevel_from_keyshift_lut[32] = {
- 0, 0, 0, 0, 0, 0,
- 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 3,
- 4, 4, 4, 4, 4, 4,
- 5, 5
-};
-
-const u8 mmbit_maxlevel_direct_lut[32] = {
- 5, 5, 4, 4, 4, 4, 4, 4, 3, 3, 3,
- 3, 3, 3, 2, 2, 2, 2, 2, 2, 1, 1,
- 1, 1, 1, 1, 0, 0, 0, 0, 0, 0
-};
-
-#define ZERO_TO_LUT(x) ((1ULL << x) - 1)
-
-const u64a mmbit_zero_to_lut[65] = {
- ZERO_TO_LUT(0),
- ZERO_TO_LUT(1),
- ZERO_TO_LUT(2),
- ZERO_TO_LUT(3),
- ZERO_TO_LUT(4),
- ZERO_TO_LUT(5),
- ZERO_TO_LUT(6),
- ZERO_TO_LUT(7),
- ZERO_TO_LUT(8),
- ZERO_TO_LUT(9),
- ZERO_TO_LUT(10),
- ZERO_TO_LUT(11),
- ZERO_TO_LUT(12),
- ZERO_TO_LUT(13),
- ZERO_TO_LUT(14),
- ZERO_TO_LUT(15),
- ZERO_TO_LUT(16),
- ZERO_TO_LUT(17),
- ZERO_TO_LUT(18),
- ZERO_TO_LUT(19),
- ZERO_TO_LUT(20),
- ZERO_TO_LUT(21),
- ZERO_TO_LUT(22),
- ZERO_TO_LUT(23),
- ZERO_TO_LUT(24),
- ZERO_TO_LUT(25),
- ZERO_TO_LUT(26),
- ZERO_TO_LUT(27),
- ZERO_TO_LUT(28),
- ZERO_TO_LUT(29),
- ZERO_TO_LUT(30),
- ZERO_TO_LUT(31),
- ZERO_TO_LUT(32),
- ZERO_TO_LUT(33),
- ZERO_TO_LUT(34),
- ZERO_TO_LUT(35),
- ZERO_TO_LUT(36),
- ZERO_TO_LUT(37),
- ZERO_TO_LUT(38),
- ZERO_TO_LUT(39),
- ZERO_TO_LUT(40),
- ZERO_TO_LUT(41),
- ZERO_TO_LUT(42),
- ZERO_TO_LUT(43),
- ZERO_TO_LUT(44),
- ZERO_TO_LUT(45),
- ZERO_TO_LUT(46),
- ZERO_TO_LUT(47),
- ZERO_TO_LUT(48),
- ZERO_TO_LUT(49),
- ZERO_TO_LUT(50),
- ZERO_TO_LUT(51),
- ZERO_TO_LUT(52),
- ZERO_TO_LUT(53),
- ZERO_TO_LUT(54),
- ZERO_TO_LUT(55),
- ZERO_TO_LUT(56),
- ZERO_TO_LUT(57),
- ZERO_TO_LUT(58),
- ZERO_TO_LUT(59),
- ZERO_TO_LUT(60),
- ZERO_TO_LUT(61),
- ZERO_TO_LUT(62),
- ZERO_TO_LUT(63),
- ~0ULL
-};
-
-const u32 mmbit_root_offset_from_level[7] = {
- 0,
- 1,
- 1 + (1 << MMB_KEY_SHIFT),
- 1 + (1 << MMB_KEY_SHIFT) + (1 << MMB_KEY_SHIFT * 2),
- 1 + (1 << MMB_KEY_SHIFT) + (1 << MMB_KEY_SHIFT * 2) + (1 << MMB_KEY_SHIFT * 3),
- 1 + (1 << MMB_KEY_SHIFT) + (1 << MMB_KEY_SHIFT * 2) + (1 << MMB_KEY_SHIFT * 3) + (1 << MMB_KEY_SHIFT * 4),
- 1 + (1 << MMB_KEY_SHIFT) + (1 << MMB_KEY_SHIFT * 2) + (1 << MMB_KEY_SHIFT * 3) + (1 << MMB_KEY_SHIFT * 4) + (1 << MMB_KEY_SHIFT * 5),
-};
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Multibit: lookup tables and support code.
+ *
+ * This C file contains the constant tables used by multibit, so we don't end
+ * up creating copies of them for every unit that uses it.
+ */
+
+#include "multibit.h"
+#include "ue2common.h"
+
+const u8 mmbit_keyshift_lut[32] = {
+ 30, 30, 24, 24, 24, 24, 24, 24, 18, 18, 18,
+ 18, 18, 18, 12, 12, 12, 12, 12, 12, 6, 6,
+ 6, 6, 6, 6, 0, 0, 0, 0, 0, 0
+};
+
+// The only actually valid values of ks are as shown in the LUT above, but a
+// division is just too expensive.
+const u8 mmbit_maxlevel_from_keyshift_lut[32] = {
+ 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4,
+ 5, 5
+};
+
+const u8 mmbit_maxlevel_direct_lut[32] = {
+ 5, 5, 4, 4, 4, 4, 4, 4, 3, 3, 3,
+ 3, 3, 3, 2, 2, 2, 2, 2, 2, 1, 1,
+ 1, 1, 1, 1, 0, 0, 0, 0, 0, 0
+};
+
+#define ZERO_TO_LUT(x) ((1ULL << x) - 1)
+
+const u64a mmbit_zero_to_lut[65] = {
+ ZERO_TO_LUT(0),
+ ZERO_TO_LUT(1),
+ ZERO_TO_LUT(2),
+ ZERO_TO_LUT(3),
+ ZERO_TO_LUT(4),
+ ZERO_TO_LUT(5),
+ ZERO_TO_LUT(6),
+ ZERO_TO_LUT(7),
+ ZERO_TO_LUT(8),
+ ZERO_TO_LUT(9),
+ ZERO_TO_LUT(10),
+ ZERO_TO_LUT(11),
+ ZERO_TO_LUT(12),
+ ZERO_TO_LUT(13),
+ ZERO_TO_LUT(14),
+ ZERO_TO_LUT(15),
+ ZERO_TO_LUT(16),
+ ZERO_TO_LUT(17),
+ ZERO_TO_LUT(18),
+ ZERO_TO_LUT(19),
+ ZERO_TO_LUT(20),
+ ZERO_TO_LUT(21),
+ ZERO_TO_LUT(22),
+ ZERO_TO_LUT(23),
+ ZERO_TO_LUT(24),
+ ZERO_TO_LUT(25),
+ ZERO_TO_LUT(26),
+ ZERO_TO_LUT(27),
+ ZERO_TO_LUT(28),
+ ZERO_TO_LUT(29),
+ ZERO_TO_LUT(30),
+ ZERO_TO_LUT(31),
+ ZERO_TO_LUT(32),
+ ZERO_TO_LUT(33),
+ ZERO_TO_LUT(34),
+ ZERO_TO_LUT(35),
+ ZERO_TO_LUT(36),
+ ZERO_TO_LUT(37),
+ ZERO_TO_LUT(38),
+ ZERO_TO_LUT(39),
+ ZERO_TO_LUT(40),
+ ZERO_TO_LUT(41),
+ ZERO_TO_LUT(42),
+ ZERO_TO_LUT(43),
+ ZERO_TO_LUT(44),
+ ZERO_TO_LUT(45),
+ ZERO_TO_LUT(46),
+ ZERO_TO_LUT(47),
+ ZERO_TO_LUT(48),
+ ZERO_TO_LUT(49),
+ ZERO_TO_LUT(50),
+ ZERO_TO_LUT(51),
+ ZERO_TO_LUT(52),
+ ZERO_TO_LUT(53),
+ ZERO_TO_LUT(54),
+ ZERO_TO_LUT(55),
+ ZERO_TO_LUT(56),
+ ZERO_TO_LUT(57),
+ ZERO_TO_LUT(58),
+ ZERO_TO_LUT(59),
+ ZERO_TO_LUT(60),
+ ZERO_TO_LUT(61),
+ ZERO_TO_LUT(62),
+ ZERO_TO_LUT(63),
+ ~0ULL
+};
+
+const u32 mmbit_root_offset_from_level[7] = {
+ 0,
+ 1,
+ 1 + (1 << MMB_KEY_SHIFT),
+ 1 + (1 << MMB_KEY_SHIFT) + (1 << MMB_KEY_SHIFT * 2),
+ 1 + (1 << MMB_KEY_SHIFT) + (1 << MMB_KEY_SHIFT * 2) + (1 << MMB_KEY_SHIFT * 3),
+ 1 + (1 << MMB_KEY_SHIFT) + (1 << MMB_KEY_SHIFT * 2) + (1 << MMB_KEY_SHIFT * 3) + (1 << MMB_KEY_SHIFT * 4),
+ 1 + (1 << MMB_KEY_SHIFT) + (1 << MMB_KEY_SHIFT * 2) + (1 << MMB_KEY_SHIFT * 3) + (1 << MMB_KEY_SHIFT * 4) + (1 << MMB_KEY_SHIFT * 5),
+};
diff --git a/contrib/libs/hyperscan/src/util/multibit.h b/contrib/libs/hyperscan/src/util/multibit.h
index 2fb3c948a2..c3a4ba461a 100644
--- a/contrib/libs/hyperscan/src/util/multibit.h
+++ b/contrib/libs/hyperscan/src/util/multibit.h
@@ -1,670 +1,670 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Multibit: fast bitset structure, main runtime.
- *
- * *Structure*
- *
- * For sizes <= MMB_FLAT_MAX_BITS, a flat bit vector is used, stored as N
- * 64-bit blocks followed by one "runt block".
- *
- * In larger cases, we use a sequence of blocks forming a tree. Each bit in an
- * internal block indicates whether its child block contains valid data. Every
- * level bar the last is complete. The last level is just a basic bit vector.
- *
- * -----------------------------------------------------------------------------
- * WARNING:
- *
- * mmbit code assumes that it is legal to load 8 bytes before the end of the
- * mmbit. This means that for small mmbits (< 8byte), data may be read from
- * before the base pointer. It is the user's responsibility to ensure that this
- * is possible.
- * -----------------------------------------------------------------------------
- */
-#ifndef MULTIBIT_H
-#define MULTIBIT_H
-
-#include "config.h"
-#include "ue2common.h"
-#include "bitutils.h"
-#include "partial_store.h"
-#include "unaligned.h"
-#include "multibit_internal.h"
-
-#include <string.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define MMB_ONE (1ULL)
-#define MMB_ALL_ONES (0xffffffffffffffffULL)
-
-/** \brief Number of bits in a block. */
-#define MMB_KEY_BITS (sizeof(MMB_TYPE) * 8)
-
-#define MMB_KEY_MASK (MMB_KEY_BITS - 1)
-
-// Key structure defines
-#define MMB_KEY_SHIFT 6
-
-/** \brief Max size of a flat multibit. */
-#define MMB_FLAT_MAX_BITS 256
-
-// Utility functions and data
-// see multibit.c for contents
-extern const u8 mmbit_keyshift_lut[32];
-extern const u8 mmbit_maxlevel_from_keyshift_lut[32];
-extern const u8 mmbit_maxlevel_direct_lut[32];
-extern const u32 mmbit_root_offset_from_level[7];
-extern const u64a mmbit_zero_to_lut[65];
-
-static really_inline
-MMB_TYPE mmb_load(const u8 * bits) {
- return unaligned_load_u64a(bits);
-}
-
-static really_inline
-void mmb_store(u8 *bits, MMB_TYPE val) {
- unaligned_store_u64a(bits, val);
-}
-
-static really_inline
-void mmb_store_partial(u8 *bits, MMB_TYPE val, u32 block_bits) {
- assert(block_bits <= MMB_KEY_BITS);
- partial_store_u64a(bits, val, ROUNDUP_N(block_bits, 8U) / 8U);
-}
-
-static really_inline
-MMB_TYPE mmb_single_bit(u32 bit) {
- assert(bit < MMB_KEY_BITS);
- return MMB_ONE << bit;
-}
-
-static really_inline
-MMB_TYPE mmb_mask_zero_to(u32 bit) {
- assert(bit <= MMB_KEY_BITS);
-#ifdef ARCH_32_BIT
- return mmbit_zero_to_lut[bit];
-#else
- if (bit == MMB_KEY_BITS) {
- return MMB_ALL_ONES;
- } else {
- return mmb_single_bit(bit) - MMB_ONE;
- }
-#endif
-}
-
-/** \brief Returns a mask of set bits up to position \a bit. Does not handle
- * the case where bit == MMB_KEY_BITS. */
-static really_inline
-MMB_TYPE mmb_mask_zero_to_nocheck(u32 bit) {
- assert(bit < MMB_KEY_BITS);
-#ifdef ARCH_32_BIT
- return mmbit_zero_to_lut[bit];
-#else
- return mmb_single_bit(bit) - MMB_ONE;
-#endif
-}
-
-static really_inline
-u32 mmb_test(MMB_TYPE val, u32 bit) {
- assert(bit < MMB_KEY_BITS);
- return (val >> bit) & MMB_ONE;
-}
-
-static really_inline
-void mmb_set(MMB_TYPE * val, u32 bit) {
- assert(bit < MMB_KEY_BITS);
- *val |= mmb_single_bit(bit);
-}
-
-static really_inline
-void mmb_clear(MMB_TYPE * val, u32 bit) {
- assert(bit < MMB_KEY_BITS);
- *val &= ~mmb_single_bit(bit);
-}
-
-static really_inline
-u32 mmb_ctz(MMB_TYPE val) {
- return ctz64(val);
-}
-
-static really_inline
-u32 mmb_popcount(MMB_TYPE val) {
- return popcount64(val);
-}
-
-#ifndef MMMB_DEBUG
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Multibit: fast bitset structure, main runtime.
+ *
+ * *Structure*
+ *
+ * For sizes <= MMB_FLAT_MAX_BITS, a flat bit vector is used, stored as N
+ * 64-bit blocks followed by one "runt block".
+ *
+ * In larger cases, we use a sequence of blocks forming a tree. Each bit in an
+ * internal block indicates whether its child block contains valid data. Every
+ * level bar the last is complete. The last level is just a basic bit vector.
+ *
+ * -----------------------------------------------------------------------------
+ * WARNING:
+ *
+ * mmbit code assumes that it is legal to load 8 bytes before the end of the
+ * mmbit. This means that for small mmbits (< 8byte), data may be read from
+ * before the base pointer. It is the user's responsibility to ensure that this
+ * is possible.
+ * -----------------------------------------------------------------------------
+ */
+#ifndef MULTIBIT_H
+#define MULTIBIT_H
+
+#include "config.h"
+#include "ue2common.h"
+#include "bitutils.h"
+#include "partial_store.h"
+#include "unaligned.h"
+#include "multibit_internal.h"
+
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MMB_ONE (1ULL)
+#define MMB_ALL_ONES (0xffffffffffffffffULL)
+
+/** \brief Number of bits in a block. */
+#define MMB_KEY_BITS (sizeof(MMB_TYPE) * 8)
+
+#define MMB_KEY_MASK (MMB_KEY_BITS - 1)
+
+// Key structure defines
+#define MMB_KEY_SHIFT 6
+
+/** \brief Max size of a flat multibit. */
+#define MMB_FLAT_MAX_BITS 256
+
+// Utility functions and data
+// see multibit.c for contents
+extern const u8 mmbit_keyshift_lut[32];
+extern const u8 mmbit_maxlevel_from_keyshift_lut[32];
+extern const u8 mmbit_maxlevel_direct_lut[32];
+extern const u32 mmbit_root_offset_from_level[7];
+extern const u64a mmbit_zero_to_lut[65];
+
+static really_inline
+MMB_TYPE mmb_load(const u8 * bits) {
+ return unaligned_load_u64a(bits);
+}
+
+static really_inline
+void mmb_store(u8 *bits, MMB_TYPE val) {
+ unaligned_store_u64a(bits, val);
+}
+
+static really_inline
+void mmb_store_partial(u8 *bits, MMB_TYPE val, u32 block_bits) {
+ assert(block_bits <= MMB_KEY_BITS);
+ partial_store_u64a(bits, val, ROUNDUP_N(block_bits, 8U) / 8U);
+}
+
+static really_inline
+MMB_TYPE mmb_single_bit(u32 bit) {
+ assert(bit < MMB_KEY_BITS);
+ return MMB_ONE << bit;
+}
+
+static really_inline
+MMB_TYPE mmb_mask_zero_to(u32 bit) {
+ assert(bit <= MMB_KEY_BITS);
+#ifdef ARCH_32_BIT
+ return mmbit_zero_to_lut[bit];
+#else
+ if (bit == MMB_KEY_BITS) {
+ return MMB_ALL_ONES;
+ } else {
+ return mmb_single_bit(bit) - MMB_ONE;
+ }
+#endif
+}
+
+/** \brief Returns a mask of set bits up to position \a bit. Does not handle
+ * the case where bit == MMB_KEY_BITS. */
+static really_inline
+MMB_TYPE mmb_mask_zero_to_nocheck(u32 bit) {
+ assert(bit < MMB_KEY_BITS);
+#ifdef ARCH_32_BIT
+ return mmbit_zero_to_lut[bit];
+#else
+ return mmb_single_bit(bit) - MMB_ONE;
+#endif
+}
+
+static really_inline
+u32 mmb_test(MMB_TYPE val, u32 bit) {
+ assert(bit < MMB_KEY_BITS);
+ return (val >> bit) & MMB_ONE;
+}
+
+static really_inline
+void mmb_set(MMB_TYPE * val, u32 bit) {
+ assert(bit < MMB_KEY_BITS);
+ *val |= mmb_single_bit(bit);
+}
+
+static really_inline
+void mmb_clear(MMB_TYPE * val, u32 bit) {
+ assert(bit < MMB_KEY_BITS);
+ *val &= ~mmb_single_bit(bit);
+}
+
+static really_inline
+u32 mmb_ctz(MMB_TYPE val) {
+ return ctz64(val);
+}
+
+static really_inline
+u32 mmb_popcount(MMB_TYPE val) {
+ return popcount64(val);
+}
+
+#ifndef MMMB_DEBUG
#define MDEBUG_PRINTF(x, ...) do { } while(0)
-#else
-#define MDEBUG_PRINTF DEBUG_PRINTF
-#endif
-
-// Switch the following define on to trace writes to multibit.
-//#define MMB_TRACE_WRITES
-#ifdef MMB_TRACE_WRITES
-#define MMB_TRACE(format, ...) \
- printf("mmb [%u bits @ %p] " format, total_bits, bits, ##__VA_ARGS__)
-#else
-#define MMB_TRACE(format, ...) \
- do { \
- } while (0)
-#endif
-
-static really_inline
-u32 mmbit_keyshift(u32 total_bits) {
- assert(total_bits > 1);
- u32 n = clz32(total_bits - 1); // subtract one as we're rounding down
- return mmbit_keyshift_lut[n];
-}
-
-static really_inline
-u32 mmbit_maxlevel(u32 total_bits) {
- assert(total_bits > 1);
- u32 n = clz32(total_bits - 1); // subtract one as we're rounding down
- u32 max_level = mmbit_maxlevel_direct_lut[n];
- assert(max_level <= MMB_MAX_LEVEL);
- return max_level;
-}
-
-static really_inline
-u32 mmbit_maxlevel_from_keyshift(u32 ks) {
- assert(ks <= 30);
- assert(ks % MMB_KEY_SHIFT == 0);
-
- u32 max_level = mmbit_maxlevel_from_keyshift_lut[ks];
- assert(max_level <= MMB_MAX_LEVEL);
- return max_level;
-}
-
-/** \brief get our keyshift for the current level */
-static really_inline
-u32 mmbit_get_ks(u32 max_level, u32 level) {
- assert(max_level <= MMB_MAX_LEVEL);
- assert(level <= max_level);
- return (max_level - level) * MMB_KEY_SHIFT;
-}
-
-/** \brief get our key value for the current level */
-static really_inline
-u32 mmbit_get_key_val(u32 max_level, u32 level, u32 key) {
- return (key >> mmbit_get_ks(max_level, level)) & MMB_KEY_MASK;
-}
-
-/** \brief get the level root for the current level */
-static really_inline
-u8 *mmbit_get_level_root(u8 *bits, u32 level) {
- assert(level < ARRAY_LENGTH(mmbit_root_offset_from_level));
- return bits + mmbit_root_offset_from_level[level] * sizeof(MMB_TYPE);
-}
-
-/** \brief get the level root for the current level as const */
-static really_inline
-const u8 *mmbit_get_level_root_const(const u8 *bits, u32 level) {
- assert(level < ARRAY_LENGTH(mmbit_root_offset_from_level));
- return bits + mmbit_root_offset_from_level[level] * sizeof(MMB_TYPE);
-}
-
-/** \brief get the block for this key on the current level as a u8 ptr */
-static really_inline
-u8 *mmbit_get_block_ptr(u8 *bits, u32 max_level, u32 level, u32 key) {
- u8 *level_root = mmbit_get_level_root(bits, level);
- u32 ks = mmbit_get_ks(max_level, level);
- return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT)) * sizeof(MMB_TYPE);
-}
-
-/** \brief get the block for this key on the current level as a const u8 ptr */
-static really_inline
-const u8 *mmbit_get_block_ptr_const(const u8 *bits, u32 max_level, u32 level,
- u32 key) {
- const u8 *level_root = mmbit_get_level_root_const(bits, level);
- u32 ks = mmbit_get_ks(max_level, level);
- return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT)) * sizeof(MMB_TYPE);
-}
-
-/** \brief get the _byte_ for this key on the current level as a u8 ptr */
-static really_inline
-u8 *mmbit_get_byte_ptr(u8 *bits, u32 max_level, u32 level, u32 key) {
- u8 *level_root = mmbit_get_level_root(bits, level);
- u32 ks = mmbit_get_ks(max_level, level);
- return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT - 3));
-}
-
-/** \brief get our key value for the current level */
-static really_inline
-u32 mmbit_get_key_val_byte(u32 max_level, u32 level, u32 key) {
- return (key >> (mmbit_get_ks(max_level, level))) & 0x7;
-}
-
-/** \brief Load a flat bitvector block corresponding to N bits. */
-static really_inline
-MMB_TYPE mmbit_get_flat_block(const u8 *bits, u32 n_bits) {
- assert(n_bits <= MMB_KEY_BITS);
- u32 n_bytes = ROUNDUP_N(n_bits, 8) / 8;
- switch (n_bytes) {
- case 1:
- return *bits;
- case 2:
- return unaligned_load_u16(bits);
- case 3:
- case 4: {
- u32 rv;
- assert(n_bytes <= sizeof(rv));
- memcpy(&rv, bits + n_bytes - sizeof(rv), sizeof(rv));
- rv >>= (sizeof(rv) - n_bytes) * 8; /* need to shift to get things in
- * the right position and remove
- * junk */
- assert(rv == partial_load_u32(bits, n_bytes));
- return rv;
- }
- default: {
- u64a rv;
- assert(n_bytes <= sizeof(rv));
- memcpy(&rv, bits + n_bytes - sizeof(rv), sizeof(rv));
- rv >>= (sizeof(rv) - n_bytes) * 8; /* need to shift to get things in
- * the right position and remove
- * junk */
- assert(rv == partial_load_u64a(bits, n_bytes));
- return rv;
- }
- }
-}
-
-/** \brief True if this multibit is small enough to use a flat model */
-static really_inline
-u32 mmbit_is_flat_model(u32 total_bits) {
- return total_bits <= MMB_FLAT_MAX_BITS;
-}
-
-static really_inline
-u32 mmbit_flat_size(u32 total_bits) {
- assert(mmbit_is_flat_model(total_bits));
- return ROUNDUP_N(total_bits, 8) / 8;
-}
-
-static really_inline
-u32 mmbit_flat_select_byte(u32 key, UNUSED u32 total_bits) {
- return key / 8;
-}
-
-/** \brief returns the dense index of the bit in the given mask. */
-static really_inline
-u32 mmbit_mask_index(u32 bit, MMB_TYPE mask) {
- assert(bit < MMB_KEY_BITS);
- assert(mmb_test(mask, bit));
-
- mask &= mmb_mask_zero_to(bit);
- if (mask == 0ULL) {
- return 0; // Common case.
- }
- return mmb_popcount(mask);
-}
-
-/** \brief Clear all bits. */
-static really_inline
-void mmbit_clear(u8 *bits, u32 total_bits) {
- MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
- MMB_TRACE("CLEAR\n");
- if (!total_bits) {
- return;
- }
- if (mmbit_is_flat_model(total_bits)) {
- memset(bits, 0, mmbit_flat_size(total_bits));
- return;
- }
- mmb_store(bits, 0);
-}
-
-/** \brief Specialisation of \ref mmbit_set for flat models. */
-static really_inline
-char mmbit_set_flat(u8 *bits, u32 total_bits, u32 key) {
- bits += mmbit_flat_select_byte(key, total_bits);
- u8 mask = 1U << (key % 8);
- char was_set = !!(*bits & mask);
- *bits |= mask;
- return was_set;
-}
-
-static really_inline
-char mmbit_set_big(u8 *bits, u32 total_bits, u32 key) {
- const u32 max_level = mmbit_maxlevel(total_bits);
- u32 level = 0;
- do {
- u8 * byte_ptr = mmbit_get_byte_ptr(bits, max_level, level, key);
- u8 keymask = 1U << mmbit_get_key_val_byte(max_level, level, key);
- u8 byte = *byte_ptr;
- if (likely(!(byte & keymask))) {
- *byte_ptr = byte | keymask;
- while (level++ != max_level) {
- u8 *block_ptr_1 = mmbit_get_block_ptr(bits, max_level, level, key);
- MMB_TYPE keymask_1 = mmb_single_bit(mmbit_get_key_val(max_level, level, key));
- mmb_store(block_ptr_1, keymask_1);
- }
- return 0;
- }
- } while (level++ != max_level);
- return 1;
-}
-
-/** Internal version of \ref mmbit_set without MMB_TRACE, so it can be used by
- * \ref mmbit_sparse_iter_dump. */
-static really_inline
-char mmbit_set_i(u8 *bits, u32 total_bits, u32 key) {
- assert(key < total_bits);
- if (mmbit_is_flat_model(total_bits)) {
- return mmbit_set_flat(bits, total_bits, key);
- } else {
- return mmbit_set_big(bits, total_bits, key);
- }
-}
-
-static really_inline
-char mmbit_isset(const u8 *bits, u32 total_bits, u32 key);
-
-/** \brief Sets the given key in the multibit. Returns 0 if the key was NOT
- * already set, 1 otherwise. */
-static really_inline
-char mmbit_set(u8 *bits, u32 total_bits, u32 key) {
- MDEBUG_PRINTF("%p total_bits %u key %u\n", bits, total_bits, key);
- char status = mmbit_set_i(bits, total_bits, key);
- MMB_TRACE("SET %u (prev status: %d)\n", key, (int)status);
- assert(mmbit_isset(bits, total_bits, key));
- return status;
-}
-
-/** \brief Specialisation of \ref mmbit_isset for flat models. */
-static really_inline
-char mmbit_isset_flat(const u8 *bits, u32 total_bits, u32 key) {
- bits += mmbit_flat_select_byte(key, total_bits);
- return !!(*bits & (1U << (key % 8U)));
-}
-
-static really_inline
-char mmbit_isset_big(const u8 *bits, u32 total_bits, u32 key) {
- const u32 max_level = mmbit_maxlevel(total_bits);
- u32 level = 0;
- do {
- const u8 *block_ptr = mmbit_get_block_ptr_const(bits, max_level, level, key);
- MMB_TYPE block = mmb_load(block_ptr);
- if (!mmb_test(block, mmbit_get_key_val(max_level, level, key))) {
- return 0;
- }
- } while (level++ != max_level);
- return 1;
-}
-
-/** \brief Returns whether the given key is set. */
-static really_inline
-char mmbit_isset(const u8 *bits, u32 total_bits, u32 key) {
- MDEBUG_PRINTF("%p total_bits %u key %u\n", bits, total_bits, key);
- assert(key < total_bits);
- if (mmbit_is_flat_model(total_bits)) {
- return mmbit_isset_flat(bits, total_bits, key);
- } else {
- return mmbit_isset_big(bits, total_bits, key);
- }
-}
-
-/** \brief Specialisation of \ref mmbit_unset for flat models. */
-static really_inline
-void mmbit_unset_flat(u8 *bits, u32 total_bits, u32 key) {
- bits += mmbit_flat_select_byte(key, total_bits);
- *bits &= ~(1U << (key % 8U));
-}
-
-// TODO:
-// build two versions of this - unset_dangerous that doesn't clear the summary
-// block and a regular unset that actually clears ALL the way up the levels if
-// possible - might make a utility function for the clear
-static really_inline
-void mmbit_unset_big(u8 *bits, u32 total_bits, u32 key) {
- /* This function is lazy as it does not clear the summary block
- * entry if the child becomes empty. This is not a correctness problem as the
- * summary block entries are used to mean that their children are valid
- * rather than that they have a set child. */
- const u32 max_level = mmbit_maxlevel(total_bits);
- u32 level = 0;
- do {
- u8 *block_ptr = mmbit_get_block_ptr(bits, max_level, level, key);
- u32 key_val = mmbit_get_key_val(max_level, level, key);
- MMB_TYPE block = mmb_load(block_ptr);
- if (!mmb_test(block, key_val)) {
- return;
- }
- if (level == max_level) {
- mmb_clear(&block, key_val);
- mmb_store(block_ptr, block);
- }
- } while (level++ != max_level);
-}
-
-/** \brief Switch off a given key. */
-static really_inline
-void mmbit_unset(u8 *bits, u32 total_bits, u32 key) {
- MDEBUG_PRINTF("%p total_bits %u key %u\n", bits, total_bits, key);
- assert(key < total_bits);
- MMB_TRACE("UNSET %u (prev status: %d)\n", key,
- (int)mmbit_isset(bits, total_bits, key));
-
- if (mmbit_is_flat_model(total_bits)) {
- mmbit_unset_flat(bits, total_bits, key);
- } else {
- mmbit_unset_big(bits, total_bits, key);
- }
-}
-
-/** \brief Specialisation of \ref mmbit_iterate for flat models. */
-static really_inline
-u32 mmbit_iterate_flat(const u8 *bits, u32 total_bits, u32 it_in) {
- // Short cut for single-block cases.
- if (total_bits <= MMB_KEY_BITS) {
- MMB_TYPE block = mmbit_get_flat_block(bits, total_bits);
- if (it_in != MMB_INVALID) {
- it_in++;
- assert(it_in < total_bits);
- block &= ~mmb_mask_zero_to(it_in);
- }
- if (block) {
- return mmb_ctz(block);
- }
- return MMB_INVALID;
- }
-
- const u32 last_block = total_bits / MMB_KEY_BITS;
- u32 start; // starting block index
-
- if (it_in != MMB_INVALID) {
- it_in++;
- assert(it_in < total_bits);
-
- start = (ROUNDUP_N(it_in, MMB_KEY_BITS) / MMB_KEY_BITS) - 1;
- u32 start_key = start * MMB_KEY_BITS;
- u32 block_size = MIN(MMB_KEY_BITS, total_bits - start_key);
- MMB_TYPE block =
- mmbit_get_flat_block(bits + (start * sizeof(MMB_TYPE)), block_size);
- block &= ~mmb_mask_zero_to(it_in - start_key);
-
- if (block) {
- return start_key + mmb_ctz(block);
- } else if (start_key + MMB_KEY_BITS >= total_bits) {
- return MMB_INVALID; // That was the final block.
- }
- start++;
- } else {
- start = 0;
- }
-
- // Remaining full-sized blocks.
- for (; start < last_block; start++) {
- MMB_TYPE block = mmb_load(bits + (start * sizeof(MMB_TYPE)));
- if (block) {
- return (start * MMB_KEY_BITS) + mmb_ctz(block);
- }
- }
-
- // We may have a final, smaller than full-sized, block to deal with at the
- // end.
- if (total_bits % MMB_KEY_BITS) {
- u32 start_key = start * MMB_KEY_BITS;
- u32 block_size = MIN(MMB_KEY_BITS, total_bits - start_key);
- MMB_TYPE block =
- mmbit_get_flat_block(bits + (start * sizeof(MMB_TYPE)), block_size);
- if (block) {
- return start_key + mmb_ctz(block);
- }
- }
-
- return MMB_INVALID;
-}
-
-static really_inline
-u32 mmbit_iterate_big(const u8 * bits, u32 total_bits, u32 it_in) {
- const u32 max_level = mmbit_maxlevel(total_bits);
- u32 level = 0;
- u32 key = 0;
- u32 key_rem = 0;
-
- if (it_in != MMB_INVALID) {
- // We're continuing a previous iteration, so we need to go
- // to max_level so we can pick up where we left off.
- // NOTE: assumes that we're valid down the whole tree
- key = it_in >> MMB_KEY_SHIFT;
- key_rem = (it_in & MMB_KEY_MASK) + 1;
- level = max_level;
- }
- while (1) {
- if (key_rem < MMB_KEY_BITS) {
- const u8 *block_ptr = mmbit_get_level_root_const(bits, level) +
- key * sizeof(MMB_TYPE);
- MMB_TYPE block
- = mmb_load(block_ptr) & ~mmb_mask_zero_to_nocheck(key_rem);
- if (block) {
- key = (key << MMB_KEY_SHIFT) + mmb_ctz(block);
- if (level++ == max_level) {
- break;
- }
- key_rem = 0;
- continue; // jump the rootwards step if we found a 'tree' non-zero bit
- }
- }
- // rootwards step (block is zero or key_rem == MMB_KEY_BITS)
- if (level-- == 0) {
- return MMB_INVALID; // if we don't find anything and we're at the top level, we're done
- }
- key_rem = (key & MMB_KEY_MASK) + 1;
- key >>= MMB_KEY_SHIFT;
- }
- assert(key < total_bits);
- assert(mmbit_isset(bits, total_bits, key));
- return key;
-}
-
-/** \brief Unbounded iterator. Returns the index of the next set bit after \a
- * it_in, or MMB_INVALID.
- *
- * Note: assumes that if you pass in a value of it_in other than MMB_INVALID,
- * that bit must be on (assumes all its summary blocks are set).
- */
-static really_inline
-u32 mmbit_iterate(const u8 *bits, u32 total_bits, u32 it_in) {
- MDEBUG_PRINTF("%p total_bits %u it_in %u\n", bits, total_bits, it_in);
- assert(it_in < total_bits || it_in == MMB_INVALID);
- if (!total_bits) {
- return MMB_INVALID;
- }
- if (it_in == total_bits - 1) {
- return MMB_INVALID; // it_in is the last key.
- }
-
- u32 key;
- if (mmbit_is_flat_model(total_bits)) {
- key = mmbit_iterate_flat(bits, total_bits, it_in);
- } else {
- key = mmbit_iterate_big(bits, total_bits, it_in);
- }
- assert(key == MMB_INVALID || mmbit_isset(bits, total_bits, key));
- return key;
-}
-
-/** \brief Specialisation of \ref mmbit_any and \ref mmbit_any_precise for flat
- * models. */
-static really_inline
-char mmbit_any_flat(const u8 *bits, u32 total_bits) {
- if (total_bits <= MMB_KEY_BITS) {
- return !!mmbit_get_flat_block(bits, total_bits);
- }
-
- const u8 *end = bits + mmbit_flat_size(total_bits);
- for (const u8 *last = end - sizeof(MMB_TYPE); bits < last;
- bits += sizeof(MMB_TYPE)) {
- if (mmb_load(bits)) {
- return 1;
- }
- }
-
- // Overlapping load at the end.
- return !!mmb_load(end - sizeof(MMB_TYPE));
-}
-
-/** \brief True if any keys are (or might be) on in the given multibit.
- *
- * NOTE: mmbit_any is sloppy (may return true when only summary bits are set).
- * Use \ref mmbit_any_precise if you need/want a correct answer.
- */
-static really_inline
-char mmbit_any(const u8 *bits, u32 total_bits) {
- MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
- if (!total_bits) {
- return 0;
- }
- if (mmbit_is_flat_model(total_bits)) {
- return mmbit_any_flat(bits, total_bits);
- }
- return !!mmb_load(bits);
-}
-
-/** \brief True if there are any keys on. Guaranteed precise. */
-static really_inline
-char mmbit_any_precise(const u8 *bits, u32 total_bits) {
- MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
- if (!total_bits) {
- return 0;
- }
- if (mmbit_is_flat_model(total_bits)) {
- return mmbit_any_flat(bits, total_bits);
- }
-
- return mmbit_iterate_big(bits, total_bits, MMB_INVALID) != MMB_INVALID;
-}
-
-static really_inline
+#else
+#define MDEBUG_PRINTF DEBUG_PRINTF
+#endif
+
+// Switch the following define on to trace writes to multibit.
+//#define MMB_TRACE_WRITES
+#ifdef MMB_TRACE_WRITES
+#define MMB_TRACE(format, ...) \
+ printf("mmb [%u bits @ %p] " format, total_bits, bits, ##__VA_ARGS__)
+#else
+#define MMB_TRACE(format, ...) \
+ do { \
+ } while (0)
+#endif
+
+static really_inline
+u32 mmbit_keyshift(u32 total_bits) {
+ assert(total_bits > 1);
+ u32 n = clz32(total_bits - 1); // subtract one as we're rounding down
+ return mmbit_keyshift_lut[n];
+}
+
+static really_inline
+u32 mmbit_maxlevel(u32 total_bits) {
+ assert(total_bits > 1);
+ u32 n = clz32(total_bits - 1); // subtract one as we're rounding down
+ u32 max_level = mmbit_maxlevel_direct_lut[n];
+ assert(max_level <= MMB_MAX_LEVEL);
+ return max_level;
+}
+
+static really_inline
+u32 mmbit_maxlevel_from_keyshift(u32 ks) {
+ assert(ks <= 30);
+ assert(ks % MMB_KEY_SHIFT == 0);
+
+ u32 max_level = mmbit_maxlevel_from_keyshift_lut[ks];
+ assert(max_level <= MMB_MAX_LEVEL);
+ return max_level;
+}
+
+/** \brief get our keyshift for the current level */
+static really_inline
+u32 mmbit_get_ks(u32 max_level, u32 level) {
+ assert(max_level <= MMB_MAX_LEVEL);
+ assert(level <= max_level);
+ return (max_level - level) * MMB_KEY_SHIFT;
+}
+
+/** \brief get our key value for the current level */
+static really_inline
+u32 mmbit_get_key_val(u32 max_level, u32 level, u32 key) {
+ return (key >> mmbit_get_ks(max_level, level)) & MMB_KEY_MASK;
+}
+
+/** \brief get the level root for the current level */
+static really_inline
+u8 *mmbit_get_level_root(u8 *bits, u32 level) {
+ assert(level < ARRAY_LENGTH(mmbit_root_offset_from_level));
+ return bits + mmbit_root_offset_from_level[level] * sizeof(MMB_TYPE);
+}
+
+/** \brief get the level root for the current level as const */
+static really_inline
+const u8 *mmbit_get_level_root_const(const u8 *bits, u32 level) {
+ assert(level < ARRAY_LENGTH(mmbit_root_offset_from_level));
+ return bits + mmbit_root_offset_from_level[level] * sizeof(MMB_TYPE);
+}
+
+/** \brief get the block for this key on the current level as a u8 ptr */
+static really_inline
+u8 *mmbit_get_block_ptr(u8 *bits, u32 max_level, u32 level, u32 key) {
+ u8 *level_root = mmbit_get_level_root(bits, level);
+ u32 ks = mmbit_get_ks(max_level, level);
+ return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT)) * sizeof(MMB_TYPE);
+}
+
+/** \brief get the block for this key on the current level as a const u8 ptr */
+static really_inline
+const u8 *mmbit_get_block_ptr_const(const u8 *bits, u32 max_level, u32 level,
+ u32 key) {
+ const u8 *level_root = mmbit_get_level_root_const(bits, level);
+ u32 ks = mmbit_get_ks(max_level, level);
+ return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT)) * sizeof(MMB_TYPE);
+}
+
+/** \brief get the _byte_ for this key on the current level as a u8 ptr */
+static really_inline
+u8 *mmbit_get_byte_ptr(u8 *bits, u32 max_level, u32 level, u32 key) {
+ u8 *level_root = mmbit_get_level_root(bits, level);
+ u32 ks = mmbit_get_ks(max_level, level);
+ return level_root + ((u64a)key >> (ks + MMB_KEY_SHIFT - 3));
+}
+
+/** \brief get our key value for the current level */
+static really_inline
+u32 mmbit_get_key_val_byte(u32 max_level, u32 level, u32 key) {
+ return (key >> (mmbit_get_ks(max_level, level))) & 0x7;
+}
+
+/** \brief Load a flat bitvector block corresponding to N bits. */
+static really_inline
+MMB_TYPE mmbit_get_flat_block(const u8 *bits, u32 n_bits) {
+ assert(n_bits <= MMB_KEY_BITS);
+ u32 n_bytes = ROUNDUP_N(n_bits, 8) / 8;
+ switch (n_bytes) {
+ case 1:
+ return *bits;
+ case 2:
+ return unaligned_load_u16(bits);
+ case 3:
+ case 4: {
+ u32 rv;
+ assert(n_bytes <= sizeof(rv));
+ memcpy(&rv, bits + n_bytes - sizeof(rv), sizeof(rv));
+ rv >>= (sizeof(rv) - n_bytes) * 8; /* need to shift to get things in
+ * the right position and remove
+ * junk */
+ assert(rv == partial_load_u32(bits, n_bytes));
+ return rv;
+ }
+ default: {
+ u64a rv;
+ assert(n_bytes <= sizeof(rv));
+ memcpy(&rv, bits + n_bytes - sizeof(rv), sizeof(rv));
+ rv >>= (sizeof(rv) - n_bytes) * 8; /* need to shift to get things in
+ * the right position and remove
+ * junk */
+ assert(rv == partial_load_u64a(bits, n_bytes));
+ return rv;
+ }
+ }
+}
+
+/** \brief True if this multibit is small enough to use a flat model */
+static really_inline
+u32 mmbit_is_flat_model(u32 total_bits) {
+ return total_bits <= MMB_FLAT_MAX_BITS;
+}
+
+static really_inline
+u32 mmbit_flat_size(u32 total_bits) {
+ assert(mmbit_is_flat_model(total_bits));
+ return ROUNDUP_N(total_bits, 8) / 8;
+}
+
+static really_inline
+u32 mmbit_flat_select_byte(u32 key, UNUSED u32 total_bits) {
+ return key / 8;
+}
+
+/** \brief returns the dense index of the bit in the given mask. */
+static really_inline
+u32 mmbit_mask_index(u32 bit, MMB_TYPE mask) {
+ assert(bit < MMB_KEY_BITS);
+ assert(mmb_test(mask, bit));
+
+ mask &= mmb_mask_zero_to(bit);
+ if (mask == 0ULL) {
+ return 0; // Common case.
+ }
+ return mmb_popcount(mask);
+}
+
+/** \brief Clear all bits. */
+static really_inline
+void mmbit_clear(u8 *bits, u32 total_bits) {
+ MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
+ MMB_TRACE("CLEAR\n");
+ if (!total_bits) {
+ return;
+ }
+ if (mmbit_is_flat_model(total_bits)) {
+ memset(bits, 0, mmbit_flat_size(total_bits));
+ return;
+ }
+ mmb_store(bits, 0);
+}
+
+/** \brief Specialisation of \ref mmbit_set for flat models. */
+static really_inline
+char mmbit_set_flat(u8 *bits, u32 total_bits, u32 key) {
+ bits += mmbit_flat_select_byte(key, total_bits);
+ u8 mask = 1U << (key % 8);
+ char was_set = !!(*bits & mask);
+ *bits |= mask;
+ return was_set;
+}
+
+static really_inline
+char mmbit_set_big(u8 *bits, u32 total_bits, u32 key) {
+ const u32 max_level = mmbit_maxlevel(total_bits);
+ u32 level = 0;
+ do {
+ u8 * byte_ptr = mmbit_get_byte_ptr(bits, max_level, level, key);
+ u8 keymask = 1U << mmbit_get_key_val_byte(max_level, level, key);
+ u8 byte = *byte_ptr;
+ if (likely(!(byte & keymask))) {
+ *byte_ptr = byte | keymask;
+ while (level++ != max_level) {
+ u8 *block_ptr_1 = mmbit_get_block_ptr(bits, max_level, level, key);
+ MMB_TYPE keymask_1 = mmb_single_bit(mmbit_get_key_val(max_level, level, key));
+ mmb_store(block_ptr_1, keymask_1);
+ }
+ return 0;
+ }
+ } while (level++ != max_level);
+ return 1;
+}
+
+/** Internal version of \ref mmbit_set without MMB_TRACE, so it can be used by
+ * \ref mmbit_sparse_iter_dump. */
+static really_inline
+char mmbit_set_i(u8 *bits, u32 total_bits, u32 key) {
+ assert(key < total_bits);
+ if (mmbit_is_flat_model(total_bits)) {
+ return mmbit_set_flat(bits, total_bits, key);
+ } else {
+ return mmbit_set_big(bits, total_bits, key);
+ }
+}
+
+static really_inline
+char mmbit_isset(const u8 *bits, u32 total_bits, u32 key);
+
+/** \brief Sets the given key in the multibit. Returns 0 if the key was NOT
+ * already set, 1 otherwise. */
+static really_inline
+char mmbit_set(u8 *bits, u32 total_bits, u32 key) {
+ MDEBUG_PRINTF("%p total_bits %u key %u\n", bits, total_bits, key);
+ char status = mmbit_set_i(bits, total_bits, key);
+ MMB_TRACE("SET %u (prev status: %d)\n", key, (int)status);
+ assert(mmbit_isset(bits, total_bits, key));
+ return status;
+}
+
+/** \brief Specialisation of \ref mmbit_isset for flat models. */
+static really_inline
+char mmbit_isset_flat(const u8 *bits, u32 total_bits, u32 key) {
+ bits += mmbit_flat_select_byte(key, total_bits);
+ return !!(*bits & (1U << (key % 8U)));
+}
+
+static really_inline
+char mmbit_isset_big(const u8 *bits, u32 total_bits, u32 key) {
+ const u32 max_level = mmbit_maxlevel(total_bits);
+ u32 level = 0;
+ do {
+ const u8 *block_ptr = mmbit_get_block_ptr_const(bits, max_level, level, key);
+ MMB_TYPE block = mmb_load(block_ptr);
+ if (!mmb_test(block, mmbit_get_key_val(max_level, level, key))) {
+ return 0;
+ }
+ } while (level++ != max_level);
+ return 1;
+}
+
+/** \brief Returns whether the given key is set. */
+static really_inline
+char mmbit_isset(const u8 *bits, u32 total_bits, u32 key) {
+ MDEBUG_PRINTF("%p total_bits %u key %u\n", bits, total_bits, key);
+ assert(key < total_bits);
+ if (mmbit_is_flat_model(total_bits)) {
+ return mmbit_isset_flat(bits, total_bits, key);
+ } else {
+ return mmbit_isset_big(bits, total_bits, key);
+ }
+}
+
+/** \brief Specialisation of \ref mmbit_unset for flat models. */
+static really_inline
+void mmbit_unset_flat(u8 *bits, u32 total_bits, u32 key) {
+ bits += mmbit_flat_select_byte(key, total_bits);
+ *bits &= ~(1U << (key % 8U));
+}
+
+// TODO:
+// build two versions of this - unset_dangerous that doesn't clear the summary
+// block and a regular unset that actually clears ALL the way up the levels if
+// possible - might make a utility function for the clear
+static really_inline
+void mmbit_unset_big(u8 *bits, u32 total_bits, u32 key) {
+ /* This function is lazy as it does not clear the summary block
+ * entry if the child becomes empty. This is not a correctness problem as the
+ * summary block entries are used to mean that their children are valid
+ * rather than that they have a set child. */
+ const u32 max_level = mmbit_maxlevel(total_bits);
+ u32 level = 0;
+ do {
+ u8 *block_ptr = mmbit_get_block_ptr(bits, max_level, level, key);
+ u32 key_val = mmbit_get_key_val(max_level, level, key);
+ MMB_TYPE block = mmb_load(block_ptr);
+ if (!mmb_test(block, key_val)) {
+ return;
+ }
+ if (level == max_level) {
+ mmb_clear(&block, key_val);
+ mmb_store(block_ptr, block);
+ }
+ } while (level++ != max_level);
+}
+
+/** \brief Switch off a given key. */
+static really_inline
+void mmbit_unset(u8 *bits, u32 total_bits, u32 key) {
+ MDEBUG_PRINTF("%p total_bits %u key %u\n", bits, total_bits, key);
+ assert(key < total_bits);
+ MMB_TRACE("UNSET %u (prev status: %d)\n", key,
+ (int)mmbit_isset(bits, total_bits, key));
+
+ if (mmbit_is_flat_model(total_bits)) {
+ mmbit_unset_flat(bits, total_bits, key);
+ } else {
+ mmbit_unset_big(bits, total_bits, key);
+ }
+}
+
+/** \brief Specialisation of \ref mmbit_iterate for flat models. */
+static really_inline
+u32 mmbit_iterate_flat(const u8 *bits, u32 total_bits, u32 it_in) {
+ // Short cut for single-block cases.
+ if (total_bits <= MMB_KEY_BITS) {
+ MMB_TYPE block = mmbit_get_flat_block(bits, total_bits);
+ if (it_in != MMB_INVALID) {
+ it_in++;
+ assert(it_in < total_bits);
+ block &= ~mmb_mask_zero_to(it_in);
+ }
+ if (block) {
+ return mmb_ctz(block);
+ }
+ return MMB_INVALID;
+ }
+
+ const u32 last_block = total_bits / MMB_KEY_BITS;
+ u32 start; // starting block index
+
+ if (it_in != MMB_INVALID) {
+ it_in++;
+ assert(it_in < total_bits);
+
+ start = (ROUNDUP_N(it_in, MMB_KEY_BITS) / MMB_KEY_BITS) - 1;
+ u32 start_key = start * MMB_KEY_BITS;
+ u32 block_size = MIN(MMB_KEY_BITS, total_bits - start_key);
+ MMB_TYPE block =
+ mmbit_get_flat_block(bits + (start * sizeof(MMB_TYPE)), block_size);
+ block &= ~mmb_mask_zero_to(it_in - start_key);
+
+ if (block) {
+ return start_key + mmb_ctz(block);
+ } else if (start_key + MMB_KEY_BITS >= total_bits) {
+ return MMB_INVALID; // That was the final block.
+ }
+ start++;
+ } else {
+ start = 0;
+ }
+
+ // Remaining full-sized blocks.
+ for (; start < last_block; start++) {
+ MMB_TYPE block = mmb_load(bits + (start * sizeof(MMB_TYPE)));
+ if (block) {
+ return (start * MMB_KEY_BITS) + mmb_ctz(block);
+ }
+ }
+
+ // We may have a final, smaller than full-sized, block to deal with at the
+ // end.
+ if (total_bits % MMB_KEY_BITS) {
+ u32 start_key = start * MMB_KEY_BITS;
+ u32 block_size = MIN(MMB_KEY_BITS, total_bits - start_key);
+ MMB_TYPE block =
+ mmbit_get_flat_block(bits + (start * sizeof(MMB_TYPE)), block_size);
+ if (block) {
+ return start_key + mmb_ctz(block);
+ }
+ }
+
+ return MMB_INVALID;
+}
+
+static really_inline
+u32 mmbit_iterate_big(const u8 * bits, u32 total_bits, u32 it_in) {
+ const u32 max_level = mmbit_maxlevel(total_bits);
+ u32 level = 0;
+ u32 key = 0;
+ u32 key_rem = 0;
+
+ if (it_in != MMB_INVALID) {
+ // We're continuing a previous iteration, so we need to go
+ // to max_level so we can pick up where we left off.
+ // NOTE: assumes that we're valid down the whole tree
+ key = it_in >> MMB_KEY_SHIFT;
+ key_rem = (it_in & MMB_KEY_MASK) + 1;
+ level = max_level;
+ }
+ while (1) {
+ if (key_rem < MMB_KEY_BITS) {
+ const u8 *block_ptr = mmbit_get_level_root_const(bits, level) +
+ key * sizeof(MMB_TYPE);
+ MMB_TYPE block
+ = mmb_load(block_ptr) & ~mmb_mask_zero_to_nocheck(key_rem);
+ if (block) {
+ key = (key << MMB_KEY_SHIFT) + mmb_ctz(block);
+ if (level++ == max_level) {
+ break;
+ }
+ key_rem = 0;
+ continue; // jump the rootwards step if we found a 'tree' non-zero bit
+ }
+ }
+ // rootwards step (block is zero or key_rem == MMB_KEY_BITS)
+ if (level-- == 0) {
+ return MMB_INVALID; // if we don't find anything and we're at the top level, we're done
+ }
+ key_rem = (key & MMB_KEY_MASK) + 1;
+ key >>= MMB_KEY_SHIFT;
+ }
+ assert(key < total_bits);
+ assert(mmbit_isset(bits, total_bits, key));
+ return key;
+}
+
+/** \brief Unbounded iterator. Returns the index of the next set bit after \a
+ * it_in, or MMB_INVALID.
+ *
+ * Note: assumes that if you pass in a value of it_in other than MMB_INVALID,
+ * that bit must be on (assumes all its summary blocks are set).
+ */
+static really_inline
+u32 mmbit_iterate(const u8 *bits, u32 total_bits, u32 it_in) {
+ MDEBUG_PRINTF("%p total_bits %u it_in %u\n", bits, total_bits, it_in);
+ assert(it_in < total_bits || it_in == MMB_INVALID);
+ if (!total_bits) {
+ return MMB_INVALID;
+ }
+ if (it_in == total_bits - 1) {
+ return MMB_INVALID; // it_in is the last key.
+ }
+
+ u32 key;
+ if (mmbit_is_flat_model(total_bits)) {
+ key = mmbit_iterate_flat(bits, total_bits, it_in);
+ } else {
+ key = mmbit_iterate_big(bits, total_bits, it_in);
+ }
+ assert(key == MMB_INVALID || mmbit_isset(bits, total_bits, key));
+ return key;
+}
+
+/** \brief Specialisation of \ref mmbit_any and \ref mmbit_any_precise for flat
+ * models. */
+static really_inline
+char mmbit_any_flat(const u8 *bits, u32 total_bits) {
+ if (total_bits <= MMB_KEY_BITS) {
+ return !!mmbit_get_flat_block(bits, total_bits);
+ }
+
+ const u8 *end = bits + mmbit_flat_size(total_bits);
+ for (const u8 *last = end - sizeof(MMB_TYPE); bits < last;
+ bits += sizeof(MMB_TYPE)) {
+ if (mmb_load(bits)) {
+ return 1;
+ }
+ }
+
+ // Overlapping load at the end.
+ return !!mmb_load(end - sizeof(MMB_TYPE));
+}
+
+/** \brief True if any keys are (or might be) on in the given multibit.
+ *
+ * NOTE: mmbit_any is sloppy (may return true when only summary bits are set).
+ * Use \ref mmbit_any_precise if you need/want a correct answer.
+ */
+static really_inline
+char mmbit_any(const u8 *bits, u32 total_bits) {
+ MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
+ if (!total_bits) {
+ return 0;
+ }
+ if (mmbit_is_flat_model(total_bits)) {
+ return mmbit_any_flat(bits, total_bits);
+ }
+ return !!mmb_load(bits);
+}
+
+/** \brief True if there are any keys on. Guaranteed precise. */
+static really_inline
+char mmbit_any_precise(const u8 *bits, u32 total_bits) {
+ MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
+ if (!total_bits) {
+ return 0;
+ }
+ if (mmbit_is_flat_model(total_bits)) {
+ return mmbit_any_flat(bits, total_bits);
+ }
+
+ return mmbit_iterate_big(bits, total_bits, MMB_INVALID) != MMB_INVALID;
+}
+
+static really_inline
char mmbit_all_flat(const u8 *bits, u32 total_bits) {
while (total_bits > MMB_KEY_BITS) {
if (mmb_load(bits) != MMB_ALL_ONES) {
@@ -743,760 +743,760 @@ char mmbit_all(const u8 *bits, u32 total_bits) {
}
static really_inline
-MMB_TYPE get_flat_masks(u32 base, u32 it_start, u32 it_end) {
- if (it_end <= base) {
- return 0;
- }
- u32 udiff = it_end - base;
- MMB_TYPE mask = udiff < 64 ? mmb_mask_zero_to_nocheck(udiff) : MMB_ALL_ONES;
- if (it_start >= base) {
- u32 ldiff = it_start - base;
- MMB_TYPE lmask = ldiff < 64 ? ~mmb_mask_zero_to_nocheck(ldiff) : 0;
- mask &= lmask;
- }
- return mask;
-}
-
-/** \brief Specialisation of \ref mmbit_iterate_bounded for flat models. */
-static really_inline
-u32 mmbit_iterate_bounded_flat(const u8 *bits, u32 total_bits, u32 begin,
- u32 end) {
- // Short cut for single-block cases.
- if (total_bits <= MMB_KEY_BITS) {
- MMB_TYPE block = mmbit_get_flat_block(bits, total_bits);
- block &= get_flat_masks(0, begin, end);
- if (block) {
- return mmb_ctz(block);
- }
- return MMB_INVALID;
- }
-
- const u32 last_block = ROUNDDOWN_N(total_bits, MMB_KEY_BITS);
-
- // Iterate over full-sized blocks.
- for (u32 i = ROUNDDOWN_N(begin, MMB_KEY_BITS), e = MIN(end, last_block);
- i < e; i += MMB_KEY_BITS) {
- const u8 *block_ptr = bits + i / 8;
- MMB_TYPE block = mmb_load(block_ptr);
- block &= get_flat_masks(i, begin, end);
- if (block) {
- return i + mmb_ctz(block);
- }
- }
-
- // Final block, which is less than full-sized.
- if (end > last_block) {
- const u8 *block_ptr = bits + last_block / 8;
- u32 num_bits = total_bits - last_block;
- MMB_TYPE block = mmbit_get_flat_block(block_ptr, num_bits);
- block &= get_flat_masks(last_block, begin, end);
- if (block) {
- return last_block + mmb_ctz(block);
- }
- }
-
- return MMB_INVALID;
-}
-
-static really_inline
-MMB_TYPE get_lowhi_masks(u32 level, u32 max_level, u64a block_min, u64a block_max,
- u64a block_base) {
- const u32 level_shift = (max_level - level) * MMB_KEY_SHIFT;
- u64a lshift = (block_min - block_base) >> level_shift;
- u64a ushift = (block_max - block_base) >> level_shift;
- MMB_TYPE lmask = lshift < 64 ? ~mmb_mask_zero_to_nocheck(lshift) : 0;
- MMB_TYPE umask =
- ushift < 63 ? mmb_mask_zero_to_nocheck(ushift + 1) : MMB_ALL_ONES;
- return lmask & umask;
-}
-
-static really_inline
-u32 mmbit_iterate_bounded_big(const u8 *bits, u32 total_bits, u32 it_start, u32 it_end) {
- u64a key = 0;
- u32 ks = mmbit_keyshift(total_bits);
- const u32 max_level = mmbit_maxlevel_from_keyshift(ks);
- u32 level = 0;
- --it_end; // make end-limit inclusive
- for (;;) {
- assert(level <= max_level);
-
+MMB_TYPE get_flat_masks(u32 base, u32 it_start, u32 it_end) {
+ if (it_end <= base) {
+ return 0;
+ }
+ u32 udiff = it_end - base;
+ MMB_TYPE mask = udiff < 64 ? mmb_mask_zero_to_nocheck(udiff) : MMB_ALL_ONES;
+ if (it_start >= base) {
+ u32 ldiff = it_start - base;
+ MMB_TYPE lmask = ldiff < 64 ? ~mmb_mask_zero_to_nocheck(ldiff) : 0;
+ mask &= lmask;
+ }
+ return mask;
+}
+
+/** \brief Specialisation of \ref mmbit_iterate_bounded for flat models. */
+static really_inline
+u32 mmbit_iterate_bounded_flat(const u8 *bits, u32 total_bits, u32 begin,
+ u32 end) {
+ // Short cut for single-block cases.
+ if (total_bits <= MMB_KEY_BITS) {
+ MMB_TYPE block = mmbit_get_flat_block(bits, total_bits);
+ block &= get_flat_masks(0, begin, end);
+ if (block) {
+ return mmb_ctz(block);
+ }
+ return MMB_INVALID;
+ }
+
+ const u32 last_block = ROUNDDOWN_N(total_bits, MMB_KEY_BITS);
+
+ // Iterate over full-sized blocks.
+ for (u32 i = ROUNDDOWN_N(begin, MMB_KEY_BITS), e = MIN(end, last_block);
+ i < e; i += MMB_KEY_BITS) {
+ const u8 *block_ptr = bits + i / 8;
+ MMB_TYPE block = mmb_load(block_ptr);
+ block &= get_flat_masks(i, begin, end);
+ if (block) {
+ return i + mmb_ctz(block);
+ }
+ }
+
+ // Final block, which is less than full-sized.
+ if (end > last_block) {
+ const u8 *block_ptr = bits + last_block / 8;
+ u32 num_bits = total_bits - last_block;
+ MMB_TYPE block = mmbit_get_flat_block(block_ptr, num_bits);
+ block &= get_flat_masks(last_block, begin, end);
+ if (block) {
+ return last_block + mmb_ctz(block);
+ }
+ }
+
+ return MMB_INVALID;
+}
+
+static really_inline
+MMB_TYPE get_lowhi_masks(u32 level, u32 max_level, u64a block_min, u64a block_max,
+ u64a block_base) {
+ const u32 level_shift = (max_level - level) * MMB_KEY_SHIFT;
+ u64a lshift = (block_min - block_base) >> level_shift;
+ u64a ushift = (block_max - block_base) >> level_shift;
+ MMB_TYPE lmask = lshift < 64 ? ~mmb_mask_zero_to_nocheck(lshift) : 0;
+ MMB_TYPE umask =
+ ushift < 63 ? mmb_mask_zero_to_nocheck(ushift + 1) : MMB_ALL_ONES;
+ return lmask & umask;
+}
+
+static really_inline
+u32 mmbit_iterate_bounded_big(const u8 *bits, u32 total_bits, u32 it_start, u32 it_end) {
+ u64a key = 0;
+ u32 ks = mmbit_keyshift(total_bits);
+ const u32 max_level = mmbit_maxlevel_from_keyshift(ks);
+ u32 level = 0;
+ --it_end; // make end-limit inclusive
+ for (;;) {
+ assert(level <= max_level);
+
u64a block_width = MMB_KEY_BITS << ks;
- u64a block_base = key * block_width;
- u64a block_min = MAX(it_start, block_base);
- u64a block_max = MIN(it_end, block_base + block_width - 1);
- const u8 *block_ptr =
- mmbit_get_level_root_const(bits, level) + key * sizeof(MMB_TYPE);
- MMB_TYPE block = mmb_load(block_ptr);
- block &= get_lowhi_masks(level, max_level, block_min, block_max, block_base);
- if (block) {
- // Found a bit, go down a level
- key = (key << MMB_KEY_SHIFT) + mmb_ctz(block);
- if (level++ == max_level) {
- return key;
- }
- ks -= MMB_KEY_SHIFT;
- } else {
- // No bit found, go up a level
- // we know that this block didn't have any answers, so we can push
- // our start iterator forward.
- u64a next_start = block_base + block_width;
- if (next_start > it_end) {
- break;
- }
- if (level-- == 0) {
- break;
- }
- it_start = next_start;
- key >>= MMB_KEY_SHIFT;
- ks += MMB_KEY_SHIFT;
- }
- }
- return MMB_INVALID;
-}
-
-/** \brief Bounded iterator. Returns the index of the first set bit between
- * it_start (inclusive) and it_end (exclusive) or MMB_INVALID if no bits are
- * set in that range.
- */
-static really_inline
-u32 mmbit_iterate_bounded(const u8 *bits, u32 total_bits, u32 it_start,
- u32 it_end) {
- MDEBUG_PRINTF("%p total_bits %u it_start %u it_end %u\n", bits, total_bits,
- it_start, it_end);
- assert(it_start <= it_end);
- assert(it_end <= total_bits);
- if (!total_bits || it_end == it_start) {
- return MMB_INVALID;
- }
- assert(it_start < total_bits);
- u32 key;
- if (mmbit_is_flat_model(total_bits)) {
- key = mmbit_iterate_bounded_flat(bits, total_bits, it_start, it_end);
- } else {
- key = mmbit_iterate_bounded_big(bits, total_bits, it_start, it_end);
- }
- assert(key == MMB_INVALID || mmbit_isset(bits, total_bits, key));
- return key;
-}
-
-/** \brief Specialisation of \ref mmbit_unset_range for flat models. */
-static really_inline
-void mmbit_unset_range_flat(u8 *bits, u32 total_bits, u32 begin, u32 end) {
- const u32 last_block = ROUNDDOWN_N(total_bits, MMB_KEY_BITS);
-
- // Iterate over full-sized blocks.
- for (u32 i = ROUNDDOWN_N(begin, MMB_KEY_BITS), e = MIN(end, last_block);
- i < e; i += MMB_KEY_BITS) {
- u8 *block_ptr = bits + i / 8;
- MMB_TYPE block = mmb_load(block_ptr);
- MMB_TYPE mask = get_flat_masks(i, begin, end);
- mmb_store(block_ptr, block & ~mask);
- }
-
- // Final block, which is less than full-sized.
- if (end > last_block) {
- u8 *block_ptr = bits + last_block / 8;
- u32 num_bits = total_bits - last_block;
- MMB_TYPE block = mmbit_get_flat_block(block_ptr, num_bits);
- MMB_TYPE mask = get_flat_masks(last_block, begin, end);
- mmb_store_partial(block_ptr, block & ~mask, num_bits);
- }
-}
-
-static really_inline
-void mmbit_unset_range_big(u8 *bits, const u32 total_bits, u32 begin,
- u32 end) {
- // TODO: combine iterator and unset operation; completely replace this
- u32 i = begin;
- for (;;) {
- i = mmbit_iterate_bounded(bits, total_bits, i, end);
- if (i == MMB_INVALID) {
- break;
- }
- mmbit_unset_big(bits, total_bits, i);
- if (++i == end) {
- break;
- }
- }
-}
-
-/** \brief Unset a whole range of bits. Ensures that all bits between \a begin
- * (inclusive) and \a end (exclusive) are switched off. */
-static really_inline
-void mmbit_unset_range(u8 *bits, const u32 total_bits, u32 begin, u32 end) {
- MDEBUG_PRINTF("%p total_bits %u begin %u end %u\n", bits, total_bits, begin,
- end);
- assert(begin <= end);
- assert(end <= total_bits);
- if (mmbit_is_flat_model(total_bits)) {
- mmbit_unset_range_flat(bits, total_bits, begin, end);
- } else {
- mmbit_unset_range_big(bits, total_bits, begin, end);
- }
- // No bits are on in [begin, end) once we're done.
- assert(MMB_INVALID == mmbit_iterate_bounded(bits, total_bits, begin, end));
-}
-
-/** \brief Specialisation of \ref mmbit_init_range for flat models. */
-static really_inline
-void mmbit_init_range_flat(u8 *bits, const u32 total_bits, u32 begin, u32 end) {
- const u32 last_block = ROUNDDOWN_N(total_bits, MMB_KEY_BITS);
-
- // Iterate over full-sized blocks.
- for (u32 i = 0; i < last_block; i += MMB_KEY_BITS) {
- mmb_store(bits + i / 8, get_flat_masks(i, begin, end));
- }
-
- // Final block, which is less than full-sized.
- if (total_bits % MMB_KEY_BITS) {
- u32 num_bits = total_bits - last_block;
- MMB_TYPE block = get_flat_masks(last_block, begin, end);
- mmb_store_partial(bits + last_block / 8, block, num_bits);
- }
-}
-
-static really_inline
-void mmbit_init_range_big(u8 *bits, const u32 total_bits, u32 begin, u32 end) {
- u32 ks = mmbit_keyshift(total_bits);
- u32 level = 0;
-
- for (;;) {
- u8 *block = mmbit_get_level_root(bits, level);
- u32 k1 = begin >> ks, k2 = end >> ks;
-
- // Summary blocks need to account for the runt block on the end.
- if ((k2 << ks) != end) {
- k2++;
- }
-
- // Partial block to deal with beginning.
- block += (k1 / MMB_KEY_BITS) * sizeof(MMB_TYPE);
- if (k1 % MMB_KEY_BITS) {
- u32 idx = k1 / MMB_KEY_BITS;
- u32 block_end = (idx + 1) * MMB_KEY_BITS;
-
- // Because k1 % MMB_KEY_BITS != 0, we can avoid checking edge cases
- // here (see the branch in mmb_mask_zero_to).
- MMB_TYPE mask = MMB_ALL_ONES << (k1 % MMB_KEY_BITS);
-
- if (k2 < block_end) {
- assert(k2 % MMB_KEY_BITS);
- mask &= mmb_mask_zero_to_nocheck(k2 % MMB_KEY_BITS);
- mmb_store(block, mask);
- goto next_level;
- } else {
- mmb_store(block, mask);
- k1 = block_end;
- block += sizeof(MMB_TYPE);
- }
- }
-
- // Write blocks filled with ones until we get to the last block.
- for (; k1 < (k2 & ~MMB_KEY_MASK); k1 += MMB_KEY_BITS) {
- mmb_store(block, MMB_ALL_ONES);
- block += sizeof(MMB_TYPE);
- }
-
- // Final block.
- if (likely(k1 < k2)) {
- // Again, if k2 was at a block boundary, it would have been handled
- // by the previous loop, so we know k2 % MMB_KEY_BITS != 0 and can
- // avoid the branch in mmb_mask_zero_to here.
- assert(k2 % MMB_KEY_BITS);
- MMB_TYPE mask = mmb_mask_zero_to_nocheck(k2 % MMB_KEY_BITS);
- mmb_store(block, mask);
- }
-
- next_level:
- if (ks == 0) {
- break; // Last level is done, finished.
- }
-
- ks -= MMB_KEY_SHIFT;
- level++;
- }
-}
-
-/** \brief Initialises the multibit so that only the given range of bits are
- * set.
- *
- * Ensures that all bits between \a begin (inclusive) and \a end (exclusive)
- * are switched on.
- */
-static really_inline
-void mmbit_init_range(u8 *bits, const u32 total_bits, u32 begin, u32 end) {
- MDEBUG_PRINTF("%p total_bits %u begin %u end %u\n", bits, total_bits, begin,
- end);
- assert(begin <= end);
- assert(end <= total_bits);
-
- if (!total_bits) {
- return;
- }
-
- // Short cut for cases where we're not actually setting any bits; just
- // clear the multibit.
- if (begin == end) {
- mmbit_clear(bits, total_bits);
- return;
- }
-
- if (mmbit_is_flat_model(total_bits)) {
- mmbit_init_range_flat(bits, total_bits, begin, end);
- } else {
- mmbit_init_range_big(bits, total_bits, begin, end);
- }
-
- assert(begin == end ||
- mmbit_iterate(bits, total_bits, MMB_INVALID) == begin);
- assert(!end || begin == end ||
- mmbit_iterate(bits, total_bits, end - 1) == MMB_INVALID);
-}
-
-/** \brief Determine the number of \ref mmbit_sparse_state elements required.
- * */
-static really_inline
-u32 mmbit_sparse_iter_state_size(u32 total_bits) {
- if (mmbit_is_flat_model(total_bits)) {
- return 2;
- }
- u32 levels = mmbit_maxlevel(total_bits);
- return levels + 1;
-}
-
-#ifdef DUMP_SUPPORT
-// Dump function, defined in multibit.c.
-void mmbit_sparse_iter_dump(const struct mmbit_sparse_iter *it, u32 total_bits);
-#endif
-
-/** Internal: common loop used by mmbit_sparse_iter_{begin,next}_big. Returns
- * matching next key given starting state, or MMB_INVALID. */
-static really_inline
-u32 mmbit_sparse_iter_exec(const u8 *bits, u32 key, u32 *idx, u32 level,
- const u32 max_level, struct mmbit_sparse_state *s,
- const struct mmbit_sparse_iter *it_root,
- const struct mmbit_sparse_iter *it) {
- for (;;) {
- MMB_TYPE block = s[level].mask;
- if (block) {
- u32 bit = mmb_ctz(block);
- key = (key << MMB_KEY_SHIFT) + bit;
- u32 bit_idx = mmbit_mask_index(bit, it->mask);
- if (level++ == max_level) {
- // we've found a key
- *idx = it->val + bit_idx;
- return key;
- } else {
- // iterator record is the start of the level (current it->val)
- // plus N, where N is the dense index of the bit in the current
- // level's itmask
- u32 iter_key = it->val + bit_idx;
- it = it_root + iter_key;
- MMB_TYPE nextblock =
- mmb_load(mmbit_get_level_root_const(bits, level) +
- key * sizeof(MMB_TYPE));
- s[level].mask = nextblock & it->mask;
- s[level].itkey = iter_key;
- }
- } else {
- // No bits set in this block
- if (level-- == 0) {
- break; // no key available
- }
- key >>= MMB_KEY_SHIFT;
- // Update state mask and iterator
- s[level].mask &= (s[level].mask - 1);
- it = it_root + s[level].itkey;
- }
- }
- return MMB_INVALID;
-}
-
-static really_inline
-u32 mmbit_sparse_iter_begin_big(const u8 *bits, u32 total_bits, u32 *idx,
- const struct mmbit_sparse_iter *it_root,
- struct mmbit_sparse_state *s) {
- const struct mmbit_sparse_iter *it = it_root;
- u32 key = 0;
- MMB_TYPE block = mmb_load(bits) & it->mask;
- if (!block) {
- return MMB_INVALID;
- }
-
- // Load first block into top level state.
- const u32 max_level = mmbit_maxlevel(total_bits);
- s[0].mask = block;
- s[0].itkey = 0;
- return mmbit_sparse_iter_exec(bits, key, idx, 0, max_level,
- s, it_root, it);
-}
-
-/** \brief Specialisation of \ref mmbit_sparse_iter_begin for flat models. */
-static really_inline
-u32 mmbit_sparse_iter_begin_flat(const u8 *bits, u32 total_bits, u32 *idx,
- const struct mmbit_sparse_iter *it_root,
- struct mmbit_sparse_state *s) {
- // Small cases have everything in the root iterator mask.
- if (total_bits <= MMB_KEY_BITS) {
- MMB_TYPE block = mmbit_get_flat_block(bits, total_bits);
- block &= it_root->mask;
- if (!block) {
- return MMB_INVALID;
- }
-
- s->mask = block;
- u32 key = mmb_ctz(block);
- *idx = mmbit_mask_index(key, it_root->mask);
- return key;
- }
-
- // Otherwise, the root iterator mask tells us which blocks (which we lay out
- // linearly in the flat model) could contain keys.
- assert(mmbit_maxlevel(total_bits) == 1); // Should only be two levels
- MMB_TYPE root = it_root->mask;
- for (; root; root &= (root - 1)) {
- u32 bit = mmb_ctz(root);
- u32 bit_idx = mmbit_mask_index(bit, it_root->mask);
- u32 iter_key = it_root->val + bit_idx;
- const struct mmbit_sparse_iter *it = it_root + iter_key;
- u32 block_key_min = bit * MMB_KEY_BITS;
- u32 block_key_max = block_key_min + MMB_KEY_BITS;
- MMB_TYPE block;
- if (block_key_max > total_bits) {
- block_key_max = total_bits;
- block = mmbit_get_flat_block(bits + (bit * sizeof(MMB_TYPE)),
- block_key_max - block_key_min);
- } else {
- block = mmb_load(bits + (bit * sizeof(MMB_TYPE)));
- }
-
- block &= it->mask;
- if (block) {
- s[0].mask = root;
- s[1].mask = block;
- s[1].itkey = iter_key;
- u32 key = mmb_ctz(block);
- *idx = it->val + mmbit_mask_index(key, it->mask);
- return key + block_key_min;
- }
- }
-
- return MMB_INVALID;
-}
-
-/** \brief Sparse iterator, find first key.
- *
- * Returns the first of the bits specified by the iterator \a it_root that is
- * on, and initialises the state \a s. If none of the bits specified by the
- * iterator are on, returns MMB_INVALID.
- */
-static really_inline
-u32 mmbit_sparse_iter_begin(const u8 *bits, u32 total_bits, u32 *idx,
- const struct mmbit_sparse_iter *it_root,
- struct mmbit_sparse_state *s) {
- assert(ISALIGNED_N(it_root, alignof(struct mmbit_sparse_iter)));
-
- // Our state _may_ be on the stack
+ u64a block_base = key * block_width;
+ u64a block_min = MAX(it_start, block_base);
+ u64a block_max = MIN(it_end, block_base + block_width - 1);
+ const u8 *block_ptr =
+ mmbit_get_level_root_const(bits, level) + key * sizeof(MMB_TYPE);
+ MMB_TYPE block = mmb_load(block_ptr);
+ block &= get_lowhi_masks(level, max_level, block_min, block_max, block_base);
+ if (block) {
+ // Found a bit, go down a level
+ key = (key << MMB_KEY_SHIFT) + mmb_ctz(block);
+ if (level++ == max_level) {
+ return key;
+ }
+ ks -= MMB_KEY_SHIFT;
+ } else {
+ // No bit found, go up a level
+ // we know that this block didn't have any answers, so we can push
+ // our start iterator forward.
+ u64a next_start = block_base + block_width;
+ if (next_start > it_end) {
+ break;
+ }
+ if (level-- == 0) {
+ break;
+ }
+ it_start = next_start;
+ key >>= MMB_KEY_SHIFT;
+ ks += MMB_KEY_SHIFT;
+ }
+ }
+ return MMB_INVALID;
+}
+
+/** \brief Bounded iterator. Returns the index of the first set bit between
+ * it_start (inclusive) and it_end (exclusive) or MMB_INVALID if no bits are
+ * set in that range.
+ */
+static really_inline
+u32 mmbit_iterate_bounded(const u8 *bits, u32 total_bits, u32 it_start,
+ u32 it_end) {
+ MDEBUG_PRINTF("%p total_bits %u it_start %u it_end %u\n", bits, total_bits,
+ it_start, it_end);
+ assert(it_start <= it_end);
+ assert(it_end <= total_bits);
+ if (!total_bits || it_end == it_start) {
+ return MMB_INVALID;
+ }
+ assert(it_start < total_bits);
+ u32 key;
+ if (mmbit_is_flat_model(total_bits)) {
+ key = mmbit_iterate_bounded_flat(bits, total_bits, it_start, it_end);
+ } else {
+ key = mmbit_iterate_bounded_big(bits, total_bits, it_start, it_end);
+ }
+ assert(key == MMB_INVALID || mmbit_isset(bits, total_bits, key));
+ return key;
+}
+
+/** \brief Specialisation of \ref mmbit_unset_range for flat models. */
+static really_inline
+void mmbit_unset_range_flat(u8 *bits, u32 total_bits, u32 begin, u32 end) {
+ const u32 last_block = ROUNDDOWN_N(total_bits, MMB_KEY_BITS);
+
+ // Iterate over full-sized blocks.
+ for (u32 i = ROUNDDOWN_N(begin, MMB_KEY_BITS), e = MIN(end, last_block);
+ i < e; i += MMB_KEY_BITS) {
+ u8 *block_ptr = bits + i / 8;
+ MMB_TYPE block = mmb_load(block_ptr);
+ MMB_TYPE mask = get_flat_masks(i, begin, end);
+ mmb_store(block_ptr, block & ~mask);
+ }
+
+ // Final block, which is less than full-sized.
+ if (end > last_block) {
+ u8 *block_ptr = bits + last_block / 8;
+ u32 num_bits = total_bits - last_block;
+ MMB_TYPE block = mmbit_get_flat_block(block_ptr, num_bits);
+ MMB_TYPE mask = get_flat_masks(last_block, begin, end);
+ mmb_store_partial(block_ptr, block & ~mask, num_bits);
+ }
+}
+
+static really_inline
+void mmbit_unset_range_big(u8 *bits, const u32 total_bits, u32 begin,
+ u32 end) {
+ // TODO: combine iterator and unset operation; completely replace this
+ u32 i = begin;
+ for (;;) {
+ i = mmbit_iterate_bounded(bits, total_bits, i, end);
+ if (i == MMB_INVALID) {
+ break;
+ }
+ mmbit_unset_big(bits, total_bits, i);
+ if (++i == end) {
+ break;
+ }
+ }
+}
+
+/** \brief Unset a whole range of bits. Ensures that all bits between \a begin
+ * (inclusive) and \a end (exclusive) are switched off. */
+static really_inline
+void mmbit_unset_range(u8 *bits, const u32 total_bits, u32 begin, u32 end) {
+ MDEBUG_PRINTF("%p total_bits %u begin %u end %u\n", bits, total_bits, begin,
+ end);
+ assert(begin <= end);
+ assert(end <= total_bits);
+ if (mmbit_is_flat_model(total_bits)) {
+ mmbit_unset_range_flat(bits, total_bits, begin, end);
+ } else {
+ mmbit_unset_range_big(bits, total_bits, begin, end);
+ }
+ // No bits are on in [begin, end) once we're done.
+ assert(MMB_INVALID == mmbit_iterate_bounded(bits, total_bits, begin, end));
+}
+
+/** \brief Specialisation of \ref mmbit_init_range for flat models. */
+static really_inline
+void mmbit_init_range_flat(u8 *bits, const u32 total_bits, u32 begin, u32 end) {
+ const u32 last_block = ROUNDDOWN_N(total_bits, MMB_KEY_BITS);
+
+ // Iterate over full-sized blocks.
+ for (u32 i = 0; i < last_block; i += MMB_KEY_BITS) {
+ mmb_store(bits + i / 8, get_flat_masks(i, begin, end));
+ }
+
+ // Final block, which is less than full-sized.
+ if (total_bits % MMB_KEY_BITS) {
+ u32 num_bits = total_bits - last_block;
+ MMB_TYPE block = get_flat_masks(last_block, begin, end);
+ mmb_store_partial(bits + last_block / 8, block, num_bits);
+ }
+}
+
+static really_inline
+void mmbit_init_range_big(u8 *bits, const u32 total_bits, u32 begin, u32 end) {
+ u32 ks = mmbit_keyshift(total_bits);
+ u32 level = 0;
+
+ for (;;) {
+ u8 *block = mmbit_get_level_root(bits, level);
+ u32 k1 = begin >> ks, k2 = end >> ks;
+
+ // Summary blocks need to account for the runt block on the end.
+ if ((k2 << ks) != end) {
+ k2++;
+ }
+
+ // Partial block to deal with beginning.
+ block += (k1 / MMB_KEY_BITS) * sizeof(MMB_TYPE);
+ if (k1 % MMB_KEY_BITS) {
+ u32 idx = k1 / MMB_KEY_BITS;
+ u32 block_end = (idx + 1) * MMB_KEY_BITS;
+
+ // Because k1 % MMB_KEY_BITS != 0, we can avoid checking edge cases
+ // here (see the branch in mmb_mask_zero_to).
+ MMB_TYPE mask = MMB_ALL_ONES << (k1 % MMB_KEY_BITS);
+
+ if (k2 < block_end) {
+ assert(k2 % MMB_KEY_BITS);
+ mask &= mmb_mask_zero_to_nocheck(k2 % MMB_KEY_BITS);
+ mmb_store(block, mask);
+ goto next_level;
+ } else {
+ mmb_store(block, mask);
+ k1 = block_end;
+ block += sizeof(MMB_TYPE);
+ }
+ }
+
+ // Write blocks filled with ones until we get to the last block.
+ for (; k1 < (k2 & ~MMB_KEY_MASK); k1 += MMB_KEY_BITS) {
+ mmb_store(block, MMB_ALL_ONES);
+ block += sizeof(MMB_TYPE);
+ }
+
+ // Final block.
+ if (likely(k1 < k2)) {
+ // Again, if k2 was at a block boundary, it would have been handled
+ // by the previous loop, so we know k2 % MMB_KEY_BITS != 0 and can
+ // avoid the branch in mmb_mask_zero_to here.
+ assert(k2 % MMB_KEY_BITS);
+ MMB_TYPE mask = mmb_mask_zero_to_nocheck(k2 % MMB_KEY_BITS);
+ mmb_store(block, mask);
+ }
+
+ next_level:
+ if (ks == 0) {
+ break; // Last level is done, finished.
+ }
+
+ ks -= MMB_KEY_SHIFT;
+ level++;
+ }
+}
+
+/** \brief Initialises the multibit so that only the given range of bits are
+ * set.
+ *
+ * Ensures that all bits between \a begin (inclusive) and \a end (exclusive)
+ * are switched on.
+ */
+static really_inline
+void mmbit_init_range(u8 *bits, const u32 total_bits, u32 begin, u32 end) {
+ MDEBUG_PRINTF("%p total_bits %u begin %u end %u\n", bits, total_bits, begin,
+ end);
+ assert(begin <= end);
+ assert(end <= total_bits);
+
+ if (!total_bits) {
+ return;
+ }
+
+ // Short cut for cases where we're not actually setting any bits; just
+ // clear the multibit.
+ if (begin == end) {
+ mmbit_clear(bits, total_bits);
+ return;
+ }
+
+ if (mmbit_is_flat_model(total_bits)) {
+ mmbit_init_range_flat(bits, total_bits, begin, end);
+ } else {
+ mmbit_init_range_big(bits, total_bits, begin, end);
+ }
+
+ assert(begin == end ||
+ mmbit_iterate(bits, total_bits, MMB_INVALID) == begin);
+ assert(!end || begin == end ||
+ mmbit_iterate(bits, total_bits, end - 1) == MMB_INVALID);
+}
+
+/** \brief Determine the number of \ref mmbit_sparse_state elements required.
+ * */
+static really_inline
+u32 mmbit_sparse_iter_state_size(u32 total_bits) {
+ if (mmbit_is_flat_model(total_bits)) {
+ return 2;
+ }
+ u32 levels = mmbit_maxlevel(total_bits);
+ return levels + 1;
+}
+
+#ifdef DUMP_SUPPORT
+// Dump function, defined in multibit.c.
+void mmbit_sparse_iter_dump(const struct mmbit_sparse_iter *it, u32 total_bits);
+#endif
+
+/** Internal: common loop used by mmbit_sparse_iter_{begin,next}_big. Returns
+ * matching next key given starting state, or MMB_INVALID. */
+static really_inline
+u32 mmbit_sparse_iter_exec(const u8 *bits, u32 key, u32 *idx, u32 level,
+ const u32 max_level, struct mmbit_sparse_state *s,
+ const struct mmbit_sparse_iter *it_root,
+ const struct mmbit_sparse_iter *it) {
+ for (;;) {
+ MMB_TYPE block = s[level].mask;
+ if (block) {
+ u32 bit = mmb_ctz(block);
+ key = (key << MMB_KEY_SHIFT) + bit;
+ u32 bit_idx = mmbit_mask_index(bit, it->mask);
+ if (level++ == max_level) {
+ // we've found a key
+ *idx = it->val + bit_idx;
+ return key;
+ } else {
+ // iterator record is the start of the level (current it->val)
+ // plus N, where N is the dense index of the bit in the current
+ // level's itmask
+ u32 iter_key = it->val + bit_idx;
+ it = it_root + iter_key;
+ MMB_TYPE nextblock =
+ mmb_load(mmbit_get_level_root_const(bits, level) +
+ key * sizeof(MMB_TYPE));
+ s[level].mask = nextblock & it->mask;
+ s[level].itkey = iter_key;
+ }
+ } else {
+ // No bits set in this block
+ if (level-- == 0) {
+ break; // no key available
+ }
+ key >>= MMB_KEY_SHIFT;
+ // Update state mask and iterator
+ s[level].mask &= (s[level].mask - 1);
+ it = it_root + s[level].itkey;
+ }
+ }
+ return MMB_INVALID;
+}
+
+static really_inline
+u32 mmbit_sparse_iter_begin_big(const u8 *bits, u32 total_bits, u32 *idx,
+ const struct mmbit_sparse_iter *it_root,
+ struct mmbit_sparse_state *s) {
+ const struct mmbit_sparse_iter *it = it_root;
+ u32 key = 0;
+ MMB_TYPE block = mmb_load(bits) & it->mask;
+ if (!block) {
+ return MMB_INVALID;
+ }
+
+ // Load first block into top level state.
+ const u32 max_level = mmbit_maxlevel(total_bits);
+ s[0].mask = block;
+ s[0].itkey = 0;
+ return mmbit_sparse_iter_exec(bits, key, idx, 0, max_level,
+ s, it_root, it);
+}
+
+/** \brief Specialisation of \ref mmbit_sparse_iter_begin for flat models. */
+static really_inline
+u32 mmbit_sparse_iter_begin_flat(const u8 *bits, u32 total_bits, u32 *idx,
+ const struct mmbit_sparse_iter *it_root,
+ struct mmbit_sparse_state *s) {
+ // Small cases have everything in the root iterator mask.
+ if (total_bits <= MMB_KEY_BITS) {
+ MMB_TYPE block = mmbit_get_flat_block(bits, total_bits);
+ block &= it_root->mask;
+ if (!block) {
+ return MMB_INVALID;
+ }
+
+ s->mask = block;
+ u32 key = mmb_ctz(block);
+ *idx = mmbit_mask_index(key, it_root->mask);
+ return key;
+ }
+
+ // Otherwise, the root iterator mask tells us which blocks (which we lay out
+ // linearly in the flat model) could contain keys.
+ assert(mmbit_maxlevel(total_bits) == 1); // Should only be two levels
+ MMB_TYPE root = it_root->mask;
+ for (; root; root &= (root - 1)) {
+ u32 bit = mmb_ctz(root);
+ u32 bit_idx = mmbit_mask_index(bit, it_root->mask);
+ u32 iter_key = it_root->val + bit_idx;
+ const struct mmbit_sparse_iter *it = it_root + iter_key;
+ u32 block_key_min = bit * MMB_KEY_BITS;
+ u32 block_key_max = block_key_min + MMB_KEY_BITS;
+ MMB_TYPE block;
+ if (block_key_max > total_bits) {
+ block_key_max = total_bits;
+ block = mmbit_get_flat_block(bits + (bit * sizeof(MMB_TYPE)),
+ block_key_max - block_key_min);
+ } else {
+ block = mmb_load(bits + (bit * sizeof(MMB_TYPE)));
+ }
+
+ block &= it->mask;
+ if (block) {
+ s[0].mask = root;
+ s[1].mask = block;
+ s[1].itkey = iter_key;
+ u32 key = mmb_ctz(block);
+ *idx = it->val + mmbit_mask_index(key, it->mask);
+ return key + block_key_min;
+ }
+ }
+
+ return MMB_INVALID;
+}
+
+/** \brief Sparse iterator, find first key.
+ *
+ * Returns the first of the bits specified by the iterator \a it_root that is
+ * on, and initialises the state \a s. If none of the bits specified by the
+ * iterator are on, returns MMB_INVALID.
+ */
+static really_inline
+u32 mmbit_sparse_iter_begin(const u8 *bits, u32 total_bits, u32 *idx,
+ const struct mmbit_sparse_iter *it_root,
+ struct mmbit_sparse_state *s) {
+ assert(ISALIGNED_N(it_root, alignof(struct mmbit_sparse_iter)));
+
+ // Our state _may_ be on the stack
#ifndef _WIN32
- assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state)));
+ assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state)));
#else
assert(ISALIGNED_N(s, 4));
#endif
-
- MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
- // iterator should have _something_ at the root level
- assert(it_root->mask != 0);
- u32 key;
- if (mmbit_is_flat_model(total_bits)) {
- key = mmbit_sparse_iter_begin_flat(bits, total_bits, idx, it_root, s);
- } else {
- key = mmbit_sparse_iter_begin_big(bits, total_bits, idx, it_root, s);
- }
- if (key != MMB_INVALID) {
- assert(key < total_bits);
- assert(mmbit_isset(bits, total_bits, key));
- }
- return key;
-}
-
-static really_inline
-u32 mmbit_sparse_iter_next_big(const u8 *bits, u32 total_bits, u32 last_key,
- u32 *idx,
- const struct mmbit_sparse_iter *it_root,
- struct mmbit_sparse_state *s) {
- const u32 max_level = mmbit_maxlevel(total_bits);
- u32 key = last_key >> MMB_KEY_SHIFT;
- s[max_level].mask &= (s[max_level].mask - 1);
- const struct mmbit_sparse_iter *it = it_root + s[max_level].itkey;
- return mmbit_sparse_iter_exec(bits, key, idx, max_level, max_level, s,
- it_root, it);
-}
-
-/** \brief Specialisation of \ref mmbit_sparse_iter_next for flat models. */
-static really_inline
-u32 mmbit_sparse_iter_next_flat(const u8 *bits, const u32 total_bits, u32 *idx,
- const struct mmbit_sparse_iter *it_root,
- struct mmbit_sparse_state *s) {
- if (total_bits <= MMB_KEY_BITS) {
- // All of our data is already in the s->mask, so we just need to scrape
- // off the next match.
- s->mask &= (s->mask - 1);
- if (s->mask) {
- u32 key = mmb_ctz(s->mask);
- *idx = mmbit_mask_index(key, it_root->mask);
- return key;
- }
- } else {
- assert(s[0].mask);
-
- s[1].mask &= (s[1].mask - 1); // Remove previous key from iter state.
- u32 bit = mmb_ctz(s[0].mask); // Flat block currently being accessed.
-
- for (;;) {
- if (s[1].mask) {
- u32 key = mmb_ctz(s[1].mask);
- const struct mmbit_sparse_iter *it = it_root + s[1].itkey;
- *idx = it->val + mmbit_mask_index(key, it->mask);
- key += (bit * MMB_KEY_BITS);
- return key;
- }
-
- // Otherwise, we have no keys left in this block. Consult the root
- // mask and find the next one.
-
- s[0].mask &= s[0].mask - 1;
- if (!s[0].mask) {
- break;
- }
-
- bit = mmb_ctz(s[0].mask);
- u32 bit_idx = mmbit_mask_index(bit, it_root->mask);
- u32 iter_key = it_root->val + bit_idx;
- const struct mmbit_sparse_iter *it = it_root + iter_key;
- u32 block_key_min = bit * MMB_KEY_BITS;
- u32 block_key_max = block_key_min + MMB_KEY_BITS;
- MMB_TYPE block;
- if (block_key_max > total_bits) {
- block_key_max = total_bits;
- block = mmbit_get_flat_block(bits + (bit * sizeof(MMB_TYPE)),
- block_key_max - block_key_min);
- } else {
- block = mmb_load(bits + (bit * sizeof(MMB_TYPE)));
- }
-
- s[1].mask = block & it->mask;
- s[1].itkey = iter_key;
- }
- }
-
- return MMB_INVALID;
-}
-
-/** \brief Sparse iterator, find next key.
- *
- * Takes in a sparse iterator tree structure \a it_root and a state array, and
- * finds the next on bit (from the set of bits specified in the iterator).
- *
- * NOTE: The sparse iterator stores copies of the multibit blocks in its state,
- * so it is not necessarily safe to set or unset bits in the multibit while
- * iterating: the changes you make may or may not be taken into account
- * by the iterator.
- */
-static really_inline
-u32 mmbit_sparse_iter_next(const u8 *bits, u32 total_bits, u32 last_key,
- u32 *idx, const struct mmbit_sparse_iter *it_root,
- struct mmbit_sparse_state *s) {
- assert(ISALIGNED_N(it_root, alignof(struct mmbit_sparse_iter)));
-
- // Our state _may_ be on the stack
+
+ MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
+ // iterator should have _something_ at the root level
+ assert(it_root->mask != 0);
+ u32 key;
+ if (mmbit_is_flat_model(total_bits)) {
+ key = mmbit_sparse_iter_begin_flat(bits, total_bits, idx, it_root, s);
+ } else {
+ key = mmbit_sparse_iter_begin_big(bits, total_bits, idx, it_root, s);
+ }
+ if (key != MMB_INVALID) {
+ assert(key < total_bits);
+ assert(mmbit_isset(bits, total_bits, key));
+ }
+ return key;
+}
+
+static really_inline
+u32 mmbit_sparse_iter_next_big(const u8 *bits, u32 total_bits, u32 last_key,
+ u32 *idx,
+ const struct mmbit_sparse_iter *it_root,
+ struct mmbit_sparse_state *s) {
+ const u32 max_level = mmbit_maxlevel(total_bits);
+ u32 key = last_key >> MMB_KEY_SHIFT;
+ s[max_level].mask &= (s[max_level].mask - 1);
+ const struct mmbit_sparse_iter *it = it_root + s[max_level].itkey;
+ return mmbit_sparse_iter_exec(bits, key, idx, max_level, max_level, s,
+ it_root, it);
+}
+
+/** \brief Specialisation of \ref mmbit_sparse_iter_next for flat models. */
+static really_inline
+u32 mmbit_sparse_iter_next_flat(const u8 *bits, const u32 total_bits, u32 *idx,
+ const struct mmbit_sparse_iter *it_root,
+ struct mmbit_sparse_state *s) {
+ if (total_bits <= MMB_KEY_BITS) {
+ // All of our data is already in the s->mask, so we just need to scrape
+ // off the next match.
+ s->mask &= (s->mask - 1);
+ if (s->mask) {
+ u32 key = mmb_ctz(s->mask);
+ *idx = mmbit_mask_index(key, it_root->mask);
+ return key;
+ }
+ } else {
+ assert(s[0].mask);
+
+ s[1].mask &= (s[1].mask - 1); // Remove previous key from iter state.
+ u32 bit = mmb_ctz(s[0].mask); // Flat block currently being accessed.
+
+ for (;;) {
+ if (s[1].mask) {
+ u32 key = mmb_ctz(s[1].mask);
+ const struct mmbit_sparse_iter *it = it_root + s[1].itkey;
+ *idx = it->val + mmbit_mask_index(key, it->mask);
+ key += (bit * MMB_KEY_BITS);
+ return key;
+ }
+
+ // Otherwise, we have no keys left in this block. Consult the root
+ // mask and find the next one.
+
+ s[0].mask &= s[0].mask - 1;
+ if (!s[0].mask) {
+ break;
+ }
+
+ bit = mmb_ctz(s[0].mask);
+ u32 bit_idx = mmbit_mask_index(bit, it_root->mask);
+ u32 iter_key = it_root->val + bit_idx;
+ const struct mmbit_sparse_iter *it = it_root + iter_key;
+ u32 block_key_min = bit * MMB_KEY_BITS;
+ u32 block_key_max = block_key_min + MMB_KEY_BITS;
+ MMB_TYPE block;
+ if (block_key_max > total_bits) {
+ block_key_max = total_bits;
+ block = mmbit_get_flat_block(bits + (bit * sizeof(MMB_TYPE)),
+ block_key_max - block_key_min);
+ } else {
+ block = mmb_load(bits + (bit * sizeof(MMB_TYPE)));
+ }
+
+ s[1].mask = block & it->mask;
+ s[1].itkey = iter_key;
+ }
+ }
+
+ return MMB_INVALID;
+}
+
+/** \brief Sparse iterator, find next key.
+ *
+ * Takes in a sparse iterator tree structure \a it_root and a state array, and
+ * finds the next on bit (from the set of bits specified in the iterator).
+ *
+ * NOTE: The sparse iterator stores copies of the multibit blocks in its state,
+ * so it is not necessarily safe to set or unset bits in the multibit while
+ * iterating: the changes you make may or may not be taken into account
+ * by the iterator.
+ */
+static really_inline
+u32 mmbit_sparse_iter_next(const u8 *bits, u32 total_bits, u32 last_key,
+ u32 *idx, const struct mmbit_sparse_iter *it_root,
+ struct mmbit_sparse_state *s) {
+ assert(ISALIGNED_N(it_root, alignof(struct mmbit_sparse_iter)));
+
+ // Our state _may_ be on the stack
#ifndef _WIN32
- assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state)));
+ assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state)));
#else
assert(ISALIGNED_N(s, 4));
#endif
-
- MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
- MDEBUG_PRINTF("NEXT (total_bits=%u, last_key=%u)\n", total_bits, last_key);
- UNUSED u32 last_idx = *idx; // for assertion at the end
- // our iterator should have _something_ at the root level
- assert(it_root->mask != 0);
- assert(last_key < total_bits);
-
- u32 key;
- if (mmbit_is_flat_model(total_bits)) {
- key = mmbit_sparse_iter_next_flat(bits, total_bits, idx, it_root, s);
- } else {
- key = mmbit_sparse_iter_next_big(bits, total_bits, last_key, idx,
- it_root, s);
- }
- if (key != MMB_INVALID) {
- MDEBUG_PRINTF("END NEXT: key=%u, idx=%u\n", key, *idx);
- assert(key < total_bits);
- assert(key > last_key);
- assert(mmbit_isset(bits, total_bits, key));
- assert(*idx > last_idx);
- } else {
- MDEBUG_PRINTF("END NEXT: no more keys\n");
- }
- return key;
-}
-
-/** \brief Specialisation of \ref mmbit_sparse_iter_unset for flat models. */
-static really_inline
-void mmbit_sparse_iter_unset_flat(u8 *bits, u32 total_bits,
- const struct mmbit_sparse_iter *it_root) {
- if (total_bits <= MMB_KEY_BITS) {
- // Everything is in the root mask: we can just mask those bits off.
- MMB_TYPE block = mmbit_get_flat_block(bits, total_bits);
- block &= ~it_root->mask;
- mmb_store_partial(bits, block, total_bits);
- return;
- }
-
- // Larger case, we have two iterator levels to worry about.
- u32 bit_idx = 0;
- for (MMB_TYPE root = it_root->mask; root; root &= (root - 1), bit_idx++) {
- u32 bit = mmb_ctz(root);
- u32 block_key_min = bit * MMB_KEY_BITS;
- u32 block_key_max = block_key_min + MMB_KEY_BITS;
- u8 *block_ptr = bits + (bit * sizeof(MMB_TYPE));
- u32 iter_key = it_root->val + bit_idx;
- const struct mmbit_sparse_iter *it = it_root + iter_key;
- if (block_key_max <= total_bits) {
- // Full-sized block.
- MMB_TYPE block = mmb_load(block_ptr);
- block &= ~it->mask;
- mmb_store(block_ptr, block);
- } else {
- // Runt (final) block.
- u32 num_bits = total_bits - block_key_min;
- MMB_TYPE block = mmbit_get_flat_block(block_ptr, num_bits);
- block &= ~it->mask;
- mmb_store_partial(block_ptr, block, num_bits);
- break; // We know this is the last block.
- }
- }
-}
-
-static really_inline
-void mmbit_sparse_iter_unset_big(u8 *bits, u32 total_bits,
- const struct mmbit_sparse_iter *it_root,
- struct mmbit_sparse_state *s) {
- const struct mmbit_sparse_iter *it = it_root;
- MMB_TYPE block = mmb_load(bits) & it->mask;
- if (!block) {
- return;
- }
-
- u32 key = 0;
- const u32 max_level = mmbit_maxlevel(total_bits);
- u32 level = 0;
-
- // Load first block into top level state
- s[level].mask = block;
- s[level].itkey = 0;
- for (;;) {
- block = s[level].mask;
- if (block) {
- if (level == max_level) {
- // bottom level block: we want to mask out the bits specified
- // by the iterator mask and then go back up a level.
- u8 *block_ptr =
- mmbit_get_level_root(bits, level) + key * sizeof(MMB_TYPE);
- MMB_TYPE real_block = mmb_load(block_ptr);
- real_block &= ~(it->mask);
- mmb_store(block_ptr, real_block);
- goto uplevel; // still cheap and nasty
- } else {
- u32 bit = mmb_ctz(block);
- key = (key << MMB_KEY_SHIFT) + bit;
- level++;
-
- // iterator record is the start of the level (current it->val)
- // plus N, where N is the dense index of the bit in the current
- // level's itmask
- u32 iter_key = it->val + mmbit_mask_index(bit, it->mask);
- it = it_root + iter_key;
- MMB_TYPE nextblock =
- mmb_load(mmbit_get_level_root_const(bits, level) +
- key * sizeof(MMB_TYPE));
- s[level].mask = nextblock & it->mask;
- s[level].itkey = iter_key;
- }
- } else {
-uplevel:
- // No bits set in this block
- if (level == 0) {
- return; // we are done
- }
- u8 *block_ptr =
- mmbit_get_level_root(bits, level) + key * sizeof(MMB_TYPE);
- MMB_TYPE real_block = mmb_load(block_ptr);
- key >>= MMB_KEY_SHIFT;
- level--;
-
- if (real_block == 0) {
- // If we've zeroed our block For Real (unmasked by iterator),
- // we can clear the parent bit that led us to it, so that
- // we don't go down this particular garden path again later.
- u32 bit = mmb_ctz(s[level].mask);
- u8 *parent_ptr =
- mmbit_get_level_root(bits, level) + key * sizeof(MMB_TYPE);
- MMB_TYPE parent_block = mmb_load(parent_ptr);
- mmb_clear(&parent_block, bit);
- mmb_store(parent_ptr, parent_block);
- }
-
- // Update state mask and iterator
- s[level].mask &= (s[level].mask - 1);
- it = it_root + s[level].itkey;
- }
- }
-}
-
-/** \brief Sparse iterator, unset all bits.
- *
- * Takes in a sparse iterator tree structure and switches off any entries found
- * therein.
- */
-static really_inline
-void mmbit_sparse_iter_unset(u8 *bits, u32 total_bits,
- const struct mmbit_sparse_iter *it,
- struct mmbit_sparse_state *s) {
- assert(ISALIGNED_N(it, alignof(struct mmbit_sparse_iter)));
-
- // Our state _may_ be on the stack
+
+ MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
+ MDEBUG_PRINTF("NEXT (total_bits=%u, last_key=%u)\n", total_bits, last_key);
+ UNUSED u32 last_idx = *idx; // for assertion at the end
+ // our iterator should have _something_ at the root level
+ assert(it_root->mask != 0);
+ assert(last_key < total_bits);
+
+ u32 key;
+ if (mmbit_is_flat_model(total_bits)) {
+ key = mmbit_sparse_iter_next_flat(bits, total_bits, idx, it_root, s);
+ } else {
+ key = mmbit_sparse_iter_next_big(bits, total_bits, last_key, idx,
+ it_root, s);
+ }
+ if (key != MMB_INVALID) {
+ MDEBUG_PRINTF("END NEXT: key=%u, idx=%u\n", key, *idx);
+ assert(key < total_bits);
+ assert(key > last_key);
+ assert(mmbit_isset(bits, total_bits, key));
+ assert(*idx > last_idx);
+ } else {
+ MDEBUG_PRINTF("END NEXT: no more keys\n");
+ }
+ return key;
+}
+
+/** \brief Specialisation of \ref mmbit_sparse_iter_unset for flat models. */
+static really_inline
+void mmbit_sparse_iter_unset_flat(u8 *bits, u32 total_bits,
+ const struct mmbit_sparse_iter *it_root) {
+ if (total_bits <= MMB_KEY_BITS) {
+ // Everything is in the root mask: we can just mask those bits off.
+ MMB_TYPE block = mmbit_get_flat_block(bits, total_bits);
+ block &= ~it_root->mask;
+ mmb_store_partial(bits, block, total_bits);
+ return;
+ }
+
+ // Larger case, we have two iterator levels to worry about.
+ u32 bit_idx = 0;
+ for (MMB_TYPE root = it_root->mask; root; root &= (root - 1), bit_idx++) {
+ u32 bit = mmb_ctz(root);
+ u32 block_key_min = bit * MMB_KEY_BITS;
+ u32 block_key_max = block_key_min + MMB_KEY_BITS;
+ u8 *block_ptr = bits + (bit * sizeof(MMB_TYPE));
+ u32 iter_key = it_root->val + bit_idx;
+ const struct mmbit_sparse_iter *it = it_root + iter_key;
+ if (block_key_max <= total_bits) {
+ // Full-sized block.
+ MMB_TYPE block = mmb_load(block_ptr);
+ block &= ~it->mask;
+ mmb_store(block_ptr, block);
+ } else {
+ // Runt (final) block.
+ u32 num_bits = total_bits - block_key_min;
+ MMB_TYPE block = mmbit_get_flat_block(block_ptr, num_bits);
+ block &= ~it->mask;
+ mmb_store_partial(block_ptr, block, num_bits);
+ break; // We know this is the last block.
+ }
+ }
+}
+
+static really_inline
+void mmbit_sparse_iter_unset_big(u8 *bits, u32 total_bits,
+ const struct mmbit_sparse_iter *it_root,
+ struct mmbit_sparse_state *s) {
+ const struct mmbit_sparse_iter *it = it_root;
+ MMB_TYPE block = mmb_load(bits) & it->mask;
+ if (!block) {
+ return;
+ }
+
+ u32 key = 0;
+ const u32 max_level = mmbit_maxlevel(total_bits);
+ u32 level = 0;
+
+ // Load first block into top level state
+ s[level].mask = block;
+ s[level].itkey = 0;
+ for (;;) {
+ block = s[level].mask;
+ if (block) {
+ if (level == max_level) {
+ // bottom level block: we want to mask out the bits specified
+ // by the iterator mask and then go back up a level.
+ u8 *block_ptr =
+ mmbit_get_level_root(bits, level) + key * sizeof(MMB_TYPE);
+ MMB_TYPE real_block = mmb_load(block_ptr);
+ real_block &= ~(it->mask);
+ mmb_store(block_ptr, real_block);
+ goto uplevel; // still cheap and nasty
+ } else {
+ u32 bit = mmb_ctz(block);
+ key = (key << MMB_KEY_SHIFT) + bit;
+ level++;
+
+ // iterator record is the start of the level (current it->val)
+ // plus N, where N is the dense index of the bit in the current
+ // level's itmask
+ u32 iter_key = it->val + mmbit_mask_index(bit, it->mask);
+ it = it_root + iter_key;
+ MMB_TYPE nextblock =
+ mmb_load(mmbit_get_level_root_const(bits, level) +
+ key * sizeof(MMB_TYPE));
+ s[level].mask = nextblock & it->mask;
+ s[level].itkey = iter_key;
+ }
+ } else {
+uplevel:
+ // No bits set in this block
+ if (level == 0) {
+ return; // we are done
+ }
+ u8 *block_ptr =
+ mmbit_get_level_root(bits, level) + key * sizeof(MMB_TYPE);
+ MMB_TYPE real_block = mmb_load(block_ptr);
+ key >>= MMB_KEY_SHIFT;
+ level--;
+
+ if (real_block == 0) {
+ // If we've zeroed our block For Real (unmasked by iterator),
+ // we can clear the parent bit that led us to it, so that
+ // we don't go down this particular garden path again later.
+ u32 bit = mmb_ctz(s[level].mask);
+ u8 *parent_ptr =
+ mmbit_get_level_root(bits, level) + key * sizeof(MMB_TYPE);
+ MMB_TYPE parent_block = mmb_load(parent_ptr);
+ mmb_clear(&parent_block, bit);
+ mmb_store(parent_ptr, parent_block);
+ }
+
+ // Update state mask and iterator
+ s[level].mask &= (s[level].mask - 1);
+ it = it_root + s[level].itkey;
+ }
+ }
+}
+
+/** \brief Sparse iterator, unset all bits.
+ *
+ * Takes in a sparse iterator tree structure and switches off any entries found
+ * therein.
+ */
+static really_inline
+void mmbit_sparse_iter_unset(u8 *bits, u32 total_bits,
+ const struct mmbit_sparse_iter *it,
+ struct mmbit_sparse_state *s) {
+ assert(ISALIGNED_N(it, alignof(struct mmbit_sparse_iter)));
+
+ // Our state _may_ be on the stack
#ifndef _WIN32
- assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state)));
+ assert(ISALIGNED_N(s, alignof(struct mmbit_sparse_state)));
#else
assert(ISALIGNED_N(s, 4));
#endif
-
- MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
-
-#ifdef MMB_TRACE_WRITES
- MMB_TRACE("ITER-UNSET iter=[");
- mmbit_sparse_iter_dump(it, total_bits);
- printf("] actually on=[");
- struct mmbit_sparse_state tmp[MAX_SPARSE_ITER_STATES];
- u32 idx = 0;
- u32 i = mmbit_sparse_iter_begin(bits, total_bits, &idx, it, tmp);
- for (; i != MMB_INVALID;
- i = mmbit_sparse_iter_next(bits, total_bits, i, &idx, it, tmp)) {
- printf(" %u", i);
- }
- printf("]\n");
-#endif
-
- if (mmbit_is_flat_model(total_bits)) {
- mmbit_sparse_iter_unset_flat(bits, total_bits, it);
- } else {
- mmbit_sparse_iter_unset_big(bits, total_bits, it, s);
- }
-}
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // MULTIBIT_H
+
+ MDEBUG_PRINTF("%p total_bits %u\n", bits, total_bits);
+
+#ifdef MMB_TRACE_WRITES
+ MMB_TRACE("ITER-UNSET iter=[");
+ mmbit_sparse_iter_dump(it, total_bits);
+ printf("] actually on=[");
+ struct mmbit_sparse_state tmp[MAX_SPARSE_ITER_STATES];
+ u32 idx = 0;
+ u32 i = mmbit_sparse_iter_begin(bits, total_bits, &idx, it, tmp);
+ for (; i != MMB_INVALID;
+ i = mmbit_sparse_iter_next(bits, total_bits, i, &idx, it, tmp)) {
+ printf(" %u", i);
+ }
+ printf("]\n");
+#endif
+
+ if (mmbit_is_flat_model(total_bits)) {
+ mmbit_sparse_iter_unset_flat(bits, total_bits, it);
+ } else {
+ mmbit_sparse_iter_unset_big(bits, total_bits, it, s);
+ }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // MULTIBIT_H
diff --git a/contrib/libs/hyperscan/src/util/multibit_build.cpp b/contrib/libs/hyperscan/src/util/multibit_build.cpp
index f1a88de63d..67bb9ec702 100644
--- a/contrib/libs/hyperscan/src/util/multibit_build.cpp
+++ b/contrib/libs/hyperscan/src/util/multibit_build.cpp
@@ -1,51 +1,51 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Multibit: build code (for sparse iterators)
- */
-#include "multibit.h"
-#include "multibit_build.h"
-#include "scatter.h"
-#include "ue2common.h"
-#include "rose/rose_build_scatter.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Multibit: build code (for sparse iterators)
+ */
+#include "multibit.h"
+#include "multibit_build.h"
+#include "scatter.h"
+#include "ue2common.h"
+#include "rose/rose_build_scatter.h"
#include "util/compile_error.h"
-
-#include <cassert>
-#include <cstring> // for memset
-#include <map>
-#include <queue>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
+
+#include <cassert>
+#include <cstring> // for memset
+#include <map>
+#include <queue>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
u32 mmbit_size(u32 total_bits) {
if (total_bits > MMB_MAX_BITS) {
throw ResourceLimitError();
@@ -72,257 +72,257 @@ u32 mmbit_size(u32 total_bits) {
return (u32)(total * sizeof(MMB_TYPE));
}
-namespace {
-struct TreeNode {
- MMB_TYPE mask = 0;
- u32 depth = 0;
- map<u32, TreeNode> children; // keyed by rkey
-};
-} // namespace
-
-static
-void addNode(TreeNode &tree, u32 depth, u32 key, s32 ks, u32 rkey) {
- u32 bit = (key >> ks) & MMB_KEY_MASK;
- DEBUG_PRINTF("depth=%u, key=%u, ks=%d, rkey=%u, bit=%u\n", depth, key, ks,
- rkey, bit);
- mmb_set(&tree.mask, bit); // add bit to this level
- tree.depth = depth; // record depth
- // next level
- rkey = (rkey << MMB_KEY_SHIFT) + bit;
- ks -= MMB_KEY_SHIFT;
- depth++;
- if (ks >= 0) {
- addNode(tree.children[rkey], depth, key, ks, rkey);
- }
-}
-
-static
-void bfs(vector<mmbit_sparse_iter> &out, const TreeNode &tree) {
- queue<const TreeNode *> q;
- q.push(&tree);
-
- vector<u32> levels;
- u32 depth = 0;
-
- DEBUG_PRINTF("walking q\n");
-
- while (!q.empty()) {
- const TreeNode *t = q.front();
- q.pop();
-
- if (depth != t->depth) {
- depth = t->depth;
- levels.push_back(out.size());
- }
-
- DEBUG_PRINTF("pop: mask=0x%08llx, depth=%u, children.size()=%zu\n",
- t->mask, t->depth, t->children.size());
-
- out.push_back(mmbit_sparse_iter());
- memset(&out.back(), 0, sizeof(mmbit_sparse_iter));
- mmbit_sparse_iter &record = out.back();
- record.mask = t->mask;
- record.val = 0;
-
- for (auto &e : t->children) {
- q.push(&e.second);
- }
- }
-
- // val for records in non-last levels is the iterator array start offset
- // for that iterator record's children
- u32 start = 0;
- for (size_t i = 0; i < levels.size(); i++) {
- u32 start_next = levels[i];
- u32 population = 0;
- DEBUG_PRINTF("next level starts at %u\n", start_next);
- for (u32 j = start; j < start_next; j++) {
- out[j].val = start_next + population;
- DEBUG_PRINTF(" children of %u start at %u\n", j, out[j].val);
- population += mmb_popcount(out[j].mask);
- }
- start = start_next;
- }
-
- // val for records in the last level is the cumulative popcount
- u32 population = 0;
- for (size_t i = start; i < out.size(); i++) {
- DEBUG_PRINTF("last level: i=%zu, population=%u\n", i, population);
- out[i].val = population;
- population += mmb_popcount(out[i].mask);
- }
-}
-
-/** \brief Construct a sparse iterator over the values in \a bits for a
- * multibit of size \a total_bits. */
+namespace {
+struct TreeNode {
+ MMB_TYPE mask = 0;
+ u32 depth = 0;
+ map<u32, TreeNode> children; // keyed by rkey
+};
+} // namespace
+
+static
+void addNode(TreeNode &tree, u32 depth, u32 key, s32 ks, u32 rkey) {
+ u32 bit = (key >> ks) & MMB_KEY_MASK;
+ DEBUG_PRINTF("depth=%u, key=%u, ks=%d, rkey=%u, bit=%u\n", depth, key, ks,
+ rkey, bit);
+ mmb_set(&tree.mask, bit); // add bit to this level
+ tree.depth = depth; // record depth
+ // next level
+ rkey = (rkey << MMB_KEY_SHIFT) + bit;
+ ks -= MMB_KEY_SHIFT;
+ depth++;
+ if (ks >= 0) {
+ addNode(tree.children[rkey], depth, key, ks, rkey);
+ }
+}
+
+static
+void bfs(vector<mmbit_sparse_iter> &out, const TreeNode &tree) {
+ queue<const TreeNode *> q;
+ q.push(&tree);
+
+ vector<u32> levels;
+ u32 depth = 0;
+
+ DEBUG_PRINTF("walking q\n");
+
+ while (!q.empty()) {
+ const TreeNode *t = q.front();
+ q.pop();
+
+ if (depth != t->depth) {
+ depth = t->depth;
+ levels.push_back(out.size());
+ }
+
+ DEBUG_PRINTF("pop: mask=0x%08llx, depth=%u, children.size()=%zu\n",
+ t->mask, t->depth, t->children.size());
+
+ out.push_back(mmbit_sparse_iter());
+ memset(&out.back(), 0, sizeof(mmbit_sparse_iter));
+ mmbit_sparse_iter &record = out.back();
+ record.mask = t->mask;
+ record.val = 0;
+
+ for (auto &e : t->children) {
+ q.push(&e.second);
+ }
+ }
+
+ // val for records in non-last levels is the iterator array start offset
+ // for that iterator record's children
+ u32 start = 0;
+ for (size_t i = 0; i < levels.size(); i++) {
+ u32 start_next = levels[i];
+ u32 population = 0;
+ DEBUG_PRINTF("next level starts at %u\n", start_next);
+ for (u32 j = start; j < start_next; j++) {
+ out[j].val = start_next + population;
+ DEBUG_PRINTF(" children of %u start at %u\n", j, out[j].val);
+ population += mmb_popcount(out[j].mask);
+ }
+ start = start_next;
+ }
+
+ // val for records in the last level is the cumulative popcount
+ u32 population = 0;
+ for (size_t i = start; i < out.size(); i++) {
+ DEBUG_PRINTF("last level: i=%zu, population=%u\n", i, population);
+ out[i].val = population;
+ population += mmb_popcount(out[i].mask);
+ }
+}
+
+/** \brief Construct a sparse iterator over the values in \a bits for a
+ * multibit of size \a total_bits. */
vector<mmbit_sparse_iter> mmbBuildSparseIterator(const vector<u32> &bits,
u32 total_bits) {
vector<mmbit_sparse_iter> out;
- assert(!bits.empty());
- assert(total_bits > 0);
+ assert(!bits.empty());
+ assert(total_bits > 0);
assert(total_bits <= MMB_MAX_BITS);
-
- DEBUG_PRINTF("building sparse iter for %zu of %u bits\n",
- bits.size(), total_bits);
-
- s32 ks = (total_bits > 1 ? mmbit_keyshift(total_bits) : 0);
-
- // Construct an intermediate tree
- TreeNode tree;
- for (const auto &bit : bits) {
- assert(bit < total_bits);
- addNode(tree, 0, bit, ks, 0);
- }
-
- // From our intermediate tree, lay the data out with a breadth-first walk
- bfs(out, tree);
- assert(!out.empty());
-
-#ifdef DEBUG
- DEBUG_PRINTF("dump of iterator tree:\n");
- for (size_t i = 0; i < out.size(); ++i) {
- printf(" %zu:\tmask=0x%08llx, val=%u\n", i, out[i].mask, out[i].val);
- }
-#endif
-
- DEBUG_PRINTF("iter has %zu records\n", out.size());
+
+ DEBUG_PRINTF("building sparse iter for %zu of %u bits\n",
+ bits.size(), total_bits);
+
+ s32 ks = (total_bits > 1 ? mmbit_keyshift(total_bits) : 0);
+
+ // Construct an intermediate tree
+ TreeNode tree;
+ for (const auto &bit : bits) {
+ assert(bit < total_bits);
+ addNode(tree, 0, bit, ks, 0);
+ }
+
+ // From our intermediate tree, lay the data out with a breadth-first walk
+ bfs(out, tree);
+ assert(!out.empty());
+
+#ifdef DEBUG
+ DEBUG_PRINTF("dump of iterator tree:\n");
+ for (size_t i = 0; i < out.size(); ++i) {
+ printf(" %zu:\tmask=0x%08llx, val=%u\n", i, out[i].mask, out[i].val);
+ }
+#endif
+
+ DEBUG_PRINTF("iter has %zu records\n", out.size());
return out;
-}
-
-template<typename T>
-static
-void add_scatter(vector<T> *out, u32 offset, u64a mask) {
+}
+
+template<typename T>
+static
+void add_scatter(vector<T> *out, u32 offset, u64a mask) {
out->emplace_back();
T &su = out->back();
- memset(&su, 0, sizeof(su));
- su.offset = offset;
- su.val = mask;
- DEBUG_PRINTF("add %llu at offset %u\n", mask, offset);
-}
-
-static
-u32 mmbit_get_level_root_offset(u32 level) {
- return mmbit_root_offset_from_level[level] * sizeof(MMB_TYPE);
-}
-
-void mmbBuildInitRangePlan(u32 total_bits, u32 begin, u32 end,
- scatter_plan_raw *out) {
- DEBUG_PRINTF("building scatter plan for [%u, %u]/%u\n", begin, end,
- total_bits);
- if (!total_bits) {
- return;
- }
-
- if (total_bits <= MMB_FLAT_MAX_BITS) {
- // Handle flat model cases: first a bunch of 64-bit full-sized blocks,
- // then a single runt block at the end.
- u32 dest = 0; // dest offset
- u32 bits = total_bits;
- u32 base = 0;
- for (; bits > 64; bits -= 64, base += 64, dest += 8) {
- MMB_TYPE mask = get_flat_masks(base, begin, end);
- add_scatter(&out->p_u64a, dest, mask);
- }
-
- // Last chunk.
- assert(bits > 0 && bits <= 64);
-
- MMB_TYPE mask = get_flat_masks(base, begin, end);
- if (bits <= 8) {
- add_scatter(&out->p_u8, dest + 0, mask);
- } else if (bits <= 16) {
- add_scatter(&out->p_u16, dest + 0, mask);
- } else if (bits <= 24) {
- add_scatter(&out->p_u16, dest + 0, mask);
- add_scatter(&out->p_u8, dest + 2, mask >> 16);
- } else if (bits <= 32) {
- add_scatter(&out->p_u32, dest + 0, mask);
- } else if (bits <= 40) {
- add_scatter(&out->p_u32, dest + 0, mask);
- add_scatter(&out->p_u8, dest + 4, mask >> 32);
- } else if (bits <= 48) {
- add_scatter(&out->p_u32, dest + 0, mask);
- add_scatter(&out->p_u16, dest + 4, mask >> 32);
- } else if (bits <= 56) {
- add_scatter(&out->p_u32, dest + 0, mask);
- add_scatter(&out->p_u16, dest + 4, mask >> 32);
- add_scatter(&out->p_u8, dest + 6, mask >> 48);
- } else {
- add_scatter(&out->p_u64a, dest + 0, mask);
- }
- return;
- }
-
- /* handle the multilevel case */
- s32 ks = mmbit_keyshift(total_bits);
- u32 level = 0;
- assert(sizeof(MMB_TYPE) == sizeof(u64a));
-
- if (begin == end) {
- add_scatter(&out->p_u64a, 0, 0);
- return;
- }
-
- for (;;) {
- u32 block_offset = mmbit_get_level_root_offset(level);
- u32 k1 = begin >> ks, k2 = end >> ks;
-
- // Summary blocks need to account for the runt block on the end.
- if ((k2 << ks) != end) {
- k2++;
- }
-
- // Partial block to deal with beginning.
+ memset(&su, 0, sizeof(su));
+ su.offset = offset;
+ su.val = mask;
+ DEBUG_PRINTF("add %llu at offset %u\n", mask, offset);
+}
+
+static
+u32 mmbit_get_level_root_offset(u32 level) {
+ return mmbit_root_offset_from_level[level] * sizeof(MMB_TYPE);
+}
+
+void mmbBuildInitRangePlan(u32 total_bits, u32 begin, u32 end,
+ scatter_plan_raw *out) {
+ DEBUG_PRINTF("building scatter plan for [%u, %u]/%u\n", begin, end,
+ total_bits);
+ if (!total_bits) {
+ return;
+ }
+
+ if (total_bits <= MMB_FLAT_MAX_BITS) {
+ // Handle flat model cases: first a bunch of 64-bit full-sized blocks,
+ // then a single runt block at the end.
+ u32 dest = 0; // dest offset
+ u32 bits = total_bits;
+ u32 base = 0;
+ for (; bits > 64; bits -= 64, base += 64, dest += 8) {
+ MMB_TYPE mask = get_flat_masks(base, begin, end);
+ add_scatter(&out->p_u64a, dest, mask);
+ }
+
+ // Last chunk.
+ assert(bits > 0 && bits <= 64);
+
+ MMB_TYPE mask = get_flat_masks(base, begin, end);
+ if (bits <= 8) {
+ add_scatter(&out->p_u8, dest + 0, mask);
+ } else if (bits <= 16) {
+ add_scatter(&out->p_u16, dest + 0, mask);
+ } else if (bits <= 24) {
+ add_scatter(&out->p_u16, dest + 0, mask);
+ add_scatter(&out->p_u8, dest + 2, mask >> 16);
+ } else if (bits <= 32) {
+ add_scatter(&out->p_u32, dest + 0, mask);
+ } else if (bits <= 40) {
+ add_scatter(&out->p_u32, dest + 0, mask);
+ add_scatter(&out->p_u8, dest + 4, mask >> 32);
+ } else if (bits <= 48) {
+ add_scatter(&out->p_u32, dest + 0, mask);
+ add_scatter(&out->p_u16, dest + 4, mask >> 32);
+ } else if (bits <= 56) {
+ add_scatter(&out->p_u32, dest + 0, mask);
+ add_scatter(&out->p_u16, dest + 4, mask >> 32);
+ add_scatter(&out->p_u8, dest + 6, mask >> 48);
+ } else {
+ add_scatter(&out->p_u64a, dest + 0, mask);
+ }
+ return;
+ }
+
+ /* handle the multilevel case */
+ s32 ks = mmbit_keyshift(total_bits);
+ u32 level = 0;
+ assert(sizeof(MMB_TYPE) == sizeof(u64a));
+
+ if (begin == end) {
+ add_scatter(&out->p_u64a, 0, 0);
+ return;
+ }
+
+ for (;;) {
+ u32 block_offset = mmbit_get_level_root_offset(level);
+ u32 k1 = begin >> ks, k2 = end >> ks;
+
+ // Summary blocks need to account for the runt block on the end.
+ if ((k2 << ks) != end) {
+ k2++;
+ }
+
+ // Partial block to deal with beginning.
block_offset += (k1 / MMB_KEY_BITS) * sizeof(MMB_TYPE);
- if (k1 % MMB_KEY_BITS) {
- u32 idx = k1 / MMB_KEY_BITS;
- u32 block_end = (idx + 1) * MMB_KEY_BITS;
-
- // Because k1 % MMB_KEY_BITS != 0, we can avoid checking edge cases
- // here (see the branch in mmb_mask_zero_to).
- MMB_TYPE mask = (-MMB_ONE) << (k1 % MMB_KEY_BITS);
-
- if (k2 < block_end) {
- assert(k2 % MMB_KEY_BITS);
- mask &= mmb_mask_zero_to_nocheck(k2 % MMB_KEY_BITS);
- add_scatter(&out->p_u64a, block_offset, mask);
- goto next_level;
- } else {
- add_scatter(&out->p_u64a, block_offset, mask);
- k1 = block_end;
- block_offset += sizeof(MMB_TYPE);
- }
- }
-
- // Write blocks filled with ones until we get to the last block.
- for (; k1 < (k2 & ~MMB_KEY_MASK); k1 += MMB_KEY_BITS) {
- add_scatter(&out->p_u64a, block_offset, -MMB_ONE);
- block_offset += sizeof(MMB_TYPE);
- }
-
- // Final block.
- if (likely(k1 < k2)) {
- // Again, if k2 was at a block boundary, it would have been handled
- // by the previous loop, so we know k2 % MMB_KEY_BITS != 0 and can
- // avoid the branch in mmb_mask_zero_to here.
- assert(k2 % MMB_KEY_BITS);
- MMB_TYPE mask = mmb_mask_zero_to_nocheck(k2 % MMB_KEY_BITS);
-
- add_scatter(&out->p_u64a, block_offset, mask);
- }
-
- next_level:
- if (ks == 0) {
- break; // Last level is done, finished.
- }
-
- ks -= MMB_KEY_SHIFT;
- level++;
- }
-}
-
-void mmbBuildClearPlan(u32 total_bits, scatter_plan_raw *out) {
- return mmbBuildInitRangePlan(total_bits, 0, 0, out);
-}
-
-} // namespace ue2
+ if (k1 % MMB_KEY_BITS) {
+ u32 idx = k1 / MMB_KEY_BITS;
+ u32 block_end = (idx + 1) * MMB_KEY_BITS;
+
+ // Because k1 % MMB_KEY_BITS != 0, we can avoid checking edge cases
+ // here (see the branch in mmb_mask_zero_to).
+ MMB_TYPE mask = (-MMB_ONE) << (k1 % MMB_KEY_BITS);
+
+ if (k2 < block_end) {
+ assert(k2 % MMB_KEY_BITS);
+ mask &= mmb_mask_zero_to_nocheck(k2 % MMB_KEY_BITS);
+ add_scatter(&out->p_u64a, block_offset, mask);
+ goto next_level;
+ } else {
+ add_scatter(&out->p_u64a, block_offset, mask);
+ k1 = block_end;
+ block_offset += sizeof(MMB_TYPE);
+ }
+ }
+
+ // Write blocks filled with ones until we get to the last block.
+ for (; k1 < (k2 & ~MMB_KEY_MASK); k1 += MMB_KEY_BITS) {
+ add_scatter(&out->p_u64a, block_offset, -MMB_ONE);
+ block_offset += sizeof(MMB_TYPE);
+ }
+
+ // Final block.
+ if (likely(k1 < k2)) {
+ // Again, if k2 was at a block boundary, it would have been handled
+ // by the previous loop, so we know k2 % MMB_KEY_BITS != 0 and can
+ // avoid the branch in mmb_mask_zero_to here.
+ assert(k2 % MMB_KEY_BITS);
+ MMB_TYPE mask = mmb_mask_zero_to_nocheck(k2 % MMB_KEY_BITS);
+
+ add_scatter(&out->p_u64a, block_offset, mask);
+ }
+
+ next_level:
+ if (ks == 0) {
+ break; // Last level is done, finished.
+ }
+
+ ks -= MMB_KEY_SHIFT;
+ level++;
+ }
+}
+
+void mmbBuildClearPlan(u32 total_bits, scatter_plan_raw *out) {
+ return mmbBuildInitRangePlan(total_bits, 0, 0, out);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/util/multibit_build.h b/contrib/libs/hyperscan/src/util/multibit_build.h
index 595350a59f..24f1bb55b0 100644
--- a/contrib/libs/hyperscan/src/util/multibit_build.h
+++ b/contrib/libs/hyperscan/src/util/multibit_build.h
@@ -1,49 +1,49 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Multibit: build code (for sparse iterators)
- */
-
-#ifndef MULTIBIT_BUILD_H
-#define MULTIBIT_BUILD_H
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Multibit: build code (for sparse iterators)
+ */
+
+#ifndef MULTIBIT_BUILD_H
+#define MULTIBIT_BUILD_H
+
#include "hs_common.h"
-#include "multibit_internal.h"
+#include "multibit_internal.h"
#include "hash.h"
-
-#include <vector>
-
+
+#include <vector>
+
inline
bool operator==(const mmbit_sparse_iter &a, const mmbit_sparse_iter &b) {
return a.mask == b.mask && a.val == b.val;
-}
-
+}
+
namespace std {
template<>
@@ -55,8 +55,8 @@ struct hash<mmbit_sparse_iter> {
} // namespace std
-namespace ue2 {
-
+namespace ue2 {
+
/**
* \brief Return the size in bytes of a multibit that can store the given
* number of bits.
@@ -66,17 +66,17 @@ namespace ue2 {
*/
u32 mmbit_size(u32 total_bits);
-/** \brief Construct a sparse iterator over the values in \a bits for a
- * multibit of size \a total_bits. */
+/** \brief Construct a sparse iterator over the values in \a bits for a
+ * multibit of size \a total_bits. */
std::vector<mmbit_sparse_iter>
mmbBuildSparseIterator(const std::vector<u32> &bits, u32 total_bits);
-
-struct scatter_plan_raw;
-
-void mmbBuildInitRangePlan(u32 total_bits, u32 begin, u32 end,
- scatter_plan_raw *out);
-void mmbBuildClearPlan(u32 total_bits, scatter_plan_raw *out);
-
-} // namespace ue2
-
-#endif // MULTIBIT_BUILD_H
+
+struct scatter_plan_raw;
+
+void mmbBuildInitRangePlan(u32 total_bits, u32 begin, u32 end,
+ scatter_plan_raw *out);
+void mmbBuildClearPlan(u32 total_bits, scatter_plan_raw *out);
+
+} // namespace ue2
+
+#endif // MULTIBIT_BUILD_H
diff --git a/contrib/libs/hyperscan/src/util/multibit_internal.h b/contrib/libs/hyperscan/src/util/multibit_internal.h
index 0f74442e84..350f3bfd47 100644
--- a/contrib/libs/hyperscan/src/util/multibit_internal.h
+++ b/contrib/libs/hyperscan/src/util/multibit_internal.h
@@ -1,81 +1,81 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Multibit: data structures.
- *
- * If all you need is the sizes of multibit's few structures, then including
- * this file is a much better idea than including all of multibit.h.
- */
-#ifndef MULTIBIT_INTERNAL_H
-#define MULTIBIT_INTERNAL_H
-
-#include "ue2common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/** \brief Sentinel value meaning "no key found". */
-#define MMB_INVALID 0xffffffffu
-
-typedef u64a MMB_TYPE; /**< Basic block type for mmbit operations. */
-#define MMB_MAX_LEVEL 6 /**< Maximum level in the mmbit pyramid. */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Multibit: data structures.
+ *
+ * If all you need is the sizes of multibit's few structures, then including
+ * this file is a much better idea than including all of multibit.h.
+ */
+#ifndef MULTIBIT_INTERNAL_H
+#define MULTIBIT_INTERNAL_H
+
+#include "ue2common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Sentinel value meaning "no key found". */
+#define MMB_INVALID 0xffffffffu
+
+typedef u64a MMB_TYPE; /**< Basic block type for mmbit operations. */
+#define MMB_MAX_LEVEL 6 /**< Maximum level in the mmbit pyramid. */
+
/** \brief Maximum number of keys (bits) in a multibit. */
#define MMB_MAX_BITS (1U << 31)
-/** \brief Sparse iterator record type.
- *
- * A sparse iterator is a tree of these records, where val identifies the
- * offset of the result for leaf nodes and points to the next record for
- * intermediate nodes. Built by the code in multibit_build.cpp.
- */
-struct mmbit_sparse_iter {
- MMB_TYPE mask;
- u32 val;
-};
-
-/** \brief Sparse iterator runtime state type.
- *
- * An array of these records (one per "level" in the multibit pyramid) is used
- * to store the current iteration state.
- */
-struct mmbit_sparse_state {
- MMB_TYPE mask; //!< \brief masked last block read at this level.
- u32 itkey; //!< \brief iterator offset for this level.
-};
-
-/** \brief Maximum number of \ref mmbit_sparse_state that could be needed. */
-#define MAX_SPARSE_ITER_STATES (6 + 1)
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // MULTIBIT_INTERNAL_H
+/** \brief Sparse iterator record type.
+ *
+ * A sparse iterator is a tree of these records, where val identifies the
+ * offset of the result for leaf nodes and points to the next record for
+ * intermediate nodes. Built by the code in multibit_build.cpp.
+ */
+struct mmbit_sparse_iter {
+ MMB_TYPE mask;
+ u32 val;
+};
+
+/** \brief Sparse iterator runtime state type.
+ *
+ * An array of these records (one per "level" in the multibit pyramid) is used
+ * to store the current iteration state.
+ */
+struct mmbit_sparse_state {
+ MMB_TYPE mask; //!< \brief masked last block read at this level.
+ u32 itkey; //!< \brief iterator offset for this level.
+};
+
+/** \brief Maximum number of \ref mmbit_sparse_state that could be needed. */
+#define MAX_SPARSE_ITER_STATES (6 + 1)
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // MULTIBIT_INTERNAL_H
diff --git a/contrib/libs/hyperscan/src/util/order_check.h b/contrib/libs/hyperscan/src/util/order_check.h
index 1a8fc2a3fa..33f3869d73 100644
--- a/contrib/libs/hyperscan/src/util/order_check.h
+++ b/contrib/libs/hyperscan/src/util/order_check.h
@@ -1,37 +1,37 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* for implementing operator<, assumes objects are a and b */
-#define ORDER_CHECK(field) do { \
- if (a.field < b.field) { \
- return 1; \
- } \
- if (b.field < a.field) { \
- return 0; \
- } \
- } while (0)
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* for implementing operator<, assumes objects are a and b */
+#define ORDER_CHECK(field) do { \
+ if (a.field < b.field) { \
+ return 1; \
+ } \
+ if (b.field < a.field) { \
+ return 0; \
+ } \
+ } while (0)
diff --git a/contrib/libs/hyperscan/src/util/pack_bits.h b/contrib/libs/hyperscan/src/util/pack_bits.h
index 301c2664c0..800ce25ec7 100644
--- a/contrib/libs/hyperscan/src/util/pack_bits.h
+++ b/contrib/libs/hyperscan/src/util/pack_bits.h
@@ -1,227 +1,227 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Functions for packing/unpacking arrays.
- */
-
-#ifndef UTIL_PACK_BITS_H
-#define UTIL_PACK_BITS_H
-
-#include "ue2common.h"
-#include "unaligned.h"
-#include "partial_store.h"
-
-/**
- * \brief Pack bits from an array of 32-bit words into \a out.
- *
- * \param out Output array. Must be large enough to store sum(bits).
- * \param v Input array.
- * \param bits Number of low bits in the corresponding element of \a v to pack.
- * \param elements Size of the \a v and \a bits arrays.
- */
-static really_inline
-void pack_bits_32(char *out, const u32 *v, const u32 *bits,
- const unsigned int elements);
-
-/**
- * \brief Pack bits from an array of 64-bit words into \a out.
- *
- * \param out Output array. Must be large enough to store sum(bits).
- * \param v Input array.
- * \param bits Number of low bits in the corresponding element of \a v to pack.
- * \param elements Size of the \a v and \a bits arrays.
- */
-static really_inline
-void pack_bits_64(char *out, const u64a *v, const u32 *bits,
- const unsigned int elements);
-
-/**
- * \brief Unpack bits into an array of 32-bit words according to the counts
- * given.
- *
- * \param v Output array.
- * \param in Packed input array.
- * \param bits Number of bits to unpack into the corresponding element of \a v.
- * \param elements Size of the \a v and \a bits arrays.
- */
-static really_inline
-void unpack_bits_32(u32 *v, const u8 *in, const u32 *bits,
- const unsigned int elements);
-
-/**
- * \brief Unpack bits into an array of 64-bit words according to the counts
- * given.
- *
- * \param v Output array.
- * \param in Packed input array.
- * \param bits Number of bits to unpack into the corresponding element of \a v.
- * \param elements Size of the \a v and \a bits arrays.
- */
-static really_inline
-void unpack_bits_64(u64a *v, const u8 *in, const u32 *bits,
- const unsigned int elements);
-
-/*
- * Inline implementations follow.
- */
-
-static really_inline
-void pack_bits_32(char *out, const u32 *v, const u32 *bits,
- const unsigned int elements) {
- u32 write = 0; // accumulator
- u32 idx = 0; // acc holds this many bits
-
- for (unsigned int i = 0; i < elements; i++) {
- assert(bits[i] <= 32);
- write |= (v[i] << idx);
- idx += bits[i];
- if (idx >= 32) {
- unaligned_store_u32(out, write);
- out += 4;
- idx -= 32;
- u32 leftover = bits[i] - idx;
- if (leftover == 32) {
- write = 0;
- } else {
- assert(leftover < 32);
- write = v[i] >> leftover;
- }
- }
- }
-
- // There might be a write left over.
- partial_store_u32(out, write, (idx + 7) / 8);
-}
-
-static really_inline
-void pack_bits_64(char *out, const u64a *v, const u32 *bits,
- const unsigned int elements) {
- u64a write = 0; // accumulator
- u32 idx = 0; // acc holds this many bits
-
- for (unsigned int i = 0; i < elements; i++) {
- assert(bits[i] <= 64);
- write |= (v[i] << idx);
- idx += bits[i];
- if (idx >= 64) {
- unaligned_store_u64a(out, write);
- out += 8;
- idx -= 64;
- u32 leftover = bits[i] - idx;
- if (leftover == 64) {
- write = 0;
- } else {
- assert(leftover < 64);
- write = v[i] >> leftover;
- }
- }
- }
-
- // There might be a write left over.
- DEBUG_PRINTF("partial store of idx=%u\n", idx);
- partial_store_u64a(out, write, (idx + 7) / 8);
-}
-
-static really_inline
-void unpack_bits_32(u32 *v, const u8 *in, const u32 *bits,
- const unsigned int elements) {
- u32 used = 0; // bits used from *in
-
- for (unsigned int i = 0; i < elements; i++) {
- assert(bits[i] <= 32);
- u32 v_out = 0; // accumulator for v[i]
- u32 b = bits[i]; // bits left to read for v[i]
- u32 vidx = 0; // bits written to v[i]
-
- while (b) {
- u32 read = *in >> used;
- u32 bits_read = 8 - used;
-
- if (b <= bits_read) {
- u32 mask = read & ((1U << b) - 1);
- v_out |= mask << vidx;
- vidx += b;
- used += b;
- b = 0;
- if (used < 8) {
- continue; // more from this *in
- }
- } else {
- v_out |= read << vidx;
- vidx += bits_read;
- b -= bits_read;
- }
-
- used = 0;
- in++;
- }
-
- v[i] = v_out;
- }
-}
-
-static really_inline
-void unpack_bits_64(u64a *v, const u8 *in, const u32 *bits,
- const unsigned int elements) {
- u32 used = 0; // bits used from *in
-
- for (unsigned int i = 0; i < elements; i++) {
- assert(bits[i] <= 64);
- u64a v_out = 0; // accumulator for v[i]
- u32 b = bits[i]; // bits left to read for v[i]
- u32 vidx = 0; // bits written to v[i]
-
- while (b) {
- u64a read = *in >> used;
- u32 bits_read = 8 - used;
-
- if (b <= bits_read) {
- u64a mask = read & ((1U << b) - 1);
- v_out |= mask << vidx;
- vidx += b;
- used += b;
- b = 0;
- if (used < 8) {
- continue; // more from this *in
- }
- } else {
- v_out |= read << vidx;
- vidx += bits_read;
- b -= bits_read;
- }
-
- used = 0;
- in++;
- }
-
- v[i] = v_out;
- }
-}
-
-#endif // UTIL_PACK_BITS_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Functions for packing/unpacking arrays.
+ */
+
+#ifndef UTIL_PACK_BITS_H
+#define UTIL_PACK_BITS_H
+
+#include "ue2common.h"
+#include "unaligned.h"
+#include "partial_store.h"
+
+/**
+ * \brief Pack bits from an array of 32-bit words into \a out.
+ *
+ * \param out Output array. Must be large enough to store sum(bits).
+ * \param v Input array.
+ * \param bits Number of low bits in the corresponding element of \a v to pack.
+ * \param elements Size of the \a v and \a bits arrays.
+ */
+static really_inline
+void pack_bits_32(char *out, const u32 *v, const u32 *bits,
+ const unsigned int elements);
+
+/**
+ * \brief Pack bits from an array of 64-bit words into \a out.
+ *
+ * \param out Output array. Must be large enough to store sum(bits).
+ * \param v Input array.
+ * \param bits Number of low bits in the corresponding element of \a v to pack.
+ * \param elements Size of the \a v and \a bits arrays.
+ */
+static really_inline
+void pack_bits_64(char *out, const u64a *v, const u32 *bits,
+ const unsigned int elements);
+
+/**
+ * \brief Unpack bits into an array of 32-bit words according to the counts
+ * given.
+ *
+ * \param v Output array.
+ * \param in Packed input array.
+ * \param bits Number of bits to unpack into the corresponding element of \a v.
+ * \param elements Size of the \a v and \a bits arrays.
+ */
+static really_inline
+void unpack_bits_32(u32 *v, const u8 *in, const u32 *bits,
+ const unsigned int elements);
+
+/**
+ * \brief Unpack bits into an array of 64-bit words according to the counts
+ * given.
+ *
+ * \param v Output array.
+ * \param in Packed input array.
+ * \param bits Number of bits to unpack into the corresponding element of \a v.
+ * \param elements Size of the \a v and \a bits arrays.
+ */
+static really_inline
+void unpack_bits_64(u64a *v, const u8 *in, const u32 *bits,
+ const unsigned int elements);
+
+/*
+ * Inline implementations follow.
+ */
+
+static really_inline
+void pack_bits_32(char *out, const u32 *v, const u32 *bits,
+ const unsigned int elements) {
+ u32 write = 0; // accumulator
+ u32 idx = 0; // acc holds this many bits
+
+ for (unsigned int i = 0; i < elements; i++) {
+ assert(bits[i] <= 32);
+ write |= (v[i] << idx);
+ idx += bits[i];
+ if (idx >= 32) {
+ unaligned_store_u32(out, write);
+ out += 4;
+ idx -= 32;
+ u32 leftover = bits[i] - idx;
+ if (leftover == 32) {
+ write = 0;
+ } else {
+ assert(leftover < 32);
+ write = v[i] >> leftover;
+ }
+ }
+ }
+
+ // There might be a write left over.
+ partial_store_u32(out, write, (idx + 7) / 8);
+}
+
+static really_inline
+void pack_bits_64(char *out, const u64a *v, const u32 *bits,
+ const unsigned int elements) {
+ u64a write = 0; // accumulator
+ u32 idx = 0; // acc holds this many bits
+
+ for (unsigned int i = 0; i < elements; i++) {
+ assert(bits[i] <= 64);
+ write |= (v[i] << idx);
+ idx += bits[i];
+ if (idx >= 64) {
+ unaligned_store_u64a(out, write);
+ out += 8;
+ idx -= 64;
+ u32 leftover = bits[i] - idx;
+ if (leftover == 64) {
+ write = 0;
+ } else {
+ assert(leftover < 64);
+ write = v[i] >> leftover;
+ }
+ }
+ }
+
+ // There might be a write left over.
+ DEBUG_PRINTF("partial store of idx=%u\n", idx);
+ partial_store_u64a(out, write, (idx + 7) / 8);
+}
+
+static really_inline
+void unpack_bits_32(u32 *v, const u8 *in, const u32 *bits,
+ const unsigned int elements) {
+ u32 used = 0; // bits used from *in
+
+ for (unsigned int i = 0; i < elements; i++) {
+ assert(bits[i] <= 32);
+ u32 v_out = 0; // accumulator for v[i]
+ u32 b = bits[i]; // bits left to read for v[i]
+ u32 vidx = 0; // bits written to v[i]
+
+ while (b) {
+ u32 read = *in >> used;
+ u32 bits_read = 8 - used;
+
+ if (b <= bits_read) {
+ u32 mask = read & ((1U << b) - 1);
+ v_out |= mask << vidx;
+ vidx += b;
+ used += b;
+ b = 0;
+ if (used < 8) {
+ continue; // more from this *in
+ }
+ } else {
+ v_out |= read << vidx;
+ vidx += bits_read;
+ b -= bits_read;
+ }
+
+ used = 0;
+ in++;
+ }
+
+ v[i] = v_out;
+ }
+}
+
+static really_inline
+void unpack_bits_64(u64a *v, const u8 *in, const u32 *bits,
+ const unsigned int elements) {
+ u32 used = 0; // bits used from *in
+
+ for (unsigned int i = 0; i < elements; i++) {
+ assert(bits[i] <= 64);
+ u64a v_out = 0; // accumulator for v[i]
+ u32 b = bits[i]; // bits left to read for v[i]
+ u32 vidx = 0; // bits written to v[i]
+
+ while (b) {
+ u64a read = *in >> used;
+ u32 bits_read = 8 - used;
+
+ if (b <= bits_read) {
+ u64a mask = read & ((1U << b) - 1);
+ v_out |= mask << vidx;
+ vidx += b;
+ used += b;
+ b = 0;
+ if (used < 8) {
+ continue; // more from this *in
+ }
+ } else {
+ v_out |= read << vidx;
+ vidx += bits_read;
+ b -= bits_read;
+ }
+
+ used = 0;
+ in++;
+ }
+
+ v[i] = v_out;
+ }
+}
+
+#endif // UTIL_PACK_BITS_H
diff --git a/contrib/libs/hyperscan/src/util/partial_store.h b/contrib/libs/hyperscan/src/util/partial_store.h
index 8ee23bdb1e..a49d1fae1d 100644
--- a/contrib/libs/hyperscan/src/util/partial_store.h
+++ b/contrib/libs/hyperscan/src/util/partial_store.h
@@ -1,163 +1,163 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef PARTIAL_STORE_H
-#define PARTIAL_STORE_H
-
-#include "ue2common.h"
-#include "unaligned.h"
-
-/* loads/stores the least significant bytes of the values. */
-
-static really_inline
-void partial_store_u32(void *ptr, u32 value, u32 numBytes) {
- assert(numBytes <= 4);
- switch (numBytes) {
- case 4:
- unaligned_store_u32(ptr, value);
- break;
- case 3:
- unaligned_store_u16(ptr, (u16)value);
- *((u8 *)ptr + 2) = (u8)(value >> 16);
- break;
- case 2:
- unaligned_store_u16(ptr, (u16)value);
- break;
- case 1:
- *(u8 *)ptr = (u8)value;
- break;
- case 0:
- break;
- }
-}
-
-static really_inline
-u32 partial_load_u32(const void *ptr, u32 numBytes) {
- u32 value;
- assert(numBytes <= 4);
- switch (numBytes) {
- case 4:
- value = unaligned_load_u32(ptr);
- return value;
- case 3:
- value = unaligned_load_u16(ptr);
- value |= ((u32)(*((const u8 *)ptr + 2)) << 16);
- return value;
- case 2:
- value = unaligned_load_u16(ptr);
- return value;
- case 1:
- value = *(const u8 *)ptr;
- return value;
- case 0:
- break;
- }
-
- return 0;
-}
-
-static really_inline
-void partial_store_u64a(void *ptr, u64a value, u32 numBytes) {
- assert(numBytes <= 8);
- switch (numBytes) {
- case 8:
- unaligned_store_u64a(ptr, value);
- break;
- case 7:
- unaligned_store_u32(ptr, (u32)value);
- unaligned_store_u16((u8 *)ptr + 4, (u16)(value >> 32));
- *((u8 *)ptr + 6) = (u8)(value >> 48);
- break;
- case 6:
- unaligned_store_u32(ptr, (u32)value);
- unaligned_store_u16((u8 *)ptr + 4, (u16)(value >> 32));
- break;
- case 5:
- unaligned_store_u32(ptr, (u32)value);
- *((u8 *)ptr + 4) = (u8)(value >> 32);
- break;
- case 4:
- unaligned_store_u32(ptr, (u32)value);
- break;
- case 3:
- unaligned_store_u16(ptr, (u16)value);
- *((u8 *)ptr + 2) = (u8)(value >> 16);
- break;
- case 2:
- unaligned_store_u16(ptr, (u16)value);
- break;
- case 1:
- *(u8 *)ptr = (u8)value;
- break;
- case 0:
- break;
- }
-}
-
-static really_inline
-u64a partial_load_u64a(const void *ptr, u32 numBytes) {
- u64a value;
- assert(numBytes <= 8);
- switch (numBytes) {
- case 8:
- value = unaligned_load_u64a(ptr);
- return value;
- case 7:
- value = unaligned_load_u32(ptr);
- value |= (u64a)unaligned_load_u16((const u8 *)ptr + 4) << 32;
- value |= (u64a)(*((const u8 *)ptr + 6)) << 48;
- return value;
- case 6:
- value = unaligned_load_u32(ptr);
- value |= (u64a)unaligned_load_u16((const u8 *)ptr + 4) << 32;
- return value;
- case 5:
- value = unaligned_load_u32(ptr);
- value |= (u64a)(*((const u8 *)ptr + 4)) << 32;
- return value;
- case 4:
- value = unaligned_load_u32(ptr);
- return value;
- case 3:
- value = unaligned_load_u16(ptr);
- value |= (u64a)(*((const u8 *)ptr + 2)) << 16;
- return value;
- case 2:
- value = unaligned_load_u16(ptr);
- return value;
- case 1:
- value = *(const u8 *)ptr;
- return value;
- case 0:
- break;
- }
-
- return 0;
-}
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef PARTIAL_STORE_H
+#define PARTIAL_STORE_H
+
+#include "ue2common.h"
+#include "unaligned.h"
+
+/* loads/stores the least significant bytes of the values. */
+
+static really_inline
+void partial_store_u32(void *ptr, u32 value, u32 numBytes) {
+ assert(numBytes <= 4);
+ switch (numBytes) {
+ case 4:
+ unaligned_store_u32(ptr, value);
+ break;
+ case 3:
+ unaligned_store_u16(ptr, (u16)value);
+ *((u8 *)ptr + 2) = (u8)(value >> 16);
+ break;
+ case 2:
+ unaligned_store_u16(ptr, (u16)value);
+ break;
+ case 1:
+ *(u8 *)ptr = (u8)value;
+ break;
+ case 0:
+ break;
+ }
+}
+
+static really_inline
+u32 partial_load_u32(const void *ptr, u32 numBytes) {
+ u32 value;
+ assert(numBytes <= 4);
+ switch (numBytes) {
+ case 4:
+ value = unaligned_load_u32(ptr);
+ return value;
+ case 3:
+ value = unaligned_load_u16(ptr);
+ value |= ((u32)(*((const u8 *)ptr + 2)) << 16);
+ return value;
+ case 2:
+ value = unaligned_load_u16(ptr);
+ return value;
+ case 1:
+ value = *(const u8 *)ptr;
+ return value;
+ case 0:
+ break;
+ }
+
+ return 0;
+}
+
+static really_inline
+void partial_store_u64a(void *ptr, u64a value, u32 numBytes) {
+ assert(numBytes <= 8);
+ switch (numBytes) {
+ case 8:
+ unaligned_store_u64a(ptr, value);
+ break;
+ case 7:
+ unaligned_store_u32(ptr, (u32)value);
+ unaligned_store_u16((u8 *)ptr + 4, (u16)(value >> 32));
+ *((u8 *)ptr + 6) = (u8)(value >> 48);
+ break;
+ case 6:
+ unaligned_store_u32(ptr, (u32)value);
+ unaligned_store_u16((u8 *)ptr + 4, (u16)(value >> 32));
+ break;
+ case 5:
+ unaligned_store_u32(ptr, (u32)value);
+ *((u8 *)ptr + 4) = (u8)(value >> 32);
+ break;
+ case 4:
+ unaligned_store_u32(ptr, (u32)value);
+ break;
+ case 3:
+ unaligned_store_u16(ptr, (u16)value);
+ *((u8 *)ptr + 2) = (u8)(value >> 16);
+ break;
+ case 2:
+ unaligned_store_u16(ptr, (u16)value);
+ break;
+ case 1:
+ *(u8 *)ptr = (u8)value;
+ break;
+ case 0:
+ break;
+ }
+}
+
+static really_inline
+u64a partial_load_u64a(const void *ptr, u32 numBytes) {
+ u64a value;
+ assert(numBytes <= 8);
+ switch (numBytes) {
+ case 8:
+ value = unaligned_load_u64a(ptr);
+ return value;
+ case 7:
+ value = unaligned_load_u32(ptr);
+ value |= (u64a)unaligned_load_u16((const u8 *)ptr + 4) << 32;
+ value |= (u64a)(*((const u8 *)ptr + 6)) << 48;
+ return value;
+ case 6:
+ value = unaligned_load_u32(ptr);
+ value |= (u64a)unaligned_load_u16((const u8 *)ptr + 4) << 32;
+ return value;
+ case 5:
+ value = unaligned_load_u32(ptr);
+ value |= (u64a)(*((const u8 *)ptr + 4)) << 32;
+ return value;
+ case 4:
+ value = unaligned_load_u32(ptr);
+ return value;
+ case 3:
+ value = unaligned_load_u16(ptr);
+ value |= (u64a)(*((const u8 *)ptr + 2)) << 16;
+ return value;
+ case 2:
+ value = unaligned_load_u16(ptr);
+ return value;
+ case 1:
+ value = *(const u8 *)ptr;
+ return value;
+ case 0:
+ break;
+ }
+
+ return 0;
+}
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/partitioned_set.h b/contrib/libs/hyperscan/src/util/partitioned_set.h
index e6f907edd1..8a4d3dd9e1 100644
--- a/contrib/libs/hyperscan/src/util/partitioned_set.h
+++ b/contrib/libs/hyperscan/src/util/partitioned_set.h
@@ -1,263 +1,263 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef PARTITIONED_SET_H
-#define PARTITIONED_SET_H
-
-#include "container.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef PARTITIONED_SET_H
+#define PARTITIONED_SET_H
+
+#include "container.h"
#include "noncopyable.h"
#include "flat_containers.h"
-#include "ue2common.h"
-
-#include <algorithm>
-#include <vector>
-
-#include <boost/dynamic_bitset.hpp>
-
-namespace ue2 {
-
-static constexpr size_t INVALID_SUBSET = ~(size_t)0;
-
-/**
- * partition_set represents a partitioning of a set of integers [0, n) into
- * disjoint non-empty subsets.
- *
- * The subsets themselves are also indexed by integers.
- *
- * The underlying integer type for the set members is parameterized.
- */
-
-template<typename T>
+#include "ue2common.h"
+
+#include <algorithm>
+#include <vector>
+
+#include <boost/dynamic_bitset.hpp>
+
+namespace ue2 {
+
+static constexpr size_t INVALID_SUBSET = ~(size_t)0;
+
+/**
+ * partition_set represents a partitioning of a set of integers [0, n) into
+ * disjoint non-empty subsets.
+ *
+ * The subsets themselves are also indexed by integers.
+ *
+ * The underlying integer type for the set members is parameterized.
+ */
+
+template<typename T>
class partitioned_set : noncopyable {
-public:
- class subset {
- public:
- typedef typename std::vector<T>::const_iterator const_iterator;
-
- size_t size() const {
- assert(members.size());
- return members.size();
- }
-
- const_iterator begin() const {
- return members.begin();
- }
-
- const_iterator end() const {
- return members.end();
- }
-
- private:
- std::vector<T> members; /**< sorted members of the subset */
-
- friend class partitioned_set;
- };
-
- /** returns the number of subsets in the partition */
- size_t size() const { return subsets.size(); }
-
- /** returns the subset with the given index */
- const subset &operator[](size_t subset_index) const {
- assert(subset_index < size());
- return subsets[subset_index];
- }
-
- /**
- * Splits the subset with the given subset_index based on whether its
- * members are also members of the splitter set.
- *
- * The smaller of the intersection and difference is placed into a new
- * subset, the index of which is returned. The larger part remains with the
- * subset index.
- *
- * If the set was not split (due to there being no overlap with splitter or
- * being a complete subset), INVALID_SUBSET is returned.
- */
+public:
+ class subset {
+ public:
+ typedef typename std::vector<T>::const_iterator const_iterator;
+
+ size_t size() const {
+ assert(members.size());
+ return members.size();
+ }
+
+ const_iterator begin() const {
+ return members.begin();
+ }
+
+ const_iterator end() const {
+ return members.end();
+ }
+
+ private:
+ std::vector<T> members; /**< sorted members of the subset */
+
+ friend class partitioned_set;
+ };
+
+ /** returns the number of subsets in the partition */
+ size_t size() const { return subsets.size(); }
+
+ /** returns the subset with the given index */
+ const subset &operator[](size_t subset_index) const {
+ assert(subset_index < size());
+ return subsets[subset_index];
+ }
+
+ /**
+ * Splits the subset with the given subset_index based on whether its
+ * members are also members of the splitter set.
+ *
+ * The smaller of the intersection and difference is placed into a new
+ * subset, the index of which is returned. The larger part remains with the
+ * subset index.
+ *
+ * If the set was not split (due to there being no overlap with splitter or
+ * being a complete subset), INVALID_SUBSET is returned.
+ */
size_t split(size_t subset_index, const flat_set<T> &splitter) {
- assert(!splitter.empty());
- if (splitter.empty()) {
- return INVALID_SUBSET;
- }
-
- subset &orig = subsets[subset_index];
-
- assert(orig.size());
-
- split_temp_diff.clear();
- split_temp_inter.clear();
-
- auto sp_it = splitter.begin();
- auto sp_e = splitter.end();
-
- /* subset members are always in sorted order. */
- assert(std::is_sorted(orig.members.begin(), orig.members.end()));
-
- if (orig.members.back() < *sp_it) {
- /* first splitter is greater than all our members */
- return INVALID_SUBSET;
- }
-
- if (orig.members.front() > *splitter.rbegin()) {
- /* last splitter is less than all our members */
- return INVALID_SUBSET;
- }
-
- for (auto it = orig.members.begin(); it != orig.members.end(); ++it) {
+ assert(!splitter.empty());
+ if (splitter.empty()) {
+ return INVALID_SUBSET;
+ }
+
+ subset &orig = subsets[subset_index];
+
+ assert(orig.size());
+
+ split_temp_diff.clear();
+ split_temp_inter.clear();
+
+ auto sp_it = splitter.begin();
+ auto sp_e = splitter.end();
+
+ /* subset members are always in sorted order. */
+ assert(std::is_sorted(orig.members.begin(), orig.members.end()));
+
+ if (orig.members.back() < *sp_it) {
+ /* first splitter is greater than all our members */
+ return INVALID_SUBSET;
+ }
+
+ if (orig.members.front() > *splitter.rbegin()) {
+ /* last splitter is less than all our members */
+ return INVALID_SUBSET;
+ }
+
+ for (auto it = orig.members.begin(); it != orig.members.end(); ++it) {
const auto &member = *it;
- assert(member < member_to_subset.size());
-
+ assert(member < member_to_subset.size());
+
sp_it = std::lower_bound(sp_it, sp_e, member);
- if (sp_it == sp_e) {
- split_temp_diff.insert(split_temp_diff.end(), it,
- orig.members.end());
- break;
- }
-
- if (*sp_it > member) {
- split_temp_diff.push_back(member);
- } else {
- split_temp_inter.push_back(member);
- }
- }
-
- assert(split_temp_diff.size() + split_temp_inter.size() == orig.size());
-
- if (split_temp_inter.empty()) {
- assert(split_temp_diff == orig.members);
- return INVALID_SUBSET;
- }
-
- if (split_temp_diff.empty()) {
- assert(split_temp_inter == orig.members);
- return INVALID_SUBSET;
- }
-
- assert(MIN(split_temp_inter[0], split_temp_diff[0]) == orig.members[0]);
-
- /* work out which is the bigger half */
- std::vector<T> *big;
- std::vector<T> *small;
- if (split_temp_diff.size() > split_temp_inter.size()) {
- big = &split_temp_diff;
- small = &split_temp_inter;
- } else {
- big = &split_temp_inter;
- small = &split_temp_diff;
- }
-
- /* larger subset replaces the input subset */
- std::vector<T> temp_i;
- insert(&temp_i, temp_i.end(), *big);
- orig.members.swap(temp_i);
-
- /* smaller subset is placed in the new subset */
- size_t new_index = subsets.size();
- subsets.push_back(subset());
- insert(&subsets.back().members, subsets.back().members.end(), *small);
-
- for (const auto &e : *small) {
- member_to_subset[e] = new_index;
- }
-
- return new_index;
- }
-
- /**
- * Returns all subsets which have a member in keys.
- */
+ if (sp_it == sp_e) {
+ split_temp_diff.insert(split_temp_diff.end(), it,
+ orig.members.end());
+ break;
+ }
+
+ if (*sp_it > member) {
+ split_temp_diff.push_back(member);
+ } else {
+ split_temp_inter.push_back(member);
+ }
+ }
+
+ assert(split_temp_diff.size() + split_temp_inter.size() == orig.size());
+
+ if (split_temp_inter.empty()) {
+ assert(split_temp_diff == orig.members);
+ return INVALID_SUBSET;
+ }
+
+ if (split_temp_diff.empty()) {
+ assert(split_temp_inter == orig.members);
+ return INVALID_SUBSET;
+ }
+
+ assert(MIN(split_temp_inter[0], split_temp_diff[0]) == orig.members[0]);
+
+ /* work out which is the bigger half */
+ std::vector<T> *big;
+ std::vector<T> *small;
+ if (split_temp_diff.size() > split_temp_inter.size()) {
+ big = &split_temp_diff;
+ small = &split_temp_inter;
+ } else {
+ big = &split_temp_inter;
+ small = &split_temp_diff;
+ }
+
+ /* larger subset replaces the input subset */
+ std::vector<T> temp_i;
+ insert(&temp_i, temp_i.end(), *big);
+ orig.members.swap(temp_i);
+
+ /* smaller subset is placed in the new subset */
+ size_t new_index = subsets.size();
+ subsets.push_back(subset());
+ insert(&subsets.back().members, subsets.back().members.end(), *small);
+
+ for (const auto &e : *small) {
+ member_to_subset[e] = new_index;
+ }
+
+ return new_index;
+ }
+
+ /**
+ * Returns all subsets which have a member in keys.
+ */
void find_overlapping(const flat_set<T> &keys,
- std::vector<size_t> *containing) const {
- boost::dynamic_bitset<> seen(subsets.size()); // all zero by default.
-
- for (const auto &key : keys) {
- assert(key < member_to_subset.size());
- size_t sub = member_to_subset[key];
- assert(sub < subsets.size());
- seen.set(sub);
- }
-
- for (size_t i = seen.find_first(); i != seen.npos;
- i = seen.find_next(i)) {
- containing->push_back(i);
- }
- }
-
- /**
- * Creates a partitioned set containing elements [0, state_to_subset.size() )
- *
- * The initial subset that an element belongs to is given by the
- * corresponding entry in state_to_subset. The subsets should be identified
- * by a dense range of indices starting from 0.
- */
- explicit partitioned_set(const std::vector<size_t> &state_to_subset) {
- assert(!state_to_subset.empty());
-
- subsets.reserve(state_to_subset.size());
- member_to_subset.resize(state_to_subset.size());
-
- split_temp_inter.reserve(state_to_subset.size());
- split_temp_diff.reserve(state_to_subset.size());
-
- size_t subset_count = 0;
- for (const auto &sub : state_to_subset) {
- assert(sub != INVALID_SUBSET);
- ENSURE_AT_LEAST(&subset_count, sub + 1);
- }
- assert(subset_count <= state_to_subset.size());
-
- subsets.resize(subset_count);
- for (size_t i = 0; i < state_to_subset.size(); i++) {
- /* ensure that our underlying type is big enough to hold all our
- * set members */
- assert(i == (size_t)(T)i);
-
- size_t sub = state_to_subset[i];
- assert(sub < subsets.size());
-
- member_to_subset[i] = sub;
- subsets[sub].members.push_back(i);
- }
-
- /* none of the subsets should be empty */
- assert(std::all_of(subsets.begin(), subsets.end(),
- [](const subset &sub){ return sub.size() > 0; }));
- }
-
-private:
- std::vector<size_t> member_to_subset;
- std::vector<subset> subsets;
-
- std::vector<T> split_temp_inter; /**< used internally by split to hold the
- * intersection. */
- std::vector<T> split_temp_diff; /**< used internally by split to hold the
- * set difference. */
-};
-
-} // namespace
-
-#endif
+ std::vector<size_t> *containing) const {
+ boost::dynamic_bitset<> seen(subsets.size()); // all zero by default.
+
+ for (const auto &key : keys) {
+ assert(key < member_to_subset.size());
+ size_t sub = member_to_subset[key];
+ assert(sub < subsets.size());
+ seen.set(sub);
+ }
+
+ for (size_t i = seen.find_first(); i != seen.npos;
+ i = seen.find_next(i)) {
+ containing->push_back(i);
+ }
+ }
+
+ /**
+ * Creates a partitioned set containing elements [0, state_to_subset.size() )
+ *
+ * The initial subset that an element belongs to is given by the
+ * corresponding entry in state_to_subset. The subsets should be identified
+ * by a dense range of indices starting from 0.
+ */
+ explicit partitioned_set(const std::vector<size_t> &state_to_subset) {
+ assert(!state_to_subset.empty());
+
+ subsets.reserve(state_to_subset.size());
+ member_to_subset.resize(state_to_subset.size());
+
+ split_temp_inter.reserve(state_to_subset.size());
+ split_temp_diff.reserve(state_to_subset.size());
+
+ size_t subset_count = 0;
+ for (const auto &sub : state_to_subset) {
+ assert(sub != INVALID_SUBSET);
+ ENSURE_AT_LEAST(&subset_count, sub + 1);
+ }
+ assert(subset_count <= state_to_subset.size());
+
+ subsets.resize(subset_count);
+ for (size_t i = 0; i < state_to_subset.size(); i++) {
+ /* ensure that our underlying type is big enough to hold all our
+ * set members */
+ assert(i == (size_t)(T)i);
+
+ size_t sub = state_to_subset[i];
+ assert(sub < subsets.size());
+
+ member_to_subset[i] = sub;
+ subsets[sub].members.push_back(i);
+ }
+
+ /* none of the subsets should be empty */
+ assert(std::all_of(subsets.begin(), subsets.end(),
+ [](const subset &sub){ return sub.size() > 0; }));
+ }
+
+private:
+ std::vector<size_t> member_to_subset;
+ std::vector<subset> subsets;
+
+ std::vector<T> split_temp_inter; /**< used internally by split to hold the
+ * intersection. */
+ std::vector<T> split_temp_diff; /**< used internally by split to hold the
+ * set difference. */
+};
+
+} // namespace
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/popcount.h b/contrib/libs/hyperscan/src/util/popcount.h
index f75397dd35..eb08f6b1b2 100644
--- a/contrib/libs/hyperscan/src/util/popcount.h
+++ b/contrib/libs/hyperscan/src/util/popcount.h
@@ -1,74 +1,74 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Platform specific popcount functions
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Platform specific popcount functions
+ */
+
#ifndef UTIL_POPCOUNT_H_
#define UTIL_POPCOUNT_H_
-
-#include "ue2common.h"
+
+#include "ue2common.h"
#include "util/arch.h"
-
-static really_inline
-u32 popcount32(u32 x) {
-#if defined(HAVE_POPCOUNT_INSTR)
- // Single-instruction builtin.
+
+static really_inline
+u32 popcount32(u32 x) {
+#if defined(HAVE_POPCOUNT_INSTR)
+ // Single-instruction builtin.
return _mm_popcnt_u32(x);
-#else
+#else
// Fast branch-free version from bit-twiddling hacks as older Intel
- // processors do not have a POPCNT instruction.
- x -= (x >> 1) & 0x55555555;
- x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
- return (((x + (x >> 4)) & 0xf0f0f0f) * 0x1010101) >> 24;
-#endif
-}
-
-static really_inline
-u32 popcount64(u64a x) {
+ // processors do not have a POPCNT instruction.
+ x -= (x >> 1) & 0x55555555;
+ x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+ return (((x + (x >> 4)) & 0xf0f0f0f) * 0x1010101) >> 24;
+#endif
+}
+
+static really_inline
+u32 popcount64(u64a x) {
#if defined(ARCH_X86_64)
# if defined(HAVE_POPCOUNT_INSTR)
- // Single-instruction builtin.
+ // Single-instruction builtin.
return (u32)_mm_popcnt_u64(x);
# else
// Fast branch-free version from bit-twiddling hacks as older Intel
- // processors do not have a POPCNT instruction.
- x -= (x >> 1) & 0x5555555555555555;
- x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
- x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f;
- return (x * 0x0101010101010101) >> 56;
+ // processors do not have a POPCNT instruction.
+ x -= (x >> 1) & 0x5555555555555555;
+ x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
+ x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f;
+ return (x * 0x0101010101010101) >> 56;
# endif
-#else
- // Synthesise from two 32-bit cases.
- return popcount32(x >> 32) + popcount32(x);
-#endif
-}
-
+#else
+ // Synthesise from two 32-bit cases.
+ return popcount32(x >> 32) + popcount32(x);
+#endif
+}
+
#endif /* UTIL_POPCOUNT_H_ */
-
+
diff --git a/contrib/libs/hyperscan/src/util/pqueue.h b/contrib/libs/hyperscan/src/util/pqueue.h
index 8e7055bd76..f0ba12e70f 100644
--- a/contrib/libs/hyperscan/src/util/pqueue.h
+++ b/contrib/libs/hyperscan/src/util/pqueue.h
@@ -1,109 +1,109 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef PQUEUE_H
-#define PQUEUE_H
-
-#include "ue2common.h"
-
-static really_inline u32
-pq_left(u32 i) {
- return (i << 1) + 1;
-}
-
-static really_inline u32
-pq_right(u32 i) {
- return (i << 1) + 2;
-}
-
-static really_inline
-u32 pq_parent(u32 i) {
- return (i - 1) >> 1;
-}
-
-static really_inline
-void pq_sift(PQ_T *items, u32 start, u32 end) {
- u32 j = start;
- PQ_T j_temp = items[j];
-
- while (pq_left(j) < end) {
- u32 max_child;
-
- if (pq_right(j) < end && PQ_COMP(items, pq_right(j), pq_left(j))) {
- max_child = pq_right(j);
- } else {
- max_child = pq_left(j);
- }
-
- if (PQ_COMP_B(items, max_child, j_temp)) {
- items[j] = items[max_child];
- j = max_child;
- } else {
- /* j is already less than its children. We know heap property
- * is already maintained for children we are done */
- break;
- }
- }
- items[j] = j_temp;
-}
-
-static really_inline
-PQ_T *pq_top(PQ_T *items) {
- return items;
-}
-
-static really_inline
-void pq_pop(PQ_T *items, u32 item_count) {
- item_count--;
- items[0] = items[item_count];
- pq_sift(items, 0, item_count);
-}
-
-static really_inline
-void pq_insert(PQ_T *items, u32 item_count, PQ_T new_item) {
- u32 pos = item_count;
- while (pos) {
- u32 parent = pq_parent(pos);
- if (!PQ_COMP_B(items, parent, new_item)) {
- items[pos] = items[parent];
- pos = parent;
- } else {
- break;
- }
- }
- items[pos] = new_item;
-}
-
-static really_inline
-void pq_replace_top(PQ_T *items, u32 item_count, PQ_T new_item) {
- items[0] = new_item;
- pq_sift(items, 0, item_count);
-}
-
-#endif
-
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef PQUEUE_H
+#define PQUEUE_H
+
+#include "ue2common.h"
+
+static really_inline u32
+pq_left(u32 i) {
+ return (i << 1) + 1;
+}
+
+static really_inline u32
+pq_right(u32 i) {
+ return (i << 1) + 2;
+}
+
+static really_inline
+u32 pq_parent(u32 i) {
+ return (i - 1) >> 1;
+}
+
+static really_inline
+void pq_sift(PQ_T *items, u32 start, u32 end) {
+ u32 j = start;
+ PQ_T j_temp = items[j];
+
+ while (pq_left(j) < end) {
+ u32 max_child;
+
+ if (pq_right(j) < end && PQ_COMP(items, pq_right(j), pq_left(j))) {
+ max_child = pq_right(j);
+ } else {
+ max_child = pq_left(j);
+ }
+
+ if (PQ_COMP_B(items, max_child, j_temp)) {
+ items[j] = items[max_child];
+ j = max_child;
+ } else {
+ /* j is already less than its children. We know heap property
+ * is already maintained for children we are done */
+ break;
+ }
+ }
+ items[j] = j_temp;
+}
+
+static really_inline
+PQ_T *pq_top(PQ_T *items) {
+ return items;
+}
+
+static really_inline
+void pq_pop(PQ_T *items, u32 item_count) {
+ item_count--;
+ items[0] = items[item_count];
+ pq_sift(items, 0, item_count);
+}
+
+static really_inline
+void pq_insert(PQ_T *items, u32 item_count, PQ_T new_item) {
+ u32 pos = item_count;
+ while (pos) {
+ u32 parent = pq_parent(pos);
+ if (!PQ_COMP_B(items, parent, new_item)) {
+ items[pos] = items[parent];
+ pos = parent;
+ } else {
+ break;
+ }
+ }
+ items[pos] = new_item;
+}
+
+static really_inline
+void pq_replace_top(PQ_T *items, u32 item_count, PQ_T new_item) {
+ items[0] = new_item;
+ pq_sift(items, 0, item_count);
+}
+
+#endif
+
diff --git a/contrib/libs/hyperscan/src/util/queue_index_factory.h b/contrib/libs/hyperscan/src/util/queue_index_factory.h
index 5db03a1003..e8f7028ec5 100644
--- a/contrib/libs/hyperscan/src/util/queue_index_factory.h
+++ b/contrib/libs/hyperscan/src/util/queue_index_factory.h
@@ -1,52 +1,52 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief QueueIndexFactory used to hand out NFA queues at compile time.
- */
-#ifndef UTIL_QUEUE_INDEX_FACTORY_H
-#define UTIL_QUEUE_INDEX_FACTORY_H
-
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief QueueIndexFactory used to hand out NFA queues at compile time.
+ */
+#ifndef UTIL_QUEUE_INDEX_FACTORY_H
+#define UTIL_QUEUE_INDEX_FACTORY_H
+
+#include "ue2common.h"
#include "util/noncopyable.h"
-
-namespace ue2 {
-
+
+namespace ue2 {
+
class QueueIndexFactory : noncopyable {
-public:
- QueueIndexFactory() : val(0) {}
- u32 get_queue() { return val++; }
- u32 allocated_count() const { return val; }
-
-private:
- u32 val;
-};
-
-} // namespace ue2
-
-#endif // UTIL_QUEUE_INDEX_FACTORY_H
+public:
+ QueueIndexFactory() : val(0) {}
+ u32 get_queue() { return val++; }
+ u32 allocated_count() const { return val; }
+
+private:
+ u32 val;
+};
+
+} // namespace ue2
+
+#endif // UTIL_QUEUE_INDEX_FACTORY_H
diff --git a/contrib/libs/hyperscan/src/util/report.h b/contrib/libs/hyperscan/src/util/report.h
index 3006527f8b..ee830d0f10 100644
--- a/contrib/libs/hyperscan/src/util/report.h
+++ b/contrib/libs/hyperscan/src/util/report.h
@@ -1,51 +1,51 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Report structure used to manage data associated with a report at
- * compile time.
- */
-
-#ifndef UTIL_REPORT_H
-#define UTIL_REPORT_H
-
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Report structure used to manage data associated with a report at
+ * compile time.
+ */
+
+#ifndef UTIL_REPORT_H
+#define UTIL_REPORT_H
+
+#include "ue2common.h"
#include "util/exhaust.h" // for INVALID_EKEY
#include "util/logical.h" // for INVALID_LKEY
#include "util/hash.h"
#include "util/order_check.h"
-
-#include <cassert>
-
-namespace ue2 {
-
-class ReportManager;
-
+
+#include <cassert>
+
+namespace ue2 {
+
+class ReportManager;
+
enum ReportType {
EXTERNAL_CALLBACK,
EXTERNAL_CALLBACK_SOM_REL,
@@ -67,47 +67,47 @@ enum ReportType {
EXTERNAL_CALLBACK_SOM_PASS
};
-/**
- * \brief All the data we use for handling a match.
- *
- * Includes extparam constraints and bounds, exhaustion/dedupe keys, offset
- * adjustment and SOM information.
- *
+/**
+ * \brief All the data we use for handling a match.
+ *
+ * Includes extparam constraints and bounds, exhaustion/dedupe keys, offset
+ * adjustment and SOM information.
+ *
* The data in this structure eventually becomes a list of Rose programs
* instructions.
- */
-struct Report {
+ */
+struct Report {
Report(ReportType type_in, u32 onmatch_in)
: type(type_in), onmatch(onmatch_in) {}
-
- /** \brief True if this report has bounds from extended parameters, i.e.
- * min offset, max offset, min length. */
- bool hasBounds() const {
- return minOffset > 0 || maxOffset < MAX_OFFSET || minLength > 0;
- }
-
+
+ /** \brief True if this report has bounds from extended parameters, i.e.
+ * min offset, max offset, min length. */
+ bool hasBounds() const {
+ return minOffset > 0 || maxOffset < MAX_OFFSET || minLength > 0;
+ }
+
/** \brief Type of this report. */
ReportType type;
-
- /** \brief use SOM for minLength, but don't report it to user callback. */
- bool quashSom = false;
-
- /** \brief min offset in the stream at which this report can match. */
- u64a minOffset = 0;
-
- /** \brief max offset in the stream at which this report can match. */
- u64a maxOffset = MAX_OFFSET;
-
- /** \brief min match length (start of match to current offset) */
- u64a minLength = 0;
-
- /** \brief Exhaustion key.
- *
- * If exhaustible, the ekey to check before reporting a match.
- * Additionally after reporting a match the ekey will be set. If not
- * exhaustible, this will be INVALID_EKEY. */
- u32 ekey = INVALID_EKEY;
-
+
+ /** \brief use SOM for minLength, but don't report it to user callback. */
+ bool quashSom = false;
+
+ /** \brief min offset in the stream at which this report can match. */
+ u64a minOffset = 0;
+
+ /** \brief max offset in the stream at which this report can match. */
+ u64a maxOffset = MAX_OFFSET;
+
+ /** \brief min match length (start of match to current offset) */
+ u64a minLength = 0;
+
+ /** \brief Exhaustion key.
+ *
+ * If exhaustible, the ekey to check before reporting a match.
+ * Additionally after reporting a match the ekey will be set. If not
+ * exhaustible, this will be INVALID_EKEY. */
+ u32 ekey = INVALID_EKEY;
+
/** \brief Logical Combination key in each combination.
*
* If in Logical Combination, the lkey to check before reporting a match.
@@ -118,95 +118,95 @@ struct Report {
/** \brief Quiet flag for expressions in any logical combination. */
bool quiet = false;
- /** \brief Adjustment to add to the match offset when we report a match.
- *
- * This is usually used for reports attached to states that form part of a
- * zero-width assertion, like '$'. */
- s32 offsetAdjust = 0;
-
- /** \brief Match report ID, for external reports.
- *
- * - external callback -> external report id
- * - internal_som_* -> som loc to modify
- * - INTERNAL_ROSE_CHAIN -> top event to push on
- * - otherwise -> target subnfa */
- u32 onmatch;
-
- /** \brief Index of the reverse nfa.
- *
- * Used by EXTERNAL_CALLBACK_SOM_REV_NFA and
- * INTERNAL_SOM_LOC_SET_SOM_REV_NFA*.
- */
- u32 revNfaIndex = 0;
-
- /** \brief SOM distance value, use varies according to type.
- *
- * - for EXTERNAL_CALLBACK_SOM_REL, from-offset is this many bytes
- * before the to-offset.
- * - for EXTERNAL_CALLBACK_SOM_ABS, set from-offset to this value.
- * - for INTERNAL_SOM_LOC_COPY*, som location read_from.
- */
- u64a somDistance = 0;
-
- /** \brief Number of bytes behind us that we are allowed to squash
- * identical top events on the queue.
- *
- * Used by INTERNAL_ROSE_CHAIN.
- */
- u64a topSquashDistance = 0;
-};
-
-static inline
-bool isExternalReport(const Report &r) {
- switch (r.type) {
- case INTERNAL_SOM_LOC_SET:
- case INTERNAL_SOM_LOC_SET_IF_UNSET:
- case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
- case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
- case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
- case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
- case INTERNAL_SOM_LOC_COPY:
- case INTERNAL_SOM_LOC_COPY_IF_WRITABLE:
- case INTERNAL_SOM_LOC_MAKE_WRITABLE:
- case INTERNAL_SOM_LOC_SET_FROM:
- case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE:
- case INTERNAL_ROSE_CHAIN:
- return false;
- case EXTERNAL_CALLBACK:
- case EXTERNAL_CALLBACK_SOM_REL:
- case EXTERNAL_CALLBACK_SOM_STORED:
- case EXTERNAL_CALLBACK_SOM_ABS:
- case EXTERNAL_CALLBACK_SOM_REV_NFA:
+ /** \brief Adjustment to add to the match offset when we report a match.
+ *
+ * This is usually used for reports attached to states that form part of a
+ * zero-width assertion, like '$'. */
+ s32 offsetAdjust = 0;
+
+ /** \brief Match report ID, for external reports.
+ *
+ * - external callback -> external report id
+ * - internal_som_* -> som loc to modify
+ * - INTERNAL_ROSE_CHAIN -> top event to push on
+ * - otherwise -> target subnfa */
+ u32 onmatch;
+
+ /** \brief Index of the reverse nfa.
+ *
+ * Used by EXTERNAL_CALLBACK_SOM_REV_NFA and
+ * INTERNAL_SOM_LOC_SET_SOM_REV_NFA*.
+ */
+ u32 revNfaIndex = 0;
+
+ /** \brief SOM distance value, use varies according to type.
+ *
+ * - for EXTERNAL_CALLBACK_SOM_REL, from-offset is this many bytes
+ * before the to-offset.
+ * - for EXTERNAL_CALLBACK_SOM_ABS, set from-offset to this value.
+ * - for INTERNAL_SOM_LOC_COPY*, som location read_from.
+ */
+ u64a somDistance = 0;
+
+ /** \brief Number of bytes behind us that we are allowed to squash
+ * identical top events on the queue.
+ *
+ * Used by INTERNAL_ROSE_CHAIN.
+ */
+ u64a topSquashDistance = 0;
+};
+
+static inline
+bool isExternalReport(const Report &r) {
+ switch (r.type) {
+ case INTERNAL_SOM_LOC_SET:
+ case INTERNAL_SOM_LOC_SET_IF_UNSET:
+ case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
+ case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
+ case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
+ case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
+ case INTERNAL_SOM_LOC_COPY:
+ case INTERNAL_SOM_LOC_COPY_IF_WRITABLE:
+ case INTERNAL_SOM_LOC_MAKE_WRITABLE:
+ case INTERNAL_SOM_LOC_SET_FROM:
+ case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE:
+ case INTERNAL_ROSE_CHAIN:
+ return false;
+ case EXTERNAL_CALLBACK:
+ case EXTERNAL_CALLBACK_SOM_REL:
+ case EXTERNAL_CALLBACK_SOM_STORED:
+ case EXTERNAL_CALLBACK_SOM_ABS:
+ case EXTERNAL_CALLBACK_SOM_REV_NFA:
case EXTERNAL_CALLBACK_SOM_PASS:
- return true;
- default:
- break; // fall through
- }
- assert(0); // unknown?
- return true;
-}
-
-static inline
+ return true;
+ default:
+ break; // fall through
+ }
+ assert(0); // unknown?
+ return true;
+}
+
+static inline
bool isExternalSomReport(const Report &r) {
return r.type != EXTERNAL_CALLBACK && isExternalReport(r);
}
static inline
-bool operator<(const Report &a, const Report &b) {
- ORDER_CHECK(type);
- ORDER_CHECK(quashSom);
- ORDER_CHECK(ekey);
- ORDER_CHECK(offsetAdjust);
- ORDER_CHECK(onmatch);
- ORDER_CHECK(minOffset);
- ORDER_CHECK(maxOffset);
- ORDER_CHECK(minLength);
- ORDER_CHECK(somDistance);
- ORDER_CHECK(revNfaIndex);
- ORDER_CHECK(topSquashDistance);
- return false;
-}
-
+bool operator<(const Report &a, const Report &b) {
+ ORDER_CHECK(type);
+ ORDER_CHECK(quashSom);
+ ORDER_CHECK(ekey);
+ ORDER_CHECK(offsetAdjust);
+ ORDER_CHECK(onmatch);
+ ORDER_CHECK(minOffset);
+ ORDER_CHECK(maxOffset);
+ ORDER_CHECK(minLength);
+ ORDER_CHECK(somDistance);
+ ORDER_CHECK(revNfaIndex);
+ ORDER_CHECK(topSquashDistance);
+ return false;
+}
+
inline
bool operator==(const Report &a, const Report &b) {
return a.type == b.type && a.quashSom == b.quashSom &&
@@ -217,60 +217,60 @@ bool operator==(const Report &a, const Report &b) {
a.topSquashDistance == b.topSquashDistance;
}
-static inline
+static inline
Report makeECallback(u32 report, s32 offsetAdjust, u32 ekey, bool quiet) {
- Report ir(EXTERNAL_CALLBACK, report);
- ir.offsetAdjust = offsetAdjust;
- ir.ekey = ekey;
+ Report ir(EXTERNAL_CALLBACK, report);
+ ir.offsetAdjust = offsetAdjust;
+ ir.ekey = ekey;
ir.quiet = (u8)quiet;
- return ir;
-}
-
-static inline
-Report makeCallback(u32 report, s32 offsetAdjust) {
+ return ir;
+}
+
+static inline
+Report makeCallback(u32 report, s32 offsetAdjust) {
return makeECallback(report, offsetAdjust, INVALID_EKEY, false);
-}
-
-static inline
-Report makeSomRelativeCallback(u32 report, s32 offsetAdjust, u64a distance) {
- Report ir(EXTERNAL_CALLBACK_SOM_REL, report);
- ir.offsetAdjust = offsetAdjust;
- ir.ekey = INVALID_EKEY;
- ir.somDistance = distance;
- return ir;
-}
-
-static inline
+}
+
+static inline
+Report makeSomRelativeCallback(u32 report, s32 offsetAdjust, u64a distance) {
+ Report ir(EXTERNAL_CALLBACK_SOM_REL, report);
+ ir.offsetAdjust = offsetAdjust;
+ ir.ekey = INVALID_EKEY;
+ ir.somDistance = distance;
+ return ir;
+}
+
+static inline
Report makeMpvTrigger(u32 event, u64a squashDistance) {
- Report ir(INTERNAL_ROSE_CHAIN, event);
- ir.ekey = INVALID_EKEY;
- ir.topSquashDistance = squashDistance;
- return ir;
-}
-
-/** simple exhaustible: exhaustible and if the first attempted match does not
- * succeed, no later matches will succeed either */
-static inline
-bool isSimpleExhaustible(const Report &ir) {
- if (ir.ekey == INVALID_EKEY) {
- return false;
- }
-
- if (ir.hasBounds() && (ir.minOffset || ir.minLength)) {
- return false;
- }
-
- if (!isExternalReport(ir)) {
- return false;
- }
-
- return true;
-}
-
+ Report ir(INTERNAL_ROSE_CHAIN, event);
+ ir.ekey = INVALID_EKEY;
+ ir.topSquashDistance = squashDistance;
+ return ir;
+}
+
+/** simple exhaustible: exhaustible and if the first attempted match does not
+ * succeed, no later matches will succeed either */
+static inline
+bool isSimpleExhaustible(const Report &ir) {
+ if (ir.ekey == INVALID_EKEY) {
+ return false;
+ }
+
+ if (ir.hasBounds() && (ir.minOffset || ir.minLength)) {
+ return false;
+ }
+
+ if (!isExternalReport(ir)) {
+ return false;
+ }
+
+ return true;
+}
+
} // namespace ue2
-
+
namespace std {
-
+
template<>
struct hash<ue2::Report> {
std::size_t operator()(const ue2::Report &r) const {
@@ -279,7 +279,7 @@ struct hash<ue2::Report> {
r.revNfaIndex, r.somDistance, r.topSquashDistance);
}
};
-
+
} // namespace std
-
-#endif // UTIL_REPORT_H
+
+#endif // UTIL_REPORT_H
diff --git a/contrib/libs/hyperscan/src/util/report_manager.cpp b/contrib/libs/hyperscan/src/util/report_manager.cpp
index 10f8975e37..78b9b73dfc 100644
--- a/contrib/libs/hyperscan/src/util/report_manager.cpp
+++ b/contrib/libs/hyperscan/src/util/report_manager.cpp
@@ -1,100 +1,100 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief ReportManager: tracks Report structures, exhaustion and dedupe keys.
- */
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief ReportManager: tracks Report structures, exhaustion and dedupe keys.
+ */
#include "report_manager.h"
-#include "grey.h"
-#include "ue2common.h"
+#include "grey.h"
+#include "ue2common.h"
#include "compiler/compiler.h"
-#include "nfagraph/ng.h"
-#include "rose/rose_build.h"
-#include "util/compile_error.h"
-#include "util/container.h"
-
-#include <deque>
-#include <map>
-#include <sstream>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-ReportManager::ReportManager(const Grey &g)
- : grey(g), freeEIndex(0), global_exhaust(true) {}
-
-u32 ReportManager::getInternalId(const Report &ir) {
- auto it = reportIdToInternalMap.find(ir);
- if (it != reportIdToInternalMap.end()) {
- DEBUG_PRINTF("existing report %zu\n", it->second);
- return it->second;
- }
-
- // Construct a new internal report and assign it a ReportID.
-
- if (numReports() >= grey.limitReportCount) {
- throw ResourceLimitError();
- }
-
- u32 size = reportIds.size();
- reportIds.push_back(ir);
+#include "nfagraph/ng.h"
+#include "rose/rose_build.h"
+#include "util/compile_error.h"
+#include "util/container.h"
+
+#include <deque>
+#include <map>
+#include <sstream>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+ReportManager::ReportManager(const Grey &g)
+ : grey(g), freeEIndex(0), global_exhaust(true) {}
+
+u32 ReportManager::getInternalId(const Report &ir) {
+ auto it = reportIdToInternalMap.find(ir);
+ if (it != reportIdToInternalMap.end()) {
+ DEBUG_PRINTF("existing report %zu\n", it->second);
+ return it->second;
+ }
+
+ // Construct a new internal report and assign it a ReportID.
+
+ if (numReports() >= grey.limitReportCount) {
+ throw ResourceLimitError();
+ }
+
+ u32 size = reportIds.size();
+ reportIds.push_back(ir);
reportIdToInternalMap.emplace(ir, size);
- DEBUG_PRINTF("new report %u\n", size);
- return size;
-}
-
-const Report &ReportManager::getReport(u32 id) const {
- assert(id < reportIds.size());
- return reportIds.at(id);
-}
-
-size_t ReportManager::numReports() const {
- return reportIds.size();
-}
-
-u32 ReportManager::getExhaustibleKey(u32 a) {
- auto it = toExhaustibleKeyMap.find(a);
- if (it == toExhaustibleKeyMap.end()) {
- // get size before assigning to avoid wacky LHS shenanigans
- u32 size = toExhaustibleKeyMap.size();
- bool inserted;
- tie(it, inserted) = toExhaustibleKeyMap.emplace(s64a{a}, size);
- assert(inserted);
- }
-
- DEBUG_PRINTF("%lld -> ekey %u\n", it->first, it->second);
- return it->second;
-}
-
+ DEBUG_PRINTF("new report %u\n", size);
+ return size;
+}
+
+const Report &ReportManager::getReport(u32 id) const {
+ assert(id < reportIds.size());
+ return reportIds.at(id);
+}
+
+size_t ReportManager::numReports() const {
+ return reportIds.size();
+}
+
+u32 ReportManager::getExhaustibleKey(u32 a) {
+ auto it = toExhaustibleKeyMap.find(a);
+ if (it == toExhaustibleKeyMap.end()) {
+ // get size before assigning to avoid wacky LHS shenanigans
+ u32 size = toExhaustibleKeyMap.size();
+ bool inserted;
+ tie(it, inserted) = toExhaustibleKeyMap.emplace(s64a{a}, size);
+ assert(inserted);
+ }
+
+ DEBUG_PRINTF("%lld -> ekey %u\n", it->first, it->second);
+ return it->second;
+}
+
const set<u32> &ReportManager::getRelateCKeys(u32 lkey) {
auto it = pl.lkey2ckeys.find(lkey);
assert(it != pl.lkey2ckeys.end());
@@ -120,26 +120,26 @@ const vector<CombInfo> &ReportManager::getCombInfoMap() const {
return pl.combInfoMap;
}
-u32 ReportManager::getUnassociatedExhaustibleKey(void) {
- u32 rv = toExhaustibleKeyMap.size();
- bool inserted;
- map<s64a, u32>::const_iterator it;
- tie(it, inserted) = toExhaustibleKeyMap.emplace(--freeEIndex, rv);
- assert(inserted);
- assert(it->second == rv);
-
- return rv;
-}
-
-u32 ReportManager::numDkeys() const {
- DEBUG_PRINTF("%zu dkeys\n", reportIdToDedupeKey.size());
- return reportIdToDedupeKey.size();
-}
-
-u32 ReportManager::numEkeys() const {
- return (u32) toExhaustibleKeyMap.size();
-}
-
+u32 ReportManager::getUnassociatedExhaustibleKey(void) {
+ u32 rv = toExhaustibleKeyMap.size();
+ bool inserted;
+ map<s64a, u32>::const_iterator it;
+ tie(it, inserted) = toExhaustibleKeyMap.emplace(--freeEIndex, rv);
+ assert(inserted);
+ assert(it->second == rv);
+
+ return rv;
+}
+
+u32 ReportManager::numDkeys() const {
+ DEBUG_PRINTF("%zu dkeys\n", reportIdToDedupeKey.size());
+ return reportIdToDedupeKey.size();
+}
+
+u32 ReportManager::numEkeys() const {
+ return (u32) toExhaustibleKeyMap.size();
+}
+
u32 ReportManager::numLogicalKeys() const {
return (u32) pl.toLogicalKeyMap.size();
}
@@ -152,113 +152,113 @@ u32 ReportManager::numCkeys() const {
return (u32) pl.toCombKeyMap.size();
}
-bool ReportManager::patternSetCanExhaust() const {
- return global_exhaust && !toExhaustibleKeyMap.empty();
-}
-
-vector<ReportID> ReportManager::getDkeyToReportTable() const {
- vector<ReportID> rv(reportIdToDedupeKey.size());
-
- for (const auto &m : reportIdToDedupeKey) {
- assert(m.second < rv.size());
- rv[m.second] = m.first;
- }
-
- return rv;
-}
-
-void ReportManager::assignDkeys(const RoseBuild *rose) {
- DEBUG_PRINTF("assigning...\n");
-
+bool ReportManager::patternSetCanExhaust() const {
+ return global_exhaust && !toExhaustibleKeyMap.empty();
+}
+
+vector<ReportID> ReportManager::getDkeyToReportTable() const {
+ vector<ReportID> rv(reportIdToDedupeKey.size());
+
+ for (const auto &m : reportIdToDedupeKey) {
+ assert(m.second < rv.size());
+ rv[m.second] = m.first;
+ }
+
+ return rv;
+}
+
+void ReportManager::assignDkeys(const RoseBuild *rose) {
+ DEBUG_PRINTF("assigning...\n");
+
map<u32, flat_set<ReportID>> ext_to_int;
-
- for (u32 i = 0; i < reportIds.size(); i++) {
- const Report &ir = reportIds[i];
-
- /* need to populate dkey */
- if (isExternalReport(ir)) {
- ext_to_int[ir.onmatch].insert(i);
- }
- }
-
- auto dedupe = rose->generateDedupeAux();
-
- for (const auto &m : ext_to_int) {
- u32 ext = m.first;
-
- if (!dedupe->requiresDedupeSupport(m.second)) {
- DEBUG_PRINTF("%u does not require dedupe\n", ext);
- continue; /* no dedupe required for this set */
- }
-
- u32 dkey = reportIdToDedupeKey.size();
- reportIdToDedupeKey[ext] = dkey;
- DEBUG_PRINTF("ext=%u -> dkey=%u\n", ext, dkey);
- }
-}
-
-u32 ReportManager::getDkey(const Report &r) const {
- if (!isExternalReport(r)) {
- return ~u32{0};
- }
-
- auto it = reportIdToDedupeKey.find(r.onmatch);
- if (it == reportIdToDedupeKey.end()) {
- return ~u32{0};
- }
- return it->second;
-}
-
-void ReportManager::registerExtReport(ReportID id,
- const external_report_info &ext) {
+
+ for (u32 i = 0; i < reportIds.size(); i++) {
+ const Report &ir = reportIds[i];
+
+ /* need to populate dkey */
+ if (isExternalReport(ir)) {
+ ext_to_int[ir.onmatch].insert(i);
+ }
+ }
+
+ auto dedupe = rose->generateDedupeAux();
+
+ for (const auto &m : ext_to_int) {
+ u32 ext = m.first;
+
+ if (!dedupe->requiresDedupeSupport(m.second)) {
+ DEBUG_PRINTF("%u does not require dedupe\n", ext);
+ continue; /* no dedupe required for this set */
+ }
+
+ u32 dkey = reportIdToDedupeKey.size();
+ reportIdToDedupeKey[ext] = dkey;
+ DEBUG_PRINTF("ext=%u -> dkey=%u\n", ext, dkey);
+ }
+}
+
+u32 ReportManager::getDkey(const Report &r) const {
+ if (!isExternalReport(r)) {
+ return ~u32{0};
+ }
+
+ auto it = reportIdToDedupeKey.find(r.onmatch);
+ if (it == reportIdToDedupeKey.end()) {
+ return ~u32{0};
+ }
+ return it->second;
+}
+
+void ReportManager::registerExtReport(ReportID id,
+ const external_report_info &ext) {
auto it = externalIdMap.find(id);
if (it != externalIdMap.end()) {
const external_report_info &eri = it->second;
- if (eri.highlander != ext.highlander) {
- /* we have a problem */
- ostringstream out;
- out << "Expression (index " << ext.first_pattern_index
- << ") with match ID " << id << " ";
- if (!ext.highlander) {
- out << "did not specify ";
- } else {
- out << "specified ";
- }
- out << "HS_FLAG_SINGLEMATCH whereas previous expression (index "
- << eri.first_pattern_index << ") with the same match ID did";
- if (ext.highlander) {
- out << " not";
- }
- out << ".";
- throw CompileError(ext.first_pattern_index, out.str());
- }
- } else {
- externalIdMap.emplace(id, ext);
- }
-
- // Any non-highlander pattern will render us not globally exhaustible.
- if (!ext.highlander) {
- global_exhaust = false;
- }
-}
-
+ if (eri.highlander != ext.highlander) {
+ /* we have a problem */
+ ostringstream out;
+ out << "Expression (index " << ext.first_pattern_index
+ << ") with match ID " << id << " ";
+ if (!ext.highlander) {
+ out << "did not specify ";
+ } else {
+ out << "specified ";
+ }
+ out << "HS_FLAG_SINGLEMATCH whereas previous expression (index "
+ << eri.first_pattern_index << ") with the same match ID did";
+ if (ext.highlander) {
+ out << " not";
+ }
+ out << ".";
+ throw CompileError(ext.first_pattern_index, out.str());
+ }
+ } else {
+ externalIdMap.emplace(id, ext);
+ }
+
+ // Any non-highlander pattern will render us not globally exhaustible.
+ if (!ext.highlander) {
+ global_exhaust = false;
+ }
+}
+
Report ReportManager::getBasicInternalReport(const ExpressionInfo &expr,
s32 adj) {
- /* validate that we are not violating highlander constraints, this will
- * throw a CompileError if so. */
+ /* validate that we are not violating highlander constraints, this will
+ * throw a CompileError if so. */
registerExtReport(expr.report,
external_report_info(expr.highlander, expr.index));
-
- /* create the internal report */
- u32 ekey = INVALID_EKEY;
+
+ /* create the internal report */
+ u32 ekey = INVALID_EKEY;
if (expr.highlander) {
- /* all patterns with the same report id share an ekey */
+ /* all patterns with the same report id share an ekey */
ekey = getExhaustibleKey(expr.report);
- }
-
+ }
+
return makeECallback(expr.report, adj, ekey, expr.quiet);
-}
-
+}
+
void ReportManager::setProgramOffset(ReportID id, u32 programOffset) {
assert(id < reportIds.size());
assert(!contains(reportIdToProgramOffset, id));
@@ -271,34 +271,34 @@ u32 ReportManager::getProgramOffset(ReportID id) const {
return reportIdToProgramOffset.at(id);
}
-static
-void ekeysUnion(std::set<u32> *ekeys, u32 more) {
- if (!ekeys->empty()) {
- if (more == INVALID_EKEY) {
- ekeys->clear();
- } else {
- ekeys->insert(more);
- }
- }
-}
-
-set<u32> reportsToEkeys(const set<ReportID> &reports, const ReportManager &rm) {
- assert(!reports.empty());
-
- set<u32> ekeys;
-
- for (auto it = reports.begin(), ite = reports.end(); it != ite; ++it) {
- u32 e = rm.getReport(*it).ekey;
- if (it == reports.begin()) {
- if (e != INVALID_EKEY) {
- ekeys.insert(e);
- }
- } else {
- ekeysUnion(&ekeys, e);
- }
- }
-
- return ekeys;
-}
-
-} // namespace ue2
+static
+void ekeysUnion(std::set<u32> *ekeys, u32 more) {
+ if (!ekeys->empty()) {
+ if (more == INVALID_EKEY) {
+ ekeys->clear();
+ } else {
+ ekeys->insert(more);
+ }
+ }
+}
+
+set<u32> reportsToEkeys(const set<ReportID> &reports, const ReportManager &rm) {
+ assert(!reports.empty());
+
+ set<u32> ekeys;
+
+ for (auto it = reports.begin(), ite = reports.end(); it != ite; ++it) {
+ u32 e = rm.getReport(*it).ekey;
+ if (it == reports.begin()) {
+ if (e != INVALID_EKEY) {
+ ekeys.insert(e);
+ }
+ } else {
+ ekeysUnion(&ekeys, e);
+ }
+ }
+
+ return ekeys;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/util/report_manager.h b/contrib/libs/hyperscan/src/util/report_manager.h
index 08e7dd65d8..015dc9c855 100644
--- a/contrib/libs/hyperscan/src/util/report_manager.h
+++ b/contrib/libs/hyperscan/src/util/report_manager.h
@@ -1,86 +1,86 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief ReportManager: tracks Report structures, exhaustion and
- * dedupe keys.
- */
-
-#ifndef REPORT_MANAGER_H
-#define REPORT_MANAGER_H
-
-#include "ue2common.h"
-#include "util/compile_error.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief ReportManager: tracks Report structures, exhaustion and
+ * dedupe keys.
+ */
+
+#ifndef REPORT_MANAGER_H
+#define REPORT_MANAGER_H
+
+#include "ue2common.h"
+#include "util/compile_error.h"
#include "util/noncopyable.h"
-#include "util/report.h"
+#include "util/report.h"
#include "parser/logical_combination.h"
-
-#include <map>
-#include <set>
+
+#include <map>
+#include <set>
#include <unordered_map>
-#include <vector>
-
-namespace ue2 {
-
-struct Grey;
-class RoseBuild;
+#include <vector>
+
+namespace ue2 {
+
+struct Grey;
+class RoseBuild;
class ExpressionInfo;
-
-struct external_report_info {
- external_report_info(bool h, u32 fpi)
- : highlander(h), first_pattern_index(fpi) { }
- const bool highlander;
- const u32 first_pattern_index;
-};
-
-/** \brief Tracks Report structures, exhaustion and dedupe keys. */
+
+struct external_report_info {
+ external_report_info(bool h, u32 fpi)
+ : highlander(h), first_pattern_index(fpi) { }
+ const bool highlander;
+ const u32 first_pattern_index;
+};
+
+/** \brief Tracks Report structures, exhaustion and dedupe keys. */
class ReportManager : noncopyable {
-public:
- explicit ReportManager(const Grey &g);
-
- /** \brief Fetch the ID associated with the given Report. */
- u32 getInternalId(const Report &r);
-
- /** \brief Fetch the Report associated with \a id. */
- const Report &getReport(u32 id) const;
-
- /** \brief Total number of reports. */
- size_t numReports() const;
-
- /** \brief Return an unused exhaustion key (the next available one). */
- u32 getUnassociatedExhaustibleKey(void);
-
- /** \brief Total number of dedupe keys. */
- u32 numDkeys() const;
-
- /** \brief Total number of exhaustion keys. */
- u32 numEkeys() const;
-
+public:
+ explicit ReportManager(const Grey &g);
+
+ /** \brief Fetch the ID associated with the given Report. */
+ u32 getInternalId(const Report &r);
+
+ /** \brief Fetch the Report associated with \a id. */
+ const Report &getReport(u32 id) const;
+
+ /** \brief Total number of reports. */
+ size_t numReports() const;
+
+ /** \brief Return an unused exhaustion key (the next available one). */
+ u32 getUnassociatedExhaustibleKey(void);
+
+ /** \brief Total number of dedupe keys. */
+ u32 numDkeys() const;
+
+ /** \brief Total number of exhaustion keys. */
+ u32 numEkeys() const;
+
/** \brief Total number of logical keys. */
u32 numLogicalKeys() const;
@@ -90,36 +90,36 @@ public:
/** \brief Total number of combination keys. */
u32 numCkeys() const;
- /** \brief True if the pattern set can exhaust (i.e. all patterns are
- * highlander). */
- bool patternSetCanExhaust() const;
-
- void assignDkeys(const RoseBuild *rose);
-
- std::vector<ReportID> getDkeyToReportTable() const;
-
- /** \brief Return a const reference to the table of Report
- * structures. */
- const std::vector<Report> &reports() const { return reportIds; }
-
- /**
+ /** \brief True if the pattern set can exhaust (i.e. all patterns are
+ * highlander). */
+ bool patternSetCanExhaust() const;
+
+ void assignDkeys(const RoseBuild *rose);
+
+ std::vector<ReportID> getDkeyToReportTable() const;
+
+ /** \brief Return a const reference to the table of Report
+ * structures. */
+ const std::vector<Report> &reports() const { return reportIds; }
+
+ /**
* Get a simple internal report corresponding to the expression. An ekey
* will be setup if required.
- *
- * Note: this function may throw a CompileError if constraints on external
- * match id are violated (mixed highlander status for example).
- */
+ *
+ * Note: this function may throw a CompileError if constraints on external
+ * match id are violated (mixed highlander status for example).
+ */
Report getBasicInternalReport(const ExpressionInfo &expr, s32 adj = 0);
-
- /** \brief Register an external report and validate that we are not
- * violating highlander constraints (which will cause an exception to be
- * thrown). */
- void registerExtReport(ReportID id, const external_report_info &ext);
-
- /** \brief Fetch the ekey associated with the given expression index,
- * assigning one if necessary. */
- u32 getExhaustibleKey(u32 expressionIndex);
-
+
+ /** \brief Register an external report and validate that we are not
+ * violating highlander constraints (which will cause an exception to be
+ * thrown). */
+ void registerExtReport(ReportID id, const external_report_info &ext);
+
+ /** \brief Fetch the ekey associated with the given expression index,
+ * assigning one if necessary. */
+ u32 getExhaustibleKey(u32 expressionIndex);
+
/** \brief Get lkey's corresponding ckeys. */
const std::set<u32> &getRelateCKeys(u32 lkey);
@@ -133,10 +133,10 @@ public:
/** \brief Used in Rose for writing bytecode. */
const std::vector<CombInfo> &getCombInfoMap() const;
- /** \brief Fetch the dedupe key associated with the given report. Returns
- * ~0U if no dkey is needed. */
- u32 getDkey(const Report &r) const;
-
+ /** \brief Fetch the dedupe key associated with the given report. Returns
+ * ~0U if no dkey is needed. */
+ u32 getDkey(const Report &r) const;
+
/** \brief Register a Rose program offset with the given report. */
void setProgramOffset(ReportID id, u32 programOffset);
@@ -148,45 +148,45 @@ public:
/** \brief Parsed logical combination structure. */
ParsedLogical pl;
-private:
- /** \brief Grey box ref, for checking resource limits. */
- const Grey &grey;
-
- /** \brief Report structures, indexed by ID. */
- std::vector<Report> reportIds;
-
- /** \brief Mapping from Report to ID (inverse of \ref reportIds
+private:
+ /** \brief Grey box ref, for checking resource limits. */
+ const Grey &grey;
+
+ /** \brief Report structures, indexed by ID. */
+ std::vector<Report> reportIds;
+
+ /** \brief Mapping from Report to ID (inverse of \ref reportIds
* vector). */
std::unordered_map<Report, size_t> reportIdToInternalMap;
-
- /** \brief Mapping from ReportID to dedupe key. */
+
+ /** \brief Mapping from ReportID to dedupe key. */
std::unordered_map<ReportID, u32> reportIdToDedupeKey;
-
+
/** \brief Mapping from ReportID to Rose program offset in bytecode. */
std::unordered_map<ReportID, u32> reportIdToProgramOffset;
- /** \brief Mapping from external match ids to information about that
- * id. */
+ /** \brief Mapping from external match ids to information about that
+ * id. */
std::unordered_map<ReportID, external_report_info> externalIdMap;
-
- /** \brief Mapping from expression index to exhaustion key. */
- std::map<s64a, u32> toExhaustibleKeyMap;
-
- /** \brief Unallocated expression index, used for \ref
- * getUnassociatedExhaustibleKey.
- *
- * TODO: work out why this is signed.
- */
- s64a freeEIndex;
-
- /** \brief True if database is globally exhaustible (all patterns must be
- * highlander for this to be the case). */
- bool global_exhaust;
-};
-
-std::set<u32> reportsToEkeys(const std::set<ReportID> &reports,
- const ReportManager &rm);
-
-} // namespace ue2
-
-#endif
+
+ /** \brief Mapping from expression index to exhaustion key. */
+ std::map<s64a, u32> toExhaustibleKeyMap;
+
+ /** \brief Unallocated expression index, used for \ref
+ * getUnassociatedExhaustibleKey.
+ *
+ * TODO: work out why this is signed.
+ */
+ s64a freeEIndex;
+
+ /** \brief True if database is globally exhaustible (all patterns must be
+ * highlander for this to be the case). */
+ bool global_exhaust;
+};
+
+std::set<u32> reportsToEkeys(const std::set<ReportID> &reports,
+ const ReportManager &rm);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/scatter.h b/contrib/libs/hyperscan/src/util/scatter.h
index f651439452..40a1ab248d 100644
--- a/contrib/libs/hyperscan/src/util/scatter.h
+++ b/contrib/libs/hyperscan/src/util/scatter.h
@@ -1,55 +1,55 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef UTIL_SCATTER_H
-#define UTIL_SCATTER_H
-
-#include "ue2common.h"
-
-#define SCATTER_STRUCT(t) \
- struct scatter_unit_##t { u32 offset; t val; };
-
-SCATTER_STRUCT(u64a)
-SCATTER_STRUCT(u32)
-SCATTER_STRUCT(u16)
-SCATTER_STRUCT(u8)
-
-struct scatter_full_plan {
- u32 s_u64a_offset;
- u32 s_u64a_count;
- u32 s_u32_offset;
- u32 s_u32_count;
- u32 s_u16_offset;
- u32 s_u16_count;
- u32 s_u8_count;
- u32 s_u8_offset;
-};
-
-#undef SCATTER_STRUCT
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef UTIL_SCATTER_H
+#define UTIL_SCATTER_H
+
+#include "ue2common.h"
+
+#define SCATTER_STRUCT(t) \
+ struct scatter_unit_##t { u32 offset; t val; };
+
+SCATTER_STRUCT(u64a)
+SCATTER_STRUCT(u32)
+SCATTER_STRUCT(u16)
+SCATTER_STRUCT(u8)
+
+struct scatter_full_plan {
+ u32 s_u64a_offset;
+ u32 s_u64a_count;
+ u32 s_u32_offset;
+ u32 s_u32_count;
+ u32 s_u16_offset;
+ u32 s_u16_count;
+ u32 s_u8_count;
+ u32 s_u8_offset;
+};
+
+#undef SCATTER_STRUCT
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/scatter_runtime.h b/contrib/libs/hyperscan/src/util/scatter_runtime.h
index d839199192..09bc742d97 100644
--- a/contrib/libs/hyperscan/src/util/scatter_runtime.h
+++ b/contrib/libs/hyperscan/src/util/scatter_runtime.h
@@ -1,74 +1,74 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef UTIL_SCATTER_RUNTIME_H
-#define UTIL_SCATTER_RUNTIME_H
-
-#include "scatter.h"
-
-#include "uniform_ops.h"
-
-#define SCATTER_DEF(t) \
-static really_inline \
-void scatter_##t(void *out, const struct scatter_unit_##t *plan, u32 count) { \
- for (u32 i = 0; i < count; i++) { \
- const struct scatter_unit_##t *item = plan + i; \
- DEBUG_PRINTF("storing %llu into offset %u\n", (u64a)item->val, \
- item->offset); \
- storeu_##t((char *)out + item->offset, item->val); \
- } \
-}
-
-SCATTER_DEF(u64a)
-SCATTER_DEF(u32)
-SCATTER_DEF(u16)
-SCATTER_DEF(u8)
-
-#undef SCATTER_DEF
-
-static really_inline
-void scatter(void *out, const void *base, const struct scatter_full_plan *p) {
-#define RUN_SUB(t) \
- if (p->s_##t##_offset) { \
- assert(p->s_##t##_count); \
- const struct scatter_unit_##t *pp \
- = (const void *)(b + p->s_##t##_offset); \
- scatter_##t(out, pp, p->s_##t##_count); \
- }
-
- const char *b = base;
-
- RUN_SUB(u64a);
- RUN_SUB(u32);
- RUN_SUB(u16);
- RUN_SUB(u8);
-
-#undef RUN_SUB
-}
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef UTIL_SCATTER_RUNTIME_H
+#define UTIL_SCATTER_RUNTIME_H
+
+#include "scatter.h"
+
+#include "uniform_ops.h"
+
+#define SCATTER_DEF(t) \
+static really_inline \
+void scatter_##t(void *out, const struct scatter_unit_##t *plan, u32 count) { \
+ for (u32 i = 0; i < count; i++) { \
+ const struct scatter_unit_##t *item = plan + i; \
+ DEBUG_PRINTF("storing %llu into offset %u\n", (u64a)item->val, \
+ item->offset); \
+ storeu_##t((char *)out + item->offset, item->val); \
+ } \
+}
+
+SCATTER_DEF(u64a)
+SCATTER_DEF(u32)
+SCATTER_DEF(u16)
+SCATTER_DEF(u8)
+
+#undef SCATTER_DEF
+
+static really_inline
+void scatter(void *out, const void *base, const struct scatter_full_plan *p) {
+#define RUN_SUB(t) \
+ if (p->s_##t##_offset) { \
+ assert(p->s_##t##_count); \
+ const struct scatter_unit_##t *pp \
+ = (const void *)(b + p->s_##t##_offset); \
+ scatter_##t(out, pp, p->s_##t##_count); \
+ }
+
+ const char *b = base;
+
+ RUN_SUB(u64a);
+ RUN_SUB(u32);
+ RUN_SUB(u16);
+ RUN_SUB(u8);
+
+#undef RUN_SUB
+}
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/simd_types.h b/contrib/libs/hyperscan/src/util/simd_types.h
index 331026dc9b..962cad6c97 100644
--- a/contrib/libs/hyperscan/src/util/simd_types.h
+++ b/contrib/libs/hyperscan/src/util/simd_types.h
@@ -1,57 +1,57 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef SIMD_TYPES_H
-#define SIMD_TYPES_H
-
-#include "config.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SIMD_TYPES_H
+#define SIMD_TYPES_H
+
+#include "config.h"
#include "util/arch.h"
#include "util/intrinsics.h"
-#include "ue2common.h"
-
+#include "ue2common.h"
+
#if defined(HAVE_SSE2)
typedef __m128i m128;
-#else
+#else
typedef struct ALIGN_DIRECTIVE {u64a hi; u64a lo;} m128;
-#endif
-
+#endif
+
#if defined(HAVE_AVX2)
-typedef __m256i m256;
-#else
-typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256;
-#endif
-
-typedef struct {m128 lo; m128 mid; m128 hi;} m384;
+typedef __m256i m256;
+#else
+typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256;
+#endif
+
+typedef struct {m128 lo; m128 mid; m128 hi;} m384;
#if defined(HAVE_AVX512)
typedef __m512i m512;
#else
typedef struct ALIGN_ATTR(64) {m256 lo; m256 hi;} m512;
#endif
-
-#endif /* SIMD_TYPES_H */
-
+
+#endif /* SIMD_TYPES_H */
+
diff --git a/contrib/libs/hyperscan/src/util/simd_utils.h b/contrib/libs/hyperscan/src/util/simd_utils.h
index 4928065131..d1f060b070 100644
--- a/contrib/libs/hyperscan/src/util/simd_utils.h
+++ b/contrib/libs/hyperscan/src/util/simd_utils.h
@@ -1,68 +1,68 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief SIMD types and primitive operations.
- */
-
-#ifndef SIMD_UTILS
-#define SIMD_UTILS
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief SIMD types and primitive operations.
+ */
+
+#ifndef SIMD_UTILS
+#define SIMD_UTILS
+
#if !defined(_WIN32) && !defined(__SSSE3__)
#error SSSE3 instructions must be enabled
-#endif
-
+#endif
+
#include "config.h"
-#include "ue2common.h"
-#include "simd_types.h"
+#include "ue2common.h"
+#include "simd_types.h"
#include "unaligned.h"
#include "util/arch.h"
#include "util/intrinsics.h"
-
+
#include <string.h> // for memcpy
-
-// Define a common assume_aligned using an appropriate compiler built-in, if
-// it's available. Note that we need to handle C or C++ compilation.
-#ifdef __cplusplus
-# ifdef HAVE_CXX_BUILTIN_ASSUME_ALIGNED
-# define assume_aligned(x, y) __builtin_assume_aligned((x), (y))
-# endif
-#else
-# ifdef HAVE_CC_BUILTIN_ASSUME_ALIGNED
-# define assume_aligned(x, y) __builtin_assume_aligned((x), (y))
-# endif
-#endif
-
-// Fallback to identity case.
-#ifndef assume_aligned
-#define assume_aligned(x, y) (x)
-#endif
-
+
+// Define a common assume_aligned using an appropriate compiler built-in, if
+// it's available. Note that we need to handle C or C++ compilation.
+#ifdef __cplusplus
+# ifdef HAVE_CXX_BUILTIN_ASSUME_ALIGNED
+# define assume_aligned(x, y) __builtin_assume_aligned((x), (y))
+# endif
+#else
+# ifdef HAVE_CC_BUILTIN_ASSUME_ALIGNED
+# define assume_aligned(x, y) __builtin_assume_aligned((x), (y))
+# endif
+#endif
+
+// Fallback to identity case.
+#ifndef assume_aligned
+#define assume_aligned(x, y) (x)
+#endif
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -71,58 +71,58 @@ extern const char vbs_mask_data[];
}
#endif
-static really_inline m128 ones128(void) {
+static really_inline m128 ones128(void) {
#if defined(__GNUC__) || defined(__INTEL_COMPILER)
/* gcc gets this right */
return _mm_set1_epi8(0xFF);
-#else
+#else
/* trick from Intel's optimization guide to generate all-ones.
* ICC converts this to the single cmpeq instruction */
- return _mm_cmpeq_epi8(_mm_setzero_si128(), _mm_setzero_si128());
-#endif
-}
-
-static really_inline m128 zeroes128(void) {
- return _mm_setzero_si128();
-}
-
-/** \brief Bitwise not for m128*/
-static really_inline m128 not128(m128 a) {
- return _mm_xor_si128(a, ones128());
-}
-
-/** \brief Return 1 if a and b are different otherwise 0 */
-static really_inline int diff128(m128 a, m128 b) {
- return (_mm_movemask_epi8(_mm_cmpeq_epi8(a, b)) ^ 0xffff);
-}
-
-static really_inline int isnonzero128(m128 a) {
- return !!diff128(a, zeroes128());
-}
-
-/**
- * "Rich" version of diff128(). Takes two vectors a and b and returns a 4-bit
- * mask indicating which 32-bit words contain differences.
- */
-static really_inline u32 diffrich128(m128 a, m128 b) {
- a = _mm_cmpeq_epi32(a, b);
- return ~(_mm_movemask_ps(_mm_castsi128_ps(a))) & 0xf;
-}
-
-/**
- * "Rich" version of diff128(), 64-bit variant. Takes two vectors a and b and
- * returns a 4-bit mask indicating which 64-bit words contain differences.
- */
-static really_inline u32 diffrich64_128(m128 a, m128 b) {
+ return _mm_cmpeq_epi8(_mm_setzero_si128(), _mm_setzero_si128());
+#endif
+}
+
+static really_inline m128 zeroes128(void) {
+ return _mm_setzero_si128();
+}
+
+/** \brief Bitwise not for m128*/
+static really_inline m128 not128(m128 a) {
+ return _mm_xor_si128(a, ones128());
+}
+
+/** \brief Return 1 if a and b are different otherwise 0 */
+static really_inline int diff128(m128 a, m128 b) {
+ return (_mm_movemask_epi8(_mm_cmpeq_epi8(a, b)) ^ 0xffff);
+}
+
+static really_inline int isnonzero128(m128 a) {
+ return !!diff128(a, zeroes128());
+}
+
+/**
+ * "Rich" version of diff128(). Takes two vectors a and b and returns a 4-bit
+ * mask indicating which 32-bit words contain differences.
+ */
+static really_inline u32 diffrich128(m128 a, m128 b) {
+ a = _mm_cmpeq_epi32(a, b);
+ return ~(_mm_movemask_ps(_mm_castsi128_ps(a))) & 0xf;
+}
+
+/**
+ * "Rich" version of diff128(), 64-bit variant. Takes two vectors a and b and
+ * returns a 4-bit mask indicating which 64-bit words contain differences.
+ */
+static really_inline u32 diffrich64_128(m128 a, m128 b) {
#if defined(HAVE_SSE41)
- a = _mm_cmpeq_epi64(a, b);
- return ~(_mm_movemask_ps(_mm_castsi128_ps(a))) & 0x5;
-#else
- u32 d = diffrich128(a, b);
- return (d | (d >> 1)) & 0x5;
-#endif
-}
-
+ a = _mm_cmpeq_epi64(a, b);
+ return ~(_mm_movemask_ps(_mm_castsi128_ps(a))) & 0x5;
+#else
+ u32 d = diffrich128(a, b);
+ return (d | (d >> 1)) & 0x5;
+#endif
+}
+
static really_really_inline
m128 lshift64_m128(m128 a, unsigned b) {
#if defined(HAVE__BUILTIN_CONSTANT_P)
@@ -132,30 +132,30 @@ m128 lshift64_m128(m128 a, unsigned b) {
#endif
m128 x = _mm_cvtsi32_si128(b);
return _mm_sll_epi64(a, x);
-}
-
+}
+
#define rshift64_m128(a, b) _mm_srli_epi64((a), (b))
-#define eq128(a, b) _mm_cmpeq_epi8((a), (b))
-#define movemask128(a) ((u32)_mm_movemask_epi8((a)))
-
+#define eq128(a, b) _mm_cmpeq_epi8((a), (b))
+#define movemask128(a) ((u32)_mm_movemask_epi8((a)))
+
#if defined(HAVE_AVX512)
static really_inline m128 cast512to128(const m512 in) {
return _mm512_castsi512_si128(in);
}
#endif
-static really_inline m128 set16x8(u8 c) {
- return _mm_set1_epi8(c);
-}
-
+static really_inline m128 set16x8(u8 c) {
+ return _mm_set1_epi8(c);
+}
+
static really_inline m128 set4x32(u32 c) {
return _mm_set1_epi32(c);
}
-static really_inline u32 movd(const m128 in) {
- return _mm_cvtsi128_si32(in);
-}
-
+static really_inline u32 movd(const m128 in) {
+ return _mm_cvtsi128_si32(in);
+}
+
#if defined(HAVE_AVX512)
static really_inline u32 movd512(const m512 in) {
// NOTE: seems gcc doesn't support _mm512_cvtsi512_si32(in),
@@ -170,25 +170,25 @@ static really_inline u64a movq512(const m512 in) {
}
#endif
-static really_inline u64a movq(const m128 in) {
-#if defined(ARCH_X86_64)
- return _mm_cvtsi128_si64(in);
-#else // 32-bit - this is horrific
- u32 lo = movd(in);
- u32 hi = movd(_mm_srli_epi64(in, 32));
- return (u64a)hi << 32 | lo;
-#endif
-}
-
+static really_inline u64a movq(const m128 in) {
+#if defined(ARCH_X86_64)
+ return _mm_cvtsi128_si64(in);
+#else // 32-bit - this is horrific
+ u32 lo = movd(in);
+ u32 hi = movd(_mm_srli_epi64(in, 32));
+ return (u64a)hi << 32 | lo;
+#endif
+}
+
/* another form of movq */
static really_inline
m128 load_m128_from_u64a(const u64a *p) {
return _mm_set_epi64x(0LL, *p);
-}
-
+}
+
#define rshiftbyte_m128(a, count_immed) _mm_srli_si128(a, count_immed)
#define lshiftbyte_m128(a, count_immed) _mm_slli_si128(a, count_immed)
-
+
#if defined(HAVE_SSE41)
#define extract32from128(a, imm) _mm_extract_epi32(a, imm)
#define extract64from128(a, imm) _mm_extract_epi64(a, imm)
@@ -196,33 +196,33 @@ m128 load_m128_from_u64a(const u64a *p) {
#define extract32from128(a, imm) movd(_mm_srli_si128(a, imm << 2))
#define extract64from128(a, imm) movq(_mm_srli_si128(a, imm << 3))
#endif
-
+
#if !defined(HAVE_AVX2)
-// TODO: this entire file needs restructuring - this carveout is awful
-#define extractlow64from256(a) movq(a.lo)
-#define extractlow32from256(a) movd(a.lo)
+// TODO: this entire file needs restructuring - this carveout is awful
+#define extractlow64from256(a) movq(a.lo)
+#define extractlow32from256(a) movd(a.lo)
#if defined(HAVE_SSE41)
-#define extract32from256(a, imm) _mm_extract_epi32((imm >> 2) ? a.hi : a.lo, imm % 4)
+#define extract32from256(a, imm) _mm_extract_epi32((imm >> 2) ? a.hi : a.lo, imm % 4)
#define extract64from256(a, imm) _mm_extract_epi64((imm >> 1) ? a.hi : a.lo, imm % 2)
-#else
+#else
#define extract32from256(a, imm) movd(_mm_srli_si128((imm >> 2) ? a.hi : a.lo, (imm % 4) * 4))
#define extract64from256(a, imm) movq(_mm_srli_si128((imm >> 1) ? a.hi : a.lo, (imm % 2) * 8))
-#endif
-
-#endif // !AVX2
-
-static really_inline m128 and128(m128 a, m128 b) {
- return _mm_and_si128(a,b);
-}
-
-static really_inline m128 xor128(m128 a, m128 b) {
- return _mm_xor_si128(a,b);
-}
-
-static really_inline m128 or128(m128 a, m128 b) {
- return _mm_or_si128(a,b);
-}
-
+#endif
+
+#endif // !AVX2
+
+static really_inline m128 and128(m128 a, m128 b) {
+ return _mm_and_si128(a,b);
+}
+
+static really_inline m128 xor128(m128 a, m128 b) {
+ return _mm_xor_si128(a,b);
+}
+
+static really_inline m128 or128(m128 a, m128 b) {
+ return _mm_or_si128(a,b);
+}
+
#if defined(HAVE_AVX512VBMI)
static really_inline m512 expand128(m128 a) {
return _mm512_broadcast_i32x4(a);
@@ -241,50 +241,50 @@ static really_inline m512 expand384(m384 a) {
}
#endif
-static really_inline m128 andnot128(m128 a, m128 b) {
- return _mm_andnot_si128(a, b);
-}
-
-// aligned load
-static really_inline m128 load128(const void *ptr) {
- assert(ISALIGNED_N(ptr, alignof(m128)));
- ptr = assume_aligned(ptr, 16);
- return _mm_load_si128((const m128 *)ptr);
-}
-
-// aligned store
-static really_inline void store128(void *ptr, m128 a) {
- assert(ISALIGNED_N(ptr, alignof(m128)));
- ptr = assume_aligned(ptr, 16);
- *(m128 *)ptr = a;
-}
-
-// unaligned load
-static really_inline m128 loadu128(const void *ptr) {
- return _mm_loadu_si128((const m128 *)ptr);
-}
-
-// unaligned store
-static really_inline void storeu128(void *ptr, m128 a) {
- _mm_storeu_si128 ((m128 *)ptr, a);
-}
-
-// packed unaligned store of first N bytes
-static really_inline
-void storebytes128(void *ptr, m128 a, unsigned int n) {
- assert(n <= sizeof(a));
- memcpy(ptr, &a, n);
-}
-
-// packed unaligned load of first N bytes, pad with zero
-static really_inline
-m128 loadbytes128(const void *ptr, unsigned int n) {
- m128 a = zeroes128();
- assert(n <= sizeof(a));
- memcpy(&a, ptr, n);
- return a;
-}
-
+static really_inline m128 andnot128(m128 a, m128 b) {
+ return _mm_andnot_si128(a, b);
+}
+
+// aligned load
+static really_inline m128 load128(const void *ptr) {
+ assert(ISALIGNED_N(ptr, alignof(m128)));
+ ptr = assume_aligned(ptr, 16);
+ return _mm_load_si128((const m128 *)ptr);
+}
+
+// aligned store
+static really_inline void store128(void *ptr, m128 a) {
+ assert(ISALIGNED_N(ptr, alignof(m128)));
+ ptr = assume_aligned(ptr, 16);
+ *(m128 *)ptr = a;
+}
+
+// unaligned load
+static really_inline m128 loadu128(const void *ptr) {
+ return _mm_loadu_si128((const m128 *)ptr);
+}
+
+// unaligned store
+static really_inline void storeu128(void *ptr, m128 a) {
+ _mm_storeu_si128 ((m128 *)ptr, a);
+}
+
+// packed unaligned store of first N bytes
+static really_inline
+void storebytes128(void *ptr, m128 a, unsigned int n) {
+ assert(n <= sizeof(a));
+ memcpy(ptr, &a, n);
+}
+
+// packed unaligned load of first N bytes, pad with zero
+static really_inline
+m128 loadbytes128(const void *ptr, unsigned int n) {
+ m128 a = zeroes128();
+ assert(n <= sizeof(a));
+ memcpy(&a, ptr, n);
+ return a;
+}
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -301,18 +301,18 @@ m128 mask1bit128(unsigned int n) {
return loadu128(&simd_onebit_masks[mask_idx]);
}
-// switches on bit N in the given vector.
-static really_inline
-void setbit128(m128 *ptr, unsigned int n) {
+// switches on bit N in the given vector.
+static really_inline
+void setbit128(m128 *ptr, unsigned int n) {
*ptr = or128(mask1bit128(n), *ptr);
-}
-
-// switches off bit N in the given vector.
-static really_inline
-void clearbit128(m128 *ptr, unsigned int n) {
+}
+
+// switches off bit N in the given vector.
+static really_inline
+void clearbit128(m128 *ptr, unsigned int n) {
*ptr = andnot128(mask1bit128(n), *ptr);
}
-
+
// tests bit N in the given vector.
static really_inline
char testbit128(m128 val, unsigned int n) {
@@ -323,7 +323,7 @@ char testbit128(m128 val, unsigned int n) {
return isnonzero128(and128(mask, val));
#endif
}
-
+
// offset must be an immediate
#define palignr(r, l, offset) _mm_alignr_epi8(r, l, offset)
@@ -332,9 +332,9 @@ m128 pshufb_m128(m128 a, m128 b) {
m128 result;
result = _mm_shuffle_epi8(a, b);
return result;
-}
-
-static really_inline
+}
+
+static really_inline
m256 pshufb_m256(m256 a, m256 b) {
#if defined(HAVE_AVX2)
return _mm256_shuffle_epi8(a, b);
@@ -344,8 +344,8 @@ m256 pshufb_m256(m256 a, m256 b) {
rv.hi = pshufb_m128(a.hi, b.hi);
return rv;
#endif
-}
-
+}
+
#if defined(HAVE_AVX512)
static really_inline
m512 pshufb_m512(m512 a, m512 b) {
@@ -396,12 +396,12 @@ m128 set64x2(u64a hi, u64a lo) {
return _mm_set_epi64x(hi, lo);
}
-/****
- **** 256-bit Primitives
- ****/
-
+/****
+ **** 256-bit Primitives
+ ****/
+
#if defined(HAVE_AVX2)
-
+
static really_really_inline
m256 lshift64_m256(m256 a, unsigned b) {
#if defined(HAVE__BUILTIN_CONSTANT_P)
@@ -415,44 +415,44 @@ m256 lshift64_m256(m256 a, unsigned b) {
#define rshift64_m256(a, b) _mm256_srli_epi64((a), (b))
-static really_inline
-m256 set32x8(u32 in) {
+static really_inline
+m256 set32x8(u32 in) {
return _mm256_set1_epi8(in);
-}
-
-#define eq256(a, b) _mm256_cmpeq_epi8((a), (b))
-#define movemask256(a) ((u32)_mm256_movemask_epi8((a)))
-
-static really_inline
-m256 set2x128(m128 a) {
- return _mm256_broadcastsi128_si256(a);
-}
-
-#else
-
+}
+
+#define eq256(a, b) _mm256_cmpeq_epi8((a), (b))
+#define movemask256(a) ((u32)_mm256_movemask_epi8((a)))
+
+static really_inline
+m256 set2x128(m128 a) {
+ return _mm256_broadcastsi128_si256(a);
+}
+
+#else
+
static really_really_inline
m256 lshift64_m256(m256 a, int b) {
- m256 rv = a;
+ m256 rv = a;
rv.lo = lshift64_m128(rv.lo, b);
rv.hi = lshift64_m128(rv.hi, b);
- return rv;
-}
-
-static really_inline
+ return rv;
+}
+
+static really_inline
m256 rshift64_m256(m256 a, int b) {
- m256 rv = a;
+ m256 rv = a;
rv.lo = rshift64_m128(rv.lo, b);
rv.hi = rshift64_m128(rv.hi, b);
- return rv;
-}
-static really_inline
-m256 set32x8(u32 in) {
- m256 rv;
- rv.lo = set16x8((u8) in);
- rv.hi = rv.lo;
- return rv;
-}
-
+ return rv;
+}
+static really_inline
+m256 set32x8(u32 in) {
+ m256 rv;
+ rv.lo = set16x8((u8) in);
+ rv.hi = rv.lo;
+ return rv;
+}
+
static really_inline
m256 eq256(m256 a, m256 b) {
m256 rv;
@@ -473,207 +473,207 @@ m256 set2x128(m128 a) {
m256 rv = {a, a};
return rv;
}
-#endif
-
-static really_inline m256 zeroes256(void) {
+#endif
+
+static really_inline m256 zeroes256(void) {
#if defined(HAVE_AVX2)
- return _mm256_setzero_si256();
-#else
- m256 rv = {zeroes128(), zeroes128()};
- return rv;
-#endif
-}
-
-static really_inline m256 ones256(void) {
+ return _mm256_setzero_si256();
+#else
+ m256 rv = {zeroes128(), zeroes128()};
+ return rv;
+#endif
+}
+
+static really_inline m256 ones256(void) {
#if defined(HAVE_AVX2)
m256 rv = _mm256_set1_epi8(0xFF);
-#else
- m256 rv = {ones128(), ones128()};
-#endif
- return rv;
-}
-
+#else
+ m256 rv = {ones128(), ones128()};
+#endif
+ return rv;
+}
+
#if defined(HAVE_AVX2)
-static really_inline m256 and256(m256 a, m256 b) {
- return _mm256_and_si256(a, b);
-}
-#else
-static really_inline m256 and256(m256 a, m256 b) {
- m256 rv;
- rv.lo = and128(a.lo, b.lo);
- rv.hi = and128(a.hi, b.hi);
- return rv;
-}
-#endif
-
+static really_inline m256 and256(m256 a, m256 b) {
+ return _mm256_and_si256(a, b);
+}
+#else
+static really_inline m256 and256(m256 a, m256 b) {
+ m256 rv;
+ rv.lo = and128(a.lo, b.lo);
+ rv.hi = and128(a.hi, b.hi);
+ return rv;
+}
+#endif
+
#if defined(HAVE_AVX2)
-static really_inline m256 or256(m256 a, m256 b) {
- return _mm256_or_si256(a, b);
-}
-#else
-static really_inline m256 or256(m256 a, m256 b) {
- m256 rv;
- rv.lo = or128(a.lo, b.lo);
- rv.hi = or128(a.hi, b.hi);
- return rv;
-}
-#endif
-
+static really_inline m256 or256(m256 a, m256 b) {
+ return _mm256_or_si256(a, b);
+}
+#else
+static really_inline m256 or256(m256 a, m256 b) {
+ m256 rv;
+ rv.lo = or128(a.lo, b.lo);
+ rv.hi = or128(a.hi, b.hi);
+ return rv;
+}
+#endif
+
#if defined(HAVE_AVX2)
-static really_inline m256 xor256(m256 a, m256 b) {
- return _mm256_xor_si256(a, b);
-}
-#else
-static really_inline m256 xor256(m256 a, m256 b) {
- m256 rv;
- rv.lo = xor128(a.lo, b.lo);
- rv.hi = xor128(a.hi, b.hi);
- return rv;
-}
-#endif
-
+static really_inline m256 xor256(m256 a, m256 b) {
+ return _mm256_xor_si256(a, b);
+}
+#else
+static really_inline m256 xor256(m256 a, m256 b) {
+ m256 rv;
+ rv.lo = xor128(a.lo, b.lo);
+ rv.hi = xor128(a.hi, b.hi);
+ return rv;
+}
+#endif
+
#if defined(HAVE_AVX2)
-static really_inline m256 not256(m256 a) {
- return _mm256_xor_si256(a, ones256());
-}
-#else
-static really_inline m256 not256(m256 a) {
- m256 rv;
- rv.lo = not128(a.lo);
- rv.hi = not128(a.hi);
- return rv;
-}
-#endif
-
+static really_inline m256 not256(m256 a) {
+ return _mm256_xor_si256(a, ones256());
+}
+#else
+static really_inline m256 not256(m256 a) {
+ m256 rv;
+ rv.lo = not128(a.lo);
+ rv.hi = not128(a.hi);
+ return rv;
+}
+#endif
+
#if defined(HAVE_AVX2)
-static really_inline m256 andnot256(m256 a, m256 b) {
- return _mm256_andnot_si256(a, b);
-}
-#else
-static really_inline m256 andnot256(m256 a, m256 b) {
- m256 rv;
- rv.lo = andnot128(a.lo, b.lo);
- rv.hi = andnot128(a.hi, b.hi);
- return rv;
-}
-#endif
-
-static really_inline int diff256(m256 a, m256 b) {
+static really_inline m256 andnot256(m256 a, m256 b) {
+ return _mm256_andnot_si256(a, b);
+}
+#else
+static really_inline m256 andnot256(m256 a, m256 b) {
+ m256 rv;
+ rv.lo = andnot128(a.lo, b.lo);
+ rv.hi = andnot128(a.hi, b.hi);
+ return rv;
+}
+#endif
+
+static really_inline int diff256(m256 a, m256 b) {
#if defined(HAVE_AVX2)
- return !!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(a, b)) ^ (int)-1);
-#else
- return diff128(a.lo, b.lo) || diff128(a.hi, b.hi);
-#endif
-}
-
-static really_inline int isnonzero256(m256 a) {
+ return !!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(a, b)) ^ (int)-1);
+#else
+ return diff128(a.lo, b.lo) || diff128(a.hi, b.hi);
+#endif
+}
+
+static really_inline int isnonzero256(m256 a) {
#if defined(HAVE_AVX2)
- return !!diff256(a, zeroes256());
-#else
- return isnonzero128(or128(a.lo, a.hi));
-#endif
-}
-
-/**
- * "Rich" version of diff256(). Takes two vectors a and b and returns an 8-bit
- * mask indicating which 32-bit words contain differences.
- */
-static really_inline u32 diffrich256(m256 a, m256 b) {
+ return !!diff256(a, zeroes256());
+#else
+ return isnonzero128(or128(a.lo, a.hi));
+#endif
+}
+
+/**
+ * "Rich" version of diff256(). Takes two vectors a and b and returns an 8-bit
+ * mask indicating which 32-bit words contain differences.
+ */
+static really_inline u32 diffrich256(m256 a, m256 b) {
#if defined(HAVE_AVX2)
- a = _mm256_cmpeq_epi32(a, b);
- return ~(_mm256_movemask_ps(_mm256_castsi256_ps(a))) & 0xFF;
-#else
- m128 z = zeroes128();
- a.lo = _mm_cmpeq_epi32(a.lo, b.lo);
- a.hi = _mm_cmpeq_epi32(a.hi, b.hi);
- m128 packed = _mm_packs_epi16(_mm_packs_epi32(a.lo, a.hi), z);
- return ~(_mm_movemask_epi8(packed)) & 0xff;
-#endif
-}
-
-/**
- * "Rich" version of diff256(), 64-bit variant. Takes two vectors a and b and
- * returns an 8-bit mask indicating which 64-bit words contain differences.
- */
-static really_inline u32 diffrich64_256(m256 a, m256 b) {
- u32 d = diffrich256(a, b);
- return (d | (d >> 1)) & 0x55555555;
-}
-
-// aligned load
-static really_inline m256 load256(const void *ptr) {
- assert(ISALIGNED_N(ptr, alignof(m256)));
+ a = _mm256_cmpeq_epi32(a, b);
+ return ~(_mm256_movemask_ps(_mm256_castsi256_ps(a))) & 0xFF;
+#else
+ m128 z = zeroes128();
+ a.lo = _mm_cmpeq_epi32(a.lo, b.lo);
+ a.hi = _mm_cmpeq_epi32(a.hi, b.hi);
+ m128 packed = _mm_packs_epi16(_mm_packs_epi32(a.lo, a.hi), z);
+ return ~(_mm_movemask_epi8(packed)) & 0xff;
+#endif
+}
+
+/**
+ * "Rich" version of diff256(), 64-bit variant. Takes two vectors a and b and
+ * returns an 8-bit mask indicating which 64-bit words contain differences.
+ */
+static really_inline u32 diffrich64_256(m256 a, m256 b) {
+ u32 d = diffrich256(a, b);
+ return (d | (d >> 1)) & 0x55555555;
+}
+
+// aligned load
+static really_inline m256 load256(const void *ptr) {
+ assert(ISALIGNED_N(ptr, alignof(m256)));
#if defined(HAVE_AVX2)
- return _mm256_load_si256((const m256 *)ptr);
-#else
- m256 rv = { load128(ptr), load128((const char *)ptr + 16) };
- return rv;
-#endif
-}
-
-// aligned load of 128-bit value to low and high part of 256-bit value
-static really_inline m256 load2x128(const void *ptr) {
+ return _mm256_load_si256((const m256 *)ptr);
+#else
+ m256 rv = { load128(ptr), load128((const char *)ptr + 16) };
+ return rv;
+#endif
+}
+
+// aligned load of 128-bit value to low and high part of 256-bit value
+static really_inline m256 load2x128(const void *ptr) {
#if defined(HAVE_AVX2)
- return set2x128(load128(ptr));
-#else
- assert(ISALIGNED_N(ptr, alignof(m128)));
- m256 rv;
- rv.hi = rv.lo = load128(ptr);
- return rv;
-#endif
-}
-
+ return set2x128(load128(ptr));
+#else
+ assert(ISALIGNED_N(ptr, alignof(m128)));
+ m256 rv;
+ rv.hi = rv.lo = load128(ptr);
+ return rv;
+#endif
+}
+
static really_inline m256 loadu2x128(const void *ptr) {
return set2x128(loadu128(ptr));
}
-// aligned store
-static really_inline void store256(void *ptr, m256 a) {
- assert(ISALIGNED_N(ptr, alignof(m256)));
+// aligned store
+static really_inline void store256(void *ptr, m256 a) {
+ assert(ISALIGNED_N(ptr, alignof(m256)));
#if defined(HAVE_AVX2)
- _mm256_store_si256((m256 *)ptr, a);
-#else
- ptr = assume_aligned(ptr, 16);
- *(m256 *)ptr = a;
-#endif
-}
-
-// unaligned load
-static really_inline m256 loadu256(const void *ptr) {
+ _mm256_store_si256((m256 *)ptr, a);
+#else
+ ptr = assume_aligned(ptr, 16);
+ *(m256 *)ptr = a;
+#endif
+}
+
+// unaligned load
+static really_inline m256 loadu256(const void *ptr) {
#if defined(HAVE_AVX2)
- return _mm256_loadu_si256((const m256 *)ptr);
-#else
- m256 rv = { loadu128(ptr), loadu128((const char *)ptr + 16) };
- return rv;
-#endif
-}
-
+ return _mm256_loadu_si256((const m256 *)ptr);
+#else
+ m256 rv = { loadu128(ptr), loadu128((const char *)ptr + 16) };
+ return rv;
+#endif
+}
+
// unaligned store
static really_inline void storeu256(void *ptr, m256 a) {
#if defined(HAVE_AVX2)
_mm256_storeu_si256((m256 *)ptr, a);
-#else
+#else
storeu128(ptr, a.lo);
storeu128((char *)ptr + 16, a.hi);
-#endif
-}
-
-// packed unaligned store of first N bytes
-static really_inline
-void storebytes256(void *ptr, m256 a, unsigned int n) {
- assert(n <= sizeof(a));
- memcpy(ptr, &a, n);
-}
-
-// packed unaligned load of first N bytes, pad with zero
-static really_inline
-m256 loadbytes256(const void *ptr, unsigned int n) {
- m256 a = zeroes256();
- assert(n <= sizeof(a));
- memcpy(&a, ptr, n);
- return a;
-}
-
+#endif
+}
+
+// packed unaligned store of first N bytes
+static really_inline
+void storebytes256(void *ptr, m256 a, unsigned int n) {
+ assert(n <= sizeof(a));
+ memcpy(ptr, &a, n);
+}
+
+// packed unaligned load of first N bytes, pad with zero
+static really_inline
+m256 loadbytes256(const void *ptr, unsigned int n) {
+ m256 a = zeroes256();
+ assert(n <= sizeof(a));
+ memcpy(&a, ptr, n);
+ return a;
+}
+
static really_inline
m256 mask1bit256(unsigned int n) {
assert(n < sizeof(m256) * 8);
@@ -695,48 +695,48 @@ m256 set64x4(u64a hi_1, u64a hi_0, u64a lo_1, u64a lo_0) {
}
#if !defined(HAVE_AVX2)
-// switches on bit N in the given vector.
-static really_inline
-void setbit256(m256 *ptr, unsigned int n) {
- assert(n < sizeof(*ptr) * 8);
- m128 *sub;
- if (n < 128) {
- sub = &ptr->lo;
- } else {
- sub = &ptr->hi;
- n -= 128;
- }
- setbit128(sub, n);
-}
-
-// switches off bit N in the given vector.
-static really_inline
-void clearbit256(m256 *ptr, unsigned int n) {
- assert(n < sizeof(*ptr) * 8);
- m128 *sub;
- if (n < 128) {
- sub = &ptr->lo;
- } else {
- sub = &ptr->hi;
- n -= 128;
- }
- clearbit128(sub, n);
-}
-
-// tests bit N in the given vector.
-static really_inline
+// switches on bit N in the given vector.
+static really_inline
+void setbit256(m256 *ptr, unsigned int n) {
+ assert(n < sizeof(*ptr) * 8);
+ m128 *sub;
+ if (n < 128) {
+ sub = &ptr->lo;
+ } else {
+ sub = &ptr->hi;
+ n -= 128;
+ }
+ setbit128(sub, n);
+}
+
+// switches off bit N in the given vector.
+static really_inline
+void clearbit256(m256 *ptr, unsigned int n) {
+ assert(n < sizeof(*ptr) * 8);
+ m128 *sub;
+ if (n < 128) {
+ sub = &ptr->lo;
+ } else {
+ sub = &ptr->hi;
+ n -= 128;
+ }
+ clearbit128(sub, n);
+}
+
+// tests bit N in the given vector.
+static really_inline
char testbit256(m256 val, unsigned int n) {
assert(n < sizeof(val) * 8);
m128 sub;
- if (n < 128) {
+ if (n < 128) {
sub = val.lo;
- } else {
+ } else {
sub = val.hi;
- n -= 128;
- }
- return testbit128(sub, n);
-}
-
+ n -= 128;
+ }
+ return testbit128(sub, n);
+}
+
static really_really_inline
m128 movdq_hi(m256 x) {
return x.hi;
@@ -753,50 +753,50 @@ m256 combine2x128(m128 hi, m128 lo) {
return rv;
}
-#else // AVX2
-
-// switches on bit N in the given vector.
-static really_inline
-void setbit256(m256 *ptr, unsigned int n) {
+#else // AVX2
+
+// switches on bit N in the given vector.
+static really_inline
+void setbit256(m256 *ptr, unsigned int n) {
*ptr = or256(mask1bit256(n), *ptr);
-}
-
-static really_inline
-void clearbit256(m256 *ptr, unsigned int n) {
+}
+
+static really_inline
+void clearbit256(m256 *ptr, unsigned int n) {
*ptr = andnot256(mask1bit256(n), *ptr);
-}
-
-// tests bit N in the given vector.
-static really_inline
+}
+
+// tests bit N in the given vector.
+static really_inline
char testbit256(m256 val, unsigned int n) {
const m256 mask = mask1bit256(n);
return !_mm256_testz_si256(mask, val);
-}
-
-static really_really_inline
-m128 movdq_hi(m256 x) {
- return _mm256_extracti128_si256(x, 1);
-}
-
-static really_really_inline
-m128 movdq_lo(m256 x) {
- return _mm256_extracti128_si256(x, 0);
-}
-
-#define cast256to128(a) _mm256_castsi256_si128(a)
-#define cast128to256(a) _mm256_castsi128_si256(a)
-#define swap128in256(a) _mm256_permute4x64_epi64(a, 0x4E)
-#define insert128to256(a, b, imm) _mm256_inserti128_si256(a, b, imm)
+}
+
+static really_really_inline
+m128 movdq_hi(m256 x) {
+ return _mm256_extracti128_si256(x, 1);
+}
+
+static really_really_inline
+m128 movdq_lo(m256 x) {
+ return _mm256_extracti128_si256(x, 0);
+}
+
+#define cast256to128(a) _mm256_castsi256_si128(a)
+#define cast128to256(a) _mm256_castsi128_si256(a)
+#define swap128in256(a) _mm256_permute4x64_epi64(a, 0x4E)
+#define insert128to256(a, b, imm) _mm256_inserti128_si256(a, b, imm)
#define rshift128_m256(a, count_immed) _mm256_srli_si256(a, count_immed)
#define lshift128_m256(a, count_immed) _mm256_slli_si256(a, count_immed)
-#define extract64from256(a, imm) _mm_extract_epi64(_mm256_extracti128_si256(a, imm >> 1), imm % 2)
-#define extract32from256(a, imm) _mm_extract_epi32(_mm256_extracti128_si256(a, imm >> 2), imm % 4)
-#define extractlow64from256(a) _mm_cvtsi128_si64(cast256to128(a))
-#define extractlow32from256(a) movd(cast256to128(a))
+#define extract64from256(a, imm) _mm_extract_epi64(_mm256_extracti128_si256(a, imm >> 1), imm % 2)
+#define extract32from256(a, imm) _mm_extract_epi32(_mm256_extracti128_si256(a, imm >> 2), imm % 4)
+#define extractlow64from256(a) _mm_cvtsi128_si64(cast256to128(a))
+#define extractlow32from256(a) movd(cast256to128(a))
#define interleave256hi(a, b) _mm256_unpackhi_epi8(a, b)
#define interleave256lo(a, b) _mm256_unpacklo_epi8(a, b)
#define vpalignr(r, l, offset) _mm256_alignr_epi8(r, l, offset)
-
+
static really_inline
m256 combine2x128(m128 hi, m128 lo) {
#if defined(_mm256_set_m128i)
@@ -805,8 +805,8 @@ m256 combine2x128(m128 hi, m128 lo) {
return insert128to256(cast128to256(lo), hi, 1);
#endif
}
-#endif //AVX2
-
+#endif //AVX2
+
#if defined(HAVE_AVX512)
#define extract128from512(a, imm) _mm512_extracti32x4_epi32(a, imm)
#define interleave512hi(a, b) _mm512_unpackhi_epi8(a, b)
@@ -816,185 +816,185 @@ m256 combine2x128(m128 hi, m128 lo) {
#define vpermq512(idx, a) _mm512_permutexvar_epi64(idx, a)
#endif
-/****
- **** 384-bit Primitives
- ****/
-
-static really_inline m384 and384(m384 a, m384 b) {
- m384 rv;
- rv.lo = and128(a.lo, b.lo);
- rv.mid = and128(a.mid, b.mid);
- rv.hi = and128(a.hi, b.hi);
- return rv;
-}
-
-static really_inline m384 or384(m384 a, m384 b) {
- m384 rv;
- rv.lo = or128(a.lo, b.lo);
- rv.mid = or128(a.mid, b.mid);
- rv.hi = or128(a.hi, b.hi);
- return rv;
-}
-
-static really_inline m384 xor384(m384 a, m384 b) {
- m384 rv;
- rv.lo = xor128(a.lo, b.lo);
- rv.mid = xor128(a.mid, b.mid);
- rv.hi = xor128(a.hi, b.hi);
- return rv;
-}
-static really_inline m384 not384(m384 a) {
- m384 rv;
- rv.lo = not128(a.lo);
- rv.mid = not128(a.mid);
- rv.hi = not128(a.hi);
- return rv;
-}
-static really_inline m384 andnot384(m384 a, m384 b) {
- m384 rv;
- rv.lo = andnot128(a.lo, b.lo);
- rv.mid = andnot128(a.mid, b.mid);
- rv.hi = andnot128(a.hi, b.hi);
- return rv;
-}
-
+/****
+ **** 384-bit Primitives
+ ****/
+
+static really_inline m384 and384(m384 a, m384 b) {
+ m384 rv;
+ rv.lo = and128(a.lo, b.lo);
+ rv.mid = and128(a.mid, b.mid);
+ rv.hi = and128(a.hi, b.hi);
+ return rv;
+}
+
+static really_inline m384 or384(m384 a, m384 b) {
+ m384 rv;
+ rv.lo = or128(a.lo, b.lo);
+ rv.mid = or128(a.mid, b.mid);
+ rv.hi = or128(a.hi, b.hi);
+ return rv;
+}
+
+static really_inline m384 xor384(m384 a, m384 b) {
+ m384 rv;
+ rv.lo = xor128(a.lo, b.lo);
+ rv.mid = xor128(a.mid, b.mid);
+ rv.hi = xor128(a.hi, b.hi);
+ return rv;
+}
+static really_inline m384 not384(m384 a) {
+ m384 rv;
+ rv.lo = not128(a.lo);
+ rv.mid = not128(a.mid);
+ rv.hi = not128(a.hi);
+ return rv;
+}
+static really_inline m384 andnot384(m384 a, m384 b) {
+ m384 rv;
+ rv.lo = andnot128(a.lo, b.lo);
+ rv.mid = andnot128(a.mid, b.mid);
+ rv.hi = andnot128(a.hi, b.hi);
+ return rv;
+}
+
static really_really_inline
m384 lshift64_m384(m384 a, unsigned b) {
- m384 rv;
+ m384 rv;
rv.lo = lshift64_m128(a.lo, b);
rv.mid = lshift64_m128(a.mid, b);
rv.hi = lshift64_m128(a.hi, b);
- return rv;
-}
-
-static really_inline m384 zeroes384(void) {
- m384 rv = {zeroes128(), zeroes128(), zeroes128()};
- return rv;
-}
-
-static really_inline m384 ones384(void) {
- m384 rv = {ones128(), ones128(), ones128()};
- return rv;
-}
-
-static really_inline int diff384(m384 a, m384 b) {
- return diff128(a.lo, b.lo) || diff128(a.mid, b.mid) || diff128(a.hi, b.hi);
-}
-
-static really_inline int isnonzero384(m384 a) {
- return isnonzero128(or128(or128(a.lo, a.mid), a.hi));
-}
-
-/**
- * "Rich" version of diff384(). Takes two vectors a and b and returns a 12-bit
- * mask indicating which 32-bit words contain differences.
- */
-static really_inline u32 diffrich384(m384 a, m384 b) {
- m128 z = zeroes128();
- a.lo = _mm_cmpeq_epi32(a.lo, b.lo);
- a.mid = _mm_cmpeq_epi32(a.mid, b.mid);
- a.hi = _mm_cmpeq_epi32(a.hi, b.hi);
- m128 packed = _mm_packs_epi16(_mm_packs_epi32(a.lo, a.mid),
- _mm_packs_epi32(a.hi, z));
- return ~(_mm_movemask_epi8(packed)) & 0xfff;
-}
-
-/**
- * "Rich" version of diff384(), 64-bit variant. Takes two vectors a and b and
- * returns a 12-bit mask indicating which 64-bit words contain differences.
- */
-static really_inline u32 diffrich64_384(m384 a, m384 b) {
- u32 d = diffrich384(a, b);
- return (d | (d >> 1)) & 0x55555555;
-}
-
-// aligned load
-static really_inline m384 load384(const void *ptr) {
- assert(ISALIGNED_16(ptr));
- m384 rv = { load128(ptr), load128((const char *)ptr + 16),
- load128((const char *)ptr + 32) };
- return rv;
-}
-
-// aligned store
-static really_inline void store384(void *ptr, m384 a) {
- assert(ISALIGNED_16(ptr));
- ptr = assume_aligned(ptr, 16);
- *(m384 *)ptr = a;
-}
-
-// unaligned load
-static really_inline m384 loadu384(const void *ptr) {
- m384 rv = { loadu128(ptr), loadu128((const char *)ptr + 16),
- loadu128((const char *)ptr + 32)};
- return rv;
-}
-
-// packed unaligned store of first N bytes
-static really_inline
-void storebytes384(void *ptr, m384 a, unsigned int n) {
- assert(n <= sizeof(a));
- memcpy(ptr, &a, n);
-}
-
-// packed unaligned load of first N bytes, pad with zero
-static really_inline
-m384 loadbytes384(const void *ptr, unsigned int n) {
- m384 a = zeroes384();
- assert(n <= sizeof(a));
- memcpy(&a, ptr, n);
- return a;
-}
-
-// switches on bit N in the given vector.
-static really_inline
-void setbit384(m384 *ptr, unsigned int n) {
- assert(n < sizeof(*ptr) * 8);
- m128 *sub;
- if (n < 128) {
- sub = &ptr->lo;
- } else if (n < 256) {
- sub = &ptr->mid;
- } else {
- sub = &ptr->hi;
- }
- setbit128(sub, n % 128);
-}
-
-// switches off bit N in the given vector.
-static really_inline
-void clearbit384(m384 *ptr, unsigned int n) {
- assert(n < sizeof(*ptr) * 8);
- m128 *sub;
- if (n < 128) {
- sub = &ptr->lo;
- } else if (n < 256) {
- sub = &ptr->mid;
- } else {
- sub = &ptr->hi;
- }
- clearbit128(sub, n % 128);
-}
-
-// tests bit N in the given vector.
-static really_inline
+ return rv;
+}
+
+static really_inline m384 zeroes384(void) {
+ m384 rv = {zeroes128(), zeroes128(), zeroes128()};
+ return rv;
+}
+
+static really_inline m384 ones384(void) {
+ m384 rv = {ones128(), ones128(), ones128()};
+ return rv;
+}
+
+static really_inline int diff384(m384 a, m384 b) {
+ return diff128(a.lo, b.lo) || diff128(a.mid, b.mid) || diff128(a.hi, b.hi);
+}
+
+static really_inline int isnonzero384(m384 a) {
+ return isnonzero128(or128(or128(a.lo, a.mid), a.hi));
+}
+
+/**
+ * "Rich" version of diff384(). Takes two vectors a and b and returns a 12-bit
+ * mask indicating which 32-bit words contain differences.
+ */
+static really_inline u32 diffrich384(m384 a, m384 b) {
+ m128 z = zeroes128();
+ a.lo = _mm_cmpeq_epi32(a.lo, b.lo);
+ a.mid = _mm_cmpeq_epi32(a.mid, b.mid);
+ a.hi = _mm_cmpeq_epi32(a.hi, b.hi);
+ m128 packed = _mm_packs_epi16(_mm_packs_epi32(a.lo, a.mid),
+ _mm_packs_epi32(a.hi, z));
+ return ~(_mm_movemask_epi8(packed)) & 0xfff;
+}
+
+/**
+ * "Rich" version of diff384(), 64-bit variant. Takes two vectors a and b and
+ * returns a 12-bit mask indicating which 64-bit words contain differences.
+ */
+static really_inline u32 diffrich64_384(m384 a, m384 b) {
+ u32 d = diffrich384(a, b);
+ return (d | (d >> 1)) & 0x55555555;
+}
+
+// aligned load
+static really_inline m384 load384(const void *ptr) {
+ assert(ISALIGNED_16(ptr));
+ m384 rv = { load128(ptr), load128((const char *)ptr + 16),
+ load128((const char *)ptr + 32) };
+ return rv;
+}
+
+// aligned store
+static really_inline void store384(void *ptr, m384 a) {
+ assert(ISALIGNED_16(ptr));
+ ptr = assume_aligned(ptr, 16);
+ *(m384 *)ptr = a;
+}
+
+// unaligned load
+static really_inline m384 loadu384(const void *ptr) {
+ m384 rv = { loadu128(ptr), loadu128((const char *)ptr + 16),
+ loadu128((const char *)ptr + 32)};
+ return rv;
+}
+
+// packed unaligned store of first N bytes
+static really_inline
+void storebytes384(void *ptr, m384 a, unsigned int n) {
+ assert(n <= sizeof(a));
+ memcpy(ptr, &a, n);
+}
+
+// packed unaligned load of first N bytes, pad with zero
+static really_inline
+m384 loadbytes384(const void *ptr, unsigned int n) {
+ m384 a = zeroes384();
+ assert(n <= sizeof(a));
+ memcpy(&a, ptr, n);
+ return a;
+}
+
+// switches on bit N in the given vector.
+static really_inline
+void setbit384(m384 *ptr, unsigned int n) {
+ assert(n < sizeof(*ptr) * 8);
+ m128 *sub;
+ if (n < 128) {
+ sub = &ptr->lo;
+ } else if (n < 256) {
+ sub = &ptr->mid;
+ } else {
+ sub = &ptr->hi;
+ }
+ setbit128(sub, n % 128);
+}
+
+// switches off bit N in the given vector.
+static really_inline
+void clearbit384(m384 *ptr, unsigned int n) {
+ assert(n < sizeof(*ptr) * 8);
+ m128 *sub;
+ if (n < 128) {
+ sub = &ptr->lo;
+ } else if (n < 256) {
+ sub = &ptr->mid;
+ } else {
+ sub = &ptr->hi;
+ }
+ clearbit128(sub, n % 128);
+}
+
+// tests bit N in the given vector.
+static really_inline
char testbit384(m384 val, unsigned int n) {
assert(n < sizeof(val) * 8);
m128 sub;
- if (n < 128) {
+ if (n < 128) {
sub = val.lo;
- } else if (n < 256) {
+ } else if (n < 256) {
sub = val.mid;
- } else {
+ } else {
sub = val.hi;
- }
- return testbit128(sub, n % 128);
-}
-
-/****
- **** 512-bit Primitives
- ****/
-
+ }
+ return testbit128(sub, n % 128);
+}
+
+/****
+ **** 512-bit Primitives
+ ****/
+
#define eq512mask(a, b) _mm512_cmpeq_epi8_mask((a), (b))
#define masked_eq512mask(k, a, b) _mm512_mask_cmpeq_epi8_mask((k), (a), (b))
@@ -1002,7 +1002,7 @@ static really_inline
m512 zeroes512(void) {
#if defined(HAVE_AVX512)
return _mm512_setzero_si512();
-#else
+#else
m512 rv = {zeroes256(), zeroes256()};
return rv;
#endif
@@ -1079,60 +1079,60 @@ m512 and512(m512 a, m512 b) {
#if defined(HAVE_AVX512)
return _mm512_and_si512(a, b);
#else
- m512 rv;
- rv.lo = and256(a.lo, b.lo);
- rv.hi = and256(a.hi, b.hi);
- return rv;
+ m512 rv;
+ rv.lo = and256(a.lo, b.lo);
+ rv.hi = and256(a.hi, b.hi);
+ return rv;
#endif
-}
-
+}
+
static really_inline
m512 or512(m512 a, m512 b) {
#if defined(HAVE_AVX512)
return _mm512_or_si512(a, b);
-#else
- m512 rv;
- rv.lo = or256(a.lo, b.lo);
- rv.hi = or256(a.hi, b.hi);
- return rv;
+#else
+ m512 rv;
+ rv.lo = or256(a.lo, b.lo);
+ rv.hi = or256(a.hi, b.hi);
+ return rv;
#endif
-}
-
+}
+
static really_inline
m512 xor512(m512 a, m512 b) {
#if defined(HAVE_AVX512)
return _mm512_xor_si512(a, b);
-#else
- m512 rv;
- rv.lo = xor256(a.lo, b.lo);
- rv.hi = xor256(a.hi, b.hi);
- return rv;
+#else
+ m512 rv;
+ rv.lo = xor256(a.lo, b.lo);
+ rv.hi = xor256(a.hi, b.hi);
+ return rv;
#endif
-}
-
+}
+
static really_inline
m512 not512(m512 a) {
#if defined(HAVE_AVX512)
return _mm512_xor_si512(a, ones512());
-#else
- m512 rv;
- rv.lo = not256(a.lo);
- rv.hi = not256(a.hi);
- return rv;
+#else
+ m512 rv;
+ rv.lo = not256(a.lo);
+ rv.hi = not256(a.hi);
+ return rv;
#endif
-}
-
+}
+
static really_inline
m512 andnot512(m512 a, m512 b) {
#if defined(HAVE_AVX512)
return _mm512_andnot_si512(a, b);
-#else
- m512 rv;
- rv.lo = andnot256(a.lo, b.lo);
- rv.hi = andnot256(a.hi, b.hi);
- return rv;
+#else
+ m512 rv;
+ rv.lo = andnot256(a.lo, b.lo);
+ rv.hi = andnot256(a.hi, b.hi);
+ return rv;
#endif
-}
+}
#if defined(HAVE_AVX512)
static really_really_inline
@@ -1141,39 +1141,39 @@ m512 lshift64_m512(m512 a, unsigned b) {
if (__builtin_constant_p(b)) {
return _mm512_slli_epi64(a, b);
}
-#endif
+#endif
m128 x = _mm_cvtsi32_si128(b);
return _mm512_sll_epi64(a, x);
}
-#else
+#else
static really_really_inline
m512 lshift64_m512(m512 a, unsigned b) {
- m512 rv;
+ m512 rv;
rv.lo = lshift64_m256(a.lo, b);
rv.hi = lshift64_m256(a.hi, b);
- return rv;
-}
-#endif
-
+ return rv;
+}
+#endif
+
#if defined(HAVE_AVX512)
#define rshift64_m512(a, b) _mm512_srli_epi64((a), (b))
#define rshift128_m512(a, count_immed) _mm512_bsrli_epi128(a, count_immed)
#define lshift128_m512(a, count_immed) _mm512_bslli_epi128(a, count_immed)
#endif
-
+
#if !defined(_MM_CMPINT_NE)
#define _MM_CMPINT_NE 0x4
#endif
-
+
static really_inline
int diff512(m512 a, m512 b) {
#if defined(HAVE_AVX512)
return !!_mm512_cmp_epi8_mask(a, b, _MM_CMPINT_NE);
#else
- return diff256(a.lo, b.lo) || diff256(a.hi, b.hi);
+ return diff256(a.lo, b.lo) || diff256(a.hi, b.hi);
#endif
-}
-
+}
+
static really_inline
int isnonzero512(m512 a) {
#if defined(HAVE_AVX512)
@@ -1182,83 +1182,83 @@ int isnonzero512(m512 a) {
m256 x = or256(a.lo, a.hi);
return !!diff256(x, zeroes256());
#else
- m128 x = or128(a.lo.lo, a.lo.hi);
- m128 y = or128(a.hi.lo, a.hi.hi);
- return isnonzero128(or128(x, y));
-#endif
-}
-
-/**
- * "Rich" version of diff512(). Takes two vectors a and b and returns a 16-bit
- * mask indicating which 32-bit words contain differences.
- */
+ m128 x = or128(a.lo.lo, a.lo.hi);
+ m128 y = or128(a.hi.lo, a.hi.hi);
+ return isnonzero128(or128(x, y));
+#endif
+}
+
+/**
+ * "Rich" version of diff512(). Takes two vectors a and b and returns a 16-bit
+ * mask indicating which 32-bit words contain differences.
+ */
static really_inline
u32 diffrich512(m512 a, m512 b) {
#if defined(HAVE_AVX512)
return _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_NE);
#elif defined(HAVE_AVX2)
- return diffrich256(a.lo, b.lo) | (diffrich256(a.hi, b.hi) << 8);
-#else
- a.lo.lo = _mm_cmpeq_epi32(a.lo.lo, b.lo.lo);
- a.lo.hi = _mm_cmpeq_epi32(a.lo.hi, b.lo.hi);
- a.hi.lo = _mm_cmpeq_epi32(a.hi.lo, b.hi.lo);
- a.hi.hi = _mm_cmpeq_epi32(a.hi.hi, b.hi.hi);
- m128 packed = _mm_packs_epi16(_mm_packs_epi32(a.lo.lo, a.lo.hi),
- _mm_packs_epi32(a.hi.lo, a.hi.hi));
- return ~(_mm_movemask_epi8(packed)) & 0xffff;
-#endif
-}
-
-/**
- * "Rich" version of diffrich(), 64-bit variant. Takes two vectors a and b and
- * returns a 16-bit mask indicating which 64-bit words contain differences.
- */
+ return diffrich256(a.lo, b.lo) | (diffrich256(a.hi, b.hi) << 8);
+#else
+ a.lo.lo = _mm_cmpeq_epi32(a.lo.lo, b.lo.lo);
+ a.lo.hi = _mm_cmpeq_epi32(a.lo.hi, b.lo.hi);
+ a.hi.lo = _mm_cmpeq_epi32(a.hi.lo, b.hi.lo);
+ a.hi.hi = _mm_cmpeq_epi32(a.hi.hi, b.hi.hi);
+ m128 packed = _mm_packs_epi16(_mm_packs_epi32(a.lo.lo, a.lo.hi),
+ _mm_packs_epi32(a.hi.lo, a.hi.hi));
+ return ~(_mm_movemask_epi8(packed)) & 0xffff;
+#endif
+}
+
+/**
+ * "Rich" version of diffrich(), 64-bit variant. Takes two vectors a and b and
+ * returns a 16-bit mask indicating which 64-bit words contain differences.
+ */
static really_inline
u32 diffrich64_512(m512 a, m512 b) {
//TODO: cmp_epi64?
- u32 d = diffrich512(a, b);
- return (d | (d >> 1)) & 0x55555555;
-}
-
-// aligned load
+ u32 d = diffrich512(a, b);
+ return (d | (d >> 1)) & 0x55555555;
+}
+
+// aligned load
static really_inline
m512 load512(const void *ptr) {
#if defined(HAVE_AVX512)
return _mm512_load_si512(ptr);
#else
assert(ISALIGNED_N(ptr, alignof(m256)));
- m512 rv = { load256(ptr), load256((const char *)ptr + 32) };
- return rv;
+ m512 rv = { load256(ptr), load256((const char *)ptr + 32) };
+ return rv;
#endif
-}
-
-// aligned store
+}
+
+// aligned store
static really_inline
void store512(void *ptr, m512 a) {
assert(ISALIGNED_N(ptr, alignof(m512)));
#if defined(HAVE_AVX512)
return _mm512_store_si512(ptr, a);
#elif defined(HAVE_AVX2)
- m512 *x = (m512 *)ptr;
- store256(&x->lo, a.lo);
- store256(&x->hi, a.hi);
-#else
- ptr = assume_aligned(ptr, 16);
- *(m512 *)ptr = a;
-#endif
-}
-
-// unaligned load
+ m512 *x = (m512 *)ptr;
+ store256(&x->lo, a.lo);
+ store256(&x->hi, a.hi);
+#else
+ ptr = assume_aligned(ptr, 16);
+ *(m512 *)ptr = a;
+#endif
+}
+
+// unaligned load
static really_inline
m512 loadu512(const void *ptr) {
#if defined(HAVE_AVX512)
return _mm512_loadu_si512(ptr);
#else
- m512 rv = { loadu256(ptr), loadu256((const char *)ptr + 32) };
- return rv;
+ m512 rv = { loadu256(ptr), loadu256((const char *)ptr + 32) };
+ return rv;
#endif
-}
-
+}
+
// unaligned store
static really_inline
void storeu512(void *ptr, m512 a) {
@@ -1302,22 +1302,22 @@ m256 loadu_maskz_m256(__mmask32 k, const void *ptr) {
}
#endif
-// packed unaligned store of first N bytes
-static really_inline
-void storebytes512(void *ptr, m512 a, unsigned int n) {
- assert(n <= sizeof(a));
- memcpy(ptr, &a, n);
-}
-
-// packed unaligned load of first N bytes, pad with zero
-static really_inline
-m512 loadbytes512(const void *ptr, unsigned int n) {
- m512 a = zeroes512();
- assert(n <= sizeof(a));
- memcpy(&a, ptr, n);
- return a;
-}
-
+// packed unaligned store of first N bytes
+static really_inline
+void storebytes512(void *ptr, m512 a, unsigned int n) {
+ assert(n <= sizeof(a));
+ memcpy(ptr, &a, n);
+}
+
+// packed unaligned load of first N bytes, pad with zero
+static really_inline
+m512 loadbytes512(const void *ptr, unsigned int n) {
+ m512 a = zeroes512();
+ assert(n <= sizeof(a));
+ memcpy(&a, ptr, n);
+ return a;
+}
+
static really_inline
m512 mask1bit512(unsigned int n) {
assert(n < sizeof(m512) * 8);
@@ -1326,95 +1326,95 @@ m512 mask1bit512(unsigned int n) {
return loadu512(&simd_onebit_masks[mask_idx]);
}
-// switches on bit N in the given vector.
-static really_inline
-void setbit512(m512 *ptr, unsigned int n) {
- assert(n < sizeof(*ptr) * 8);
+// switches on bit N in the given vector.
+static really_inline
+void setbit512(m512 *ptr, unsigned int n) {
+ assert(n < sizeof(*ptr) * 8);
#if !defined(HAVE_AVX2)
- m128 *sub;
- if (n < 128) {
- sub = &ptr->lo.lo;
- } else if (n < 256) {
- sub = &ptr->lo.hi;
- } else if (n < 384) {
- sub = &ptr->hi.lo;
- } else {
- sub = &ptr->hi.hi;
- }
- setbit128(sub, n % 128);
+ m128 *sub;
+ if (n < 128) {
+ sub = &ptr->lo.lo;
+ } else if (n < 256) {
+ sub = &ptr->lo.hi;
+ } else if (n < 384) {
+ sub = &ptr->hi.lo;
+ } else {
+ sub = &ptr->hi.hi;
+ }
+ setbit128(sub, n % 128);
#elif defined(HAVE_AVX512)
*ptr = or512(mask1bit512(n), *ptr);
-#else
- m256 *sub;
- if (n < 256) {
- sub = &ptr->lo;
- } else {
- sub = &ptr->hi;
- n -= 256;
- }
- setbit256(sub, n);
-#endif
-}
-
-// switches off bit N in the given vector.
-static really_inline
-void clearbit512(m512 *ptr, unsigned int n) {
- assert(n < sizeof(*ptr) * 8);
+#else
+ m256 *sub;
+ if (n < 256) {
+ sub = &ptr->lo;
+ } else {
+ sub = &ptr->hi;
+ n -= 256;
+ }
+ setbit256(sub, n);
+#endif
+}
+
+// switches off bit N in the given vector.
+static really_inline
+void clearbit512(m512 *ptr, unsigned int n) {
+ assert(n < sizeof(*ptr) * 8);
#if !defined(HAVE_AVX2)
- m128 *sub;
- if (n < 128) {
- sub = &ptr->lo.lo;
- } else if (n < 256) {
- sub = &ptr->lo.hi;
- } else if (n < 384) {
- sub = &ptr->hi.lo;
- } else {
- sub = &ptr->hi.hi;
- }
- clearbit128(sub, n % 128);
+ m128 *sub;
+ if (n < 128) {
+ sub = &ptr->lo.lo;
+ } else if (n < 256) {
+ sub = &ptr->lo.hi;
+ } else if (n < 384) {
+ sub = &ptr->hi.lo;
+ } else {
+ sub = &ptr->hi.hi;
+ }
+ clearbit128(sub, n % 128);
#elif defined(HAVE_AVX512)
*ptr = andnot512(mask1bit512(n), *ptr);
-#else
- m256 *sub;
- if (n < 256) {
- sub = &ptr->lo;
- } else {
- sub = &ptr->hi;
- n -= 256;
- }
- clearbit256(sub, n);
-#endif
-}
-
-// tests bit N in the given vector.
-static really_inline
+#else
+ m256 *sub;
+ if (n < 256) {
+ sub = &ptr->lo;
+ } else {
+ sub = &ptr->hi;
+ n -= 256;
+ }
+ clearbit256(sub, n);
+#endif
+}
+
+// tests bit N in the given vector.
+static really_inline
char testbit512(m512 val, unsigned int n) {
assert(n < sizeof(val) * 8);
#if !defined(HAVE_AVX2)
m128 sub;
- if (n < 128) {
+ if (n < 128) {
sub = val.lo.lo;
- } else if (n < 256) {
+ } else if (n < 256) {
sub = val.lo.hi;
- } else if (n < 384) {
+ } else if (n < 384) {
sub = val.hi.lo;
- } else {
+ } else {
sub = val.hi.hi;
- }
- return testbit128(sub, n % 128);
+ }
+ return testbit128(sub, n % 128);
#elif defined(HAVE_AVX512)
const m512 mask = mask1bit512(n);
return !!_mm512_test_epi8_mask(mask, val);
-#else
+#else
m256 sub;
- if (n < 256) {
+ if (n < 256) {
sub = val.lo;
- } else {
+ } else {
sub = val.hi;
- n -= 256;
- }
- return testbit256(sub, n);
-#endif
-}
-
-#endif
+ n -= 256;
+ }
+ return testbit256(sub, n);
+#endif
+}
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/state_compress.c b/contrib/libs/hyperscan/src/util/state_compress.c
index 0861aa78ac..7238849e7f 100644
--- a/contrib/libs/hyperscan/src/util/state_compress.c
+++ b/contrib/libs/hyperscan/src/util/state_compress.c
@@ -1,552 +1,552 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Mask-based state compression, used by the NFA.
- */
-#include "config.h"
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Mask-based state compression, used by the NFA.
+ */
+#include "config.h"
+#include "ue2common.h"
#include "arch.h"
-#include "bitutils.h"
-#include "unaligned.h"
-#include "pack_bits.h"
-#include "partial_store.h"
-#include "popcount.h"
-#include "state_compress.h"
-
-#include <string.h>
-
-/*
- * 32-bit store/load.
- */
-
-void storecompressed32(void *ptr, const u32 *x, const u32 *m, u32 bytes) {
- assert(popcount32(*m) <= bytes * 8);
-
- u32 v = compress32(*x, *m);
- partial_store_u32(ptr, v, bytes);
-}
-
-void loadcompressed32(u32 *x, const void *ptr, const u32 *m, u32 bytes) {
- assert(popcount32(*m) <= bytes * 8);
-
- u32 v = partial_load_u32(ptr, bytes);
- *x = expand32(v, *m);
-}
-
-/*
- * 64-bit store/load.
- */
-
-void storecompressed64(void *ptr, const u64a *x, const u64a *m, u32 bytes) {
- assert(popcount64(*m) <= bytes * 8);
-
- u64a v = compress64(*x, *m);
- partial_store_u64a(ptr, v, bytes);
-}
-
-void loadcompressed64(u64a *x, const void *ptr, const u64a *m, u32 bytes) {
- assert(popcount64(*m) <= bytes * 8);
-
- u64a v = partial_load_u64a(ptr, bytes);
- *x = expand64(v, *m);
-}
-
-/*
- * 128-bit store/load.
- */
-
-#if defined(ARCH_32_BIT)
-static really_inline
-void storecompressed128_32bit(void *ptr, m128 xvec, m128 mvec) {
- // First, decompose our vectors into 32-bit chunks.
- u32 x[4];
- memcpy(x, &xvec, sizeof(xvec));
- u32 m[4];
- memcpy(m, &mvec, sizeof(mvec));
-
- // Count the number of bits of compressed state we're writing out per
- // chunk.
- u32 bits[4] = { popcount32(m[0]), popcount32(m[1]),
- popcount32(m[2]), popcount32(m[3]) };
-
- // Compress each 32-bit chunk individually.
- u32 v[4] = { compress32(x[0], m[0]), compress32(x[1], m[1]),
- compress32(x[2], m[2]), compress32(x[3], m[3]) };
-
- // Write packed data out.
- pack_bits_32(ptr, v, bits, 4);
-}
-#endif
-
-#if defined(ARCH_64_BIT)
-static really_inline
-void storecompressed128_64bit(void *ptr, m128 xvec, m128 mvec) {
- // First, decompose our vectors into 64-bit chunks.
- u64a x[2];
- memcpy(x, &xvec, sizeof(xvec));
- u64a m[2];
- memcpy(m, &mvec, sizeof(mvec));
-
- // Count the number of bits of compressed state we're writing out per
- // chunk.
- u32 bits[2] = { popcount64(m[0]), popcount64(m[1]) };
-
- // Compress each 64-bit chunk individually.
- u64a v[2] = { compress64(x[0], m[0]), compress64(x[1], m[1]) };
-
- // Write packed data out.
- pack_bits_64(ptr, v, bits, 2);
-}
-#endif
-
-void storecompressed128(void *ptr, const m128 *x, const m128 *m,
- UNUSED u32 bytes) {
-#if defined(ARCH_64_BIT)
- storecompressed128_64bit(ptr, *x, *m);
-#else
- storecompressed128_32bit(ptr, *x, *m);
-#endif
-}
-
-#if defined(ARCH_32_BIT)
-static really_inline
-m128 loadcompressed128_32bit(const void *ptr, m128 mvec) {
- // First, decompose our vectors into 32-bit chunks.
- u32 m[8];
- memcpy(m, &mvec, sizeof(mvec));
-
- u32 bits[4] = { popcount32(m[0]), popcount32(m[1]),
- popcount32(m[2]), popcount32(m[3]) };
- u32 v[4];
-
- unpack_bits_32(v, (const u8 *)ptr, bits, 4);
-
- u32 x[4] = { expand32(v[0], m[0]), expand32(v[1], m[1]),
- expand32(v[2], m[2]), expand32(v[3], m[3]) };
-
- return _mm_set_epi32(x[3], x[2], x[1], x[0]);
-}
-#endif
-
-#if defined(ARCH_64_BIT)
-static really_inline
-m128 loadcompressed128_64bit(const void *ptr, m128 mvec) {
- // First, decompose our vectors into 64-bit chunks.
- u64a m[2] = { movq(mvec), movq(_mm_srli_si128(mvec, 8)) };
-
- u32 bits[2] = { popcount64(m[0]), popcount64(m[1]) };
- u64a v[2];
-
- unpack_bits_64(v, (const u8 *)ptr, bits, 2);
-
- u64a x[2] = { expand64(v[0], m[0]), expand64(v[1], m[1]) };
-
- return _mm_set_epi64x(x[1], x[0]);
-}
-#endif
-
-void loadcompressed128(m128 *x, const void *ptr, const m128 *m,
- UNUSED u32 bytes) {
-#if defined(ARCH_64_BIT)
- *x = loadcompressed128_64bit(ptr, *m);
-#else
- *x = loadcompressed128_32bit(ptr, *m);
-#endif
-}
-
-/*
- * 256-bit store/load.
- */
-
-#if defined(ARCH_32_BIT)
-static really_inline
-void storecompressed256_32bit(void *ptr, m256 xvec, m256 mvec) {
- // First, decompose our vectors into 32-bit chunks.
- u32 x[8];
- memcpy(x, &xvec, sizeof(xvec));
- u32 m[8];
- memcpy(m, &mvec, sizeof(mvec));
-
- // Count the number of bits of compressed state we're writing out per
- // chunk.
- u32 bits[8] = { popcount32(m[0]), popcount32(m[1]),
- popcount32(m[2]), popcount32(m[3]),
- popcount32(m[4]), popcount32(m[5]),
- popcount32(m[6]), popcount32(m[7])};
-
- // Compress each 32-bit chunk individually.
- u32 v[8] = { compress32(x[0], m[0]), compress32(x[1], m[1]),
- compress32(x[2], m[2]), compress32(x[3], m[3]),
- compress32(x[4], m[4]), compress32(x[5], m[5]),
- compress32(x[6], m[6]), compress32(x[7], m[7]) };
-
- // Write packed data out.
- pack_bits_32(ptr, v, bits, 8);
-}
-#endif
-
-#if defined(ARCH_64_BIT)
-static really_really_inline
-void storecompressed256_64bit(void *ptr, m256 xvec, m256 mvec) {
- // First, decompose our vectors into 64-bit chunks.
- u64a x[4];
- memcpy(x, &xvec, sizeof(xvec));
- u64a m[4];
- memcpy(m, &mvec, sizeof(mvec));
-
- // Count the number of bits of compressed state we're writing out per
- // chunk.
- u32 bits[4] = { popcount64(m[0]), popcount64(m[1]),
- popcount64(m[2]), popcount64(m[3]) };
-
- // Compress each 64-bit chunk individually.
- u64a v[4] = { compress64(x[0], m[0]), compress64(x[1], m[1]),
- compress64(x[2], m[2]), compress64(x[3], m[3]) };
-
- // Write packed data out.
- pack_bits_64(ptr, v, bits, 4);
-}
-#endif
-
-void storecompressed256(void *ptr, const m256 *x, const m256 *m,
- UNUSED u32 bytes) {
-#if defined(ARCH_64_BIT)
- storecompressed256_64bit(ptr, *x, *m);
-#else
- storecompressed256_32bit(ptr, *x, *m);
-#endif
-}
-
-#if defined(ARCH_32_BIT)
-static really_inline
-m256 loadcompressed256_32bit(const void *ptr, m256 mvec) {
- // First, decompose our vectors into 32-bit chunks.
- u32 m[8];
- memcpy(m, &mvec, sizeof(mvec));
-
- u32 bits[8] = { popcount32(m[0]), popcount32(m[1]),
- popcount32(m[2]), popcount32(m[3]),
- popcount32(m[4]), popcount32(m[5]),
- popcount32(m[6]), popcount32(m[7])};
- u32 v[8];
-
- unpack_bits_32(v, (const u8 *)ptr, bits, 8);
-
- u32 x[8] = { expand32(v[0], m[0]), expand32(v[1], m[1]),
- expand32(v[2], m[2]), expand32(v[3], m[3]),
- expand32(v[4], m[4]), expand32(v[5], m[5]),
- expand32(v[6], m[6]), expand32(v[7], m[7]) };
-
+#include "bitutils.h"
+#include "unaligned.h"
+#include "pack_bits.h"
+#include "partial_store.h"
+#include "popcount.h"
+#include "state_compress.h"
+
+#include <string.h>
+
+/*
+ * 32-bit store/load.
+ */
+
+void storecompressed32(void *ptr, const u32 *x, const u32 *m, u32 bytes) {
+ assert(popcount32(*m) <= bytes * 8);
+
+ u32 v = compress32(*x, *m);
+ partial_store_u32(ptr, v, bytes);
+}
+
+void loadcompressed32(u32 *x, const void *ptr, const u32 *m, u32 bytes) {
+ assert(popcount32(*m) <= bytes * 8);
+
+ u32 v = partial_load_u32(ptr, bytes);
+ *x = expand32(v, *m);
+}
+
+/*
+ * 64-bit store/load.
+ */
+
+void storecompressed64(void *ptr, const u64a *x, const u64a *m, u32 bytes) {
+ assert(popcount64(*m) <= bytes * 8);
+
+ u64a v = compress64(*x, *m);
+ partial_store_u64a(ptr, v, bytes);
+}
+
+void loadcompressed64(u64a *x, const void *ptr, const u64a *m, u32 bytes) {
+ assert(popcount64(*m) <= bytes * 8);
+
+ u64a v = partial_load_u64a(ptr, bytes);
+ *x = expand64(v, *m);
+}
+
+/*
+ * 128-bit store/load.
+ */
+
+#if defined(ARCH_32_BIT)
+static really_inline
+void storecompressed128_32bit(void *ptr, m128 xvec, m128 mvec) {
+ // First, decompose our vectors into 32-bit chunks.
+ u32 x[4];
+ memcpy(x, &xvec, sizeof(xvec));
+ u32 m[4];
+ memcpy(m, &mvec, sizeof(mvec));
+
+ // Count the number of bits of compressed state we're writing out per
+ // chunk.
+ u32 bits[4] = { popcount32(m[0]), popcount32(m[1]),
+ popcount32(m[2]), popcount32(m[3]) };
+
+ // Compress each 32-bit chunk individually.
+ u32 v[4] = { compress32(x[0], m[0]), compress32(x[1], m[1]),
+ compress32(x[2], m[2]), compress32(x[3], m[3]) };
+
+ // Write packed data out.
+ pack_bits_32(ptr, v, bits, 4);
+}
+#endif
+
+#if defined(ARCH_64_BIT)
+static really_inline
+void storecompressed128_64bit(void *ptr, m128 xvec, m128 mvec) {
+ // First, decompose our vectors into 64-bit chunks.
+ u64a x[2];
+ memcpy(x, &xvec, sizeof(xvec));
+ u64a m[2];
+ memcpy(m, &mvec, sizeof(mvec));
+
+ // Count the number of bits of compressed state we're writing out per
+ // chunk.
+ u32 bits[2] = { popcount64(m[0]), popcount64(m[1]) };
+
+ // Compress each 64-bit chunk individually.
+ u64a v[2] = { compress64(x[0], m[0]), compress64(x[1], m[1]) };
+
+ // Write packed data out.
+ pack_bits_64(ptr, v, bits, 2);
+}
+#endif
+
+void storecompressed128(void *ptr, const m128 *x, const m128 *m,
+ UNUSED u32 bytes) {
+#if defined(ARCH_64_BIT)
+ storecompressed128_64bit(ptr, *x, *m);
+#else
+ storecompressed128_32bit(ptr, *x, *m);
+#endif
+}
+
+#if defined(ARCH_32_BIT)
+static really_inline
+m128 loadcompressed128_32bit(const void *ptr, m128 mvec) {
+ // First, decompose our vectors into 32-bit chunks.
+ u32 m[8];
+ memcpy(m, &mvec, sizeof(mvec));
+
+ u32 bits[4] = { popcount32(m[0]), popcount32(m[1]),
+ popcount32(m[2]), popcount32(m[3]) };
+ u32 v[4];
+
+ unpack_bits_32(v, (const u8 *)ptr, bits, 4);
+
+ u32 x[4] = { expand32(v[0], m[0]), expand32(v[1], m[1]),
+ expand32(v[2], m[2]), expand32(v[3], m[3]) };
+
+ return _mm_set_epi32(x[3], x[2], x[1], x[0]);
+}
+#endif
+
+#if defined(ARCH_64_BIT)
+static really_inline
+m128 loadcompressed128_64bit(const void *ptr, m128 mvec) {
+ // First, decompose our vectors into 64-bit chunks.
+ u64a m[2] = { movq(mvec), movq(_mm_srli_si128(mvec, 8)) };
+
+ u32 bits[2] = { popcount64(m[0]), popcount64(m[1]) };
+ u64a v[2];
+
+ unpack_bits_64(v, (const u8 *)ptr, bits, 2);
+
+ u64a x[2] = { expand64(v[0], m[0]), expand64(v[1], m[1]) };
+
+ return _mm_set_epi64x(x[1], x[0]);
+}
+#endif
+
+void loadcompressed128(m128 *x, const void *ptr, const m128 *m,
+ UNUSED u32 bytes) {
+#if defined(ARCH_64_BIT)
+ *x = loadcompressed128_64bit(ptr, *m);
+#else
+ *x = loadcompressed128_32bit(ptr, *m);
+#endif
+}
+
+/*
+ * 256-bit store/load.
+ */
+
+#if defined(ARCH_32_BIT)
+static really_inline
+void storecompressed256_32bit(void *ptr, m256 xvec, m256 mvec) {
+ // First, decompose our vectors into 32-bit chunks.
+ u32 x[8];
+ memcpy(x, &xvec, sizeof(xvec));
+ u32 m[8];
+ memcpy(m, &mvec, sizeof(mvec));
+
+ // Count the number of bits of compressed state we're writing out per
+ // chunk.
+ u32 bits[8] = { popcount32(m[0]), popcount32(m[1]),
+ popcount32(m[2]), popcount32(m[3]),
+ popcount32(m[4]), popcount32(m[5]),
+ popcount32(m[6]), popcount32(m[7])};
+
+ // Compress each 32-bit chunk individually.
+ u32 v[8] = { compress32(x[0], m[0]), compress32(x[1], m[1]),
+ compress32(x[2], m[2]), compress32(x[3], m[3]),
+ compress32(x[4], m[4]), compress32(x[5], m[5]),
+ compress32(x[6], m[6]), compress32(x[7], m[7]) };
+
+ // Write packed data out.
+ pack_bits_32(ptr, v, bits, 8);
+}
+#endif
+
+#if defined(ARCH_64_BIT)
+static really_really_inline
+void storecompressed256_64bit(void *ptr, m256 xvec, m256 mvec) {
+ // First, decompose our vectors into 64-bit chunks.
+ u64a x[4];
+ memcpy(x, &xvec, sizeof(xvec));
+ u64a m[4];
+ memcpy(m, &mvec, sizeof(mvec));
+
+ // Count the number of bits of compressed state we're writing out per
+ // chunk.
+ u32 bits[4] = { popcount64(m[0]), popcount64(m[1]),
+ popcount64(m[2]), popcount64(m[3]) };
+
+ // Compress each 64-bit chunk individually.
+ u64a v[4] = { compress64(x[0], m[0]), compress64(x[1], m[1]),
+ compress64(x[2], m[2]), compress64(x[3], m[3]) };
+
+ // Write packed data out.
+ pack_bits_64(ptr, v, bits, 4);
+}
+#endif
+
+void storecompressed256(void *ptr, const m256 *x, const m256 *m,
+ UNUSED u32 bytes) {
+#if defined(ARCH_64_BIT)
+ storecompressed256_64bit(ptr, *x, *m);
+#else
+ storecompressed256_32bit(ptr, *x, *m);
+#endif
+}
+
+#if defined(ARCH_32_BIT)
+static really_inline
+m256 loadcompressed256_32bit(const void *ptr, m256 mvec) {
+ // First, decompose our vectors into 32-bit chunks.
+ u32 m[8];
+ memcpy(m, &mvec, sizeof(mvec));
+
+ u32 bits[8] = { popcount32(m[0]), popcount32(m[1]),
+ popcount32(m[2]), popcount32(m[3]),
+ popcount32(m[4]), popcount32(m[5]),
+ popcount32(m[6]), popcount32(m[7])};
+ u32 v[8];
+
+ unpack_bits_32(v, (const u8 *)ptr, bits, 8);
+
+ u32 x[8] = { expand32(v[0], m[0]), expand32(v[1], m[1]),
+ expand32(v[2], m[2]), expand32(v[3], m[3]),
+ expand32(v[4], m[4]), expand32(v[5], m[5]),
+ expand32(v[6], m[6]), expand32(v[7], m[7]) };
+
#if !defined(HAVE_AVX2)
- m256 xvec = { .lo = _mm_set_epi32(x[3], x[2], x[1], x[0]),
- .hi = _mm_set_epi32(x[7], x[6], x[5], x[4]) };
-#else
- m256 xvec = _mm256_set_epi32(x[7], x[6], x[5], x[4],
- x[3], x[2], x[1], x[0]);
-#endif
- return xvec;
-}
-#endif
-
-#if defined(ARCH_64_BIT)
-static really_inline
-m256 loadcompressed256_64bit(const void *ptr, m256 mvec) {
- // First, decompose our vectors into 64-bit chunks.
- u64a m[4];
- memcpy(m, &mvec, sizeof(mvec));
-
- u32 bits[4] = { popcount64(m[0]), popcount64(m[1]),
- popcount64(m[2]), popcount64(m[3]) };
- u64a v[4];
-
- unpack_bits_64(v, (const u8 *)ptr, bits, 4);
-
- u64a x[4] = { expand64(v[0], m[0]), expand64(v[1], m[1]),
- expand64(v[2], m[2]), expand64(v[3], m[3]) };
-
+ m256 xvec = { .lo = _mm_set_epi32(x[3], x[2], x[1], x[0]),
+ .hi = _mm_set_epi32(x[7], x[6], x[5], x[4]) };
+#else
+ m256 xvec = _mm256_set_epi32(x[7], x[6], x[5], x[4],
+ x[3], x[2], x[1], x[0]);
+#endif
+ return xvec;
+}
+#endif
+
+#if defined(ARCH_64_BIT)
+static really_inline
+m256 loadcompressed256_64bit(const void *ptr, m256 mvec) {
+ // First, decompose our vectors into 64-bit chunks.
+ u64a m[4];
+ memcpy(m, &mvec, sizeof(mvec));
+
+ u32 bits[4] = { popcount64(m[0]), popcount64(m[1]),
+ popcount64(m[2]), popcount64(m[3]) };
+ u64a v[4];
+
+ unpack_bits_64(v, (const u8 *)ptr, bits, 4);
+
+ u64a x[4] = { expand64(v[0], m[0]), expand64(v[1], m[1]),
+ expand64(v[2], m[2]), expand64(v[3], m[3]) };
+
#if !defined(HAVE_AVX2)
- m256 xvec = { .lo = _mm_set_epi64x(x[1], x[0]),
- .hi = _mm_set_epi64x(x[3], x[2]) };
-#else
- m256 xvec = _mm256_set_epi64x(x[3], x[2], x[1], x[0]);
-#endif
- return xvec;
-}
-#endif
-
-void loadcompressed256(m256 *x, const void *ptr, const m256 *m,
- UNUSED u32 bytes) {
-#if defined(ARCH_64_BIT)
- *x = loadcompressed256_64bit(ptr, *m);
-#else
- *x = loadcompressed256_32bit(ptr, *m);
-#endif
-}
-
-/*
- * 384-bit store/load.
- */
-
-#if defined(ARCH_32_BIT)
-static really_inline
-void storecompressed384_32bit(void *ptr, m384 xvec, m384 mvec) {
- // First, decompose our vectors into 32-bit chunks.
- u32 x[12];
- memcpy(x, &xvec, sizeof(xvec));
- u32 m[12];
- memcpy(m, &mvec, sizeof(mvec));
-
- // Count the number of bits of compressed state we're writing out per
- // chunk.
- u32 bits[12] = { popcount32(m[0]), popcount32(m[1]),
- popcount32(m[2]), popcount32(m[3]),
- popcount32(m[4]), popcount32(m[5]),
- popcount32(m[6]), popcount32(m[7]),
- popcount32(m[8]), popcount32(m[9]),
- popcount32(m[10]), popcount32(m[11]) };
-
- // Compress each 32-bit chunk individually.
- u32 v[12] = { compress32(x[0], m[0]), compress32(x[1], m[1]),
- compress32(x[2], m[2]), compress32(x[3], m[3]),
- compress32(x[4], m[4]), compress32(x[5], m[5]),
- compress32(x[6], m[6]), compress32(x[7], m[7]),
- compress32(x[8], m[8]), compress32(x[9], m[9]),
- compress32(x[10], m[10]), compress32(x[11], m[11])};
-
- // Write packed data out.
- pack_bits_32(ptr, v, bits, 12);
-}
-#endif
-
-#if defined(ARCH_64_BIT)
-static really_inline
-void storecompressed384_64bit(void *ptr, m384 xvec, m384 mvec) {
- // First, decompose our vectors into 64-bit chunks.
- u64a x[6];
- memcpy(x, &xvec, sizeof(xvec));
- u64a m[6];
- memcpy(m, &mvec, sizeof(mvec));
-
- // Count the number of bits of compressed state we're writing out per
- // chunk.
- u32 bits[6] = { popcount64(m[0]), popcount64(m[1]),
- popcount64(m[2]), popcount64(m[3]),
- popcount64(m[4]), popcount64(m[5]) };
-
- // Compress each 64-bit chunk individually.
- u64a v[6] = { compress64(x[0], m[0]), compress64(x[1], m[1]),
- compress64(x[2], m[2]), compress64(x[3], m[3]),
- compress64(x[4], m[4]), compress64(x[5], m[5]) };
-
- // Write packed data out.
- pack_bits_64(ptr, v, bits, 6);
-}
-#endif
-
-void storecompressed384(void *ptr, const m384 *x, const m384 *m,
- UNUSED u32 bytes) {
-#if defined(ARCH_64_BIT)
- storecompressed384_64bit(ptr, *x, *m);
-#else
- storecompressed384_32bit(ptr, *x, *m);
-#endif
-}
-
-#if defined(ARCH_32_BIT)
-static really_inline
-m384 loadcompressed384_32bit(const void *ptr, m384 mvec) {
- // First, decompose our vectors into 32-bit chunks.
- u32 m[12];
- memcpy(m, &mvec, sizeof(mvec));
-
- u32 bits[12] = { popcount32(m[0]), popcount32(m[1]),
- popcount32(m[2]), popcount32(m[3]),
- popcount32(m[4]), popcount32(m[5]),
- popcount32(m[6]), popcount32(m[7]),
- popcount32(m[8]), popcount32(m[9]),
- popcount32(m[10]), popcount32(m[11]) };
- u32 v[12];
-
- unpack_bits_32(v, (const u8 *)ptr, bits, 12);
-
- u32 x[12] = { expand32(v[0], m[0]), expand32(v[1], m[1]),
- expand32(v[2], m[2]), expand32(v[3], m[3]),
- expand32(v[4], m[4]), expand32(v[5], m[5]),
- expand32(v[6], m[6]), expand32(v[7], m[7]),
- expand32(v[8], m[8]), expand32(v[9], m[9]),
- expand32(v[10], m[10]), expand32(v[11], m[11]) };
-
- m384 xvec = { .lo = _mm_set_epi32(x[3], x[2], x[1], x[0]),
- .mid = _mm_set_epi32(x[7], x[6], x[5], x[4]),
- .hi = _mm_set_epi32(x[11], x[10], x[9], x[8]) };
- return xvec;
-}
-#endif
-
-#if defined(ARCH_64_BIT)
-static really_inline
-m384 loadcompressed384_64bit(const void *ptr, m384 mvec) {
- // First, decompose our vectors into 64-bit chunks.
- u64a m[6];
- memcpy(m, &mvec, sizeof(mvec));
-
- u32 bits[6] = { popcount64(m[0]), popcount64(m[1]),
- popcount64(m[2]), popcount64(m[3]),
- popcount64(m[4]), popcount64(m[5]) };
- u64a v[6];
-
- unpack_bits_64(v, (const u8 *)ptr, bits, 6);
-
- u64a x[6] = { expand64(v[0], m[0]), expand64(v[1], m[1]),
- expand64(v[2], m[2]), expand64(v[3], m[3]),
- expand64(v[4], m[4]), expand64(v[5], m[5]) };
-
- m384 xvec = { .lo = _mm_set_epi64x(x[1], x[0]),
- .mid = _mm_set_epi64x(x[3], x[2]),
- .hi = _mm_set_epi64x(x[5], x[4]) };
- return xvec;
-}
-#endif
-
-void loadcompressed384(m384 *x, const void *ptr, const m384 *m,
- UNUSED u32 bytes) {
-#if defined(ARCH_64_BIT)
- *x = loadcompressed384_64bit(ptr, *m);
-#else
- *x = loadcompressed384_32bit(ptr, *m);
-#endif
-}
-
-/*
- * 512-bit store/load.
- */
-
-#if defined(ARCH_32_BIT)
-static really_inline
-void storecompressed512_32bit(void *ptr, m512 xvec, m512 mvec) {
- // First, decompose our vectors into 32-bit chunks.
- u32 x[16];
- memcpy(x, &xvec, sizeof(xvec));
- u32 m[16];
- memcpy(m, &mvec, sizeof(mvec));
-
- // Count the number of bits of compressed state we're writing out per
- // chunk.
- u32 bits[16] = { popcount32(m[0]), popcount32(m[1]),
- popcount32(m[2]), popcount32(m[3]),
- popcount32(m[4]), popcount32(m[5]),
- popcount32(m[6]), popcount32(m[7]),
- popcount32(m[8]), popcount32(m[9]),
- popcount32(m[10]), popcount32(m[11]),
- popcount32(m[12]), popcount32(m[13]),
- popcount32(m[14]), popcount32(m[15])};
-
- // Compress each 32-bit chunk individually.
- u32 v[16] = { compress32(x[0], m[0]), compress32(x[1], m[1]),
- compress32(x[2], m[2]), compress32(x[3], m[3]),
- compress32(x[4], m[4]), compress32(x[5], m[5]),
- compress32(x[6], m[6]), compress32(x[7], m[7]),
- compress32(x[8], m[8]), compress32(x[9], m[9]),
- compress32(x[10], m[10]), compress32(x[11], m[11]),
- compress32(x[12], m[12]), compress32(x[13], m[13]),
- compress32(x[14], m[14]), compress32(x[15], m[15]) };
-
- // Write packed data out.
- pack_bits_32(ptr, v, bits, 16);
-}
-#endif
-
-#if defined(ARCH_64_BIT)
-static really_inline
-void storecompressed512_64bit(void *ptr, m512 xvec, m512 mvec) {
- // First, decompose our vectors into 64-bit chunks.
- u64a m[8];
- memcpy(m, &mvec, sizeof(mvec));
- u64a x[8];
- memcpy(x, &xvec, sizeof(xvec));
-
- // Count the number of bits of compressed state we're writing out per
- // chunk.
- u32 bits[8] = { popcount64(m[0]), popcount64(m[1]),
- popcount64(m[2]), popcount64(m[3]),
- popcount64(m[4]), popcount64(m[5]),
- popcount64(m[6]), popcount64(m[7]) };
-
- // Compress each 64-bit chunk individually.
- u64a v[8] = { compress64(x[0], m[0]), compress64(x[1], m[1]),
- compress64(x[2], m[2]), compress64(x[3], m[3]),
- compress64(x[4], m[4]), compress64(x[5], m[5]),
- compress64(x[6], m[6]), compress64(x[7], m[7]) };
-
- // Write packed data out.
- pack_bits_64(ptr, v, bits, 8);
-}
-#endif
-
-void storecompressed512(void *ptr, const m512 *x, const m512 *m,
- UNUSED u32 bytes) {
-#if defined(ARCH_64_BIT)
- storecompressed512_64bit(ptr, *x, *m);
-#else
- storecompressed512_32bit(ptr, *x, *m);
-#endif
-}
-
-#if defined(ARCH_32_BIT)
-static really_inline
-m512 loadcompressed512_32bit(const void *ptr, m512 mvec) {
- // First, decompose our vectors into 32-bit chunks.
- u32 m[16];
- memcpy(m, &mvec, sizeof(mvec));
-
- u32 bits[16] = { popcount32(m[0]), popcount32(m[1]),
- popcount32(m[2]), popcount32(m[3]),
- popcount32(m[4]), popcount32(m[5]),
- popcount32(m[6]), popcount32(m[7]),
- popcount32(m[8]), popcount32(m[9]),
- popcount32(m[10]), popcount32(m[11]),
- popcount32(m[12]), popcount32(m[13]),
- popcount32(m[14]), popcount32(m[15]) };
- u32 v[16];
-
- unpack_bits_32(v, (const u8 *)ptr, bits, 16);
-
- u32 x[16] = { expand32(v[0], m[0]), expand32(v[1], m[1]),
- expand32(v[2], m[2]), expand32(v[3], m[3]),
- expand32(v[4], m[4]), expand32(v[5], m[5]),
- expand32(v[6], m[6]), expand32(v[7], m[7]),
- expand32(v[8], m[8]), expand32(v[9], m[9]),
- expand32(v[10], m[10]), expand32(v[11], m[11]),
- expand32(v[12], m[12]), expand32(v[13], m[13]),
- expand32(v[14], m[14]), expand32(v[15], m[15]) };
-
- m512 xvec;
+ m256 xvec = { .lo = _mm_set_epi64x(x[1], x[0]),
+ .hi = _mm_set_epi64x(x[3], x[2]) };
+#else
+ m256 xvec = _mm256_set_epi64x(x[3], x[2], x[1], x[0]);
+#endif
+ return xvec;
+}
+#endif
+
+void loadcompressed256(m256 *x, const void *ptr, const m256 *m,
+ UNUSED u32 bytes) {
+#if defined(ARCH_64_BIT)
+ *x = loadcompressed256_64bit(ptr, *m);
+#else
+ *x = loadcompressed256_32bit(ptr, *m);
+#endif
+}
+
+/*
+ * 384-bit store/load.
+ */
+
+#if defined(ARCH_32_BIT)
+static really_inline
+void storecompressed384_32bit(void *ptr, m384 xvec, m384 mvec) {
+ // First, decompose our vectors into 32-bit chunks.
+ u32 x[12];
+ memcpy(x, &xvec, sizeof(xvec));
+ u32 m[12];
+ memcpy(m, &mvec, sizeof(mvec));
+
+ // Count the number of bits of compressed state we're writing out per
+ // chunk.
+ u32 bits[12] = { popcount32(m[0]), popcount32(m[1]),
+ popcount32(m[2]), popcount32(m[3]),
+ popcount32(m[4]), popcount32(m[5]),
+ popcount32(m[6]), popcount32(m[7]),
+ popcount32(m[8]), popcount32(m[9]),
+ popcount32(m[10]), popcount32(m[11]) };
+
+ // Compress each 32-bit chunk individually.
+ u32 v[12] = { compress32(x[0], m[0]), compress32(x[1], m[1]),
+ compress32(x[2], m[2]), compress32(x[3], m[3]),
+ compress32(x[4], m[4]), compress32(x[5], m[5]),
+ compress32(x[6], m[6]), compress32(x[7], m[7]),
+ compress32(x[8], m[8]), compress32(x[9], m[9]),
+ compress32(x[10], m[10]), compress32(x[11], m[11])};
+
+ // Write packed data out.
+ pack_bits_32(ptr, v, bits, 12);
+}
+#endif
+
+#if defined(ARCH_64_BIT)
+static really_inline
+void storecompressed384_64bit(void *ptr, m384 xvec, m384 mvec) {
+ // First, decompose our vectors into 64-bit chunks.
+ u64a x[6];
+ memcpy(x, &xvec, sizeof(xvec));
+ u64a m[6];
+ memcpy(m, &mvec, sizeof(mvec));
+
+ // Count the number of bits of compressed state we're writing out per
+ // chunk.
+ u32 bits[6] = { popcount64(m[0]), popcount64(m[1]),
+ popcount64(m[2]), popcount64(m[3]),
+ popcount64(m[4]), popcount64(m[5]) };
+
+ // Compress each 64-bit chunk individually.
+ u64a v[6] = { compress64(x[0], m[0]), compress64(x[1], m[1]),
+ compress64(x[2], m[2]), compress64(x[3], m[3]),
+ compress64(x[4], m[4]), compress64(x[5], m[5]) };
+
+ // Write packed data out.
+ pack_bits_64(ptr, v, bits, 6);
+}
+#endif
+
+void storecompressed384(void *ptr, const m384 *x, const m384 *m,
+ UNUSED u32 bytes) {
+#if defined(ARCH_64_BIT)
+ storecompressed384_64bit(ptr, *x, *m);
+#else
+ storecompressed384_32bit(ptr, *x, *m);
+#endif
+}
+
+#if defined(ARCH_32_BIT)
+static really_inline
+m384 loadcompressed384_32bit(const void *ptr, m384 mvec) {
+ // First, decompose our vectors into 32-bit chunks.
+ u32 m[12];
+ memcpy(m, &mvec, sizeof(mvec));
+
+ u32 bits[12] = { popcount32(m[0]), popcount32(m[1]),
+ popcount32(m[2]), popcount32(m[3]),
+ popcount32(m[4]), popcount32(m[5]),
+ popcount32(m[6]), popcount32(m[7]),
+ popcount32(m[8]), popcount32(m[9]),
+ popcount32(m[10]), popcount32(m[11]) };
+ u32 v[12];
+
+ unpack_bits_32(v, (const u8 *)ptr, bits, 12);
+
+ u32 x[12] = { expand32(v[0], m[0]), expand32(v[1], m[1]),
+ expand32(v[2], m[2]), expand32(v[3], m[3]),
+ expand32(v[4], m[4]), expand32(v[5], m[5]),
+ expand32(v[6], m[6]), expand32(v[7], m[7]),
+ expand32(v[8], m[8]), expand32(v[9], m[9]),
+ expand32(v[10], m[10]), expand32(v[11], m[11]) };
+
+ m384 xvec = { .lo = _mm_set_epi32(x[3], x[2], x[1], x[0]),
+ .mid = _mm_set_epi32(x[7], x[6], x[5], x[4]),
+ .hi = _mm_set_epi32(x[11], x[10], x[9], x[8]) };
+ return xvec;
+}
+#endif
+
+#if defined(ARCH_64_BIT)
+static really_inline
+m384 loadcompressed384_64bit(const void *ptr, m384 mvec) {
+ // First, decompose our vectors into 64-bit chunks.
+ u64a m[6];
+ memcpy(m, &mvec, sizeof(mvec));
+
+ u32 bits[6] = { popcount64(m[0]), popcount64(m[1]),
+ popcount64(m[2]), popcount64(m[3]),
+ popcount64(m[4]), popcount64(m[5]) };
+ u64a v[6];
+
+ unpack_bits_64(v, (const u8 *)ptr, bits, 6);
+
+ u64a x[6] = { expand64(v[0], m[0]), expand64(v[1], m[1]),
+ expand64(v[2], m[2]), expand64(v[3], m[3]),
+ expand64(v[4], m[4]), expand64(v[5], m[5]) };
+
+ m384 xvec = { .lo = _mm_set_epi64x(x[1], x[0]),
+ .mid = _mm_set_epi64x(x[3], x[2]),
+ .hi = _mm_set_epi64x(x[5], x[4]) };
+ return xvec;
+}
+#endif
+
+void loadcompressed384(m384 *x, const void *ptr, const m384 *m,
+ UNUSED u32 bytes) {
+#if defined(ARCH_64_BIT)
+ *x = loadcompressed384_64bit(ptr, *m);
+#else
+ *x = loadcompressed384_32bit(ptr, *m);
+#endif
+}
+
+/*
+ * 512-bit store/load.
+ */
+
+#if defined(ARCH_32_BIT)
+static really_inline
+void storecompressed512_32bit(void *ptr, m512 xvec, m512 mvec) {
+ // First, decompose our vectors into 32-bit chunks.
+ u32 x[16];
+ memcpy(x, &xvec, sizeof(xvec));
+ u32 m[16];
+ memcpy(m, &mvec, sizeof(mvec));
+
+ // Count the number of bits of compressed state we're writing out per
+ // chunk.
+ u32 bits[16] = { popcount32(m[0]), popcount32(m[1]),
+ popcount32(m[2]), popcount32(m[3]),
+ popcount32(m[4]), popcount32(m[5]),
+ popcount32(m[6]), popcount32(m[7]),
+ popcount32(m[8]), popcount32(m[9]),
+ popcount32(m[10]), popcount32(m[11]),
+ popcount32(m[12]), popcount32(m[13]),
+ popcount32(m[14]), popcount32(m[15])};
+
+ // Compress each 32-bit chunk individually.
+ u32 v[16] = { compress32(x[0], m[0]), compress32(x[1], m[1]),
+ compress32(x[2], m[2]), compress32(x[3], m[3]),
+ compress32(x[4], m[4]), compress32(x[5], m[5]),
+ compress32(x[6], m[6]), compress32(x[7], m[7]),
+ compress32(x[8], m[8]), compress32(x[9], m[9]),
+ compress32(x[10], m[10]), compress32(x[11], m[11]),
+ compress32(x[12], m[12]), compress32(x[13], m[13]),
+ compress32(x[14], m[14]), compress32(x[15], m[15]) };
+
+ // Write packed data out.
+ pack_bits_32(ptr, v, bits, 16);
+}
+#endif
+
+#if defined(ARCH_64_BIT)
+static really_inline
+void storecompressed512_64bit(void *ptr, m512 xvec, m512 mvec) {
+ // First, decompose our vectors into 64-bit chunks.
+ u64a m[8];
+ memcpy(m, &mvec, sizeof(mvec));
+ u64a x[8];
+ memcpy(x, &xvec, sizeof(xvec));
+
+ // Count the number of bits of compressed state we're writing out per
+ // chunk.
+ u32 bits[8] = { popcount64(m[0]), popcount64(m[1]),
+ popcount64(m[2]), popcount64(m[3]),
+ popcount64(m[4]), popcount64(m[5]),
+ popcount64(m[6]), popcount64(m[7]) };
+
+ // Compress each 64-bit chunk individually.
+ u64a v[8] = { compress64(x[0], m[0]), compress64(x[1], m[1]),
+ compress64(x[2], m[2]), compress64(x[3], m[3]),
+ compress64(x[4], m[4]), compress64(x[5], m[5]),
+ compress64(x[6], m[6]), compress64(x[7], m[7]) };
+
+ // Write packed data out.
+ pack_bits_64(ptr, v, bits, 8);
+}
+#endif
+
+void storecompressed512(void *ptr, const m512 *x, const m512 *m,
+ UNUSED u32 bytes) {
+#if defined(ARCH_64_BIT)
+ storecompressed512_64bit(ptr, *x, *m);
+#else
+ storecompressed512_32bit(ptr, *x, *m);
+#endif
+}
+
+#if defined(ARCH_32_BIT)
+static really_inline
+m512 loadcompressed512_32bit(const void *ptr, m512 mvec) {
+ // First, decompose our vectors into 32-bit chunks.
+ u32 m[16];
+ memcpy(m, &mvec, sizeof(mvec));
+
+ u32 bits[16] = { popcount32(m[0]), popcount32(m[1]),
+ popcount32(m[2]), popcount32(m[3]),
+ popcount32(m[4]), popcount32(m[5]),
+ popcount32(m[6]), popcount32(m[7]),
+ popcount32(m[8]), popcount32(m[9]),
+ popcount32(m[10]), popcount32(m[11]),
+ popcount32(m[12]), popcount32(m[13]),
+ popcount32(m[14]), popcount32(m[15]) };
+ u32 v[16];
+
+ unpack_bits_32(v, (const u8 *)ptr, bits, 16);
+
+ u32 x[16] = { expand32(v[0], m[0]), expand32(v[1], m[1]),
+ expand32(v[2], m[2]), expand32(v[3], m[3]),
+ expand32(v[4], m[4]), expand32(v[5], m[5]),
+ expand32(v[6], m[6]), expand32(v[7], m[7]),
+ expand32(v[8], m[8]), expand32(v[9], m[9]),
+ expand32(v[10], m[10]), expand32(v[11], m[11]),
+ expand32(v[12], m[12]), expand32(v[13], m[13]),
+ expand32(v[14], m[14]), expand32(v[15], m[15]) };
+
+ m512 xvec;
#if defined(HAVE_AVX512)
xvec = _mm512_set_epi32(x[15], x[14], x[13], x[12],
x[11], x[10], x[9], x[8],
@@ -558,35 +558,35 @@ m512 loadcompressed512_32bit(const void *ptr, m512 mvec) {
xvec.hi = _mm256_set_epi32(x[15], x[14], x[13], x[12],
x[11], x[10], x[9], x[8]);
#else
- xvec.lo.lo = _mm_set_epi32(x[3], x[2], x[1], x[0]);
- xvec.lo.hi = _mm_set_epi32(x[7], x[6], x[5], x[4]);
- xvec.hi.lo = _mm_set_epi32(x[11], x[10], x[9], x[8]);
- xvec.hi.hi = _mm_set_epi32(x[15], x[14], x[13], x[12]);
-#endif
- return xvec;
-}
-#endif
-
-#if defined(ARCH_64_BIT)
-static really_inline
-m512 loadcompressed512_64bit(const void *ptr, m512 mvec) {
- // First, decompose our vectors into 64-bit chunks.
- u64a m[8];
- memcpy(m, &mvec, sizeof(mvec));
-
- u32 bits[8] = { popcount64(m[0]), popcount64(m[1]),
- popcount64(m[2]), popcount64(m[3]),
- popcount64(m[4]), popcount64(m[5]),
- popcount64(m[6]), popcount64(m[7]) };
- u64a v[8];
-
- unpack_bits_64(v, (const u8 *)ptr, bits, 8);
-
- u64a x[8] = { expand64(v[0], m[0]), expand64(v[1], m[1]),
- expand64(v[2], m[2]), expand64(v[3], m[3]),
- expand64(v[4], m[4]), expand64(v[5], m[5]),
- expand64(v[6], m[6]), expand64(v[7], m[7]) };
-
+ xvec.lo.lo = _mm_set_epi32(x[3], x[2], x[1], x[0]);
+ xvec.lo.hi = _mm_set_epi32(x[7], x[6], x[5], x[4]);
+ xvec.hi.lo = _mm_set_epi32(x[11], x[10], x[9], x[8]);
+ xvec.hi.hi = _mm_set_epi32(x[15], x[14], x[13], x[12]);
+#endif
+ return xvec;
+}
+#endif
+
+#if defined(ARCH_64_BIT)
+static really_inline
+m512 loadcompressed512_64bit(const void *ptr, m512 mvec) {
+ // First, decompose our vectors into 64-bit chunks.
+ u64a m[8];
+ memcpy(m, &mvec, sizeof(mvec));
+
+ u32 bits[8] = { popcount64(m[0]), popcount64(m[1]),
+ popcount64(m[2]), popcount64(m[3]),
+ popcount64(m[4]), popcount64(m[5]),
+ popcount64(m[6]), popcount64(m[7]) };
+ u64a v[8];
+
+ unpack_bits_64(v, (const u8 *)ptr, bits, 8);
+
+ u64a x[8] = { expand64(v[0], m[0]), expand64(v[1], m[1]),
+ expand64(v[2], m[2]), expand64(v[3], m[3]),
+ expand64(v[4], m[4]), expand64(v[5], m[5]),
+ expand64(v[6], m[6]), expand64(v[7], m[7]) };
+
#if defined(HAVE_AVX512)
m512 xvec = _mm512_set_epi64(x[7], x[6], x[5], x[4],
x[3], x[2], x[1], x[0]);
@@ -594,20 +594,20 @@ m512 loadcompressed512_64bit(const void *ptr, m512 mvec) {
m512 xvec = { .lo = _mm256_set_epi64x(x[3], x[2], x[1], x[0]),
.hi = _mm256_set_epi64x(x[7], x[6], x[5], x[4])};
#else
- m512 xvec = { .lo = { _mm_set_epi64x(x[1], x[0]),
- _mm_set_epi64x(x[3], x[2]) },
- .hi = { _mm_set_epi64x(x[5], x[4]),
- _mm_set_epi64x(x[7], x[6]) } };
-#endif
- return xvec;
-}
-#endif
-
-void loadcompressed512(m512 *x, const void *ptr, const m512 *m,
- UNUSED u32 bytes) {
-#if defined(ARCH_64_BIT)
- *x = loadcompressed512_64bit(ptr, *m);
-#else
- *x = loadcompressed512_32bit(ptr, *m);
-#endif
-}
+ m512 xvec = { .lo = { _mm_set_epi64x(x[1], x[0]),
+ _mm_set_epi64x(x[3], x[2]) },
+ .hi = { _mm_set_epi64x(x[5], x[4]),
+ _mm_set_epi64x(x[7], x[6]) } };
+#endif
+ return xvec;
+}
+#endif
+
+void loadcompressed512(m512 *x, const void *ptr, const m512 *m,
+ UNUSED u32 bytes) {
+#if defined(ARCH_64_BIT)
+ *x = loadcompressed512_64bit(ptr, *m);
+#else
+ *x = loadcompressed512_32bit(ptr, *m);
+#endif
+}
diff --git a/contrib/libs/hyperscan/src/util/state_compress.h b/contrib/libs/hyperscan/src/util/state_compress.h
index 183f173888..a17d2355cc 100644
--- a/contrib/libs/hyperscan/src/util/state_compress.h
+++ b/contrib/libs/hyperscan/src/util/state_compress.h
@@ -1,68 +1,68 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Mask-based state compression, used by the NFA.
- */
-
-#ifndef STATE_COMPRESS_H
-#define STATE_COMPRESS_H
-
-#include "simd_utils.h"
-#include "ue2common.h"
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-/* Note: bytes is not used by implementations >= 128 */
-
-void storecompressed32(void *ptr, const u32 *x, const u32 *m, u32 bytes);
-void loadcompressed32(u32 *x, const void *ptr, const u32 *m, u32 bytes);
-
-void storecompressed64(void *ptr, const u64a *x, const u64a *m, u32 bytes);
-void loadcompressed64(u64a *x, const void *ptr, const u64a *m, u32 bytes);
-
-void storecompressed128(void *ptr, const m128 *x, const m128 *m, u32 bytes);
-void loadcompressed128(m128 *x, const void *ptr, const m128 *m, u32 bytes);
-
-void storecompressed256(void *ptr, const m256 *x, const m256 *m, u32 bytes);
-void loadcompressed256(m256 *x, const void *ptr, const m256 *m, u32 bytes);
-
-void storecompressed384(void *ptr, const m384 *x, const m384 *m, u32 bytes);
-void loadcompressed384(m384 *x, const void *ptr, const m384 *m, u32 bytes);
-
-void storecompressed512(void *ptr, const m512 *x, const m512 *m, u32 bytes);
-void loadcompressed512(m512 *x, const void *ptr, const m512 *m, u32 bytes);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Mask-based state compression, used by the NFA.
+ */
+
+#ifndef STATE_COMPRESS_H
+#define STATE_COMPRESS_H
+
+#include "simd_utils.h"
+#include "ue2common.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Note: bytes is not used by implementations >= 128 */
+
+void storecompressed32(void *ptr, const u32 *x, const u32 *m, u32 bytes);
+void loadcompressed32(u32 *x, const void *ptr, const u32 *m, u32 bytes);
+
+void storecompressed64(void *ptr, const u64a *x, const u64a *m, u32 bytes);
+void loadcompressed64(u64a *x, const void *ptr, const u64a *m, u32 bytes);
+
+void storecompressed128(void *ptr, const m128 *x, const m128 *m, u32 bytes);
+void loadcompressed128(m128 *x, const void *ptr, const m128 *m, u32 bytes);
+
+void storecompressed256(void *ptr, const m256 *x, const m256 *m, u32 bytes);
+void loadcompressed256(m256 *x, const void *ptr, const m256 *m, u32 bytes);
+
+void storecompressed384(void *ptr, const m384 *x, const m384 *m, u32 bytes);
+void loadcompressed384(m384 *x, const void *ptr, const m384 *m, u32 bytes);
+
+void storecompressed512(void *ptr, const m512 *x, const m512 *m, u32 bytes);
+void loadcompressed512(m512 *x, const void *ptr, const m512 *m, u32 bytes);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/target_info.cpp b/contrib/libs/hyperscan/src/util/target_info.cpp
index d30c9ae2ca..66ba5f5acc 100644
--- a/contrib/libs/hyperscan/src/util/target_info.cpp
+++ b/contrib/libs/hyperscan/src/util/target_info.cpp
@@ -1,51 +1,51 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#include "hs_compile.h" // for various hs_platform_info flags
-#include "target_info.h"
-#include "util/cpuid_flags.h"
-
-namespace ue2 {
-
-target_t get_current_target(void) {
- hs_platform_info p;
- p.cpu_features = cpuid_flags();
- p.tune = cpuid_tune();
-
- return target_t(p);
-}
-
-bool target_t::can_run_on_code_built_for(const target_t &code_target) const {
- if (!has_avx2() && code_target.has_avx2()) {
- return false;
- }
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "hs_compile.h" // for various hs_platform_info flags
+#include "target_info.h"
+#include "util/cpuid_flags.h"
+
+namespace ue2 {
+
+target_t get_current_target(void) {
+ hs_platform_info p;
+ p.cpu_features = cpuid_flags();
+ p.tune = cpuid_tune();
+
+ return target_t(p);
+}
+
+bool target_t::can_run_on_code_built_for(const target_t &code_target) const {
+ if (!has_avx2() && code_target.has_avx2()) {
+ return false;
+ }
+
if (!has_avx512() && code_target.has_avx512()) {
return false;
}
@@ -54,16 +54,16 @@ bool target_t::can_run_on_code_built_for(const target_t &code_target) const {
return false;
}
- return true;
-}
-
-target_t::target_t(const hs_platform_info &p)
- : tune(p.tune), cpu_features(p.cpu_features) {}
-
-bool target_t::has_avx2(void) const {
+ return true;
+}
+
+target_t::target_t(const hs_platform_info &p)
+ : tune(p.tune), cpu_features(p.cpu_features) {}
+
+bool target_t::has_avx2(void) const {
return cpu_features & HS_CPU_FEATURES_AVX2;
-}
-
+}
+
bool target_t::has_avx512(void) const {
return cpu_features & HS_CPU_FEATURES_AVX512;
}
@@ -72,8 +72,8 @@ bool target_t::has_avx512vbmi(void) const {
return cpu_features & HS_CPU_FEATURES_AVX512VBMI;
}
-bool target_t::is_atom_class(void) const {
+bool target_t::is_atom_class(void) const {
return tune == HS_TUNE_FAMILY_SLM || tune == HS_TUNE_FAMILY_GLM;
-}
-
-} // namespace ue2
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/util/target_info.h b/contrib/libs/hyperscan/src/util/target_info.h
index 7fefbbed16..f64573aeda 100644
--- a/contrib/libs/hyperscan/src/util/target_info.h
+++ b/contrib/libs/hyperscan/src/util/target_info.h
@@ -1,63 +1,63 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TARGET_INFO_H
-#define TARGET_INFO_H
-
-#include "ue2common.h"
-
-struct hs_platform_info;
-
-namespace ue2 {
-
-struct target_t {
- explicit target_t(const hs_platform_info &pi);
-
- bool has_avx2(void) const;
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TARGET_INFO_H
+#define TARGET_INFO_H
+
+#include "ue2common.h"
+
+struct hs_platform_info;
+
+namespace ue2 {
+
+struct target_t {
+ explicit target_t(const hs_platform_info &pi);
+
+ bool has_avx2(void) const;
+
bool has_avx512(void) const;
bool has_avx512vbmi(void) const;
- bool is_atom_class(void) const;
-
- // This asks: can this target (the object) run on code that was built for
- // "code_target". Very wordy but less likely to be misinterpreted than
- // is_compatible() or some such.
- bool can_run_on_code_built_for(const target_t &code_target) const;
-
-private:
- u32 tune;
- u64a cpu_features;
-};
-
-target_t get_current_target(void);
-
-} // namespace ue2
-
-#endif
+ bool is_atom_class(void) const;
+
+ // This asks: can this target (the object) run on code that was built for
+ // "code_target". Very wordy but less likely to be misinterpreted than
+ // is_compatible() or some such.
+ bool can_run_on_code_built_for(const target_t &code_target) const;
+
+private:
+ u32 tune;
+ u64a cpu_features;
+};
+
+target_t get_current_target(void);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/ue2string.cpp b/contrib/libs/hyperscan/src/util/ue2string.cpp
index 3beb222ff2..50b2bbcc89 100644
--- a/contrib/libs/hyperscan/src/util/ue2string.cpp
+++ b/contrib/libs/hyperscan/src/util/ue2string.cpp
@@ -1,184 +1,184 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Tools for string manipulation, ue2_literal definition.
- */
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Tools for string manipulation, ue2_literal definition.
+ */
#include "ue2string.h"
-#include "charreach.h"
-#include "compare.h"
+#include "charreach.h"
+#include "compare.h"
#include "hash_dynamic_bitset.h"
-
-#include <algorithm>
+
+#include <algorithm>
#include <cstring>
-#include <iomanip>
-#include <sstream>
-#include <string>
-
-using namespace std;
-
-namespace ue2 {
-
-#if defined(DUMP_SUPPORT) || defined(DEBUG)
-
-// Escape a string so that it's screen-printable
-string escapeString(const string &s) {
- ostringstream os;
- for (unsigned int i = 0; i < s.size(); ++i) {
- char c = s[i];
- if (0x20 <= c && c <= 0x7e && c != '\\') {
- os << c;
- } else if (c == '\n') {
- os << "\\n";
- } else if (c == '\r') {
- os << "\\r";
- } else if (c == '\t') {
- os << "\\t";
- } else {
- os << "\\x" << hex << setw(2) << setfill('0')
- << (unsigned)(c & 0xff) << dec;
- }
- }
- return os.str();
-}
-
-string escapeString(const ue2_literal &lit) {
- ostringstream os;
- for (ue2_literal::const_iterator it = lit.begin(); it != lit.end(); ++it) {
- char c = it->c;
- if (0x20 <= c && c <= 0x7e && c != '\\') {
- os << c;
- } else if (c == '\n') {
- os << "\\n";
- } else {
- os << "\\x" << hex << setw(2) << setfill('0')
- << (unsigned)(c & 0xff) << dec;
- }
- }
- return os.str();
-}
-
-// escape any metacharacters in a literal string
-string escapeStringMeta(const string &s) {
- ostringstream os;
- for (unsigned int i = 0; i < s.size(); ++i) {
- char c = s[i];
- switch (c) {
- case '#': case '$': case '(': case ')':
- case '*': case '+': case '.': case '/':
- case '?': case '[': case ']': case '^':
- case '|':
- os << "\\" << c; break;
- default:
- os << c; break;
- }
- }
- return os.str();
-}
-
-string dotEscapeString(const string &s) {
- string ss = escapeString(s);
- string out;
- out.reserve(ss.size());
- for (size_t i = 0; i != ss.size(); i++) {
- char c = ss[i];
- switch (c) {
- case '\"':
- case '\\':
- out.push_back('\\');
- // fall through
- default:
- out.push_back(c);
- break;
- }
- }
- return out;
-}
-
-string dumpString(const ue2_literal &lit) {
- string s = escapeString(lit.get_string());
- if (lit.any_nocase()) {
- s += " (nocase)";
- }
-
- return s;
-}
-#endif
-
-void upperString(string &s) {
+#include <iomanip>
+#include <sstream>
+#include <string>
+
+using namespace std;
+
+namespace ue2 {
+
+#if defined(DUMP_SUPPORT) || defined(DEBUG)
+
+// Escape a string so that it's screen-printable
+string escapeString(const string &s) {
+ ostringstream os;
+ for (unsigned int i = 0; i < s.size(); ++i) {
+ char c = s[i];
+ if (0x20 <= c && c <= 0x7e && c != '\\') {
+ os << c;
+ } else if (c == '\n') {
+ os << "\\n";
+ } else if (c == '\r') {
+ os << "\\r";
+ } else if (c == '\t') {
+ os << "\\t";
+ } else {
+ os << "\\x" << hex << setw(2) << setfill('0')
+ << (unsigned)(c & 0xff) << dec;
+ }
+ }
+ return os.str();
+}
+
+string escapeString(const ue2_literal &lit) {
+ ostringstream os;
+ for (ue2_literal::const_iterator it = lit.begin(); it != lit.end(); ++it) {
+ char c = it->c;
+ if (0x20 <= c && c <= 0x7e && c != '\\') {
+ os << c;
+ } else if (c == '\n') {
+ os << "\\n";
+ } else {
+ os << "\\x" << hex << setw(2) << setfill('0')
+ << (unsigned)(c & 0xff) << dec;
+ }
+ }
+ return os.str();
+}
+
+// escape any metacharacters in a literal string
+string escapeStringMeta(const string &s) {
+ ostringstream os;
+ for (unsigned int i = 0; i < s.size(); ++i) {
+ char c = s[i];
+ switch (c) {
+ case '#': case '$': case '(': case ')':
+ case '*': case '+': case '.': case '/':
+ case '?': case '[': case ']': case '^':
+ case '|':
+ os << "\\" << c; break;
+ default:
+ os << c; break;
+ }
+ }
+ return os.str();
+}
+
+string dotEscapeString(const string &s) {
+ string ss = escapeString(s);
+ string out;
+ out.reserve(ss.size());
+ for (size_t i = 0; i != ss.size(); i++) {
+ char c = ss[i];
+ switch (c) {
+ case '\"':
+ case '\\':
+ out.push_back('\\');
+ // fall through
+ default:
+ out.push_back(c);
+ break;
+ }
+ }
+ return out;
+}
+
+string dumpString(const ue2_literal &lit) {
+ string s = escapeString(lit.get_string());
+ if (lit.any_nocase()) {
+ s += " (nocase)";
+ }
+
+ return s;
+}
+#endif
+
+void upperString(string &s) {
for (auto &c : s) {
c = mytoupper(c);
}
-}
-
-size_t maxStringOverlap(const string &a, const string &b, bool nocase) {
- size_t lena = a.length(), lenb = b.length();
- const char *astart = a.c_str();
- const char *bstart = b.c_str();
- const char *aend = astart + lena;
- size_t i = lenb;
-
- for (; i > lena; i--) {
- if (!cmp(astart, bstart + i - lena, lena, nocase)) {
- return i;
- }
- }
-
- for (; i && cmp(aend - i, bstart, i, nocase); i--) {
- ;
- }
-
- return i;
-}
-
-size_t maxStringOverlap(const ue2_literal &a, const ue2_literal &b) {
- /* todo: handle nocase better */
- return maxStringOverlap(a.get_string(), b.get_string(),
- a.any_nocase() || b.any_nocase());
-}
-
-size_t maxStringSelfOverlap(const string &a, bool nocase) {
- size_t lena = a.length();
- const char *astart = a.c_str();
- const char *bstart = a.c_str();
- const char *aend = astart + lena;
- size_t i = lena - 1;
-
- for (; i && cmp(aend - i, bstart, i, nocase); i--) {
- ;
- }
-
- return i;
-}
-
-u32 cmp(const char *a, const char *b, size_t len, bool nocase) {
+}
+
+size_t maxStringOverlap(const string &a, const string &b, bool nocase) {
+ size_t lena = a.length(), lenb = b.length();
+ const char *astart = a.c_str();
+ const char *bstart = b.c_str();
+ const char *aend = astart + lena;
+ size_t i = lenb;
+
+ for (; i > lena; i--) {
+ if (!cmp(astart, bstart + i - lena, lena, nocase)) {
+ return i;
+ }
+ }
+
+ for (; i && cmp(aend - i, bstart, i, nocase); i--) {
+ ;
+ }
+
+ return i;
+}
+
+size_t maxStringOverlap(const ue2_literal &a, const ue2_literal &b) {
+ /* todo: handle nocase better */
+ return maxStringOverlap(a.get_string(), b.get_string(),
+ a.any_nocase() || b.any_nocase());
+}
+
+size_t maxStringSelfOverlap(const string &a, bool nocase) {
+ size_t lena = a.length();
+ const char *astart = a.c_str();
+ const char *bstart = a.c_str();
+ const char *aend = astart + lena;
+ size_t i = lena - 1;
+
+ for (; i && cmp(aend - i, bstart, i, nocase); i--) {
+ ;
+ }
+
+ return i;
+}
+
+u32 cmp(const char *a, const char *b, size_t len, bool nocase) {
if (!nocase) {
return memcmp(a, b, len);
}
@@ -189,110 +189,110 @@ u32 cmp(const char *a, const char *b, size_t len, bool nocase) {
}
}
return 0;
-}
-
-case_iter::case_iter(const ue2_literal &ss) : s(ss.get_string()),
- s_orig(ss.get_string()) {
- for (ue2_literal::const_iterator it = ss.begin(); it != ss.end(); ++it) {
- nocase.push_back(it->nocase);
- }
-}
-
-case_iter caseIterateBegin(const ue2_literal &s) {
- return case_iter(s);
-}
-
-case_iter caseIterateEnd() {
- return case_iter(ue2_literal());
-}
-
-case_iter &case_iter::operator++ () {
- for (size_t i = s.length(); i != 0; i--) {
- char lower = mytolower(s[i - 1]);
- if (nocase[i - 1] && lower != s[i - 1]) {
- s[i - 1] = lower;
- copy(s_orig.begin() + i, s_orig.end(), s.begin() + i);
- return *this;
- }
- }
-
- s.clear();
- return *this;
-}
-
-static
-string toUpperString(string s) {
- upperString(s);
- return s;
-}
-
-ue2_literal::elem::operator CharReach () const {
- if (!nocase) {
- return CharReach(c);
- } else {
- CharReach rv;
- rv.set(mytoupper(c));
- rv.set(mytolower(c));
- return rv;
- }
-}
-
+}
+
+case_iter::case_iter(const ue2_literal &ss) : s(ss.get_string()),
+ s_orig(ss.get_string()) {
+ for (ue2_literal::const_iterator it = ss.begin(); it != ss.end(); ++it) {
+ nocase.push_back(it->nocase);
+ }
+}
+
+case_iter caseIterateBegin(const ue2_literal &s) {
+ return case_iter(s);
+}
+
+case_iter caseIterateEnd() {
+ return case_iter(ue2_literal());
+}
+
+case_iter &case_iter::operator++ () {
+ for (size_t i = s.length(); i != 0; i--) {
+ char lower = mytolower(s[i - 1]);
+ if (nocase[i - 1] && lower != s[i - 1]) {
+ s[i - 1] = lower;
+ copy(s_orig.begin() + i, s_orig.end(), s.begin() + i);
+ return *this;
+ }
+ }
+
+ s.clear();
+ return *this;
+}
+
+static
+string toUpperString(string s) {
+ upperString(s);
+ return s;
+}
+
+ue2_literal::elem::operator CharReach () const {
+ if (!nocase) {
+ return CharReach(c);
+ } else {
+ CharReach rv;
+ rv.set(mytoupper(c));
+ rv.set(mytolower(c));
+ return rv;
+ }
+}
+
const ue2_literal::size_type ue2_literal::npos = std::string::npos;
-ue2_literal::ue2_literal(const std::string &s_in, bool nc_in)
+ue2_literal::ue2_literal(const std::string &s_in, bool nc_in)
: s(nc_in ? toUpperString(s_in) : s_in), nocase(s_in.size()) {
- if (nc_in) {
+ if (nc_in) {
// Switch on nocase bit for all alpha characters.
- for (size_t i = 0; i < s.length(); i++) {
+ for (size_t i = 0; i < s.length(); i++) {
if (ourisalpha(s[i])) {
nocase.set(i);
- }
- }
- }
-}
-
-ue2_literal::ue2_literal(char c, bool nc)
- : s(1, nc ? mytoupper(c) : c), nocase(1, ourisalpha(c) ? nc : false) {}
-
-ue2_literal ue2_literal::substr(size_type pos, size_type n) const {
- ue2_literal rv;
- rv.s = s.substr(pos, n);
- size_type upper = nocase.size();
+ }
+ }
+ }
+}
+
+ue2_literal::ue2_literal(char c, bool nc)
+ : s(1, nc ? mytoupper(c) : c), nocase(1, ourisalpha(c) ? nc : false) {}
+
+ue2_literal ue2_literal::substr(size_type pos, size_type n) const {
+ ue2_literal rv;
+ rv.s = s.substr(pos, n);
+ size_type upper = nocase.size();
if (n != npos && n + pos < nocase.size()) {
- upper = n + pos;
- }
+ upper = n + pos;
+ }
rv.nocase.resize(upper - pos, false);
for (size_t i = pos; i < upper; i++) {
rv.nocase.set(i - pos, nocase.test(i));
}
assert(s.size() == nocase.size());
- return rv;
-}
-
-ue2_literal &ue2_literal::erase(size_type pos, size_type n) {
- s.erase(pos, n);
+ return rv;
+}
+
+ue2_literal &ue2_literal::erase(size_type pos, size_type n) {
+ s.erase(pos, n);
if (n != npos) {
for (size_type i = pos + n; i < nocase.size(); i++) {
nocase.set(i - n, nocase.test(i));
}
- }
+ }
nocase.resize(s.size());
- return *this;
-}
-
-void ue2_literal::push_back(char c, bool nc) {
- if (nc) {
- c = mytoupper(c);
- }
- nocase.push_back(nc);
- s.push_back(c);
-}
-
+ return *this;
+}
+
+void ue2_literal::push_back(char c, bool nc) {
+ if (nc) {
+ c = mytoupper(c);
+ }
+ nocase.push_back(nc);
+ s.push_back(c);
+}
+
void ue2_literal::reverse() {
std::reverse(s.begin(), s.end());
-
+
const size_t len = nocase.size();
for (size_t i = 0; i < len / 2; i++) {
size_t j = len - i - 1;
@@ -300,9 +300,9 @@ void ue2_literal::reverse() {
bool b = nocase.test(j);
nocase.set(i, b);
nocase.set(j, a);
- }
-}
-
+ }
+}
+
// Return a copy of this literal in reverse order.
ue2_literal reverse_literal(const ue2_literal &in) {
auto out = in;
@@ -310,94 +310,94 @@ ue2_literal reverse_literal(const ue2_literal &in) {
return out;
}
-bool ue2_literal::operator<(const ue2_literal &b) const {
- if (s < b.s) {
- return true;
- }
- if (s > b.s) {
- return false;
- }
- return nocase < b.nocase;
-}
-
-void ue2_literal::operator+=(const ue2_literal &b) {
- s += b.s;
+bool ue2_literal::operator<(const ue2_literal &b) const {
+ if (s < b.s) {
+ return true;
+ }
+ if (s > b.s) {
+ return false;
+ }
+ return nocase < b.nocase;
+}
+
+void ue2_literal::operator+=(const ue2_literal &b) {
+ s += b.s;
size_t prefix = nocase.size();
nocase.resize(prefix + b.nocase.size());
for (size_t i = 0; i < b.nocase.size(); i++) {
nocase.set(prefix + i, b.nocase[i]);
}
-}
-
-bool ue2_literal::any_nocase() const {
+}
+
+bool ue2_literal::any_nocase() const {
return nocase.any();
-}
-
+}
+
size_t ue2_literal::hash() const {
return hash_all(s, hash_dynamic_bitset()(nocase));
-}
-
-void make_nocase(ue2_literal *lit) {
- ue2_literal rv;
-
+}
+
+void make_nocase(ue2_literal *lit) {
+ ue2_literal rv;
+
for (const auto &elem: *lit) {
rv.push_back(elem.c, ourisalpha(elem.c));
- }
-
- lit->swap(rv);
-}
-
-static
-bool testchar(char c, const CharReach &cr, bool nocase) {
- if (nocase) {
- return cr.test((unsigned char)mytolower(c))
- || cr.test((unsigned char)mytoupper(c));
- } else {
- return cr.test((unsigned char)c);
- }
-}
-
-// Returns true if the given literal contains a char in the given CharReach
-bool contains(const ue2_literal &s, const CharReach &cr) {
- for (ue2_literal::const_iterator it = s.begin(), ite = s.end();
- it != ite; ++it) {
- if (testchar(it->c, cr, it->nocase)) {
- return true;
- }
- }
- return false;
-}
-
-size_t maxStringSelfOverlap(const ue2_literal &a) {
- /* overly conservative if only part of the string is nocase, TODO: fix */
- return maxStringSelfOverlap(a.get_string(), a.any_nocase());
-}
-
-size_t minStringPeriod(const ue2_literal &a) {
- return a.length() - maxStringSelfOverlap(a);
-}
-
-// Returns true if `a' is a suffix of (or equal to) `b'.
-bool isSuffix(const ue2_literal &a, const ue2_literal &b) {
- size_t alen = a.length(), blen = b.length();
- if (alen > blen) {
- return false;
- }
- return equal(a.begin(), a.end(), b.begin() + (blen - alen));
-}
-
-bool is_flood(const ue2_literal &s) {
- assert(!s.empty());
-
- ue2_literal::const_iterator it = s.begin(), ite = s.end();
- ue2_literal::elem f = *it;
- for (++it; it != ite; ++it) {
- if (*it != f) {
- return false;
- }
- }
-
- return true;
-}
-
-} // namespace ue2
+ }
+
+ lit->swap(rv);
+}
+
+static
+bool testchar(char c, const CharReach &cr, bool nocase) {
+ if (nocase) {
+ return cr.test((unsigned char)mytolower(c))
+ || cr.test((unsigned char)mytoupper(c));
+ } else {
+ return cr.test((unsigned char)c);
+ }
+}
+
+// Returns true if the given literal contains a char in the given CharReach
+bool contains(const ue2_literal &s, const CharReach &cr) {
+ for (ue2_literal::const_iterator it = s.begin(), ite = s.end();
+ it != ite; ++it) {
+ if (testchar(it->c, cr, it->nocase)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+size_t maxStringSelfOverlap(const ue2_literal &a) {
+ /* overly conservative if only part of the string is nocase, TODO: fix */
+ return maxStringSelfOverlap(a.get_string(), a.any_nocase());
+}
+
+size_t minStringPeriod(const ue2_literal &a) {
+ return a.length() - maxStringSelfOverlap(a);
+}
+
+// Returns true if `a' is a suffix of (or equal to) `b'.
+bool isSuffix(const ue2_literal &a, const ue2_literal &b) {
+ size_t alen = a.length(), blen = b.length();
+ if (alen > blen) {
+ return false;
+ }
+ return equal(a.begin(), a.end(), b.begin() + (blen - alen));
+}
+
+bool is_flood(const ue2_literal &s) {
+ assert(!s.empty());
+
+ ue2_literal::const_iterator it = s.begin(), ite = s.end();
+ ue2_literal::elem f = *it;
+ for (++it; it != ite; ++it) {
+ if (*it != f) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/util/ue2string.h b/contrib/libs/hyperscan/src/util/ue2string.h
index 2e89b2b478..0aa846896e 100644
--- a/contrib/libs/hyperscan/src/util/ue2string.h
+++ b/contrib/libs/hyperscan/src/util/ue2string.h
@@ -1,64 +1,64 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Tools for string manipulation, ue2_literal definition.
- */
-
-#ifndef UE2STRING_H
-#define UE2STRING_H
-
-#include "ue2common.h"
-#include "util/charreach.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Tools for string manipulation, ue2_literal definition.
+ */
+
+#ifndef UE2STRING_H
+#define UE2STRING_H
+
+#include "ue2common.h"
+#include "util/charreach.h"
#include "util/compare.h"
#include "util/hash.h"
#include "util/operators.h"
-
-#include <iterator>
-#include <string>
-#include <vector>
-
+
+#include <iterator>
+#include <string>
+#include <vector>
+
#include <boost/dynamic_bitset.hpp>
-#include <boost/iterator/iterator_facade.hpp>
-
-namespace ue2 {
-
-/// Force the given string to upper-case.
-void upperString(std::string &s);
-
-size_t maxStringOverlap(const std::string &a, const std::string &b,
- bool nocase);
-
-size_t maxStringSelfOverlap(const std::string &a, bool nocase);
-
-/// Compares two strings, returns non-zero if they're different.
-u32 cmp(const char *a, const char *b, size_t len, bool nocase);
-
+#include <boost/iterator/iterator_facade.hpp>
+
+namespace ue2 {
+
+/// Force the given string to upper-case.
+void upperString(std::string &s);
+
+size_t maxStringOverlap(const std::string &a, const std::string &b,
+ bool nocase);
+
+size_t maxStringSelfOverlap(const std::string &a, bool nocase);
+
+/// Compares two strings, returns non-zero if they're different.
+u32 cmp(const char *a, const char *b, size_t len, bool nocase);
+
/**
* \brief String type that also records whether the whole string is caseful or
* caseless.
@@ -73,7 +73,7 @@ struct ue2_case_string {
upperString(s);
}
}
-
+
bool operator==(const ue2_case_string &other) const {
return s == other.s && nocase == other.nocase;
}
@@ -83,72 +83,72 @@ struct ue2_case_string {
};
struct ue2_literal : totally_ordered<ue2_literal> {
-public:
- /// Single element proxy, pointed to by our const_iterator.
- struct elem {
- elem() : c(0), nocase(false) {}
- elem(char c_in, bool nc_in) : c(c_in), nocase(nc_in) {}
- bool operator==(const elem &o) const {
- return c == o.c && nocase == o.nocase;
- }
- bool operator!=(const elem &o) const {
- return c != o.c || nocase != o.nocase;
- }
- operator CharReach() const;
- char c;
- bool nocase;
- };
-
- /// Boost iterator_facade lets us synthesize an iterator simply.
- class const_iterator : public boost::iterator_facade<
- const_iterator,
- elem const,
- boost::random_access_traversal_tag,
- elem const> {
- public:
- const_iterator() {}
- private:
- friend class boost::iterator_core_access;
- void increment() {
+public:
+ /// Single element proxy, pointed to by our const_iterator.
+ struct elem {
+ elem() : c(0), nocase(false) {}
+ elem(char c_in, bool nc_in) : c(c_in), nocase(nc_in) {}
+ bool operator==(const elem &o) const {
+ return c == o.c && nocase == o.nocase;
+ }
+ bool operator!=(const elem &o) const {
+ return c != o.c || nocase != o.nocase;
+ }
+ operator CharReach() const;
+ char c;
+ bool nocase;
+ };
+
+ /// Boost iterator_facade lets us synthesize an iterator simply.
+ class const_iterator : public boost::iterator_facade<
+ const_iterator,
+ elem const,
+ boost::random_access_traversal_tag,
+ elem const> {
+ public:
+ const_iterator() {}
+ private:
+ friend class boost::iterator_core_access;
+ void increment() {
++idx;
- }
- void decrement() {
+ }
+ void decrement() {
--idx;
- }
- void advance(size_t n) {
+ }
+ void advance(size_t n) {
idx += n;
- }
- difference_type distance_to(const const_iterator &other) const {
+ }
+ difference_type distance_to(const const_iterator &other) const {
return other.idx - idx;
- }
- bool equal(const const_iterator &other) const {
+ }
+ bool equal(const const_iterator &other) const {
return idx == other.idx && lit == other.lit;
- }
- const elem dereference() const {
+ }
+ const elem dereference() const {
return elem(lit->s[idx], lit->nocase[idx]);
- }
-
- friend struct ue2_literal;
+ }
+
+ friend struct ue2_literal;
const_iterator(const ue2_literal &lit_in, size_t idx_in)
: lit(&lit_in), idx(idx_in) {}
-
+
const ue2_literal *lit = nullptr;
size_t idx;
- };
-
- using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+ };
+
+ using const_reverse_iterator = std::reverse_iterator<const_iterator>;
using size_type = std::string::size_type;
-
+
static const size_type npos;
-
+
ue2_literal() = default;
- ue2_literal(const std::string &s_in, bool nc_in);
- ue2_literal(char c, bool nc_in);
- ue2_literal(const ue2_literal &) = default;
- ue2_literal(ue2_literal &&) = default;
- ue2_literal &operator=(const ue2_literal &) = default;
- ue2_literal &operator=(ue2_literal &&) = default;
-
+ ue2_literal(const std::string &s_in, bool nc_in);
+ ue2_literal(char c, bool nc_in);
+ ue2_literal(const ue2_literal &) = default;
+ ue2_literal(ue2_literal &&) = default;
+ ue2_literal &operator=(const ue2_literal &) = default;
+ ue2_literal &operator=(ue2_literal &&) = default;
+
template<typename InputIt>
ue2_literal(InputIt b, InputIt e) {
for (; b != e; ++b) {
@@ -156,36 +156,36 @@ public:
}
}
- size_type length() const { return s.length(); }
- bool empty() const { return s.empty(); }
+ size_type length() const { return s.length(); }
+ bool empty() const { return s.empty(); }
ue2_literal substr(size_type pos, size_type n = npos) const;
- const char *c_str() const { return s.c_str(); }
- bool any_nocase() const;
-
- const_iterator begin() const {
+ const char *c_str() const { return s.c_str(); }
+ bool any_nocase() const;
+
+ const_iterator begin() const {
return const_iterator(*this, 0);
- }
-
- const_iterator end() const {
+ }
+
+ const_iterator end() const {
return const_iterator(*this, s.size());
- }
-
- const_reverse_iterator rbegin() const {
- return const_reverse_iterator(end());
- }
-
- const_reverse_iterator rend() const {
- return const_reverse_iterator(begin());
- }
-
+ }
+
+ const_reverse_iterator rbegin() const {
+ return const_reverse_iterator(end());
+ }
+
+ const_reverse_iterator rend() const {
+ return const_reverse_iterator(begin());
+ }
+
ue2_literal &erase(size_type pos = 0, size_type n = npos);
- void push_back(const elem &e) {
- push_back(e.c, e.nocase);
- }
-
- void push_back(char c, bool nc);
+ void push_back(const elem &e) {
+ push_back(e.c, e.nocase);
+ }
+
+ void push_back(char c, bool nc);
const elem back() const { return *rbegin(); }
-
+
friend ue2_literal operator+(ue2_literal a, const ue2_literal &b) {
a += b;
return a;
@@ -194,40 +194,40 @@ public:
/// Reverse this literal in-place.
void reverse();
- void operator+=(const ue2_literal &b);
- bool operator==(const ue2_literal &b) const {
- return s == b.s && nocase == b.nocase;
- }
- bool operator<(const ue2_literal &b) const;
-
- void clear(void) { s.clear(); nocase.clear(); }
-
- const std::string &get_string() const { return s; }
-
- void swap(ue2_literal &other) {
- s.swap(other.s);
- nocase.swap(other.nocase);
- }
-
+ void operator+=(const ue2_literal &b);
+ bool operator==(const ue2_literal &b) const {
+ return s == b.s && nocase == b.nocase;
+ }
+ bool operator<(const ue2_literal &b) const;
+
+ void clear(void) { s.clear(); nocase.clear(); }
+
+ const std::string &get_string() const { return s; }
+
+ void swap(ue2_literal &other) {
+ s.swap(other.s);
+ nocase.swap(other.nocase);
+ }
+
size_t hash() const;
-private:
+private:
friend const_iterator;
- std::string s;
+ std::string s;
boost::dynamic_bitset<> nocase;
-};
-
-/// Return a reversed copy of this literal.
-ue2_literal reverse_literal(const ue2_literal &in);
-
-// Escape any meta characters in a string
-std::string escapeStringMeta(const std::string &s);
-
-/** Note: may be overly conservative if only partially nocase */
-size_t maxStringSelfOverlap(const ue2_literal &a);
-size_t minStringPeriod(const ue2_literal &a);
-size_t maxStringOverlap(const ue2_literal &a, const ue2_literal &b);
-
+};
+
+/// Return a reversed copy of this literal.
+ue2_literal reverse_literal(const ue2_literal &in);
+
+// Escape any meta characters in a string
+std::string escapeStringMeta(const std::string &s);
+
+/** Note: may be overly conservative if only partially nocase */
+size_t maxStringSelfOverlap(const ue2_literal &a);
+size_t minStringPeriod(const ue2_literal &a);
+size_t maxStringOverlap(const ue2_literal &a, const ue2_literal &b);
+
/**
* \brief True iff the range of a literal given cannot be considered entirely
* case-sensitive nor entirely case-insensitive.
@@ -246,7 +246,7 @@ bool mixed_sensitivity_in(Iter begin, Iter end) {
cs = true;
}
}
-
+
return cs && nc;
}
@@ -259,62 +259,62 @@ bool mixed_sensitivity(const ue2_literal &s) {
return mixed_sensitivity_in(s.begin(), s.end());
}
-void make_nocase(ue2_literal *lit);
-
-struct case_iter {
- explicit case_iter(const ue2_literal &ss);
- const std::string &operator*() const { return s; } /* limited lifetime */
- case_iter &operator++ ();
- bool operator!=(const case_iter &b) const { return s != b.s; }
-private:
- std::string s;
- std::string s_orig;
- std::vector<bool> nocase;
-};
-
-case_iter caseIterateBegin(const ue2_literal &lit);
-case_iter caseIterateEnd();
-
-/** \brief True if there is any overlap between the characters in \a s and the
- * set characters in \a cr.
- *
- * Note: this means that if \a s is nocase, then \a cr only needs to have
- * either the lower-case or upper-case version of a letter set. */
-bool contains(const ue2_literal &s, const CharReach &cr);
-
-/// Returns true if \a a is a suffix of (or equal to) \a b.
-bool isSuffix(const ue2_literal &a, const ue2_literal &b);
-
-static inline
-std::vector<CharReach> as_cr_seq(const ue2_literal &s) {
- std::vector<CharReach> rv;
- rv.reserve(s.length());
- rv.insert(rv.end(), s.begin(), s.end());
- return rv;
-}
-
-/** \brief True if the given literal consists entirely of a flood of the same
- * character. */
-bool is_flood(const ue2_literal &s);
-
-#if defined(DUMP_SUPPORT) || defined(DEBUG)
-/* Utility functions for debugging/dumping */
-
-/// Escape a string so it's dot-printable.
-std::string dotEscapeString(const std::string &s);
-
-std::string dumpString(const ue2_literal &lit);
-
-/// Escape a string so that it's screen-printable.
-std::string escapeString(const std::string &s);
-
-/// Escape a ue2_literal so that it's screen-printable.
-std::string escapeString(const ue2_literal &lit);
-
-#endif
-
-} // namespace ue2
-
+void make_nocase(ue2_literal *lit);
+
+struct case_iter {
+ explicit case_iter(const ue2_literal &ss);
+ const std::string &operator*() const { return s; } /* limited lifetime */
+ case_iter &operator++ ();
+ bool operator!=(const case_iter &b) const { return s != b.s; }
+private:
+ std::string s;
+ std::string s_orig;
+ std::vector<bool> nocase;
+};
+
+case_iter caseIterateBegin(const ue2_literal &lit);
+case_iter caseIterateEnd();
+
+/** \brief True if there is any overlap between the characters in \a s and the
+ * set characters in \a cr.
+ *
+ * Note: this means that if \a s is nocase, then \a cr only needs to have
+ * either the lower-case or upper-case version of a letter set. */
+bool contains(const ue2_literal &s, const CharReach &cr);
+
+/// Returns true if \a a is a suffix of (or equal to) \a b.
+bool isSuffix(const ue2_literal &a, const ue2_literal &b);
+
+static inline
+std::vector<CharReach> as_cr_seq(const ue2_literal &s) {
+ std::vector<CharReach> rv;
+ rv.reserve(s.length());
+ rv.insert(rv.end(), s.begin(), s.end());
+ return rv;
+}
+
+/** \brief True if the given literal consists entirely of a flood of the same
+ * character. */
+bool is_flood(const ue2_literal &s);
+
+#if defined(DUMP_SUPPORT) || defined(DEBUG)
+/* Utility functions for debugging/dumping */
+
+/// Escape a string so it's dot-printable.
+std::string dotEscapeString(const std::string &s);
+
+std::string dumpString(const ue2_literal &lit);
+
+/// Escape a string so that it's screen-printable.
+std::string escapeString(const std::string &s);
+
+/// Escape a ue2_literal so that it's screen-printable.
+std::string escapeString(const ue2_literal &lit);
+
+#endif
+
+} // namespace ue2
+
namespace std {
template<>
@@ -333,4 +333,4 @@ struct hash<ue2::ue2_literal> {
} // namespace std
-#endif
+#endif
diff --git a/contrib/libs/hyperscan/src/util/unaligned.h b/contrib/libs/hyperscan/src/util/unaligned.h
index dd7f3b5abb..299e5677c3 100644
--- a/contrib/libs/hyperscan/src/util/unaligned.h
+++ b/contrib/libs/hyperscan/src/util/unaligned.h
@@ -1,98 +1,98 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Helper functions for unaligned loads and stores.
- */
-
-#ifndef UNALIGNED_H
-#define UNALIGNED_H
-
-#include "ue2common.h"
-
-#if !defined(_WIN32)
-#define PACKED__MAY_ALIAS __attribute__((packed, may_alias))
-#else
-#define PACKED__MAY_ALIAS
-#pragma pack(push, 1) // pack everything until told otherwise
-#endif
-
-/// Perform an unaligned 16-bit load
-static really_inline
-u16 unaligned_load_u16(const void *ptr) {
- struct unaligned { u16 u; } PACKED__MAY_ALIAS;
- const struct unaligned *uptr = (const struct unaligned *)ptr;
- return uptr->u;
-}
-
-/// Perform an unaligned 32-bit load
-static really_inline
-u32 unaligned_load_u32(const void *ptr) {
- struct unaligned { u32 u; } PACKED__MAY_ALIAS;
- const struct unaligned *uptr = (const struct unaligned *)ptr;
- return uptr->u;
-}
-
-/// Perform an unaligned 64-bit load
-static really_inline
-u64a unaligned_load_u64a(const void *ptr) {
- struct unaligned { u64a u; } PACKED__MAY_ALIAS;
- const struct unaligned *uptr = (const struct unaligned *)ptr;
- return uptr->u;
-}
-
-/// Perform an unaligned 16-bit store
-static really_inline
-void unaligned_store_u16(void *ptr, u16 val) {
- struct unaligned { u16 u; } PACKED__MAY_ALIAS;
- struct unaligned *uptr = (struct unaligned *)ptr;
- uptr->u = val;
-}
-
-/// Perform an unaligned 32-bit store
-static really_inline
-void unaligned_store_u32(void *ptr, u32 val) {
- struct unaligned { u32 u; } PACKED__MAY_ALIAS;
- struct unaligned *uptr = (struct unaligned *)ptr;
- uptr->u = val;
-}
-
-/// Perform an unaligned 64-bit store
-static really_inline
-void unaligned_store_u64a(void *ptr, u64a val) {
- struct unaligned { u64a u; } PACKED__MAY_ALIAS;
- struct unaligned *uptr = (struct unaligned *)ptr;
- uptr->u = val;
-}
-#if defined(_WIN32)
-#pragma pack(pop)
-#endif // win32
-
-#undef PACKED__MAY_ALIAS
-
-#endif // UNALIGNED_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Helper functions for unaligned loads and stores.
+ */
+
+#ifndef UNALIGNED_H
+#define UNALIGNED_H
+
+#include "ue2common.h"
+
+#if !defined(_WIN32)
+#define PACKED__MAY_ALIAS __attribute__((packed, may_alias))
+#else
+#define PACKED__MAY_ALIAS
+#pragma pack(push, 1) // pack everything until told otherwise
+#endif
+
+/// Perform an unaligned 16-bit load
+static really_inline
+u16 unaligned_load_u16(const void *ptr) {
+ struct unaligned { u16 u; } PACKED__MAY_ALIAS;
+ const struct unaligned *uptr = (const struct unaligned *)ptr;
+ return uptr->u;
+}
+
+/// Perform an unaligned 32-bit load
+static really_inline
+u32 unaligned_load_u32(const void *ptr) {
+ struct unaligned { u32 u; } PACKED__MAY_ALIAS;
+ const struct unaligned *uptr = (const struct unaligned *)ptr;
+ return uptr->u;
+}
+
+/// Perform an unaligned 64-bit load
+static really_inline
+u64a unaligned_load_u64a(const void *ptr) {
+ struct unaligned { u64a u; } PACKED__MAY_ALIAS;
+ const struct unaligned *uptr = (const struct unaligned *)ptr;
+ return uptr->u;
+}
+
+/// Perform an unaligned 16-bit store
+static really_inline
+void unaligned_store_u16(void *ptr, u16 val) {
+ struct unaligned { u16 u; } PACKED__MAY_ALIAS;
+ struct unaligned *uptr = (struct unaligned *)ptr;
+ uptr->u = val;
+}
+
+/// Perform an unaligned 32-bit store
+static really_inline
+void unaligned_store_u32(void *ptr, u32 val) {
+ struct unaligned { u32 u; } PACKED__MAY_ALIAS;
+ struct unaligned *uptr = (struct unaligned *)ptr;
+ uptr->u = val;
+}
+
+/// Perform an unaligned 64-bit store
+static really_inline
+void unaligned_store_u64a(void *ptr, u64a val) {
+ struct unaligned { u64a u; } PACKED__MAY_ALIAS;
+ struct unaligned *uptr = (struct unaligned *)ptr;
+ uptr->u = val;
+}
+#if defined(_WIN32)
+#pragma pack(pop)
+#endif // win32
+
+#undef PACKED__MAY_ALIAS
+
+#endif // UNALIGNED_H
diff --git a/contrib/libs/hyperscan/src/util/unicode_def.h b/contrib/libs/hyperscan/src/util/unicode_def.h
index e817cb42b2..73ff5e87d4 100644
--- a/contrib/libs/hyperscan/src/util/unicode_def.h
+++ b/contrib/libs/hyperscan/src/util/unicode_def.h
@@ -1,85 +1,85 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef UNICODE_DEF_H
-#define UNICODE_DEF_H
-
-#include "ue2common.h"
-
-#define MAX_UNICODE 0x10FFFF
-#define INVALID_UNICODE 0xffffffff /* unicode could never go above 2^31 */
-
-#define UTF_2CHAR_MIN (1U << 7)
-#define UTF_3CHAR_MIN (1U << 11)
-#define UTF_4CHAR_MIN (1U << 16)
-#define UTF_CONT_SHIFT 6
-#define UTF_CONT_BYTE_RANGE (1U << UTF_CONT_SHIFT)
-#define UTF_CONT_BYTE_HEADER ((u8)0x80) /* 10xx xxxx */
-#define UTF_TWO_BYTE_HEADER ((u8)0xc0) /* 110x xxxx */
-#define UTF_THREE_BYTE_HEADER ((u8)0xe0) /* 1110 xxxx */
-#define UTF_FOUR_BYTE_HEADER ((u8)0xf0) /* 1111 0xxx */
-
-#define UTF_CONT_BYTE_VALUE_MASK 0x3f
-
-#define UTF_CONT_MIN UTF_CONT_BYTE_HEADER
-#define UTF_CONT_MAX (UTF_TWO_BYTE_HEADER - 1)
-
-#define UTF_TWO_BYTE_MIN UTF_TWO_BYTE_HEADER
-#define UTF_TWO_BYTE_MAX (UTF_THREE_BYTE_HEADER - 1)
-
-#define UTF_THREE_BYTE_MIN UTF_THREE_BYTE_HEADER
-#define UTF_THREE_BYTE_MAX (UTF_FOUR_BYTE_HEADER - 1)
-
-#define UTF_FOUR_BYTE_MIN UTF_FOUR_BYTE_HEADER
-#define UTF_FOUR_BYTE_MAX ((u8)0xf4)
-
-#define UTF_CONT_CR CharReach(UTF_CONT_MIN, UTF_CONT_MAX)
-#define UTF_ASCII_CR CharReach(0, 127)
-#define UTF_START_CR CharReach(UTF_TWO_BYTE_MIN, UTF_FOUR_BYTE_MAX)
-#define UTF_TWO_START_CR CharReach(UTF_TWO_BYTE_MIN, UTF_TWO_BYTE_MAX)
-#define UTF_THREE_START_CR CharReach(UTF_THREE_BYTE_MIN, UTF_THREE_BYTE_MAX)
-#define UTF_FOUR_START_CR CharReach(UTF_FOUR_BYTE_MIN, UTF_FOUR_BYTE_MAX)
-
-#define UNICODE_SURROGATE_MIN 0xd800
-#define UNICODE_SURROGATE_MAX 0xdfff
-
-#ifdef __cplusplus
-
-namespace ue2 {
-typedef u32 unichar; /* represents a unicode code point */
-
-static UNUSED
-u8 makeContByte(u8 val) {
- return UTF_CONT_BYTE_HEADER | (val & UTF_CONT_BYTE_VALUE_MASK);
-}
-
-} // namespace
-
-#endif // __cplusplus
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef UNICODE_DEF_H
+#define UNICODE_DEF_H
+
+#include "ue2common.h"
+
+#define MAX_UNICODE 0x10FFFF
+#define INVALID_UNICODE 0xffffffff /* unicode could never go above 2^31 */
+
+#define UTF_2CHAR_MIN (1U << 7)
+#define UTF_3CHAR_MIN (1U << 11)
+#define UTF_4CHAR_MIN (1U << 16)
+#define UTF_CONT_SHIFT 6
+#define UTF_CONT_BYTE_RANGE (1U << UTF_CONT_SHIFT)
+#define UTF_CONT_BYTE_HEADER ((u8)0x80) /* 10xx xxxx */
+#define UTF_TWO_BYTE_HEADER ((u8)0xc0) /* 110x xxxx */
+#define UTF_THREE_BYTE_HEADER ((u8)0xe0) /* 1110 xxxx */
+#define UTF_FOUR_BYTE_HEADER ((u8)0xf0) /* 1111 0xxx */
+
+#define UTF_CONT_BYTE_VALUE_MASK 0x3f
+
+#define UTF_CONT_MIN UTF_CONT_BYTE_HEADER
+#define UTF_CONT_MAX (UTF_TWO_BYTE_HEADER - 1)
+
+#define UTF_TWO_BYTE_MIN UTF_TWO_BYTE_HEADER
+#define UTF_TWO_BYTE_MAX (UTF_THREE_BYTE_HEADER - 1)
+
+#define UTF_THREE_BYTE_MIN UTF_THREE_BYTE_HEADER
+#define UTF_THREE_BYTE_MAX (UTF_FOUR_BYTE_HEADER - 1)
+
+#define UTF_FOUR_BYTE_MIN UTF_FOUR_BYTE_HEADER
+#define UTF_FOUR_BYTE_MAX ((u8)0xf4)
+
+#define UTF_CONT_CR CharReach(UTF_CONT_MIN, UTF_CONT_MAX)
+#define UTF_ASCII_CR CharReach(0, 127)
+#define UTF_START_CR CharReach(UTF_TWO_BYTE_MIN, UTF_FOUR_BYTE_MAX)
+#define UTF_TWO_START_CR CharReach(UTF_TWO_BYTE_MIN, UTF_TWO_BYTE_MAX)
+#define UTF_THREE_START_CR CharReach(UTF_THREE_BYTE_MIN, UTF_THREE_BYTE_MAX)
+#define UTF_FOUR_START_CR CharReach(UTF_FOUR_BYTE_MIN, UTF_FOUR_BYTE_MAX)
+
+#define UNICODE_SURROGATE_MIN 0xd800
+#define UNICODE_SURROGATE_MAX 0xdfff
+
+#ifdef __cplusplus
+
+namespace ue2 {
+typedef u32 unichar; /* represents a unicode code point */
+
+static UNUSED
+u8 makeContByte(u8 val) {
+ return UTF_CONT_BYTE_HEADER | (val & UTF_CONT_BYTE_VALUE_MASK);
+}
+
+} // namespace
+
+#endif // __cplusplus
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/unicode_set.h b/contrib/libs/hyperscan/src/util/unicode_set.h
index 25a7b6cfbc..e2dd351a62 100644
--- a/contrib/libs/hyperscan/src/util/unicode_set.h
+++ b/contrib/libs/hyperscan/src/util/unicode_set.h
@@ -1,141 +1,141 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef UNICODE_SET
-#define UNICODE_SET
-
-#include "unicode_def.h"
-
-#include <boost/icl/interval_set.hpp>
-
-namespace ue2 {
-
-class CodePointSet {
-public:
- typedef boost::icl::closed_interval<unichar> interval;
- typedef boost::icl::interval_set<unichar, std::less, interval> implT;
- typedef implT::const_iterator const_iterator;
-
- CodePointSet(void) {}
-
- explicit CodePointSet(const interval &st) : impl(st) {}
-
- bool none(void) const {
- return impl.empty();
- }
-
- void set(unichar c) {
- assert(c <= MAX_UNICODE);
- impl.insert(c);
- }
-
- void unset(unichar c) {
- assert(c <= MAX_UNICODE);
- impl.subtract(c);
- }
-
- void setRange(unichar from, unichar to) { /* inclusive */
- assert(from <= to);
- assert(to <= MAX_UNICODE);
- impl.insert(interval(from, to));
- }
-
- void unsetRange(unichar from, unichar to) { /* inclusive */
- assert(from <= to);
- assert(to <= MAX_UNICODE);
- impl.subtract(interval(from, to));
- }
-
- void flip(void) {
- impl = implT(interval(0, MAX_UNICODE)) - impl;
- }
-
- void operator|=(const CodePointSet &a) {
- impl += a.impl;
- }
-
- const_iterator begin(void) const {
- return impl.begin();
- }
-
- const_iterator end(void) const {
- return impl.end();
- }
-
- size_t count(void) const {
- return cardinality(impl);
- }
-
- CodePointSet operator~(void) const {
- CodePointSet rv = *this;
- rv.flip();
- return rv;
- }
-
- bool operator==(const CodePointSet &a) const {
- return is_element_equal(impl, a.impl);
- }
-
- bool operator!=(const CodePointSet &a) const {
- return !is_element_equal(impl, a.impl);
- }
-
- bool isSubset(const CodePointSet &a) const {
- // Check that adding an interval set has no effect
- return ((impl + a.impl) == impl);
- }
-
- void operator-=(const CodePointSet &a) {
- impl -= a.impl;
- }
-
- /* finds the nth set codepoint, returns INVALID_UNICODE on failure */
- unichar at(size_t pos) const {
- for (const_iterator i = begin(), e = end(); i != e; ++i) {
- size_t int_count = cardinality(*i);
- if (int_count <= pos) {
- /* not in this interval, check next */
- pos -= int_count;
- continue;
- } else {
- return lower(*i) + pos;
- }
- }
-
- return INVALID_UNICODE;
- }
-
- void swap(CodePointSet &other) { impl.swap(other.impl); }
-
-private:
- implT impl;
-};
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef UNICODE_SET
+#define UNICODE_SET
+
+#include "unicode_def.h"
+
+#include <boost/icl/interval_set.hpp>
+
+namespace ue2 {
+
+class CodePointSet {
+public:
+ typedef boost::icl::closed_interval<unichar> interval;
+ typedef boost::icl::interval_set<unichar, std::less, interval> implT;
+ typedef implT::const_iterator const_iterator;
+
+ CodePointSet(void) {}
+
+ explicit CodePointSet(const interval &st) : impl(st) {}
+
+ bool none(void) const {
+ return impl.empty();
+ }
+
+ void set(unichar c) {
+ assert(c <= MAX_UNICODE);
+ impl.insert(c);
+ }
+
+ void unset(unichar c) {
+ assert(c <= MAX_UNICODE);
+ impl.subtract(c);
+ }
+
+ void setRange(unichar from, unichar to) { /* inclusive */
+ assert(from <= to);
+ assert(to <= MAX_UNICODE);
+ impl.insert(interval(from, to));
+ }
+
+ void unsetRange(unichar from, unichar to) { /* inclusive */
+ assert(from <= to);
+ assert(to <= MAX_UNICODE);
+ impl.subtract(interval(from, to));
+ }
+
+ void flip(void) {
+ impl = implT(interval(0, MAX_UNICODE)) - impl;
+ }
+
+ void operator|=(const CodePointSet &a) {
+ impl += a.impl;
+ }
+
+ const_iterator begin(void) const {
+ return impl.begin();
+ }
+
+ const_iterator end(void) const {
+ return impl.end();
+ }
+
+ size_t count(void) const {
+ return cardinality(impl);
+ }
+
+ CodePointSet operator~(void) const {
+ CodePointSet rv = *this;
+ rv.flip();
+ return rv;
+ }
+
+ bool operator==(const CodePointSet &a) const {
+ return is_element_equal(impl, a.impl);
+ }
+
+ bool operator!=(const CodePointSet &a) const {
+ return !is_element_equal(impl, a.impl);
+ }
+
+ bool isSubset(const CodePointSet &a) const {
+ // Check that adding an interval set has no effect
+ return ((impl + a.impl) == impl);
+ }
+
+ void operator-=(const CodePointSet &a) {
+ impl -= a.impl;
+ }
+
+ /* finds the nth set codepoint, returns INVALID_UNICODE on failure */
+ unichar at(size_t pos) const {
+ for (const_iterator i = begin(), e = end(); i != e; ++i) {
+ size_t int_count = cardinality(*i);
+ if (int_count <= pos) {
+ /* not in this interval, check next */
+ pos -= int_count;
+ continue;
+ } else {
+ return lower(*i) + pos;
+ }
+ }
+
+ return INVALID_UNICODE;
+ }
+
+ void swap(CodePointSet &other) { impl.swap(other.impl); }
+
+private:
+ implT impl;
+};
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/util/uniform_ops.h b/contrib/libs/hyperscan/src/util/uniform_ops.h
index 89afe39fc7..262104aca2 100644
--- a/contrib/libs/hyperscan/src/util/uniform_ops.h
+++ b/contrib/libs/hyperscan/src/util/uniform_ops.h
@@ -1,106 +1,106 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Uniformly-named primitives named by target type.
- *
- * The following are a set of primitives named by target type, so that we can
- * macro the hell out of all our NFA implementations. Hurrah!
- */
-
-#ifndef UNIFORM_OPS_H
-#define UNIFORM_OPS_H
-
-#include "ue2common.h"
-#include "simd_utils.h"
-#include "unaligned.h"
-
-// Aligned loads
-#define load_u8(a) (*(const u8 *)(a))
-#define load_u16(a) (*(const u16 *)(a))
-#define load_u32(a) (*(const u32 *)(a))
-#define load_u64a(a) (*(const u64a *)(a))
-#define load_m128(a) load128(a)
-#define load_m256(a) load256(a)
-#define load_m384(a) load384(a)
-#define load_m512(a) load512(a)
-
-// Unaligned loads
-#define loadu_u8(a) (*(const u8 *)(a))
-#define loadu_u16(a) unaligned_load_u16((const u8 *)(a))
-#define loadu_u32(a) unaligned_load_u32((const u8 *)(a))
-#define loadu_u64a(a) unaligned_load_u64a((const u8 *)(a))
-#define loadu_m128(a) loadu128(a)
-#define loadu_m256(a) loadu256(a)
-#define loadu_m384(a) loadu384(a)
-#define loadu_m512(a) loadu512(a)
-
-// Aligned stores
-#define store_u8(ptr, a) do { *(u8 *)(ptr) = (a); } while(0)
-#define store_u16(ptr, a) do { *(u16 *)(ptr) = (a); } while(0)
-#define store_u32(ptr, a) do { *(u32 *)(ptr) = (a); } while(0)
-#define store_u64a(ptr, a) do { *(u64a *)(ptr) = (a); } while(0)
-#define store_m128(ptr, a) store128(ptr, a)
-#define store_m256(ptr, a) store256(ptr, a)
-#define store_m384(ptr, a) store384(ptr, a)
-#define store_m512(ptr, a) store512(ptr, a)
-
-// Unaligned stores
-#define storeu_u8(ptr, a) do { *(u8 *)(ptr) = (a); } while(0)
-#define storeu_u16(ptr, a) unaligned_store_u16(ptr, a)
-#define storeu_u32(ptr, a) unaligned_store_u32(ptr, a)
-#define storeu_u64a(ptr, a) unaligned_store_u64a(ptr, a)
-#define storeu_m128(ptr, a) storeu128(ptr, a)
-
-#define zero_u8 0
-#define zero_u32 0
-#define zero_u64a 0
-#define zero_m128 zeroes128()
-#define zero_m256 zeroes256()
-#define zero_m384 zeroes384()
-#define zero_m512 zeroes512()
-
-#define ones_u8 0xff
-#define ones_u32 0xfffffffful
-#define ones_u64a 0xffffffffffffffffull
-#define ones_m128 ones128()
-#define ones_m256 ones256()
-#define ones_m384 ones384()
-#define ones_m512 ones512()
-
-#define or_u8(a, b) ((a) | (b))
-#define or_u32(a, b) ((a) | (b))
-#define or_u64a(a, b) ((a) | (b))
-#define or_m128(a, b) (or128(a, b))
-#define or_m256(a, b) (or256(a, b))
-#define or_m384(a, b) (or384(a, b))
-#define or_m512(a, b) (or512(a, b))
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Uniformly-named primitives named by target type.
+ *
+ * The following are a set of primitives named by target type, so that we can
+ * macro the hell out of all our NFA implementations. Hurrah!
+ */
+
+#ifndef UNIFORM_OPS_H
+#define UNIFORM_OPS_H
+
+#include "ue2common.h"
+#include "simd_utils.h"
+#include "unaligned.h"
+
+// Aligned loads
+#define load_u8(a) (*(const u8 *)(a))
+#define load_u16(a) (*(const u16 *)(a))
+#define load_u32(a) (*(const u32 *)(a))
+#define load_u64a(a) (*(const u64a *)(a))
+#define load_m128(a) load128(a)
+#define load_m256(a) load256(a)
+#define load_m384(a) load384(a)
+#define load_m512(a) load512(a)
+
+// Unaligned loads
+#define loadu_u8(a) (*(const u8 *)(a))
+#define loadu_u16(a) unaligned_load_u16((const u8 *)(a))
+#define loadu_u32(a) unaligned_load_u32((const u8 *)(a))
+#define loadu_u64a(a) unaligned_load_u64a((const u8 *)(a))
+#define loadu_m128(a) loadu128(a)
+#define loadu_m256(a) loadu256(a)
+#define loadu_m384(a) loadu384(a)
+#define loadu_m512(a) loadu512(a)
+
+// Aligned stores
+#define store_u8(ptr, a) do { *(u8 *)(ptr) = (a); } while(0)
+#define store_u16(ptr, a) do { *(u16 *)(ptr) = (a); } while(0)
+#define store_u32(ptr, a) do { *(u32 *)(ptr) = (a); } while(0)
+#define store_u64a(ptr, a) do { *(u64a *)(ptr) = (a); } while(0)
+#define store_m128(ptr, a) store128(ptr, a)
+#define store_m256(ptr, a) store256(ptr, a)
+#define store_m384(ptr, a) store384(ptr, a)
+#define store_m512(ptr, a) store512(ptr, a)
+
+// Unaligned stores
+#define storeu_u8(ptr, a) do { *(u8 *)(ptr) = (a); } while(0)
+#define storeu_u16(ptr, a) unaligned_store_u16(ptr, a)
+#define storeu_u32(ptr, a) unaligned_store_u32(ptr, a)
+#define storeu_u64a(ptr, a) unaligned_store_u64a(ptr, a)
+#define storeu_m128(ptr, a) storeu128(ptr, a)
+
+#define zero_u8 0
+#define zero_u32 0
+#define zero_u64a 0
+#define zero_m128 zeroes128()
+#define zero_m256 zeroes256()
+#define zero_m384 zeroes384()
+#define zero_m512 zeroes512()
+
+#define ones_u8 0xff
+#define ones_u32 0xfffffffful
+#define ones_u64a 0xffffffffffffffffull
+#define ones_m128 ones128()
+#define ones_m256 ones256()
+#define ones_m384 ones384()
+#define ones_m512 ones512()
+
+#define or_u8(a, b) ((a) | (b))
+#define or_u32(a, b) ((a) | (b))
+#define or_u64a(a, b) ((a) | (b))
+#define or_m128(a, b) (or128(a, b))
+#define or_m256(a, b) (or256(a, b))
+#define or_m384(a, b) (or384(a, b))
+#define or_m512(a, b) (or512(a, b))
+
#if defined(HAVE_AVX512VBMI)
#define expand_m128(a) (expand128(a))
#define expand_m256(a) (expand256(a))
@@ -113,131 +113,131 @@
#define shuffle_byte_m512(a, b) (vpermb512(a, b))
#endif
-#define and_u8(a, b) ((a) & (b))
-#define and_u32(a, b) ((a) & (b))
-#define and_u64a(a, b) ((a) & (b))
-#define and_m128(a, b) (and128(a, b))
-#define and_m256(a, b) (and256(a, b))
-#define and_m384(a, b) (and384(a, b))
-#define and_m512(a, b) (and512(a, b))
-
-#define not_u8(a) (~(a))
-#define not_u32(a) (~(a))
-#define not_u64a(a) (~(a))
-#define not_m128(a) (not128(a))
-#define not_m256(a) (not256(a))
-#define not_m384(a) (not384(a))
-#define not_m512(a) (not512(a))
-
-#define andnot_u8(a, b) ((~(a)) & (b))
-#define andnot_u32(a, b) ((~(a)) & (b))
-#define andnot_u64a(a, b) ((~(a)) & (b))
-#define andnot_m128(a, b) (andnot128(a, b))
-#define andnot_m256(a, b) (andnot256(a, b))
-#define andnot_m384(a, b) (andnot384(a, b))
-#define andnot_m512(a, b) (andnot512(a, b))
-
+#define and_u8(a, b) ((a) & (b))
+#define and_u32(a, b) ((a) & (b))
+#define and_u64a(a, b) ((a) & (b))
+#define and_m128(a, b) (and128(a, b))
+#define and_m256(a, b) (and256(a, b))
+#define and_m384(a, b) (and384(a, b))
+#define and_m512(a, b) (and512(a, b))
+
+#define not_u8(a) (~(a))
+#define not_u32(a) (~(a))
+#define not_u64a(a) (~(a))
+#define not_m128(a) (not128(a))
+#define not_m256(a) (not256(a))
+#define not_m384(a) (not384(a))
+#define not_m512(a) (not512(a))
+
+#define andnot_u8(a, b) ((~(a)) & (b))
+#define andnot_u32(a, b) ((~(a)) & (b))
+#define andnot_u64a(a, b) ((~(a)) & (b))
+#define andnot_m128(a, b) (andnot128(a, b))
+#define andnot_m256(a, b) (andnot256(a, b))
+#define andnot_m384(a, b) (andnot384(a, b))
+#define andnot_m512(a, b) (andnot512(a, b))
+
#define lshift_u32(a, b) ((a) << (b))
#define lshift_u64a(a, b) ((a) << (b))
#define lshift_m128(a, b) (lshift64_m128(a, b))
#define lshift_m256(a, b) (lshift64_m256(a, b))
#define lshift_m384(a, b) (lshift64_m384(a, b))
#define lshift_m512(a, b) (lshift64_m512(a, b))
-
-#define isZero_u8(a) ((a) == 0)
-#define isZero_u32(a) ((a) == 0)
-#define isZero_u64a(a) ((a) == 0)
-#define isZero_m128(a) (!isnonzero128(a))
-#define isZero_m256(a) (!isnonzero256(a))
-#define isZero_m384(a) (!isnonzero384(a))
-#define isZero_m512(a) (!isnonzero512(a))
-
-#define isNonZero_u8(a) ((a) != 0)
-#define isNonZero_u32(a) ((a) != 0)
-#define isNonZero_u64a(a) ((a) != 0)
-#define isNonZero_m128(a) (isnonzero128(a))
-#define isNonZero_m256(a) (isnonzero256(a))
-#define isNonZero_m384(a) (isnonzero384(a))
-#define isNonZero_m512(a) (isnonzero512(a))
-
-#define diffrich_u32(a, b) ((a) != (b))
-#define diffrich_u64a(a, b) ((a) != (b) ? 3 : 0) //TODO: impl 32bit granularity
-#define diffrich_m128(a, b) (diffrich128(a, b))
-#define diffrich_m256(a, b) (diffrich256(a, b))
-#define diffrich_m384(a, b) (diffrich384(a, b))
-#define diffrich_m512(a, b) (diffrich512(a, b))
-
-#define diffrich64_u32(a, b) ((a) != (b))
-#define diffrich64_u64a(a, b) ((a) != (b) ? 1 : 0)
-#define diffrich64_m128(a, b) (diffrich64_128(a, b))
-#define diffrich64_m256(a, b) (diffrich64_256(a, b))
-#define diffrich64_m384(a, b) (diffrich64_384(a, b))
-#define diffrich64_m512(a, b) (diffrich64_512(a, b))
-
-#define noteq_u8(a, b) ((a) != (b))
-#define noteq_u32(a, b) ((a) != (b))
-#define noteq_u64a(a, b) ((a) != (b))
-#define noteq_m128(a, b) (diff128(a, b))
-#define noteq_m256(a, b) (diff256(a, b))
-#define noteq_m384(a, b) (diff384(a, b))
-#define noteq_m512(a, b) (diff512(a, b))
-
-#define partial_store_m128(ptr, v, sz) storebytes128(ptr, v, sz)
-#define partial_store_m256(ptr, v, sz) storebytes256(ptr, v, sz)
-#define partial_store_m384(ptr, v, sz) storebytes384(ptr, v, sz)
-#define partial_store_m512(ptr, v, sz) storebytes512(ptr, v, sz)
-
-#define partial_load_m128(ptr, sz) loadbytes128(ptr, sz)
-#define partial_load_m256(ptr, sz) loadbytes256(ptr, sz)
-#define partial_load_m384(ptr, sz) loadbytes384(ptr, sz)
-#define partial_load_m512(ptr, sz) loadbytes512(ptr, sz)
-
+
+#define isZero_u8(a) ((a) == 0)
+#define isZero_u32(a) ((a) == 0)
+#define isZero_u64a(a) ((a) == 0)
+#define isZero_m128(a) (!isnonzero128(a))
+#define isZero_m256(a) (!isnonzero256(a))
+#define isZero_m384(a) (!isnonzero384(a))
+#define isZero_m512(a) (!isnonzero512(a))
+
+#define isNonZero_u8(a) ((a) != 0)
+#define isNonZero_u32(a) ((a) != 0)
+#define isNonZero_u64a(a) ((a) != 0)
+#define isNonZero_m128(a) (isnonzero128(a))
+#define isNonZero_m256(a) (isnonzero256(a))
+#define isNonZero_m384(a) (isnonzero384(a))
+#define isNonZero_m512(a) (isnonzero512(a))
+
+#define diffrich_u32(a, b) ((a) != (b))
+#define diffrich_u64a(a, b) ((a) != (b) ? 3 : 0) //TODO: impl 32bit granularity
+#define diffrich_m128(a, b) (diffrich128(a, b))
+#define diffrich_m256(a, b) (diffrich256(a, b))
+#define diffrich_m384(a, b) (diffrich384(a, b))
+#define diffrich_m512(a, b) (diffrich512(a, b))
+
+#define diffrich64_u32(a, b) ((a) != (b))
+#define diffrich64_u64a(a, b) ((a) != (b) ? 1 : 0)
+#define diffrich64_m128(a, b) (diffrich64_128(a, b))
+#define diffrich64_m256(a, b) (diffrich64_256(a, b))
+#define diffrich64_m384(a, b) (diffrich64_384(a, b))
+#define diffrich64_m512(a, b) (diffrich64_512(a, b))
+
+#define noteq_u8(a, b) ((a) != (b))
+#define noteq_u32(a, b) ((a) != (b))
+#define noteq_u64a(a, b) ((a) != (b))
+#define noteq_m128(a, b) (diff128(a, b))
+#define noteq_m256(a, b) (diff256(a, b))
+#define noteq_m384(a, b) (diff384(a, b))
+#define noteq_m512(a, b) (diff512(a, b))
+
+#define partial_store_m128(ptr, v, sz) storebytes128(ptr, v, sz)
+#define partial_store_m256(ptr, v, sz) storebytes256(ptr, v, sz)
+#define partial_store_m384(ptr, v, sz) storebytes384(ptr, v, sz)
+#define partial_store_m512(ptr, v, sz) storebytes512(ptr, v, sz)
+
+#define partial_load_m128(ptr, sz) loadbytes128(ptr, sz)
+#define partial_load_m256(ptr, sz) loadbytes256(ptr, sz)
+#define partial_load_m384(ptr, sz) loadbytes384(ptr, sz)
+#define partial_load_m512(ptr, sz) loadbytes512(ptr, sz)
+
#define store_compressed_u32(ptr, x, m, len) storecompressed32(ptr, x, m, len)
#define store_compressed_u64a(ptr, x, m, len) storecompressed64(ptr, x, m, len)
#define store_compressed_m128(ptr, x, m, len) storecompressed128(ptr, x, m, len)
#define store_compressed_m256(ptr, x, m, len) storecompressed256(ptr, x, m, len)
#define store_compressed_m384(ptr, x, m, len) storecompressed384(ptr, x, m, len)
#define store_compressed_m512(ptr, x, m, len) storecompressed512(ptr, x, m, len)
-
+
#define load_compressed_u32(x, ptr, m, len) loadcompressed32(x, ptr, m, len)
#define load_compressed_u64a(x, ptr, m, len) loadcompressed64(x, ptr, m, len)
#define load_compressed_m128(x, ptr, m, len) loadcompressed128(x, ptr, m, len)
#define load_compressed_m256(x, ptr, m, len) loadcompressed256(x, ptr, m, len)
#define load_compressed_m384(x, ptr, m, len) loadcompressed384(x, ptr, m, len)
#define load_compressed_m512(x, ptr, m, len) loadcompressed512(x, ptr, m, len)
-
+
static really_inline
void clearbit_u32(u32 *p, u32 n) {
- assert(n < sizeof(*p) * 8);
- *p &= ~(1U << n);
-}
+ assert(n < sizeof(*p) * 8);
+ *p &= ~(1U << n);
+}
static really_inline
void clearbit_u64a(u64a *p, u32 n) {
- assert(n < sizeof(*p) * 8);
- *p &= ~(1ULL << n);
-}
-
-#define clearbit_m128(ptr, n) (clearbit128(ptr, n))
-#define clearbit_m256(ptr, n) (clearbit256(ptr, n))
-#define clearbit_m384(ptr, n) (clearbit384(ptr, n))
-#define clearbit_m512(ptr, n) (clearbit512(ptr, n))
-
+ assert(n < sizeof(*p) * 8);
+ *p &= ~(1ULL << n);
+}
+
+#define clearbit_m128(ptr, n) (clearbit128(ptr, n))
+#define clearbit_m256(ptr, n) (clearbit256(ptr, n))
+#define clearbit_m384(ptr, n) (clearbit384(ptr, n))
+#define clearbit_m512(ptr, n) (clearbit512(ptr, n))
+
static really_inline
char testbit_u32(u32 val, u32 n) {
assert(n < sizeof(val) * 8);
return !!(val & (1U << n));
-}
+}
static really_inline
char testbit_u64a(u64a val, u32 n) {
assert(n < sizeof(val) * 8);
return !!(val & (1ULL << n));
-}
-
+}
+
#define testbit_m128(val, n) (testbit128(val, n))
#define testbit_m256(val, n) (testbit256(val, n))
#define testbit_m384(val, n) (testbit384(val, n))
#define testbit_m512(val, n) (testbit512(val, n))
-#endif
+#endif
diff --git a/contrib/libs/hyperscan/src/util/verify_types.h b/contrib/libs/hyperscan/src/util/verify_types.h
index 7426acdd29..5833d5ec62 100644
--- a/contrib/libs/hyperscan/src/util/verify_types.h
+++ b/contrib/libs/hyperscan/src/util/verify_types.h
@@ -1,42 +1,42 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef UTIL_VERIFY_TYPES
-#define UTIL_VERIFY_TYPES
-
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef UTIL_VERIFY_TYPES
+#define UTIL_VERIFY_TYPES
+
+#include "ue2common.h"
#include "util/compile_error.h"
-
-#include <cassert>
+
+#include <cassert>
#include <type_traits>
-
-namespace ue2 {
-
+
+namespace ue2 {
+
template<typename To_T, typename From_T>
To_T verify_cast(From_T val) {
static_assert(std::is_integral<To_T>::value,
@@ -53,38 +53,38 @@ To_T verify_cast(From_T val) {
}
return conv_val;
-}
-
+}
+
template<typename T>
s8 verify_s8(T val) {
return verify_cast<s8>(val);
-}
-
+}
+
template<typename T>
u8 verify_u8(T val) {
return verify_cast<u8>(val);
-}
-
+}
+
template<typename T>
s16 verify_s16(T val) {
return verify_cast<s16>(val);
-}
-
+}
+
template<typename T>
u16 verify_u16(T val) {
return verify_cast<u16>(val);
-}
-
+}
+
template<typename T>
s32 verify_s32(T val) {
return verify_cast<s32>(val);
-}
-
+}
+
template<typename T>
u32 verify_u32(T val) {
return verify_cast<u32>(val);
}
-} // namespace ue2
-
-#endif // UTIL_VERIFY_TYPES
+} // namespace ue2
+
+#endif // UTIL_VERIFY_TYPES
diff --git a/contrib/libs/hyperscan/ya.make b/contrib/libs/hyperscan/ya.make
index 441a661878..7783969e4a 100644
--- a/contrib/libs/hyperscan/ya.make
+++ b/contrib/libs/hyperscan/ya.make
@@ -1,5 +1,5 @@
# Generated by devtools/yamaker from nixpkgs cc3b147ed182a6cae239348ef094158815da14ae.
-
+
LIBRARY()
OWNER(
@@ -18,65 +18,65 @@ LICENSE(
BSD-3-Clause AND
BSL-1.0
)
-
+
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
contrib/restricted/boost
)
-ADDINCL(
+ADDINCL(
contrib/libs/hyperscan
contrib/libs/hyperscan/include
- contrib/libs/hyperscan/src
-)
-
+ contrib/libs/hyperscan/src
+)
+
NO_COMPILER_WARNINGS()
-
+
NO_UTIL()
-
+
CFLAGS(
-DDISABLE_AVX512VBMI_DISPATCH
)
-SRCS(
+SRCS(
src/alloc.c
- src/compiler/asserts.cpp
- src/compiler/compiler.cpp
- src/compiler/error.cpp
+ src/compiler/asserts.cpp
+ src/compiler/compiler.cpp
+ src/compiler/error.cpp
src/crc32.c
src/database.c
- src/fdr/engine_description.cpp
+ src/fdr/engine_description.cpp
src/fdr/fdr.c
- src/fdr/fdr_compile.cpp
- src/fdr/fdr_compile_util.cpp
- src/fdr/fdr_confirm_compile.cpp
- src/fdr/fdr_engine_description.cpp
- src/fdr/flood_compile.cpp
+ src/fdr/fdr_compile.cpp
+ src/fdr/fdr_compile_util.cpp
+ src/fdr/fdr_confirm_compile.cpp
+ src/fdr/fdr_engine_description.cpp
+ src/fdr/flood_compile.cpp
src/fdr/teddy.c
src/fdr/teddy_avx2.c
- src/fdr/teddy_compile.cpp
- src/fdr/teddy_engine_description.cpp
+ src/fdr/teddy_compile.cpp
+ src/fdr/teddy_engine_description.cpp
src/grey.cpp
src/hs.cpp
src/hs_valid_platform.c
src/hs_version.c
src/hwlm/hwlm.c
- src/hwlm/hwlm_build.cpp
- src/hwlm/hwlm_literal.cpp
- src/hwlm/noodle_build.cpp
+ src/hwlm/hwlm_build.cpp
+ src/hwlm/hwlm_literal.cpp
+ src/hwlm/noodle_build.cpp
src/hwlm/noodle_engine.c
src/nfa/accel.c
src/nfa/accel_dfa_build_strat.cpp
- src/nfa/accelcompile.cpp
+ src/nfa/accelcompile.cpp
src/nfa/castle.c
- src/nfa/castlecompile.cpp
+ src/nfa/castlecompile.cpp
src/nfa/dfa_build_strat.cpp
- src/nfa/dfa_min.cpp
+ src/nfa/dfa_min.cpp
src/nfa/gough.c
src/nfa/goughcompile.cpp
src/nfa/goughcompile_accel.cpp
- src/nfa/goughcompile_reg.cpp
+ src/nfa/goughcompile_reg.cpp
src/nfa/lbr.c
src/nfa/limex_64.c
src/nfa/limex_accel.c
@@ -110,116 +110,116 @@ SRCS(
src/nfa/truffle.c
src/nfa/trufflecompile.cpp
src/nfagraph/ng.cpp
- src/nfagraph/ng_anchored_acyclic.cpp
- src/nfagraph/ng_anchored_dots.cpp
- src/nfagraph/ng_asserts.cpp
- src/nfagraph/ng_builder.cpp
- src/nfagraph/ng_calc_components.cpp
- src/nfagraph/ng_cyclic_redundancy.cpp
- src/nfagraph/ng_depth.cpp
- src/nfagraph/ng_dominators.cpp
- src/nfagraph/ng_edge_redundancy.cpp
- src/nfagraph/ng_equivalence.cpp
- src/nfagraph/ng_execute.cpp
- src/nfagraph/ng_expr_info.cpp
- src/nfagraph/ng_extparam.cpp
- src/nfagraph/ng_fixed_width.cpp
+ src/nfagraph/ng_anchored_acyclic.cpp
+ src/nfagraph/ng_anchored_dots.cpp
+ src/nfagraph/ng_asserts.cpp
+ src/nfagraph/ng_builder.cpp
+ src/nfagraph/ng_calc_components.cpp
+ src/nfagraph/ng_cyclic_redundancy.cpp
+ src/nfagraph/ng_depth.cpp
+ src/nfagraph/ng_dominators.cpp
+ src/nfagraph/ng_edge_redundancy.cpp
+ src/nfagraph/ng_equivalence.cpp
+ src/nfagraph/ng_execute.cpp
+ src/nfagraph/ng_expr_info.cpp
+ src/nfagraph/ng_extparam.cpp
+ src/nfagraph/ng_fixed_width.cpp
src/nfagraph/ng_fuzzy.cpp
- src/nfagraph/ng_haig.cpp
- src/nfagraph/ng_holder.cpp
- src/nfagraph/ng_is_equal.cpp
- src/nfagraph/ng_lbr.cpp
+ src/nfagraph/ng_haig.cpp
+ src/nfagraph/ng_holder.cpp
+ src/nfagraph/ng_is_equal.cpp
+ src/nfagraph/ng_lbr.cpp
src/nfagraph/ng_limex.cpp
src/nfagraph/ng_limex_accel.cpp
- src/nfagraph/ng_literal_analysis.cpp
- src/nfagraph/ng_literal_component.cpp
- src/nfagraph/ng_literal_decorated.cpp
- src/nfagraph/ng_mcclellan.cpp
- src/nfagraph/ng_misc_opt.cpp
- src/nfagraph/ng_netflow.cpp
- src/nfagraph/ng_prefilter.cpp
- src/nfagraph/ng_prune.cpp
- src/nfagraph/ng_puff.cpp
- src/nfagraph/ng_redundancy.cpp
- src/nfagraph/ng_region.cpp
- src/nfagraph/ng_region_redundancy.cpp
- src/nfagraph/ng_repeat.cpp
- src/nfagraph/ng_reports.cpp
- src/nfagraph/ng_restructuring.cpp
- src/nfagraph/ng_revacc.cpp
- src/nfagraph/ng_sep.cpp
- src/nfagraph/ng_small_literal_set.cpp
+ src/nfagraph/ng_literal_analysis.cpp
+ src/nfagraph/ng_literal_component.cpp
+ src/nfagraph/ng_literal_decorated.cpp
+ src/nfagraph/ng_mcclellan.cpp
+ src/nfagraph/ng_misc_opt.cpp
+ src/nfagraph/ng_netflow.cpp
+ src/nfagraph/ng_prefilter.cpp
+ src/nfagraph/ng_prune.cpp
+ src/nfagraph/ng_puff.cpp
+ src/nfagraph/ng_redundancy.cpp
+ src/nfagraph/ng_region.cpp
+ src/nfagraph/ng_region_redundancy.cpp
+ src/nfagraph/ng_repeat.cpp
+ src/nfagraph/ng_reports.cpp
+ src/nfagraph/ng_restructuring.cpp
+ src/nfagraph/ng_revacc.cpp
+ src/nfagraph/ng_sep.cpp
+ src/nfagraph/ng_small_literal_set.cpp
src/nfagraph/ng_som.cpp
src/nfagraph/ng_som_add_redundancy.cpp
- src/nfagraph/ng_som_util.cpp
- src/nfagraph/ng_split.cpp
- src/nfagraph/ng_squash.cpp
- src/nfagraph/ng_stop.cpp
- src/nfagraph/ng_uncalc_components.cpp
- src/nfagraph/ng_utf8.cpp
- src/nfagraph/ng_util.cpp
- src/nfagraph/ng_vacuous.cpp
+ src/nfagraph/ng_som_util.cpp
+ src/nfagraph/ng_split.cpp
+ src/nfagraph/ng_squash.cpp
+ src/nfagraph/ng_stop.cpp
+ src/nfagraph/ng_uncalc_components.cpp
+ src/nfagraph/ng_utf8.cpp
+ src/nfagraph/ng_util.cpp
+ src/nfagraph/ng_vacuous.cpp
src/nfagraph/ng_violet.cpp
- src/nfagraph/ng_width.cpp
- src/parser/AsciiComponentClass.cpp
+ src/nfagraph/ng_width.cpp
+ src/parser/AsciiComponentClass.cpp
src/parser/Component.cpp
- src/parser/ComponentAlternation.cpp
- src/parser/ComponentAssertion.cpp
- src/parser/ComponentAtomicGroup.cpp
- src/parser/ComponentBackReference.cpp
- src/parser/ComponentBoundary.cpp
- src/parser/ComponentByte.cpp
- src/parser/ComponentClass.cpp
- src/parser/ComponentCondReference.cpp
+ src/parser/ComponentAlternation.cpp
+ src/parser/ComponentAssertion.cpp
+ src/parser/ComponentAtomicGroup.cpp
+ src/parser/ComponentBackReference.cpp
+ src/parser/ComponentBoundary.cpp
+ src/parser/ComponentByte.cpp
+ src/parser/ComponentClass.cpp
+ src/parser/ComponentCondReference.cpp
src/parser/ComponentEUS.cpp
src/parser/ComponentEmpty.cpp
- src/parser/ComponentRepeat.cpp
- src/parser/ComponentSequence.cpp
- src/parser/ComponentVisitor.cpp
- src/parser/ComponentWordBoundary.cpp
- src/parser/ConstComponentVisitor.cpp
+ src/parser/ComponentRepeat.cpp
+ src/parser/ComponentSequence.cpp
+ src/parser/ComponentVisitor.cpp
+ src/parser/ComponentWordBoundary.cpp
+ src/parser/ConstComponentVisitor.cpp
src/parser/Parser.rl6
src/parser/Utf8ComponentClass.cpp
src/parser/buildstate.cpp
src/parser/check_refs.cpp
src/parser/control_verbs.rl6
src/parser/logical_combination.cpp
- src/parser/parse_error.cpp
- src/parser/parser_util.cpp
- src/parser/prefilter.cpp
- src/parser/shortcut_literal.cpp
- src/parser/ucp_table.cpp
- src/parser/unsupported.cpp
- src/parser/utf8_validate.cpp
+ src/parser/parse_error.cpp
+ src/parser/parser_util.cpp
+ src/parser/prefilter.cpp
+ src/parser/shortcut_literal.cpp
+ src/parser/ucp_table.cpp
+ src/parser/unsupported.cpp
+ src/parser/utf8_validate.cpp
src/rose/block.c
src/rose/catchup.c
src/rose/init.c
src/rose/match.c
src/rose/program_runtime.c
- src/rose/rose_build_add.cpp
- src/rose/rose_build_add_mask.cpp
- src/rose/rose_build_anchored.cpp
- src/rose/rose_build_bytecode.cpp
+ src/rose/rose_build_add.cpp
+ src/rose/rose_build_add_mask.cpp
+ src/rose/rose_build_anchored.cpp
+ src/rose/rose_build_bytecode.cpp
src/rose/rose_build_castle.cpp
- src/rose/rose_build_compile.cpp
- src/rose/rose_build_convert.cpp
+ src/rose/rose_build_compile.cpp
+ src/rose/rose_build_convert.cpp
src/rose/rose_build_dedupe.cpp
src/rose/rose_build_engine_blob.cpp
src/rose/rose_build_exclusive.cpp
src/rose/rose_build_groups.cpp
- src/rose/rose_build_infix.cpp
+ src/rose/rose_build_infix.cpp
src/rose/rose_build_instructions.cpp
src/rose/rose_build_lit_accel.cpp
src/rose/rose_build_long_lit.cpp
- src/rose/rose_build_lookaround.cpp
+ src/rose/rose_build_lookaround.cpp
src/rose/rose_build_matchers.cpp
- src/rose/rose_build_merge.cpp
- src/rose/rose_build_misc.cpp
+ src/rose/rose_build_merge.cpp
+ src/rose/rose_build_misc.cpp
src/rose/rose_build_program.cpp
- src/rose/rose_build_role_aliasing.cpp
- src/rose/rose_build_scatter.cpp
- src/rose/rose_build_width.cpp
- src/rose/rose_in_util.cpp
+ src/rose/rose_build_role_aliasing.cpp
+ src/rose/rose_build_scatter.cpp
+ src/rose/rose_build_width.cpp
+ src/rose/rose_in_util.cpp
src/rose/stream.c
src/runtime.c
src/scratch.c
@@ -228,26 +228,26 @@ SRCS(
src/som/som_runtime.c
src/som/som_stream.c
src/stream_compress.c
- src/util/alloc.cpp
- src/util/charreach.cpp
+ src/util/alloc.cpp
+ src/util/charreach.cpp
src/util/clique.cpp
- src/util/compile_context.cpp
- src/util/compile_error.cpp
+ src/util/compile_context.cpp
+ src/util/compile_error.cpp
src/util/cpuid_flags.c
- src/util/depth.cpp
+ src/util/depth.cpp
src/util/dump_mask.cpp
src/util/fatbit_build.cpp
src/util/masked_move.c
src/util/multibit.c
- src/util/multibit_build.cpp
- src/util/report_manager.cpp
+ src/util/multibit_build.cpp
+ src/util/report_manager.cpp
src/util/simd_utils.c
src/util/state_compress.c
- src/util/target_info.cpp
- src/util/ue2string.cpp
-)
-
-END()
+ src/util/target_info.cpp
+ src/util/ue2string.cpp
+)
+
+END()
RECURSE(
runtime_avx2
diff --git a/contrib/libs/pire/pire/extra/capture.h b/contrib/libs/pire/pire/extra/capture.h
index e69b922edf..8399914a67 100644
--- a/contrib/libs/pire/pire/extra/capture.h
+++ b/contrib/libs/pire/pire/extra/capture.h
@@ -103,23 +103,23 @@ public:
}
}
- Char Translate(Char ch) const
- {
- return m_letters[static_cast<size_t>(ch)];
- }
-
- Action NextTranslated(State& s, unsigned char c) const
- {
- Transition x = reinterpret_cast<const Transition*>(s.m_state)[c];
- s.m_state += SignExtend(x.shift);
- ++s.m_counter;
-
- return x.action;
- }
-
+ Char Translate(Char ch) const
+ {
+ return m_letters[static_cast<size_t>(ch)];
+ }
+
+ Action NextTranslated(State& s, unsigned char c) const
+ {
+ Transition x = reinterpret_cast<const Transition*>(s.m_state)[c];
+ s.m_state += SignExtend(x.shift);
+ ++s.m_counter;
+
+ return x.action;
+ }
+
Action Next(State& s, Char c) const
{
- return NextTranslated(s, Translate(c));
+ return NextTranslated(s, Translate(c));
}
Action Next(const State& current, State& n, Char c) const
diff --git a/contrib/libs/pire/pire/extra/count.cpp b/contrib/libs/pire/pire/extra/count.cpp
index 5b677666ae..468ff61d92 100644
--- a/contrib/libs/pire/pire/extra/count.cpp
+++ b/contrib/libs/pire/pire/extra/count.cpp
@@ -912,10 +912,10 @@ protected:
TVector<State> States;
TAction Action(const Scanner& sc, InternalState state, Char letter) const
{
- size_t state_index = sc.StateIdx(state);
- size_t transition_index = sc.TransitionIndex(state_index, letter);
+ size_t state_index = sc.StateIdx(state);
+ size_t transition_index = sc.TransitionIndex(state_index, letter);
const auto& tr = sc.m_jumps[transition_index];
- return tr.action;
+ return tr.action;
}
};
diff --git a/contrib/libs/pire/pire/extra/count.h b/contrib/libs/pire/pire/extra/count.h
index 35c3d24aa7..bd1526b98d 100644
--- a/contrib/libs/pire/pire/extra/count.h
+++ b/contrib/libs/pire/pire/extra/count.h
@@ -45,71 +45,71 @@ namespace Impl {
AdvancedScanner MakeAdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple);
};
-template<size_t I>
-class IncrementPerformer {
-public:
- template<typename State, typename Action>
- PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- static void Do(State& s, Action mask)
- {
- if (mask & (1 << (I - 1))) {
- Increment(s);
- }
- IncrementPerformer<I - 1>::Do(s, mask);
- }
-
-private:
- template<typename State>
- PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- static void Increment(State& s)
- {
- ++s.m_current[I - 1];
- }
-};
-
-template<>
-class IncrementPerformer<0> {
-public:
- template<typename State, typename Action>
- PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- static void Do(State&, Action)
- {
- }
-};
-
-template<size_t I>
-class ResetPerformer {
-public:
- template<typename State, typename Action>
- PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- static void Do(State& s, Action mask)
- {
- if (mask & (1 << (LoadedScanner::MAX_RE_COUNT + (I - 1))) && s.m_current[I - 1]) {
- Reset(s);
- }
- ResetPerformer<I - 1>::Do(s, mask);
- }
-
-private:
- template<typename State>
- PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- static void Reset(State& s)
- {
- s.m_total[I - 1] = ymax(s.m_total[I - 1], s.m_current[I - 1]);
- s.m_current[I - 1] = 0;
- }
-};
-
-template<>
-class ResetPerformer<0> {
-public:
- template<typename State, typename Action>
- PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- static void Do(State&, Action)
- {
- }
-};
-
+template<size_t I>
+class IncrementPerformer {
+public:
+ template<typename State, typename Action>
+ PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static void Do(State& s, Action mask)
+ {
+ if (mask & (1 << (I - 1))) {
+ Increment(s);
+ }
+ IncrementPerformer<I - 1>::Do(s, mask);
+ }
+
+private:
+ template<typename State>
+ PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static void Increment(State& s)
+ {
+ ++s.m_current[I - 1];
+ }
+};
+
+template<>
+class IncrementPerformer<0> {
+public:
+ template<typename State, typename Action>
+ PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static void Do(State&, Action)
+ {
+ }
+};
+
+template<size_t I>
+class ResetPerformer {
+public:
+ template<typename State, typename Action>
+ PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static void Do(State& s, Action mask)
+ {
+ if (mask & (1 << (LoadedScanner::MAX_RE_COUNT + (I - 1))) && s.m_current[I - 1]) {
+ Reset(s);
+ }
+ ResetPerformer<I - 1>::Do(s, mask);
+ }
+
+private:
+ template<typename State>
+ PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static void Reset(State& s)
+ {
+ s.m_total[I - 1] = ymax(s.m_total[I - 1], s.m_current[I - 1]);
+ s.m_current[I - 1] = 0;
+ }
+};
+
+template<>
+class ResetPerformer<0> {
+public:
+ template<typename State, typename Action>
+ PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static void Do(State&, Action)
+ {
+ }
+};
+
/**
* A scanner which counts occurences of the
* given regexp separated by another regexp
@@ -134,29 +134,29 @@ public:
state.m_updatedMask = 0;
}
- PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- void TakeAction(State& s, Action a) const
- {
+ PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ void TakeAction(State& s, Action a) const
+ {
static_cast<const DerivedScanner*>(this)->template TakeActionImpl<MAX_RE_COUNT>(s, a);
- }
-
+ }
+
bool CanStop(const State&) const { return false; }
- Char Translate(Char ch) const
- {
- return m_letters[static_cast<size_t>(ch)];
- }
-
- Action NextTranslated(State& s, Char c) const
- {
- Transition x = reinterpret_cast<const Transition*>(s.m_state)[c];
- s.m_state += SignExtend(x.shift);
- return x.action;
- }
-
+ Char Translate(Char ch) const
+ {
+ return m_letters[static_cast<size_t>(ch)];
+ }
+
+ Action NextTranslated(State& s, Char c) const
+ {
+ Transition x = reinterpret_cast<const Transition*>(s.m_state)[c];
+ s.m_state += SignExtend(x.shift);
+ return x.action;
+ }
+
Action Next(State& s, Char c) const
{
- return NextTranslated(s, Translate(c));
+ return NextTranslated(s, Translate(c));
}
Action Next(const State& current, State& n, Char c) const
@@ -177,28 +177,28 @@ protected:
using LoadedScanner::Init;
using LoadedScanner::InternalState;
- template<size_t ActualReCount>
+ template<size_t ActualReCount>
void PerformIncrement(State& s, Action mask) const
{
if (mask) {
- IncrementPerformer<ActualReCount>::Do(s, mask);
+ IncrementPerformer<ActualReCount>::Do(s, mask);
s.m_updatedMask |= ((size_t)mask) << MAX_RE_COUNT;
}
}
- template<size_t ActualReCount>
+ template<size_t ActualReCount>
void PerformReset(State& s, Action mask) const
{
mask &= s.m_updatedMask;
if (mask) {
- ResetPerformer<ActualReCount>::Do(s, mask);
+ ResetPerformer<ActualReCount>::Do(s, mask);
s.m_updatedMask &= (Action)~mask;
}
}
void Next(InternalState& s, Char c) const
{
- Transition x = reinterpret_cast<const Transition*>(s)[Translate(c)];
+ Transition x = reinterpret_cast<const Transition*>(s)[Translate(c)];
s += SignExtend(x.shift);
}
};
diff --git a/contrib/libs/pire/pire/fsm.cpp b/contrib/libs/pire/pire/fsm.cpp
index 1f153d448a..984d708dfa 100644
--- a/contrib/libs/pire/pire/fsm.cpp
+++ b/contrib/libs/pire/pire/fsm.cpp
@@ -574,7 +574,7 @@ Fsm& Fsm::Complement()
return *this;
}
-Fsm Fsm::operator *(size_t count) const
+Fsm Fsm::operator *(size_t count) const
{
Fsm ret;
while (count--)
diff --git a/contrib/libs/pire/pire/fsm.h b/contrib/libs/pire/pire/fsm.h
index 7c11ea43c3..4dad06ca06 100644
--- a/contrib/libs/pire/pire/fsm.h
+++ b/contrib/libs/pire/pire/fsm.h
@@ -81,7 +81,7 @@ namespace Pire {
Fsm operator & (const Fsm& rhs) const { Fsm a(*this); return a &= rhs; }
Fsm operator * () const { Fsm a(*this); return a.Iterate(); }
Fsm operator ~ () const { Fsm a(*this); return a.Complement(); }
- Fsm operator * (size_t count) const;
+ Fsm operator * (size_t count) const;
// === Raw FSM construction ===
@@ -94,7 +94,7 @@ namespace Pire {
/// Completely removes given transition
void Disconnect(size_t from, size_t to);
- /// Creates an FSM which matches any prefix of any word current FSM matches.
+ /// Creates an FSM which matches any prefix of any word current FSM matches.
void MakePrefix();
/// Creates an FSM which matches any suffix of any word current FSM matches.
diff --git a/contrib/libs/pire/pire/platform.h b/contrib/libs/pire/pire/platform.h
index 2f35e192ed..54ded6b387 100644
--- a/contrib/libs/pire/pire/platform.h
+++ b/contrib/libs/pire/pire/platform.h
@@ -26,13 +26,13 @@
#include <contrib/libs/pire/pire/stub/defaults.h>
#include <contrib/libs/pire/pire/static_assert.h>
-#ifndef PIRE_FORCED_INLINE
+#ifndef PIRE_FORCED_INLINE
#ifdef __GNUC__
-#define PIRE_FORCED_INLINE inline __attribute__((__always_inline__))
+#define PIRE_FORCED_INLINE inline __attribute__((__always_inline__))
#elif _MSC_VER
-#define PIRE_FORCED_INLINE __forceinline
+#define PIRE_FORCED_INLINE __forceinline
#else
-#define PIRE_FORCED_INLINE inline
+#define PIRE_FORCED_INLINE inline
#endif
#endif
diff --git a/contrib/libs/pire/pire/re_lexer.h b/contrib/libs/pire/pire/re_lexer.h
index da32fe6963..5591c16d34 100644
--- a/contrib/libs/pire/pire/re_lexer.h
+++ b/contrib/libs/pire/pire/re_lexer.h
@@ -149,9 +149,9 @@ public:
const Pire::Encoding& Encoding() const { return *m_encoding; }
Lexer& SetEncoding(const Pire::Encoding& encoding) { m_encoding = &encoding; return *this; }
- void SetError(const char* msg) { errmsg = msg; }
- void SetError(ystring msg) { errmsg = msg; }
- ystring& GetError() { return errmsg; }
+ void SetError(const char* msg) { errmsg = msg; }
+ void SetError(ystring msg) { errmsg = msg; }
+ ystring& GetError() { return errmsg; }
Any& Retval() { return m_retval; }
@@ -174,7 +174,7 @@ private:
const Pire::Encoding* m_encoding;
TVector<THolder<Feature>> m_features;
Any m_retval;
- ystring errmsg;
+ ystring errmsg;
friend class Feature;
diff --git a/contrib/libs/pire/pire/re_parser.y b/contrib/libs/pire/pire/re_parser.y
index 0663d73c01..dbad88e287 100644
--- a/contrib/libs/pire/pire/re_parser.y
+++ b/contrib/libs/pire/pire/re_parser.y
@@ -157,23 +157,23 @@ term
int yylex(YYSTYPE* lval, Pire::Lexer& rlex)
{
- try {
- Pire::Term term = rlex.Lex();
- if (!term.Value().Empty())
- *lval = new Any(term.Value());
+ try {
+ Pire::Term term = rlex.Lex();
+ if (!term.Value().Empty())
+ *lval = new Any(term.Value());
else
*lval = nullptr;
- return term.Type();
- } catch (Pire::Error &e) {
- rlex.SetError(e.what());
- return 0;
- }
+ return term.Type();
+ } catch (Pire::Error &e) {
+ rlex.SetError(e.what());
+ return 0;
+ }
}
-void yyerror(Pire::Lexer& rlex, const char* str)
+void yyerror(Pire::Lexer& rlex, const char* str)
{
- if (rlex.GetError().length() == 0)
- rlex.SetError(ystring("Regexp parse error: ").append(str));
+ if (rlex.GetError().length() == 0)
+ rlex.SetError(ystring("Regexp parse error: ").append(str));
}
void AppendRange(const Encoding& encoding, Fsm& a, const Term::CharacterRange& cr)
@@ -235,13 +235,13 @@ Fsm& ConvertToFSM(const Encoding& encoding, Any* any)
namespace Pire {
namespace Impl {
- int yre_parse(Pire::Lexer& rlex)
- {
- int rc = yyparse(rlex);
-
- if (rlex.GetError().length() != 0)
- throw Error(rlex.GetError());
- return rc;
- }
+ int yre_parse(Pire::Lexer& rlex)
+ {
+ int rc = yyparse(rlex);
+
+ if (rlex.GetError().length() != 0)
+ throw Error(rlex.GetError());
+ return rc;
+ }
}
}
diff --git a/contrib/libs/pire/pire/run.h b/contrib/libs/pire/pire/run.h
index 434ef0851e..f6e1ff734d 100644
--- a/contrib/libs/pire/pire/run.h
+++ b/contrib/libs/pire/pire/run.h
@@ -51,7 +51,7 @@ namespace Pire {
namespace Pire {
template<class Scanner>
-PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
void Step(const Scanner& scanner, typename Scanner::State& state, Char ch)
{
Y_ASSERT(ch < MaxCharUnaligned);
@@ -65,7 +65,7 @@ namespace Impl {
template<class Scanner>
struct RunPred {
- PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
Action operator()(const Scanner&, const typename Scanner::State&, const char*) const { return Continue; }
};
@@ -73,7 +73,7 @@ namespace Impl {
struct ShortestPrefixPred {
explicit ShortestPrefixPred(const char*& pos): m_pos(&pos) {}
- PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
Action operator()(const Scanner& sc, const typename Scanner::State& st, const char* pos) const
{
if (sc.Final(st)) {
@@ -91,7 +91,7 @@ namespace Impl {
struct LongestPrefixPred {
explicit LongestPrefixPred(const char*& pos): m_pos(&pos) {}
- PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
Action operator()(const Scanner& sc, const typename Scanner::State& st, const char* pos) const
{
if (sc.Final(st))
@@ -108,37 +108,37 @@ namespace Impl {
namespace Impl {
- template<class Scanner, class Pred>
- PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- Action SafeRunChunk(const Scanner& scanner, typename Scanner::State& state, const size_t* p, size_t pos, size_t size, Pred pred)
- {
+ template<class Scanner, class Pred>
+ PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ Action SafeRunChunk(const Scanner& scanner, typename Scanner::State& state, const size_t* p, size_t pos, size_t size, Pred pred)
+ {
Y_ASSERT(pos <= sizeof(size_t));
Y_ASSERT(size <= sizeof(size_t));
Y_ASSERT(pos + size <= sizeof(size_t));
-
- if (PIRE_UNLIKELY(size == 0))
- return Continue;
-
- const char* ptr = (const char*) p + pos;
- for (; size--; ++ptr) {
- Step(scanner, state, (unsigned char) *ptr);
- if (pred(scanner, state, ptr + 1) == Stop)
- return Stop;
- }
- return Continue;
- }
-
+
+ if (PIRE_UNLIKELY(size == 0))
+ return Continue;
+
+ const char* ptr = (const char*) p + pos;
+ for (; size--; ++ptr) {
+ Step(scanner, state, (unsigned char) *ptr);
+ if (pred(scanner, state, ptr + 1) == Stop)
+ return Stop;
+ }
+ return Continue;
+ }
+
/// Effectively runs a scanner on a short data chunk, fit completely into one machine word.
template<class Scanner, class Pred>
- PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
Action RunChunk(const Scanner& scanner, typename Scanner::State& state, const size_t* p, size_t pos, size_t size, Pred pred)
{
Y_ASSERT(pos <= sizeof(size_t));
Y_ASSERT(size <= sizeof(size_t));
Y_ASSERT(pos + size <= sizeof(size_t));
- if (PIRE_UNLIKELY(size == 0))
- return Continue;
+ if (PIRE_UNLIKELY(size == 0))
+ return Continue;
size_t chunk = Impl::ToLittleEndian(*p) >> 8*pos;
const char* ptr = (const char*) p + pos + size + 1;
@@ -151,7 +151,7 @@ namespace Impl {
}
return Continue;
}
-
+
template<class Scanner>
struct AlignedRunner {
@@ -223,7 +223,7 @@ namespace Impl {
}
if (tailSize)
- Impl::SafeRunChunk(scanner, state, tail, 0, tailSize, pred);
+ Impl::SafeRunChunk(scanner, state, tail, 0, tailSize, pred);
st = state;
}
diff --git a/contrib/libs/pire/pire/scanner_io.cpp b/contrib/libs/pire/pire/scanner_io.cpp
index 353c80bae7..3956e3c6ed 100644
--- a/contrib/libs/pire/pire/scanner_io.cpp
+++ b/contrib/libs/pire/pire/scanner_io.cpp
@@ -205,10 +205,10 @@ void LoadedScanner::Load(yistream* s, ui32* type)
sc.Markup(sc.m_buffer.Get());
Impl::AlignedLoadArray(s, sc.m_letters, MaxChar);
Impl::AlignedLoadArray(s, sc.m_jumps, sc.m.statesCount * sc.m.lettersCount);
- if (header.Version == Header::RE_VERSION_WITH_MACTIONS) {
+ if (header.Version == Header::RE_VERSION_WITH_MACTIONS) {
TVector<Action> actions(sc.m.statesCount * sc.m.lettersCount);
- Impl::AlignedLoadArray(s, actions.data(), actions.size());
- }
+ Impl::AlignedLoadArray(s, actions.data(), actions.size());
+ }
Impl::AlignedLoadArray(s, sc.m_tags, sc.m.statesCount);
sc.m.initial += reinterpret_cast<size_t>(sc.m_jumps);
Swap(sc);
diff --git a/contrib/libs/pire/pire/scanners/common.h b/contrib/libs/pire/pire/scanners/common.h
index ad89ab76da..de5ea0af7b 100644
--- a/contrib/libs/pire/pire/scanners/common.h
+++ b/contrib/libs/pire/pire/scanners/common.h
@@ -50,8 +50,8 @@ namespace Pire {
ui32 HdrSize;
static const ui32 MAGIC = 0x45524950; // "PIRE" on litte-endian
- static const ui32 RE_VERSION = 7; // Should be incremented each time when the format of serialized scanner changes
- static const ui32 RE_VERSION_WITH_MACTIONS = 6; // LoadedScanner with m_actions, which is ignored
+ static const ui32 RE_VERSION = 7; // Should be incremented each time when the format of serialized scanner changes
+ static const ui32 RE_VERSION_WITH_MACTIONS = 6; // LoadedScanner with m_actions, which is ignored
explicit Header(ui32 type, size_t hdrsize)
: Magic(MAGIC)
@@ -66,7 +66,7 @@ namespace Pire {
{
if (Magic != MAGIC || PtrSize != sizeof(void*) || MaxWordSize != sizeof(Impl::MaxSizeWord))
throw Error("Serialized regexp incompatible with your system");
- if (Version != RE_VERSION && Version != RE_VERSION_WITH_MACTIONS)
+ if (Version != RE_VERSION && Version != RE_VERSION_WITH_MACTIONS)
throw Error("You are trying to used an incompatible version of a serialized regexp");
if (type != ScannerIOTypes::NoScanner && type != Type &&
!(type == ScannerIOTypes::LoadedScanner && Type == ScannerIOTypes::NoGlueLimitCountingScanner)) {
@@ -101,21 +101,21 @@ namespace Pire {
throw Error("Tried to mmap scanner at misaligned address");
}
- inline Header ValidateHeader(const size_t*& ptr, size_t& size, ui32 type, size_t hdrsize)
+ inline Header ValidateHeader(const size_t*& ptr, size_t& size, ui32 type, size_t hdrsize)
{
const Header* hdr;
MapPtr(hdr, 1, ptr, size);
hdr->Validate(type, hdrsize);
- return *hdr;
+ return *hdr;
}
- inline Header ValidateHeader(yistream* s, ui32 type, size_t hdrsize)
+ inline Header ValidateHeader(yistream* s, ui32 type, size_t hdrsize)
{
Header hdr(ScannerIOTypes::NoScanner, 0);
LoadPodType(s, hdr);
AlignLoad(s, sizeof(hdr));
hdr.Validate(type, hdrsize);
- return hdr;
+ return hdr;
}
}
}
diff --git a/contrib/libs/pire/pire/scanners/loaded.h b/contrib/libs/pire/pire/scanners/loaded.h
index f77c87cf1d..120dc403b7 100644
--- a/contrib/libs/pire/pire/scanners/loaded.h
+++ b/contrib/libs/pire/pire/scanners/loaded.h
@@ -73,8 +73,8 @@ public:
DeadFlag = 0
};
- static const size_t MAX_RE_COUNT = 16;
-
+ static const size_t MAX_RE_COUNT = 16;
+
protected:
LoadedScanner() { Alias(Null()); }
@@ -118,8 +118,8 @@ public:
size_t RegexpsCount() const { return Empty() ? 0 : m.regexpsCount; }
- size_t LettersCount() const { return m.lettersCount; }
-
+ size_t LettersCount() const { return m.lettersCount; }
+
const void* Mmap(const void* ptr, size_t size) {
return Mmap(ptr, size, nullptr);
}
@@ -140,10 +140,10 @@ public:
Impl::MapPtr(s.m_letters, MaxChar, p, size);
Impl::MapPtr(s.m_jumps, s.m.statesCount * s.m.lettersCount, p, size);
- if (header.Version == Header::RE_VERSION_WITH_MACTIONS) {
- Action* actions = 0;
- Impl::MapPtr(actions, s.m.statesCount * s.m.lettersCount, p, size);
- }
+ if (header.Version == Header::RE_VERSION_WITH_MACTIONS) {
+ Action* actions = 0;
+ Impl::MapPtr(actions, s.m.statesCount * s.m.lettersCount, p, size);
+ }
Impl::MapPtr(s.m_tags, s.m.statesCount, p, size);
s.m.initial += reinterpret_cast<size_t>(s.m_jumps);
@@ -176,27 +176,27 @@ public:
m_letters[character] = letter.second.first;
}
- size_t StateSize() const
- {
- return m.lettersCount * sizeof(*m_jumps);
- }
+ size_t StateSize() const
+ {
+ return m.lettersCount * sizeof(*m_jumps);
+ }
+
+ size_t TransitionIndex(size_t state, Char c) const
+ {
+ return state * m.lettersCount + m_letters[c];
+ }
- size_t TransitionIndex(size_t state, Char c) const
- {
- return state * m.lettersCount + m_letters[c];
- }
-
void SetJump(size_t oldState, Char c, size_t newState, Action action)
{
Y_ASSERT(m_buffer);
Y_ASSERT(oldState < m.statesCount);
Y_ASSERT(newState < m.statesCount);
- size_t shift = (newState - oldState) * StateSize();
+ size_t shift = (newState - oldState) * StateSize();
Transition tr;
tr.shift = (ui32)shift;
tr.action = action;
- m_jumps[TransitionIndex(oldState, c)] = tr;
+ m_jumps[TransitionIndex(oldState, c)] = tr;
}
Action RemapAction(Action action) { return action; }
@@ -212,19 +212,19 @@ public:
i64 SignExtend(i32 i) const { return i; }
- size_t BufSize() const
- {
- return
- MaxChar * sizeof(*m_letters)
- + m.statesCount * StateSize()
- + m.statesCount * sizeof(*m_tags)
- ;
- }
-
+ size_t BufSize() const
+ {
+ return
+ MaxChar * sizeof(*m_letters)
+ + m.statesCount * StateSize()
+ + m.statesCount * sizeof(*m_tags)
+ ;
+ }
+
protected:
- static const Action IncrementMask = (1 << MAX_RE_COUNT) - 1;
- static const Action ResetMask = IncrementMask << MAX_RE_COUNT;
+ static const Action IncrementMask = (1 << MAX_RE_COUNT) - 1;
+ static const Action ResetMask = IncrementMask << MAX_RE_COUNT;
// TODO: maybe, put fields in private section and provide data accessors
@@ -265,7 +265,7 @@ private:
{
m_letters = reinterpret_cast<Letter*>(buf);
m_jumps = reinterpret_cast<Transition*>(m_letters + MaxChar);
- m_tags = reinterpret_cast<Tag*>(m_jumps + m.statesCount * m.lettersCount);
+ m_tags = reinterpret_cast<Tag*>(m_jumps + m.statesCount * m.lettersCount);
}
void Alias(const LoadedScanner& s)
diff --git a/contrib/libs/pire/pire/scanners/multi.h b/contrib/libs/pire/pire/scanners/multi.h
index 3762e1e29a..29679e416e 100644
--- a/contrib/libs/pire/pire/scanners/multi.h
+++ b/contrib/libs/pire/pire/scanners/multi.h
@@ -161,21 +161,21 @@ public:
/// Returns an initial state for this scanner
void Initialize(State& state) const { state = m.initial; }
- Char Translate(Char ch) const
+ Char Translate(Char ch) const
+ {
+ return m_letters[static_cast<size_t>(ch)];
+ }
+
+ /// Handles one letter
+ Action NextTranslated(State& state, Char letter) const
{
- return m_letters[static_cast<size_t>(ch)];
- }
-
- /// Handles one letter
- Action NextTranslated(State& state, Char letter) const
- {
PIRE_IFDEBUG(
Y_ASSERT(state >= (size_t)m_transitions);
Y_ASSERT(state < (size_t)(m_transitions + RowSize()*Size()));
Y_ASSERT((state - (size_t)m_transitions) % (RowSize()*sizeof(Transition)) == 0);
);
- state = Relocation::Go(state, reinterpret_cast<const Transition*>(state)[letter]);
+ state = Relocation::Go(state, reinterpret_cast<const Transition*>(state)[letter]);
PIRE_IFDEBUG(
Y_ASSERT(state >= (size_t)m_transitions);
@@ -186,12 +186,12 @@ public:
return 0;
}
- /// Handles one character
- Action Next(State& state, Char c) const
- {
- return NextTranslated(state, Translate(c));
- }
-
+ /// Handles one character
+ Action Next(State& state, Char c) const
+ {
+ return NextTranslated(state, Translate(c));
+ }
+
void TakeAction(State&, Action) const {}
Scanner(const Scanner& s): m(s.m)
@@ -636,7 +636,7 @@ private:
template<class ScannerRowHeader, unsigned N>
struct MaskCheckerBase {
- static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
bool Check(const ScannerRowHeader& hdr, size_t alignOffset, Word chunk)
{
Word mask = CheckBytes(hdr.Mask(N, alignOffset), chunk);
@@ -646,7 +646,7 @@ private:
return !IsAnySet(mask);
}
- static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
const Word* DoRun(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end)
{
for (; begin != end && Check(hdr, alignOffset, ToLittleEndian(*begin)); ++begin) {}
@@ -659,7 +659,7 @@ private:
typedef MaskCheckerBase<ScannerRowHeader, N> Base;
typedef MaskChecker<ScannerRowHeader, N+1, Nmax> Next;
- static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
const Word* Run(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end)
{
if (hdr.Mask(N) == hdr.Mask(N + 1))
@@ -673,7 +673,7 @@ private:
struct MaskChecker<ScannerRowHeader, N, N> : MaskCheckerBase<ScannerRowHeader, N> {
typedef MaskCheckerBase<ScannerRowHeader, N> Base;
- static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
const Word* Run(const ScannerRowHeader& hdr, size_t alignOffset, const Word* begin, const Word* end)
{
return Base::DoRun(hdr, alignOffset, begin, end);
@@ -682,7 +682,7 @@ private:
// Compares the ExitMask[0] value without SSE reads which seems to be more optimal
template <class Relocation>
- static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
bool CheckFirstMask(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state, size_t val)
{
return (scanner.Header(state).Mask(0) == val);
@@ -717,7 +717,7 @@ public:
return *p;
}
- PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
size_t Mask(size_t i) const
{
Y_ASSERT(i < ExitMaskCount);
@@ -791,21 +791,21 @@ public:
}
template <class Relocation>
- static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
bool NoExit(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state)
{
return CheckFirstMask(scanner, state, NO_EXIT_MASK);
}
template <class Relocation>
- static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
bool NoShortcut(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state)
{
return CheckFirstMask(scanner, state, NO_SHORTCUT_MASK);
}
template <class Relocation>
- static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
const Word* Run(const Scanner<Relocation, ExitMasks<MaskCount> >& scanner, typename Scanner<Relocation, ExitMasks<MaskCount> >::State state, size_t alignOffset, const Word* begin, const Word* end)
{
return MaskChecker<typename Scanner<Relocation, ExitMasks<MaskCount> >::ScannerRowHeader, 0, MaskCount - 1>::Run(scanner.Header(state), alignOffset, begin, end);
@@ -846,7 +846,7 @@ struct NoShortcuts {
static void FinishMasks(Header&, size_t) {}
template <class Relocation>
- static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
bool NoExit(const Scanner<Relocation, NoShortcuts>&, typename Scanner<Relocation, NoShortcuts>::State)
{
// Cannot exit prematurely
@@ -854,7 +854,7 @@ struct NoShortcuts {
}
template <class Relocation>
- static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
bool NoShortcut(const Scanner<Relocation, NoShortcuts>&, typename Scanner<Relocation, NoShortcuts>::State)
{
// There's no shortcut regardless of the state
@@ -862,7 +862,7 @@ struct NoShortcuts {
}
template <class Relocation>
- static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
const Word* Run(const Scanner<Relocation, NoShortcuts>&, typename Scanner<Relocation, NoShortcuts>::State, size_t, const Word* begin, const Word*)
{
// Stop shortcutting right at the beginning
@@ -879,7 +879,7 @@ template <class Scanner, unsigned Count>
struct MultiChunk {
// Process Word-sized chunk which consist of >=1 size_t-sized chunks
template<class Pred>
- static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
Action Process(const Scanner& scanner, typename Scanner::State& state, const size_t* p, Pred pred)
{
if (RunChunk(scanner, state, p, 0, sizeof(void*), pred) == Continue)
@@ -893,7 +893,7 @@ template <class Scanner>
struct MultiChunk<Scanner, 0> {
// Process Word-sized chunk which consist of >=1 size_t-sized chunks
template<class Pred>
- static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
Action Process(const Scanner&, typename Scanner::State, const size_t*, Pred)
{
return Continue;
@@ -909,7 +909,7 @@ private:
// Processes Word-sized chuck of memory (depending on the platform a Word might
// consist of multiple size_t chuncks)
template <class Pred>
- static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ static PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
Action RunMultiChunk(const ScannerType& scanner, typename ScannerType::State& st, const size_t* begin, Pred pred)
{
return MultiChunk<ScannerType, sizeof(Word)/sizeof(size_t)>::Process(scanner, st, begin, pred);
diff --git a/contrib/libs/pire/pire/scanners/slow.h b/contrib/libs/pire/pire/scanners/slow.h
index 6103fc5b67..6adfcb8c1d 100644
--- a/contrib/libs/pire/pire/scanners/slow.h
+++ b/contrib/libs/pire/pire/scanners/slow.h
@@ -96,13 +96,13 @@ public:
BitSet(m.statesCount).Swap(state.flags);
}
- Char Translate(Char ch) const
+ Char Translate(Char ch) const
+ {
+ return m_letters[static_cast<size_t>(ch)];
+ }
+
+ Action NextTranslated(const State& current, State& next, Char l) const
{
- return m_letters[static_cast<size_t>(ch)];
- }
-
- Action NextTranslated(const State& current, State& next, Char l) const
- {
next.flags.Clear();
next.states.clear();
for (auto&& state : current.states) {
@@ -130,26 +130,26 @@ public:
return 0;
}
- Action Next(const State& current, State& next, Char c) const
- {
- return NextTranslated(current, next, Translate(c));
- }
-
+ Action Next(const State& current, State& next, Char c) const
+ {
+ return NextTranslated(current, next, Translate(c));
+ }
+
bool TakeAction(State&, Action) const { return false; }
- Action NextTranslated(State& s, Char l) const
+ Action NextTranslated(State& s, Char l) const
{
State dest(m.statesCount);
- Action a = NextTranslated(s, dest, l);
+ Action a = NextTranslated(s, dest, l);
s.Swap(dest);
return a;
}
- Action Next(State& s, Char c) const
- {
- return NextTranslated(s, Translate(c));
- }
-
+ Action Next(State& s, Char c) const
+ {
+ return NextTranslated(s, Translate(c));
+ }
+
bool Final(const State& s) const
{
for (auto&& state : s.states)
diff --git a/contrib/libs/ya.make b/contrib/libs/ya.make
index b7565d9c20..9c4640fdcf 100644
--- a/contrib/libs/ya.make
+++ b/contrib/libs/ya.make
@@ -105,7 +105,7 @@ RECURSE(
hiredis
httpd24
hwloc
- hyperscan
+ hyperscan
i18n
i18n/ut
icu
@@ -213,7 +213,7 @@ RECURSE(
lmdbxx/check
lmdbxx/example
lua
- lua-cjson
+ lua-cjson
luajit_21
lz4
lz4/generated
@@ -360,7 +360,7 @@ RECURSE(
xz
yajl
yaml
- yaml-cpp
+ yaml-cpp
zeromq
zlib
zlib-ng-develop
diff --git a/contrib/libs/yaml-cpp/LICENSE b/contrib/libs/yaml-cpp/LICENSE
index 1c6af6d417..991fdbbe7d 100644
--- a/contrib/libs/yaml-cpp/LICENSE
+++ b/contrib/libs/yaml-cpp/LICENSE
@@ -1,19 +1,19 @@
-Copyright (c) 2008-2015 Jesse Beder.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
+Copyright (c) 2008-2015 Jesse Beder.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/anchor.h b/contrib/libs/yaml-cpp/include/yaml-cpp/anchor.h
index 06965e8fba..06759c724d 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/anchor.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/anchor.h
@@ -1,17 +1,17 @@
-#ifndef ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <cstddef>
-
-namespace YAML {
-typedef std::size_t anchor_t;
-const anchor_t NullAnchor = 0;
-}
-
-#endif // ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <cstddef>
+
+namespace YAML {
+typedef std::size_t anchor_t;
+const anchor_t NullAnchor = 0;
+}
+
+#endif // ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/binary.h b/contrib/libs/yaml-cpp/include/yaml-cpp/binary.h
index ed61f438dd..29d5dbd027 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/binary.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/binary.h
@@ -1,67 +1,67 @@
-#ifndef BASE64_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define BASE64_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <string>
-#include <vector>
-
-#include "yaml-cpp/dll.h"
-
-namespace YAML {
-YAML_CPP_API std::string EncodeBase64(const unsigned char *data,
- std::size_t size);
-YAML_CPP_API std::vector<unsigned char> DecodeBase64(const std::string &input);
-
-class YAML_CPP_API Binary {
- public:
- Binary() : m_unownedData(0), m_unownedSize(0) {}
- Binary(const unsigned char *data_, std::size_t size_)
- : m_unownedData(data_), m_unownedSize(size_) {}
-
- bool owned() const { return !m_unownedData; }
- std::size_t size() const { return owned() ? m_data.size() : m_unownedSize; }
- const unsigned char *data() const {
- return owned() ? &m_data[0] : m_unownedData;
- }
-
- void swap(std::vector<unsigned char> &rhs) {
- if (m_unownedData) {
- m_data.swap(rhs);
- rhs.clear();
- rhs.resize(m_unownedSize);
- std::copy(m_unownedData, m_unownedData + m_unownedSize, rhs.begin());
- m_unownedData = 0;
- m_unownedSize = 0;
- } else {
- m_data.swap(rhs);
- }
- }
-
- bool operator==(const Binary &rhs) const {
- const std::size_t s = size();
- if (s != rhs.size())
- return false;
- const unsigned char *d1 = data();
- const unsigned char *d2 = rhs.data();
- for (std::size_t i = 0; i < s; i++) {
- if (*d1++ != *d2++)
- return false;
- }
- return true;
- }
-
- bool operator!=(const Binary &rhs) const { return !(*this == rhs); }
-
- private:
- std::vector<unsigned char> m_data;
- const unsigned char *m_unownedData;
- std::size_t m_unownedSize;
-};
-}
-
-#endif // BASE64_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef BASE64_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define BASE64_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+#include <vector>
+
+#include "yaml-cpp/dll.h"
+
+namespace YAML {
+YAML_CPP_API std::string EncodeBase64(const unsigned char *data,
+ std::size_t size);
+YAML_CPP_API std::vector<unsigned char> DecodeBase64(const std::string &input);
+
+class YAML_CPP_API Binary {
+ public:
+ Binary() : m_unownedData(0), m_unownedSize(0) {}
+ Binary(const unsigned char *data_, std::size_t size_)
+ : m_unownedData(data_), m_unownedSize(size_) {}
+
+ bool owned() const { return !m_unownedData; }
+ std::size_t size() const { return owned() ? m_data.size() : m_unownedSize; }
+ const unsigned char *data() const {
+ return owned() ? &m_data[0] : m_unownedData;
+ }
+
+ void swap(std::vector<unsigned char> &rhs) {
+ if (m_unownedData) {
+ m_data.swap(rhs);
+ rhs.clear();
+ rhs.resize(m_unownedSize);
+ std::copy(m_unownedData, m_unownedData + m_unownedSize, rhs.begin());
+ m_unownedData = 0;
+ m_unownedSize = 0;
+ } else {
+ m_data.swap(rhs);
+ }
+ }
+
+ bool operator==(const Binary &rhs) const {
+ const std::size_t s = size();
+ if (s != rhs.size())
+ return false;
+ const unsigned char *d1 = data();
+ const unsigned char *d2 = rhs.data();
+ for (std::size_t i = 0; i < s; i++) {
+ if (*d1++ != *d2++)
+ return false;
+ }
+ return true;
+ }
+
+ bool operator!=(const Binary &rhs) const { return !(*this == rhs); }
+
+ private:
+ std::vector<unsigned char> m_data;
+ const unsigned char *m_unownedData;
+ std::size_t m_unownedSize;
+};
+}
+
+#endif // BASE64_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/contrib/anchordict.h b/contrib/libs/yaml-cpp/include/yaml-cpp/contrib/anchordict.h
index 132f4e7f4a..78db9ec928 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/contrib/anchordict.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/contrib/anchordict.h
@@ -1,17 +1,17 @@
-#ifndef ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <vector>
-
-#include "../anchor.h"
-
-namespace YAML {
+#ifndef ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <vector>
+
+#include "../anchor.h"
+
+namespace YAML {
/**
* An object that stores and retrieves values correlating to {@link anchor_t}
* values.
@@ -19,21 +19,21 @@ namespace YAML {
* <p>Efficient implementation that can make assumptions about how
* {@code anchor_t} values are assigned by the {@link Parser} class.
*/
-template <class T>
-class AnchorDict {
- public:
- void Register(anchor_t anchor, T value) {
- if (anchor > m_data.size()) {
- m_data.resize(anchor);
- }
- m_data[anchor - 1] = value;
- }
-
- T Get(anchor_t anchor) const { return m_data[anchor - 1]; }
-
- private:
- std::vector<T> m_data;
-};
-}
-
-#endif // ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+template <class T>
+class AnchorDict {
+ public:
+ void Register(anchor_t anchor, T value) {
+ if (anchor > m_data.size()) {
+ m_data.resize(anchor);
+ }
+ m_data[anchor - 1] = value;
+ }
+
+ T Get(anchor_t anchor) const { return m_data[anchor - 1]; }
+
+ private:
+ std::vector<T> m_data;
+};
+}
+
+#endif // ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/contrib/graphbuilder.h b/contrib/libs/yaml-cpp/include/yaml-cpp/contrib/graphbuilder.h
index 4555291d41..f0a38f2887 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/contrib/graphbuilder.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/contrib/graphbuilder.h
@@ -1,149 +1,149 @@
-#ifndef GRAPHBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define GRAPHBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "yaml-cpp/mark.h"
-#include <string>
-
-namespace YAML {
-class Parser;
-
-// GraphBuilderInterface
-// . Abstraction of node creation
-// . pParentNode is always NULL or the return value of one of the NewXXX()
-// functions.
-class GraphBuilderInterface {
- public:
+#ifndef GRAPHBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define GRAPHBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/mark.h"
+#include <string>
+
+namespace YAML {
+class Parser;
+
+// GraphBuilderInterface
+// . Abstraction of node creation
+// . pParentNode is always NULL or the return value of one of the NewXXX()
+// functions.
+class GraphBuilderInterface {
+ public:
virtual ~GraphBuilderInterface() = 0;
- // Create and return a new node with a null value.
- virtual void *NewNull(const Mark &mark, void *pParentNode) = 0;
-
- // Create and return a new node with the given tag and value.
- virtual void *NewScalar(const Mark &mark, const std::string &tag,
- void *pParentNode, const std::string &value) = 0;
-
- // Create and return a new sequence node
- virtual void *NewSequence(const Mark &mark, const std::string &tag,
- void *pParentNode) = 0;
-
- // Add pNode to pSequence. pNode was created with one of the NewXxx()
- // functions and pSequence with NewSequence().
- virtual void AppendToSequence(void *pSequence, void *pNode) = 0;
-
- // Note that no moew entries will be added to pSequence
- virtual void SequenceComplete(void *pSequence) { (void)pSequence; }
-
- // Create and return a new map node
- virtual void *NewMap(const Mark &mark, const std::string &tag,
- void *pParentNode) = 0;
-
- // Add the pKeyNode => pValueNode mapping to pMap. pKeyNode and pValueNode
- // were created with one of the NewXxx() methods and pMap with NewMap().
- virtual void AssignInMap(void *pMap, void *pKeyNode, void *pValueNode) = 0;
-
- // Note that no more assignments will be made in pMap
- virtual void MapComplete(void *pMap) { (void)pMap; }
-
- // Return the node that should be used in place of an alias referencing
- // pNode (pNode by default)
- virtual void *AnchorReference(const Mark &mark, void *pNode) {
- (void)mark;
- return pNode;
- }
-};
-
-// Typesafe wrapper for GraphBuilderInterface. Assumes that Impl defines
-// Node, Sequence, and Map types. Sequence and Map must derive from Node
-// (unless Node is defined as void). Impl must also implement function with
-// all of the same names as the virtual functions in GraphBuilderInterface
-// -- including the ones with default implementations -- but with the
-// prototypes changed to accept an explicit Node*, Sequence*, or Map* where
-// appropriate.
-template <class Impl>
-class GraphBuilder : public GraphBuilderInterface {
- public:
- typedef typename Impl::Node Node;
- typedef typename Impl::Sequence Sequence;
- typedef typename Impl::Map Map;
-
- GraphBuilder(Impl &impl) : m_impl(impl) {
- Map *pMap = NULL;
- Sequence *pSeq = NULL;
- Node *pNode = NULL;
-
- // Type consistency checks
- pNode = pMap;
- pNode = pSeq;
- }
-
- GraphBuilderInterface &AsBuilderInterface() { return *this; }
-
- virtual void *NewNull(const Mark &mark, void *pParentNode) {
- return CheckType<Node>(m_impl.NewNull(mark, AsNode(pParentNode)));
- }
-
- virtual void *NewScalar(const Mark &mark, const std::string &tag,
- void *pParentNode, const std::string &value) {
- return CheckType<Node>(
- m_impl.NewScalar(mark, tag, AsNode(pParentNode), value));
- }
-
- virtual void *NewSequence(const Mark &mark, const std::string &tag,
- void *pParentNode) {
- return CheckType<Sequence>(
- m_impl.NewSequence(mark, tag, AsNode(pParentNode)));
- }
- virtual void AppendToSequence(void *pSequence, void *pNode) {
- m_impl.AppendToSequence(AsSequence(pSequence), AsNode(pNode));
- }
- virtual void SequenceComplete(void *pSequence) {
- m_impl.SequenceComplete(AsSequence(pSequence));
- }
-
- virtual void *NewMap(const Mark &mark, const std::string &tag,
- void *pParentNode) {
- return CheckType<Map>(m_impl.NewMap(mark, tag, AsNode(pParentNode)));
- }
- virtual void AssignInMap(void *pMap, void *pKeyNode, void *pValueNode) {
- m_impl.AssignInMap(AsMap(pMap), AsNode(pKeyNode), AsNode(pValueNode));
- }
- virtual void MapComplete(void *pMap) { m_impl.MapComplete(AsMap(pMap)); }
-
- virtual void *AnchorReference(const Mark &mark, void *pNode) {
- return CheckType<Node>(m_impl.AnchorReference(mark, AsNode(pNode)));
- }
-
- private:
- Impl &m_impl;
-
- // Static check for pointer to T
- template <class T, class U>
- static T *CheckType(U *p) {
- return p;
- }
-
- static Node *AsNode(void *pNode) { return static_cast<Node *>(pNode); }
- static Sequence *AsSequence(void *pSeq) {
- return static_cast<Sequence *>(pSeq);
- }
- static Map *AsMap(void *pMap) { return static_cast<Map *>(pMap); }
-};
-
-void *BuildGraphOfNextDocument(Parser &parser,
- GraphBuilderInterface &graphBuilder);
-
-template <class Impl>
-typename Impl::Node *BuildGraphOfNextDocument(Parser &parser, Impl &impl) {
- GraphBuilder<Impl> graphBuilder(impl);
- return static_cast<typename Impl::Node *>(
- BuildGraphOfNextDocument(parser, graphBuilder));
-}
-}
-
-#endif // GRAPHBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+ // Create and return a new node with a null value.
+ virtual void *NewNull(const Mark &mark, void *pParentNode) = 0;
+
+ // Create and return a new node with the given tag and value.
+ virtual void *NewScalar(const Mark &mark, const std::string &tag,
+ void *pParentNode, const std::string &value) = 0;
+
+ // Create and return a new sequence node
+ virtual void *NewSequence(const Mark &mark, const std::string &tag,
+ void *pParentNode) = 0;
+
+ // Add pNode to pSequence. pNode was created with one of the NewXxx()
+ // functions and pSequence with NewSequence().
+ virtual void AppendToSequence(void *pSequence, void *pNode) = 0;
+
+ // Note that no moew entries will be added to pSequence
+ virtual void SequenceComplete(void *pSequence) { (void)pSequence; }
+
+ // Create and return a new map node
+ virtual void *NewMap(const Mark &mark, const std::string &tag,
+ void *pParentNode) = 0;
+
+ // Add the pKeyNode => pValueNode mapping to pMap. pKeyNode and pValueNode
+ // were created with one of the NewXxx() methods and pMap with NewMap().
+ virtual void AssignInMap(void *pMap, void *pKeyNode, void *pValueNode) = 0;
+
+ // Note that no more assignments will be made in pMap
+ virtual void MapComplete(void *pMap) { (void)pMap; }
+
+ // Return the node that should be used in place of an alias referencing
+ // pNode (pNode by default)
+ virtual void *AnchorReference(const Mark &mark, void *pNode) {
+ (void)mark;
+ return pNode;
+ }
+};
+
+// Typesafe wrapper for GraphBuilderInterface. Assumes that Impl defines
+// Node, Sequence, and Map types. Sequence and Map must derive from Node
+// (unless Node is defined as void). Impl must also implement function with
+// all of the same names as the virtual functions in GraphBuilderInterface
+// -- including the ones with default implementations -- but with the
+// prototypes changed to accept an explicit Node*, Sequence*, or Map* where
+// appropriate.
+template <class Impl>
+class GraphBuilder : public GraphBuilderInterface {
+ public:
+ typedef typename Impl::Node Node;
+ typedef typename Impl::Sequence Sequence;
+ typedef typename Impl::Map Map;
+
+ GraphBuilder(Impl &impl) : m_impl(impl) {
+ Map *pMap = NULL;
+ Sequence *pSeq = NULL;
+ Node *pNode = NULL;
+
+ // Type consistency checks
+ pNode = pMap;
+ pNode = pSeq;
+ }
+
+ GraphBuilderInterface &AsBuilderInterface() { return *this; }
+
+ virtual void *NewNull(const Mark &mark, void *pParentNode) {
+ return CheckType<Node>(m_impl.NewNull(mark, AsNode(pParentNode)));
+ }
+
+ virtual void *NewScalar(const Mark &mark, const std::string &tag,
+ void *pParentNode, const std::string &value) {
+ return CheckType<Node>(
+ m_impl.NewScalar(mark, tag, AsNode(pParentNode), value));
+ }
+
+ virtual void *NewSequence(const Mark &mark, const std::string &tag,
+ void *pParentNode) {
+ return CheckType<Sequence>(
+ m_impl.NewSequence(mark, tag, AsNode(pParentNode)));
+ }
+ virtual void AppendToSequence(void *pSequence, void *pNode) {
+ m_impl.AppendToSequence(AsSequence(pSequence), AsNode(pNode));
+ }
+ virtual void SequenceComplete(void *pSequence) {
+ m_impl.SequenceComplete(AsSequence(pSequence));
+ }
+
+ virtual void *NewMap(const Mark &mark, const std::string &tag,
+ void *pParentNode) {
+ return CheckType<Map>(m_impl.NewMap(mark, tag, AsNode(pParentNode)));
+ }
+ virtual void AssignInMap(void *pMap, void *pKeyNode, void *pValueNode) {
+ m_impl.AssignInMap(AsMap(pMap), AsNode(pKeyNode), AsNode(pValueNode));
+ }
+ virtual void MapComplete(void *pMap) { m_impl.MapComplete(AsMap(pMap)); }
+
+ virtual void *AnchorReference(const Mark &mark, void *pNode) {
+ return CheckType<Node>(m_impl.AnchorReference(mark, AsNode(pNode)));
+ }
+
+ private:
+ Impl &m_impl;
+
+ // Static check for pointer to T
+ template <class T, class U>
+ static T *CheckType(U *p) {
+ return p;
+ }
+
+ static Node *AsNode(void *pNode) { return static_cast<Node *>(pNode); }
+ static Sequence *AsSequence(void *pSeq) {
+ return static_cast<Sequence *>(pSeq);
+ }
+ static Map *AsMap(void *pMap) { return static_cast<Map *>(pMap); }
+};
+
+void *BuildGraphOfNextDocument(Parser &parser,
+ GraphBuilderInterface &graphBuilder);
+
+template <class Impl>
+typename Impl::Node *BuildGraphOfNextDocument(Parser &parser, Impl &impl) {
+ GraphBuilder<Impl> graphBuilder(impl);
+ return static_cast<typename Impl::Node *>(
+ BuildGraphOfNextDocument(parser, graphBuilder));
+}
+}
+
+#endif // GRAPHBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/dll.h b/contrib/libs/yaml-cpp/include/yaml-cpp/dll.h
index 639f5410a9..a32c06b2e3 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/dll.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/dll.h
@@ -1,33 +1,33 @@
-#ifndef DLL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define DLL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-// The following ifdef block is the standard way of creating macros which make
+#ifndef DLL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define DLL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+// The following ifdef block is the standard way of creating macros which make
// exporting from a DLL simpler. All files within this DLL are compiled with the
// yaml_cpp_EXPORTS symbol defined on the command line. This symbol should not
// be defined on any project that uses this DLL. This way any other project
// whose source files include this file see YAML_CPP_API functions as being
// imported from a DLL, whereas this DLL sees symbols defined with this macro as
// being exported.
-#undef YAML_CPP_API
-
-#ifdef YAML_CPP_DLL // Using or Building YAML-CPP DLL (definition defined
- // manually)
-#ifdef yaml_cpp_EXPORTS // Building YAML-CPP DLL (definition created by CMake
- // or defined manually)
-// #pragma message( "Defining YAML_CPP_API for DLL export" )
-#define YAML_CPP_API __declspec(dllexport)
-#else // yaml_cpp_EXPORTS
-// #pragma message( "Defining YAML_CPP_API for DLL import" )
-#define YAML_CPP_API __declspec(dllimport)
-#endif // yaml_cpp_EXPORTS
-#else // YAML_CPP_DLL
-#define YAML_CPP_API
-#endif // YAML_CPP_DLL
-
-#endif // DLL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#undef YAML_CPP_API
+
+#ifdef YAML_CPP_DLL // Using or Building YAML-CPP DLL (definition defined
+ // manually)
+#ifdef yaml_cpp_EXPORTS // Building YAML-CPP DLL (definition created by CMake
+ // or defined manually)
+// #pragma message( "Defining YAML_CPP_API for DLL export" )
+#define YAML_CPP_API __declspec(dllexport)
+#else // yaml_cpp_EXPORTS
+// #pragma message( "Defining YAML_CPP_API for DLL import" )
+#define YAML_CPP_API __declspec(dllimport)
+#endif // yaml_cpp_EXPORTS
+#else // YAML_CPP_DLL
+#define YAML_CPP_API
+#endif // YAML_CPP_DLL
+
+#endif // DLL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/emitfromevents.h b/contrib/libs/yaml-cpp/include/yaml-cpp/emitfromevents.h
index bb4eee7789..f14b051ab0 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/emitfromevents.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/emitfromevents.h
@@ -1,57 +1,57 @@
-#ifndef EMITFROMEVENTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define EMITFROMEVENTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <stack>
-
-#include "yaml-cpp/anchor.h"
-#include "yaml-cpp/emitterstyle.h"
-#include "yaml-cpp/eventhandler.h"
-
-namespace YAML {
-struct Mark;
-} // namespace YAML
-
-namespace YAML {
-class Emitter;
-
-class EmitFromEvents : public EventHandler {
- public:
- EmitFromEvents(Emitter& emitter);
-
- virtual void OnDocumentStart(const Mark& mark);
- virtual void OnDocumentEnd();
-
- virtual void OnNull(const Mark& mark, anchor_t anchor);
- virtual void OnAlias(const Mark& mark, anchor_t anchor);
- virtual void OnScalar(const Mark& mark, const std::string& tag,
- anchor_t anchor, const std::string& value);
-
- virtual void OnSequenceStart(const Mark& mark, const std::string& tag,
- anchor_t anchor, EmitterStyle::value style);
- virtual void OnSequenceEnd();
-
- virtual void OnMapStart(const Mark& mark, const std::string& tag,
- anchor_t anchor, EmitterStyle::value style);
- virtual void OnMapEnd();
-
- private:
- void BeginNode();
- void EmitProps(const std::string& tag, anchor_t anchor);
-
- private:
- Emitter& m_emitter;
-
- struct State {
- enum value { WaitingForSequenceEntry, WaitingForKey, WaitingForValue };
- };
- std::stack<State::value> m_stateStack;
-};
-}
-
-#endif // EMITFROMEVENTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef EMITFROMEVENTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EMITFROMEVENTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <stack>
+
+#include "yaml-cpp/anchor.h"
+#include "yaml-cpp/emitterstyle.h"
+#include "yaml-cpp/eventhandler.h"
+
+namespace YAML {
+struct Mark;
+} // namespace YAML
+
+namespace YAML {
+class Emitter;
+
+class EmitFromEvents : public EventHandler {
+ public:
+ EmitFromEvents(Emitter& emitter);
+
+ virtual void OnDocumentStart(const Mark& mark);
+ virtual void OnDocumentEnd();
+
+ virtual void OnNull(const Mark& mark, anchor_t anchor);
+ virtual void OnAlias(const Mark& mark, anchor_t anchor);
+ virtual void OnScalar(const Mark& mark, const std::string& tag,
+ anchor_t anchor, const std::string& value);
+
+ virtual void OnSequenceStart(const Mark& mark, const std::string& tag,
+ anchor_t anchor, EmitterStyle::value style);
+ virtual void OnSequenceEnd();
+
+ virtual void OnMapStart(const Mark& mark, const std::string& tag,
+ anchor_t anchor, EmitterStyle::value style);
+ virtual void OnMapEnd();
+
+ private:
+ void BeginNode();
+ void EmitProps(const std::string& tag, anchor_t anchor);
+
+ private:
+ Emitter& m_emitter;
+
+ struct State {
+ enum value { WaitingForSequenceEntry, WaitingForKey, WaitingForValue };
+ };
+ std::stack<State::value> m_stateStack;
+};
+}
+
+#endif // EMITFROMEVENTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/emitter.h b/contrib/libs/yaml-cpp/include/yaml-cpp/emitter.h
index 0567704148..ef92cc4035 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/emitter.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/emitter.h
@@ -1,254 +1,254 @@
-#ifndef EMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define EMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <cstddef>
-#include <memory>
-#include <sstream>
-#include <string>
-
-#include "yaml-cpp/binary.h"
-#include "yaml-cpp/dll.h"
-#include "yaml-cpp/emitterdef.h"
-#include "yaml-cpp/emittermanip.h"
-#include "yaml-cpp/noncopyable.h"
-#include "yaml-cpp/null.h"
-#include "yaml-cpp/ostream_wrapper.h"
-
-namespace YAML {
-class Binary;
-struct _Null;
-} // namespace YAML
-
-namespace YAML {
-class EmitterState;
-
-class YAML_CPP_API Emitter : private noncopyable {
- public:
- Emitter();
- explicit Emitter(std::ostream& stream);
- ~Emitter();
-
- // output
- const char* c_str() const;
- std::size_t size() const;
-
- // state checking
- bool good() const;
- const std::string GetLastError() const;
-
- // global setters
- bool SetOutputCharset(EMITTER_MANIP value);
- bool SetStringFormat(EMITTER_MANIP value);
- bool SetBoolFormat(EMITTER_MANIP value);
- bool SetIntBase(EMITTER_MANIP value);
- bool SetSeqFormat(EMITTER_MANIP value);
- bool SetMapFormat(EMITTER_MANIP value);
- bool SetIndent(std::size_t n);
- bool SetPreCommentIndent(std::size_t n);
- bool SetPostCommentIndent(std::size_t n);
- bool SetFloatPrecision(std::size_t n);
- bool SetDoublePrecision(std::size_t n);
-
- // local setters
- Emitter& SetLocalValue(EMITTER_MANIP value);
- Emitter& SetLocalIndent(const _Indent& indent);
- Emitter& SetLocalPrecision(const _Precision& precision);
-
- // overloads of write
- Emitter& Write(const std::string& str);
- Emitter& Write(bool b);
- Emitter& Write(char ch);
- Emitter& Write(const _Alias& alias);
- Emitter& Write(const _Anchor& anchor);
- Emitter& Write(const _Tag& tag);
- Emitter& Write(const _Comment& comment);
- Emitter& Write(const _Null& n);
- Emitter& Write(const Binary& binary);
-
- template <typename T>
- Emitter& WriteIntegralType(T value);
-
- template <typename T>
- Emitter& WriteStreamable(T value);
-
- private:
- template <typename T>
- void SetStreamablePrecision(std::stringstream&) {}
- std::size_t GetFloatPrecision() const;
- std::size_t GetDoublePrecision() const;
-
- void PrepareIntegralStream(std::stringstream& stream) const;
- void StartedScalar();
-
- private:
- void EmitBeginDoc();
- void EmitEndDoc();
- void EmitBeginSeq();
- void EmitEndSeq();
- void EmitBeginMap();
- void EmitEndMap();
- void EmitNewline();
- void EmitKindTag();
- void EmitTag(bool verbatim, const _Tag& tag);
-
- void PrepareNode(EmitterNodeType::value child);
- void PrepareTopNode(EmitterNodeType::value child);
- void FlowSeqPrepareNode(EmitterNodeType::value child);
- void BlockSeqPrepareNode(EmitterNodeType::value child);
-
- void FlowMapPrepareNode(EmitterNodeType::value child);
-
- void FlowMapPrepareLongKey(EmitterNodeType::value child);
- void FlowMapPrepareLongKeyValue(EmitterNodeType::value child);
- void FlowMapPrepareSimpleKey(EmitterNodeType::value child);
- void FlowMapPrepareSimpleKeyValue(EmitterNodeType::value child);
-
- void BlockMapPrepareNode(EmitterNodeType::value child);
-
- void BlockMapPrepareLongKey(EmitterNodeType::value child);
- void BlockMapPrepareLongKeyValue(EmitterNodeType::value child);
- void BlockMapPrepareSimpleKey(EmitterNodeType::value child);
- void BlockMapPrepareSimpleKeyValue(EmitterNodeType::value child);
-
- void SpaceOrIndentTo(bool requireSpace, std::size_t indent);
-
- const char* ComputeFullBoolName(bool b) const;
- bool CanEmitNewline() const;
-
- private:
+#ifndef EMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <cstddef>
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include "yaml-cpp/binary.h"
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/emitterdef.h"
+#include "yaml-cpp/emittermanip.h"
+#include "yaml-cpp/noncopyable.h"
+#include "yaml-cpp/null.h"
+#include "yaml-cpp/ostream_wrapper.h"
+
+namespace YAML {
+class Binary;
+struct _Null;
+} // namespace YAML
+
+namespace YAML {
+class EmitterState;
+
+class YAML_CPP_API Emitter : private noncopyable {
+ public:
+ Emitter();
+ explicit Emitter(std::ostream& stream);
+ ~Emitter();
+
+ // output
+ const char* c_str() const;
+ std::size_t size() const;
+
+ // state checking
+ bool good() const;
+ const std::string GetLastError() const;
+
+ // global setters
+ bool SetOutputCharset(EMITTER_MANIP value);
+ bool SetStringFormat(EMITTER_MANIP value);
+ bool SetBoolFormat(EMITTER_MANIP value);
+ bool SetIntBase(EMITTER_MANIP value);
+ bool SetSeqFormat(EMITTER_MANIP value);
+ bool SetMapFormat(EMITTER_MANIP value);
+ bool SetIndent(std::size_t n);
+ bool SetPreCommentIndent(std::size_t n);
+ bool SetPostCommentIndent(std::size_t n);
+ bool SetFloatPrecision(std::size_t n);
+ bool SetDoublePrecision(std::size_t n);
+
+ // local setters
+ Emitter& SetLocalValue(EMITTER_MANIP value);
+ Emitter& SetLocalIndent(const _Indent& indent);
+ Emitter& SetLocalPrecision(const _Precision& precision);
+
+ // overloads of write
+ Emitter& Write(const std::string& str);
+ Emitter& Write(bool b);
+ Emitter& Write(char ch);
+ Emitter& Write(const _Alias& alias);
+ Emitter& Write(const _Anchor& anchor);
+ Emitter& Write(const _Tag& tag);
+ Emitter& Write(const _Comment& comment);
+ Emitter& Write(const _Null& n);
+ Emitter& Write(const Binary& binary);
+
+ template <typename T>
+ Emitter& WriteIntegralType(T value);
+
+ template <typename T>
+ Emitter& WriteStreamable(T value);
+
+ private:
+ template <typename T>
+ void SetStreamablePrecision(std::stringstream&) {}
+ std::size_t GetFloatPrecision() const;
+ std::size_t GetDoublePrecision() const;
+
+ void PrepareIntegralStream(std::stringstream& stream) const;
+ void StartedScalar();
+
+ private:
+ void EmitBeginDoc();
+ void EmitEndDoc();
+ void EmitBeginSeq();
+ void EmitEndSeq();
+ void EmitBeginMap();
+ void EmitEndMap();
+ void EmitNewline();
+ void EmitKindTag();
+ void EmitTag(bool verbatim, const _Tag& tag);
+
+ void PrepareNode(EmitterNodeType::value child);
+ void PrepareTopNode(EmitterNodeType::value child);
+ void FlowSeqPrepareNode(EmitterNodeType::value child);
+ void BlockSeqPrepareNode(EmitterNodeType::value child);
+
+ void FlowMapPrepareNode(EmitterNodeType::value child);
+
+ void FlowMapPrepareLongKey(EmitterNodeType::value child);
+ void FlowMapPrepareLongKeyValue(EmitterNodeType::value child);
+ void FlowMapPrepareSimpleKey(EmitterNodeType::value child);
+ void FlowMapPrepareSimpleKeyValue(EmitterNodeType::value child);
+
+ void BlockMapPrepareNode(EmitterNodeType::value child);
+
+ void BlockMapPrepareLongKey(EmitterNodeType::value child);
+ void BlockMapPrepareLongKeyValue(EmitterNodeType::value child);
+ void BlockMapPrepareSimpleKey(EmitterNodeType::value child);
+ void BlockMapPrepareSimpleKeyValue(EmitterNodeType::value child);
+
+ void SpaceOrIndentTo(bool requireSpace, std::size_t indent);
+
+ const char* ComputeFullBoolName(bool b) const;
+ bool CanEmitNewline() const;
+
+ private:
std::unique_ptr<EmitterState> m_pState;
- ostream_wrapper m_stream;
-};
-
-template <typename T>
-inline Emitter& Emitter::WriteIntegralType(T value) {
- if (!good())
- return *this;
-
- PrepareNode(EmitterNodeType::Scalar);
-
- std::stringstream stream;
- PrepareIntegralStream(stream);
- stream << value;
- m_stream << stream.str();
-
- StartedScalar();
-
- return *this;
-}
-
-template <typename T>
-inline Emitter& Emitter::WriteStreamable(T value) {
- if (!good())
- return *this;
-
- PrepareNode(EmitterNodeType::Scalar);
-
- std::stringstream stream;
- SetStreamablePrecision<T>(stream);
- stream << value;
- m_stream << stream.str();
-
- StartedScalar();
-
- return *this;
-}
-
-template <>
-inline void Emitter::SetStreamablePrecision<float>(std::stringstream& stream) {
+ ostream_wrapper m_stream;
+};
+
+template <typename T>
+inline Emitter& Emitter::WriteIntegralType(T value) {
+ if (!good())
+ return *this;
+
+ PrepareNode(EmitterNodeType::Scalar);
+
+ std::stringstream stream;
+ PrepareIntegralStream(stream);
+ stream << value;
+ m_stream << stream.str();
+
+ StartedScalar();
+
+ return *this;
+}
+
+template <typename T>
+inline Emitter& Emitter::WriteStreamable(T value) {
+ if (!good())
+ return *this;
+
+ PrepareNode(EmitterNodeType::Scalar);
+
+ std::stringstream stream;
+ SetStreamablePrecision<T>(stream);
+ stream << value;
+ m_stream << stream.str();
+
+ StartedScalar();
+
+ return *this;
+}
+
+template <>
+inline void Emitter::SetStreamablePrecision<float>(std::stringstream& stream) {
stream.precision(static_cast<std::streamsize>(GetFloatPrecision()));
-}
-
-template <>
-inline void Emitter::SetStreamablePrecision<double>(std::stringstream& stream) {
+}
+
+template <>
+inline void Emitter::SetStreamablePrecision<double>(std::stringstream& stream) {
stream.precision(static_cast<std::streamsize>(GetDoublePrecision()));
-}
-
-// overloads of insertion
-inline Emitter& operator<<(Emitter& emitter, const std::string& v) {
- return emitter.Write(v);
-}
-inline Emitter& operator<<(Emitter& emitter, bool v) {
- return emitter.Write(v);
-}
-inline Emitter& operator<<(Emitter& emitter, char v) {
- return emitter.Write(v);
-}
-inline Emitter& operator<<(Emitter& emitter, unsigned char v) {
- return emitter.Write(static_cast<char>(v));
-}
-inline Emitter& operator<<(Emitter& emitter, const _Alias& v) {
- return emitter.Write(v);
-}
-inline Emitter& operator<<(Emitter& emitter, const _Anchor& v) {
- return emitter.Write(v);
-}
-inline Emitter& operator<<(Emitter& emitter, const _Tag& v) {
- return emitter.Write(v);
-}
-inline Emitter& operator<<(Emitter& emitter, const _Comment& v) {
- return emitter.Write(v);
-}
-inline Emitter& operator<<(Emitter& emitter, const _Null& v) {
- return emitter.Write(v);
-}
-inline Emitter& operator<<(Emitter& emitter, const Binary& b) {
- return emitter.Write(b);
-}
-
-inline Emitter& operator<<(Emitter& emitter, const char* v) {
- return emitter.Write(std::string(v));
-}
-
-inline Emitter& operator<<(Emitter& emitter, int v) {
- return emitter.WriteIntegralType(v);
-}
-inline Emitter& operator<<(Emitter& emitter, unsigned int v) {
- return emitter.WriteIntegralType(v);
-}
-inline Emitter& operator<<(Emitter& emitter, short v) {
- return emitter.WriteIntegralType(v);
-}
-inline Emitter& operator<<(Emitter& emitter, unsigned short v) {
- return emitter.WriteIntegralType(v);
-}
-inline Emitter& operator<<(Emitter& emitter, long v) {
- return emitter.WriteIntegralType(v);
-}
-inline Emitter& operator<<(Emitter& emitter, unsigned long v) {
- return emitter.WriteIntegralType(v);
-}
-inline Emitter& operator<<(Emitter& emitter, long long v) {
- return emitter.WriteIntegralType(v);
-}
-inline Emitter& operator<<(Emitter& emitter, unsigned long long v) {
- return emitter.WriteIntegralType(v);
-}
-
-inline Emitter& operator<<(Emitter& emitter, float v) {
- return emitter.WriteStreamable(v);
-}
-inline Emitter& operator<<(Emitter& emitter, double v) {
- return emitter.WriteStreamable(v);
-}
-
-inline Emitter& operator<<(Emitter& emitter, EMITTER_MANIP value) {
- return emitter.SetLocalValue(value);
-}
-
-inline Emitter& operator<<(Emitter& emitter, _Indent indent) {
- return emitter.SetLocalIndent(indent);
-}
-
-inline Emitter& operator<<(Emitter& emitter, _Precision precision) {
- return emitter.SetLocalPrecision(precision);
-}
-}
-
-#endif // EMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+}
+
+// overloads of insertion
+inline Emitter& operator<<(Emitter& emitter, const std::string& v) {
+ return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, bool v) {
+ return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, char v) {
+ return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, unsigned char v) {
+ return emitter.Write(static_cast<char>(v));
+}
+inline Emitter& operator<<(Emitter& emitter, const _Alias& v) {
+ return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, const _Anchor& v) {
+ return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, const _Tag& v) {
+ return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, const _Comment& v) {
+ return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, const _Null& v) {
+ return emitter.Write(v);
+}
+inline Emitter& operator<<(Emitter& emitter, const Binary& b) {
+ return emitter.Write(b);
+}
+
+inline Emitter& operator<<(Emitter& emitter, const char* v) {
+ return emitter.Write(std::string(v));
+}
+
+inline Emitter& operator<<(Emitter& emitter, int v) {
+ return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, unsigned int v) {
+ return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, short v) {
+ return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, unsigned short v) {
+ return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, long v) {
+ return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, unsigned long v) {
+ return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, long long v) {
+ return emitter.WriteIntegralType(v);
+}
+inline Emitter& operator<<(Emitter& emitter, unsigned long long v) {
+ return emitter.WriteIntegralType(v);
+}
+
+inline Emitter& operator<<(Emitter& emitter, float v) {
+ return emitter.WriteStreamable(v);
+}
+inline Emitter& operator<<(Emitter& emitter, double v) {
+ return emitter.WriteStreamable(v);
+}
+
+inline Emitter& operator<<(Emitter& emitter, EMITTER_MANIP value) {
+ return emitter.SetLocalValue(value);
+}
+
+inline Emitter& operator<<(Emitter& emitter, _Indent indent) {
+ return emitter.SetLocalIndent(indent);
+}
+
+inline Emitter& operator<<(Emitter& emitter, _Precision precision) {
+ return emitter.SetLocalPrecision(precision);
+}
+}
+
+#endif // EMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/emitterdef.h b/contrib/libs/yaml-cpp/include/yaml-cpp/emitterdef.h
index f610a1cfcb..0b426957fa 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/emitterdef.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/emitterdef.h
@@ -1,16 +1,16 @@
-#ifndef EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-namespace YAML {
-struct EmitterNodeType {
- enum value { NoType, Property, Scalar, FlowSeq, BlockSeq, FlowMap, BlockMap };
-};
-}
-
-#endif // EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+namespace YAML {
+struct EmitterNodeType {
+ enum value { NoType, Property, Scalar, FlowSeq, BlockSeq, FlowMap, BlockMap };
+};
+}
+
+#endif // EMITTERDEF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/emittermanip.h b/contrib/libs/yaml-cpp/include/yaml-cpp/emittermanip.h
index 06e855ae9e..89f7256714 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/emittermanip.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/emittermanip.h
@@ -1,137 +1,137 @@
-#ifndef EMITTERMANIP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define EMITTERMANIP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <string>
-
-namespace YAML {
-enum EMITTER_MANIP {
- // general manipulators
- Auto,
- TagByKind,
- Newline,
-
- // output character set
- EmitNonAscii,
- EscapeNonAscii,
-
- // string manipulators
- // Auto, // duplicate
- SingleQuoted,
- DoubleQuoted,
- Literal,
-
- // bool manipulators
- YesNoBool, // yes, no
- TrueFalseBool, // true, false
- OnOffBool, // on, off
- UpperCase, // TRUE, N
- LowerCase, // f, yes
- CamelCase, // No, Off
- LongBool, // yes, On
- ShortBool, // y, t
-
- // int manipulators
- Dec,
- Hex,
- Oct,
-
- // document manipulators
- BeginDoc,
- EndDoc,
-
- // sequence manipulators
- BeginSeq,
- EndSeq,
- Flow,
- Block,
-
- // map manipulators
- BeginMap,
- EndMap,
- Key,
- Value,
- // Flow, // duplicate
- // Block, // duplicate
- // Auto, // duplicate
- LongKey
-};
-
-struct _Indent {
- _Indent(int value_) : value(value_) {}
- int value;
-};
-
-inline _Indent Indent(int value) { return _Indent(value); }
-
-struct _Alias {
- _Alias(const std::string& content_) : content(content_) {}
- std::string content;
-};
-
-inline _Alias Alias(const std::string content) { return _Alias(content); }
-
-struct _Anchor {
- _Anchor(const std::string& content_) : content(content_) {}
- std::string content;
-};
-
-inline _Anchor Anchor(const std::string content) { return _Anchor(content); }
-
-struct _Tag {
- struct Type {
- enum value { Verbatim, PrimaryHandle, NamedHandle };
- };
-
- explicit _Tag(const std::string& prefix_, const std::string& content_,
- Type::value type_)
- : prefix(prefix_), content(content_), type(type_) {}
- std::string prefix;
- std::string content;
- Type::value type;
-};
-
-inline _Tag VerbatimTag(const std::string content) {
- return _Tag("", content, _Tag::Type::Verbatim);
-}
-
-inline _Tag LocalTag(const std::string content) {
- return _Tag("", content, _Tag::Type::PrimaryHandle);
-}
-
-inline _Tag LocalTag(const std::string& prefix, const std::string content) {
- return _Tag(prefix, content, _Tag::Type::NamedHandle);
-}
-
-inline _Tag SecondaryTag(const std::string content) {
- return _Tag("", content, _Tag::Type::NamedHandle);
-}
-
-struct _Comment {
- _Comment(const std::string& content_) : content(content_) {}
- std::string content;
-};
-
-inline _Comment Comment(const std::string content) { return _Comment(content); }
-
-struct _Precision {
- _Precision(int floatPrecision_, int doublePrecision_)
- : floatPrecision(floatPrecision_), doublePrecision(doublePrecision_) {}
-
- int floatPrecision;
- int doublePrecision;
-};
-
-inline _Precision FloatPrecision(int n) { return _Precision(n, -1); }
-
-inline _Precision DoublePrecision(int n) { return _Precision(-1, n); }
-
-inline _Precision Precision(int n) { return _Precision(n, n); }
-}
-
-#endif // EMITTERMANIP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef EMITTERMANIP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EMITTERMANIP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+
+namespace YAML {
+enum EMITTER_MANIP {
+ // general manipulators
+ Auto,
+ TagByKind,
+ Newline,
+
+ // output character set
+ EmitNonAscii,
+ EscapeNonAscii,
+
+ // string manipulators
+ // Auto, // duplicate
+ SingleQuoted,
+ DoubleQuoted,
+ Literal,
+
+ // bool manipulators
+ YesNoBool, // yes, no
+ TrueFalseBool, // true, false
+ OnOffBool, // on, off
+ UpperCase, // TRUE, N
+ LowerCase, // f, yes
+ CamelCase, // No, Off
+ LongBool, // yes, On
+ ShortBool, // y, t
+
+ // int manipulators
+ Dec,
+ Hex,
+ Oct,
+
+ // document manipulators
+ BeginDoc,
+ EndDoc,
+
+ // sequence manipulators
+ BeginSeq,
+ EndSeq,
+ Flow,
+ Block,
+
+ // map manipulators
+ BeginMap,
+ EndMap,
+ Key,
+ Value,
+ // Flow, // duplicate
+ // Block, // duplicate
+ // Auto, // duplicate
+ LongKey
+};
+
+struct _Indent {
+ _Indent(int value_) : value(value_) {}
+ int value;
+};
+
+inline _Indent Indent(int value) { return _Indent(value); }
+
+struct _Alias {
+ _Alias(const std::string& content_) : content(content_) {}
+ std::string content;
+};
+
+inline _Alias Alias(const std::string content) { return _Alias(content); }
+
+struct _Anchor {
+ _Anchor(const std::string& content_) : content(content_) {}
+ std::string content;
+};
+
+inline _Anchor Anchor(const std::string content) { return _Anchor(content); }
+
+struct _Tag {
+ struct Type {
+ enum value { Verbatim, PrimaryHandle, NamedHandle };
+ };
+
+ explicit _Tag(const std::string& prefix_, const std::string& content_,
+ Type::value type_)
+ : prefix(prefix_), content(content_), type(type_) {}
+ std::string prefix;
+ std::string content;
+ Type::value type;
+};
+
+inline _Tag VerbatimTag(const std::string content) {
+ return _Tag("", content, _Tag::Type::Verbatim);
+}
+
+inline _Tag LocalTag(const std::string content) {
+ return _Tag("", content, _Tag::Type::PrimaryHandle);
+}
+
+inline _Tag LocalTag(const std::string& prefix, const std::string content) {
+ return _Tag(prefix, content, _Tag::Type::NamedHandle);
+}
+
+inline _Tag SecondaryTag(const std::string content) {
+ return _Tag("", content, _Tag::Type::NamedHandle);
+}
+
+struct _Comment {
+ _Comment(const std::string& content_) : content(content_) {}
+ std::string content;
+};
+
+inline _Comment Comment(const std::string content) { return _Comment(content); }
+
+struct _Precision {
+ _Precision(int floatPrecision_, int doublePrecision_)
+ : floatPrecision(floatPrecision_), doublePrecision(doublePrecision_) {}
+
+ int floatPrecision;
+ int doublePrecision;
+};
+
+inline _Precision FloatPrecision(int n) { return _Precision(n, -1); }
+
+inline _Precision DoublePrecision(int n) { return _Precision(-1, n); }
+
+inline _Precision Precision(int n) { return _Precision(n, n); }
+}
+
+#endif // EMITTERMANIP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/emitterstyle.h b/contrib/libs/yaml-cpp/include/yaml-cpp/emitterstyle.h
index 623b80260e..67bb3981b1 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/emitterstyle.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/emitterstyle.h
@@ -1,16 +1,16 @@
-#ifndef EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-namespace YAML {
-struct EmitterStyle {
- enum value { Default, Block, Flow };
-};
-}
-
-#endif // EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+namespace YAML {
+struct EmitterStyle {
+ enum value { Default, Block, Flow };
+};
+}
+
+#endif // EMITTERSTYLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/eventhandler.h b/contrib/libs/yaml-cpp/include/yaml-cpp/eventhandler.h
index e1e793f5d9..efe381c621 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/eventhandler.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/eventhandler.h
@@ -1,40 +1,40 @@
-#ifndef EVENTHANDLER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define EVENTHANDLER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <string>
-
-#include "yaml-cpp/anchor.h"
-#include "yaml-cpp/emitterstyle.h"
-
-namespace YAML {
-struct Mark;
-
-class EventHandler {
- public:
- virtual ~EventHandler() {}
-
- virtual void OnDocumentStart(const Mark& mark) = 0;
- virtual void OnDocumentEnd() = 0;
-
- virtual void OnNull(const Mark& mark, anchor_t anchor) = 0;
- virtual void OnAlias(const Mark& mark, anchor_t anchor) = 0;
- virtual void OnScalar(const Mark& mark, const std::string& tag,
- anchor_t anchor, const std::string& value) = 0;
-
- virtual void OnSequenceStart(const Mark& mark, const std::string& tag,
- anchor_t anchor, EmitterStyle::value style) = 0;
- virtual void OnSequenceEnd() = 0;
-
- virtual void OnMapStart(const Mark& mark, const std::string& tag,
- anchor_t anchor, EmitterStyle::value style) = 0;
- virtual void OnMapEnd() = 0;
-};
-}
-
-#endif // EVENTHANDLER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef EVENTHANDLER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EVENTHANDLER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+
+#include "yaml-cpp/anchor.h"
+#include "yaml-cpp/emitterstyle.h"
+
+namespace YAML {
+struct Mark;
+
+class EventHandler {
+ public:
+ virtual ~EventHandler() {}
+
+ virtual void OnDocumentStart(const Mark& mark) = 0;
+ virtual void OnDocumentEnd() = 0;
+
+ virtual void OnNull(const Mark& mark, anchor_t anchor) = 0;
+ virtual void OnAlias(const Mark& mark, anchor_t anchor) = 0;
+ virtual void OnScalar(const Mark& mark, const std::string& tag,
+ anchor_t anchor, const std::string& value) = 0;
+
+ virtual void OnSequenceStart(const Mark& mark, const std::string& tag,
+ anchor_t anchor, EmitterStyle::value style) = 0;
+ virtual void OnSequenceEnd() = 0;
+
+ virtual void OnMapStart(const Mark& mark, const std::string& tag,
+ anchor_t anchor, EmitterStyle::value style) = 0;
+ virtual void OnMapEnd() = 0;
+};
+}
+
+#endif // EVENTHANDLER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/exceptions.h b/contrib/libs/yaml-cpp/include/yaml-cpp/exceptions.h
index 78f22d42d5..9c96859b2c 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/exceptions.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/exceptions.h
@@ -1,18 +1,18 @@
-#ifndef EXCEPTIONS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define EXCEPTIONS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "yaml-cpp/mark.h"
-#include "yaml-cpp/traits.h"
+#ifndef EXCEPTIONS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EXCEPTIONS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/mark.h"
+#include "yaml-cpp/traits.h"
#include <sstream>
-#include <stdexcept>
-#include <string>
-
+#include <stdexcept>
+#include <string>
+
// This is here for compatibility with older versions of Visual Studio
// which don't support noexcept
#ifdef _MSC_VER
@@ -21,247 +21,247 @@
#define YAML_CPP_NOEXCEPT noexcept
#endif
-namespace YAML {
-// error messages
-namespace ErrorMsg {
-const char* const YAML_DIRECTIVE_ARGS =
- "YAML directives must have exactly one argument";
-const char* const YAML_VERSION = "bad YAML version: ";
-const char* const YAML_MAJOR_VERSION = "YAML major version too large";
-const char* const REPEATED_YAML_DIRECTIVE = "repeated YAML directive";
-const char* const TAG_DIRECTIVE_ARGS =
- "TAG directives must have exactly two arguments";
-const char* const REPEATED_TAG_DIRECTIVE = "repeated TAG directive";
-const char* const CHAR_IN_TAG_HANDLE =
- "illegal character found while scanning tag handle";
-const char* const TAG_WITH_NO_SUFFIX = "tag handle with no suffix";
-const char* const END_OF_VERBATIM_TAG = "end of verbatim tag not found";
-const char* const END_OF_MAP = "end of map not found";
-const char* const END_OF_MAP_FLOW = "end of map flow not found";
-const char* const END_OF_SEQ = "end of sequence not found";
-const char* const END_OF_SEQ_FLOW = "end of sequence flow not found";
-const char* const MULTIPLE_TAGS =
- "cannot assign multiple tags to the same node";
-const char* const MULTIPLE_ANCHORS =
- "cannot assign multiple anchors to the same node";
-const char* const MULTIPLE_ALIASES =
- "cannot assign multiple aliases to the same node";
-const char* const ALIAS_CONTENT =
- "aliases can't have any content, *including* tags";
-const char* const INVALID_HEX = "bad character found while scanning hex number";
-const char* const INVALID_UNICODE = "invalid unicode: ";
-const char* const INVALID_ESCAPE = "unknown escape character: ";
-const char* const UNKNOWN_TOKEN = "unknown token";
-const char* const DOC_IN_SCALAR = "illegal document indicator in scalar";
-const char* const EOF_IN_SCALAR = "illegal EOF in scalar";
-const char* const CHAR_IN_SCALAR = "illegal character in scalar";
-const char* const TAB_IN_INDENTATION =
- "illegal tab when looking for indentation";
-const char* const FLOW_END = "illegal flow end";
-const char* const BLOCK_ENTRY = "illegal block entry";
-const char* const MAP_KEY = "illegal map key";
-const char* const MAP_VALUE = "illegal map value";
-const char* const ALIAS_NOT_FOUND = "alias not found after *";
-const char* const ANCHOR_NOT_FOUND = "anchor not found after &";
-const char* const CHAR_IN_ALIAS =
- "illegal character found while scanning alias";
-const char* const CHAR_IN_ANCHOR =
- "illegal character found while scanning anchor";
-const char* const ZERO_INDENT_IN_BLOCK =
- "cannot set zero indentation for a block scalar";
-const char* const CHAR_IN_BLOCK = "unexpected character in block scalar";
-const char* const AMBIGUOUS_ANCHOR =
- "cannot assign the same alias to multiple nodes";
-const char* const UNKNOWN_ANCHOR = "the referenced anchor is not defined";
-
-const char* const INVALID_NODE =
- "invalid node; this may result from using a map iterator as a sequence "
- "iterator, or vice-versa";
-const char* const INVALID_SCALAR = "invalid scalar";
-const char* const KEY_NOT_FOUND = "key not found";
-const char* const BAD_CONVERSION = "bad conversion";
-const char* const BAD_DEREFERENCE = "bad dereference";
-const char* const BAD_SUBSCRIPT = "operator[] call on a scalar";
-const char* const BAD_PUSHBACK = "appending to a non-sequence";
-const char* const BAD_INSERT = "inserting in a non-convertible-to-map";
-
-const char* const UNMATCHED_GROUP_TAG = "unmatched group tag";
-const char* const UNEXPECTED_END_SEQ = "unexpected end sequence token";
-const char* const UNEXPECTED_END_MAP = "unexpected end map token";
-const char* const SINGLE_QUOTED_CHAR =
- "invalid character in single-quoted string";
-const char* const INVALID_ANCHOR = "invalid anchor";
-const char* const INVALID_ALIAS = "invalid alias";
-const char* const INVALID_TAG = "invalid tag";
-const char* const BAD_FILE = "bad file";
-
-template <typename T>
-inline const std::string KEY_NOT_FOUND_WITH_KEY(
+namespace YAML {
+// error messages
+namespace ErrorMsg {
+const char* const YAML_DIRECTIVE_ARGS =
+ "YAML directives must have exactly one argument";
+const char* const YAML_VERSION = "bad YAML version: ";
+const char* const YAML_MAJOR_VERSION = "YAML major version too large";
+const char* const REPEATED_YAML_DIRECTIVE = "repeated YAML directive";
+const char* const TAG_DIRECTIVE_ARGS =
+ "TAG directives must have exactly two arguments";
+const char* const REPEATED_TAG_DIRECTIVE = "repeated TAG directive";
+const char* const CHAR_IN_TAG_HANDLE =
+ "illegal character found while scanning tag handle";
+const char* const TAG_WITH_NO_SUFFIX = "tag handle with no suffix";
+const char* const END_OF_VERBATIM_TAG = "end of verbatim tag not found";
+const char* const END_OF_MAP = "end of map not found";
+const char* const END_OF_MAP_FLOW = "end of map flow not found";
+const char* const END_OF_SEQ = "end of sequence not found";
+const char* const END_OF_SEQ_FLOW = "end of sequence flow not found";
+const char* const MULTIPLE_TAGS =
+ "cannot assign multiple tags to the same node";
+const char* const MULTIPLE_ANCHORS =
+ "cannot assign multiple anchors to the same node";
+const char* const MULTIPLE_ALIASES =
+ "cannot assign multiple aliases to the same node";
+const char* const ALIAS_CONTENT =
+ "aliases can't have any content, *including* tags";
+const char* const INVALID_HEX = "bad character found while scanning hex number";
+const char* const INVALID_UNICODE = "invalid unicode: ";
+const char* const INVALID_ESCAPE = "unknown escape character: ";
+const char* const UNKNOWN_TOKEN = "unknown token";
+const char* const DOC_IN_SCALAR = "illegal document indicator in scalar";
+const char* const EOF_IN_SCALAR = "illegal EOF in scalar";
+const char* const CHAR_IN_SCALAR = "illegal character in scalar";
+const char* const TAB_IN_INDENTATION =
+ "illegal tab when looking for indentation";
+const char* const FLOW_END = "illegal flow end";
+const char* const BLOCK_ENTRY = "illegal block entry";
+const char* const MAP_KEY = "illegal map key";
+const char* const MAP_VALUE = "illegal map value";
+const char* const ALIAS_NOT_FOUND = "alias not found after *";
+const char* const ANCHOR_NOT_FOUND = "anchor not found after &";
+const char* const CHAR_IN_ALIAS =
+ "illegal character found while scanning alias";
+const char* const CHAR_IN_ANCHOR =
+ "illegal character found while scanning anchor";
+const char* const ZERO_INDENT_IN_BLOCK =
+ "cannot set zero indentation for a block scalar";
+const char* const CHAR_IN_BLOCK = "unexpected character in block scalar";
+const char* const AMBIGUOUS_ANCHOR =
+ "cannot assign the same alias to multiple nodes";
+const char* const UNKNOWN_ANCHOR = "the referenced anchor is not defined";
+
+const char* const INVALID_NODE =
+ "invalid node; this may result from using a map iterator as a sequence "
+ "iterator, or vice-versa";
+const char* const INVALID_SCALAR = "invalid scalar";
+const char* const KEY_NOT_FOUND = "key not found";
+const char* const BAD_CONVERSION = "bad conversion";
+const char* const BAD_DEREFERENCE = "bad dereference";
+const char* const BAD_SUBSCRIPT = "operator[] call on a scalar";
+const char* const BAD_PUSHBACK = "appending to a non-sequence";
+const char* const BAD_INSERT = "inserting in a non-convertible-to-map";
+
+const char* const UNMATCHED_GROUP_TAG = "unmatched group tag";
+const char* const UNEXPECTED_END_SEQ = "unexpected end sequence token";
+const char* const UNEXPECTED_END_MAP = "unexpected end map token";
+const char* const SINGLE_QUOTED_CHAR =
+ "invalid character in single-quoted string";
+const char* const INVALID_ANCHOR = "invalid anchor";
+const char* const INVALID_ALIAS = "invalid alias";
+const char* const INVALID_TAG = "invalid tag";
+const char* const BAD_FILE = "bad file";
+
+template <typename T>
+inline const std::string KEY_NOT_FOUND_WITH_KEY(
const T&, typename disable_if<is_numeric<T>>::type* = 0) {
- return KEY_NOT_FOUND;
-}
-
-inline const std::string KEY_NOT_FOUND_WITH_KEY(const std::string& key) {
- std::stringstream stream;
- stream << KEY_NOT_FOUND << ": " << key;
- return stream.str();
-}
-
-template <typename T>
-inline const std::string KEY_NOT_FOUND_WITH_KEY(
+ return KEY_NOT_FOUND;
+}
+
+inline const std::string KEY_NOT_FOUND_WITH_KEY(const std::string& key) {
+ std::stringstream stream;
+ stream << KEY_NOT_FOUND << ": " << key;
+ return stream.str();
+}
+
+template <typename T>
+inline const std::string KEY_NOT_FOUND_WITH_KEY(
const T& key, typename enable_if<is_numeric<T>>::type* = 0) {
- std::stringstream stream;
- stream << KEY_NOT_FOUND << ": " << key;
- return stream.str();
-}
-}
-
+ std::stringstream stream;
+ stream << KEY_NOT_FOUND << ": " << key;
+ return stream.str();
+}
+}
+
class YAML_CPP_API Exception : public std::runtime_error {
- public:
- Exception(const Mark& mark_, const std::string& msg_)
- : std::runtime_error(build_what(mark_, msg_)), mark(mark_), msg(msg_) {}
+ public:
+ Exception(const Mark& mark_, const std::string& msg_)
+ : std::runtime_error(build_what(mark_, msg_)), mark(mark_), msg(msg_) {}
virtual ~Exception() YAML_CPP_NOEXCEPT;
-
+
Exception(const Exception&) = default;
- Mark mark;
- std::string msg;
-
- private:
- static const std::string build_what(const Mark& mark,
- const std::string& msg) {
- if (mark.is_null()) {
- return msg.c_str();
- }
-
- std::stringstream output;
- output << "yaml-cpp: error at line " << mark.line + 1 << ", column "
- << mark.column + 1 << ": " << msg;
- return output.str();
- }
-};
-
+ Mark mark;
+ std::string msg;
+
+ private:
+ static const std::string build_what(const Mark& mark,
+ const std::string& msg) {
+ if (mark.is_null()) {
+ return msg.c_str();
+ }
+
+ std::stringstream output;
+ output << "yaml-cpp: error at line " << mark.line + 1 << ", column "
+ << mark.column + 1 << ": " << msg;
+ return output.str();
+ }
+};
+
class YAML_CPP_API ParserException : public Exception {
- public:
- ParserException(const Mark& mark_, const std::string& msg_)
- : Exception(mark_, msg_) {}
+ public:
+ ParserException(const Mark& mark_, const std::string& msg_)
+ : Exception(mark_, msg_) {}
ParserException(const ParserException&) = default;
virtual ~ParserException() YAML_CPP_NOEXCEPT;
-};
-
+};
+
class YAML_CPP_API RepresentationException : public Exception {
- public:
- RepresentationException(const Mark& mark_, const std::string& msg_)
- : Exception(mark_, msg_) {}
+ public:
+ RepresentationException(const Mark& mark_, const std::string& msg_)
+ : Exception(mark_, msg_) {}
RepresentationException(const RepresentationException&) = default;
virtual ~RepresentationException() YAML_CPP_NOEXCEPT;
-};
-
-// representation exceptions
+};
+
+// representation exceptions
class YAML_CPP_API InvalidScalar : public RepresentationException {
- public:
- InvalidScalar(const Mark& mark_)
- : RepresentationException(mark_, ErrorMsg::INVALID_SCALAR) {}
+ public:
+ InvalidScalar(const Mark& mark_)
+ : RepresentationException(mark_, ErrorMsg::INVALID_SCALAR) {}
InvalidScalar(const InvalidScalar&) = default;
virtual ~InvalidScalar() YAML_CPP_NOEXCEPT;
-};
-
+};
+
class YAML_CPP_API KeyNotFound : public RepresentationException {
- public:
- template <typename T>
- KeyNotFound(const Mark& mark_, const T& key_)
- : RepresentationException(mark_, ErrorMsg::KEY_NOT_FOUND_WITH_KEY(key_)) {
- }
+ public:
+ template <typename T>
+ KeyNotFound(const Mark& mark_, const T& key_)
+ : RepresentationException(mark_, ErrorMsg::KEY_NOT_FOUND_WITH_KEY(key_)) {
+ }
KeyNotFound(const KeyNotFound&) = default;
virtual ~KeyNotFound() YAML_CPP_NOEXCEPT;
-};
-
-template <typename T>
+};
+
+template <typename T>
class YAML_CPP_API TypedKeyNotFound : public KeyNotFound {
- public:
- TypedKeyNotFound(const Mark& mark_, const T& key_)
- : KeyNotFound(mark_, key_), key(key_) {}
+ public:
+ TypedKeyNotFound(const Mark& mark_, const T& key_)
+ : KeyNotFound(mark_, key_), key(key_) {}
virtual ~TypedKeyNotFound() YAML_CPP_NOEXCEPT {}
-
- T key;
-};
-
-template <typename T>
-inline TypedKeyNotFound<T> MakeTypedKeyNotFound(const Mark& mark,
- const T& key) {
- return TypedKeyNotFound<T>(mark, key);
-}
-
+
+ T key;
+};
+
+template <typename T>
+inline TypedKeyNotFound<T> MakeTypedKeyNotFound(const Mark& mark,
+ const T& key) {
+ return TypedKeyNotFound<T>(mark, key);
+}
+
class YAML_CPP_API InvalidNode : public RepresentationException {
- public:
- InvalidNode()
- : RepresentationException(Mark::null_mark(), ErrorMsg::INVALID_NODE) {}
+ public:
+ InvalidNode()
+ : RepresentationException(Mark::null_mark(), ErrorMsg::INVALID_NODE) {}
InvalidNode(const InvalidNode&) = default;
virtual ~InvalidNode() YAML_CPP_NOEXCEPT;
-};
-
+};
+
class YAML_CPP_API BadConversion : public RepresentationException {
- public:
- explicit BadConversion(const Mark& mark_)
- : RepresentationException(mark_, ErrorMsg::BAD_CONVERSION) {}
+ public:
+ explicit BadConversion(const Mark& mark_)
+ : RepresentationException(mark_, ErrorMsg::BAD_CONVERSION) {}
BadConversion(const BadConversion&) = default;
virtual ~BadConversion() YAML_CPP_NOEXCEPT;
-};
-
-template <typename T>
-class TypedBadConversion : public BadConversion {
- public:
- explicit TypedBadConversion(const Mark& mark_) : BadConversion(mark_) {}
-};
-
+};
+
+template <typename T>
+class TypedBadConversion : public BadConversion {
+ public:
+ explicit TypedBadConversion(const Mark& mark_) : BadConversion(mark_) {}
+};
+
class YAML_CPP_API BadDereference : public RepresentationException {
- public:
- BadDereference()
- : RepresentationException(Mark::null_mark(), ErrorMsg::BAD_DEREFERENCE) {}
+ public:
+ BadDereference()
+ : RepresentationException(Mark::null_mark(), ErrorMsg::BAD_DEREFERENCE) {}
BadDereference(const BadDereference&) = default;
virtual ~BadDereference() YAML_CPP_NOEXCEPT;
-};
-
+};
+
class YAML_CPP_API BadSubscript : public RepresentationException {
- public:
- BadSubscript()
- : RepresentationException(Mark::null_mark(), ErrorMsg::BAD_SUBSCRIPT) {}
+ public:
+ BadSubscript()
+ : RepresentationException(Mark::null_mark(), ErrorMsg::BAD_SUBSCRIPT) {}
BadSubscript(const BadSubscript&) = default;
virtual ~BadSubscript() YAML_CPP_NOEXCEPT;
-};
-
+};
+
class YAML_CPP_API BadPushback : public RepresentationException {
- public:
- BadPushback()
- : RepresentationException(Mark::null_mark(), ErrorMsg::BAD_PUSHBACK) {}
+ public:
+ BadPushback()
+ : RepresentationException(Mark::null_mark(), ErrorMsg::BAD_PUSHBACK) {}
BadPushback(const BadPushback&) = default;
virtual ~BadPushback() YAML_CPP_NOEXCEPT;
-};
-
+};
+
class YAML_CPP_API BadInsert : public RepresentationException {
- public:
- BadInsert()
- : RepresentationException(Mark::null_mark(), ErrorMsg::BAD_INSERT) {}
+ public:
+ BadInsert()
+ : RepresentationException(Mark::null_mark(), ErrorMsg::BAD_INSERT) {}
BadInsert(const BadInsert&) = default;
virtual ~BadInsert() YAML_CPP_NOEXCEPT;
-};
-
+};
+
class YAML_CPP_API EmitterException : public Exception {
- public:
- EmitterException(const std::string& msg_)
- : Exception(Mark::null_mark(), msg_) {}
+ public:
+ EmitterException(const std::string& msg_)
+ : Exception(Mark::null_mark(), msg_) {}
EmitterException(const EmitterException&) = default;
virtual ~EmitterException() YAML_CPP_NOEXCEPT;
-};
-
+};
+
class YAML_CPP_API BadFile : public Exception {
- public:
- BadFile() : Exception(Mark::null_mark(), ErrorMsg::BAD_FILE) {}
+ public:
+ BadFile() : Exception(Mark::null_mark(), ErrorMsg::BAD_FILE) {}
BadFile(const BadFile&) = default;
virtual ~BadFile() YAML_CPP_NOEXCEPT;
-};
-}
-
+};
+}
+
#undef YAML_CPP_NOEXCEPT
-#endif // EXCEPTIONS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#endif // EXCEPTIONS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/mark.h b/contrib/libs/yaml-cpp/include/yaml-cpp/mark.h
index 27c4f74128..bf94b4f41f 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/mark.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/mark.h
@@ -1,29 +1,29 @@
-#ifndef MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "yaml-cpp/dll.h"
-
-namespace YAML {
-struct YAML_CPP_API Mark {
- Mark() : pos(0), line(0), column(0) {}
-
- static const Mark null_mark() { return Mark(-1, -1, -1); }
-
- bool is_null() const { return pos == -1 && line == -1 && column == -1; }
-
- int pos;
- int line, column;
-
- private:
- Mark(int pos_, int line_, int column_)
- : pos(pos_), line(line_), column(column_) {}
-};
-}
-
-#endif // MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+
+namespace YAML {
+struct YAML_CPP_API Mark {
+ Mark() : pos(0), line(0), column(0) {}
+
+ static const Mark null_mark() { return Mark(-1, -1, -1); }
+
+ bool is_null() const { return pos == -1 && line == -1 && column == -1; }
+
+ int pos;
+ int line, column;
+
+ private:
+ Mark(int pos_, int line_, int column_)
+ : pos(pos_), line(line_), column(column_) {}
+};
+}
+
+#endif // MARK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/node/convert.h b/contrib/libs/yaml-cpp/include/yaml-cpp/node/convert.h
index 791036869e..45a878ab0c 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/node/convert.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/node/convert.h
@@ -1,247 +1,247 @@
-#ifndef NODE_CONVERT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define NODE_CONVERT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
+#ifndef NODE_CONVERT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_CONVERT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
#include <array>
-#include <limits>
-#include <list>
-#include <map>
-#include <sstream>
-#include <vector>
-
-#include "yaml-cpp/binary.h"
-#include "yaml-cpp/node/impl.h"
-#include "yaml-cpp/node/iterator.h"
-#include "yaml-cpp/node/node.h"
-#include "yaml-cpp/node/type.h"
-#include "yaml-cpp/null.h"
-
-namespace YAML {
-class Binary;
-struct _Null;
-template <typename T>
-struct convert;
-} // namespace YAML
-
-namespace YAML {
-namespace conversion {
-inline bool IsInfinity(const std::string& input) {
- return input == ".inf" || input == ".Inf" || input == ".INF" ||
- input == "+.inf" || input == "+.Inf" || input == "+.INF";
-}
-
-inline bool IsNegativeInfinity(const std::string& input) {
- return input == "-.inf" || input == "-.Inf" || input == "-.INF";
-}
-
-inline bool IsNaN(const std::string& input) {
- return input == ".nan" || input == ".NaN" || input == ".NAN";
-}
-}
-
-// Node
-template <>
-struct convert<Node> {
- static Node encode(const Node& rhs) { return rhs; }
-
- static bool decode(const Node& node, Node& rhs) {
- rhs.reset(node);
- return true;
- }
-};
-
-// std::string
-template <>
-struct convert<std::string> {
- static Node encode(const std::string& rhs) { return Node(rhs); }
-
- static bool decode(const Node& node, std::string& rhs) {
- if (!node.IsScalar())
- return false;
- rhs = node.Scalar();
- return true;
- }
-};
-
-// C-strings can only be encoded
-template <>
-struct convert<const char*> {
- static Node encode(const char*& rhs) { return Node(rhs); }
-};
-
-template <std::size_t N>
-struct convert<const char[N]> {
- static Node encode(const char(&rhs)[N]) { return Node(rhs); }
-};
-
-template <>
-struct convert<_Null> {
- static Node encode(const _Null& /* rhs */) { return Node(); }
-
- static bool decode(const Node& node, _Null& /* rhs */) {
- return node.IsNull();
- }
-};
-
-#define YAML_DEFINE_CONVERT_STREAMABLE(type, negative_op) \
- template <> \
- struct convert<type> { \
- static Node encode(const type& rhs) { \
- std::stringstream stream; \
- stream.precision(std::numeric_limits<type>::digits10 + 1); \
- stream << rhs; \
- return Node(stream.str()); \
- } \
- \
- static bool decode(const Node& node, type& rhs) { \
- if (node.Type() != NodeType::Scalar) \
- return false; \
- const std::string& input = node.Scalar(); \
- std::stringstream stream(input); \
- stream.unsetf(std::ios::dec); \
- if ((stream >> std::noskipws >> rhs) && (stream >> std::ws).eof()) \
- return true; \
- if (std::numeric_limits<type>::has_infinity) { \
- if (conversion::IsInfinity(input)) { \
- rhs = std::numeric_limits<type>::infinity(); \
- return true; \
- } else if (conversion::IsNegativeInfinity(input)) { \
- rhs = negative_op std::numeric_limits<type>::infinity(); \
- return true; \
- } \
- } \
- \
- if (std::numeric_limits<type>::has_quiet_NaN && \
- conversion::IsNaN(input)) { \
- rhs = std::numeric_limits<type>::quiet_NaN(); \
- return true; \
- } \
- \
- return false; \
- } \
- }
-
-#define YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(type) \
- YAML_DEFINE_CONVERT_STREAMABLE(type, -)
-
-#define YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(type) \
- YAML_DEFINE_CONVERT_STREAMABLE(type, +)
-
-YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(int);
-YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(short);
-YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(long);
-YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(long long);
-YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned);
-YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned short);
-YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned long);
-YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned long long);
-
-YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(char);
-YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(signed char);
-YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned char);
-
-YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(float);
-YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(double);
-YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(long double);
-
-#undef YAML_DEFINE_CONVERT_STREAMABLE_SIGNED
-#undef YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED
-#undef YAML_DEFINE_CONVERT_STREAMABLE
-
-// bool
-template <>
-struct convert<bool> {
- static Node encode(bool rhs) { return rhs ? Node("true") : Node("false"); }
-
- YAML_CPP_API static bool decode(const Node& node, bool& rhs);
-};
-
-// std::map
-template <typename K, typename V>
+#include <limits>
+#include <list>
+#include <map>
+#include <sstream>
+#include <vector>
+
+#include "yaml-cpp/binary.h"
+#include "yaml-cpp/node/impl.h"
+#include "yaml-cpp/node/iterator.h"
+#include "yaml-cpp/node/node.h"
+#include "yaml-cpp/node/type.h"
+#include "yaml-cpp/null.h"
+
+namespace YAML {
+class Binary;
+struct _Null;
+template <typename T>
+struct convert;
+} // namespace YAML
+
+namespace YAML {
+namespace conversion {
+inline bool IsInfinity(const std::string& input) {
+ return input == ".inf" || input == ".Inf" || input == ".INF" ||
+ input == "+.inf" || input == "+.Inf" || input == "+.INF";
+}
+
+inline bool IsNegativeInfinity(const std::string& input) {
+ return input == "-.inf" || input == "-.Inf" || input == "-.INF";
+}
+
+inline bool IsNaN(const std::string& input) {
+ return input == ".nan" || input == ".NaN" || input == ".NAN";
+}
+}
+
+// Node
+template <>
+struct convert<Node> {
+ static Node encode(const Node& rhs) { return rhs; }
+
+ static bool decode(const Node& node, Node& rhs) {
+ rhs.reset(node);
+ return true;
+ }
+};
+
+// std::string
+template <>
+struct convert<std::string> {
+ static Node encode(const std::string& rhs) { return Node(rhs); }
+
+ static bool decode(const Node& node, std::string& rhs) {
+ if (!node.IsScalar())
+ return false;
+ rhs = node.Scalar();
+ return true;
+ }
+};
+
+// C-strings can only be encoded
+template <>
+struct convert<const char*> {
+ static Node encode(const char*& rhs) { return Node(rhs); }
+};
+
+template <std::size_t N>
+struct convert<const char[N]> {
+ static Node encode(const char(&rhs)[N]) { return Node(rhs); }
+};
+
+template <>
+struct convert<_Null> {
+ static Node encode(const _Null& /* rhs */) { return Node(); }
+
+ static bool decode(const Node& node, _Null& /* rhs */) {
+ return node.IsNull();
+ }
+};
+
+#define YAML_DEFINE_CONVERT_STREAMABLE(type, negative_op) \
+ template <> \
+ struct convert<type> { \
+ static Node encode(const type& rhs) { \
+ std::stringstream stream; \
+ stream.precision(std::numeric_limits<type>::digits10 + 1); \
+ stream << rhs; \
+ return Node(stream.str()); \
+ } \
+ \
+ static bool decode(const Node& node, type& rhs) { \
+ if (node.Type() != NodeType::Scalar) \
+ return false; \
+ const std::string& input = node.Scalar(); \
+ std::stringstream stream(input); \
+ stream.unsetf(std::ios::dec); \
+ if ((stream >> std::noskipws >> rhs) && (stream >> std::ws).eof()) \
+ return true; \
+ if (std::numeric_limits<type>::has_infinity) { \
+ if (conversion::IsInfinity(input)) { \
+ rhs = std::numeric_limits<type>::infinity(); \
+ return true; \
+ } else if (conversion::IsNegativeInfinity(input)) { \
+ rhs = negative_op std::numeric_limits<type>::infinity(); \
+ return true; \
+ } \
+ } \
+ \
+ if (std::numeric_limits<type>::has_quiet_NaN && \
+ conversion::IsNaN(input)) { \
+ rhs = std::numeric_limits<type>::quiet_NaN(); \
+ return true; \
+ } \
+ \
+ return false; \
+ } \
+ }
+
+#define YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(type) \
+ YAML_DEFINE_CONVERT_STREAMABLE(type, -)
+
+#define YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(type) \
+ YAML_DEFINE_CONVERT_STREAMABLE(type, +)
+
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(int);
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(short);
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(long);
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(long long);
+YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned);
+YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned short);
+YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned long);
+YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned long long);
+
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(char);
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(signed char);
+YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED(unsigned char);
+
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(float);
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(double);
+YAML_DEFINE_CONVERT_STREAMABLE_SIGNED(long double);
+
+#undef YAML_DEFINE_CONVERT_STREAMABLE_SIGNED
+#undef YAML_DEFINE_CONVERT_STREAMABLE_UNSIGNED
+#undef YAML_DEFINE_CONVERT_STREAMABLE
+
+// bool
+template <>
+struct convert<bool> {
+ static Node encode(bool rhs) { return rhs ? Node("true") : Node("false"); }
+
+ YAML_CPP_API static bool decode(const Node& node, bool& rhs);
+};
+
+// std::map
+template <typename K, typename V>
struct convert<std::map<K, V>> {
- static Node encode(const std::map<K, V>& rhs) {
- Node node(NodeType::Map);
- for (typename std::map<K, V>::const_iterator it = rhs.begin();
- it != rhs.end(); ++it)
- node.force_insert(it->first, it->second);
- return node;
- }
-
- static bool decode(const Node& node, std::map<K, V>& rhs) {
- if (!node.IsMap())
- return false;
-
- rhs.clear();
- for (const_iterator it = node.begin(); it != node.end(); ++it)
-#if defined(__GNUC__) && __GNUC__ < 4
- // workaround for GCC 3:
- rhs[it->first.template as<K>()] = it->second.template as<V>();
-#else
- rhs[it->first.as<K>()] = it->second.as<V>();
-#endif
- return true;
- }
-};
-
-// std::vector
-template <typename T>
+ static Node encode(const std::map<K, V>& rhs) {
+ Node node(NodeType::Map);
+ for (typename std::map<K, V>::const_iterator it = rhs.begin();
+ it != rhs.end(); ++it)
+ node.force_insert(it->first, it->second);
+ return node;
+ }
+
+ static bool decode(const Node& node, std::map<K, V>& rhs) {
+ if (!node.IsMap())
+ return false;
+
+ rhs.clear();
+ for (const_iterator it = node.begin(); it != node.end(); ++it)
+#if defined(__GNUC__) && __GNUC__ < 4
+ // workaround for GCC 3:
+ rhs[it->first.template as<K>()] = it->second.template as<V>();
+#else
+ rhs[it->first.as<K>()] = it->second.as<V>();
+#endif
+ return true;
+ }
+};
+
+// std::vector
+template <typename T>
struct convert<std::vector<T>> {
- static Node encode(const std::vector<T>& rhs) {
- Node node(NodeType::Sequence);
- for (typename std::vector<T>::const_iterator it = rhs.begin();
- it != rhs.end(); ++it)
- node.push_back(*it);
- return node;
- }
-
- static bool decode(const Node& node, std::vector<T>& rhs) {
- if (!node.IsSequence())
- return false;
-
- rhs.clear();
- for (const_iterator it = node.begin(); it != node.end(); ++it)
-#if defined(__GNUC__) && __GNUC__ < 4
- // workaround for GCC 3:
- rhs.push_back(it->template as<T>());
-#else
- rhs.push_back(it->as<T>());
-#endif
- return true;
- }
-};
-
-// std::list
-template <typename T>
+ static Node encode(const std::vector<T>& rhs) {
+ Node node(NodeType::Sequence);
+ for (typename std::vector<T>::const_iterator it = rhs.begin();
+ it != rhs.end(); ++it)
+ node.push_back(*it);
+ return node;
+ }
+
+ static bool decode(const Node& node, std::vector<T>& rhs) {
+ if (!node.IsSequence())
+ return false;
+
+ rhs.clear();
+ for (const_iterator it = node.begin(); it != node.end(); ++it)
+#if defined(__GNUC__) && __GNUC__ < 4
+ // workaround for GCC 3:
+ rhs.push_back(it->template as<T>());
+#else
+ rhs.push_back(it->as<T>());
+#endif
+ return true;
+ }
+};
+
+// std::list
+template <typename T>
struct convert<std::list<T>> {
- static Node encode(const std::list<T>& rhs) {
- Node node(NodeType::Sequence);
- for (typename std::list<T>::const_iterator it = rhs.begin();
- it != rhs.end(); ++it)
- node.push_back(*it);
- return node;
- }
-
- static bool decode(const Node& node, std::list<T>& rhs) {
- if (!node.IsSequence())
- return false;
-
- rhs.clear();
- for (const_iterator it = node.begin(); it != node.end(); ++it)
-#if defined(__GNUC__) && __GNUC__ < 4
- // workaround for GCC 3:
- rhs.push_back(it->template as<T>());
-#else
- rhs.push_back(it->as<T>());
-#endif
- return true;
- }
-};
-
+ static Node encode(const std::list<T>& rhs) {
+ Node node(NodeType::Sequence);
+ for (typename std::list<T>::const_iterator it = rhs.begin();
+ it != rhs.end(); ++it)
+ node.push_back(*it);
+ return node;
+ }
+
+ static bool decode(const Node& node, std::list<T>& rhs) {
+ if (!node.IsSequence())
+ return false;
+
+ rhs.clear();
+ for (const_iterator it = node.begin(); it != node.end(); ++it)
+#if defined(__GNUC__) && __GNUC__ < 4
+ // workaround for GCC 3:
+ rhs.push_back(it->template as<T>());
+#else
+ rhs.push_back(it->as<T>());
+#endif
+ return true;
+ }
+};
+
// std::array
template <typename T, std::size_t N>
struct convert<std::array<T, N>> {
@@ -275,57 +275,57 @@ struct convert<std::array<T, N>> {
}
};
-// std::pair
-template <typename T, typename U>
+// std::pair
+template <typename T, typename U>
struct convert<std::pair<T, U>> {
- static Node encode(const std::pair<T, U>& rhs) {
- Node node(NodeType::Sequence);
- node.push_back(rhs.first);
- node.push_back(rhs.second);
- return node;
- }
-
- static bool decode(const Node& node, std::pair<T, U>& rhs) {
- if (!node.IsSequence())
- return false;
- if (node.size() != 2)
- return false;
-
-#if defined(__GNUC__) && __GNUC__ < 4
- // workaround for GCC 3:
- rhs.first = node[0].template as<T>();
-#else
- rhs.first = node[0].as<T>();
-#endif
-#if defined(__GNUC__) && __GNUC__ < 4
- // workaround for GCC 3:
- rhs.second = node[1].template as<U>();
-#else
- rhs.second = node[1].as<U>();
-#endif
- return true;
- }
-};
-
-// binary
-template <>
-struct convert<Binary> {
- static Node encode(const Binary& rhs) {
- return Node(EncodeBase64(rhs.data(), rhs.size()));
- }
-
- static bool decode(const Node& node, Binary& rhs) {
- if (!node.IsScalar())
- return false;
-
- std::vector<unsigned char> data = DecodeBase64(node.Scalar());
- if (data.empty() && !node.Scalar().empty())
- return false;
-
- rhs.swap(data);
- return true;
- }
-};
-}
-
-#endif // NODE_CONVERT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+ static Node encode(const std::pair<T, U>& rhs) {
+ Node node(NodeType::Sequence);
+ node.push_back(rhs.first);
+ node.push_back(rhs.second);
+ return node;
+ }
+
+ static bool decode(const Node& node, std::pair<T, U>& rhs) {
+ if (!node.IsSequence())
+ return false;
+ if (node.size() != 2)
+ return false;
+
+#if defined(__GNUC__) && __GNUC__ < 4
+ // workaround for GCC 3:
+ rhs.first = node[0].template as<T>();
+#else
+ rhs.first = node[0].as<T>();
+#endif
+#if defined(__GNUC__) && __GNUC__ < 4
+ // workaround for GCC 3:
+ rhs.second = node[1].template as<U>();
+#else
+ rhs.second = node[1].as<U>();
+#endif
+ return true;
+ }
+};
+
+// binary
+template <>
+struct convert<Binary> {
+ static Node encode(const Binary& rhs) {
+ return Node(EncodeBase64(rhs.data(), rhs.size()));
+ }
+
+ static bool decode(const Node& node, Binary& rhs) {
+ if (!node.IsScalar())
+ return false;
+
+ std::vector<unsigned char> data = DecodeBase64(node.Scalar());
+ if (data.empty() && !node.Scalar().empty())
+ return false;
+
+ rhs.swap(data);
+ return true;
+ }
+};
+}
+
+#endif // NODE_CONVERT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/bool_type.h b/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/bool_type.h
index 203a7a4f27..2c80705c9a 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/bool_type.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/bool_type.h
@@ -1,26 +1,26 @@
-#ifndef NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-namespace YAML {
-namespace detail {
-struct unspecified_bool {
- struct NOT_ALLOWED;
- static void true_value(NOT_ALLOWED*) {}
-};
-typedef void (*unspecified_bool_type)(unspecified_bool::NOT_ALLOWED*);
-}
-}
-
-#define YAML_CPP_OPERATOR_BOOL() \
- operator YAML::detail::unspecified_bool_type() const { \
- return this->operator!() ? 0 \
- : &YAML::detail::unspecified_bool::true_value; \
- }
-
-#endif // NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+namespace YAML {
+namespace detail {
+struct unspecified_bool {
+ struct NOT_ALLOWED;
+ static void true_value(NOT_ALLOWED*) {}
+};
+typedef void (*unspecified_bool_type)(unspecified_bool::NOT_ALLOWED*);
+}
+}
+
+#define YAML_CPP_OPERATOR_BOOL() \
+ operator YAML::detail::unspecified_bool_type() const { \
+ return this->operator!() ? 0 \
+ : &YAML::detail::unspecified_bool::true_value; \
+ }
+
+#endif // NODE_DETAIL_BOOL_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/impl.h b/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/impl.h
index c058852e63..09e55f838c 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/impl.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/impl.h
@@ -1,137 +1,137 @@
-#ifndef NODE_DETAIL_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define NODE_DETAIL_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "yaml-cpp/node/detail/node.h"
-#include "yaml-cpp/node/detail/node_data.h"
+#ifndef NODE_DETAIL_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_DETAIL_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/node/detail/node.h"
+#include "yaml-cpp/node/detail/node_data.h"
#include <type_traits>
-
-namespace YAML {
-namespace detail {
-template <typename Key, typename Enable = void>
-struct get_idx {
- static node* get(const std::vector<node*>& /* sequence */,
- const Key& /* key */, shared_memory_holder /* pMemory */) {
- return 0;
- }
-};
-
-template <typename Key>
+
+namespace YAML {
+namespace detail {
+template <typename Key, typename Enable = void>
+struct get_idx {
+ static node* get(const std::vector<node*>& /* sequence */,
+ const Key& /* key */, shared_memory_holder /* pMemory */) {
+ return 0;
+ }
+};
+
+template <typename Key>
struct get_idx<Key,
typename std::enable_if<std::is_unsigned<Key>::value &&
!std::is_same<Key, bool>::value>::type> {
- static node* get(const std::vector<node*>& sequence, const Key& key,
- shared_memory_holder /* pMemory */) {
- return key < sequence.size() ? sequence[key] : 0;
- }
-
- static node* get(std::vector<node*>& sequence, const Key& key,
- shared_memory_holder pMemory) {
+ static node* get(const std::vector<node*>& sequence, const Key& key,
+ shared_memory_holder /* pMemory */) {
+ return key < sequence.size() ? sequence[key] : 0;
+ }
+
+ static node* get(std::vector<node*>& sequence, const Key& key,
+ shared_memory_holder pMemory) {
if (key > sequence.size() || (key > 0 && !sequence[key-1]->is_defined()))
- return 0;
- if (key == sequence.size())
- sequence.push_back(&pMemory->create_node());
- return sequence[key];
- }
-};
-
-template <typename Key>
+ return 0;
+ if (key == sequence.size())
+ sequence.push_back(&pMemory->create_node());
+ return sequence[key];
+ }
+};
+
+template <typename Key>
struct get_idx<Key, typename std::enable_if<std::is_signed<Key>::value>::type> {
- static node* get(const std::vector<node*>& sequence, const Key& key,
- shared_memory_holder pMemory) {
- return key >= 0 ? get_idx<std::size_t>::get(
- sequence, static_cast<std::size_t>(key), pMemory)
- : 0;
- }
- static node* get(std::vector<node*>& sequence, const Key& key,
- shared_memory_holder pMemory) {
- return key >= 0 ? get_idx<std::size_t>::get(
- sequence, static_cast<std::size_t>(key), pMemory)
- : 0;
- }
-};
-
-template <typename T>
-inline bool node::equals(const T& rhs, shared_memory_holder pMemory) {
- T lhs;
- if (convert<T>::decode(Node(*this, pMemory), lhs)) {
- return lhs == rhs;
- }
- return false;
-}
-
-inline bool node::equals(const char* rhs, shared_memory_holder pMemory) {
- return equals<std::string>(rhs, pMemory);
-}
-
-// indexing
-template <typename Key>
-inline node* node_data::get(const Key& key,
- shared_memory_holder pMemory) const {
- switch (m_type) {
- case NodeType::Map:
- break;
- case NodeType::Undefined:
- case NodeType::Null:
- return NULL;
- case NodeType::Sequence:
- if (node* pNode = get_idx<Key>::get(m_sequence, key, pMemory))
- return pNode;
- return NULL;
- case NodeType::Scalar:
- throw BadSubscript();
- }
-
- for (node_map::const_iterator it = m_map.begin(); it != m_map.end(); ++it) {
- if (it->first->equals(key, pMemory)) {
- return it->second;
- }
- }
-
- return NULL;
-}
-
-template <typename Key>
-inline node& node_data::get(const Key& key, shared_memory_holder pMemory) {
- switch (m_type) {
- case NodeType::Map:
- break;
- case NodeType::Undefined:
- case NodeType::Null:
- case NodeType::Sequence:
- if (node* pNode = get_idx<Key>::get(m_sequence, key, pMemory)) {
- m_type = NodeType::Sequence;
- return *pNode;
- }
-
- convert_to_map(pMemory);
- break;
- case NodeType::Scalar:
- throw BadSubscript();
- }
-
- for (node_map::const_iterator it = m_map.begin(); it != m_map.end(); ++it) {
- if (it->first->equals(key, pMemory)) {
- return *it->second;
- }
- }
-
- node& k = convert_to_node(key, pMemory);
- node& v = pMemory->create_node();
- insert_map_pair(k, v);
- return v;
-}
-
-template <typename Key>
-inline bool node_data::remove(const Key& key, shared_memory_holder pMemory) {
- if (m_type != NodeType::Map)
- return false;
-
+ static node* get(const std::vector<node*>& sequence, const Key& key,
+ shared_memory_holder pMemory) {
+ return key >= 0 ? get_idx<std::size_t>::get(
+ sequence, static_cast<std::size_t>(key), pMemory)
+ : 0;
+ }
+ static node* get(std::vector<node*>& sequence, const Key& key,
+ shared_memory_holder pMemory) {
+ return key >= 0 ? get_idx<std::size_t>::get(
+ sequence, static_cast<std::size_t>(key), pMemory)
+ : 0;
+ }
+};
+
+template <typename T>
+inline bool node::equals(const T& rhs, shared_memory_holder pMemory) {
+ T lhs;
+ if (convert<T>::decode(Node(*this, pMemory), lhs)) {
+ return lhs == rhs;
+ }
+ return false;
+}
+
+inline bool node::equals(const char* rhs, shared_memory_holder pMemory) {
+ return equals<std::string>(rhs, pMemory);
+}
+
+// indexing
+template <typename Key>
+inline node* node_data::get(const Key& key,
+ shared_memory_holder pMemory) const {
+ switch (m_type) {
+ case NodeType::Map:
+ break;
+ case NodeType::Undefined:
+ case NodeType::Null:
+ return NULL;
+ case NodeType::Sequence:
+ if (node* pNode = get_idx<Key>::get(m_sequence, key, pMemory))
+ return pNode;
+ return NULL;
+ case NodeType::Scalar:
+ throw BadSubscript();
+ }
+
+ for (node_map::const_iterator it = m_map.begin(); it != m_map.end(); ++it) {
+ if (it->first->equals(key, pMemory)) {
+ return it->second;
+ }
+ }
+
+ return NULL;
+}
+
+template <typename Key>
+inline node& node_data::get(const Key& key, shared_memory_holder pMemory) {
+ switch (m_type) {
+ case NodeType::Map:
+ break;
+ case NodeType::Undefined:
+ case NodeType::Null:
+ case NodeType::Sequence:
+ if (node* pNode = get_idx<Key>::get(m_sequence, key, pMemory)) {
+ m_type = NodeType::Sequence;
+ return *pNode;
+ }
+
+ convert_to_map(pMemory);
+ break;
+ case NodeType::Scalar:
+ throw BadSubscript();
+ }
+
+ for (node_map::const_iterator it = m_map.begin(); it != m_map.end(); ++it) {
+ if (it->first->equals(key, pMemory)) {
+ return *it->second;
+ }
+ }
+
+ node& k = convert_to_node(key, pMemory);
+ node& v = pMemory->create_node();
+ insert_map_pair(k, v);
+ return v;
+}
+
+template <typename Key>
+inline bool node_data::remove(const Key& key, shared_memory_holder pMemory) {
+ if (m_type != NodeType::Map)
+ return false;
+
for (kv_pairs::iterator it = m_undefinedPairs.begin();
it != m_undefinedPairs.end();) {
kv_pairs::iterator jt = std::next(it);
@@ -140,46 +140,46 @@ inline bool node_data::remove(const Key& key, shared_memory_holder pMemory) {
it = jt;
}
- for (node_map::iterator it = m_map.begin(); it != m_map.end(); ++it) {
- if (it->first->equals(key, pMemory)) {
- m_map.erase(it);
- return true;
- }
- }
-
- return false;
-}
-
-// map
-template <typename Key, typename Value>
-inline void node_data::force_insert(const Key& key, const Value& value,
- shared_memory_holder pMemory) {
- switch (m_type) {
- case NodeType::Map:
- break;
- case NodeType::Undefined:
- case NodeType::Null:
- case NodeType::Sequence:
- convert_to_map(pMemory);
- break;
- case NodeType::Scalar:
- throw BadInsert();
- }
-
- node& k = convert_to_node(key, pMemory);
- node& v = convert_to_node(value, pMemory);
- insert_map_pair(k, v);
-}
-
-template <typename T>
-inline node& node_data::convert_to_node(const T& rhs,
- shared_memory_holder pMemory) {
- Node value = convert<T>::encode(rhs);
- value.EnsureNodeExists();
- pMemory->merge(*value.m_pMemory);
- return *value.m_pNode;
-}
-}
-}
-
-#endif // NODE_DETAIL_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+ for (node_map::iterator it = m_map.begin(); it != m_map.end(); ++it) {
+ if (it->first->equals(key, pMemory)) {
+ m_map.erase(it);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// map
+template <typename Key, typename Value>
+inline void node_data::force_insert(const Key& key, const Value& value,
+ shared_memory_holder pMemory) {
+ switch (m_type) {
+ case NodeType::Map:
+ break;
+ case NodeType::Undefined:
+ case NodeType::Null:
+ case NodeType::Sequence:
+ convert_to_map(pMemory);
+ break;
+ case NodeType::Scalar:
+ throw BadInsert();
+ }
+
+ node& k = convert_to_node(key, pMemory);
+ node& v = convert_to_node(value, pMemory);
+ insert_map_pair(k, v);
+}
+
+template <typename T>
+inline node& node_data::convert_to_node(const T& rhs,
+ shared_memory_holder pMemory) {
+ Node value = convert<T>::encode(rhs);
+ value.EnsureNodeExists();
+ pMemory->merge(*value.m_pMemory);
+ return *value.m_pNode;
+}
+}
+}
+
+#endif // NODE_DETAIL_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/iterator.h b/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/iterator.h
index 2dd31391dd..deec8fb62c 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/iterator.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/iterator.h
@@ -1,33 +1,33 @@
-#ifndef VALUE_DETAIL_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define VALUE_DETAIL_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "yaml-cpp/dll.h"
+#ifndef VALUE_DETAIL_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_DETAIL_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
#include "yaml-cpp/node/node.h"
-#include "yaml-cpp/node/ptr.h"
-#include "yaml-cpp/node/detail/node_iterator.h"
+#include "yaml-cpp/node/ptr.h"
+#include "yaml-cpp/node/detail/node_iterator.h"
#include <cstddef>
#include <iterator>
-
-namespace YAML {
-namespace detail {
-struct iterator_value;
-
-template <typename V>
+
+namespace YAML {
+namespace detail {
+struct iterator_value;
+
+template <typename V>
class iterator_base : public std::iterator<std::forward_iterator_tag, V,
std::ptrdiff_t, V*, V> {
- private:
- template <typename>
- friend class iterator_base;
- struct enabler {};
+ private:
+ template <typename>
+ friend class iterator_base;
+ struct enabler {};
typedef node_iterator base_type;
-
+
struct proxy {
explicit proxy(const V& x) : m_ref(x) {}
V* operator->() { return std::addressof(m_ref); }
@@ -36,31 +36,31 @@ class iterator_base : public std::iterator<std::forward_iterator_tag, V,
V m_ref;
};
- public:
- typedef typename iterator_base::value_type value_type;
-
- public:
+ public:
+ typedef typename iterator_base::value_type value_type;
+
+ public:
iterator_base() : m_iterator(), m_pMemory() {}
- explicit iterator_base(base_type rhs, shared_memory_holder pMemory)
+ explicit iterator_base(base_type rhs, shared_memory_holder pMemory)
: m_iterator(rhs), m_pMemory(pMemory) {}
-
- template <class W>
- iterator_base(const iterator_base<W>& rhs,
+
+ template <class W>
+ iterator_base(const iterator_base<W>& rhs,
typename std::enable_if<std::is_convertible<W*, V*>::value,
enabler>::type = enabler())
: m_iterator(rhs.m_iterator), m_pMemory(rhs.m_pMemory) {}
-
+
iterator_base<V>& operator++() {
++m_iterator;
return *this;
}
-
+
iterator_base<V> operator++(int) {
iterator_base<V> iterator_pre(*this);
++(*this);
return iterator_pre;
}
-
+
template <typename W>
bool operator==(const iterator_base<W>& rhs) const {
return m_iterator == rhs.m_iterator;
@@ -73,20 +73,20 @@ class iterator_base : public std::iterator<std::forward_iterator_tag, V,
value_type operator*() const {
const typename base_type::value_type& v = *m_iterator;
- if (v.pNode)
- return value_type(Node(*v, m_pMemory));
- if (v.first && v.second)
- return value_type(Node(*v.first, m_pMemory), Node(*v.second, m_pMemory));
- return value_type();
- }
-
+ if (v.pNode)
+ return value_type(Node(*v, m_pMemory));
+ if (v.first && v.second)
+ return value_type(Node(*v.first, m_pMemory), Node(*v.second, m_pMemory));
+ return value_type();
+ }
+
proxy operator->() const { return proxy(**this); }
- private:
+ private:
base_type m_iterator;
- shared_memory_holder m_pMemory;
-};
-}
-}
-
-#endif // VALUE_DETAIL_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+ shared_memory_holder m_pMemory;
+};
+}
+}
+
+#endif // VALUE_DETAIL_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/iterator_fwd.h b/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/iterator_fwd.h
index 4a1490ecc6..5f1ffe7436 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/iterator_fwd.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/iterator_fwd.h
@@ -1,27 +1,27 @@
-#ifndef VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "yaml-cpp/dll.h"
-#include <list>
-#include <utility>
-#include <vector>
-
-namespace YAML {
-
-namespace detail {
-struct iterator_value;
-template <typename V>
-class iterator_base;
-}
-
-typedef detail::iterator_base<detail::iterator_value> iterator;
-typedef detail::iterator_base<const detail::iterator_value> const_iterator;
-}
-
-#endif // VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+#include <list>
+#include <utility>
+#include <vector>
+
+namespace YAML {
+
+namespace detail {
+struct iterator_value;
+template <typename V>
+class iterator_base;
+}
+
+typedef detail::iterator_base<detail::iterator_value> iterator;
+typedef detail::iterator_base<const detail::iterator_value> const_iterator;
+}
+
+#endif // VALUE_DETAIL_ITERATOR_FWD_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/memory.h b/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/memory.h
index bdd190bc4c..8f2bc2657a 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/memory.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/memory.h
@@ -1,46 +1,46 @@
-#ifndef VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <set>
-
-#include "yaml-cpp/dll.h"
-#include "yaml-cpp/node/ptr.h"
-
-namespace YAML {
-namespace detail {
-class node;
-} // namespace detail
-} // namespace YAML
-
-namespace YAML {
-namespace detail {
-class YAML_CPP_API memory {
- public:
- node& create_node();
- void merge(const memory& rhs);
-
- private:
- typedef std::set<shared_node> Nodes;
- Nodes m_nodes;
-};
-
-class YAML_CPP_API memory_holder {
- public:
- memory_holder() : m_pMemory(new memory) {}
-
- node& create_node() { return m_pMemory->create_node(); }
- void merge(memory_holder& rhs);
-
- private:
- shared_memory m_pMemory;
-};
-}
-}
-
-#endif // VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <set>
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/ptr.h"
+
+namespace YAML {
+namespace detail {
+class node;
+} // namespace detail
+} // namespace YAML
+
+namespace YAML {
+namespace detail {
+class YAML_CPP_API memory {
+ public:
+ node& create_node();
+ void merge(const memory& rhs);
+
+ private:
+ typedef std::set<shared_node> Nodes;
+ Nodes m_nodes;
+};
+
+class YAML_CPP_API memory_holder {
+ public:
+ memory_holder() : m_pMemory(new memory) {}
+
+ node& create_node() { return m_pMemory->create_node(); }
+ void merge(memory_holder& rhs);
+
+ private:
+ shared_memory m_pMemory;
+};
+}
+}
+
+#endif // VALUE_DETAIL_MEMORY_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node.h b/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node.h
index e7a77ef671..8a776f62a9 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node.h
@@ -1,169 +1,169 @@
-#ifndef NODE_DETAIL_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define NODE_DETAIL_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "yaml-cpp/emitterstyle.h"
-#include "yaml-cpp/dll.h"
-#include "yaml-cpp/node/type.h"
-#include "yaml-cpp/node/ptr.h"
-#include "yaml-cpp/node/detail/node_ref.h"
-#include <set>
-
-namespace YAML {
-namespace detail {
+#ifndef NODE_DETAIL_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_DETAIL_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/emitterstyle.h"
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/type.h"
+#include "yaml-cpp/node/ptr.h"
+#include "yaml-cpp/node/detail/node_ref.h"
+#include <set>
+
+namespace YAML {
+namespace detail {
class node {
- public:
- node() : m_pRef(new node_ref) {}
+ public:
+ node() : m_pRef(new node_ref) {}
node(const node&) = delete;
node& operator=(const node&) = delete;
-
- bool is(const node& rhs) const { return m_pRef == rhs.m_pRef; }
- const node_ref* ref() const { return m_pRef.get(); }
-
- bool is_defined() const { return m_pRef->is_defined(); }
- const Mark& mark() const { return m_pRef->mark(); }
- NodeType::value type() const { return m_pRef->type(); }
-
- const std::string& scalar() const { return m_pRef->scalar(); }
- const std::string& tag() const { return m_pRef->tag(); }
- EmitterStyle::value style() const { return m_pRef->style(); }
-
- template <typename T>
- bool equals(const T& rhs, shared_memory_holder pMemory);
- bool equals(const char* rhs, shared_memory_holder pMemory);
-
- void mark_defined() {
- if (is_defined())
- return;
-
- m_pRef->mark_defined();
- for (nodes::iterator it = m_dependencies.begin();
- it != m_dependencies.end(); ++it)
- (*it)->mark_defined();
- m_dependencies.clear();
- }
-
- void add_dependency(node& rhs) {
- if (is_defined())
- rhs.mark_defined();
- else
- m_dependencies.insert(&rhs);
- }
-
- void set_ref(const node& rhs) {
- if (rhs.is_defined())
- mark_defined();
- m_pRef = rhs.m_pRef;
- }
- void set_data(const node& rhs) {
- if (rhs.is_defined())
- mark_defined();
- m_pRef->set_data(*rhs.m_pRef);
- }
-
+
+ bool is(const node& rhs) const { return m_pRef == rhs.m_pRef; }
+ const node_ref* ref() const { return m_pRef.get(); }
+
+ bool is_defined() const { return m_pRef->is_defined(); }
+ const Mark& mark() const { return m_pRef->mark(); }
+ NodeType::value type() const { return m_pRef->type(); }
+
+ const std::string& scalar() const { return m_pRef->scalar(); }
+ const std::string& tag() const { return m_pRef->tag(); }
+ EmitterStyle::value style() const { return m_pRef->style(); }
+
+ template <typename T>
+ bool equals(const T& rhs, shared_memory_holder pMemory);
+ bool equals(const char* rhs, shared_memory_holder pMemory);
+
+ void mark_defined() {
+ if (is_defined())
+ return;
+
+ m_pRef->mark_defined();
+ for (nodes::iterator it = m_dependencies.begin();
+ it != m_dependencies.end(); ++it)
+ (*it)->mark_defined();
+ m_dependencies.clear();
+ }
+
+ void add_dependency(node& rhs) {
+ if (is_defined())
+ rhs.mark_defined();
+ else
+ m_dependencies.insert(&rhs);
+ }
+
+ void set_ref(const node& rhs) {
+ if (rhs.is_defined())
+ mark_defined();
+ m_pRef = rhs.m_pRef;
+ }
+ void set_data(const node& rhs) {
+ if (rhs.is_defined())
+ mark_defined();
+ m_pRef->set_data(*rhs.m_pRef);
+ }
+
void set_mark(const Mark& mark) { m_pRef->set_mark(mark); }
-
- void set_type(NodeType::value type) {
- if (type != NodeType::Undefined)
- mark_defined();
- m_pRef->set_type(type);
- }
- void set_null() {
- mark_defined();
- m_pRef->set_null();
- }
- void set_scalar(const std::string& scalar) {
- mark_defined();
- m_pRef->set_scalar(scalar);
- }
- void set_tag(const std::string& tag) {
- mark_defined();
- m_pRef->set_tag(tag);
- }
-
- // style
- void set_style(EmitterStyle::value style) {
- mark_defined();
- m_pRef->set_style(style);
- }
-
- // size/iterator
- std::size_t size() const { return m_pRef->size(); }
-
- const_node_iterator begin() const {
- return static_cast<const node_ref&>(*m_pRef).begin();
- }
- node_iterator begin() { return m_pRef->begin(); }
-
- const_node_iterator end() const {
- return static_cast<const node_ref&>(*m_pRef).end();
- }
- node_iterator end() { return m_pRef->end(); }
-
- // sequence
+
+ void set_type(NodeType::value type) {
+ if (type != NodeType::Undefined)
+ mark_defined();
+ m_pRef->set_type(type);
+ }
+ void set_null() {
+ mark_defined();
+ m_pRef->set_null();
+ }
+ void set_scalar(const std::string& scalar) {
+ mark_defined();
+ m_pRef->set_scalar(scalar);
+ }
+ void set_tag(const std::string& tag) {
+ mark_defined();
+ m_pRef->set_tag(tag);
+ }
+
+ // style
+ void set_style(EmitterStyle::value style) {
+ mark_defined();
+ m_pRef->set_style(style);
+ }
+
+ // size/iterator
+ std::size_t size() const { return m_pRef->size(); }
+
+ const_node_iterator begin() const {
+ return static_cast<const node_ref&>(*m_pRef).begin();
+ }
+ node_iterator begin() { return m_pRef->begin(); }
+
+ const_node_iterator end() const {
+ return static_cast<const node_ref&>(*m_pRef).end();
+ }
+ node_iterator end() { return m_pRef->end(); }
+
+ // sequence
void push_back(node& input, shared_memory_holder pMemory) {
m_pRef->push_back(input, pMemory);
input.add_dependency(*this);
- }
- void insert(node& key, node& value, shared_memory_holder pMemory) {
- m_pRef->insert(key, value, pMemory);
- key.add_dependency(*this);
- value.add_dependency(*this);
- }
-
- // indexing
- template <typename Key>
- node* get(const Key& key, shared_memory_holder pMemory) const {
- // NOTE: this returns a non-const node so that the top-level Node can wrap
- // it, and returns a pointer so that it can be NULL (if there is no such
- // key).
- return static_cast<const node_ref&>(*m_pRef).get(key, pMemory);
- }
- template <typename Key>
- node& get(const Key& key, shared_memory_holder pMemory) {
- node& value = m_pRef->get(key, pMemory);
- value.add_dependency(*this);
- return value;
- }
- template <typename Key>
- bool remove(const Key& key, shared_memory_holder pMemory) {
- return m_pRef->remove(key, pMemory);
- }
-
- node* get(node& key, shared_memory_holder pMemory) const {
- // NOTE: this returns a non-const node so that the top-level Node can wrap
- // it, and returns a pointer so that it can be NULL (if there is no such
- // key).
- return static_cast<const node_ref&>(*m_pRef).get(key, pMemory);
- }
- node& get(node& key, shared_memory_holder pMemory) {
- node& value = m_pRef->get(key, pMemory);
- key.add_dependency(*this);
- value.add_dependency(*this);
- return value;
- }
- bool remove(node& key, shared_memory_holder pMemory) {
- return m_pRef->remove(key, pMemory);
- }
-
- // map
- template <typename Key, typename Value>
- void force_insert(const Key& key, const Value& value,
- shared_memory_holder pMemory) {
- m_pRef->force_insert(key, value, pMemory);
- }
-
- private:
- shared_node_ref m_pRef;
- typedef std::set<node*> nodes;
- nodes m_dependencies;
-};
-}
-}
-
-#endif // NODE_DETAIL_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+ }
+ void insert(node& key, node& value, shared_memory_holder pMemory) {
+ m_pRef->insert(key, value, pMemory);
+ key.add_dependency(*this);
+ value.add_dependency(*this);
+ }
+
+ // indexing
+ template <typename Key>
+ node* get(const Key& key, shared_memory_holder pMemory) const {
+ // NOTE: this returns a non-const node so that the top-level Node can wrap
+ // it, and returns a pointer so that it can be NULL (if there is no such
+ // key).
+ return static_cast<const node_ref&>(*m_pRef).get(key, pMemory);
+ }
+ template <typename Key>
+ node& get(const Key& key, shared_memory_holder pMemory) {
+ node& value = m_pRef->get(key, pMemory);
+ value.add_dependency(*this);
+ return value;
+ }
+ template <typename Key>
+ bool remove(const Key& key, shared_memory_holder pMemory) {
+ return m_pRef->remove(key, pMemory);
+ }
+
+ node* get(node& key, shared_memory_holder pMemory) const {
+ // NOTE: this returns a non-const node so that the top-level Node can wrap
+ // it, and returns a pointer so that it can be NULL (if there is no such
+ // key).
+ return static_cast<const node_ref&>(*m_pRef).get(key, pMemory);
+ }
+ node& get(node& key, shared_memory_holder pMemory) {
+ node& value = m_pRef->get(key, pMemory);
+ key.add_dependency(*this);
+ value.add_dependency(*this);
+ return value;
+ }
+ bool remove(node& key, shared_memory_holder pMemory) {
+ return m_pRef->remove(key, pMemory);
+ }
+
+ // map
+ template <typename Key, typename Value>
+ void force_insert(const Key& key, const Value& value,
+ shared_memory_holder pMemory) {
+ m_pRef->force_insert(key, value, pMemory);
+ }
+
+ private:
+ shared_node_ref m_pRef;
+ typedef std::set<node*> nodes;
+ nodes m_dependencies;
+};
+}
+}
+
+#endif // NODE_DETAIL_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node_data.h b/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node_data.h
index e3001efae2..50bcd74352 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node_data.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node_data.h
@@ -1,127 +1,127 @@
-#ifndef VALUE_DETAIL_NODE_DATA_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define VALUE_DETAIL_NODE_DATA_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <list>
-#include <map>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "yaml-cpp/dll.h"
-#include "yaml-cpp/node/detail/node_iterator.h"
-#include "yaml-cpp/node/iterator.h"
-#include "yaml-cpp/node/ptr.h"
-#include "yaml-cpp/node/type.h"
-
-namespace YAML {
-namespace detail {
-class node;
-} // namespace detail
-} // namespace YAML
-
-namespace YAML {
-namespace detail {
+#ifndef VALUE_DETAIL_NODE_DATA_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_DETAIL_NODE_DATA_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <list>
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/detail/node_iterator.h"
+#include "yaml-cpp/node/iterator.h"
+#include "yaml-cpp/node/ptr.h"
+#include "yaml-cpp/node/type.h"
+
+namespace YAML {
+namespace detail {
+class node;
+} // namespace detail
+} // namespace YAML
+
+namespace YAML {
+namespace detail {
class YAML_CPP_API node_data {
- public:
- node_data();
+ public:
+ node_data();
node_data(const node_data&) = delete;
node_data& operator=(const node_data&) = delete;
-
- void mark_defined();
- void set_mark(const Mark& mark);
- void set_type(NodeType::value type);
- void set_tag(const std::string& tag);
- void set_null();
- void set_scalar(const std::string& scalar);
- void set_style(EmitterStyle::value style);
-
- bool is_defined() const { return m_isDefined; }
- const Mark& mark() const { return m_mark; }
- NodeType::value type() const {
- return m_isDefined ? m_type : NodeType::Undefined;
- }
- const std::string& scalar() const { return m_scalar; }
- const std::string& tag() const { return m_tag; }
- EmitterStyle::value style() const { return m_style; }
-
- // size/iterator
- std::size_t size() const;
-
- const_node_iterator begin() const;
- node_iterator begin();
-
- const_node_iterator end() const;
- node_iterator end();
-
- // sequence
- void push_back(node& node, shared_memory_holder pMemory);
- void insert(node& key, node& value, shared_memory_holder pMemory);
-
- // indexing
- template <typename Key>
- node* get(const Key& key, shared_memory_holder pMemory) const;
- template <typename Key>
- node& get(const Key& key, shared_memory_holder pMemory);
- template <typename Key>
- bool remove(const Key& key, shared_memory_holder pMemory);
-
- node* get(node& key, shared_memory_holder pMemory) const;
- node& get(node& key, shared_memory_holder pMemory);
- bool remove(node& key, shared_memory_holder pMemory);
-
- // map
- template <typename Key, typename Value>
- void force_insert(const Key& key, const Value& value,
- shared_memory_holder pMemory);
-
- public:
- static std::string empty_scalar;
-
- private:
- void compute_seq_size() const;
- void compute_map_size() const;
-
- void reset_sequence();
- void reset_map();
-
- void insert_map_pair(node& key, node& value);
- void convert_to_map(shared_memory_holder pMemory);
- void convert_sequence_to_map(shared_memory_holder pMemory);
-
- template <typename T>
- static node& convert_to_node(const T& rhs, shared_memory_holder pMemory);
-
- private:
- bool m_isDefined;
- Mark m_mark;
- NodeType::value m_type;
- std::string m_tag;
- EmitterStyle::value m_style;
-
- // scalar
- std::string m_scalar;
-
- // sequence
- typedef std::vector<node*> node_seq;
- node_seq m_sequence;
-
- mutable std::size_t m_seqSize;
-
- // map
+
+ void mark_defined();
+ void set_mark(const Mark& mark);
+ void set_type(NodeType::value type);
+ void set_tag(const std::string& tag);
+ void set_null();
+ void set_scalar(const std::string& scalar);
+ void set_style(EmitterStyle::value style);
+
+ bool is_defined() const { return m_isDefined; }
+ const Mark& mark() const { return m_mark; }
+ NodeType::value type() const {
+ return m_isDefined ? m_type : NodeType::Undefined;
+ }
+ const std::string& scalar() const { return m_scalar; }
+ const std::string& tag() const { return m_tag; }
+ EmitterStyle::value style() const { return m_style; }
+
+ // size/iterator
+ std::size_t size() const;
+
+ const_node_iterator begin() const;
+ node_iterator begin();
+
+ const_node_iterator end() const;
+ node_iterator end();
+
+ // sequence
+ void push_back(node& node, shared_memory_holder pMemory);
+ void insert(node& key, node& value, shared_memory_holder pMemory);
+
+ // indexing
+ template <typename Key>
+ node* get(const Key& key, shared_memory_holder pMemory) const;
+ template <typename Key>
+ node& get(const Key& key, shared_memory_holder pMemory);
+ template <typename Key>
+ bool remove(const Key& key, shared_memory_holder pMemory);
+
+ node* get(node& key, shared_memory_holder pMemory) const;
+ node& get(node& key, shared_memory_holder pMemory);
+ bool remove(node& key, shared_memory_holder pMemory);
+
+ // map
+ template <typename Key, typename Value>
+ void force_insert(const Key& key, const Value& value,
+ shared_memory_holder pMemory);
+
+ public:
+ static std::string empty_scalar;
+
+ private:
+ void compute_seq_size() const;
+ void compute_map_size() const;
+
+ void reset_sequence();
+ void reset_map();
+
+ void insert_map_pair(node& key, node& value);
+ void convert_to_map(shared_memory_holder pMemory);
+ void convert_sequence_to_map(shared_memory_holder pMemory);
+
+ template <typename T>
+ static node& convert_to_node(const T& rhs, shared_memory_holder pMemory);
+
+ private:
+ bool m_isDefined;
+ Mark m_mark;
+ NodeType::value m_type;
+ std::string m_tag;
+ EmitterStyle::value m_style;
+
+ // scalar
+ std::string m_scalar;
+
+ // sequence
+ typedef std::vector<node*> node_seq;
+ node_seq m_sequence;
+
+ mutable std::size_t m_seqSize;
+
+ // map
typedef std::vector<std::pair<node*, node*>> node_map;
- node_map m_map;
-
- typedef std::pair<node*, node*> kv_pair;
- typedef std::list<kv_pair> kv_pairs;
- mutable kv_pairs m_undefinedPairs;
-};
-}
-}
-
-#endif // VALUE_DETAIL_NODE_DATA_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+ node_map m_map;
+
+ typedef std::pair<node*, node*> kv_pair;
+ typedef std::list<kv_pair> kv_pairs;
+ mutable kv_pairs m_undefinedPairs;
+};
+}
+}
+
+#endif // VALUE_DETAIL_NODE_DATA_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node_iterator.h b/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node_iterator.h
index c97e48adc8..088090fe74 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node_iterator.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node_iterator.h
@@ -1,64 +1,64 @@
-#ifndef VALUE_DETAIL_NODE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define VALUE_DETAIL_NODE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "yaml-cpp/dll.h"
-#include "yaml-cpp/node/ptr.h"
+#ifndef VALUE_DETAIL_NODE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_DETAIL_NODE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/ptr.h"
#include <cstddef>
#include <iterator>
#include <memory>
-#include <map>
-#include <utility>
-#include <vector>
-
-namespace YAML {
-namespace detail {
-struct iterator_type {
+#include <map>
+#include <utility>
+#include <vector>
+
+namespace YAML {
+namespace detail {
+struct iterator_type {
enum value { NoneType, Sequence, Map };
-};
-
-template <typename V>
-struct node_iterator_value : public std::pair<V*, V*> {
- typedef std::pair<V*, V*> kv;
-
- node_iterator_value() : kv(), pNode(0) {}
- explicit node_iterator_value(V& rhs) : kv(), pNode(&rhs) {}
- explicit node_iterator_value(V& key, V& value) : kv(&key, &value), pNode(0) {}
-
- V& operator*() const { return *pNode; }
- V& operator->() const { return *pNode; }
-
- V* pNode;
-};
-
-typedef std::vector<node*> node_seq;
+};
+
+template <typename V>
+struct node_iterator_value : public std::pair<V*, V*> {
+ typedef std::pair<V*, V*> kv;
+
+ node_iterator_value() : kv(), pNode(0) {}
+ explicit node_iterator_value(V& rhs) : kv(), pNode(&rhs) {}
+ explicit node_iterator_value(V& key, V& value) : kv(&key, &value), pNode(0) {}
+
+ V& operator*() const { return *pNode; }
+ V& operator->() const { return *pNode; }
+
+ V* pNode;
+};
+
+typedef std::vector<node*> node_seq;
typedef std::vector<std::pair<node*, node*>> node_map;
-
-template <typename V>
-struct node_iterator_type {
- typedef node_seq::iterator seq;
- typedef node_map::iterator map;
-};
-
-template <typename V>
-struct node_iterator_type<const V> {
- typedef node_seq::const_iterator seq;
- typedef node_map::const_iterator map;
-};
-
-template <typename V>
-class node_iterator_base
+
+template <typename V>
+struct node_iterator_type {
+ typedef node_seq::iterator seq;
+ typedef node_map::iterator map;
+};
+
+template <typename V>
+struct node_iterator_type<const V> {
+ typedef node_seq::const_iterator seq;
+ typedef node_map::const_iterator map;
+};
+
+template <typename V>
+class node_iterator_base
: public std::iterator<std::forward_iterator_tag, node_iterator_value<V>,
std::ptrdiff_t, node_iterator_value<V>*,
node_iterator_value<V>> {
- private:
- struct enabler {};
-
+ private:
+ struct enabler {};
+
struct proxy {
explicit proxy(const node_iterator_value<V>& x) : m_ref(x) {}
node_iterator_value<V>* operator->() { return std::addressof(m_ref); }
@@ -67,74 +67,74 @@ class node_iterator_base
node_iterator_value<V> m_ref;
};
- public:
- typedef typename node_iterator_type<V>::seq SeqIter;
- typedef typename node_iterator_type<V>::map MapIter;
- typedef node_iterator_value<V> value_type;
-
- node_iterator_base()
+ public:
+ typedef typename node_iterator_type<V>::seq SeqIter;
+ typedef typename node_iterator_type<V>::map MapIter;
+ typedef node_iterator_value<V> value_type;
+
+ node_iterator_base()
: m_type(iterator_type::NoneType), m_seqIt(), m_mapIt(), m_mapEnd() {}
- explicit node_iterator_base(SeqIter seqIt)
- : m_type(iterator_type::Sequence),
- m_seqIt(seqIt),
- m_mapIt(),
- m_mapEnd() {}
- explicit node_iterator_base(MapIter mapIt, MapIter mapEnd)
- : m_type(iterator_type::Map),
- m_seqIt(),
- m_mapIt(mapIt),
- m_mapEnd(mapEnd) {
- m_mapIt = increment_until_defined(m_mapIt);
- }
-
- template <typename W>
- node_iterator_base(const node_iterator_base<W>& rhs,
+ explicit node_iterator_base(SeqIter seqIt)
+ : m_type(iterator_type::Sequence),
+ m_seqIt(seqIt),
+ m_mapIt(),
+ m_mapEnd() {}
+ explicit node_iterator_base(MapIter mapIt, MapIter mapEnd)
+ : m_type(iterator_type::Map),
+ m_seqIt(),
+ m_mapIt(mapIt),
+ m_mapEnd(mapEnd) {
+ m_mapIt = increment_until_defined(m_mapIt);
+ }
+
+ template <typename W>
+ node_iterator_base(const node_iterator_base<W>& rhs,
typename std::enable_if<std::is_convertible<W*, V*>::value,
enabler>::type = enabler())
- : m_type(rhs.m_type),
- m_seqIt(rhs.m_seqIt),
- m_mapIt(rhs.m_mapIt),
- m_mapEnd(rhs.m_mapEnd) {}
-
- template <typename>
- friend class node_iterator_base;
-
- template <typename W>
+ : m_type(rhs.m_type),
+ m_seqIt(rhs.m_seqIt),
+ m_mapIt(rhs.m_mapIt),
+ m_mapEnd(rhs.m_mapEnd) {}
+
+ template <typename>
+ friend class node_iterator_base;
+
+ template <typename W>
bool operator==(const node_iterator_base<W>& rhs) const {
- if (m_type != rhs.m_type)
- return false;
-
- switch (m_type) {
+ if (m_type != rhs.m_type)
+ return false;
+
+ switch (m_type) {
case iterator_type::NoneType:
- return true;
- case iterator_type::Sequence:
- return m_seqIt == rhs.m_seqIt;
- case iterator_type::Map:
- return m_mapIt == rhs.m_mapIt;
- }
- return true;
- }
-
+ return true;
+ case iterator_type::Sequence:
+ return m_seqIt == rhs.m_seqIt;
+ case iterator_type::Map:
+ return m_mapIt == rhs.m_mapIt;
+ }
+ return true;
+ }
+
template <typename W>
bool operator!=(const node_iterator_base<W>& rhs) const {
return !(*this == rhs);
}
node_iterator_base<V>& operator++() {
- switch (m_type) {
+ switch (m_type) {
case iterator_type::NoneType:
- break;
- case iterator_type::Sequence:
- ++m_seqIt;
- break;
- case iterator_type::Map:
- ++m_mapIt;
- m_mapIt = increment_until_defined(m_mapIt);
- break;
- }
+ break;
+ case iterator_type::Sequence:
+ ++m_seqIt;
+ break;
+ case iterator_type::Map:
+ ++m_mapIt;
+ m_mapIt = increment_until_defined(m_mapIt);
+ break;
+ }
return *this;
- }
-
+ }
+
node_iterator_base<V> operator++(int) {
node_iterator_base<V> iterator_pre(*this);
++(*this);
@@ -142,39 +142,39 @@ class node_iterator_base
}
value_type operator*() const {
- switch (m_type) {
+ switch (m_type) {
case iterator_type::NoneType:
- return value_type();
- case iterator_type::Sequence:
- return value_type(**m_seqIt);
- case iterator_type::Map:
- return value_type(*m_mapIt->first, *m_mapIt->second);
- }
- return value_type();
- }
-
+ return value_type();
+ case iterator_type::Sequence:
+ return value_type(**m_seqIt);
+ case iterator_type::Map:
+ return value_type(*m_mapIt->first, *m_mapIt->second);
+ }
+ return value_type();
+ }
+
proxy operator->() const { return proxy(**this); }
- MapIter increment_until_defined(MapIter it) {
- while (it != m_mapEnd && !is_defined(it))
- ++it;
- return it;
- }
-
- bool is_defined(MapIter it) const {
- return it->first->is_defined() && it->second->is_defined();
- }
-
- private:
- typename iterator_type::value m_type;
-
- SeqIter m_seqIt;
- MapIter m_mapIt, m_mapEnd;
-};
-
-typedef node_iterator_base<node> node_iterator;
-typedef node_iterator_base<const node> const_node_iterator;
-}
-}
-
-#endif // VALUE_DETAIL_NODE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+ MapIter increment_until_defined(MapIter it) {
+ while (it != m_mapEnd && !is_defined(it))
+ ++it;
+ return it;
+ }
+
+ bool is_defined(MapIter it) const {
+ return it->first->is_defined() && it->second->is_defined();
+ }
+
+ private:
+ typename iterator_type::value m_type;
+
+ SeqIter m_seqIt;
+ MapIter m_mapIt, m_mapEnd;
+};
+
+typedef node_iterator_base<node> node_iterator;
+typedef node_iterator_base<const node> const_node_iterator;
+}
+}
+
+#endif // VALUE_DETAIL_NODE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node_ref.h b/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node_ref.h
index 6b0f3dae89..d8a94f8b80 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node_ref.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/node/detail/node_ref.h
@@ -1,98 +1,98 @@
-#ifndef VALUE_DETAIL_NODE_REF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define VALUE_DETAIL_NODE_REF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "yaml-cpp/dll.h"
-#include "yaml-cpp/node/type.h"
-#include "yaml-cpp/node/ptr.h"
-#include "yaml-cpp/node/detail/node_data.h"
-
-namespace YAML {
-namespace detail {
+#ifndef VALUE_DETAIL_NODE_REF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_DETAIL_NODE_REF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/type.h"
+#include "yaml-cpp/node/ptr.h"
+#include "yaml-cpp/node/detail/node_data.h"
+
+namespace YAML {
+namespace detail {
class node_ref {
- public:
- node_ref() : m_pData(new node_data) {}
+ public:
+ node_ref() : m_pData(new node_data) {}
node_ref(const node_ref&) = delete;
node_ref& operator=(const node_ref&) = delete;
-
- bool is_defined() const { return m_pData->is_defined(); }
- const Mark& mark() const { return m_pData->mark(); }
- NodeType::value type() const { return m_pData->type(); }
- const std::string& scalar() const { return m_pData->scalar(); }
- const std::string& tag() const { return m_pData->tag(); }
- EmitterStyle::value style() const { return m_pData->style(); }
-
- void mark_defined() { m_pData->mark_defined(); }
- void set_data(const node_ref& rhs) { m_pData = rhs.m_pData; }
-
- void set_mark(const Mark& mark) { m_pData->set_mark(mark); }
- void set_type(NodeType::value type) { m_pData->set_type(type); }
- void set_tag(const std::string& tag) { m_pData->set_tag(tag); }
- void set_null() { m_pData->set_null(); }
- void set_scalar(const std::string& scalar) { m_pData->set_scalar(scalar); }
- void set_style(EmitterStyle::value style) { m_pData->set_style(style); }
-
- // size/iterator
- std::size_t size() const { return m_pData->size(); }
-
- const_node_iterator begin() const {
- return static_cast<const node_data&>(*m_pData).begin();
- }
- node_iterator begin() { return m_pData->begin(); }
-
- const_node_iterator end() const {
- return static_cast<const node_data&>(*m_pData).end();
- }
- node_iterator end() { return m_pData->end(); }
-
- // sequence
- void push_back(node& node, shared_memory_holder pMemory) {
- m_pData->push_back(node, pMemory);
- }
- void insert(node& key, node& value, shared_memory_holder pMemory) {
- m_pData->insert(key, value, pMemory);
- }
-
- // indexing
- template <typename Key>
- node* get(const Key& key, shared_memory_holder pMemory) const {
- return static_cast<const node_data&>(*m_pData).get(key, pMemory);
- }
- template <typename Key>
- node& get(const Key& key, shared_memory_holder pMemory) {
- return m_pData->get(key, pMemory);
- }
- template <typename Key>
- bool remove(const Key& key, shared_memory_holder pMemory) {
- return m_pData->remove(key, pMemory);
- }
-
- node* get(node& key, shared_memory_holder pMemory) const {
- return static_cast<const node_data&>(*m_pData).get(key, pMemory);
- }
- node& get(node& key, shared_memory_holder pMemory) {
- return m_pData->get(key, pMemory);
- }
- bool remove(node& key, shared_memory_holder pMemory) {
- return m_pData->remove(key, pMemory);
- }
-
- // map
- template <typename Key, typename Value>
- void force_insert(const Key& key, const Value& value,
- shared_memory_holder pMemory) {
- m_pData->force_insert(key, value, pMemory);
- }
-
- private:
- shared_node_data m_pData;
-};
-}
-}
-
-#endif // VALUE_DETAIL_NODE_REF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+ bool is_defined() const { return m_pData->is_defined(); }
+ const Mark& mark() const { return m_pData->mark(); }
+ NodeType::value type() const { return m_pData->type(); }
+ const std::string& scalar() const { return m_pData->scalar(); }
+ const std::string& tag() const { return m_pData->tag(); }
+ EmitterStyle::value style() const { return m_pData->style(); }
+
+ void mark_defined() { m_pData->mark_defined(); }
+ void set_data(const node_ref& rhs) { m_pData = rhs.m_pData; }
+
+ void set_mark(const Mark& mark) { m_pData->set_mark(mark); }
+ void set_type(NodeType::value type) { m_pData->set_type(type); }
+ void set_tag(const std::string& tag) { m_pData->set_tag(tag); }
+ void set_null() { m_pData->set_null(); }
+ void set_scalar(const std::string& scalar) { m_pData->set_scalar(scalar); }
+ void set_style(EmitterStyle::value style) { m_pData->set_style(style); }
+
+ // size/iterator
+ std::size_t size() const { return m_pData->size(); }
+
+ const_node_iterator begin() const {
+ return static_cast<const node_data&>(*m_pData).begin();
+ }
+ node_iterator begin() { return m_pData->begin(); }
+
+ const_node_iterator end() const {
+ return static_cast<const node_data&>(*m_pData).end();
+ }
+ node_iterator end() { return m_pData->end(); }
+
+ // sequence
+ void push_back(node& node, shared_memory_holder pMemory) {
+ m_pData->push_back(node, pMemory);
+ }
+ void insert(node& key, node& value, shared_memory_holder pMemory) {
+ m_pData->insert(key, value, pMemory);
+ }
+
+ // indexing
+ template <typename Key>
+ node* get(const Key& key, shared_memory_holder pMemory) const {
+ return static_cast<const node_data&>(*m_pData).get(key, pMemory);
+ }
+ template <typename Key>
+ node& get(const Key& key, shared_memory_holder pMemory) {
+ return m_pData->get(key, pMemory);
+ }
+ template <typename Key>
+ bool remove(const Key& key, shared_memory_holder pMemory) {
+ return m_pData->remove(key, pMemory);
+ }
+
+ node* get(node& key, shared_memory_holder pMemory) const {
+ return static_cast<const node_data&>(*m_pData).get(key, pMemory);
+ }
+ node& get(node& key, shared_memory_holder pMemory) {
+ return m_pData->get(key, pMemory);
+ }
+ bool remove(node& key, shared_memory_holder pMemory) {
+ return m_pData->remove(key, pMemory);
+ }
+
+ // map
+ template <typename Key, typename Value>
+ void force_insert(const Key& key, const Value& value,
+ shared_memory_holder pMemory) {
+ m_pData->force_insert(key, value, pMemory);
+ }
+
+ private:
+ shared_node_data m_pData;
+};
+}
+}
+
+#endif // VALUE_DETAIL_NODE_REF_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/node/emit.h b/contrib/libs/yaml-cpp/include/yaml-cpp/node/emit.h
index df61813c4e..032268c5d0 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/node/emit.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/node/emit.h
@@ -1,32 +1,32 @@
-#ifndef NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <string>
-#include <iosfwd>
-
-#include "yaml-cpp/dll.h"
-
-namespace YAML {
-class Emitter;
-class Node;
-
+#ifndef NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+#include <iosfwd>
+
+#include "yaml-cpp/dll.h"
+
+namespace YAML {
+class Emitter;
+class Node;
+
/**
* Emits the node to the given {@link Emitter}. If there is an error in writing,
* {@link Emitter#good} will return false.
*/
-YAML_CPP_API Emitter& operator<<(Emitter& out, const Node& node);
+YAML_CPP_API Emitter& operator<<(Emitter& out, const Node& node);
/** Emits the node to the given output stream. */
-YAML_CPP_API std::ostream& operator<<(std::ostream& out, const Node& node);
-
+YAML_CPP_API std::ostream& operator<<(std::ostream& out, const Node& node);
+
/** Converts the node to a YAML string. */
-YAML_CPP_API std::string Dump(const Node& node);
+YAML_CPP_API std::string Dump(const Node& node);
} // namespace YAML
-
-#endif // NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#endif // NODE_EMIT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/node/impl.h b/contrib/libs/yaml-cpp/include/yaml-cpp/node/impl.h
index fc048a9f8e..20c487a687 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/node/impl.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/node/impl.h
@@ -1,448 +1,448 @@
-#ifndef NODE_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define NODE_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "yaml-cpp/node/node.h"
-#include "yaml-cpp/node/iterator.h"
-#include "yaml-cpp/node/detail/memory.h"
-#include "yaml-cpp/node/detail/node.h"
-#include "yaml-cpp/exceptions.h"
-#include <string>
-
-namespace YAML {
-inline Node::Node() : m_isValid(true), m_pNode(NULL) {}
-
-inline Node::Node(NodeType::value type)
- : m_isValid(true),
- m_pMemory(new detail::memory_holder),
- m_pNode(&m_pMemory->create_node()) {
- m_pNode->set_type(type);
-}
-
-template <typename T>
-inline Node::Node(const T& rhs)
- : m_isValid(true),
- m_pMemory(new detail::memory_holder),
- m_pNode(&m_pMemory->create_node()) {
- Assign(rhs);
-}
-
-inline Node::Node(const detail::iterator_value& rhs)
- : m_isValid(rhs.m_isValid),
- m_pMemory(rhs.m_pMemory),
- m_pNode(rhs.m_pNode) {}
-
-inline Node::Node(const Node& rhs)
- : m_isValid(rhs.m_isValid),
- m_pMemory(rhs.m_pMemory),
- m_pNode(rhs.m_pNode) {}
-
-inline Node::Node(Zombie) : m_isValid(false), m_pNode(NULL) {}
-
-inline Node::Node(detail::node& node, detail::shared_memory_holder pMemory)
- : m_isValid(true), m_pMemory(pMemory), m_pNode(&node) {}
-
-inline Node::~Node() {}
-
-inline void Node::EnsureNodeExists() const {
- if (!m_isValid)
- throw InvalidNode();
- if (!m_pNode) {
- m_pMemory.reset(new detail::memory_holder);
- m_pNode = &m_pMemory->create_node();
- m_pNode->set_null();
- }
-}
-
-inline bool Node::IsDefined() const {
- if (!m_isValid) {
- return false;
- }
- return m_pNode ? m_pNode->is_defined() : true;
-}
-
-inline Mark Node::Mark() const {
- if (!m_isValid) {
- throw InvalidNode();
- }
- return m_pNode ? m_pNode->mark() : Mark::null_mark();
-}
-
-inline NodeType::value Node::Type() const {
- if (!m_isValid)
- throw InvalidNode();
- return m_pNode ? m_pNode->type() : NodeType::Null;
-}
-
-// access
-
-// template helpers
-template <typename T, typename S>
-struct as_if {
- explicit as_if(const Node& node_) : node(node_) {}
- const Node& node;
-
- T operator()(const S& fallback) const {
- if (!node.m_pNode)
- return fallback;
-
- T t;
- if (convert<T>::decode(node, t))
- return t;
- return fallback;
- }
-};
-
-template <typename S>
-struct as_if<std::string, S> {
- explicit as_if(const Node& node_) : node(node_) {}
- const Node& node;
-
+#ifndef NODE_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/node/node.h"
+#include "yaml-cpp/node/iterator.h"
+#include "yaml-cpp/node/detail/memory.h"
+#include "yaml-cpp/node/detail/node.h"
+#include "yaml-cpp/exceptions.h"
+#include <string>
+
+namespace YAML {
+inline Node::Node() : m_isValid(true), m_pNode(NULL) {}
+
+inline Node::Node(NodeType::value type)
+ : m_isValid(true),
+ m_pMemory(new detail::memory_holder),
+ m_pNode(&m_pMemory->create_node()) {
+ m_pNode->set_type(type);
+}
+
+template <typename T>
+inline Node::Node(const T& rhs)
+ : m_isValid(true),
+ m_pMemory(new detail::memory_holder),
+ m_pNode(&m_pMemory->create_node()) {
+ Assign(rhs);
+}
+
+inline Node::Node(const detail::iterator_value& rhs)
+ : m_isValid(rhs.m_isValid),
+ m_pMemory(rhs.m_pMemory),
+ m_pNode(rhs.m_pNode) {}
+
+inline Node::Node(const Node& rhs)
+ : m_isValid(rhs.m_isValid),
+ m_pMemory(rhs.m_pMemory),
+ m_pNode(rhs.m_pNode) {}
+
+inline Node::Node(Zombie) : m_isValid(false), m_pNode(NULL) {}
+
+inline Node::Node(detail::node& node, detail::shared_memory_holder pMemory)
+ : m_isValid(true), m_pMemory(pMemory), m_pNode(&node) {}
+
+inline Node::~Node() {}
+
+inline void Node::EnsureNodeExists() const {
+ if (!m_isValid)
+ throw InvalidNode();
+ if (!m_pNode) {
+ m_pMemory.reset(new detail::memory_holder);
+ m_pNode = &m_pMemory->create_node();
+ m_pNode->set_null();
+ }
+}
+
+inline bool Node::IsDefined() const {
+ if (!m_isValid) {
+ return false;
+ }
+ return m_pNode ? m_pNode->is_defined() : true;
+}
+
+inline Mark Node::Mark() const {
+ if (!m_isValid) {
+ throw InvalidNode();
+ }
+ return m_pNode ? m_pNode->mark() : Mark::null_mark();
+}
+
+inline NodeType::value Node::Type() const {
+ if (!m_isValid)
+ throw InvalidNode();
+ return m_pNode ? m_pNode->type() : NodeType::Null;
+}
+
+// access
+
+// template helpers
+template <typename T, typename S>
+struct as_if {
+ explicit as_if(const Node& node_) : node(node_) {}
+ const Node& node;
+
+ T operator()(const S& fallback) const {
+ if (!node.m_pNode)
+ return fallback;
+
+ T t;
+ if (convert<T>::decode(node, t))
+ return t;
+ return fallback;
+ }
+};
+
+template <typename S>
+struct as_if<std::string, S> {
+ explicit as_if(const Node& node_) : node(node_) {}
+ const Node& node;
+
std::string operator()(const S& fallback) const {
- if (node.Type() != NodeType::Scalar)
- return fallback;
- return node.Scalar();
- }
-};
-
-template <typename T>
-struct as_if<T, void> {
- explicit as_if(const Node& node_) : node(node_) {}
- const Node& node;
-
+ if (node.Type() != NodeType::Scalar)
+ return fallback;
+ return node.Scalar();
+ }
+};
+
+template <typename T>
+struct as_if<T, void> {
+ explicit as_if(const Node& node_) : node(node_) {}
+ const Node& node;
+
T operator()() const {
- if (!node.m_pNode)
- throw TypedBadConversion<T>(node.Mark());
-
- T t;
- if (convert<T>::decode(node, t))
- return t;
- throw TypedBadConversion<T>(node.Mark());
- }
-};
-
-template <>
-struct as_if<std::string, void> {
- explicit as_if(const Node& node_) : node(node_) {}
- const Node& node;
-
+ if (!node.m_pNode)
+ throw TypedBadConversion<T>(node.Mark());
+
+ T t;
+ if (convert<T>::decode(node, t))
+ return t;
+ throw TypedBadConversion<T>(node.Mark());
+ }
+};
+
+template <>
+struct as_if<std::string, void> {
+ explicit as_if(const Node& node_) : node(node_) {}
+ const Node& node;
+
std::string operator()() const {
- if (node.Type() != NodeType::Scalar)
- throw TypedBadConversion<std::string>(node.Mark());
- return node.Scalar();
- }
-};
-
-// access functions
-template <typename T>
-inline T Node::as() const {
- if (!m_isValid)
- throw InvalidNode();
- return as_if<T, void>(*this)();
-}
-
-template <typename T, typename S>
-inline T Node::as(const S& fallback) const {
- if (!m_isValid)
- return fallback;
- return as_if<T, S>(*this)(fallback);
-}
-
-inline const std::string& Node::Scalar() const {
- if (!m_isValid)
- throw InvalidNode();
- return m_pNode ? m_pNode->scalar() : detail::node_data::empty_scalar;
-}
-
-inline const std::string& Node::Tag() const {
- if (!m_isValid)
- throw InvalidNode();
- return m_pNode ? m_pNode->tag() : detail::node_data::empty_scalar;
-}
-
-inline void Node::SetTag(const std::string& tag) {
- if (!m_isValid)
- throw InvalidNode();
- EnsureNodeExists();
- m_pNode->set_tag(tag);
-}
-
-inline EmitterStyle::value Node::Style() const {
- if (!m_isValid)
- throw InvalidNode();
- return m_pNode ? m_pNode->style() : EmitterStyle::Default;
-}
-
-inline void Node::SetStyle(EmitterStyle::value style) {
- if (!m_isValid)
- throw InvalidNode();
- EnsureNodeExists();
- m_pNode->set_style(style);
-}
-
-// assignment
-inline bool Node::is(const Node& rhs) const {
- if (!m_isValid || !rhs.m_isValid)
- throw InvalidNode();
- if (!m_pNode || !rhs.m_pNode)
- return false;
- return m_pNode->is(*rhs.m_pNode);
-}
-
-template <typename T>
-inline Node& Node::operator=(const T& rhs) {
- if (!m_isValid)
- throw InvalidNode();
- Assign(rhs);
- return *this;
-}
-
-inline void Node::reset(const YAML::Node& rhs) {
- if (!m_isValid || !rhs.m_isValid)
- throw InvalidNode();
- m_pMemory = rhs.m_pMemory;
- m_pNode = rhs.m_pNode;
-}
-
-template <typename T>
-inline void Node::Assign(const T& rhs) {
- if (!m_isValid)
- throw InvalidNode();
- AssignData(convert<T>::encode(rhs));
-}
-
-template <>
-inline void Node::Assign(const std::string& rhs) {
- if (!m_isValid)
- throw InvalidNode();
- EnsureNodeExists();
- m_pNode->set_scalar(rhs);
-}
-
-inline void Node::Assign(const char* rhs) {
- if (!m_isValid)
- throw InvalidNode();
- EnsureNodeExists();
- m_pNode->set_scalar(rhs);
-}
-
-inline void Node::Assign(char* rhs) {
- if (!m_isValid)
- throw InvalidNode();
- EnsureNodeExists();
- m_pNode->set_scalar(rhs);
-}
-
-inline Node& Node::operator=(const Node& rhs) {
- if (!m_isValid || !rhs.m_isValid)
- throw InvalidNode();
- if (is(rhs))
- return *this;
- AssignNode(rhs);
- return *this;
-}
-
-inline void Node::AssignData(const Node& rhs) {
- if (!m_isValid || !rhs.m_isValid)
- throw InvalidNode();
- EnsureNodeExists();
- rhs.EnsureNodeExists();
-
- m_pNode->set_data(*rhs.m_pNode);
- m_pMemory->merge(*rhs.m_pMemory);
-}
-
-inline void Node::AssignNode(const Node& rhs) {
- if (!m_isValid || !rhs.m_isValid)
- throw InvalidNode();
- rhs.EnsureNodeExists();
-
- if (!m_pNode) {
- m_pNode = rhs.m_pNode;
- m_pMemory = rhs.m_pMemory;
- return;
- }
-
- m_pNode->set_ref(*rhs.m_pNode);
- m_pMemory->merge(*rhs.m_pMemory);
- m_pNode = rhs.m_pNode;
-}
-
-// size/iterator
-inline std::size_t Node::size() const {
- if (!m_isValid)
- throw InvalidNode();
- return m_pNode ? m_pNode->size() : 0;
-}
-
-inline const_iterator Node::begin() const {
- if (!m_isValid)
- return const_iterator();
- return m_pNode ? const_iterator(m_pNode->begin(), m_pMemory)
- : const_iterator();
-}
-
-inline iterator Node::begin() {
- if (!m_isValid)
- return iterator();
- return m_pNode ? iterator(m_pNode->begin(), m_pMemory) : iterator();
-}
-
-inline const_iterator Node::end() const {
- if (!m_isValid)
- return const_iterator();
- return m_pNode ? const_iterator(m_pNode->end(), m_pMemory) : const_iterator();
-}
-
-inline iterator Node::end() {
- if (!m_isValid)
- return iterator();
- return m_pNode ? iterator(m_pNode->end(), m_pMemory) : iterator();
-}
-
-// sequence
-template <typename T>
-inline void Node::push_back(const T& rhs) {
- if (!m_isValid)
- throw InvalidNode();
- push_back(Node(rhs));
-}
-
-inline void Node::push_back(const Node& rhs) {
- if (!m_isValid || !rhs.m_isValid)
- throw InvalidNode();
- EnsureNodeExists();
- rhs.EnsureNodeExists();
-
- m_pNode->push_back(*rhs.m_pNode, m_pMemory);
- m_pMemory->merge(*rhs.m_pMemory);
-}
-
-// helpers for indexing
-namespace detail {
-template <typename T>
-struct to_value_t {
- explicit to_value_t(const T& t_) : t(t_) {}
- const T& t;
- typedef const T& return_type;
-
- const T& operator()() const { return t; }
-};
-
-template <>
-struct to_value_t<const char*> {
- explicit to_value_t(const char* t_) : t(t_) {}
- const char* t;
- typedef std::string return_type;
-
- const std::string operator()() const { return t; }
-};
-
-template <>
-struct to_value_t<char*> {
- explicit to_value_t(char* t_) : t(t_) {}
- const char* t;
- typedef std::string return_type;
-
- const std::string operator()() const { return t; }
-};
-
-template <std::size_t N>
-struct to_value_t<char[N]> {
- explicit to_value_t(const char* t_) : t(t_) {}
- const char* t;
- typedef std::string return_type;
-
- const std::string operator()() const { return t; }
-};
-
-// converts C-strings to std::strings so they can be copied
-template <typename T>
-inline typename to_value_t<T>::return_type to_value(const T& t) {
- return to_value_t<T>(t)();
-}
-}
-
-// indexing
-template <typename Key>
-inline const Node Node::operator[](const Key& key) const {
- if (!m_isValid)
- throw InvalidNode();
- EnsureNodeExists();
- detail::node* value = static_cast<const detail::node&>(*m_pNode)
- .get(detail::to_value(key), m_pMemory);
- if (!value) {
- return Node(ZombieNode);
- }
- return Node(*value, m_pMemory);
-}
-
-template <typename Key>
-inline Node Node::operator[](const Key& key) {
- if (!m_isValid)
- throw InvalidNode();
- EnsureNodeExists();
- detail::node& value = m_pNode->get(detail::to_value(key), m_pMemory);
- return Node(value, m_pMemory);
-}
-
-template <typename Key>
-inline bool Node::remove(const Key& key) {
- if (!m_isValid)
- throw InvalidNode();
- EnsureNodeExists();
- return m_pNode->remove(detail::to_value(key), m_pMemory);
-}
-
-inline const Node Node::operator[](const Node& key) const {
- if (!m_isValid || !key.m_isValid)
- throw InvalidNode();
- EnsureNodeExists();
- key.EnsureNodeExists();
- m_pMemory->merge(*key.m_pMemory);
- detail::node* value =
- static_cast<const detail::node&>(*m_pNode).get(*key.m_pNode, m_pMemory);
- if (!value) {
- return Node(ZombieNode);
- }
- return Node(*value, m_pMemory);
-}
-
-inline Node Node::operator[](const Node& key) {
- if (!m_isValid || !key.m_isValid)
- throw InvalidNode();
- EnsureNodeExists();
- key.EnsureNodeExists();
- m_pMemory->merge(*key.m_pMemory);
- detail::node& value = m_pNode->get(*key.m_pNode, m_pMemory);
- return Node(value, m_pMemory);
-}
-
-inline bool Node::remove(const Node& key) {
- if (!m_isValid || !key.m_isValid)
- throw InvalidNode();
- EnsureNodeExists();
- key.EnsureNodeExists();
- return m_pNode->remove(*key.m_pNode, m_pMemory);
-}
-
-// map
-template <typename Key, typename Value>
-inline void Node::force_insert(const Key& key, const Value& value) {
- if (!m_isValid)
- throw InvalidNode();
- EnsureNodeExists();
- m_pNode->force_insert(detail::to_value(key), detail::to_value(value),
- m_pMemory);
-}
-
-// free functions
-inline bool operator==(const Node& lhs, const Node& rhs) { return lhs.is(rhs); }
-}
-
-#endif // NODE_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+ if (node.Type() != NodeType::Scalar)
+ throw TypedBadConversion<std::string>(node.Mark());
+ return node.Scalar();
+ }
+};
+
+// access functions
+template <typename T>
+inline T Node::as() const {
+ if (!m_isValid)
+ throw InvalidNode();
+ return as_if<T, void>(*this)();
+}
+
+template <typename T, typename S>
+inline T Node::as(const S& fallback) const {
+ if (!m_isValid)
+ return fallback;
+ return as_if<T, S>(*this)(fallback);
+}
+
+inline const std::string& Node::Scalar() const {
+ if (!m_isValid)
+ throw InvalidNode();
+ return m_pNode ? m_pNode->scalar() : detail::node_data::empty_scalar;
+}
+
+inline const std::string& Node::Tag() const {
+ if (!m_isValid)
+ throw InvalidNode();
+ return m_pNode ? m_pNode->tag() : detail::node_data::empty_scalar;
+}
+
+inline void Node::SetTag(const std::string& tag) {
+ if (!m_isValid)
+ throw InvalidNode();
+ EnsureNodeExists();
+ m_pNode->set_tag(tag);
+}
+
+inline EmitterStyle::value Node::Style() const {
+ if (!m_isValid)
+ throw InvalidNode();
+ return m_pNode ? m_pNode->style() : EmitterStyle::Default;
+}
+
+inline void Node::SetStyle(EmitterStyle::value style) {
+ if (!m_isValid)
+ throw InvalidNode();
+ EnsureNodeExists();
+ m_pNode->set_style(style);
+}
+
+// assignment
+inline bool Node::is(const Node& rhs) const {
+ if (!m_isValid || !rhs.m_isValid)
+ throw InvalidNode();
+ if (!m_pNode || !rhs.m_pNode)
+ return false;
+ return m_pNode->is(*rhs.m_pNode);
+}
+
+template <typename T>
+inline Node& Node::operator=(const T& rhs) {
+ if (!m_isValid)
+ throw InvalidNode();
+ Assign(rhs);
+ return *this;
+}
+
+inline void Node::reset(const YAML::Node& rhs) {
+ if (!m_isValid || !rhs.m_isValid)
+ throw InvalidNode();
+ m_pMemory = rhs.m_pMemory;
+ m_pNode = rhs.m_pNode;
+}
+
+template <typename T>
+inline void Node::Assign(const T& rhs) {
+ if (!m_isValid)
+ throw InvalidNode();
+ AssignData(convert<T>::encode(rhs));
+}
+
+template <>
+inline void Node::Assign(const std::string& rhs) {
+ if (!m_isValid)
+ throw InvalidNode();
+ EnsureNodeExists();
+ m_pNode->set_scalar(rhs);
+}
+
+inline void Node::Assign(const char* rhs) {
+ if (!m_isValid)
+ throw InvalidNode();
+ EnsureNodeExists();
+ m_pNode->set_scalar(rhs);
+}
+
+inline void Node::Assign(char* rhs) {
+ if (!m_isValid)
+ throw InvalidNode();
+ EnsureNodeExists();
+ m_pNode->set_scalar(rhs);
+}
+
+inline Node& Node::operator=(const Node& rhs) {
+ if (!m_isValid || !rhs.m_isValid)
+ throw InvalidNode();
+ if (is(rhs))
+ return *this;
+ AssignNode(rhs);
+ return *this;
+}
+
+inline void Node::AssignData(const Node& rhs) {
+ if (!m_isValid || !rhs.m_isValid)
+ throw InvalidNode();
+ EnsureNodeExists();
+ rhs.EnsureNodeExists();
+
+ m_pNode->set_data(*rhs.m_pNode);
+ m_pMemory->merge(*rhs.m_pMemory);
+}
+
+inline void Node::AssignNode(const Node& rhs) {
+ if (!m_isValid || !rhs.m_isValid)
+ throw InvalidNode();
+ rhs.EnsureNodeExists();
+
+ if (!m_pNode) {
+ m_pNode = rhs.m_pNode;
+ m_pMemory = rhs.m_pMemory;
+ return;
+ }
+
+ m_pNode->set_ref(*rhs.m_pNode);
+ m_pMemory->merge(*rhs.m_pMemory);
+ m_pNode = rhs.m_pNode;
+}
+
+// size/iterator
+inline std::size_t Node::size() const {
+ if (!m_isValid)
+ throw InvalidNode();
+ return m_pNode ? m_pNode->size() : 0;
+}
+
+inline const_iterator Node::begin() const {
+ if (!m_isValid)
+ return const_iterator();
+ return m_pNode ? const_iterator(m_pNode->begin(), m_pMemory)
+ : const_iterator();
+}
+
+inline iterator Node::begin() {
+ if (!m_isValid)
+ return iterator();
+ return m_pNode ? iterator(m_pNode->begin(), m_pMemory) : iterator();
+}
+
+inline const_iterator Node::end() const {
+ if (!m_isValid)
+ return const_iterator();
+ return m_pNode ? const_iterator(m_pNode->end(), m_pMemory) : const_iterator();
+}
+
+inline iterator Node::end() {
+ if (!m_isValid)
+ return iterator();
+ return m_pNode ? iterator(m_pNode->end(), m_pMemory) : iterator();
+}
+
+// sequence
+template <typename T>
+inline void Node::push_back(const T& rhs) {
+ if (!m_isValid)
+ throw InvalidNode();
+ push_back(Node(rhs));
+}
+
+inline void Node::push_back(const Node& rhs) {
+ if (!m_isValid || !rhs.m_isValid)
+ throw InvalidNode();
+ EnsureNodeExists();
+ rhs.EnsureNodeExists();
+
+ m_pNode->push_back(*rhs.m_pNode, m_pMemory);
+ m_pMemory->merge(*rhs.m_pMemory);
+}
+
+// helpers for indexing
+namespace detail {
+template <typename T>
+struct to_value_t {
+ explicit to_value_t(const T& t_) : t(t_) {}
+ const T& t;
+ typedef const T& return_type;
+
+ const T& operator()() const { return t; }
+};
+
+template <>
+struct to_value_t<const char*> {
+ explicit to_value_t(const char* t_) : t(t_) {}
+ const char* t;
+ typedef std::string return_type;
+
+ const std::string operator()() const { return t; }
+};
+
+template <>
+struct to_value_t<char*> {
+ explicit to_value_t(char* t_) : t(t_) {}
+ const char* t;
+ typedef std::string return_type;
+
+ const std::string operator()() const { return t; }
+};
+
+template <std::size_t N>
+struct to_value_t<char[N]> {
+ explicit to_value_t(const char* t_) : t(t_) {}
+ const char* t;
+ typedef std::string return_type;
+
+ const std::string operator()() const { return t; }
+};
+
+// converts C-strings to std::strings so they can be copied
+template <typename T>
+inline typename to_value_t<T>::return_type to_value(const T& t) {
+ return to_value_t<T>(t)();
+}
+}
+
+// indexing
+template <typename Key>
+inline const Node Node::operator[](const Key& key) const {
+ if (!m_isValid)
+ throw InvalidNode();
+ EnsureNodeExists();
+ detail::node* value = static_cast<const detail::node&>(*m_pNode)
+ .get(detail::to_value(key), m_pMemory);
+ if (!value) {
+ return Node(ZombieNode);
+ }
+ return Node(*value, m_pMemory);
+}
+
+template <typename Key>
+inline Node Node::operator[](const Key& key) {
+ if (!m_isValid)
+ throw InvalidNode();
+ EnsureNodeExists();
+ detail::node& value = m_pNode->get(detail::to_value(key), m_pMemory);
+ return Node(value, m_pMemory);
+}
+
+template <typename Key>
+inline bool Node::remove(const Key& key) {
+ if (!m_isValid)
+ throw InvalidNode();
+ EnsureNodeExists();
+ return m_pNode->remove(detail::to_value(key), m_pMemory);
+}
+
+inline const Node Node::operator[](const Node& key) const {
+ if (!m_isValid || !key.m_isValid)
+ throw InvalidNode();
+ EnsureNodeExists();
+ key.EnsureNodeExists();
+ m_pMemory->merge(*key.m_pMemory);
+ detail::node* value =
+ static_cast<const detail::node&>(*m_pNode).get(*key.m_pNode, m_pMemory);
+ if (!value) {
+ return Node(ZombieNode);
+ }
+ return Node(*value, m_pMemory);
+}
+
+inline Node Node::operator[](const Node& key) {
+ if (!m_isValid || !key.m_isValid)
+ throw InvalidNode();
+ EnsureNodeExists();
+ key.EnsureNodeExists();
+ m_pMemory->merge(*key.m_pMemory);
+ detail::node& value = m_pNode->get(*key.m_pNode, m_pMemory);
+ return Node(value, m_pMemory);
+}
+
+inline bool Node::remove(const Node& key) {
+ if (!m_isValid || !key.m_isValid)
+ throw InvalidNode();
+ EnsureNodeExists();
+ key.EnsureNodeExists();
+ return m_pNode->remove(*key.m_pNode, m_pMemory);
+}
+
+// map
+template <typename Key, typename Value>
+inline void Node::force_insert(const Key& key, const Value& value) {
+ if (!m_isValid)
+ throw InvalidNode();
+ EnsureNodeExists();
+ m_pNode->force_insert(detail::to_value(key), detail::to_value(value),
+ m_pMemory);
+}
+
+// free functions
+inline bool operator==(const Node& lhs, const Node& rhs) { return lhs.is(rhs); }
+}
+
+#endif // NODE_IMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/node/iterator.h b/contrib/libs/yaml-cpp/include/yaml-cpp/node/iterator.h
index 551a693331..6618169c53 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/node/iterator.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/node/iterator.h
@@ -1,22 +1,22 @@
-#ifndef VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "yaml-cpp/dll.h"
-#include "yaml-cpp/node/node.h"
-#include "yaml-cpp/node/detail/iterator_fwd.h"
-#include "yaml-cpp/node/detail/iterator.h"
-#include <list>
-#include <utility>
-#include <vector>
-
-namespace YAML {
-namespace detail {
+#ifndef VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/node/node.h"
+#include "yaml-cpp/node/detail/iterator_fwd.h"
+#include "yaml-cpp/node/detail/iterator.h"
+#include <list>
+#include <utility>
+#include <vector>
+
+namespace YAML {
+namespace detail {
struct node_pair: public std::pair<Node, Node> {
node_pair() = default;
node_pair(const Node& first, const Node& second)
@@ -26,14 +26,14 @@ struct node_pair: public std::pair<Node, Node> {
};
struct iterator_value : public Node, node_pair {
- iterator_value() {}
- explicit iterator_value(const Node& rhs)
- : Node(rhs),
+ iterator_value() {}
+ explicit iterator_value(const Node& rhs)
+ : Node(rhs),
node_pair(Node(Node::ZombieNode), Node(Node::ZombieNode)) {}
- explicit iterator_value(const Node& key, const Node& value)
+ explicit iterator_value(const Node& key, const Node& value)
: Node(Node::ZombieNode), node_pair(key, value) {}
-};
-}
-}
-
-#endif // VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+};
+}
+}
+
+#endif // VALUE_ITERATOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/node/node.h b/contrib/libs/yaml-cpp/include/yaml-cpp/node/node.h
index 5e1bd5510f..1ded7d27b7 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/node/node.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/node/node.h
@@ -1,145 +1,145 @@
-#ifndef NODE_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define NODE_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <stdexcept>
-
-#include "yaml-cpp/dll.h"
-#include "yaml-cpp/emitterstyle.h"
-#include "yaml-cpp/mark.h"
-#include "yaml-cpp/node/detail/bool_type.h"
-#include "yaml-cpp/node/detail/iterator_fwd.h"
-#include "yaml-cpp/node/ptr.h"
-#include "yaml-cpp/node/type.h"
-
-namespace YAML {
-namespace detail {
-class node;
-class node_data;
-struct iterator_value;
-} // namespace detail
-} // namespace YAML
-
-namespace YAML {
-class YAML_CPP_API Node {
- public:
- friend class NodeBuilder;
- friend class NodeEvents;
- friend struct detail::iterator_value;
- friend class detail::node;
- friend class detail::node_data;
- template <typename>
- friend class detail::iterator_base;
- template <typename T, typename S>
- friend struct as_if;
-
- typedef YAML::iterator iterator;
- typedef YAML::const_iterator const_iterator;
-
- Node();
- explicit Node(NodeType::value type);
- template <typename T>
- explicit Node(const T& rhs);
- explicit Node(const detail::iterator_value& rhs);
- Node(const Node& rhs);
- ~Node();
-
- YAML::Mark Mark() const;
- NodeType::value Type() const;
- bool IsDefined() const;
- bool IsNull() const { return Type() == NodeType::Null; }
- bool IsScalar() const { return Type() == NodeType::Scalar; }
- bool IsSequence() const { return Type() == NodeType::Sequence; }
- bool IsMap() const { return Type() == NodeType::Map; }
-
- // bool conversions
+#ifndef NODE_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <stdexcept>
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/emitterstyle.h"
+#include "yaml-cpp/mark.h"
+#include "yaml-cpp/node/detail/bool_type.h"
+#include "yaml-cpp/node/detail/iterator_fwd.h"
+#include "yaml-cpp/node/ptr.h"
+#include "yaml-cpp/node/type.h"
+
+namespace YAML {
+namespace detail {
+class node;
+class node_data;
+struct iterator_value;
+} // namespace detail
+} // namespace YAML
+
+namespace YAML {
+class YAML_CPP_API Node {
+ public:
+ friend class NodeBuilder;
+ friend class NodeEvents;
+ friend struct detail::iterator_value;
+ friend class detail::node;
+ friend class detail::node_data;
+ template <typename>
+ friend class detail::iterator_base;
+ template <typename T, typename S>
+ friend struct as_if;
+
+ typedef YAML::iterator iterator;
+ typedef YAML::const_iterator const_iterator;
+
+ Node();
+ explicit Node(NodeType::value type);
+ template <typename T>
+ explicit Node(const T& rhs);
+ explicit Node(const detail::iterator_value& rhs);
+ Node(const Node& rhs);
+ ~Node();
+
+ YAML::Mark Mark() const;
+ NodeType::value Type() const;
+ bool IsDefined() const;
+ bool IsNull() const { return Type() == NodeType::Null; }
+ bool IsScalar() const { return Type() == NodeType::Scalar; }
+ bool IsSequence() const { return Type() == NodeType::Sequence; }
+ bool IsMap() const { return Type() == NodeType::Map; }
+
+ // bool conversions
YAML_CPP_OPERATOR_BOOL()
- bool operator!() const { return !IsDefined(); }
-
- // access
- template <typename T>
- T as() const;
- template <typename T, typename S>
- T as(const S& fallback) const;
- const std::string& Scalar() const;
-
- const std::string& Tag() const;
- void SetTag(const std::string& tag);
-
- // style
- // WARNING: This API might change in future releases.
- EmitterStyle::value Style() const;
- void SetStyle(EmitterStyle::value style);
-
- // assignment
- bool is(const Node& rhs) const;
- template <typename T>
- Node& operator=(const T& rhs);
- Node& operator=(const Node& rhs);
- void reset(const Node& rhs = Node());
-
- // size/iterator
- std::size_t size() const;
-
- const_iterator begin() const;
- iterator begin();
-
- const_iterator end() const;
- iterator end();
-
- // sequence
- template <typename T>
- void push_back(const T& rhs);
- void push_back(const Node& rhs);
-
- // indexing
- template <typename Key>
- const Node operator[](const Key& key) const;
- template <typename Key>
- Node operator[](const Key& key);
- template <typename Key>
- bool remove(const Key& key);
-
- const Node operator[](const Node& key) const;
- Node operator[](const Node& key);
- bool remove(const Node& key);
-
- // map
- template <typename Key, typename Value>
- void force_insert(const Key& key, const Value& value);
-
- private:
- enum Zombie { ZombieNode };
- explicit Node(Zombie);
- explicit Node(detail::node& node, detail::shared_memory_holder pMemory);
-
- void EnsureNodeExists() const;
-
- template <typename T>
- void Assign(const T& rhs);
- void Assign(const char* rhs);
- void Assign(char* rhs);
-
- void AssignData(const Node& rhs);
- void AssignNode(const Node& rhs);
-
- private:
- bool m_isValid;
- mutable detail::shared_memory_holder m_pMemory;
- mutable detail::node* m_pNode;
-};
-
-YAML_CPP_API bool operator==(const Node& lhs, const Node& rhs);
-
-YAML_CPP_API Node Clone(const Node& node);
-
-template <typename T>
-struct convert;
-}
-
-#endif // NODE_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+ bool operator!() const { return !IsDefined(); }
+
+ // access
+ template <typename T>
+ T as() const;
+ template <typename T, typename S>
+ T as(const S& fallback) const;
+ const std::string& Scalar() const;
+
+ const std::string& Tag() const;
+ void SetTag(const std::string& tag);
+
+ // style
+ // WARNING: This API might change in future releases.
+ EmitterStyle::value Style() const;
+ void SetStyle(EmitterStyle::value style);
+
+ // assignment
+ bool is(const Node& rhs) const;
+ template <typename T>
+ Node& operator=(const T& rhs);
+ Node& operator=(const Node& rhs);
+ void reset(const Node& rhs = Node());
+
+ // size/iterator
+ std::size_t size() const;
+
+ const_iterator begin() const;
+ iterator begin();
+
+ const_iterator end() const;
+ iterator end();
+
+ // sequence
+ template <typename T>
+ void push_back(const T& rhs);
+ void push_back(const Node& rhs);
+
+ // indexing
+ template <typename Key>
+ const Node operator[](const Key& key) const;
+ template <typename Key>
+ Node operator[](const Key& key);
+ template <typename Key>
+ bool remove(const Key& key);
+
+ const Node operator[](const Node& key) const;
+ Node operator[](const Node& key);
+ bool remove(const Node& key);
+
+ // map
+ template <typename Key, typename Value>
+ void force_insert(const Key& key, const Value& value);
+
+ private:
+ enum Zombie { ZombieNode };
+ explicit Node(Zombie);
+ explicit Node(detail::node& node, detail::shared_memory_holder pMemory);
+
+ void EnsureNodeExists() const;
+
+ template <typename T>
+ void Assign(const T& rhs);
+ void Assign(const char* rhs);
+ void Assign(char* rhs);
+
+ void AssignData(const Node& rhs);
+ void AssignNode(const Node& rhs);
+
+ private:
+ bool m_isValid;
+ mutable detail::shared_memory_holder m_pMemory;
+ mutable detail::node* m_pNode;
+};
+
+YAML_CPP_API bool operator==(const Node& lhs, const Node& rhs);
+
+YAML_CPP_API Node Clone(const Node& node);
+
+template <typename T>
+struct convert;
+}
+
+#endif // NODE_NODE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/node/parse.h b/contrib/libs/yaml-cpp/include/yaml-cpp/node/parse.h
index af5352ad14..7745fd7245 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/node/parse.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/node/parse.h
@@ -1,41 +1,41 @@
-#ifndef VALUE_PARSE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define VALUE_PARSE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <iosfwd>
-#include <string>
-#include <vector>
-
-#include "yaml-cpp/dll.h"
-
-namespace YAML {
-class Node;
-
+#ifndef VALUE_PARSE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_PARSE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <iosfwd>
+#include <string>
+#include <vector>
+
+#include "yaml-cpp/dll.h"
+
+namespace YAML {
+class Node;
+
/**
* Loads the input string as a single YAML document.
*
* @throws {@link ParserException} if it is malformed.
*/
-YAML_CPP_API Node Load(const std::string& input);
+YAML_CPP_API Node Load(const std::string& input);
/**
* Loads the input string as a single YAML document.
*
* @throws {@link ParserException} if it is malformed.
*/
-YAML_CPP_API Node Load(const char* input);
+YAML_CPP_API Node Load(const char* input);
/**
* Loads the input stream as a single YAML document.
*
* @throws {@link ParserException} if it is malformed.
*/
-YAML_CPP_API Node Load(std::istream& input);
+YAML_CPP_API Node Load(std::istream& input);
/**
* Loads the input file as a single YAML document.
@@ -43,28 +43,28 @@ YAML_CPP_API Node Load(std::istream& input);
* @throws {@link ParserException} if it is malformed.
* @throws {@link BadFile} if the file cannot be loaded.
*/
-YAML_CPP_API Node LoadFile(const std::string& filename);
-
+YAML_CPP_API Node LoadFile(const std::string& filename);
+
/**
* Loads the input string as a list of YAML documents.
*
* @throws {@link ParserException} if it is malformed.
*/
-YAML_CPP_API std::vector<Node> LoadAll(const std::string& input);
+YAML_CPP_API std::vector<Node> LoadAll(const std::string& input);
/**
* Loads the input string as a list of YAML documents.
*
* @throws {@link ParserException} if it is malformed.
*/
-YAML_CPP_API std::vector<Node> LoadAll(const char* input);
+YAML_CPP_API std::vector<Node> LoadAll(const char* input);
/**
* Loads the input stream as a list of YAML documents.
*
* @throws {@link ParserException} if it is malformed.
*/
-YAML_CPP_API std::vector<Node> LoadAll(std::istream& input);
+YAML_CPP_API std::vector<Node> LoadAll(std::istream& input);
/**
* Loads the input file as a list of YAML documents.
@@ -72,7 +72,7 @@ YAML_CPP_API std::vector<Node> LoadAll(std::istream& input);
* @throws {@link ParserException} if it is malformed.
* @throws {@link BadFile} if the file cannot be loaded.
*/
-YAML_CPP_API std::vector<Node> LoadAllFromFile(const std::string& filename);
+YAML_CPP_API std::vector<Node> LoadAllFromFile(const std::string& filename);
} // namespace YAML
-
-#endif // VALUE_PARSE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#endif // VALUE_PARSE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/node/ptr.h b/contrib/libs/yaml-cpp/include/yaml-cpp/node/ptr.h
index 7f30642edb..ce085dd5cd 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/node/ptr.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/node/ptr.h
@@ -1,29 +1,29 @@
-#ifndef VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "yaml-cpp/dll.h"
+#ifndef VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
#include <memory>
-
-namespace YAML {
-namespace detail {
-class node;
-class node_ref;
-class node_data;
-class memory;
-class memory_holder;
-
+
+namespace YAML {
+namespace detail {
+class node;
+class node_ref;
+class node_data;
+class memory;
+class memory_holder;
+
typedef std::shared_ptr<node> shared_node;
typedef std::shared_ptr<node_ref> shared_node_ref;
typedef std::shared_ptr<node_data> shared_node_data;
typedef std::shared_ptr<memory_holder> shared_memory_holder;
typedef std::shared_ptr<memory> shared_memory;
-}
-}
-
-#endif // VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+}
+}
+
+#endif // VALUE_PTR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/node/type.h b/contrib/libs/yaml-cpp/include/yaml-cpp/node/type.h
index 4a56d8e3ac..9d55ca9662 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/node/type.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/node/type.h
@@ -1,16 +1,16 @@
-#ifndef VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-namespace YAML {
-struct NodeType {
- enum value { Undefined, Null, Scalar, Sequence, Map };
-};
-}
-
-#endif // VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+namespace YAML {
+struct NodeType {
+ enum value { Undefined, Null, Scalar, Sequence, Map };
+};
+}
+
+#endif // VALUE_TYPE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/noncopyable.h b/contrib/libs/yaml-cpp/include/yaml-cpp/noncopyable.h
index c33c294590..a261040739 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/noncopyable.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/noncopyable.h
@@ -1,25 +1,25 @@
-#ifndef NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "yaml-cpp/dll.h"
-
-namespace YAML {
-// this is basically boost::noncopyable
-class YAML_CPP_API noncopyable {
- protected:
- noncopyable() {}
- ~noncopyable() {}
-
- private:
- noncopyable(const noncopyable&);
- const noncopyable& operator=(const noncopyable&);
-};
-}
-
-#endif // NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
+
+namespace YAML {
+// this is basically boost::noncopyable
+class YAML_CPP_API noncopyable {
+ protected:
+ noncopyable() {}
+ ~noncopyable() {}
+
+ private:
+ noncopyable(const noncopyable&);
+ const noncopyable& operator=(const noncopyable&);
+};
+}
+
+#endif // NONCOPYABLE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/null.h b/contrib/libs/yaml-cpp/include/yaml-cpp/null.h
index 79ebe4db07..b9521d488a 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/null.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/null.h
@@ -1,26 +1,26 @@
-#ifndef NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "yaml-cpp/dll.h"
+#ifndef NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/dll.h"
#include <string>
-
-namespace YAML {
-class Node;
-
-struct YAML_CPP_API _Null {};
-inline bool operator==(const _Null&, const _Null&) { return true; }
-inline bool operator!=(const _Null&, const _Null&) { return false; }
-
-YAML_CPP_API bool IsNull(const Node& node); // old API only
+
+namespace YAML {
+class Node;
+
+struct YAML_CPP_API _Null {};
+inline bool operator==(const _Null&, const _Null&) { return true; }
+inline bool operator!=(const _Null&, const _Null&) { return false; }
+
+YAML_CPP_API bool IsNull(const Node& node); // old API only
YAML_CPP_API bool IsNullString(const std::string& str);
-
-extern YAML_CPP_API _Null Null;
-}
-
-#endif // NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+extern YAML_CPP_API _Null Null;
+}
+
+#endif // NULL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/ostream_wrapper.h b/contrib/libs/yaml-cpp/include/yaml-cpp/ostream_wrapper.h
index bf79511d16..09d45f39b7 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/ostream_wrapper.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/ostream_wrapper.h
@@ -1,72 +1,72 @@
-#ifndef OSTREAM_WRAPPER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define OSTREAM_WRAPPER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <string>
-#include <vector>
-
-#include "yaml-cpp/dll.h"
-
-namespace YAML {
-class YAML_CPP_API ostream_wrapper {
- public:
- ostream_wrapper();
- explicit ostream_wrapper(std::ostream& stream);
- ~ostream_wrapper();
-
- void write(const std::string& str);
- void write(const char* str, std::size_t size);
-
- void set_comment() { m_comment = true; }
-
- const char* str() const {
- if (m_pStream) {
- return 0;
- } else {
- m_buffer[m_pos] = '\0';
- return &m_buffer[0];
- }
- }
-
- std::size_t row() const { return m_row; }
- std::size_t col() const { return m_col; }
- std::size_t pos() const { return m_pos; }
- bool comment() const { return m_comment; }
-
- private:
- void update_pos(char ch);
-
- private:
- mutable std::vector<char> m_buffer;
- std::ostream* const m_pStream;
-
- std::size_t m_pos;
- std::size_t m_row, m_col;
- bool m_comment;
-};
-
-template <std::size_t N>
-inline ostream_wrapper& operator<<(ostream_wrapper& stream,
- const char(&str)[N]) {
- stream.write(str, N - 1);
- return stream;
-}
-
-inline ostream_wrapper& operator<<(ostream_wrapper& stream,
- const std::string& str) {
- stream.write(str);
- return stream;
-}
-
-inline ostream_wrapper& operator<<(ostream_wrapper& stream, char ch) {
- stream.write(&ch, 1);
- return stream;
-}
-}
-
-#endif // OSTREAM_WRAPPER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef OSTREAM_WRAPPER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define OSTREAM_WRAPPER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+#include <vector>
+
+#include "yaml-cpp/dll.h"
+
+namespace YAML {
+class YAML_CPP_API ostream_wrapper {
+ public:
+ ostream_wrapper();
+ explicit ostream_wrapper(std::ostream& stream);
+ ~ostream_wrapper();
+
+ void write(const std::string& str);
+ void write(const char* str, std::size_t size);
+
+ void set_comment() { m_comment = true; }
+
+ const char* str() const {
+ if (m_pStream) {
+ return 0;
+ } else {
+ m_buffer[m_pos] = '\0';
+ return &m_buffer[0];
+ }
+ }
+
+ std::size_t row() const { return m_row; }
+ std::size_t col() const { return m_col; }
+ std::size_t pos() const { return m_pos; }
+ bool comment() const { return m_comment; }
+
+ private:
+ void update_pos(char ch);
+
+ private:
+ mutable std::vector<char> m_buffer;
+ std::ostream* const m_pStream;
+
+ std::size_t m_pos;
+ std::size_t m_row, m_col;
+ bool m_comment;
+};
+
+template <std::size_t N>
+inline ostream_wrapper& operator<<(ostream_wrapper& stream,
+ const char(&str)[N]) {
+ stream.write(str, N - 1);
+ return stream;
+}
+
+inline ostream_wrapper& operator<<(ostream_wrapper& stream,
+ const std::string& str) {
+ stream.write(str);
+ return stream;
+}
+
+inline ostream_wrapper& operator<<(ostream_wrapper& stream, char ch) {
+ stream.write(&ch, 1);
+ return stream;
+}
+}
+
+#endif // OSTREAM_WRAPPER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/parser.h b/contrib/libs/yaml-cpp/include/yaml-cpp/parser.h
index 9d22439769..ceac22d026 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/parser.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/parser.h
@@ -1,33 +1,33 @@
-#ifndef PARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define PARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <ios>
-#include <memory>
-
-#include "yaml-cpp/dll.h"
-#include "yaml-cpp/noncopyable.h"
-
-namespace YAML {
-class EventHandler;
-class Node;
-class Scanner;
-struct Directives;
-struct Token;
-
+#ifndef PARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define PARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <ios>
+#include <memory>
+
+#include "yaml-cpp/dll.h"
+#include "yaml-cpp/noncopyable.h"
+
+namespace YAML {
+class EventHandler;
+class Node;
+class Scanner;
+struct Directives;
+struct Token;
+
/**
* A parser turns a stream of bytes into one stream of "events" per YAML
* document in the input stream.
*/
-class YAML_CPP_API Parser : private noncopyable {
- public:
+class YAML_CPP_API Parser : private noncopyable {
+ public:
/** Constructs an empty parser (with no input. */
- Parser();
+ Parser();
/**
* Constructs a parser from the given input stream. The input stream must
@@ -35,16 +35,16 @@ class YAML_CPP_API Parser : private noncopyable {
*/
explicit Parser(std::istream& in);
- ~Parser();
-
+ ~Parser();
+
/** Evaluates to true if the parser has some valid input to be read. */
explicit operator bool() const;
-
+
/**
* Resets the parser with the given input stream. Any existing state is
* erased.
*/
- void Load(std::istream& in);
+ void Load(std::istream& in);
/**
* Handles the next document by calling events on the {@code eventHandler}.
@@ -52,35 +52,35 @@ class YAML_CPP_API Parser : private noncopyable {
* @throw a ParserException on error.
* @return false if there are no more documents
*/
- bool HandleNextDocument(EventHandler& eventHandler);
-
- void PrintTokens(std::ostream& out);
-
- private:
+ bool HandleNextDocument(EventHandler& eventHandler);
+
+ void PrintTokens(std::ostream& out);
+
+ private:
/**
* Reads any directives that are next in the queue, setting the internal
* {@code m_pDirectives} state.
*/
- void ParseDirectives();
+ void ParseDirectives();
- void HandleDirective(const Token& token);
+ void HandleDirective(const Token& token);
/**
* Handles a "YAML" directive, which should be of the form 'major.minor' (like
* a version number).
*/
- void HandleYamlDirective(const Token& token);
+ void HandleYamlDirective(const Token& token);
/**
* Handles a "TAG" directive, which should be of the form 'handle prefix',
* where 'handle' is converted to 'prefix' in the file.
*/
- void HandleTagDirective(const Token& token);
-
- private:
+ void HandleTagDirective(const Token& token);
+
+ private:
std::unique_ptr<Scanner> m_pScanner;
std::unique_ptr<Directives> m_pDirectives;
-};
-}
-
-#endif // PARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+};
+}
+
+#endif // PARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/stlemitter.h b/contrib/libs/yaml-cpp/include/yaml-cpp/stlemitter.h
index 4c19a67277..06780c861f 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/stlemitter.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/stlemitter.h
@@ -1,51 +1,51 @@
-#ifndef STLEMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define STLEMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <vector>
-#include <list>
-#include <set>
-#include <map>
-
-namespace YAML {
-template <typename Seq>
-inline Emitter& EmitSeq(Emitter& emitter, const Seq& seq) {
- emitter << BeginSeq;
- for (typename Seq::const_iterator it = seq.begin(); it != seq.end(); ++it)
- emitter << *it;
- emitter << EndSeq;
- return emitter;
-}
-
-template <typename T>
-inline Emitter& operator<<(Emitter& emitter, const std::vector<T>& v) {
- return EmitSeq(emitter, v);
-}
-
-template <typename T>
-inline Emitter& operator<<(Emitter& emitter, const std::list<T>& v) {
- return EmitSeq(emitter, v);
-}
-
-template <typename T>
-inline Emitter& operator<<(Emitter& emitter, const std::set<T>& v) {
- return EmitSeq(emitter, v);
-}
-
-template <typename K, typename V>
-inline Emitter& operator<<(Emitter& emitter, const std::map<K, V>& m) {
- typedef typename std::map<K, V> map;
- emitter << BeginMap;
- for (typename map::const_iterator it = m.begin(); it != m.end(); ++it)
- emitter << Key << it->first << Value << it->second;
- emitter << EndMap;
- return emitter;
-}
-}
-
-#endif // STLEMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef STLEMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define STLEMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <vector>
+#include <list>
+#include <set>
+#include <map>
+
+namespace YAML {
+template <typename Seq>
+inline Emitter& EmitSeq(Emitter& emitter, const Seq& seq) {
+ emitter << BeginSeq;
+ for (typename Seq::const_iterator it = seq.begin(); it != seq.end(); ++it)
+ emitter << *it;
+ emitter << EndSeq;
+ return emitter;
+}
+
+template <typename T>
+inline Emitter& operator<<(Emitter& emitter, const std::vector<T>& v) {
+ return EmitSeq(emitter, v);
+}
+
+template <typename T>
+inline Emitter& operator<<(Emitter& emitter, const std::list<T>& v) {
+ return EmitSeq(emitter, v);
+}
+
+template <typename T>
+inline Emitter& operator<<(Emitter& emitter, const std::set<T>& v) {
+ return EmitSeq(emitter, v);
+}
+
+template <typename K, typename V>
+inline Emitter& operator<<(Emitter& emitter, const std::map<K, V>& m) {
+ typedef typename std::map<K, V> map;
+ emitter << BeginMap;
+ for (typename map::const_iterator it = m.begin(); it != m.end(); ++it)
+ emitter << Key << it->first << Value << it->second;
+ emitter << EndMap;
+ return emitter;
+}
+}
+
+#endif // STLEMITTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/traits.h b/contrib/libs/yaml-cpp/include/yaml-cpp/traits.h
index 8775085a39..f33d0e1f63 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/traits.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/traits.h
@@ -1,103 +1,103 @@
-#ifndef TRAITS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define TRAITS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-namespace YAML {
-template <typename>
-struct is_numeric {
- enum { value = false };
-};
-
-template <>
-struct is_numeric<char> {
- enum { value = true };
-};
-template <>
-struct is_numeric<unsigned char> {
- enum { value = true };
-};
-template <>
-struct is_numeric<int> {
- enum { value = true };
-};
-template <>
-struct is_numeric<unsigned int> {
- enum { value = true };
-};
-template <>
-struct is_numeric<long int> {
- enum { value = true };
-};
-template <>
-struct is_numeric<unsigned long int> {
- enum { value = true };
-};
-template <>
-struct is_numeric<short int> {
- enum { value = true };
-};
-template <>
-struct is_numeric<unsigned short int> {
- enum { value = true };
-};
-#if defined(_MSC_VER) && (_MSC_VER < 1310)
-template <>
-struct is_numeric<__int64> {
- enum { value = true };
-};
-template <>
-struct is_numeric<unsigned __int64> {
- enum { value = true };
-};
-#else
-template <>
-struct is_numeric<long long> {
- enum { value = true };
-};
-template <>
-struct is_numeric<unsigned long long> {
- enum { value = true };
-};
-#endif
-template <>
-struct is_numeric<float> {
- enum { value = true };
-};
-template <>
-struct is_numeric<double> {
- enum { value = true };
-};
-template <>
-struct is_numeric<long double> {
- enum { value = true };
-};
-
-template <bool, class T = void>
-struct enable_if_c {
- typedef T type;
-};
-
-template <class T>
-struct enable_if_c<false, T> {};
-
-template <class Cond, class T = void>
-struct enable_if : public enable_if_c<Cond::value, T> {};
-
-template <bool, class T = void>
-struct disable_if_c {
- typedef T type;
-};
-
-template <class T>
-struct disable_if_c<true, T> {};
-
-template <class Cond, class T = void>
-struct disable_if : public disable_if_c<Cond::value, T> {};
-}
-
-#endif // TRAITS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef TRAITS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define TRAITS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+namespace YAML {
+template <typename>
+struct is_numeric {
+ enum { value = false };
+};
+
+template <>
+struct is_numeric<char> {
+ enum { value = true };
+};
+template <>
+struct is_numeric<unsigned char> {
+ enum { value = true };
+};
+template <>
+struct is_numeric<int> {
+ enum { value = true };
+};
+template <>
+struct is_numeric<unsigned int> {
+ enum { value = true };
+};
+template <>
+struct is_numeric<long int> {
+ enum { value = true };
+};
+template <>
+struct is_numeric<unsigned long int> {
+ enum { value = true };
+};
+template <>
+struct is_numeric<short int> {
+ enum { value = true };
+};
+template <>
+struct is_numeric<unsigned short int> {
+ enum { value = true };
+};
+#if defined(_MSC_VER) && (_MSC_VER < 1310)
+template <>
+struct is_numeric<__int64> {
+ enum { value = true };
+};
+template <>
+struct is_numeric<unsigned __int64> {
+ enum { value = true };
+};
+#else
+template <>
+struct is_numeric<long long> {
+ enum { value = true };
+};
+template <>
+struct is_numeric<unsigned long long> {
+ enum { value = true };
+};
+#endif
+template <>
+struct is_numeric<float> {
+ enum { value = true };
+};
+template <>
+struct is_numeric<double> {
+ enum { value = true };
+};
+template <>
+struct is_numeric<long double> {
+ enum { value = true };
+};
+
+template <bool, class T = void>
+struct enable_if_c {
+ typedef T type;
+};
+
+template <class T>
+struct enable_if_c<false, T> {};
+
+template <class Cond, class T = void>
+struct enable_if : public enable_if_c<Cond::value, T> {};
+
+template <bool, class T = void>
+struct disable_if_c {
+ typedef T type;
+};
+
+template <class T>
+struct disable_if_c<true, T> {};
+
+template <class Cond, class T = void>
+struct disable_if : public disable_if_c<Cond::value, T> {};
+}
+
+#endif // TRAITS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/include/yaml-cpp/yaml.h b/contrib/libs/yaml-cpp/include/yaml-cpp/yaml.h
index 11b6ad8ba9..7f515efb96 100644
--- a/contrib/libs/yaml-cpp/include/yaml-cpp/yaml.h
+++ b/contrib/libs/yaml-cpp/include/yaml-cpp/yaml.h
@@ -1,24 +1,24 @@
-#ifndef YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "yaml-cpp/parser.h"
-#include "yaml-cpp/emitter.h"
-#include "yaml-cpp/emitterstyle.h"
-#include "yaml-cpp/stlemitter.h"
-#include "yaml-cpp/exceptions.h"
-
-#include "yaml-cpp/node/node.h"
-#include "yaml-cpp/node/impl.h"
-#include "yaml-cpp/node/convert.h"
-#include "yaml-cpp/node/iterator.h"
-#include "yaml-cpp/node/detail/impl.h"
-#include "yaml-cpp/node/parse.h"
-#include "yaml-cpp/node/emit.h"
-
-#endif // YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/parser.h"
+#include "yaml-cpp/emitter.h"
+#include "yaml-cpp/emitterstyle.h"
+#include "yaml-cpp/stlemitter.h"
+#include "yaml-cpp/exceptions.h"
+
+#include "yaml-cpp/node/node.h"
+#include "yaml-cpp/node/impl.h"
+#include "yaml-cpp/node/convert.h"
+#include "yaml-cpp/node/iterator.h"
+#include "yaml-cpp/node/detail/impl.h"
+#include "yaml-cpp/node/parse.h"
+#include "yaml-cpp/node/emit.h"
+
+#endif // YAML_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/binary.cpp b/contrib/libs/yaml-cpp/src/binary.cpp
index a01abcb785..a7e51301b8 100644
--- a/contrib/libs/yaml-cpp/src/binary.cpp
+++ b/contrib/libs/yaml-cpp/src/binary.cpp
@@ -1,93 +1,93 @@
-#include "yaml-cpp/binary.h"
-
-namespace YAML {
-static const char encoding[] =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-
-std::string EncodeBase64(const unsigned char *data, std::size_t size) {
- const char PAD = '=';
-
- std::string ret;
- ret.resize(4 * size / 3 + 3);
- char *out = &ret[0];
-
- std::size_t chunks = size / 3;
- std::size_t remainder = size % 3;
-
- for (std::size_t i = 0; i < chunks; i++, data += 3) {
- *out++ = encoding[data[0] >> 2];
- *out++ = encoding[((data[0] & 0x3) << 4) | (data[1] >> 4)];
- *out++ = encoding[((data[1] & 0xf) << 2) | (data[2] >> 6)];
- *out++ = encoding[data[2] & 0x3f];
- }
-
- switch (remainder) {
- case 0:
- break;
- case 1:
- *out++ = encoding[data[0] >> 2];
- *out++ = encoding[((data[0] & 0x3) << 4)];
- *out++ = PAD;
- *out++ = PAD;
- break;
- case 2:
- *out++ = encoding[data[0] >> 2];
- *out++ = encoding[((data[0] & 0x3) << 4) | (data[1] >> 4)];
- *out++ = encoding[((data[1] & 0xf) << 2)];
- *out++ = PAD;
- break;
- }
-
- ret.resize(out - &ret[0]);
- return ret;
-}
-
-static const unsigned char decoding[] = {
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255,
- 255, 255, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255,
- 255, 0, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
- 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
- 25, 255, 255, 255, 255, 255, 255, 26, 27, 28, 29, 30, 31, 32, 33,
- 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
- 49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255,
-};
-
-std::vector<unsigned char> DecodeBase64(const std::string &input) {
- typedef std::vector<unsigned char> ret_type;
- if (input.empty())
- return ret_type();
-
- ret_type ret(3 * input.size() / 4 + 1);
- unsigned char *out = &ret[0];
-
- unsigned value = 0;
- for (std::size_t i = 0; i < input.size(); i++) {
- unsigned char d = decoding[static_cast<unsigned>(input[i])];
- if (d == 255)
- return ret_type();
-
- value = (value << 6) | d;
- if (i % 4 == 3) {
- *out++ = value >> 16;
- if (i > 0 && input[i - 1] != '=')
- *out++ = value >> 8;
- if (input[i] != '=')
- *out++ = value;
- }
- }
-
- ret.resize(out - &ret[0]);
- return ret;
-}
-}
+#include "yaml-cpp/binary.h"
+
+namespace YAML {
+static const char encoding[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+std::string EncodeBase64(const unsigned char *data, std::size_t size) {
+ const char PAD = '=';
+
+ std::string ret;
+ ret.resize(4 * size / 3 + 3);
+ char *out = &ret[0];
+
+ std::size_t chunks = size / 3;
+ std::size_t remainder = size % 3;
+
+ for (std::size_t i = 0; i < chunks; i++, data += 3) {
+ *out++ = encoding[data[0] >> 2];
+ *out++ = encoding[((data[0] & 0x3) << 4) | (data[1] >> 4)];
+ *out++ = encoding[((data[1] & 0xf) << 2) | (data[2] >> 6)];
+ *out++ = encoding[data[2] & 0x3f];
+ }
+
+ switch (remainder) {
+ case 0:
+ break;
+ case 1:
+ *out++ = encoding[data[0] >> 2];
+ *out++ = encoding[((data[0] & 0x3) << 4)];
+ *out++ = PAD;
+ *out++ = PAD;
+ break;
+ case 2:
+ *out++ = encoding[data[0] >> 2];
+ *out++ = encoding[((data[0] & 0x3) << 4) | (data[1] >> 4)];
+ *out++ = encoding[((data[1] & 0xf) << 2)];
+ *out++ = PAD;
+ break;
+ }
+
+ ret.resize(out - &ret[0]);
+ return ret;
+}
+
+static const unsigned char decoding[] = {
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255,
+ 255, 255, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255,
+ 255, 0, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 255, 255, 255, 255, 255, 255, 26, 27, 28, 29, 30, 31, 32, 33,
+ 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+ 49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255,
+};
+
+std::vector<unsigned char> DecodeBase64(const std::string &input) {
+ typedef std::vector<unsigned char> ret_type;
+ if (input.empty())
+ return ret_type();
+
+ ret_type ret(3 * input.size() / 4 + 1);
+ unsigned char *out = &ret[0];
+
+ unsigned value = 0;
+ for (std::size_t i = 0; i < input.size(); i++) {
+ unsigned char d = decoding[static_cast<unsigned>(input[i])];
+ if (d == 255)
+ return ret_type();
+
+ value = (value << 6) | d;
+ if (i % 4 == 3) {
+ *out++ = value >> 16;
+ if (i > 0 && input[i - 1] != '=')
+ *out++ = value >> 8;
+ if (input[i] != '=')
+ *out++ = value;
+ }
+ }
+
+ ret.resize(out - &ret[0]);
+ return ret;
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/collectionstack.h b/contrib/libs/yaml-cpp/src/collectionstack.h
index 415444ad06..46c463ebce 100644
--- a/contrib/libs/yaml-cpp/src/collectionstack.h
+++ b/contrib/libs/yaml-cpp/src/collectionstack.h
@@ -1,40 +1,40 @@
-#ifndef COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <stack>
-#include <cassert>
-
-namespace YAML {
-struct CollectionType {
- enum value { NoCollection, BlockMap, BlockSeq, FlowMap, FlowSeq, CompactMap };
-};
-
-class CollectionStack {
- public:
- CollectionType::value GetCurCollectionType() const {
- if (collectionStack.empty())
- return CollectionType::NoCollection;
- return collectionStack.top();
- }
-
- void PushCollectionType(CollectionType::value type) {
- collectionStack.push(type);
- }
- void PopCollectionType(CollectionType::value type) {
+#ifndef COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <stack>
+#include <cassert>
+
+namespace YAML {
+struct CollectionType {
+ enum value { NoCollection, BlockMap, BlockSeq, FlowMap, FlowSeq, CompactMap };
+};
+
+class CollectionStack {
+ public:
+ CollectionType::value GetCurCollectionType() const {
+ if (collectionStack.empty())
+ return CollectionType::NoCollection;
+ return collectionStack.top();
+ }
+
+ void PushCollectionType(CollectionType::value type) {
+ collectionStack.push(type);
+ }
+ void PopCollectionType(CollectionType::value type) {
(void)type;
- assert(type == GetCurCollectionType());
- collectionStack.pop();
- }
-
- private:
- std::stack<CollectionType::value> collectionStack;
-};
-}
-
-#endif // COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+ assert(type == GetCurCollectionType());
+ collectionStack.pop();
+ }
+
+ private:
+ std::stack<CollectionType::value> collectionStack;
+};
+}
+
+#endif // COLLECTIONSTACK_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/contrib/graphbuilder.cpp b/contrib/libs/yaml-cpp/src/contrib/graphbuilder.cpp
index e33597bb87..416c1359db 100644
--- a/contrib/libs/yaml-cpp/src/contrib/graphbuilder.cpp
+++ b/contrib/libs/yaml-cpp/src/contrib/graphbuilder.cpp
@@ -1,17 +1,17 @@
-#include "graphbuilderadapter.h"
-
-#include "yaml-cpp/parser.h" // IWYU pragma: keep
-
-namespace YAML {
-class GraphBuilderInterface;
-
-void* BuildGraphOfNextDocument(Parser& parser,
- GraphBuilderInterface& graphBuilder) {
- GraphBuilderAdapter eventHandler(graphBuilder);
- if (parser.HandleNextDocument(eventHandler)) {
- return eventHandler.RootNode();
- } else {
- return NULL;
- }
-}
-}
+#include "graphbuilderadapter.h"
+
+#include "yaml-cpp/parser.h" // IWYU pragma: keep
+
+namespace YAML {
+class GraphBuilderInterface;
+
+void* BuildGraphOfNextDocument(Parser& parser,
+ GraphBuilderInterface& graphBuilder) {
+ GraphBuilderAdapter eventHandler(graphBuilder);
+ if (parser.HandleNextDocument(eventHandler)) {
+ return eventHandler.RootNode();
+ } else {
+ return NULL;
+ }
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/contrib/graphbuilderadapter.cpp b/contrib/libs/yaml-cpp/src/contrib/graphbuilderadapter.cpp
index 6ea3128f2a..02a3d972a5 100644
--- a/contrib/libs/yaml-cpp/src/contrib/graphbuilderadapter.cpp
+++ b/contrib/libs/yaml-cpp/src/contrib/graphbuilderadapter.cpp
@@ -1,94 +1,94 @@
-#include "graphbuilderadapter.h"
-#include "yaml-cpp/contrib/graphbuilder.h"
-
-namespace YAML {
-struct Mark;
-
-int GraphBuilderAdapter::ContainerFrame::sequenceMarker;
-
-void GraphBuilderAdapter::OnNull(const Mark &mark, anchor_t anchor) {
- void *pParent = GetCurrentParent();
- void *pNode = m_builder.NewNull(mark, pParent);
- RegisterAnchor(anchor, pNode);
-
- DispositionNode(pNode);
-}
-
-void GraphBuilderAdapter::OnAlias(const Mark &mark, anchor_t anchor) {
- void *pReffedNode = m_anchors.Get(anchor);
- DispositionNode(m_builder.AnchorReference(mark, pReffedNode));
-}
-
-void GraphBuilderAdapter::OnScalar(const Mark &mark, const std::string &tag,
- anchor_t anchor, const std::string &value) {
- void *pParent = GetCurrentParent();
- void *pNode = m_builder.NewScalar(mark, tag, pParent, value);
- RegisterAnchor(anchor, pNode);
-
- DispositionNode(pNode);
-}
-
-void GraphBuilderAdapter::OnSequenceStart(const Mark &mark,
- const std::string &tag,
- anchor_t anchor,
- EmitterStyle::value /* style */) {
- void *pNode = m_builder.NewSequence(mark, tag, GetCurrentParent());
- m_containers.push(ContainerFrame(pNode));
- RegisterAnchor(anchor, pNode);
-}
-
-void GraphBuilderAdapter::OnSequenceEnd() {
- void *pSequence = m_containers.top().pContainer;
- m_containers.pop();
-
- DispositionNode(pSequence);
-}
-
-void GraphBuilderAdapter::OnMapStart(const Mark &mark, const std::string &tag,
- anchor_t anchor,
- EmitterStyle::value /* style */) {
- void *pNode = m_builder.NewMap(mark, tag, GetCurrentParent());
- m_containers.push(ContainerFrame(pNode, m_pKeyNode));
- m_pKeyNode = NULL;
- RegisterAnchor(anchor, pNode);
-}
-
-void GraphBuilderAdapter::OnMapEnd() {
- void *pMap = m_containers.top().pContainer;
- m_pKeyNode = m_containers.top().pPrevKeyNode;
- m_containers.pop();
- DispositionNode(pMap);
-}
-
-void *GraphBuilderAdapter::GetCurrentParent() const {
- if (m_containers.empty()) {
- return NULL;
- }
- return m_containers.top().pContainer;
-}
-
-void GraphBuilderAdapter::RegisterAnchor(anchor_t anchor, void *pNode) {
- if (anchor) {
- m_anchors.Register(anchor, pNode);
- }
-}
-
-void GraphBuilderAdapter::DispositionNode(void *pNode) {
- if (m_containers.empty()) {
- m_pRootNode = pNode;
- return;
- }
-
- void *pContainer = m_containers.top().pContainer;
- if (m_containers.top().isMap()) {
- if (m_pKeyNode) {
- m_builder.AssignInMap(pContainer, m_pKeyNode, pNode);
- m_pKeyNode = NULL;
- } else {
- m_pKeyNode = pNode;
- }
- } else {
- m_builder.AppendToSequence(pContainer, pNode);
- }
-}
-}
+#include "graphbuilderadapter.h"
+#include "yaml-cpp/contrib/graphbuilder.h"
+
+namespace YAML {
+struct Mark;
+
+int GraphBuilderAdapter::ContainerFrame::sequenceMarker;
+
+void GraphBuilderAdapter::OnNull(const Mark &mark, anchor_t anchor) {
+ void *pParent = GetCurrentParent();
+ void *pNode = m_builder.NewNull(mark, pParent);
+ RegisterAnchor(anchor, pNode);
+
+ DispositionNode(pNode);
+}
+
+void GraphBuilderAdapter::OnAlias(const Mark &mark, anchor_t anchor) {
+ void *pReffedNode = m_anchors.Get(anchor);
+ DispositionNode(m_builder.AnchorReference(mark, pReffedNode));
+}
+
+void GraphBuilderAdapter::OnScalar(const Mark &mark, const std::string &tag,
+ anchor_t anchor, const std::string &value) {
+ void *pParent = GetCurrentParent();
+ void *pNode = m_builder.NewScalar(mark, tag, pParent, value);
+ RegisterAnchor(anchor, pNode);
+
+ DispositionNode(pNode);
+}
+
+void GraphBuilderAdapter::OnSequenceStart(const Mark &mark,
+ const std::string &tag,
+ anchor_t anchor,
+ EmitterStyle::value /* style */) {
+ void *pNode = m_builder.NewSequence(mark, tag, GetCurrentParent());
+ m_containers.push(ContainerFrame(pNode));
+ RegisterAnchor(anchor, pNode);
+}
+
+void GraphBuilderAdapter::OnSequenceEnd() {
+ void *pSequence = m_containers.top().pContainer;
+ m_containers.pop();
+
+ DispositionNode(pSequence);
+}
+
+void GraphBuilderAdapter::OnMapStart(const Mark &mark, const std::string &tag,
+ anchor_t anchor,
+ EmitterStyle::value /* style */) {
+ void *pNode = m_builder.NewMap(mark, tag, GetCurrentParent());
+ m_containers.push(ContainerFrame(pNode, m_pKeyNode));
+ m_pKeyNode = NULL;
+ RegisterAnchor(anchor, pNode);
+}
+
+void GraphBuilderAdapter::OnMapEnd() {
+ void *pMap = m_containers.top().pContainer;
+ m_pKeyNode = m_containers.top().pPrevKeyNode;
+ m_containers.pop();
+ DispositionNode(pMap);
+}
+
+void *GraphBuilderAdapter::GetCurrentParent() const {
+ if (m_containers.empty()) {
+ return NULL;
+ }
+ return m_containers.top().pContainer;
+}
+
+void GraphBuilderAdapter::RegisterAnchor(anchor_t anchor, void *pNode) {
+ if (anchor) {
+ m_anchors.Register(anchor, pNode);
+ }
+}
+
+void GraphBuilderAdapter::DispositionNode(void *pNode) {
+ if (m_containers.empty()) {
+ m_pRootNode = pNode;
+ return;
+ }
+
+ void *pContainer = m_containers.top().pContainer;
+ if (m_containers.top().isMap()) {
+ if (m_pKeyNode) {
+ m_builder.AssignInMap(pContainer, m_pKeyNode, pNode);
+ m_pKeyNode = NULL;
+ } else {
+ m_pKeyNode = pNode;
+ }
+ } else {
+ m_builder.AppendToSequence(pContainer, pNode);
+ }
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/contrib/graphbuilderadapter.h b/contrib/libs/yaml-cpp/src/contrib/graphbuilderadapter.h
index 1665d986ca..0d1e579208 100644
--- a/contrib/libs/yaml-cpp/src/contrib/graphbuilderadapter.h
+++ b/contrib/libs/yaml-cpp/src/contrib/graphbuilderadapter.h
@@ -1,79 +1,79 @@
-#ifndef GRAPHBUILDERADAPTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define GRAPHBUILDERADAPTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <cstdlib>
-#include <map>
-#include <stack>
-
-#include "yaml-cpp/anchor.h"
-#include "yaml-cpp/contrib/anchordict.h"
-#include "yaml-cpp/contrib/graphbuilder.h"
-#include "yaml-cpp/emitterstyle.h"
-#include "yaml-cpp/eventhandler.h"
-
-namespace YAML {
-class GraphBuilderInterface;
-struct Mark;
-} // namespace YAML
-
-namespace YAML {
-class GraphBuilderAdapter : public EventHandler {
- public:
- GraphBuilderAdapter(GraphBuilderInterface& builder)
- : m_builder(builder), m_pRootNode(NULL), m_pKeyNode(NULL) {}
-
- virtual void OnDocumentStart(const Mark& mark) { (void)mark; }
- virtual void OnDocumentEnd() {}
-
- virtual void OnNull(const Mark& mark, anchor_t anchor);
- virtual void OnAlias(const Mark& mark, anchor_t anchor);
- virtual void OnScalar(const Mark& mark, const std::string& tag,
- anchor_t anchor, const std::string& value);
-
- virtual void OnSequenceStart(const Mark& mark, const std::string& tag,
- anchor_t anchor, EmitterStyle::value style);
- virtual void OnSequenceEnd();
-
- virtual void OnMapStart(const Mark& mark, const std::string& tag,
- anchor_t anchor, EmitterStyle::value style);
- virtual void OnMapEnd();
-
- void* RootNode() const { return m_pRootNode; }
-
- private:
- struct ContainerFrame {
- ContainerFrame(void* pSequence)
- : pContainer(pSequence), pPrevKeyNode(&sequenceMarker) {}
- ContainerFrame(void* pMap, void* pPrevKeyNode)
- : pContainer(pMap), pPrevKeyNode(pPrevKeyNode) {}
-
- void* pContainer;
- void* pPrevKeyNode;
-
- bool isMap() const { return pPrevKeyNode != &sequenceMarker; }
-
- private:
- static int sequenceMarker;
- };
- typedef std::stack<ContainerFrame> ContainerStack;
- typedef AnchorDict<void*> AnchorMap;
-
- GraphBuilderInterface& m_builder;
- ContainerStack m_containers;
- AnchorMap m_anchors;
- void* m_pRootNode;
- void* m_pKeyNode;
-
- void* GetCurrentParent() const;
- void RegisterAnchor(anchor_t anchor, void* pNode);
- void DispositionNode(void* pNode);
-};
-}
-
-#endif // GRAPHBUILDERADAPTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef GRAPHBUILDERADAPTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define GRAPHBUILDERADAPTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <cstdlib>
+#include <map>
+#include <stack>
+
+#include "yaml-cpp/anchor.h"
+#include "yaml-cpp/contrib/anchordict.h"
+#include "yaml-cpp/contrib/graphbuilder.h"
+#include "yaml-cpp/emitterstyle.h"
+#include "yaml-cpp/eventhandler.h"
+
+namespace YAML {
+class GraphBuilderInterface;
+struct Mark;
+} // namespace YAML
+
+namespace YAML {
+class GraphBuilderAdapter : public EventHandler {
+ public:
+ GraphBuilderAdapter(GraphBuilderInterface& builder)
+ : m_builder(builder), m_pRootNode(NULL), m_pKeyNode(NULL) {}
+
+ virtual void OnDocumentStart(const Mark& mark) { (void)mark; }
+ virtual void OnDocumentEnd() {}
+
+ virtual void OnNull(const Mark& mark, anchor_t anchor);
+ virtual void OnAlias(const Mark& mark, anchor_t anchor);
+ virtual void OnScalar(const Mark& mark, const std::string& tag,
+ anchor_t anchor, const std::string& value);
+
+ virtual void OnSequenceStart(const Mark& mark, const std::string& tag,
+ anchor_t anchor, EmitterStyle::value style);
+ virtual void OnSequenceEnd();
+
+ virtual void OnMapStart(const Mark& mark, const std::string& tag,
+ anchor_t anchor, EmitterStyle::value style);
+ virtual void OnMapEnd();
+
+ void* RootNode() const { return m_pRootNode; }
+
+ private:
+ struct ContainerFrame {
+ ContainerFrame(void* pSequence)
+ : pContainer(pSequence), pPrevKeyNode(&sequenceMarker) {}
+ ContainerFrame(void* pMap, void* pPrevKeyNode)
+ : pContainer(pMap), pPrevKeyNode(pPrevKeyNode) {}
+
+ void* pContainer;
+ void* pPrevKeyNode;
+
+ bool isMap() const { return pPrevKeyNode != &sequenceMarker; }
+
+ private:
+ static int sequenceMarker;
+ };
+ typedef std::stack<ContainerFrame> ContainerStack;
+ typedef AnchorDict<void*> AnchorMap;
+
+ GraphBuilderInterface& m_builder;
+ ContainerStack m_containers;
+ AnchorMap m_anchors;
+ void* m_pRootNode;
+ void* m_pKeyNode;
+
+ void* GetCurrentParent() const;
+ void RegisterAnchor(anchor_t anchor, void* pNode);
+ void DispositionNode(void* pNode);
+};
+}
+
+#endif // GRAPHBUILDERADAPTER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/convert.cpp b/contrib/libs/yaml-cpp/src/convert.cpp
index b7e9d7f7f4..ec05b77826 100644
--- a/contrib/libs/yaml-cpp/src/convert.cpp
+++ b/contrib/libs/yaml-cpp/src/convert.cpp
@@ -1,75 +1,75 @@
-#include <algorithm>
-
-#include "yaml-cpp/node/convert.h"
-
-namespace {
-// we're not gonna mess with the mess that is all the isupper/etc. functions
-bool IsLower(char ch) { return 'a' <= ch && ch <= 'z'; }
-bool IsUpper(char ch) { return 'A' <= ch && ch <= 'Z'; }
-char ToLower(char ch) { return IsUpper(ch) ? ch + 'a' - 'A' : ch; }
-
-std::string tolower(const std::string& str) {
- std::string s(str);
- std::transform(s.begin(), s.end(), s.begin(), ToLower);
- return s;
-}
-
-template <typename T>
-bool IsEntirely(const std::string& str, T func) {
- for (std::size_t i = 0; i < str.size(); i++)
- if (!func(str[i]))
- return false;
-
- return true;
-}
-
-// IsFlexibleCase
-// . Returns true if 'str' is:
-// . UPPERCASE
-// . lowercase
-// . Capitalized
-bool IsFlexibleCase(const std::string& str) {
- if (str.empty())
- return true;
-
- if (IsEntirely(str, IsLower))
- return true;
-
- bool firstcaps = IsUpper(str[0]);
- std::string rest = str.substr(1);
- return firstcaps && (IsEntirely(rest, IsLower) || IsEntirely(rest, IsUpper));
-}
-}
-
-namespace YAML {
-bool convert<bool>::decode(const Node& node, bool& rhs) {
- if (!node.IsScalar())
- return false;
-
- // we can't use iostream bool extraction operators as they don't
- // recognize all possible values in the table below (taken from
- // http://yaml.org/type/bool.html)
- static const struct {
- std::string truename, falsename;
- } names[] = {
- {"y", "n"}, {"yes", "no"}, {"true", "false"}, {"on", "off"},
- };
-
- if (!IsFlexibleCase(node.Scalar()))
- return false;
-
- for (unsigned i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
- if (names[i].truename == tolower(node.Scalar())) {
- rhs = true;
- return true;
- }
-
- if (names[i].falsename == tolower(node.Scalar())) {
- rhs = false;
- return true;
- }
- }
-
- return false;
-}
-}
+#include <algorithm>
+
+#include "yaml-cpp/node/convert.h"
+
+namespace {
+// we're not gonna mess with the mess that is all the isupper/etc. functions
+bool IsLower(char ch) { return 'a' <= ch && ch <= 'z'; }
+bool IsUpper(char ch) { return 'A' <= ch && ch <= 'Z'; }
+char ToLower(char ch) { return IsUpper(ch) ? ch + 'a' - 'A' : ch; }
+
+std::string tolower(const std::string& str) {
+ std::string s(str);
+ std::transform(s.begin(), s.end(), s.begin(), ToLower);
+ return s;
+}
+
+template <typename T>
+bool IsEntirely(const std::string& str, T func) {
+ for (std::size_t i = 0; i < str.size(); i++)
+ if (!func(str[i]))
+ return false;
+
+ return true;
+}
+
+// IsFlexibleCase
+// . Returns true if 'str' is:
+// . UPPERCASE
+// . lowercase
+// . Capitalized
+bool IsFlexibleCase(const std::string& str) {
+ if (str.empty())
+ return true;
+
+ if (IsEntirely(str, IsLower))
+ return true;
+
+ bool firstcaps = IsUpper(str[0]);
+ std::string rest = str.substr(1);
+ return firstcaps && (IsEntirely(rest, IsLower) || IsEntirely(rest, IsUpper));
+}
+}
+
+namespace YAML {
+bool convert<bool>::decode(const Node& node, bool& rhs) {
+ if (!node.IsScalar())
+ return false;
+
+ // we can't use iostream bool extraction operators as they don't
+ // recognize all possible values in the table below (taken from
+ // http://yaml.org/type/bool.html)
+ static const struct {
+ std::string truename, falsename;
+ } names[] = {
+ {"y", "n"}, {"yes", "no"}, {"true", "false"}, {"on", "off"},
+ };
+
+ if (!IsFlexibleCase(node.Scalar()))
+ return false;
+
+ for (unsigned i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
+ if (names[i].truename == tolower(node.Scalar())) {
+ rhs = true;
+ return true;
+ }
+
+ if (names[i].falsename == tolower(node.Scalar())) {
+ rhs = false;
+ return true;
+ }
+ }
+
+ return false;
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/directives.cpp b/contrib/libs/yaml-cpp/src/directives.cpp
index db778f2fff..963bd2cd37 100644
--- a/contrib/libs/yaml-cpp/src/directives.cpp
+++ b/contrib/libs/yaml-cpp/src/directives.cpp
@@ -1,22 +1,22 @@
-#include "directives.h"
-
-namespace YAML {
-Directives::Directives() {
- // version
- version.isDefault = true;
- version.major = 1;
- version.minor = 2;
-}
-
-const std::string Directives::TranslateTagHandle(
- const std::string& handle) const {
- std::map<std::string, std::string>::const_iterator it = tags.find(handle);
- if (it == tags.end()) {
- if (handle == "!!")
- return "tag:yaml.org,2002:";
- return handle;
- }
-
- return it->second;
-}
-}
+#include "directives.h"
+
+namespace YAML {
+Directives::Directives() {
+ // version
+ version.isDefault = true;
+ version.major = 1;
+ version.minor = 2;
+}
+
+const std::string Directives::TranslateTagHandle(
+ const std::string& handle) const {
+ std::map<std::string, std::string>::const_iterator it = tags.find(handle);
+ if (it == tags.end()) {
+ if (handle == "!!")
+ return "tag:yaml.org,2002:";
+ return handle;
+ }
+
+ return it->second;
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/directives.h b/contrib/libs/yaml-cpp/src/directives.h
index 40523d93c3..333af26e37 100644
--- a/contrib/libs/yaml-cpp/src/directives.h
+++ b/contrib/libs/yaml-cpp/src/directives.h
@@ -1,29 +1,29 @@
-#ifndef DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <string>
-#include <map>
-
-namespace YAML {
-struct Version {
- bool isDefault;
- int major, minor;
-};
-
-struct Directives {
- Directives();
-
- const std::string TranslateTagHandle(const std::string& handle) const;
-
- Version version;
- std::map<std::string, std::string> tags;
-};
-}
-
-#endif // DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+#include <map>
+
+namespace YAML {
+struct Version {
+ bool isDefault;
+ int major, minor;
+};
+
+struct Directives {
+ Directives();
+
+ const std::string TranslateTagHandle(const std::string& handle) const;
+
+ Version version;
+ std::map<std::string, std::string> tags;
+};
+}
+
+#endif // DIRECTIVES_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/emit.cpp b/contrib/libs/yaml-cpp/src/emit.cpp
index 01a41e11b7..51bc791533 100644
--- a/contrib/libs/yaml-cpp/src/emit.cpp
+++ b/contrib/libs/yaml-cpp/src/emit.cpp
@@ -1,25 +1,25 @@
-#include "yaml-cpp/node/emit.h"
-#include "yaml-cpp/emitfromevents.h"
-#include "yaml-cpp/emitter.h"
-#include "nodeevents.h"
-
-namespace YAML {
-Emitter& operator<<(Emitter& out, const Node& node) {
- EmitFromEvents emitFromEvents(out);
- NodeEvents events(node);
- events.Emit(emitFromEvents);
- return out;
-}
-
-std::ostream& operator<<(std::ostream& out, const Node& node) {
- Emitter emitter(out);
- emitter << node;
- return out;
-}
-
-std::string Dump(const Node& node) {
- Emitter emitter;
- emitter << node;
- return emitter.c_str();
-}
+#include "yaml-cpp/node/emit.h"
+#include "yaml-cpp/emitfromevents.h"
+#include "yaml-cpp/emitter.h"
+#include "nodeevents.h"
+
+namespace YAML {
+Emitter& operator<<(Emitter& out, const Node& node) {
+ EmitFromEvents emitFromEvents(out);
+ NodeEvents events(node);
+ events.Emit(emitFromEvents);
+ return out;
+}
+
+std::ostream& operator<<(std::ostream& out, const Node& node) {
+ Emitter emitter(out);
+ emitter << node;
+ return out;
+}
+
+std::string Dump(const Node& node) {
+ Emitter emitter;
+ emitter << node;
+ return emitter.c_str();
+}
} // namespace YAML
diff --git a/contrib/libs/yaml-cpp/src/emitfromevents.cpp b/contrib/libs/yaml-cpp/src/emitfromevents.cpp
index edf7f53846..4832649f3c 100644
--- a/contrib/libs/yaml-cpp/src/emitfromevents.cpp
+++ b/contrib/libs/yaml-cpp/src/emitfromevents.cpp
@@ -1,119 +1,119 @@
-#include <cassert>
-#include <sstream>
-
-#include "yaml-cpp/emitfromevents.h"
-#include "yaml-cpp/emitter.h"
-#include "yaml-cpp/emittermanip.h"
-#include "yaml-cpp/null.h"
-
-namespace YAML {
-struct Mark;
-} // namespace YAML
-
-namespace {
-std::string ToString(YAML::anchor_t anchor) {
- std::stringstream stream;
- stream << anchor;
- return stream.str();
-}
-}
-
-namespace YAML {
-EmitFromEvents::EmitFromEvents(Emitter& emitter) : m_emitter(emitter) {}
-
-void EmitFromEvents::OnDocumentStart(const Mark&) {}
-
-void EmitFromEvents::OnDocumentEnd() {}
-
-void EmitFromEvents::OnNull(const Mark&, anchor_t anchor) {
- BeginNode();
- EmitProps("", anchor);
- m_emitter << Null;
-}
-
-void EmitFromEvents::OnAlias(const Mark&, anchor_t anchor) {
- BeginNode();
- m_emitter << Alias(ToString(anchor));
-}
-
-void EmitFromEvents::OnScalar(const Mark&, const std::string& tag,
- anchor_t anchor, const std::string& value) {
- BeginNode();
- EmitProps(tag, anchor);
- m_emitter << value;
-}
-
-void EmitFromEvents::OnSequenceStart(const Mark&, const std::string& tag,
- anchor_t anchor,
- EmitterStyle::value style) {
- BeginNode();
- EmitProps(tag, anchor);
- switch (style) {
- case EmitterStyle::Block:
- m_emitter << Block;
- break;
- case EmitterStyle::Flow:
- m_emitter << Flow;
- break;
- default:
- break;
- }
- m_emitter << BeginSeq;
- m_stateStack.push(State::WaitingForSequenceEntry);
-}
-
-void EmitFromEvents::OnSequenceEnd() {
- m_emitter << EndSeq;
- assert(m_stateStack.top() == State::WaitingForSequenceEntry);
- m_stateStack.pop();
-}
-
-void EmitFromEvents::OnMapStart(const Mark&, const std::string& tag,
- anchor_t anchor, EmitterStyle::value style) {
- BeginNode();
- EmitProps(tag, anchor);
- switch (style) {
- case EmitterStyle::Block:
- m_emitter << Block;
- break;
- case EmitterStyle::Flow:
- m_emitter << Flow;
- break;
- default:
- break;
- }
- m_emitter << BeginMap;
- m_stateStack.push(State::WaitingForKey);
-}
-
-void EmitFromEvents::OnMapEnd() {
- m_emitter << EndMap;
- assert(m_stateStack.top() == State::WaitingForKey);
- m_stateStack.pop();
-}
-
-void EmitFromEvents::BeginNode() {
- if (m_stateStack.empty())
- return;
-
- switch (m_stateStack.top()) {
- case State::WaitingForKey:
- m_emitter << Key;
- m_stateStack.top() = State::WaitingForValue;
- break;
- case State::WaitingForValue:
- m_emitter << Value;
- m_stateStack.top() = State::WaitingForKey;
- break;
- default:
- break;
- }
-}
-
-void EmitFromEvents::EmitProps(const std::string& tag, anchor_t anchor) {
+#include <cassert>
+#include <sstream>
+
+#include "yaml-cpp/emitfromevents.h"
+#include "yaml-cpp/emitter.h"
+#include "yaml-cpp/emittermanip.h"
+#include "yaml-cpp/null.h"
+
+namespace YAML {
+struct Mark;
+} // namespace YAML
+
+namespace {
+std::string ToString(YAML::anchor_t anchor) {
+ std::stringstream stream;
+ stream << anchor;
+ return stream.str();
+}
+}
+
+namespace YAML {
+EmitFromEvents::EmitFromEvents(Emitter& emitter) : m_emitter(emitter) {}
+
+void EmitFromEvents::OnDocumentStart(const Mark&) {}
+
+void EmitFromEvents::OnDocumentEnd() {}
+
+void EmitFromEvents::OnNull(const Mark&, anchor_t anchor) {
+ BeginNode();
+ EmitProps("", anchor);
+ m_emitter << Null;
+}
+
+void EmitFromEvents::OnAlias(const Mark&, anchor_t anchor) {
+ BeginNode();
+ m_emitter << Alias(ToString(anchor));
+}
+
+void EmitFromEvents::OnScalar(const Mark&, const std::string& tag,
+ anchor_t anchor, const std::string& value) {
+ BeginNode();
+ EmitProps(tag, anchor);
+ m_emitter << value;
+}
+
+void EmitFromEvents::OnSequenceStart(const Mark&, const std::string& tag,
+ anchor_t anchor,
+ EmitterStyle::value style) {
+ BeginNode();
+ EmitProps(tag, anchor);
+ switch (style) {
+ case EmitterStyle::Block:
+ m_emitter << Block;
+ break;
+ case EmitterStyle::Flow:
+ m_emitter << Flow;
+ break;
+ default:
+ break;
+ }
+ m_emitter << BeginSeq;
+ m_stateStack.push(State::WaitingForSequenceEntry);
+}
+
+void EmitFromEvents::OnSequenceEnd() {
+ m_emitter << EndSeq;
+ assert(m_stateStack.top() == State::WaitingForSequenceEntry);
+ m_stateStack.pop();
+}
+
+void EmitFromEvents::OnMapStart(const Mark&, const std::string& tag,
+ anchor_t anchor, EmitterStyle::value style) {
+ BeginNode();
+ EmitProps(tag, anchor);
+ switch (style) {
+ case EmitterStyle::Block:
+ m_emitter << Block;
+ break;
+ case EmitterStyle::Flow:
+ m_emitter << Flow;
+ break;
+ default:
+ break;
+ }
+ m_emitter << BeginMap;
+ m_stateStack.push(State::WaitingForKey);
+}
+
+void EmitFromEvents::OnMapEnd() {
+ m_emitter << EndMap;
+ assert(m_stateStack.top() == State::WaitingForKey);
+ m_stateStack.pop();
+}
+
+void EmitFromEvents::BeginNode() {
+ if (m_stateStack.empty())
+ return;
+
+ switch (m_stateStack.top()) {
+ case State::WaitingForKey:
+ m_emitter << Key;
+ m_stateStack.top() = State::WaitingForValue;
+ break;
+ case State::WaitingForValue:
+ m_emitter << Value;
+ m_stateStack.top() = State::WaitingForKey;
+ break;
+ default:
+ break;
+ }
+}
+
+void EmitFromEvents::EmitProps(const std::string& tag, anchor_t anchor) {
if (!tag.empty() && tag != "?" && tag != "!")
- m_emitter << VerbatimTag(tag);
- if (anchor)
- m_emitter << Anchor(ToString(anchor));
-}
-}
+ m_emitter << VerbatimTag(tag);
+ if (anchor)
+ m_emitter << Anchor(ToString(anchor));
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/emitter.cpp b/contrib/libs/yaml-cpp/src/emitter.cpp
index 76ab6194d5..ebeb059554 100644
--- a/contrib/libs/yaml-cpp/src/emitter.cpp
+++ b/contrib/libs/yaml-cpp/src/emitter.cpp
@@ -1,911 +1,911 @@
-#include <sstream>
-
-#include "emitterutils.h"
-#include "indentation.h" // IWYU pragma: keep
-#include "yaml-cpp/emitter.h"
-#include "yaml-cpp/emitterdef.h"
-#include "yaml-cpp/emittermanip.h"
-#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
-
-namespace YAML {
-class Binary;
-struct _Null;
-
-Emitter::Emitter() : m_pState(new EmitterState) {}
-
-Emitter::Emitter(std::ostream& stream)
- : m_pState(new EmitterState), m_stream(stream) {}
-
-Emitter::~Emitter() {}
-
-const char* Emitter::c_str() const { return m_stream.str(); }
-
-std::size_t Emitter::size() const { return m_stream.pos(); }
-
-// state checking
-bool Emitter::good() const { return m_pState->good(); }
-
-const std::string Emitter::GetLastError() const {
- return m_pState->GetLastError();
-}
-
-// global setters
-bool Emitter::SetOutputCharset(EMITTER_MANIP value) {
- return m_pState->SetOutputCharset(value, FmtScope::Global);
-}
-
-bool Emitter::SetStringFormat(EMITTER_MANIP value) {
- return m_pState->SetStringFormat(value, FmtScope::Global);
-}
-
-bool Emitter::SetBoolFormat(EMITTER_MANIP value) {
- bool ok = false;
- if (m_pState->SetBoolFormat(value, FmtScope::Global))
- ok = true;
- if (m_pState->SetBoolCaseFormat(value, FmtScope::Global))
- ok = true;
- if (m_pState->SetBoolLengthFormat(value, FmtScope::Global))
- ok = true;
- return ok;
-}
-
-bool Emitter::SetIntBase(EMITTER_MANIP value) {
- return m_pState->SetIntFormat(value, FmtScope::Global);
-}
-
-bool Emitter::SetSeqFormat(EMITTER_MANIP value) {
- return m_pState->SetFlowType(GroupType::Seq, value, FmtScope::Global);
-}
-
-bool Emitter::SetMapFormat(EMITTER_MANIP value) {
- bool ok = false;
- if (m_pState->SetFlowType(GroupType::Map, value, FmtScope::Global))
- ok = true;
- if (m_pState->SetMapKeyFormat(value, FmtScope::Global))
- ok = true;
- return ok;
-}
-
-bool Emitter::SetIndent(std::size_t n) {
- return m_pState->SetIndent(n, FmtScope::Global);
-}
-
-bool Emitter::SetPreCommentIndent(std::size_t n) {
- return m_pState->SetPreCommentIndent(n, FmtScope::Global);
-}
-
-bool Emitter::SetPostCommentIndent(std::size_t n) {
- return m_pState->SetPostCommentIndent(n, FmtScope::Global);
-}
-
-bool Emitter::SetFloatPrecision(std::size_t n) {
- return m_pState->SetFloatPrecision(n, FmtScope::Global);
-}
-
-bool Emitter::SetDoublePrecision(std::size_t n) {
- return m_pState->SetDoublePrecision(n, FmtScope::Global);
-}
-
-// SetLocalValue
-// . Either start/end a group, or set a modifier locally
-Emitter& Emitter::SetLocalValue(EMITTER_MANIP value) {
- if (!good())
- return *this;
-
- switch (value) {
- case BeginDoc:
- EmitBeginDoc();
- break;
- case EndDoc:
- EmitEndDoc();
- break;
- case BeginSeq:
- EmitBeginSeq();
- break;
- case EndSeq:
- EmitEndSeq();
- break;
- case BeginMap:
- EmitBeginMap();
- break;
- case EndMap:
- EmitEndMap();
- break;
- case Key:
- case Value:
- // deprecated (these can be deduced by the parity of nodes in a map)
- break;
- case TagByKind:
- EmitKindTag();
- break;
- case Newline:
- EmitNewline();
- break;
- default:
- m_pState->SetLocalValue(value);
- break;
- }
- return *this;
-}
-
-Emitter& Emitter::SetLocalIndent(const _Indent& indent) {
- m_pState->SetIndent(indent.value, FmtScope::Local);
- return *this;
-}
-
-Emitter& Emitter::SetLocalPrecision(const _Precision& precision) {
- if (precision.floatPrecision >= 0)
- m_pState->SetFloatPrecision(precision.floatPrecision, FmtScope::Local);
- if (precision.doublePrecision >= 0)
- m_pState->SetDoublePrecision(precision.doublePrecision, FmtScope::Local);
- return *this;
-}
-
-// EmitBeginDoc
-void Emitter::EmitBeginDoc() {
- if (!good())
- return;
-
- if (m_pState->CurGroupType() != GroupType::NoType) {
- m_pState->SetError("Unexpected begin document");
- return;
- }
-
- if (m_pState->HasAnchor() || m_pState->HasTag()) {
- m_pState->SetError("Unexpected begin document");
- return;
- }
-
- if (m_stream.col() > 0)
- m_stream << "\n";
- m_stream << "---\n";
-
- m_pState->StartedDoc();
-}
-
-// EmitEndDoc
-void Emitter::EmitEndDoc() {
- if (!good())
- return;
-
- if (m_pState->CurGroupType() != GroupType::NoType) {
- m_pState->SetError("Unexpected begin document");
- return;
- }
-
- if (m_pState->HasAnchor() || m_pState->HasTag()) {
- m_pState->SetError("Unexpected begin document");
- return;
- }
-
- if (m_stream.col() > 0)
- m_stream << "\n";
- m_stream << "...\n";
-}
-
-// EmitBeginSeq
-void Emitter::EmitBeginSeq() {
- if (!good())
- return;
-
- PrepareNode(m_pState->NextGroupType(GroupType::Seq));
-
- m_pState->StartedGroup(GroupType::Seq);
-}
-
-// EmitEndSeq
-void Emitter::EmitEndSeq() {
- if (!good())
- return;
-
- if (m_pState->CurGroupChildCount() == 0)
- m_pState->ForceFlow();
-
- if (m_pState->CurGroupFlowType() == FlowType::Flow) {
- if (m_stream.comment())
- m_stream << "\n";
- m_stream << IndentTo(m_pState->CurIndent());
- if (m_pState->CurGroupChildCount() == 0)
- m_stream << "[";
- m_stream << "]";
- }
-
- m_pState->EndedGroup(GroupType::Seq);
-}
-
-// EmitBeginMap
-void Emitter::EmitBeginMap() {
- if (!good())
- return;
-
- PrepareNode(m_pState->NextGroupType(GroupType::Map));
-
- m_pState->StartedGroup(GroupType::Map);
-}
-
-// EmitEndMap
-void Emitter::EmitEndMap() {
- if (!good())
- return;
-
- if (m_pState->CurGroupChildCount() == 0)
- m_pState->ForceFlow();
-
- if (m_pState->CurGroupFlowType() == FlowType::Flow) {
- if (m_stream.comment())
- m_stream << "\n";
- m_stream << IndentTo(m_pState->CurIndent());
- if (m_pState->CurGroupChildCount() == 0)
- m_stream << "{";
- m_stream << "}";
- }
-
- m_pState->EndedGroup(GroupType::Map);
-}
-
-// EmitNewline
-void Emitter::EmitNewline() {
- if (!good())
- return;
-
- PrepareNode(EmitterNodeType::NoType);
- m_stream << "\n";
- m_pState->SetNonContent();
-}
-
-bool Emitter::CanEmitNewline() const { return true; }
-
-// Put the stream in a state so we can simply write the next node
-// E.g., if we're in a sequence, write the "- "
-void Emitter::PrepareNode(EmitterNodeType::value child) {
- switch (m_pState->CurGroupNodeType()) {
- case EmitterNodeType::NoType:
- PrepareTopNode(child);
- break;
- case EmitterNodeType::FlowSeq:
- FlowSeqPrepareNode(child);
- break;
- case EmitterNodeType::BlockSeq:
- BlockSeqPrepareNode(child);
- break;
- case EmitterNodeType::FlowMap:
- FlowMapPrepareNode(child);
- break;
- case EmitterNodeType::BlockMap:
- BlockMapPrepareNode(child);
- break;
- case EmitterNodeType::Property:
- case EmitterNodeType::Scalar:
- assert(false);
- break;
- }
-}
-
-void Emitter::PrepareTopNode(EmitterNodeType::value child) {
- if (child == EmitterNodeType::NoType)
- return;
-
- if (m_pState->CurGroupChildCount() > 0 && m_stream.col() > 0) {
- if (child != EmitterNodeType::NoType)
- EmitBeginDoc();
- }
-
- switch (child) {
- case EmitterNodeType::NoType:
- break;
- case EmitterNodeType::Property:
- case EmitterNodeType::Scalar:
- case EmitterNodeType::FlowSeq:
- case EmitterNodeType::FlowMap:
- // TODO: if we were writing null, and
- // we wanted it blank, we wouldn't want a space
- SpaceOrIndentTo(m_pState->HasBegunContent(), 0);
- break;
- case EmitterNodeType::BlockSeq:
- case EmitterNodeType::BlockMap:
- if (m_pState->HasBegunNode())
- m_stream << "\n";
- break;
- }
-}
-
-void Emitter::FlowSeqPrepareNode(EmitterNodeType::value child) {
- const std::size_t lastIndent = m_pState->LastIndent();
-
- if (!m_pState->HasBegunNode()) {
- if (m_stream.comment())
- m_stream << "\n";
- m_stream << IndentTo(lastIndent);
- if (m_pState->CurGroupChildCount() == 0)
- m_stream << "[";
- else
- m_stream << ",";
- }
-
- switch (child) {
- case EmitterNodeType::NoType:
- break;
- case EmitterNodeType::Property:
- case EmitterNodeType::Scalar:
- case EmitterNodeType::FlowSeq:
- case EmitterNodeType::FlowMap:
- SpaceOrIndentTo(
- m_pState->HasBegunContent() || m_pState->CurGroupChildCount() > 0,
- lastIndent);
- break;
- case EmitterNodeType::BlockSeq:
- case EmitterNodeType::BlockMap:
- assert(false);
- break;
- }
-}
-
-void Emitter::BlockSeqPrepareNode(EmitterNodeType::value child) {
- const std::size_t curIndent = m_pState->CurIndent();
- const std::size_t nextIndent = curIndent + m_pState->CurGroupIndent();
-
- if (child == EmitterNodeType::NoType)
- return;
-
- if (!m_pState->HasBegunContent()) {
- if (m_pState->CurGroupChildCount() > 0 || m_stream.comment()) {
- m_stream << "\n";
- }
- m_stream << IndentTo(curIndent);
- m_stream << "-";
- }
-
- switch (child) {
- case EmitterNodeType::NoType:
- break;
- case EmitterNodeType::Property:
- case EmitterNodeType::Scalar:
- case EmitterNodeType::FlowSeq:
- case EmitterNodeType::FlowMap:
- SpaceOrIndentTo(m_pState->HasBegunContent(), nextIndent);
- break;
- case EmitterNodeType::BlockSeq:
- m_stream << "\n";
- break;
- case EmitterNodeType::BlockMap:
- if (m_pState->HasBegunContent() || m_stream.comment())
- m_stream << "\n";
- break;
- }
-}
-
-void Emitter::FlowMapPrepareNode(EmitterNodeType::value child) {
- if (m_pState->CurGroupChildCount() % 2 == 0) {
- if (m_pState->GetMapKeyFormat() == LongKey)
- m_pState->SetLongKey();
-
- if (m_pState->CurGroupLongKey())
- FlowMapPrepareLongKey(child);
- else
- FlowMapPrepareSimpleKey(child);
- } else {
- if (m_pState->CurGroupLongKey())
- FlowMapPrepareLongKeyValue(child);
- else
- FlowMapPrepareSimpleKeyValue(child);
- }
-}
-
-void Emitter::FlowMapPrepareLongKey(EmitterNodeType::value child) {
- const std::size_t lastIndent = m_pState->LastIndent();
-
- if (!m_pState->HasBegunNode()) {
- if (m_stream.comment())
- m_stream << "\n";
- m_stream << IndentTo(lastIndent);
- if (m_pState->CurGroupChildCount() == 0)
- m_stream << "{ ?";
- else
- m_stream << ", ?";
- }
-
- switch (child) {
- case EmitterNodeType::NoType:
- break;
- case EmitterNodeType::Property:
- case EmitterNodeType::Scalar:
- case EmitterNodeType::FlowSeq:
- case EmitterNodeType::FlowMap:
- SpaceOrIndentTo(
- m_pState->HasBegunContent() || m_pState->CurGroupChildCount() > 0,
- lastIndent);
- break;
- case EmitterNodeType::BlockSeq:
- case EmitterNodeType::BlockMap:
- assert(false);
- break;
- }
-}
-
-void Emitter::FlowMapPrepareLongKeyValue(EmitterNodeType::value child) {
- const std::size_t lastIndent = m_pState->LastIndent();
-
- if (!m_pState->HasBegunNode()) {
- if (m_stream.comment())
- m_stream << "\n";
- m_stream << IndentTo(lastIndent);
- m_stream << ":";
- }
-
- switch (child) {
- case EmitterNodeType::NoType:
- break;
- case EmitterNodeType::Property:
- case EmitterNodeType::Scalar:
- case EmitterNodeType::FlowSeq:
- case EmitterNodeType::FlowMap:
- SpaceOrIndentTo(
- m_pState->HasBegunContent() || m_pState->CurGroupChildCount() > 0,
- lastIndent);
- break;
- case EmitterNodeType::BlockSeq:
- case EmitterNodeType::BlockMap:
- assert(false);
- break;
- }
-}
-
-void Emitter::FlowMapPrepareSimpleKey(EmitterNodeType::value child) {
- const std::size_t lastIndent = m_pState->LastIndent();
-
- if (!m_pState->HasBegunNode()) {
- if (m_stream.comment())
- m_stream << "\n";
- m_stream << IndentTo(lastIndent);
- if (m_pState->CurGroupChildCount() == 0)
- m_stream << "{";
- else
- m_stream << ",";
- }
-
- switch (child) {
- case EmitterNodeType::NoType:
- break;
- case EmitterNodeType::Property:
- case EmitterNodeType::Scalar:
- case EmitterNodeType::FlowSeq:
- case EmitterNodeType::FlowMap:
- SpaceOrIndentTo(
- m_pState->HasBegunContent() || m_pState->CurGroupChildCount() > 0,
- lastIndent);
- break;
- case EmitterNodeType::BlockSeq:
- case EmitterNodeType::BlockMap:
- assert(false);
- break;
- }
-}
-
-void Emitter::FlowMapPrepareSimpleKeyValue(EmitterNodeType::value child) {
- const std::size_t lastIndent = m_pState->LastIndent();
-
- if (!m_pState->HasBegunNode()) {
- if (m_stream.comment())
- m_stream << "\n";
- m_stream << IndentTo(lastIndent);
- m_stream << ":";
- }
-
- switch (child) {
- case EmitterNodeType::NoType:
- break;
- case EmitterNodeType::Property:
- case EmitterNodeType::Scalar:
- case EmitterNodeType::FlowSeq:
- case EmitterNodeType::FlowMap:
- SpaceOrIndentTo(
- m_pState->HasBegunContent() || m_pState->CurGroupChildCount() > 0,
- lastIndent);
- break;
- case EmitterNodeType::BlockSeq:
- case EmitterNodeType::BlockMap:
- assert(false);
- break;
- }
-}
-
-void Emitter::BlockMapPrepareNode(EmitterNodeType::value child) {
- if (m_pState->CurGroupChildCount() % 2 == 0) {
- if (m_pState->GetMapKeyFormat() == LongKey)
- m_pState->SetLongKey();
- if (child == EmitterNodeType::BlockSeq ||
- child == EmitterNodeType::BlockMap)
- m_pState->SetLongKey();
-
- if (m_pState->CurGroupLongKey())
- BlockMapPrepareLongKey(child);
- else
- BlockMapPrepareSimpleKey(child);
- } else {
- if (m_pState->CurGroupLongKey())
- BlockMapPrepareLongKeyValue(child);
- else
- BlockMapPrepareSimpleKeyValue(child);
- }
-}
-
-void Emitter::BlockMapPrepareLongKey(EmitterNodeType::value child) {
- const std::size_t curIndent = m_pState->CurIndent();
- const std::size_t childCount = m_pState->CurGroupChildCount();
-
- if (child == EmitterNodeType::NoType)
- return;
-
- if (!m_pState->HasBegunContent()) {
- if (childCount > 0) {
- m_stream << "\n";
- }
- if (m_stream.comment()) {
- m_stream << "\n";
- }
- m_stream << IndentTo(curIndent);
- m_stream << "?";
- }
-
- switch (child) {
- case EmitterNodeType::NoType:
- break;
- case EmitterNodeType::Property:
- case EmitterNodeType::Scalar:
- case EmitterNodeType::FlowSeq:
- case EmitterNodeType::FlowMap:
- SpaceOrIndentTo(true, curIndent + 1);
- break;
- case EmitterNodeType::BlockSeq:
- case EmitterNodeType::BlockMap:
- break;
- }
-}
-
-void Emitter::BlockMapPrepareLongKeyValue(EmitterNodeType::value child) {
- const std::size_t curIndent = m_pState->CurIndent();
-
- if (child == EmitterNodeType::NoType)
- return;
-
- if (!m_pState->HasBegunContent()) {
- m_stream << "\n";
- m_stream << IndentTo(curIndent);
- m_stream << ":";
- }
-
- switch (child) {
- case EmitterNodeType::NoType:
- break;
- case EmitterNodeType::Property:
- case EmitterNodeType::Scalar:
- case EmitterNodeType::FlowSeq:
- case EmitterNodeType::FlowMap:
- case EmitterNodeType::BlockSeq:
- case EmitterNodeType::BlockMap:
- SpaceOrIndentTo(true, curIndent + 1);
- break;
- }
-}
-
-void Emitter::BlockMapPrepareSimpleKey(EmitterNodeType::value child) {
- const std::size_t curIndent = m_pState->CurIndent();
- const std::size_t childCount = m_pState->CurGroupChildCount();
-
- if (child == EmitterNodeType::NoType)
- return;
-
- if (!m_pState->HasBegunNode()) {
- if (childCount > 0) {
- m_stream << "\n";
- }
- }
-
- switch (child) {
- case EmitterNodeType::NoType:
- break;
- case EmitterNodeType::Property:
- case EmitterNodeType::Scalar:
- case EmitterNodeType::FlowSeq:
- case EmitterNodeType::FlowMap:
- SpaceOrIndentTo(m_pState->HasBegunContent(), curIndent);
- break;
- case EmitterNodeType::BlockSeq:
- case EmitterNodeType::BlockMap:
- break;
- }
-}
-
-void Emitter::BlockMapPrepareSimpleKeyValue(EmitterNodeType::value child) {
- const std::size_t curIndent = m_pState->CurIndent();
- const std::size_t nextIndent = curIndent + m_pState->CurGroupIndent();
-
- if (!m_pState->HasBegunNode()) {
- m_stream << ":";
- }
-
- switch (child) {
- case EmitterNodeType::NoType:
- break;
- case EmitterNodeType::Property:
- case EmitterNodeType::Scalar:
- case EmitterNodeType::FlowSeq:
- case EmitterNodeType::FlowMap:
- SpaceOrIndentTo(true, nextIndent);
- break;
- case EmitterNodeType::BlockSeq:
- case EmitterNodeType::BlockMap:
- m_stream << "\n";
- break;
- }
-}
-
-// SpaceOrIndentTo
-// . Prepares for some more content by proper spacing
-void Emitter::SpaceOrIndentTo(bool requireSpace, std::size_t indent) {
- if (m_stream.comment())
- m_stream << "\n";
- if (m_stream.col() > 0 && requireSpace)
- m_stream << " ";
- m_stream << IndentTo(indent);
-}
-
-void Emitter::PrepareIntegralStream(std::stringstream& stream) const {
-
- switch (m_pState->GetIntFormat()) {
- case Dec:
- stream << std::dec;
- break;
- case Hex:
- stream << "0x";
- stream << std::hex;
- break;
- case Oct:
- stream << "0";
- stream << std::oct;
- break;
- default:
- assert(false);
- }
-}
-
-void Emitter::StartedScalar() { m_pState->StartedScalar(); }
-
-// *******************************************************************************************
-// overloads of Write
-
-Emitter& Emitter::Write(const std::string& str) {
- if (!good())
- return *this;
-
- const bool escapeNonAscii = m_pState->GetOutputCharset() == EscapeNonAscii;
- const StringFormat::value strFormat =
- Utils::ComputeStringFormat(str, m_pState->GetStringFormat(),
- m_pState->CurGroupFlowType(), escapeNonAscii);
-
- if (strFormat == StringFormat::Literal)
- m_pState->SetMapKeyFormat(YAML::LongKey, FmtScope::Local);
-
- PrepareNode(EmitterNodeType::Scalar);
-
- switch (strFormat) {
- case StringFormat::Plain:
- m_stream << str;
- break;
- case StringFormat::SingleQuoted:
- Utils::WriteSingleQuotedString(m_stream, str);
- break;
- case StringFormat::DoubleQuoted:
- Utils::WriteDoubleQuotedString(m_stream, str, escapeNonAscii);
- break;
- case StringFormat::Literal:
- Utils::WriteLiteralString(m_stream, str,
- m_pState->CurIndent() + m_pState->GetIndent());
- break;
- }
-
- StartedScalar();
-
- return *this;
-}
-
-std::size_t Emitter::GetFloatPrecision() const {
- return m_pState->GetFloatPrecision();
-}
-
-std::size_t Emitter::GetDoublePrecision() const {
- return m_pState->GetDoublePrecision();
-}
-
-const char* Emitter::ComputeFullBoolName(bool b) const {
- const EMITTER_MANIP mainFmt = (m_pState->GetBoolLengthFormat() == ShortBool
- ? YesNoBool
- : m_pState->GetBoolFormat());
- const EMITTER_MANIP caseFmt = m_pState->GetBoolCaseFormat();
- switch (mainFmt) {
- case YesNoBool:
- switch (caseFmt) {
- case UpperCase:
- return b ? "YES" : "NO";
- case CamelCase:
- return b ? "Yes" : "No";
- case LowerCase:
- return b ? "yes" : "no";
- default:
- break;
- }
- break;
- case OnOffBool:
- switch (caseFmt) {
- case UpperCase:
- return b ? "ON" : "OFF";
- case CamelCase:
- return b ? "On" : "Off";
- case LowerCase:
- return b ? "on" : "off";
- default:
- break;
- }
- break;
- case TrueFalseBool:
- switch (caseFmt) {
- case UpperCase:
- return b ? "TRUE" : "FALSE";
- case CamelCase:
- return b ? "True" : "False";
- case LowerCase:
- return b ? "true" : "false";
- default:
- break;
- }
- break;
- default:
- break;
- }
- return b ? "y" : "n"; // should never get here, but it can't hurt to give
- // these answers
-}
-
-Emitter& Emitter::Write(bool b) {
- if (!good())
- return *this;
-
- PrepareNode(EmitterNodeType::Scalar);
-
- const char* name = ComputeFullBoolName(b);
- if (m_pState->GetBoolLengthFormat() == ShortBool)
- m_stream << name[0];
- else
- m_stream << name;
-
- StartedScalar();
-
- return *this;
-}
-
-Emitter& Emitter::Write(char ch) {
- if (!good())
- return *this;
-
- PrepareNode(EmitterNodeType::Scalar);
- Utils::WriteChar(m_stream, ch);
- StartedScalar();
-
- return *this;
-}
-
-Emitter& Emitter::Write(const _Alias& alias) {
- if (!good())
- return *this;
-
- if (m_pState->HasAnchor() || m_pState->HasTag()) {
- m_pState->SetError(ErrorMsg::INVALID_ALIAS);
- return *this;
- }
-
- PrepareNode(EmitterNodeType::Scalar);
-
- if (!Utils::WriteAlias(m_stream, alias.content)) {
- m_pState->SetError(ErrorMsg::INVALID_ALIAS);
- return *this;
- }
-
- StartedScalar();
-
- return *this;
-}
-
-Emitter& Emitter::Write(const _Anchor& anchor) {
- if (!good())
- return *this;
-
- if (m_pState->HasAnchor()) {
- m_pState->SetError(ErrorMsg::INVALID_ANCHOR);
- return *this;
- }
-
- PrepareNode(EmitterNodeType::Property);
-
- if (!Utils::WriteAnchor(m_stream, anchor.content)) {
- m_pState->SetError(ErrorMsg::INVALID_ANCHOR);
- return *this;
- }
-
- m_pState->SetAnchor();
-
- return *this;
-}
-
-Emitter& Emitter::Write(const _Tag& tag) {
- if (!good())
- return *this;
-
- if (m_pState->HasTag()) {
- m_pState->SetError(ErrorMsg::INVALID_TAG);
- return *this;
- }
-
- PrepareNode(EmitterNodeType::Property);
-
- bool success = false;
- if (tag.type == _Tag::Type::Verbatim)
- success = Utils::WriteTag(m_stream, tag.content, true);
- else if (tag.type == _Tag::Type::PrimaryHandle)
- success = Utils::WriteTag(m_stream, tag.content, false);
- else
- success = Utils::WriteTagWithPrefix(m_stream, tag.prefix, tag.content);
-
- if (!success) {
- m_pState->SetError(ErrorMsg::INVALID_TAG);
- return *this;
- }
-
- m_pState->SetTag();
-
- return *this;
-}
-
-void Emitter::EmitKindTag() { Write(LocalTag("")); }
-
-Emitter& Emitter::Write(const _Comment& comment) {
- if (!good())
- return *this;
-
- PrepareNode(EmitterNodeType::NoType);
-
- if (m_stream.col() > 0)
- m_stream << Indentation(m_pState->GetPreCommentIndent());
- Utils::WriteComment(m_stream, comment.content,
- m_pState->GetPostCommentIndent());
-
- m_pState->SetNonContent();
-
- return *this;
-}
-
-Emitter& Emitter::Write(const _Null& /*null*/) {
- if (!good())
- return *this;
-
- PrepareNode(EmitterNodeType::Scalar);
-
- m_stream << "~";
-
- StartedScalar();
-
- return *this;
-}
-
-Emitter& Emitter::Write(const Binary& binary) {
- Write(SecondaryTag("binary"));
-
- if (!good())
- return *this;
-
- PrepareNode(EmitterNodeType::Scalar);
- Utils::WriteBinary(m_stream, binary);
- StartedScalar();
-
- return *this;
-}
-}
+#include <sstream>
+
+#include "emitterutils.h"
+#include "indentation.h" // IWYU pragma: keep
+#include "yaml-cpp/emitter.h"
+#include "yaml-cpp/emitterdef.h"
+#include "yaml-cpp/emittermanip.h"
+#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
+
+namespace YAML {
+class Binary;
+struct _Null;
+
+Emitter::Emitter() : m_pState(new EmitterState) {}
+
+Emitter::Emitter(std::ostream& stream)
+ : m_pState(new EmitterState), m_stream(stream) {}
+
+Emitter::~Emitter() {}
+
+const char* Emitter::c_str() const { return m_stream.str(); }
+
+std::size_t Emitter::size() const { return m_stream.pos(); }
+
+// state checking
+bool Emitter::good() const { return m_pState->good(); }
+
+const std::string Emitter::GetLastError() const {
+ return m_pState->GetLastError();
+}
+
+// global setters
+bool Emitter::SetOutputCharset(EMITTER_MANIP value) {
+ return m_pState->SetOutputCharset(value, FmtScope::Global);
+}
+
+bool Emitter::SetStringFormat(EMITTER_MANIP value) {
+ return m_pState->SetStringFormat(value, FmtScope::Global);
+}
+
+bool Emitter::SetBoolFormat(EMITTER_MANIP value) {
+ bool ok = false;
+ if (m_pState->SetBoolFormat(value, FmtScope::Global))
+ ok = true;
+ if (m_pState->SetBoolCaseFormat(value, FmtScope::Global))
+ ok = true;
+ if (m_pState->SetBoolLengthFormat(value, FmtScope::Global))
+ ok = true;
+ return ok;
+}
+
+bool Emitter::SetIntBase(EMITTER_MANIP value) {
+ return m_pState->SetIntFormat(value, FmtScope::Global);
+}
+
+bool Emitter::SetSeqFormat(EMITTER_MANIP value) {
+ return m_pState->SetFlowType(GroupType::Seq, value, FmtScope::Global);
+}
+
+bool Emitter::SetMapFormat(EMITTER_MANIP value) {
+ bool ok = false;
+ if (m_pState->SetFlowType(GroupType::Map, value, FmtScope::Global))
+ ok = true;
+ if (m_pState->SetMapKeyFormat(value, FmtScope::Global))
+ ok = true;
+ return ok;
+}
+
+bool Emitter::SetIndent(std::size_t n) {
+ return m_pState->SetIndent(n, FmtScope::Global);
+}
+
+bool Emitter::SetPreCommentIndent(std::size_t n) {
+ return m_pState->SetPreCommentIndent(n, FmtScope::Global);
+}
+
+bool Emitter::SetPostCommentIndent(std::size_t n) {
+ return m_pState->SetPostCommentIndent(n, FmtScope::Global);
+}
+
+bool Emitter::SetFloatPrecision(std::size_t n) {
+ return m_pState->SetFloatPrecision(n, FmtScope::Global);
+}
+
+bool Emitter::SetDoublePrecision(std::size_t n) {
+ return m_pState->SetDoublePrecision(n, FmtScope::Global);
+}
+
+// SetLocalValue
+// . Either start/end a group, or set a modifier locally
+Emitter& Emitter::SetLocalValue(EMITTER_MANIP value) {
+ if (!good())
+ return *this;
+
+ switch (value) {
+ case BeginDoc:
+ EmitBeginDoc();
+ break;
+ case EndDoc:
+ EmitEndDoc();
+ break;
+ case BeginSeq:
+ EmitBeginSeq();
+ break;
+ case EndSeq:
+ EmitEndSeq();
+ break;
+ case BeginMap:
+ EmitBeginMap();
+ break;
+ case EndMap:
+ EmitEndMap();
+ break;
+ case Key:
+ case Value:
+ // deprecated (these can be deduced by the parity of nodes in a map)
+ break;
+ case TagByKind:
+ EmitKindTag();
+ break;
+ case Newline:
+ EmitNewline();
+ break;
+ default:
+ m_pState->SetLocalValue(value);
+ break;
+ }
+ return *this;
+}
+
+Emitter& Emitter::SetLocalIndent(const _Indent& indent) {
+ m_pState->SetIndent(indent.value, FmtScope::Local);
+ return *this;
+}
+
+Emitter& Emitter::SetLocalPrecision(const _Precision& precision) {
+ if (precision.floatPrecision >= 0)
+ m_pState->SetFloatPrecision(precision.floatPrecision, FmtScope::Local);
+ if (precision.doublePrecision >= 0)
+ m_pState->SetDoublePrecision(precision.doublePrecision, FmtScope::Local);
+ return *this;
+}
+
+// EmitBeginDoc
+void Emitter::EmitBeginDoc() {
+ if (!good())
+ return;
+
+ if (m_pState->CurGroupType() != GroupType::NoType) {
+ m_pState->SetError("Unexpected begin document");
+ return;
+ }
+
+ if (m_pState->HasAnchor() || m_pState->HasTag()) {
+ m_pState->SetError("Unexpected begin document");
+ return;
+ }
+
+ if (m_stream.col() > 0)
+ m_stream << "\n";
+ m_stream << "---\n";
+
+ m_pState->StartedDoc();
+}
+
+// EmitEndDoc
+void Emitter::EmitEndDoc() {
+ if (!good())
+ return;
+
+ if (m_pState->CurGroupType() != GroupType::NoType) {
+ m_pState->SetError("Unexpected begin document");
+ return;
+ }
+
+ if (m_pState->HasAnchor() || m_pState->HasTag()) {
+ m_pState->SetError("Unexpected begin document");
+ return;
+ }
+
+ if (m_stream.col() > 0)
+ m_stream << "\n";
+ m_stream << "...\n";
+}
+
+// EmitBeginSeq
+void Emitter::EmitBeginSeq() {
+ if (!good())
+ return;
+
+ PrepareNode(m_pState->NextGroupType(GroupType::Seq));
+
+ m_pState->StartedGroup(GroupType::Seq);
+}
+
+// EmitEndSeq
+void Emitter::EmitEndSeq() {
+ if (!good())
+ return;
+
+ if (m_pState->CurGroupChildCount() == 0)
+ m_pState->ForceFlow();
+
+ if (m_pState->CurGroupFlowType() == FlowType::Flow) {
+ if (m_stream.comment())
+ m_stream << "\n";
+ m_stream << IndentTo(m_pState->CurIndent());
+ if (m_pState->CurGroupChildCount() == 0)
+ m_stream << "[";
+ m_stream << "]";
+ }
+
+ m_pState->EndedGroup(GroupType::Seq);
+}
+
+// EmitBeginMap
+void Emitter::EmitBeginMap() {
+ if (!good())
+ return;
+
+ PrepareNode(m_pState->NextGroupType(GroupType::Map));
+
+ m_pState->StartedGroup(GroupType::Map);
+}
+
+// EmitEndMap
+void Emitter::EmitEndMap() {
+ if (!good())
+ return;
+
+ if (m_pState->CurGroupChildCount() == 0)
+ m_pState->ForceFlow();
+
+ if (m_pState->CurGroupFlowType() == FlowType::Flow) {
+ if (m_stream.comment())
+ m_stream << "\n";
+ m_stream << IndentTo(m_pState->CurIndent());
+ if (m_pState->CurGroupChildCount() == 0)
+ m_stream << "{";
+ m_stream << "}";
+ }
+
+ m_pState->EndedGroup(GroupType::Map);
+}
+
+// EmitNewline
+void Emitter::EmitNewline() {
+ if (!good())
+ return;
+
+ PrepareNode(EmitterNodeType::NoType);
+ m_stream << "\n";
+ m_pState->SetNonContent();
+}
+
+bool Emitter::CanEmitNewline() const { return true; }
+
+// Put the stream in a state so we can simply write the next node
+// E.g., if we're in a sequence, write the "- "
+void Emitter::PrepareNode(EmitterNodeType::value child) {
+ switch (m_pState->CurGroupNodeType()) {
+ case EmitterNodeType::NoType:
+ PrepareTopNode(child);
+ break;
+ case EmitterNodeType::FlowSeq:
+ FlowSeqPrepareNode(child);
+ break;
+ case EmitterNodeType::BlockSeq:
+ BlockSeqPrepareNode(child);
+ break;
+ case EmitterNodeType::FlowMap:
+ FlowMapPrepareNode(child);
+ break;
+ case EmitterNodeType::BlockMap:
+ BlockMapPrepareNode(child);
+ break;
+ case EmitterNodeType::Property:
+ case EmitterNodeType::Scalar:
+ assert(false);
+ break;
+ }
+}
+
+void Emitter::PrepareTopNode(EmitterNodeType::value child) {
+ if (child == EmitterNodeType::NoType)
+ return;
+
+ if (m_pState->CurGroupChildCount() > 0 && m_stream.col() > 0) {
+ if (child != EmitterNodeType::NoType)
+ EmitBeginDoc();
+ }
+
+ switch (child) {
+ case EmitterNodeType::NoType:
+ break;
+ case EmitterNodeType::Property:
+ case EmitterNodeType::Scalar:
+ case EmitterNodeType::FlowSeq:
+ case EmitterNodeType::FlowMap:
+ // TODO: if we were writing null, and
+ // we wanted it blank, we wouldn't want a space
+ SpaceOrIndentTo(m_pState->HasBegunContent(), 0);
+ break;
+ case EmitterNodeType::BlockSeq:
+ case EmitterNodeType::BlockMap:
+ if (m_pState->HasBegunNode())
+ m_stream << "\n";
+ break;
+ }
+}
+
+void Emitter::FlowSeqPrepareNode(EmitterNodeType::value child) {
+ const std::size_t lastIndent = m_pState->LastIndent();
+
+ if (!m_pState->HasBegunNode()) {
+ if (m_stream.comment())
+ m_stream << "\n";
+ m_stream << IndentTo(lastIndent);
+ if (m_pState->CurGroupChildCount() == 0)
+ m_stream << "[";
+ else
+ m_stream << ",";
+ }
+
+ switch (child) {
+ case EmitterNodeType::NoType:
+ break;
+ case EmitterNodeType::Property:
+ case EmitterNodeType::Scalar:
+ case EmitterNodeType::FlowSeq:
+ case EmitterNodeType::FlowMap:
+ SpaceOrIndentTo(
+ m_pState->HasBegunContent() || m_pState->CurGroupChildCount() > 0,
+ lastIndent);
+ break;
+ case EmitterNodeType::BlockSeq:
+ case EmitterNodeType::BlockMap:
+ assert(false);
+ break;
+ }
+}
+
+void Emitter::BlockSeqPrepareNode(EmitterNodeType::value child) {
+ const std::size_t curIndent = m_pState->CurIndent();
+ const std::size_t nextIndent = curIndent + m_pState->CurGroupIndent();
+
+ if (child == EmitterNodeType::NoType)
+ return;
+
+ if (!m_pState->HasBegunContent()) {
+ if (m_pState->CurGroupChildCount() > 0 || m_stream.comment()) {
+ m_stream << "\n";
+ }
+ m_stream << IndentTo(curIndent);
+ m_stream << "-";
+ }
+
+ switch (child) {
+ case EmitterNodeType::NoType:
+ break;
+ case EmitterNodeType::Property:
+ case EmitterNodeType::Scalar:
+ case EmitterNodeType::FlowSeq:
+ case EmitterNodeType::FlowMap:
+ SpaceOrIndentTo(m_pState->HasBegunContent(), nextIndent);
+ break;
+ case EmitterNodeType::BlockSeq:
+ m_stream << "\n";
+ break;
+ case EmitterNodeType::BlockMap:
+ if (m_pState->HasBegunContent() || m_stream.comment())
+ m_stream << "\n";
+ break;
+ }
+}
+
+void Emitter::FlowMapPrepareNode(EmitterNodeType::value child) {
+ if (m_pState->CurGroupChildCount() % 2 == 0) {
+ if (m_pState->GetMapKeyFormat() == LongKey)
+ m_pState->SetLongKey();
+
+ if (m_pState->CurGroupLongKey())
+ FlowMapPrepareLongKey(child);
+ else
+ FlowMapPrepareSimpleKey(child);
+ } else {
+ if (m_pState->CurGroupLongKey())
+ FlowMapPrepareLongKeyValue(child);
+ else
+ FlowMapPrepareSimpleKeyValue(child);
+ }
+}
+
+void Emitter::FlowMapPrepareLongKey(EmitterNodeType::value child) {
+ const std::size_t lastIndent = m_pState->LastIndent();
+
+ if (!m_pState->HasBegunNode()) {
+ if (m_stream.comment())
+ m_stream << "\n";
+ m_stream << IndentTo(lastIndent);
+ if (m_pState->CurGroupChildCount() == 0)
+ m_stream << "{ ?";
+ else
+ m_stream << ", ?";
+ }
+
+ switch (child) {
+ case EmitterNodeType::NoType:
+ break;
+ case EmitterNodeType::Property:
+ case EmitterNodeType::Scalar:
+ case EmitterNodeType::FlowSeq:
+ case EmitterNodeType::FlowMap:
+ SpaceOrIndentTo(
+ m_pState->HasBegunContent() || m_pState->CurGroupChildCount() > 0,
+ lastIndent);
+ break;
+ case EmitterNodeType::BlockSeq:
+ case EmitterNodeType::BlockMap:
+ assert(false);
+ break;
+ }
+}
+
+void Emitter::FlowMapPrepareLongKeyValue(EmitterNodeType::value child) {
+ const std::size_t lastIndent = m_pState->LastIndent();
+
+ if (!m_pState->HasBegunNode()) {
+ if (m_stream.comment())
+ m_stream << "\n";
+ m_stream << IndentTo(lastIndent);
+ m_stream << ":";
+ }
+
+ switch (child) {
+ case EmitterNodeType::NoType:
+ break;
+ case EmitterNodeType::Property:
+ case EmitterNodeType::Scalar:
+ case EmitterNodeType::FlowSeq:
+ case EmitterNodeType::FlowMap:
+ SpaceOrIndentTo(
+ m_pState->HasBegunContent() || m_pState->CurGroupChildCount() > 0,
+ lastIndent);
+ break;
+ case EmitterNodeType::BlockSeq:
+ case EmitterNodeType::BlockMap:
+ assert(false);
+ break;
+ }
+}
+
+void Emitter::FlowMapPrepareSimpleKey(EmitterNodeType::value child) {
+ const std::size_t lastIndent = m_pState->LastIndent();
+
+ if (!m_pState->HasBegunNode()) {
+ if (m_stream.comment())
+ m_stream << "\n";
+ m_stream << IndentTo(lastIndent);
+ if (m_pState->CurGroupChildCount() == 0)
+ m_stream << "{";
+ else
+ m_stream << ",";
+ }
+
+ switch (child) {
+ case EmitterNodeType::NoType:
+ break;
+ case EmitterNodeType::Property:
+ case EmitterNodeType::Scalar:
+ case EmitterNodeType::FlowSeq:
+ case EmitterNodeType::FlowMap:
+ SpaceOrIndentTo(
+ m_pState->HasBegunContent() || m_pState->CurGroupChildCount() > 0,
+ lastIndent);
+ break;
+ case EmitterNodeType::BlockSeq:
+ case EmitterNodeType::BlockMap:
+ assert(false);
+ break;
+ }
+}
+
+void Emitter::FlowMapPrepareSimpleKeyValue(EmitterNodeType::value child) {
+ const std::size_t lastIndent = m_pState->LastIndent();
+
+ if (!m_pState->HasBegunNode()) {
+ if (m_stream.comment())
+ m_stream << "\n";
+ m_stream << IndentTo(lastIndent);
+ m_stream << ":";
+ }
+
+ switch (child) {
+ case EmitterNodeType::NoType:
+ break;
+ case EmitterNodeType::Property:
+ case EmitterNodeType::Scalar:
+ case EmitterNodeType::FlowSeq:
+ case EmitterNodeType::FlowMap:
+ SpaceOrIndentTo(
+ m_pState->HasBegunContent() || m_pState->CurGroupChildCount() > 0,
+ lastIndent);
+ break;
+ case EmitterNodeType::BlockSeq:
+ case EmitterNodeType::BlockMap:
+ assert(false);
+ break;
+ }
+}
+
+void Emitter::BlockMapPrepareNode(EmitterNodeType::value child) {
+ if (m_pState->CurGroupChildCount() % 2 == 0) {
+ if (m_pState->GetMapKeyFormat() == LongKey)
+ m_pState->SetLongKey();
+ if (child == EmitterNodeType::BlockSeq ||
+ child == EmitterNodeType::BlockMap)
+ m_pState->SetLongKey();
+
+ if (m_pState->CurGroupLongKey())
+ BlockMapPrepareLongKey(child);
+ else
+ BlockMapPrepareSimpleKey(child);
+ } else {
+ if (m_pState->CurGroupLongKey())
+ BlockMapPrepareLongKeyValue(child);
+ else
+ BlockMapPrepareSimpleKeyValue(child);
+ }
+}
+
+void Emitter::BlockMapPrepareLongKey(EmitterNodeType::value child) {
+ const std::size_t curIndent = m_pState->CurIndent();
+ const std::size_t childCount = m_pState->CurGroupChildCount();
+
+ if (child == EmitterNodeType::NoType)
+ return;
+
+ if (!m_pState->HasBegunContent()) {
+ if (childCount > 0) {
+ m_stream << "\n";
+ }
+ if (m_stream.comment()) {
+ m_stream << "\n";
+ }
+ m_stream << IndentTo(curIndent);
+ m_stream << "?";
+ }
+
+ switch (child) {
+ case EmitterNodeType::NoType:
+ break;
+ case EmitterNodeType::Property:
+ case EmitterNodeType::Scalar:
+ case EmitterNodeType::FlowSeq:
+ case EmitterNodeType::FlowMap:
+ SpaceOrIndentTo(true, curIndent + 1);
+ break;
+ case EmitterNodeType::BlockSeq:
+ case EmitterNodeType::BlockMap:
+ break;
+ }
+}
+
+void Emitter::BlockMapPrepareLongKeyValue(EmitterNodeType::value child) {
+ const std::size_t curIndent = m_pState->CurIndent();
+
+ if (child == EmitterNodeType::NoType)
+ return;
+
+ if (!m_pState->HasBegunContent()) {
+ m_stream << "\n";
+ m_stream << IndentTo(curIndent);
+ m_stream << ":";
+ }
+
+ switch (child) {
+ case EmitterNodeType::NoType:
+ break;
+ case EmitterNodeType::Property:
+ case EmitterNodeType::Scalar:
+ case EmitterNodeType::FlowSeq:
+ case EmitterNodeType::FlowMap:
+ case EmitterNodeType::BlockSeq:
+ case EmitterNodeType::BlockMap:
+ SpaceOrIndentTo(true, curIndent + 1);
+ break;
+ }
+}
+
+void Emitter::BlockMapPrepareSimpleKey(EmitterNodeType::value child) {
+ const std::size_t curIndent = m_pState->CurIndent();
+ const std::size_t childCount = m_pState->CurGroupChildCount();
+
+ if (child == EmitterNodeType::NoType)
+ return;
+
+ if (!m_pState->HasBegunNode()) {
+ if (childCount > 0) {
+ m_stream << "\n";
+ }
+ }
+
+ switch (child) {
+ case EmitterNodeType::NoType:
+ break;
+ case EmitterNodeType::Property:
+ case EmitterNodeType::Scalar:
+ case EmitterNodeType::FlowSeq:
+ case EmitterNodeType::FlowMap:
+ SpaceOrIndentTo(m_pState->HasBegunContent(), curIndent);
+ break;
+ case EmitterNodeType::BlockSeq:
+ case EmitterNodeType::BlockMap:
+ break;
+ }
+}
+
+void Emitter::BlockMapPrepareSimpleKeyValue(EmitterNodeType::value child) {
+ const std::size_t curIndent = m_pState->CurIndent();
+ const std::size_t nextIndent = curIndent + m_pState->CurGroupIndent();
+
+ if (!m_pState->HasBegunNode()) {
+ m_stream << ":";
+ }
+
+ switch (child) {
+ case EmitterNodeType::NoType:
+ break;
+ case EmitterNodeType::Property:
+ case EmitterNodeType::Scalar:
+ case EmitterNodeType::FlowSeq:
+ case EmitterNodeType::FlowMap:
+ SpaceOrIndentTo(true, nextIndent);
+ break;
+ case EmitterNodeType::BlockSeq:
+ case EmitterNodeType::BlockMap:
+ m_stream << "\n";
+ break;
+ }
+}
+
+// SpaceOrIndentTo
+// . Prepares for some more content by proper spacing
+void Emitter::SpaceOrIndentTo(bool requireSpace, std::size_t indent) {
+ if (m_stream.comment())
+ m_stream << "\n";
+ if (m_stream.col() > 0 && requireSpace)
+ m_stream << " ";
+ m_stream << IndentTo(indent);
+}
+
+void Emitter::PrepareIntegralStream(std::stringstream& stream) const {
+
+ switch (m_pState->GetIntFormat()) {
+ case Dec:
+ stream << std::dec;
+ break;
+ case Hex:
+ stream << "0x";
+ stream << std::hex;
+ break;
+ case Oct:
+ stream << "0";
+ stream << std::oct;
+ break;
+ default:
+ assert(false);
+ }
+}
+
+void Emitter::StartedScalar() { m_pState->StartedScalar(); }
+
+// *******************************************************************************************
+// overloads of Write
+
+Emitter& Emitter::Write(const std::string& str) {
+ if (!good())
+ return *this;
+
+ const bool escapeNonAscii = m_pState->GetOutputCharset() == EscapeNonAscii;
+ const StringFormat::value strFormat =
+ Utils::ComputeStringFormat(str, m_pState->GetStringFormat(),
+ m_pState->CurGroupFlowType(), escapeNonAscii);
+
+ if (strFormat == StringFormat::Literal)
+ m_pState->SetMapKeyFormat(YAML::LongKey, FmtScope::Local);
+
+ PrepareNode(EmitterNodeType::Scalar);
+
+ switch (strFormat) {
+ case StringFormat::Plain:
+ m_stream << str;
+ break;
+ case StringFormat::SingleQuoted:
+ Utils::WriteSingleQuotedString(m_stream, str);
+ break;
+ case StringFormat::DoubleQuoted:
+ Utils::WriteDoubleQuotedString(m_stream, str, escapeNonAscii);
+ break;
+ case StringFormat::Literal:
+ Utils::WriteLiteralString(m_stream, str,
+ m_pState->CurIndent() + m_pState->GetIndent());
+ break;
+ }
+
+ StartedScalar();
+
+ return *this;
+}
+
+std::size_t Emitter::GetFloatPrecision() const {
+ return m_pState->GetFloatPrecision();
+}
+
+std::size_t Emitter::GetDoublePrecision() const {
+ return m_pState->GetDoublePrecision();
+}
+
+const char* Emitter::ComputeFullBoolName(bool b) const {
+ const EMITTER_MANIP mainFmt = (m_pState->GetBoolLengthFormat() == ShortBool
+ ? YesNoBool
+ : m_pState->GetBoolFormat());
+ const EMITTER_MANIP caseFmt = m_pState->GetBoolCaseFormat();
+ switch (mainFmt) {
+ case YesNoBool:
+ switch (caseFmt) {
+ case UpperCase:
+ return b ? "YES" : "NO";
+ case CamelCase:
+ return b ? "Yes" : "No";
+ case LowerCase:
+ return b ? "yes" : "no";
+ default:
+ break;
+ }
+ break;
+ case OnOffBool:
+ switch (caseFmt) {
+ case UpperCase:
+ return b ? "ON" : "OFF";
+ case CamelCase:
+ return b ? "On" : "Off";
+ case LowerCase:
+ return b ? "on" : "off";
+ default:
+ break;
+ }
+ break;
+ case TrueFalseBool:
+ switch (caseFmt) {
+ case UpperCase:
+ return b ? "TRUE" : "FALSE";
+ case CamelCase:
+ return b ? "True" : "False";
+ case LowerCase:
+ return b ? "true" : "false";
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ return b ? "y" : "n"; // should never get here, but it can't hurt to give
+ // these answers
+}
+
+Emitter& Emitter::Write(bool b) {
+ if (!good())
+ return *this;
+
+ PrepareNode(EmitterNodeType::Scalar);
+
+ const char* name = ComputeFullBoolName(b);
+ if (m_pState->GetBoolLengthFormat() == ShortBool)
+ m_stream << name[0];
+ else
+ m_stream << name;
+
+ StartedScalar();
+
+ return *this;
+}
+
+Emitter& Emitter::Write(char ch) {
+ if (!good())
+ return *this;
+
+ PrepareNode(EmitterNodeType::Scalar);
+ Utils::WriteChar(m_stream, ch);
+ StartedScalar();
+
+ return *this;
+}
+
+Emitter& Emitter::Write(const _Alias& alias) {
+ if (!good())
+ return *this;
+
+ if (m_pState->HasAnchor() || m_pState->HasTag()) {
+ m_pState->SetError(ErrorMsg::INVALID_ALIAS);
+ return *this;
+ }
+
+ PrepareNode(EmitterNodeType::Scalar);
+
+ if (!Utils::WriteAlias(m_stream, alias.content)) {
+ m_pState->SetError(ErrorMsg::INVALID_ALIAS);
+ return *this;
+ }
+
+ StartedScalar();
+
+ return *this;
+}
+
+Emitter& Emitter::Write(const _Anchor& anchor) {
+ if (!good())
+ return *this;
+
+ if (m_pState->HasAnchor()) {
+ m_pState->SetError(ErrorMsg::INVALID_ANCHOR);
+ return *this;
+ }
+
+ PrepareNode(EmitterNodeType::Property);
+
+ if (!Utils::WriteAnchor(m_stream, anchor.content)) {
+ m_pState->SetError(ErrorMsg::INVALID_ANCHOR);
+ return *this;
+ }
+
+ m_pState->SetAnchor();
+
+ return *this;
+}
+
+Emitter& Emitter::Write(const _Tag& tag) {
+ if (!good())
+ return *this;
+
+ if (m_pState->HasTag()) {
+ m_pState->SetError(ErrorMsg::INVALID_TAG);
+ return *this;
+ }
+
+ PrepareNode(EmitterNodeType::Property);
+
+ bool success = false;
+ if (tag.type == _Tag::Type::Verbatim)
+ success = Utils::WriteTag(m_stream, tag.content, true);
+ else if (tag.type == _Tag::Type::PrimaryHandle)
+ success = Utils::WriteTag(m_stream, tag.content, false);
+ else
+ success = Utils::WriteTagWithPrefix(m_stream, tag.prefix, tag.content);
+
+ if (!success) {
+ m_pState->SetError(ErrorMsg::INVALID_TAG);
+ return *this;
+ }
+
+ m_pState->SetTag();
+
+ return *this;
+}
+
+void Emitter::EmitKindTag() { Write(LocalTag("")); }
+
+Emitter& Emitter::Write(const _Comment& comment) {
+ if (!good())
+ return *this;
+
+ PrepareNode(EmitterNodeType::NoType);
+
+ if (m_stream.col() > 0)
+ m_stream << Indentation(m_pState->GetPreCommentIndent());
+ Utils::WriteComment(m_stream, comment.content,
+ m_pState->GetPostCommentIndent());
+
+ m_pState->SetNonContent();
+
+ return *this;
+}
+
+Emitter& Emitter::Write(const _Null& /*null*/) {
+ if (!good())
+ return *this;
+
+ PrepareNode(EmitterNodeType::Scalar);
+
+ m_stream << "~";
+
+ StartedScalar();
+
+ return *this;
+}
+
+Emitter& Emitter::Write(const Binary& binary) {
+ Write(SecondaryTag("binary"));
+
+ if (!good())
+ return *this;
+
+ PrepareNode(EmitterNodeType::Scalar);
+ Utils::WriteBinary(m_stream, binary);
+ StartedScalar();
+
+ return *this;
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/emitterstate.cpp b/contrib/libs/yaml-cpp/src/emitterstate.cpp
index f32ede28d0..3542aaf507 100644
--- a/contrib/libs/yaml-cpp/src/emitterstate.cpp
+++ b/contrib/libs/yaml-cpp/src/emitterstate.cpp
@@ -1,365 +1,365 @@
-#include <limits>
-
-#include "emitterstate.h"
-#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
-
-namespace YAML {
-EmitterState::EmitterState()
- : m_isGood(true),
- m_curIndent(0),
- m_hasAnchor(false),
- m_hasTag(false),
- m_hasNonContent(false),
- m_docCount(0) {
- // set default global manipulators
- m_charset.set(EmitNonAscii);
- m_strFmt.set(Auto);
- m_boolFmt.set(TrueFalseBool);
- m_boolLengthFmt.set(LongBool);
- m_boolCaseFmt.set(LowerCase);
- m_intFmt.set(Dec);
- m_indent.set(2);
- m_preCommentIndent.set(2);
- m_postCommentIndent.set(1);
- m_seqFmt.set(Block);
- m_mapFmt.set(Block);
- m_mapKeyFmt.set(Auto);
- m_floatPrecision.set(std::numeric_limits<float>::digits10 + 1);
- m_doublePrecision.set(std::numeric_limits<double>::digits10 + 1);
-}
-
-EmitterState::~EmitterState() {}
-
-// SetLocalValue
-// . We blindly tries to set all possible formatters to this value
-// . Only the ones that make sense will be accepted
-void EmitterState::SetLocalValue(EMITTER_MANIP value) {
- SetOutputCharset(value, FmtScope::Local);
- SetStringFormat(value, FmtScope::Local);
- SetBoolFormat(value, FmtScope::Local);
- SetBoolCaseFormat(value, FmtScope::Local);
- SetBoolLengthFormat(value, FmtScope::Local);
- SetIntFormat(value, FmtScope::Local);
- SetFlowType(GroupType::Seq, value, FmtScope::Local);
- SetFlowType(GroupType::Map, value, FmtScope::Local);
- SetMapKeyFormat(value, FmtScope::Local);
-}
-
-void EmitterState::SetAnchor() { m_hasAnchor = true; }
-
-void EmitterState::SetTag() { m_hasTag = true; }
-
-void EmitterState::SetNonContent() { m_hasNonContent = true; }
-
-void EmitterState::SetLongKey() {
- assert(!m_groups.empty());
+#include <limits>
+
+#include "emitterstate.h"
+#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
+
+namespace YAML {
+EmitterState::EmitterState()
+ : m_isGood(true),
+ m_curIndent(0),
+ m_hasAnchor(false),
+ m_hasTag(false),
+ m_hasNonContent(false),
+ m_docCount(0) {
+ // set default global manipulators
+ m_charset.set(EmitNonAscii);
+ m_strFmt.set(Auto);
+ m_boolFmt.set(TrueFalseBool);
+ m_boolLengthFmt.set(LongBool);
+ m_boolCaseFmt.set(LowerCase);
+ m_intFmt.set(Dec);
+ m_indent.set(2);
+ m_preCommentIndent.set(2);
+ m_postCommentIndent.set(1);
+ m_seqFmt.set(Block);
+ m_mapFmt.set(Block);
+ m_mapKeyFmt.set(Auto);
+ m_floatPrecision.set(std::numeric_limits<float>::digits10 + 1);
+ m_doublePrecision.set(std::numeric_limits<double>::digits10 + 1);
+}
+
+EmitterState::~EmitterState() {}
+
+// SetLocalValue
+// . We blindly tries to set all possible formatters to this value
+// . Only the ones that make sense will be accepted
+void EmitterState::SetLocalValue(EMITTER_MANIP value) {
+ SetOutputCharset(value, FmtScope::Local);
+ SetStringFormat(value, FmtScope::Local);
+ SetBoolFormat(value, FmtScope::Local);
+ SetBoolCaseFormat(value, FmtScope::Local);
+ SetBoolLengthFormat(value, FmtScope::Local);
+ SetIntFormat(value, FmtScope::Local);
+ SetFlowType(GroupType::Seq, value, FmtScope::Local);
+ SetFlowType(GroupType::Map, value, FmtScope::Local);
+ SetMapKeyFormat(value, FmtScope::Local);
+}
+
+void EmitterState::SetAnchor() { m_hasAnchor = true; }
+
+void EmitterState::SetTag() { m_hasTag = true; }
+
+void EmitterState::SetNonContent() { m_hasNonContent = true; }
+
+void EmitterState::SetLongKey() {
+ assert(!m_groups.empty());
if (m_groups.empty()) {
- return;
+ return;
}
-
+
assert(m_groups.back()->type == GroupType::Map);
m_groups.back()->longKey = true;
-}
-
-void EmitterState::ForceFlow() {
- assert(!m_groups.empty());
+}
+
+void EmitterState::ForceFlow() {
+ assert(!m_groups.empty());
if (m_groups.empty()) {
- return;
+ return;
}
-
+
m_groups.back()->flowType = FlowType::Flow;
-}
-
-void EmitterState::StartedNode() {
- if (m_groups.empty()) {
- m_docCount++;
- } else {
+}
+
+void EmitterState::StartedNode() {
+ if (m_groups.empty()) {
+ m_docCount++;
+ } else {
m_groups.back()->childCount++;
if (m_groups.back()->childCount % 2 == 0) {
m_groups.back()->longKey = false;
}
- }
-
- m_hasAnchor = false;
- m_hasTag = false;
- m_hasNonContent = false;
-}
-
-EmitterNodeType::value EmitterState::NextGroupType(
- GroupType::value type) const {
- if (type == GroupType::Seq) {
- if (GetFlowType(type) == Block)
- return EmitterNodeType::BlockSeq;
- else
- return EmitterNodeType::FlowSeq;
- } else {
- if (GetFlowType(type) == Block)
- return EmitterNodeType::BlockMap;
- else
- return EmitterNodeType::FlowMap;
- }
-
- // can't happen
- assert(false);
- return EmitterNodeType::NoType;
-}
-
-void EmitterState::StartedDoc() {
- m_hasAnchor = false;
- m_hasTag = false;
- m_hasNonContent = false;
-}
-
-void EmitterState::EndedDoc() {
- m_hasAnchor = false;
- m_hasTag = false;
- m_hasNonContent = false;
-}
-
-void EmitterState::StartedScalar() {
- StartedNode();
- ClearModifiedSettings();
-}
-
-void EmitterState::StartedGroup(GroupType::value type) {
- StartedNode();
-
+ }
+
+ m_hasAnchor = false;
+ m_hasTag = false;
+ m_hasNonContent = false;
+}
+
+EmitterNodeType::value EmitterState::NextGroupType(
+ GroupType::value type) const {
+ if (type == GroupType::Seq) {
+ if (GetFlowType(type) == Block)
+ return EmitterNodeType::BlockSeq;
+ else
+ return EmitterNodeType::FlowSeq;
+ } else {
+ if (GetFlowType(type) == Block)
+ return EmitterNodeType::BlockMap;
+ else
+ return EmitterNodeType::FlowMap;
+ }
+
+ // can't happen
+ assert(false);
+ return EmitterNodeType::NoType;
+}
+
+void EmitterState::StartedDoc() {
+ m_hasAnchor = false;
+ m_hasTag = false;
+ m_hasNonContent = false;
+}
+
+void EmitterState::EndedDoc() {
+ m_hasAnchor = false;
+ m_hasTag = false;
+ m_hasNonContent = false;
+}
+
+void EmitterState::StartedScalar() {
+ StartedNode();
+ ClearModifiedSettings();
+}
+
+void EmitterState::StartedGroup(GroupType::value type) {
+ StartedNode();
+
const std::size_t lastGroupIndent =
(m_groups.empty() ? 0 : m_groups.back()->indent);
- m_curIndent += lastGroupIndent;
-
+ m_curIndent += lastGroupIndent;
+
// TODO: Create move constructors for settings types to simplify transfer
std::unique_ptr<Group> pGroup(new Group(type));
-
- // transfer settings (which last until this group is done)
+
+ // transfer settings (which last until this group is done)
//
// NB: if pGroup->modifiedSettings == m_modifiedSettings,
// m_modifiedSettings is not changed!
pGroup->modifiedSettings = std::move(m_modifiedSettings);
-
- // set up group
+
+ // set up group
if (GetFlowType(type) == Block) {
- pGroup->flowType = FlowType::Block;
+ pGroup->flowType = FlowType::Block;
} else {
- pGroup->flowType = FlowType::Flow;
+ pGroup->flowType = FlowType::Flow;
}
- pGroup->indent = GetIndent();
-
+ pGroup->indent = GetIndent();
+
m_groups.push_back(std::move(pGroup));
-}
-
-void EmitterState::EndedGroup(GroupType::value type) {
- if (m_groups.empty()) {
+}
+
+void EmitterState::EndedGroup(GroupType::value type) {
+ if (m_groups.empty()) {
if (type == GroupType::Seq) {
- return SetError(ErrorMsg::UNEXPECTED_END_SEQ);
+ return SetError(ErrorMsg::UNEXPECTED_END_SEQ);
} else {
- return SetError(ErrorMsg::UNEXPECTED_END_MAP);
+ return SetError(ErrorMsg::UNEXPECTED_END_MAP);
}
- }
-
- // get rid of the current group
- {
+ }
+
+ // get rid of the current group
+ {
std::unique_ptr<Group> pFinishedGroup = std::move(m_groups.back());
m_groups.pop_back();
if (pFinishedGroup->type != type) {
- return SetError(ErrorMsg::UNMATCHED_GROUP_TAG);
+ return SetError(ErrorMsg::UNMATCHED_GROUP_TAG);
}
- }
-
- // reset old settings
+ }
+
+ // reset old settings
std::size_t lastIndent = (m_groups.empty() ? 0 : m_groups.back()->indent);
- assert(m_curIndent >= lastIndent);
- m_curIndent -= lastIndent;
-
- // some global settings that we changed may have been overridden
- // by a local setting we just popped, so we need to restore them
- m_globalModifiedSettings.restore();
-
- ClearModifiedSettings();
-}
-
-EmitterNodeType::value EmitterState::CurGroupNodeType() const {
+ assert(m_curIndent >= lastIndent);
+ m_curIndent -= lastIndent;
+
+ // some global settings that we changed may have been overridden
+ // by a local setting we just popped, so we need to restore them
+ m_globalModifiedSettings.restore();
+
+ ClearModifiedSettings();
+}
+
+EmitterNodeType::value EmitterState::CurGroupNodeType() const {
if (m_groups.empty()) {
- return EmitterNodeType::NoType;
+ return EmitterNodeType::NoType;
}
-
+
return m_groups.back()->NodeType();
-}
-
-GroupType::value EmitterState::CurGroupType() const {
+}
+
+GroupType::value EmitterState::CurGroupType() const {
return m_groups.empty() ? GroupType::NoType : m_groups.back()->type;
-}
-
-FlowType::value EmitterState::CurGroupFlowType() const {
+}
+
+FlowType::value EmitterState::CurGroupFlowType() const {
return m_groups.empty() ? FlowType::NoType : m_groups.back()->flowType;
-}
-
+}
+
std::size_t EmitterState::CurGroupIndent() const {
return m_groups.empty() ? 0 : m_groups.back()->indent;
-}
-
-std::size_t EmitterState::CurGroupChildCount() const {
+}
+
+std::size_t EmitterState::CurGroupChildCount() const {
return m_groups.empty() ? m_docCount : m_groups.back()->childCount;
-}
-
-bool EmitterState::CurGroupLongKey() const {
+}
+
+bool EmitterState::CurGroupLongKey() const {
return m_groups.empty() ? false : m_groups.back()->longKey;
-}
-
+}
+
std::size_t EmitterState::LastIndent() const {
if (m_groups.size() <= 1) {
- return 0;
+ return 0;
}
-
+
return m_curIndent - m_groups[m_groups.size() - 2]->indent;
-}
-
-void EmitterState::ClearModifiedSettings() { m_modifiedSettings.clear(); }
-
-bool EmitterState::SetOutputCharset(EMITTER_MANIP value,
- FmtScope::value scope) {
- switch (value) {
- case EmitNonAscii:
- case EscapeNonAscii:
- _Set(m_charset, value, scope);
- return true;
- default:
- return false;
- }
-}
-
-bool EmitterState::SetStringFormat(EMITTER_MANIP value, FmtScope::value scope) {
- switch (value) {
- case Auto:
- case SingleQuoted:
- case DoubleQuoted:
- case Literal:
- _Set(m_strFmt, value, scope);
- return true;
- default:
- return false;
- }
-}
-
-bool EmitterState::SetBoolFormat(EMITTER_MANIP value, FmtScope::value scope) {
- switch (value) {
- case OnOffBool:
- case TrueFalseBool:
- case YesNoBool:
- _Set(m_boolFmt, value, scope);
- return true;
- default:
- return false;
- }
-}
-
-bool EmitterState::SetBoolLengthFormat(EMITTER_MANIP value,
- FmtScope::value scope) {
- switch (value) {
- case LongBool:
- case ShortBool:
- _Set(m_boolLengthFmt, value, scope);
- return true;
- default:
- return false;
- }
-}
-
-bool EmitterState::SetBoolCaseFormat(EMITTER_MANIP value,
- FmtScope::value scope) {
- switch (value) {
- case UpperCase:
- case LowerCase:
- case CamelCase:
- _Set(m_boolCaseFmt, value, scope);
- return true;
- default:
- return false;
- }
-}
-
-bool EmitterState::SetIntFormat(EMITTER_MANIP value, FmtScope::value scope) {
- switch (value) {
- case Dec:
- case Hex:
- case Oct:
- _Set(m_intFmt, value, scope);
- return true;
- default:
- return false;
- }
-}
-
-bool EmitterState::SetIndent(std::size_t value, FmtScope::value scope) {
- if (value <= 1)
- return false;
-
- _Set(m_indent, value, scope);
- return true;
-}
-
-bool EmitterState::SetPreCommentIndent(std::size_t value,
- FmtScope::value scope) {
- if (value == 0)
- return false;
-
- _Set(m_preCommentIndent, value, scope);
- return true;
-}
-
-bool EmitterState::SetPostCommentIndent(std::size_t value,
- FmtScope::value scope) {
- if (value == 0)
- return false;
-
- _Set(m_postCommentIndent, value, scope);
- return true;
-}
-
-bool EmitterState::SetFlowType(GroupType::value groupType, EMITTER_MANIP value,
- FmtScope::value scope) {
- switch (value) {
- case Block:
- case Flow:
- _Set(groupType == GroupType::Seq ? m_seqFmt : m_mapFmt, value, scope);
- return true;
- default:
- return false;
- }
-}
-
-EMITTER_MANIP EmitterState::GetFlowType(GroupType::value groupType) const {
- // force flow style if we're currently in a flow
- if (CurGroupFlowType() == FlowType::Flow)
- return Flow;
-
- // otherwise, go with what's asked of us
- return (groupType == GroupType::Seq ? m_seqFmt.get() : m_mapFmt.get());
-}
-
-bool EmitterState::SetMapKeyFormat(EMITTER_MANIP value, FmtScope::value scope) {
- switch (value) {
- case Auto:
- case LongKey:
- _Set(m_mapKeyFmt, value, scope);
- return true;
- default:
- return false;
- }
-}
-
+}
+
+void EmitterState::ClearModifiedSettings() { m_modifiedSettings.clear(); }
+
+bool EmitterState::SetOutputCharset(EMITTER_MANIP value,
+ FmtScope::value scope) {
+ switch (value) {
+ case EmitNonAscii:
+ case EscapeNonAscii:
+ _Set(m_charset, value, scope);
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool EmitterState::SetStringFormat(EMITTER_MANIP value, FmtScope::value scope) {
+ switch (value) {
+ case Auto:
+ case SingleQuoted:
+ case DoubleQuoted:
+ case Literal:
+ _Set(m_strFmt, value, scope);
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool EmitterState::SetBoolFormat(EMITTER_MANIP value, FmtScope::value scope) {
+ switch (value) {
+ case OnOffBool:
+ case TrueFalseBool:
+ case YesNoBool:
+ _Set(m_boolFmt, value, scope);
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool EmitterState::SetBoolLengthFormat(EMITTER_MANIP value,
+ FmtScope::value scope) {
+ switch (value) {
+ case LongBool:
+ case ShortBool:
+ _Set(m_boolLengthFmt, value, scope);
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool EmitterState::SetBoolCaseFormat(EMITTER_MANIP value,
+ FmtScope::value scope) {
+ switch (value) {
+ case UpperCase:
+ case LowerCase:
+ case CamelCase:
+ _Set(m_boolCaseFmt, value, scope);
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool EmitterState::SetIntFormat(EMITTER_MANIP value, FmtScope::value scope) {
+ switch (value) {
+ case Dec:
+ case Hex:
+ case Oct:
+ _Set(m_intFmt, value, scope);
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool EmitterState::SetIndent(std::size_t value, FmtScope::value scope) {
+ if (value <= 1)
+ return false;
+
+ _Set(m_indent, value, scope);
+ return true;
+}
+
+bool EmitterState::SetPreCommentIndent(std::size_t value,
+ FmtScope::value scope) {
+ if (value == 0)
+ return false;
+
+ _Set(m_preCommentIndent, value, scope);
+ return true;
+}
+
+bool EmitterState::SetPostCommentIndent(std::size_t value,
+ FmtScope::value scope) {
+ if (value == 0)
+ return false;
+
+ _Set(m_postCommentIndent, value, scope);
+ return true;
+}
+
+bool EmitterState::SetFlowType(GroupType::value groupType, EMITTER_MANIP value,
+ FmtScope::value scope) {
+ switch (value) {
+ case Block:
+ case Flow:
+ _Set(groupType == GroupType::Seq ? m_seqFmt : m_mapFmt, value, scope);
+ return true;
+ default:
+ return false;
+ }
+}
+
+EMITTER_MANIP EmitterState::GetFlowType(GroupType::value groupType) const {
+ // force flow style if we're currently in a flow
+ if (CurGroupFlowType() == FlowType::Flow)
+ return Flow;
+
+ // otherwise, go with what's asked of us
+ return (groupType == GroupType::Seq ? m_seqFmt.get() : m_mapFmt.get());
+}
+
+bool EmitterState::SetMapKeyFormat(EMITTER_MANIP value, FmtScope::value scope) {
+ switch (value) {
+ case Auto:
+ case LongKey:
+ _Set(m_mapKeyFmt, value, scope);
+ return true;
+ default:
+ return false;
+ }
+}
+
bool EmitterState::SetFloatPrecision(std::size_t value, FmtScope::value scope) {
if (value > std::numeric_limits<float>::digits10 + 1)
- return false;
- _Set(m_floatPrecision, value, scope);
- return true;
-}
-
+ return false;
+ _Set(m_floatPrecision, value, scope);
+ return true;
+}
+
bool EmitterState::SetDoublePrecision(std::size_t value,
FmtScope::value scope) {
if (value > std::numeric_limits<double>::digits10 + 1)
- return false;
- _Set(m_doublePrecision, value, scope);
- return true;
-}
-}
+ return false;
+ _Set(m_doublePrecision, value, scope);
+ return true;
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/emitterstate.h b/contrib/libs/yaml-cpp/src/emitterstate.h
index 719fcfd5ef..0937f000d9 100644
--- a/contrib/libs/yaml-cpp/src/emitterstate.h
+++ b/contrib/libs/yaml-cpp/src/emitterstate.h
@@ -1,203 +1,203 @@
-#ifndef EMITTERSTATE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define EMITTERSTATE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "setting.h"
-#include "yaml-cpp/emitterdef.h"
-#include "yaml-cpp/emittermanip.h"
-
-#include <cassert>
+#ifndef EMITTERSTATE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EMITTERSTATE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "setting.h"
+#include "yaml-cpp/emitterdef.h"
+#include "yaml-cpp/emittermanip.h"
+
+#include <cassert>
#include <memory>
-#include <stack>
-#include <stdexcept>
+#include <stack>
+#include <stdexcept>
#include <vector>
-
-namespace YAML {
-struct FmtScope {
- enum value { Local, Global };
-};
-struct GroupType {
- enum value { NoType, Seq, Map };
-};
-struct FlowType {
- enum value { NoType, Flow, Block };
-};
-
-class EmitterState {
- public:
- EmitterState();
- ~EmitterState();
-
- // basic state checking
- bool good() const { return m_isGood; }
- const std::string GetLastError() const { return m_lastError; }
- void SetError(const std::string& error) {
- m_isGood = false;
- m_lastError = error;
- }
-
- // node handling
- void SetAnchor();
- void SetTag();
- void SetNonContent();
- void SetLongKey();
- void ForceFlow();
- void StartedDoc();
- void EndedDoc();
- void StartedScalar();
- void StartedGroup(GroupType::value type);
- void EndedGroup(GroupType::value type);
-
- EmitterNodeType::value NextGroupType(GroupType::value type) const;
- EmitterNodeType::value CurGroupNodeType() const;
-
- GroupType::value CurGroupType() const;
- FlowType::value CurGroupFlowType() const;
+
+namespace YAML {
+struct FmtScope {
+ enum value { Local, Global };
+};
+struct GroupType {
+ enum value { NoType, Seq, Map };
+};
+struct FlowType {
+ enum value { NoType, Flow, Block };
+};
+
+class EmitterState {
+ public:
+ EmitterState();
+ ~EmitterState();
+
+ // basic state checking
+ bool good() const { return m_isGood; }
+ const std::string GetLastError() const { return m_lastError; }
+ void SetError(const std::string& error) {
+ m_isGood = false;
+ m_lastError = error;
+ }
+
+ // node handling
+ void SetAnchor();
+ void SetTag();
+ void SetNonContent();
+ void SetLongKey();
+ void ForceFlow();
+ void StartedDoc();
+ void EndedDoc();
+ void StartedScalar();
+ void StartedGroup(GroupType::value type);
+ void EndedGroup(GroupType::value type);
+
+ EmitterNodeType::value NextGroupType(GroupType::value type) const;
+ EmitterNodeType::value CurGroupNodeType() const;
+
+ GroupType::value CurGroupType() const;
+ FlowType::value CurGroupFlowType() const;
std::size_t CurGroupIndent() const;
- std::size_t CurGroupChildCount() const;
- bool CurGroupLongKey() const;
-
+ std::size_t CurGroupChildCount() const;
+ bool CurGroupLongKey() const;
+
std::size_t LastIndent() const;
std::size_t CurIndent() const { return m_curIndent; }
- bool HasAnchor() const { return m_hasAnchor; }
- bool HasTag() const { return m_hasTag; }
- bool HasBegunNode() const {
- return m_hasAnchor || m_hasTag || m_hasNonContent;
- }
- bool HasBegunContent() const { return m_hasAnchor || m_hasTag; }
-
- void ClearModifiedSettings();
-
- // formatters
- void SetLocalValue(EMITTER_MANIP value);
-
- bool SetOutputCharset(EMITTER_MANIP value, FmtScope::value scope);
- EMITTER_MANIP GetOutputCharset() const { return m_charset.get(); }
-
- bool SetStringFormat(EMITTER_MANIP value, FmtScope::value scope);
- EMITTER_MANIP GetStringFormat() const { return m_strFmt.get(); }
-
- bool SetBoolFormat(EMITTER_MANIP value, FmtScope::value scope);
- EMITTER_MANIP GetBoolFormat() const { return m_boolFmt.get(); }
-
- bool SetBoolLengthFormat(EMITTER_MANIP value, FmtScope::value scope);
- EMITTER_MANIP GetBoolLengthFormat() const { return m_boolLengthFmt.get(); }
-
- bool SetBoolCaseFormat(EMITTER_MANIP value, FmtScope::value scope);
- EMITTER_MANIP GetBoolCaseFormat() const { return m_boolCaseFmt.get(); }
-
- bool SetIntFormat(EMITTER_MANIP value, FmtScope::value scope);
- EMITTER_MANIP GetIntFormat() const { return m_intFmt.get(); }
-
- bool SetIndent(std::size_t value, FmtScope::value scope);
+ bool HasAnchor() const { return m_hasAnchor; }
+ bool HasTag() const { return m_hasTag; }
+ bool HasBegunNode() const {
+ return m_hasAnchor || m_hasTag || m_hasNonContent;
+ }
+ bool HasBegunContent() const { return m_hasAnchor || m_hasTag; }
+
+ void ClearModifiedSettings();
+
+ // formatters
+ void SetLocalValue(EMITTER_MANIP value);
+
+ bool SetOutputCharset(EMITTER_MANIP value, FmtScope::value scope);
+ EMITTER_MANIP GetOutputCharset() const { return m_charset.get(); }
+
+ bool SetStringFormat(EMITTER_MANIP value, FmtScope::value scope);
+ EMITTER_MANIP GetStringFormat() const { return m_strFmt.get(); }
+
+ bool SetBoolFormat(EMITTER_MANIP value, FmtScope::value scope);
+ EMITTER_MANIP GetBoolFormat() const { return m_boolFmt.get(); }
+
+ bool SetBoolLengthFormat(EMITTER_MANIP value, FmtScope::value scope);
+ EMITTER_MANIP GetBoolLengthFormat() const { return m_boolLengthFmt.get(); }
+
+ bool SetBoolCaseFormat(EMITTER_MANIP value, FmtScope::value scope);
+ EMITTER_MANIP GetBoolCaseFormat() const { return m_boolCaseFmt.get(); }
+
+ bool SetIntFormat(EMITTER_MANIP value, FmtScope::value scope);
+ EMITTER_MANIP GetIntFormat() const { return m_intFmt.get(); }
+
+ bool SetIndent(std::size_t value, FmtScope::value scope);
std::size_t GetIndent() const { return m_indent.get(); }
-
- bool SetPreCommentIndent(std::size_t value, FmtScope::value scope);
+
+ bool SetPreCommentIndent(std::size_t value, FmtScope::value scope);
std::size_t GetPreCommentIndent() const { return m_preCommentIndent.get(); }
- bool SetPostCommentIndent(std::size_t value, FmtScope::value scope);
+ bool SetPostCommentIndent(std::size_t value, FmtScope::value scope);
std::size_t GetPostCommentIndent() const { return m_postCommentIndent.get(); }
-
- bool SetFlowType(GroupType::value groupType, EMITTER_MANIP value,
- FmtScope::value scope);
- EMITTER_MANIP GetFlowType(GroupType::value groupType) const;
-
- bool SetMapKeyFormat(EMITTER_MANIP value, FmtScope::value scope);
- EMITTER_MANIP GetMapKeyFormat() const { return m_mapKeyFmt.get(); }
-
+
+ bool SetFlowType(GroupType::value groupType, EMITTER_MANIP value,
+ FmtScope::value scope);
+ EMITTER_MANIP GetFlowType(GroupType::value groupType) const;
+
+ bool SetMapKeyFormat(EMITTER_MANIP value, FmtScope::value scope);
+ EMITTER_MANIP GetMapKeyFormat() const { return m_mapKeyFmt.get(); }
+
bool SetFloatPrecision(std::size_t value, FmtScope::value scope);
- std::size_t GetFloatPrecision() const { return m_floatPrecision.get(); }
+ std::size_t GetFloatPrecision() const { return m_floatPrecision.get(); }
bool SetDoublePrecision(std::size_t value, FmtScope::value scope);
- std::size_t GetDoublePrecision() const { return m_doublePrecision.get(); }
-
- private:
- template <typename T>
- void _Set(Setting<T>& fmt, T value, FmtScope::value scope);
-
- void StartedNode();
-
- private:
- // basic state ok?
- bool m_isGood;
- std::string m_lastError;
-
- // other state
- Setting<EMITTER_MANIP> m_charset;
- Setting<EMITTER_MANIP> m_strFmt;
- Setting<EMITTER_MANIP> m_boolFmt;
- Setting<EMITTER_MANIP> m_boolLengthFmt;
- Setting<EMITTER_MANIP> m_boolCaseFmt;
- Setting<EMITTER_MANIP> m_intFmt;
- Setting<std::size_t> m_indent;
- Setting<std::size_t> m_preCommentIndent, m_postCommentIndent;
- Setting<EMITTER_MANIP> m_seqFmt;
- Setting<EMITTER_MANIP> m_mapFmt;
- Setting<EMITTER_MANIP> m_mapKeyFmt;
+ std::size_t GetDoublePrecision() const { return m_doublePrecision.get(); }
+
+ private:
+ template <typename T>
+ void _Set(Setting<T>& fmt, T value, FmtScope::value scope);
+
+ void StartedNode();
+
+ private:
+ // basic state ok?
+ bool m_isGood;
+ std::string m_lastError;
+
+ // other state
+ Setting<EMITTER_MANIP> m_charset;
+ Setting<EMITTER_MANIP> m_strFmt;
+ Setting<EMITTER_MANIP> m_boolFmt;
+ Setting<EMITTER_MANIP> m_boolLengthFmt;
+ Setting<EMITTER_MANIP> m_boolCaseFmt;
+ Setting<EMITTER_MANIP> m_intFmt;
+ Setting<std::size_t> m_indent;
+ Setting<std::size_t> m_preCommentIndent, m_postCommentIndent;
+ Setting<EMITTER_MANIP> m_seqFmt;
+ Setting<EMITTER_MANIP> m_mapFmt;
+ Setting<EMITTER_MANIP> m_mapKeyFmt;
Setting<std::size_t> m_floatPrecision;
Setting<std::size_t> m_doublePrecision;
-
- SettingChanges m_modifiedSettings;
- SettingChanges m_globalModifiedSettings;
-
- struct Group {
- explicit Group(GroupType::value type_)
- : type(type_), indent(0), childCount(0), longKey(false) {}
-
- GroupType::value type;
- FlowType::value flowType;
+
+ SettingChanges m_modifiedSettings;
+ SettingChanges m_globalModifiedSettings;
+
+ struct Group {
+ explicit Group(GroupType::value type_)
+ : type(type_), indent(0), childCount(0), longKey(false) {}
+
+ GroupType::value type;
+ FlowType::value flowType;
std::size_t indent;
- std::size_t childCount;
- bool longKey;
-
- SettingChanges modifiedSettings;
-
- EmitterNodeType::value NodeType() const {
- if (type == GroupType::Seq) {
- if (flowType == FlowType::Flow)
- return EmitterNodeType::FlowSeq;
- else
- return EmitterNodeType::BlockSeq;
- } else {
- if (flowType == FlowType::Flow)
- return EmitterNodeType::FlowMap;
- else
- return EmitterNodeType::BlockMap;
- }
-
- // can't get here
- assert(false);
- return EmitterNodeType::NoType;
- }
- };
-
+ std::size_t childCount;
+ bool longKey;
+
+ SettingChanges modifiedSettings;
+
+ EmitterNodeType::value NodeType() const {
+ if (type == GroupType::Seq) {
+ if (flowType == FlowType::Flow)
+ return EmitterNodeType::FlowSeq;
+ else
+ return EmitterNodeType::BlockSeq;
+ } else {
+ if (flowType == FlowType::Flow)
+ return EmitterNodeType::FlowMap;
+ else
+ return EmitterNodeType::BlockMap;
+ }
+
+ // can't get here
+ assert(false);
+ return EmitterNodeType::NoType;
+ }
+ };
+
std::vector<std::unique_ptr<Group>> m_groups;
- std::size_t m_curIndent;
- bool m_hasAnchor;
- bool m_hasTag;
- bool m_hasNonContent;
- std::size_t m_docCount;
-};
-
-template <typename T>
-void EmitterState::_Set(Setting<T>& fmt, T value, FmtScope::value scope) {
- switch (scope) {
- case FmtScope::Local:
- m_modifiedSettings.push(fmt.set(value));
- break;
- case FmtScope::Global:
- fmt.set(value);
- m_globalModifiedSettings.push(
- fmt.set(value)); // this pushes an identity set, so when we restore,
- // it restores to the value here, and not the previous one
- break;
- default:
- assert(false);
- }
-}
-}
-
-#endif // EMITTERSTATE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+ std::size_t m_curIndent;
+ bool m_hasAnchor;
+ bool m_hasTag;
+ bool m_hasNonContent;
+ std::size_t m_docCount;
+};
+
+template <typename T>
+void EmitterState::_Set(Setting<T>& fmt, T value, FmtScope::value scope) {
+ switch (scope) {
+ case FmtScope::Local:
+ m_modifiedSettings.push(fmt.set(value));
+ break;
+ case FmtScope::Global:
+ fmt.set(value);
+ m_globalModifiedSettings.push(
+ fmt.set(value)); // this pushes an identity set, so when we restore,
+ // it restores to the value here, and not the previous one
+ break;
+ default:
+ assert(false);
+ }
+}
+}
+
+#endif // EMITTERSTATE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/emitterutils.cpp b/contrib/libs/yaml-cpp/src/emitterutils.cpp
index 244d2ff65f..147738ad8a 100644
--- a/contrib/libs/yaml-cpp/src/emitterutils.cpp
+++ b/contrib/libs/yaml-cpp/src/emitterutils.cpp
@@ -1,483 +1,483 @@
-#include <iomanip>
-#include <sstream>
-
-#include "emitterutils.h"
-#include "exp.h"
-#include "indentation.h"
-#include "regex_yaml.h"
-#include "regeximpl.h"
-#include "stringsource.h"
-#include "yaml-cpp/binary.h" // IWYU pragma: keep
-#include "yaml-cpp/ostream_wrapper.h"
+#include <iomanip>
+#include <sstream>
+
+#include "emitterutils.h"
+#include "exp.h"
+#include "indentation.h"
+#include "regex_yaml.h"
+#include "regeximpl.h"
+#include "stringsource.h"
+#include "yaml-cpp/binary.h" // IWYU pragma: keep
+#include "yaml-cpp/ostream_wrapper.h"
#include "yaml-cpp/null.h"
-
-namespace YAML {
-namespace Utils {
-namespace {
-enum { REPLACEMENT_CHARACTER = 0xFFFD };
-
-bool IsAnchorChar(int ch) { // test for ns-anchor-char
- switch (ch) {
- case ',':
- case '[':
- case ']':
- case '{':
- case '}': // c-flow-indicator
- case ' ':
- case '\t': // s-white
- case 0xFEFF: // c-byte-order-mark
- case 0xA:
- case 0xD: // b-char
- return false;
- case 0x85:
- return true;
- }
-
- if (ch < 0x20) {
- return false;
- }
-
- if (ch < 0x7E) {
- return true;
- }
-
- if (ch < 0xA0) {
- return false;
- }
- if (ch >= 0xD800 && ch <= 0xDFFF) {
- return false;
- }
- if ((ch & 0xFFFE) == 0xFFFE) {
- return false;
- }
- if ((ch >= 0xFDD0) && (ch <= 0xFDEF)) {
- return false;
- }
- if (ch > 0x10FFFF) {
- return false;
- }
-
- return true;
-}
-
-int Utf8BytesIndicated(char ch) {
- int byteVal = static_cast<unsigned char>(ch);
- switch (byteVal >> 4) {
- case 0:
- case 1:
- case 2:
- case 3:
- case 4:
- case 5:
- case 6:
- case 7:
- return 1;
- case 12:
- case 13:
- return 2;
- case 14:
- return 3;
- case 15:
- return 4;
- default:
- return -1;
- }
-}
-
-bool IsTrailingByte(char ch) { return (ch & 0xC0) == 0x80; }
-
-bool GetNextCodePointAndAdvance(int& codePoint,
- std::string::const_iterator& first,
- std::string::const_iterator last) {
- if (first == last)
- return false;
-
- int nBytes = Utf8BytesIndicated(*first);
- if (nBytes < 1) {
- // Bad lead byte
- ++first;
- codePoint = REPLACEMENT_CHARACTER;
- return true;
- }
-
- if (nBytes == 1) {
- codePoint = *first++;
- return true;
- }
-
- // Gather bits from trailing bytes
- codePoint = static_cast<unsigned char>(*first) & ~(0xFF << (7 - nBytes));
- ++first;
- --nBytes;
- for (; nBytes > 0; ++first, --nBytes) {
- if ((first == last) || !IsTrailingByte(*first)) {
- codePoint = REPLACEMENT_CHARACTER;
- break;
- }
- codePoint <<= 6;
- codePoint |= *first & 0x3F;
- }
-
- // Check for illegal code points
- if (codePoint > 0x10FFFF)
- codePoint = REPLACEMENT_CHARACTER;
- else if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
- codePoint = REPLACEMENT_CHARACTER;
- else if ((codePoint & 0xFFFE) == 0xFFFE)
- codePoint = REPLACEMENT_CHARACTER;
- else if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF)
- codePoint = REPLACEMENT_CHARACTER;
- return true;
-}
-
-void WriteCodePoint(ostream_wrapper& out, int codePoint) {
- if (codePoint < 0 || codePoint > 0x10FFFF) {
- codePoint = REPLACEMENT_CHARACTER;
- }
- if (codePoint < 0x7F) {
- out << static_cast<char>(codePoint);
- } else if (codePoint < 0x7FF) {
- out << static_cast<char>(0xC0 | (codePoint >> 6))
- << static_cast<char>(0x80 | (codePoint & 0x3F));
- } else if (codePoint < 0xFFFF) {
- out << static_cast<char>(0xE0 | (codePoint >> 12))
- << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
- << static_cast<char>(0x80 | (codePoint & 0x3F));
- } else {
- out << static_cast<char>(0xF0 | (codePoint >> 18))
- << static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F))
- << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
- << static_cast<char>(0x80 | (codePoint & 0x3F));
- }
-}
-
-bool IsValidPlainScalar(const std::string& str, FlowType::value flowType,
- bool allowOnlyAscii) {
- // check against null
+
+namespace YAML {
+namespace Utils {
+namespace {
+enum { REPLACEMENT_CHARACTER = 0xFFFD };
+
+bool IsAnchorChar(int ch) { // test for ns-anchor-char
+ switch (ch) {
+ case ',':
+ case '[':
+ case ']':
+ case '{':
+ case '}': // c-flow-indicator
+ case ' ':
+ case '\t': // s-white
+ case 0xFEFF: // c-byte-order-mark
+ case 0xA:
+ case 0xD: // b-char
+ return false;
+ case 0x85:
+ return true;
+ }
+
+ if (ch < 0x20) {
+ return false;
+ }
+
+ if (ch < 0x7E) {
+ return true;
+ }
+
+ if (ch < 0xA0) {
+ return false;
+ }
+ if (ch >= 0xD800 && ch <= 0xDFFF) {
+ return false;
+ }
+ if ((ch & 0xFFFE) == 0xFFFE) {
+ return false;
+ }
+ if ((ch >= 0xFDD0) && (ch <= 0xFDEF)) {
+ return false;
+ }
+ if (ch > 0x10FFFF) {
+ return false;
+ }
+
+ return true;
+}
+
+int Utf8BytesIndicated(char ch) {
+ int byteVal = static_cast<unsigned char>(ch);
+ switch (byteVal >> 4) {
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ return 1;
+ case 12:
+ case 13:
+ return 2;
+ case 14:
+ return 3;
+ case 15:
+ return 4;
+ default:
+ return -1;
+ }
+}
+
+bool IsTrailingByte(char ch) { return (ch & 0xC0) == 0x80; }
+
+bool GetNextCodePointAndAdvance(int& codePoint,
+ std::string::const_iterator& first,
+ std::string::const_iterator last) {
+ if (first == last)
+ return false;
+
+ int nBytes = Utf8BytesIndicated(*first);
+ if (nBytes < 1) {
+ // Bad lead byte
+ ++first;
+ codePoint = REPLACEMENT_CHARACTER;
+ return true;
+ }
+
+ if (nBytes == 1) {
+ codePoint = *first++;
+ return true;
+ }
+
+ // Gather bits from trailing bytes
+ codePoint = static_cast<unsigned char>(*first) & ~(0xFF << (7 - nBytes));
+ ++first;
+ --nBytes;
+ for (; nBytes > 0; ++first, --nBytes) {
+ if ((first == last) || !IsTrailingByte(*first)) {
+ codePoint = REPLACEMENT_CHARACTER;
+ break;
+ }
+ codePoint <<= 6;
+ codePoint |= *first & 0x3F;
+ }
+
+ // Check for illegal code points
+ if (codePoint > 0x10FFFF)
+ codePoint = REPLACEMENT_CHARACTER;
+ else if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
+ codePoint = REPLACEMENT_CHARACTER;
+ else if ((codePoint & 0xFFFE) == 0xFFFE)
+ codePoint = REPLACEMENT_CHARACTER;
+ else if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF)
+ codePoint = REPLACEMENT_CHARACTER;
+ return true;
+}
+
+void WriteCodePoint(ostream_wrapper& out, int codePoint) {
+ if (codePoint < 0 || codePoint > 0x10FFFF) {
+ codePoint = REPLACEMENT_CHARACTER;
+ }
+ if (codePoint < 0x7F) {
+ out << static_cast<char>(codePoint);
+ } else if (codePoint < 0x7FF) {
+ out << static_cast<char>(0xC0 | (codePoint >> 6))
+ << static_cast<char>(0x80 | (codePoint & 0x3F));
+ } else if (codePoint < 0xFFFF) {
+ out << static_cast<char>(0xE0 | (codePoint >> 12))
+ << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
+ << static_cast<char>(0x80 | (codePoint & 0x3F));
+ } else {
+ out << static_cast<char>(0xF0 | (codePoint >> 18))
+ << static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F))
+ << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
+ << static_cast<char>(0x80 | (codePoint & 0x3F));
+ }
+}
+
+bool IsValidPlainScalar(const std::string& str, FlowType::value flowType,
+ bool allowOnlyAscii) {
+ // check against null
if (IsNullString(str)) {
- return false;
- }
-
- // check the start
- const RegEx& start = (flowType == FlowType::Flow ? Exp::PlainScalarInFlow()
- : Exp::PlainScalar());
- if (!start.Matches(str)) {
- return false;
- }
-
- // and check the end for plain whitespace (which can't be faithfully kept in a
- // plain scalar)
- if (!str.empty() && *str.rbegin() == ' ') {
- return false;
- }
-
- // then check until something is disallowed
- static const RegEx& disallowed_flow =
- Exp::EndScalarInFlow() || (Exp::BlankOrBreak() + Exp::Comment()) ||
- Exp::NotPrintable() || Exp::Utf8_ByteOrderMark() || Exp::Break() ||
- Exp::Tab();
- static const RegEx& disallowed_block =
- Exp::EndScalar() || (Exp::BlankOrBreak() + Exp::Comment()) ||
- Exp::NotPrintable() || Exp::Utf8_ByteOrderMark() || Exp::Break() ||
- Exp::Tab();
- const RegEx& disallowed =
- flowType == FlowType::Flow ? disallowed_flow : disallowed_block;
-
- StringCharSource buffer(str.c_str(), str.size());
- while (buffer) {
- if (disallowed.Matches(buffer)) {
- return false;
- }
- if (allowOnlyAscii && (0x80 <= static_cast<unsigned char>(buffer[0]))) {
- return false;
- }
- ++buffer;
- }
-
- return true;
-}
-
-bool IsValidSingleQuotedScalar(const std::string& str, bool escapeNonAscii) {
- // TODO: check for non-printable characters?
- for (std::size_t i = 0; i < str.size(); i++) {
- if (escapeNonAscii && (0x80 <= static_cast<unsigned char>(str[i]))) {
- return false;
- }
- if (str[i] == '\n') {
- return false;
- }
- }
- return true;
-}
-
-bool IsValidLiteralScalar(const std::string& str, FlowType::value flowType,
- bool escapeNonAscii) {
- if (flowType == FlowType::Flow) {
- return false;
- }
-
- // TODO: check for non-printable characters?
- for (std::size_t i = 0; i < str.size(); i++) {
- if (escapeNonAscii && (0x80 <= static_cast<unsigned char>(str[i]))) {
- return false;
- }
- }
- return true;
-}
-
-void WriteDoubleQuoteEscapeSequence(ostream_wrapper& out, int codePoint) {
- static const char hexDigits[] = "0123456789abcdef";
-
- out << "\\";
- int digits = 8;
- if (codePoint < 0xFF) {
- out << "x";
- digits = 2;
- } else if (codePoint < 0xFFFF) {
- out << "u";
- digits = 4;
- } else {
- out << "U";
- digits = 8;
- }
-
- // Write digits into the escape sequence
- for (; digits > 0; --digits)
- out << hexDigits[(codePoint >> (4 * (digits - 1))) & 0xF];
-}
-
-bool WriteAliasName(ostream_wrapper& out, const std::string& str) {
- int codePoint;
- for (std::string::const_iterator i = str.begin();
- GetNextCodePointAndAdvance(codePoint, i, str.end());) {
- if (!IsAnchorChar(codePoint)) {
- return false;
- }
-
- WriteCodePoint(out, codePoint);
- }
- return true;
-}
-}
-
-StringFormat::value ComputeStringFormat(const std::string& str,
- EMITTER_MANIP strFormat,
- FlowType::value flowType,
- bool escapeNonAscii) {
- switch (strFormat) {
- case Auto:
- if (IsValidPlainScalar(str, flowType, escapeNonAscii)) {
- return StringFormat::Plain;
- }
- return StringFormat::DoubleQuoted;
- case SingleQuoted:
- if (IsValidSingleQuotedScalar(str, escapeNonAscii)) {
- return StringFormat::SingleQuoted;
- }
- return StringFormat::DoubleQuoted;
- case DoubleQuoted:
- return StringFormat::DoubleQuoted;
- case Literal:
- if (IsValidLiteralScalar(str, flowType, escapeNonAscii)) {
- return StringFormat::Literal;
- }
- return StringFormat::DoubleQuoted;
- default:
- break;
- }
-
- return StringFormat::DoubleQuoted;
-}
-
-bool WriteSingleQuotedString(ostream_wrapper& out, const std::string& str) {
- out << "'";
- int codePoint;
- for (std::string::const_iterator i = str.begin();
- GetNextCodePointAndAdvance(codePoint, i, str.end());) {
- if (codePoint == '\n') {
- return false; // We can't handle a new line and the attendant indentation
- // yet
- }
-
- if (codePoint == '\'') {
- out << "''";
- } else {
- WriteCodePoint(out, codePoint);
- }
- }
- out << "'";
- return true;
-}
-
-bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
- bool escapeNonAscii) {
- out << "\"";
- int codePoint;
- for (std::string::const_iterator i = str.begin();
- GetNextCodePointAndAdvance(codePoint, i, str.end());) {
- switch (codePoint) {
- case '\"':
- out << "\\\"";
- break;
- case '\\':
- out << "\\\\";
- break;
- case '\n':
- out << "\\n";
- break;
- case '\t':
- out << "\\t";
- break;
- case '\r':
- out << "\\r";
- break;
- case '\b':
- out << "\\b";
- break;
- default:
- if (codePoint < 0x20 ||
- (codePoint >= 0x80 &&
- codePoint <= 0xA0)) { // Control characters and non-breaking space
- WriteDoubleQuoteEscapeSequence(out, codePoint);
- } else if (codePoint == 0xFEFF) { // Byte order marks (ZWNS) should be
- // escaped (YAML 1.2, sec. 5.2)
- WriteDoubleQuoteEscapeSequence(out, codePoint);
- } else if (escapeNonAscii && codePoint > 0x7E) {
- WriteDoubleQuoteEscapeSequence(out, codePoint);
- } else {
- WriteCodePoint(out, codePoint);
- }
- }
- }
- out << "\"";
- return true;
-}
-
-bool WriteLiteralString(ostream_wrapper& out, const std::string& str,
+ return false;
+ }
+
+ // check the start
+ const RegEx& start = (flowType == FlowType::Flow ? Exp::PlainScalarInFlow()
+ : Exp::PlainScalar());
+ if (!start.Matches(str)) {
+ return false;
+ }
+
+ // and check the end for plain whitespace (which can't be faithfully kept in a
+ // plain scalar)
+ if (!str.empty() && *str.rbegin() == ' ') {
+ return false;
+ }
+
+ // then check until something is disallowed
+ static const RegEx& disallowed_flow =
+ Exp::EndScalarInFlow() || (Exp::BlankOrBreak() + Exp::Comment()) ||
+ Exp::NotPrintable() || Exp::Utf8_ByteOrderMark() || Exp::Break() ||
+ Exp::Tab();
+ static const RegEx& disallowed_block =
+ Exp::EndScalar() || (Exp::BlankOrBreak() + Exp::Comment()) ||
+ Exp::NotPrintable() || Exp::Utf8_ByteOrderMark() || Exp::Break() ||
+ Exp::Tab();
+ const RegEx& disallowed =
+ flowType == FlowType::Flow ? disallowed_flow : disallowed_block;
+
+ StringCharSource buffer(str.c_str(), str.size());
+ while (buffer) {
+ if (disallowed.Matches(buffer)) {
+ return false;
+ }
+ if (allowOnlyAscii && (0x80 <= static_cast<unsigned char>(buffer[0]))) {
+ return false;
+ }
+ ++buffer;
+ }
+
+ return true;
+}
+
+bool IsValidSingleQuotedScalar(const std::string& str, bool escapeNonAscii) {
+ // TODO: check for non-printable characters?
+ for (std::size_t i = 0; i < str.size(); i++) {
+ if (escapeNonAscii && (0x80 <= static_cast<unsigned char>(str[i]))) {
+ return false;
+ }
+ if (str[i] == '\n') {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool IsValidLiteralScalar(const std::string& str, FlowType::value flowType,
+ bool escapeNonAscii) {
+ if (flowType == FlowType::Flow) {
+ return false;
+ }
+
+ // TODO: check for non-printable characters?
+ for (std::size_t i = 0; i < str.size(); i++) {
+ if (escapeNonAscii && (0x80 <= static_cast<unsigned char>(str[i]))) {
+ return false;
+ }
+ }
+ return true;
+}
+
+void WriteDoubleQuoteEscapeSequence(ostream_wrapper& out, int codePoint) {
+ static const char hexDigits[] = "0123456789abcdef";
+
+ out << "\\";
+ int digits = 8;
+ if (codePoint < 0xFF) {
+ out << "x";
+ digits = 2;
+ } else if (codePoint < 0xFFFF) {
+ out << "u";
+ digits = 4;
+ } else {
+ out << "U";
+ digits = 8;
+ }
+
+ // Write digits into the escape sequence
+ for (; digits > 0; --digits)
+ out << hexDigits[(codePoint >> (4 * (digits - 1))) & 0xF];
+}
+
+bool WriteAliasName(ostream_wrapper& out, const std::string& str) {
+ int codePoint;
+ for (std::string::const_iterator i = str.begin();
+ GetNextCodePointAndAdvance(codePoint, i, str.end());) {
+ if (!IsAnchorChar(codePoint)) {
+ return false;
+ }
+
+ WriteCodePoint(out, codePoint);
+ }
+ return true;
+}
+}
+
+StringFormat::value ComputeStringFormat(const std::string& str,
+ EMITTER_MANIP strFormat,
+ FlowType::value flowType,
+ bool escapeNonAscii) {
+ switch (strFormat) {
+ case Auto:
+ if (IsValidPlainScalar(str, flowType, escapeNonAscii)) {
+ return StringFormat::Plain;
+ }
+ return StringFormat::DoubleQuoted;
+ case SingleQuoted:
+ if (IsValidSingleQuotedScalar(str, escapeNonAscii)) {
+ return StringFormat::SingleQuoted;
+ }
+ return StringFormat::DoubleQuoted;
+ case DoubleQuoted:
+ return StringFormat::DoubleQuoted;
+ case Literal:
+ if (IsValidLiteralScalar(str, flowType, escapeNonAscii)) {
+ return StringFormat::Literal;
+ }
+ return StringFormat::DoubleQuoted;
+ default:
+ break;
+ }
+
+ return StringFormat::DoubleQuoted;
+}
+
+bool WriteSingleQuotedString(ostream_wrapper& out, const std::string& str) {
+ out << "'";
+ int codePoint;
+ for (std::string::const_iterator i = str.begin();
+ GetNextCodePointAndAdvance(codePoint, i, str.end());) {
+ if (codePoint == '\n') {
+ return false; // We can't handle a new line and the attendant indentation
+ // yet
+ }
+
+ if (codePoint == '\'') {
+ out << "''";
+ } else {
+ WriteCodePoint(out, codePoint);
+ }
+ }
+ out << "'";
+ return true;
+}
+
+bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
+ bool escapeNonAscii) {
+ out << "\"";
+ int codePoint;
+ for (std::string::const_iterator i = str.begin();
+ GetNextCodePointAndAdvance(codePoint, i, str.end());) {
+ switch (codePoint) {
+ case '\"':
+ out << "\\\"";
+ break;
+ case '\\':
+ out << "\\\\";
+ break;
+ case '\n':
+ out << "\\n";
+ break;
+ case '\t':
+ out << "\\t";
+ break;
+ case '\r':
+ out << "\\r";
+ break;
+ case '\b':
+ out << "\\b";
+ break;
+ default:
+ if (codePoint < 0x20 ||
+ (codePoint >= 0x80 &&
+ codePoint <= 0xA0)) { // Control characters and non-breaking space
+ WriteDoubleQuoteEscapeSequence(out, codePoint);
+ } else if (codePoint == 0xFEFF) { // Byte order marks (ZWNS) should be
+ // escaped (YAML 1.2, sec. 5.2)
+ WriteDoubleQuoteEscapeSequence(out, codePoint);
+ } else if (escapeNonAscii && codePoint > 0x7E) {
+ WriteDoubleQuoteEscapeSequence(out, codePoint);
+ } else {
+ WriteCodePoint(out, codePoint);
+ }
+ }
+ }
+ out << "\"";
+ return true;
+}
+
+bool WriteLiteralString(ostream_wrapper& out, const std::string& str,
std::size_t indent) {
- out << "|\n";
- out << IndentTo(indent);
- int codePoint;
- for (std::string::const_iterator i = str.begin();
- GetNextCodePointAndAdvance(codePoint, i, str.end());) {
- if (codePoint == '\n') {
- out << "\n" << IndentTo(indent);
- } else {
- WriteCodePoint(out, codePoint);
- }
- }
- return true;
-}
-
-bool WriteChar(ostream_wrapper& out, char ch) {
- if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) {
- out << ch;
- } else if (ch == '\"') {
- out << "\"\\\"\"";
- } else if (ch == '\t') {
- out << "\"\\t\"";
- } else if (ch == '\n') {
- out << "\"\\n\"";
- } else if (ch == '\b') {
- out << "\"\\b\"";
+ out << "|\n";
+ out << IndentTo(indent);
+ int codePoint;
+ for (std::string::const_iterator i = str.begin();
+ GetNextCodePointAndAdvance(codePoint, i, str.end());) {
+ if (codePoint == '\n') {
+ out << "\n" << IndentTo(indent);
+ } else {
+ WriteCodePoint(out, codePoint);
+ }
+ }
+ return true;
+}
+
+bool WriteChar(ostream_wrapper& out, char ch) {
+ if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) {
+ out << ch;
+ } else if (ch == '\"') {
+ out << "\"\\\"\"";
+ } else if (ch == '\t') {
+ out << "\"\\t\"";
+ } else if (ch == '\n') {
+ out << "\"\\n\"";
+ } else if (ch == '\b') {
+ out << "\"\\b\"";
} else if (ch == '\\') {
out << "\"\\\\\"";
- } else if ((0x20 <= ch && ch <= 0x7e) || ch == ' ') {
- out << "\"" << ch << "\"";
- } else {
- out << "\"";
- WriteDoubleQuoteEscapeSequence(out, ch);
- out << "\"";
- }
- return true;
-}
-
-bool WriteComment(ostream_wrapper& out, const std::string& str,
+ } else if ((0x20 <= ch && ch <= 0x7e) || ch == ' ') {
+ out << "\"" << ch << "\"";
+ } else {
+ out << "\"";
+ WriteDoubleQuoteEscapeSequence(out, ch);
+ out << "\"";
+ }
+ return true;
+}
+
+bool WriteComment(ostream_wrapper& out, const std::string& str,
std::size_t postCommentIndent) {
- const std::size_t curIndent = out.col();
- out << "#" << Indentation(postCommentIndent);
- out.set_comment();
- int codePoint;
- for (std::string::const_iterator i = str.begin();
- GetNextCodePointAndAdvance(codePoint, i, str.end());) {
- if (codePoint == '\n') {
- out << "\n" << IndentTo(curIndent) << "#"
- << Indentation(postCommentIndent);
- out.set_comment();
- } else {
- WriteCodePoint(out, codePoint);
- }
- }
- return true;
-}
-
-bool WriteAlias(ostream_wrapper& out, const std::string& str) {
- out << "*";
- return WriteAliasName(out, str);
-}
-
-bool WriteAnchor(ostream_wrapper& out, const std::string& str) {
- out << "&";
- return WriteAliasName(out, str);
-}
-
-bool WriteTag(ostream_wrapper& out, const std::string& str, bool verbatim) {
- out << (verbatim ? "!<" : "!");
- StringCharSource buffer(str.c_str(), str.size());
- const RegEx& reValid = verbatim ? Exp::URI() : Exp::Tag();
- while (buffer) {
- int n = reValid.Match(buffer);
- if (n <= 0) {
- return false;
- }
-
- while (--n >= 0) {
- out << buffer[0];
- ++buffer;
- }
- }
- if (verbatim) {
- out << ">";
- }
- return true;
-}
-
-bool WriteTagWithPrefix(ostream_wrapper& out, const std::string& prefix,
- const std::string& tag) {
- out << "!";
- StringCharSource prefixBuffer(prefix.c_str(), prefix.size());
- while (prefixBuffer) {
- int n = Exp::URI().Match(prefixBuffer);
- if (n <= 0) {
- return false;
- }
-
- while (--n >= 0) {
- out << prefixBuffer[0];
- ++prefixBuffer;
- }
- }
-
- out << "!";
- StringCharSource tagBuffer(tag.c_str(), tag.size());
- while (tagBuffer) {
- int n = Exp::Tag().Match(tagBuffer);
- if (n <= 0) {
- return false;
- }
-
- while (--n >= 0) {
- out << tagBuffer[0];
- ++tagBuffer;
- }
- }
- return true;
-}
-
-bool WriteBinary(ostream_wrapper& out, const Binary& binary) {
- WriteDoubleQuotedString(out, EncodeBase64(binary.data(), binary.size()),
- false);
- return true;
-}
-}
-}
+ const std::size_t curIndent = out.col();
+ out << "#" << Indentation(postCommentIndent);
+ out.set_comment();
+ int codePoint;
+ for (std::string::const_iterator i = str.begin();
+ GetNextCodePointAndAdvance(codePoint, i, str.end());) {
+ if (codePoint == '\n') {
+ out << "\n" << IndentTo(curIndent) << "#"
+ << Indentation(postCommentIndent);
+ out.set_comment();
+ } else {
+ WriteCodePoint(out, codePoint);
+ }
+ }
+ return true;
+}
+
+bool WriteAlias(ostream_wrapper& out, const std::string& str) {
+ out << "*";
+ return WriteAliasName(out, str);
+}
+
+bool WriteAnchor(ostream_wrapper& out, const std::string& str) {
+ out << "&";
+ return WriteAliasName(out, str);
+}
+
+bool WriteTag(ostream_wrapper& out, const std::string& str, bool verbatim) {
+ out << (verbatim ? "!<" : "!");
+ StringCharSource buffer(str.c_str(), str.size());
+ const RegEx& reValid = verbatim ? Exp::URI() : Exp::Tag();
+ while (buffer) {
+ int n = reValid.Match(buffer);
+ if (n <= 0) {
+ return false;
+ }
+
+ while (--n >= 0) {
+ out << buffer[0];
+ ++buffer;
+ }
+ }
+ if (verbatim) {
+ out << ">";
+ }
+ return true;
+}
+
+bool WriteTagWithPrefix(ostream_wrapper& out, const std::string& prefix,
+ const std::string& tag) {
+ out << "!";
+ StringCharSource prefixBuffer(prefix.c_str(), prefix.size());
+ while (prefixBuffer) {
+ int n = Exp::URI().Match(prefixBuffer);
+ if (n <= 0) {
+ return false;
+ }
+
+ while (--n >= 0) {
+ out << prefixBuffer[0];
+ ++prefixBuffer;
+ }
+ }
+
+ out << "!";
+ StringCharSource tagBuffer(tag.c_str(), tag.size());
+ while (tagBuffer) {
+ int n = Exp::Tag().Match(tagBuffer);
+ if (n <= 0) {
+ return false;
+ }
+
+ while (--n >= 0) {
+ out << tagBuffer[0];
+ ++tagBuffer;
+ }
+ }
+ return true;
+}
+
+bool WriteBinary(ostream_wrapper& out, const Binary& binary) {
+ WriteDoubleQuotedString(out, EncodeBase64(binary.data(), binary.size()),
+ false);
+ return true;
+}
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/emitterutils.h b/contrib/libs/yaml-cpp/src/emitterutils.h
index 1a668a524c..6cc7319147 100644
--- a/contrib/libs/yaml-cpp/src/emitterutils.h
+++ b/contrib/libs/yaml-cpp/src/emitterutils.h
@@ -1,50 +1,50 @@
-#ifndef EMITTERUTILS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define EMITTERUTILS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <string>
-
-#include "emitterstate.h"
-#include "yaml-cpp/emittermanip.h"
-#include "yaml-cpp/ostream_wrapper.h"
-
-namespace YAML {
-class ostream_wrapper;
-} // namespace YAML
-
-namespace YAML {
-class Binary;
-
-struct StringFormat {
- enum value { Plain, SingleQuoted, DoubleQuoted, Literal };
-};
-
-namespace Utils {
-StringFormat::value ComputeStringFormat(const std::string& str,
- EMITTER_MANIP strFormat,
- FlowType::value flowType,
- bool escapeNonAscii);
-
-bool WriteSingleQuotedString(ostream_wrapper& out, const std::string& str);
-bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
- bool escapeNonAscii);
-bool WriteLiteralString(ostream_wrapper& out, const std::string& str,
+#ifndef EMITTERUTILS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EMITTERUTILS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+
+#include "emitterstate.h"
+#include "yaml-cpp/emittermanip.h"
+#include "yaml-cpp/ostream_wrapper.h"
+
+namespace YAML {
+class ostream_wrapper;
+} // namespace YAML
+
+namespace YAML {
+class Binary;
+
+struct StringFormat {
+ enum value { Plain, SingleQuoted, DoubleQuoted, Literal };
+};
+
+namespace Utils {
+StringFormat::value ComputeStringFormat(const std::string& str,
+ EMITTER_MANIP strFormat,
+ FlowType::value flowType,
+ bool escapeNonAscii);
+
+bool WriteSingleQuotedString(ostream_wrapper& out, const std::string& str);
+bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
+ bool escapeNonAscii);
+bool WriteLiteralString(ostream_wrapper& out, const std::string& str,
std::size_t indent);
-bool WriteChar(ostream_wrapper& out, char ch);
-bool WriteComment(ostream_wrapper& out, const std::string& str,
+bool WriteChar(ostream_wrapper& out, char ch);
+bool WriteComment(ostream_wrapper& out, const std::string& str,
std::size_t postCommentIndent);
-bool WriteAlias(ostream_wrapper& out, const std::string& str);
-bool WriteAnchor(ostream_wrapper& out, const std::string& str);
-bool WriteTag(ostream_wrapper& out, const std::string& str, bool verbatim);
-bool WriteTagWithPrefix(ostream_wrapper& out, const std::string& prefix,
- const std::string& tag);
-bool WriteBinary(ostream_wrapper& out, const Binary& binary);
-}
-}
-
-#endif // EMITTERUTILS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+bool WriteAlias(ostream_wrapper& out, const std::string& str);
+bool WriteAnchor(ostream_wrapper& out, const std::string& str);
+bool WriteTag(ostream_wrapper& out, const std::string& str, bool verbatim);
+bool WriteTagWithPrefix(ostream_wrapper& out, const std::string& prefix,
+ const std::string& tag);
+bool WriteBinary(ostream_wrapper& out, const Binary& binary);
+}
+}
+
+#endif // EMITTERUTILS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/exp.cpp b/contrib/libs/yaml-cpp/src/exp.cpp
index 01a6ed0c4c..695440aec0 100644
--- a/contrib/libs/yaml-cpp/src/exp.cpp
+++ b/contrib/libs/yaml-cpp/src/exp.cpp
@@ -1,136 +1,136 @@
-#include <sstream>
-
-#include "exp.h"
-#include "stream.h"
-#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
-
-namespace YAML {
-struct Mark;
-} // namespace YAML
-
-namespace YAML {
-namespace Exp {
-unsigned ParseHex(const std::string& str, const Mark& mark) {
- unsigned value = 0;
- for (std::size_t i = 0; i < str.size(); i++) {
- char ch = str[i];
- int digit = 0;
- if ('a' <= ch && ch <= 'f')
- digit = ch - 'a' + 10;
- else if ('A' <= ch && ch <= 'F')
- digit = ch - 'A' + 10;
- else if ('0' <= ch && ch <= '9')
- digit = ch - '0';
- else
- throw ParserException(mark, ErrorMsg::INVALID_HEX);
-
- value = (value << 4) + digit;
- }
-
- return value;
-}
-
-std::string Str(unsigned ch) { return std::string(1, static_cast<char>(ch)); }
-
-// Escape
-// . Translates the next 'codeLength' characters into a hex number and returns
-// the result.
-// . Throws if it's not actually hex.
-std::string Escape(Stream& in, int codeLength) {
- // grab string
- std::string str;
- for (int i = 0; i < codeLength; i++)
- str += in.get();
-
- // get the value
- unsigned value = ParseHex(str, in.mark());
-
- // legal unicode?
- if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
- std::stringstream msg;
- msg << ErrorMsg::INVALID_UNICODE << value;
- throw ParserException(in.mark(), msg.str());
- }
-
- // now break it up into chars
- if (value <= 0x7F)
- return Str(value);
- else if (value <= 0x7FF)
- return Str(0xC0 + (value >> 6)) + Str(0x80 + (value & 0x3F));
- else if (value <= 0xFFFF)
- return Str(0xE0 + (value >> 12)) + Str(0x80 + ((value >> 6) & 0x3F)) +
- Str(0x80 + (value & 0x3F));
- else
- return Str(0xF0 + (value >> 18)) + Str(0x80 + ((value >> 12) & 0x3F)) +
- Str(0x80 + ((value >> 6) & 0x3F)) + Str(0x80 + (value & 0x3F));
-}
-
-// Escape
-// . Escapes the sequence starting 'in' (it must begin with a '\' or single
-// quote)
-// and returns the result.
-// . Throws if it's an unknown escape character.
-std::string Escape(Stream& in) {
- // eat slash
- char escape = in.get();
-
- // switch on escape character
- char ch = in.get();
-
- // first do single quote, since it's easier
- if (escape == '\'' && ch == '\'')
- return "\'";
-
- // now do the slash (we're not gonna check if it's a slash - you better pass
- // one!)
- switch (ch) {
- case '0':
- return std::string(1, '\x00');
- case 'a':
- return "\x07";
- case 'b':
- return "\x08";
- case 't':
- case '\t':
- return "\x09";
- case 'n':
- return "\x0A";
- case 'v':
- return "\x0B";
- case 'f':
- return "\x0C";
- case 'r':
- return "\x0D";
- case 'e':
- return "\x1B";
- case ' ':
- return "\x20";
- case '\"':
- return "\"";
- case '\'':
- return "\'";
- case '\\':
- return "\\";
- case '/':
- return "/";
- case 'N':
- return "\x85";
- case '_':
- return "\xA0";
- case 'L':
- return "\xE2\x80\xA8"; // LS (#x2028)
- case 'P':
- return "\xE2\x80\xA9"; // PS (#x2029)
- case 'x':
- return Escape(in, 2);
- case 'u':
- return Escape(in, 4);
- case 'U':
- return Escape(in, 8);
- }
-
- std::stringstream msg;
- throw ParserException(in.mark(), std::string(ErrorMsg::INVALID_ESCAPE) + ch);
-}
-}
-}
+#include <sstream>
+
+#include "exp.h"
+#include "stream.h"
+#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
+
+namespace YAML {
+struct Mark;
+} // namespace YAML
+
+namespace YAML {
+namespace Exp {
+unsigned ParseHex(const std::string& str, const Mark& mark) {
+ unsigned value = 0;
+ for (std::size_t i = 0; i < str.size(); i++) {
+ char ch = str[i];
+ int digit = 0;
+ if ('a' <= ch && ch <= 'f')
+ digit = ch - 'a' + 10;
+ else if ('A' <= ch && ch <= 'F')
+ digit = ch - 'A' + 10;
+ else if ('0' <= ch && ch <= '9')
+ digit = ch - '0';
+ else
+ throw ParserException(mark, ErrorMsg::INVALID_HEX);
+
+ value = (value << 4) + digit;
+ }
+
+ return value;
+}
+
+std::string Str(unsigned ch) { return std::string(1, static_cast<char>(ch)); }
+
+// Escape
+// . Translates the next 'codeLength' characters into a hex number and returns
+// the result.
+// . Throws if it's not actually hex.
+std::string Escape(Stream& in, int codeLength) {
+ // grab string
+ std::string str;
+ for (int i = 0; i < codeLength; i++)
+ str += in.get();
+
+ // get the value
+ unsigned value = ParseHex(str, in.mark());
+
+ // legal unicode?
+ if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
+ std::stringstream msg;
+ msg << ErrorMsg::INVALID_UNICODE << value;
+ throw ParserException(in.mark(), msg.str());
+ }
+
+ // now break it up into chars
+ if (value <= 0x7F)
+ return Str(value);
+ else if (value <= 0x7FF)
+ return Str(0xC0 + (value >> 6)) + Str(0x80 + (value & 0x3F));
+ else if (value <= 0xFFFF)
+ return Str(0xE0 + (value >> 12)) + Str(0x80 + ((value >> 6) & 0x3F)) +
+ Str(0x80 + (value & 0x3F));
+ else
+ return Str(0xF0 + (value >> 18)) + Str(0x80 + ((value >> 12) & 0x3F)) +
+ Str(0x80 + ((value >> 6) & 0x3F)) + Str(0x80 + (value & 0x3F));
+}
+
+// Escape
+// . Escapes the sequence starting 'in' (it must begin with a '\' or single
+// quote)
+// and returns the result.
+// . Throws if it's an unknown escape character.
+std::string Escape(Stream& in) {
+ // eat slash
+ char escape = in.get();
+
+ // switch on escape character
+ char ch = in.get();
+
+ // first do single quote, since it's easier
+ if (escape == '\'' && ch == '\'')
+ return "\'";
+
+ // now do the slash (we're not gonna check if it's a slash - you better pass
+ // one!)
+ switch (ch) {
+ case '0':
+ return std::string(1, '\x00');
+ case 'a':
+ return "\x07";
+ case 'b':
+ return "\x08";
+ case 't':
+ case '\t':
+ return "\x09";
+ case 'n':
+ return "\x0A";
+ case 'v':
+ return "\x0B";
+ case 'f':
+ return "\x0C";
+ case 'r':
+ return "\x0D";
+ case 'e':
+ return "\x1B";
+ case ' ':
+ return "\x20";
+ case '\"':
+ return "\"";
+ case '\'':
+ return "\'";
+ case '\\':
+ return "\\";
+ case '/':
+ return "/";
+ case 'N':
+ return "\x85";
+ case '_':
+ return "\xA0";
+ case 'L':
+ return "\xE2\x80\xA8"; // LS (#x2028)
+ case 'P':
+ return "\xE2\x80\xA9"; // PS (#x2029)
+ case 'x':
+ return Escape(in, 2);
+ case 'u':
+ return Escape(in, 4);
+ case 'U':
+ return Escape(in, 8);
+ }
+
+ std::stringstream msg;
+ throw ParserException(in.mark(), std::string(ErrorMsg::INVALID_ESCAPE) + ch);
+}
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/exp.h b/contrib/libs/yaml-cpp/src/exp.h
index 1bd50b7ab1..50b0220b81 100644
--- a/contrib/libs/yaml-cpp/src/exp.h
+++ b/contrib/libs/yaml-cpp/src/exp.h
@@ -1,174 +1,174 @@
-#ifndef EXP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define EXP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <ios>
-#include <string>
-
-#include "regex_yaml.h"
-#include "stream.h"
-
-namespace YAML {
-////////////////////////////////////////////////////////////////////////////////
-// Here we store a bunch of expressions for matching different parts of the
-// file.
-
-namespace Exp {
-// misc
+#ifndef EXP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define EXP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <ios>
+#include <string>
+
+#include "regex_yaml.h"
+#include "stream.h"
+
+namespace YAML {
+////////////////////////////////////////////////////////////////////////////////
+// Here we store a bunch of expressions for matching different parts of the
+// file.
+
+namespace Exp {
+// misc
inline const RegEx& Empty() {
static const RegEx e;
return e;
}
-inline const RegEx& Space() {
- static const RegEx e = RegEx(' ');
- return e;
-}
-inline const RegEx& Tab() {
- static const RegEx e = RegEx('\t');
- return e;
-}
-inline const RegEx& Blank() {
- static const RegEx e = Space() || Tab();
- return e;
-}
-inline const RegEx& Break() {
- static const RegEx e = RegEx('\n') || RegEx("\r\n");
- return e;
-}
-inline const RegEx& BlankOrBreak() {
- static const RegEx e = Blank() || Break();
- return e;
-}
-inline const RegEx& Digit() {
- static const RegEx e = RegEx('0', '9');
- return e;
-}
-inline const RegEx& Alpha() {
- static const RegEx e = RegEx('a', 'z') || RegEx('A', 'Z');
- return e;
-}
-inline const RegEx& AlphaNumeric() {
- static const RegEx e = Alpha() || Digit();
- return e;
-}
-inline const RegEx& Word() {
- static const RegEx e = AlphaNumeric() || RegEx('-');
- return e;
-}
-inline const RegEx& Hex() {
- static const RegEx e = Digit() || RegEx('A', 'F') || RegEx('a', 'f');
- return e;
-}
-// Valid Unicode code points that are not part of c-printable (YAML 1.2, sec.
-// 5.1)
-inline const RegEx& NotPrintable() {
- static const RegEx e =
- RegEx(0) ||
- RegEx("\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x7F", REGEX_OR) ||
- RegEx(0x0E, 0x1F) ||
- (RegEx('\xC2') + (RegEx('\x80', '\x84') || RegEx('\x86', '\x9F')));
- return e;
-}
-inline const RegEx& Utf8_ByteOrderMark() {
- static const RegEx e = RegEx("\xEF\xBB\xBF");
- return e;
-}
-
-// actual tags
-
-inline const RegEx& DocStart() {
- static const RegEx e = RegEx("---") + (BlankOrBreak() || RegEx());
- return e;
-}
-inline const RegEx& DocEnd() {
- static const RegEx e = RegEx("...") + (BlankOrBreak() || RegEx());
- return e;
-}
-inline const RegEx& DocIndicator() {
- static const RegEx e = DocStart() || DocEnd();
- return e;
-}
-inline const RegEx& BlockEntry() {
- static const RegEx e = RegEx('-') + (BlankOrBreak() || RegEx());
- return e;
-}
-inline const RegEx& Key() {
- static const RegEx e = RegEx('?') + BlankOrBreak();
- return e;
-}
-inline const RegEx& KeyInFlow() {
- static const RegEx e = RegEx('?') + BlankOrBreak();
- return e;
-}
-inline const RegEx& Value() {
- static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx());
- return e;
-}
-inline const RegEx& ValueInFlow() {
- static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx(",}", REGEX_OR));
- return e;
-}
-inline const RegEx& ValueInJSONFlow() {
- static const RegEx e = RegEx(':');
- return e;
-}
-inline const RegEx Comment() {
- static const RegEx e = RegEx('#');
- return e;
-}
-inline const RegEx& Anchor() {
- static const RegEx e = !(RegEx("[]{},", REGEX_OR) || BlankOrBreak());
- return e;
-}
-inline const RegEx& AnchorEnd() {
- static const RegEx e = RegEx("?:,]}%@`", REGEX_OR) || BlankOrBreak();
- return e;
-}
-inline const RegEx& URI() {
- static const RegEx e = Word() || RegEx("#;/?:@&=+$,_.!~*'()[]", REGEX_OR) ||
- (RegEx('%') + Hex() + Hex());
- return e;
-}
-inline const RegEx& Tag() {
- static const RegEx e = Word() || RegEx("#;/?:@&=+$_.~*'", REGEX_OR) ||
- (RegEx('%') + Hex() + Hex());
- return e;
-}
-
-// Plain scalar rules:
-// . Cannot start with a blank.
-// . Can never start with any of , [ ] { } # & * ! | > \' \" % @ `
-// . In the block context - ? : must be not be followed with a space.
-// . In the flow context ? is illegal and : and - must not be followed with a
-// space.
-inline const RegEx& PlainScalar() {
- static const RegEx e =
- !(BlankOrBreak() || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) ||
- (RegEx("-?:", REGEX_OR) + (BlankOrBreak() || RegEx())));
- return e;
-}
-inline const RegEx& PlainScalarInFlow() {
- static const RegEx e =
- !(BlankOrBreak() || RegEx("?,[]{}#&*!|>\'\"%@`", REGEX_OR) ||
- (RegEx("-:", REGEX_OR) + Blank()));
- return e;
-}
-inline const RegEx& EndScalar() {
- static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx());
- return e;
-}
-inline const RegEx& EndScalarInFlow() {
- static const RegEx e =
- (RegEx(':') + (BlankOrBreak() || RegEx() || RegEx(",]}", REGEX_OR))) ||
- RegEx(",?[]{}", REGEX_OR);
- return e;
-}
-
+inline const RegEx& Space() {
+ static const RegEx e = RegEx(' ');
+ return e;
+}
+inline const RegEx& Tab() {
+ static const RegEx e = RegEx('\t');
+ return e;
+}
+inline const RegEx& Blank() {
+ static const RegEx e = Space() || Tab();
+ return e;
+}
+inline const RegEx& Break() {
+ static const RegEx e = RegEx('\n') || RegEx("\r\n");
+ return e;
+}
+inline const RegEx& BlankOrBreak() {
+ static const RegEx e = Blank() || Break();
+ return e;
+}
+inline const RegEx& Digit() {
+ static const RegEx e = RegEx('0', '9');
+ return e;
+}
+inline const RegEx& Alpha() {
+ static const RegEx e = RegEx('a', 'z') || RegEx('A', 'Z');
+ return e;
+}
+inline const RegEx& AlphaNumeric() {
+ static const RegEx e = Alpha() || Digit();
+ return e;
+}
+inline const RegEx& Word() {
+ static const RegEx e = AlphaNumeric() || RegEx('-');
+ return e;
+}
+inline const RegEx& Hex() {
+ static const RegEx e = Digit() || RegEx('A', 'F') || RegEx('a', 'f');
+ return e;
+}
+// Valid Unicode code points that are not part of c-printable (YAML 1.2, sec.
+// 5.1)
+inline const RegEx& NotPrintable() {
+ static const RegEx e =
+ RegEx(0) ||
+ RegEx("\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x7F", REGEX_OR) ||
+ RegEx(0x0E, 0x1F) ||
+ (RegEx('\xC2') + (RegEx('\x80', '\x84') || RegEx('\x86', '\x9F')));
+ return e;
+}
+inline const RegEx& Utf8_ByteOrderMark() {
+ static const RegEx e = RegEx("\xEF\xBB\xBF");
+ return e;
+}
+
+// actual tags
+
+inline const RegEx& DocStart() {
+ static const RegEx e = RegEx("---") + (BlankOrBreak() || RegEx());
+ return e;
+}
+inline const RegEx& DocEnd() {
+ static const RegEx e = RegEx("...") + (BlankOrBreak() || RegEx());
+ return e;
+}
+inline const RegEx& DocIndicator() {
+ static const RegEx e = DocStart() || DocEnd();
+ return e;
+}
+inline const RegEx& BlockEntry() {
+ static const RegEx e = RegEx('-') + (BlankOrBreak() || RegEx());
+ return e;
+}
+inline const RegEx& Key() {
+ static const RegEx e = RegEx('?') + BlankOrBreak();
+ return e;
+}
+inline const RegEx& KeyInFlow() {
+ static const RegEx e = RegEx('?') + BlankOrBreak();
+ return e;
+}
+inline const RegEx& Value() {
+ static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx());
+ return e;
+}
+inline const RegEx& ValueInFlow() {
+ static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx(",}", REGEX_OR));
+ return e;
+}
+inline const RegEx& ValueInJSONFlow() {
+ static const RegEx e = RegEx(':');
+ return e;
+}
+inline const RegEx Comment() {
+ static const RegEx e = RegEx('#');
+ return e;
+}
+inline const RegEx& Anchor() {
+ static const RegEx e = !(RegEx("[]{},", REGEX_OR) || BlankOrBreak());
+ return e;
+}
+inline const RegEx& AnchorEnd() {
+ static const RegEx e = RegEx("?:,]}%@`", REGEX_OR) || BlankOrBreak();
+ return e;
+}
+inline const RegEx& URI() {
+ static const RegEx e = Word() || RegEx("#;/?:@&=+$,_.!~*'()[]", REGEX_OR) ||
+ (RegEx('%') + Hex() + Hex());
+ return e;
+}
+inline const RegEx& Tag() {
+ static const RegEx e = Word() || RegEx("#;/?:@&=+$_.~*'", REGEX_OR) ||
+ (RegEx('%') + Hex() + Hex());
+ return e;
+}
+
+// Plain scalar rules:
+// . Cannot start with a blank.
+// . Can never start with any of , [ ] { } # & * ! | > \' \" % @ `
+// . In the block context - ? : must be not be followed with a space.
+// . In the flow context ? is illegal and : and - must not be followed with a
+// space.
+inline const RegEx& PlainScalar() {
+ static const RegEx e =
+ !(BlankOrBreak() || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) ||
+ (RegEx("-?:", REGEX_OR) + (BlankOrBreak() || RegEx())));
+ return e;
+}
+inline const RegEx& PlainScalarInFlow() {
+ static const RegEx e =
+ !(BlankOrBreak() || RegEx("?,[]{}#&*!|>\'\"%@`", REGEX_OR) ||
+ (RegEx("-:", REGEX_OR) + Blank()));
+ return e;
+}
+inline const RegEx& EndScalar() {
+ static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx());
+ return e;
+}
+inline const RegEx& EndScalarInFlow() {
+ static const RegEx e =
+ (RegEx(':') + (BlankOrBreak() || RegEx() || RegEx(",]}", REGEX_OR))) ||
+ RegEx(",?[]{}", REGEX_OR);
+ return e;
+}
+
inline const RegEx& ScanScalarEndInFlow() {
static const RegEx e = (EndScalarInFlow() || (BlankOrBreak() + Comment()));
return e;
@@ -178,45 +178,45 @@ inline const RegEx& ScanScalarEnd() {
static const RegEx e = EndScalar() || (BlankOrBreak() + Comment());
return e;
}
-inline const RegEx& EscSingleQuote() {
- static const RegEx e = RegEx("\'\'");
- return e;
-}
-inline const RegEx& EscBreak() {
- static const RegEx e = RegEx('\\') + Break();
- return e;
-}
-
-inline const RegEx& ChompIndicator() {
- static const RegEx e = RegEx("+-", REGEX_OR);
- return e;
-}
-inline const RegEx& Chomp() {
- static const RegEx e = (ChompIndicator() + Digit()) ||
- (Digit() + ChompIndicator()) || ChompIndicator() ||
- Digit();
- return e;
-}
-
-// and some functions
-std::string Escape(Stream& in);
-}
-
-namespace Keys {
-const char Directive = '%';
-const char FlowSeqStart = '[';
-const char FlowSeqEnd = ']';
-const char FlowMapStart = '{';
-const char FlowMapEnd = '}';
-const char FlowEntry = ',';
-const char Alias = '*';
-const char Anchor = '&';
-const char Tag = '!';
-const char LiteralScalar = '|';
-const char FoldedScalar = '>';
-const char VerbatimTagStart = '<';
-const char VerbatimTagEnd = '>';
-}
-}
-
-#endif // EXP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+inline const RegEx& EscSingleQuote() {
+ static const RegEx e = RegEx("\'\'");
+ return e;
+}
+inline const RegEx& EscBreak() {
+ static const RegEx e = RegEx('\\') + Break();
+ return e;
+}
+
+inline const RegEx& ChompIndicator() {
+ static const RegEx e = RegEx("+-", REGEX_OR);
+ return e;
+}
+inline const RegEx& Chomp() {
+ static const RegEx e = (ChompIndicator() + Digit()) ||
+ (Digit() + ChompIndicator()) || ChompIndicator() ||
+ Digit();
+ return e;
+}
+
+// and some functions
+std::string Escape(Stream& in);
+}
+
+namespace Keys {
+const char Directive = '%';
+const char FlowSeqStart = '[';
+const char FlowSeqEnd = ']';
+const char FlowMapStart = '{';
+const char FlowMapEnd = '}';
+const char FlowEntry = ',';
+const char Alias = '*';
+const char Anchor = '&';
+const char Tag = '!';
+const char LiteralScalar = '|';
+const char FoldedScalar = '>';
+const char VerbatimTagStart = '<';
+const char VerbatimTagEnd = '>';
+}
+}
+
+#endif // EXP_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/indentation.h b/contrib/libs/yaml-cpp/src/indentation.h
index d6b1ce62cd..1a2ccaea2e 100644
--- a/contrib/libs/yaml-cpp/src/indentation.h
+++ b/contrib/libs/yaml-cpp/src/indentation.h
@@ -1,41 +1,41 @@
-#ifndef INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <iostream>
-#include <cstddef>
-
-#include "yaml-cpp/ostream_wrapper.h"
-
-namespace YAML {
-struct Indentation {
- Indentation(std::size_t n_) : n(n_) {}
- std::size_t n;
-};
-
-inline ostream_wrapper& operator<<(ostream_wrapper& out,
- const Indentation& indent) {
- for (std::size_t i = 0; i < indent.n; i++)
- out << ' ';
- return out;
-}
-
-struct IndentTo {
- IndentTo(std::size_t n_) : n(n_) {}
- std::size_t n;
-};
-
-inline ostream_wrapper& operator<<(ostream_wrapper& out,
- const IndentTo& indent) {
- while (out.col() < indent.n)
- out << ' ';
- return out;
-}
-}
-
-#endif // INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <iostream>
+#include <cstddef>
+
+#include "yaml-cpp/ostream_wrapper.h"
+
+namespace YAML {
+struct Indentation {
+ Indentation(std::size_t n_) : n(n_) {}
+ std::size_t n;
+};
+
+inline ostream_wrapper& operator<<(ostream_wrapper& out,
+ const Indentation& indent) {
+ for (std::size_t i = 0; i < indent.n; i++)
+ out << ' ';
+ return out;
+}
+
+struct IndentTo {
+ IndentTo(std::size_t n_) : n(n_) {}
+ std::size_t n;
+};
+
+inline ostream_wrapper& operator<<(ostream_wrapper& out,
+ const IndentTo& indent) {
+ while (out.col() < indent.n)
+ out << ' ';
+ return out;
+}
+}
+
+#endif // INDENTATION_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/memory.cpp b/contrib/libs/yaml-cpp/src/memory.cpp
index 737f70526f..e5f8a9d3f8 100644
--- a/contrib/libs/yaml-cpp/src/memory.cpp
+++ b/contrib/libs/yaml-cpp/src/memory.cpp
@@ -1,26 +1,26 @@
-#include "yaml-cpp/node/detail/memory.h"
-#include "yaml-cpp/node/detail/node.h" // IWYU pragma: keep
-#include "yaml-cpp/node/ptr.h"
-
-namespace YAML {
-namespace detail {
-
-void memory_holder::merge(memory_holder& rhs) {
- if (m_pMemory == rhs.m_pMemory)
- return;
-
- m_pMemory->merge(*rhs.m_pMemory);
- rhs.m_pMemory = m_pMemory;
-}
-
-node& memory::create_node() {
- shared_node pNode(new node);
- m_nodes.insert(pNode);
- return *pNode;
-}
-
-void memory::merge(const memory& rhs) {
- m_nodes.insert(rhs.m_nodes.begin(), rhs.m_nodes.end());
-}
-}
-}
+#include "yaml-cpp/node/detail/memory.h"
+#include "yaml-cpp/node/detail/node.h" // IWYU pragma: keep
+#include "yaml-cpp/node/ptr.h"
+
+namespace YAML {
+namespace detail {
+
+void memory_holder::merge(memory_holder& rhs) {
+ if (m_pMemory == rhs.m_pMemory)
+ return;
+
+ m_pMemory->merge(*rhs.m_pMemory);
+ rhs.m_pMemory = m_pMemory;
+}
+
+node& memory::create_node() {
+ shared_node pNode(new node);
+ m_nodes.insert(pNode);
+ return *pNode;
+}
+
+void memory::merge(const memory& rhs) {
+ m_nodes.insert(rhs.m_nodes.begin(), rhs.m_nodes.end());
+}
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/node.cpp b/contrib/libs/yaml-cpp/src/node.cpp
index 34ccfa2177..2088e13c9a 100644
--- a/contrib/libs/yaml-cpp/src/node.cpp
+++ b/contrib/libs/yaml-cpp/src/node.cpp
@@ -1,12 +1,12 @@
-#include "yaml-cpp/node/node.h"
-#include "nodebuilder.h"
-#include "nodeevents.h"
-
-namespace YAML {
-Node Clone(const Node& node) {
- NodeEvents events(node);
- NodeBuilder builder;
- events.Emit(builder);
- return builder.Root();
-}
-}
+#include "yaml-cpp/node/node.h"
+#include "nodebuilder.h"
+#include "nodeevents.h"
+
+namespace YAML {
+Node Clone(const Node& node) {
+ NodeEvents events(node);
+ NodeBuilder builder;
+ events.Emit(builder);
+ return builder.Root();
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/node_data.cpp b/contrib/libs/yaml-cpp/src/node_data.cpp
index bfe4fec44f..77cd465780 100644
--- a/contrib/libs/yaml-cpp/src/node_data.cpp
+++ b/contrib/libs/yaml-cpp/src/node_data.cpp
@@ -1,300 +1,300 @@
-#include <assert.h>
+#include <assert.h>
#include <iterator>
-#include <sstream>
-
-#include "yaml-cpp/exceptions.h"
-#include "yaml-cpp/node/detail/memory.h"
-#include "yaml-cpp/node/detail/node.h" // IWYU pragma: keep
-#include "yaml-cpp/node/detail/node_data.h"
-#include "yaml-cpp/node/detail/node_iterator.h"
-#include "yaml-cpp/node/ptr.h"
-#include "yaml-cpp/node/type.h"
-
-namespace YAML {
-namespace detail {
-
-std::string node_data::empty_scalar;
-
-node_data::node_data()
- : m_isDefined(false),
- m_mark(Mark::null_mark()),
- m_type(NodeType::Null),
- m_style(EmitterStyle::Default),
- m_seqSize(0) {}
-
-void node_data::mark_defined() {
- if (m_type == NodeType::Undefined)
- m_type = NodeType::Null;
- m_isDefined = true;
-}
-
+#include <sstream>
+
+#include "yaml-cpp/exceptions.h"
+#include "yaml-cpp/node/detail/memory.h"
+#include "yaml-cpp/node/detail/node.h" // IWYU pragma: keep
+#include "yaml-cpp/node/detail/node_data.h"
+#include "yaml-cpp/node/detail/node_iterator.h"
+#include "yaml-cpp/node/ptr.h"
+#include "yaml-cpp/node/type.h"
+
+namespace YAML {
+namespace detail {
+
+std::string node_data::empty_scalar;
+
+node_data::node_data()
+ : m_isDefined(false),
+ m_mark(Mark::null_mark()),
+ m_type(NodeType::Null),
+ m_style(EmitterStyle::Default),
+ m_seqSize(0) {}
+
+void node_data::mark_defined() {
+ if (m_type == NodeType::Undefined)
+ m_type = NodeType::Null;
+ m_isDefined = true;
+}
+
void node_data::set_mark(const Mark& mark) { m_mark = mark; }
-
-void node_data::set_type(NodeType::value type) {
- if (type == NodeType::Undefined) {
- m_type = type;
- m_isDefined = false;
- return;
- }
-
- m_isDefined = true;
- if (type == m_type)
- return;
-
- m_type = type;
-
- switch (m_type) {
- case NodeType::Null:
- break;
- case NodeType::Scalar:
- m_scalar.clear();
- break;
- case NodeType::Sequence:
- reset_sequence();
- break;
- case NodeType::Map:
- reset_map();
- break;
- case NodeType::Undefined:
- assert(false);
- break;
- }
-}
-
-void node_data::set_tag(const std::string& tag) { m_tag = tag; }
-
-void node_data::set_style(EmitterStyle::value style) { m_style = style; }
-
-void node_data::set_null() {
- m_isDefined = true;
- m_type = NodeType::Null;
-}
-
-void node_data::set_scalar(const std::string& scalar) {
- m_isDefined = true;
- m_type = NodeType::Scalar;
- m_scalar = scalar;
-}
-
-// size/iterator
-std::size_t node_data::size() const {
- if (!m_isDefined)
- return 0;
-
- switch (m_type) {
- case NodeType::Sequence:
- compute_seq_size();
- return m_seqSize;
- case NodeType::Map:
- compute_map_size();
- return m_map.size() - m_undefinedPairs.size();
- default:
- return 0;
- }
- return 0;
-}
-
-void node_data::compute_seq_size() const {
- while (m_seqSize < m_sequence.size() && m_sequence[m_seqSize]->is_defined())
- m_seqSize++;
-}
-
-void node_data::compute_map_size() const {
- kv_pairs::iterator it = m_undefinedPairs.begin();
- while (it != m_undefinedPairs.end()) {
+
+void node_data::set_type(NodeType::value type) {
+ if (type == NodeType::Undefined) {
+ m_type = type;
+ m_isDefined = false;
+ return;
+ }
+
+ m_isDefined = true;
+ if (type == m_type)
+ return;
+
+ m_type = type;
+
+ switch (m_type) {
+ case NodeType::Null:
+ break;
+ case NodeType::Scalar:
+ m_scalar.clear();
+ break;
+ case NodeType::Sequence:
+ reset_sequence();
+ break;
+ case NodeType::Map:
+ reset_map();
+ break;
+ case NodeType::Undefined:
+ assert(false);
+ break;
+ }
+}
+
+void node_data::set_tag(const std::string& tag) { m_tag = tag; }
+
+void node_data::set_style(EmitterStyle::value style) { m_style = style; }
+
+void node_data::set_null() {
+ m_isDefined = true;
+ m_type = NodeType::Null;
+}
+
+void node_data::set_scalar(const std::string& scalar) {
+ m_isDefined = true;
+ m_type = NodeType::Scalar;
+ m_scalar = scalar;
+}
+
+// size/iterator
+std::size_t node_data::size() const {
+ if (!m_isDefined)
+ return 0;
+
+ switch (m_type) {
+ case NodeType::Sequence:
+ compute_seq_size();
+ return m_seqSize;
+ case NodeType::Map:
+ compute_map_size();
+ return m_map.size() - m_undefinedPairs.size();
+ default:
+ return 0;
+ }
+ return 0;
+}
+
+void node_data::compute_seq_size() const {
+ while (m_seqSize < m_sequence.size() && m_sequence[m_seqSize]->is_defined())
+ m_seqSize++;
+}
+
+void node_data::compute_map_size() const {
+ kv_pairs::iterator it = m_undefinedPairs.begin();
+ while (it != m_undefinedPairs.end()) {
kv_pairs::iterator jt = std::next(it);
- if (it->first->is_defined() && it->second->is_defined())
- m_undefinedPairs.erase(it);
- it = jt;
- }
-}
-
-const_node_iterator node_data::begin() const {
- if (!m_isDefined)
- return const_node_iterator();
-
- switch (m_type) {
- case NodeType::Sequence:
- return const_node_iterator(m_sequence.begin());
- case NodeType::Map:
- return const_node_iterator(m_map.begin(), m_map.end());
- default:
- return const_node_iterator();
- }
-}
-
-node_iterator node_data::begin() {
- if (!m_isDefined)
- return node_iterator();
-
- switch (m_type) {
- case NodeType::Sequence:
- return node_iterator(m_sequence.begin());
- case NodeType::Map:
- return node_iterator(m_map.begin(), m_map.end());
- default:
- return node_iterator();
- }
-}
-
-const_node_iterator node_data::end() const {
- if (!m_isDefined)
- return const_node_iterator();
-
- switch (m_type) {
- case NodeType::Sequence:
- return const_node_iterator(m_sequence.end());
- case NodeType::Map:
- return const_node_iterator(m_map.end(), m_map.end());
- default:
- return const_node_iterator();
- }
-}
-
-node_iterator node_data::end() {
- if (!m_isDefined)
- return node_iterator();
-
- switch (m_type) {
- case NodeType::Sequence:
- return node_iterator(m_sequence.end());
- case NodeType::Map:
- return node_iterator(m_map.end(), m_map.end());
- default:
- return node_iterator();
- }
-}
-
-// sequence
-void node_data::push_back(node& node, shared_memory_holder /* pMemory */) {
- if (m_type == NodeType::Undefined || m_type == NodeType::Null) {
- m_type = NodeType::Sequence;
- reset_sequence();
- }
-
- if (m_type != NodeType::Sequence)
- throw BadPushback();
-
- m_sequence.push_back(&node);
-}
-
-void node_data::insert(node& key, node& value, shared_memory_holder pMemory) {
- switch (m_type) {
- case NodeType::Map:
- break;
- case NodeType::Undefined:
- case NodeType::Null:
- case NodeType::Sequence:
- convert_to_map(pMemory);
- break;
- case NodeType::Scalar:
- throw BadSubscript();
- }
-
- insert_map_pair(key, value);
-}
-
-// indexing
-node* node_data::get(node& key, shared_memory_holder /* pMemory */) const {
- if (m_type != NodeType::Map) {
- return NULL;
- }
-
- for (node_map::const_iterator it = m_map.begin(); it != m_map.end(); ++it) {
- if (it->first->is(key))
- return it->second;
- }
-
- return NULL;
-}
-
-node& node_data::get(node& key, shared_memory_holder pMemory) {
- switch (m_type) {
- case NodeType::Map:
- break;
- case NodeType::Undefined:
- case NodeType::Null:
- case NodeType::Sequence:
- convert_to_map(pMemory);
- break;
- case NodeType::Scalar:
- throw BadSubscript();
- }
-
- for (node_map::const_iterator it = m_map.begin(); it != m_map.end(); ++it) {
- if (it->first->is(key))
- return *it->second;
- }
-
- node& value = pMemory->create_node();
- insert_map_pair(key, value);
- return value;
-}
-
-bool node_data::remove(node& key, shared_memory_holder /* pMemory */) {
- if (m_type != NodeType::Map)
- return false;
-
- for (node_map::iterator it = m_map.begin(); it != m_map.end(); ++it) {
- if (it->first->is(key)) {
- m_map.erase(it);
- return true;
- }
- }
-
- return false;
-}
-
-void node_data::reset_sequence() {
- m_sequence.clear();
- m_seqSize = 0;
-}
-
-void node_data::reset_map() {
- m_map.clear();
- m_undefinedPairs.clear();
-}
-
-void node_data::insert_map_pair(node& key, node& value) {
+ if (it->first->is_defined() && it->second->is_defined())
+ m_undefinedPairs.erase(it);
+ it = jt;
+ }
+}
+
+const_node_iterator node_data::begin() const {
+ if (!m_isDefined)
+ return const_node_iterator();
+
+ switch (m_type) {
+ case NodeType::Sequence:
+ return const_node_iterator(m_sequence.begin());
+ case NodeType::Map:
+ return const_node_iterator(m_map.begin(), m_map.end());
+ default:
+ return const_node_iterator();
+ }
+}
+
+node_iterator node_data::begin() {
+ if (!m_isDefined)
+ return node_iterator();
+
+ switch (m_type) {
+ case NodeType::Sequence:
+ return node_iterator(m_sequence.begin());
+ case NodeType::Map:
+ return node_iterator(m_map.begin(), m_map.end());
+ default:
+ return node_iterator();
+ }
+}
+
+const_node_iterator node_data::end() const {
+ if (!m_isDefined)
+ return const_node_iterator();
+
+ switch (m_type) {
+ case NodeType::Sequence:
+ return const_node_iterator(m_sequence.end());
+ case NodeType::Map:
+ return const_node_iterator(m_map.end(), m_map.end());
+ default:
+ return const_node_iterator();
+ }
+}
+
+node_iterator node_data::end() {
+ if (!m_isDefined)
+ return node_iterator();
+
+ switch (m_type) {
+ case NodeType::Sequence:
+ return node_iterator(m_sequence.end());
+ case NodeType::Map:
+ return node_iterator(m_map.end(), m_map.end());
+ default:
+ return node_iterator();
+ }
+}
+
+// sequence
+void node_data::push_back(node& node, shared_memory_holder /* pMemory */) {
+ if (m_type == NodeType::Undefined || m_type == NodeType::Null) {
+ m_type = NodeType::Sequence;
+ reset_sequence();
+ }
+
+ if (m_type != NodeType::Sequence)
+ throw BadPushback();
+
+ m_sequence.push_back(&node);
+}
+
+void node_data::insert(node& key, node& value, shared_memory_holder pMemory) {
+ switch (m_type) {
+ case NodeType::Map:
+ break;
+ case NodeType::Undefined:
+ case NodeType::Null:
+ case NodeType::Sequence:
+ convert_to_map(pMemory);
+ break;
+ case NodeType::Scalar:
+ throw BadSubscript();
+ }
+
+ insert_map_pair(key, value);
+}
+
+// indexing
+node* node_data::get(node& key, shared_memory_holder /* pMemory */) const {
+ if (m_type != NodeType::Map) {
+ return NULL;
+ }
+
+ for (node_map::const_iterator it = m_map.begin(); it != m_map.end(); ++it) {
+ if (it->first->is(key))
+ return it->second;
+ }
+
+ return NULL;
+}
+
+node& node_data::get(node& key, shared_memory_holder pMemory) {
+ switch (m_type) {
+ case NodeType::Map:
+ break;
+ case NodeType::Undefined:
+ case NodeType::Null:
+ case NodeType::Sequence:
+ convert_to_map(pMemory);
+ break;
+ case NodeType::Scalar:
+ throw BadSubscript();
+ }
+
+ for (node_map::const_iterator it = m_map.begin(); it != m_map.end(); ++it) {
+ if (it->first->is(key))
+ return *it->second;
+ }
+
+ node& value = pMemory->create_node();
+ insert_map_pair(key, value);
+ return value;
+}
+
+bool node_data::remove(node& key, shared_memory_holder /* pMemory */) {
+ if (m_type != NodeType::Map)
+ return false;
+
+ for (node_map::iterator it = m_map.begin(); it != m_map.end(); ++it) {
+ if (it->first->is(key)) {
+ m_map.erase(it);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void node_data::reset_sequence() {
+ m_sequence.clear();
+ m_seqSize = 0;
+}
+
+void node_data::reset_map() {
+ m_map.clear();
+ m_undefinedPairs.clear();
+}
+
+void node_data::insert_map_pair(node& key, node& value) {
m_map.emplace_back(&key, &value);
- if (!key.is_defined() || !value.is_defined())
+ if (!key.is_defined() || !value.is_defined())
m_undefinedPairs.emplace_back(&key, &value);
-}
-
-void node_data::convert_to_map(shared_memory_holder pMemory) {
- switch (m_type) {
- case NodeType::Undefined:
- case NodeType::Null:
- reset_map();
- m_type = NodeType::Map;
- break;
- case NodeType::Sequence:
- convert_sequence_to_map(pMemory);
- break;
- case NodeType::Map:
- break;
- case NodeType::Scalar:
- assert(false);
- break;
- }
-}
-
-void node_data::convert_sequence_to_map(shared_memory_holder pMemory) {
- assert(m_type == NodeType::Sequence);
-
- reset_map();
- for (std::size_t i = 0; i < m_sequence.size(); i++) {
- std::stringstream stream;
- stream << i;
-
- node& key = pMemory->create_node();
- key.set_scalar(stream.str());
- insert_map_pair(key, *m_sequence[i]);
- }
-
- reset_sequence();
- m_type = NodeType::Map;
-}
-}
-}
+}
+
+void node_data::convert_to_map(shared_memory_holder pMemory) {
+ switch (m_type) {
+ case NodeType::Undefined:
+ case NodeType::Null:
+ reset_map();
+ m_type = NodeType::Map;
+ break;
+ case NodeType::Sequence:
+ convert_sequence_to_map(pMemory);
+ break;
+ case NodeType::Map:
+ break;
+ case NodeType::Scalar:
+ assert(false);
+ break;
+ }
+}
+
+void node_data::convert_sequence_to_map(shared_memory_holder pMemory) {
+ assert(m_type == NodeType::Sequence);
+
+ reset_map();
+ for (std::size_t i = 0; i < m_sequence.size(); i++) {
+ std::stringstream stream;
+ stream << i;
+
+ node& key = pMemory->create_node();
+ key.set_scalar(stream.str());
+ insert_map_pair(key, *m_sequence[i]);
+ }
+
+ reset_sequence();
+ m_type = NodeType::Map;
+}
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/nodebuilder.cpp b/contrib/libs/yaml-cpp/src/nodebuilder.cpp
index da09e7fd44..093d2efeb7 100644
--- a/contrib/libs/yaml-cpp/src/nodebuilder.cpp
+++ b/contrib/libs/yaml-cpp/src/nodebuilder.cpp
@@ -1,130 +1,130 @@
-#include <assert.h>
-#include <cassert>
-
-#include "nodebuilder.h"
-#include "yaml-cpp/node/detail/node.h"
-#include "yaml-cpp/node/impl.h"
-#include "yaml-cpp/node/node.h"
-#include "yaml-cpp/node/type.h"
-
-namespace YAML {
-struct Mark;
-
-NodeBuilder::NodeBuilder()
- : m_pMemory(new detail::memory_holder), m_pRoot(0), m_mapDepth(0) {
- m_anchors.push_back(0); // since the anchors start at 1
-}
-
-NodeBuilder::~NodeBuilder() {}
-
-Node NodeBuilder::Root() {
- if (!m_pRoot)
- return Node();
-
- return Node(*m_pRoot, m_pMemory);
-}
-
-void NodeBuilder::OnDocumentStart(const Mark&) {}
-
-void NodeBuilder::OnDocumentEnd() {}
-
-void NodeBuilder::OnNull(const Mark& mark, anchor_t anchor) {
- detail::node& node = Push(mark, anchor);
- node.set_null();
- Pop();
-}
-
-void NodeBuilder::OnAlias(const Mark& /* mark */, anchor_t anchor) {
- detail::node& node = *m_anchors[anchor];
- Push(node);
- Pop();
-}
-
-void NodeBuilder::OnScalar(const Mark& mark, const std::string& tag,
- anchor_t anchor, const std::string& value) {
- detail::node& node = Push(mark, anchor);
- node.set_scalar(value);
- node.set_tag(tag);
- Pop();
-}
-
+#include <assert.h>
+#include <cassert>
+
+#include "nodebuilder.h"
+#include "yaml-cpp/node/detail/node.h"
+#include "yaml-cpp/node/impl.h"
+#include "yaml-cpp/node/node.h"
+#include "yaml-cpp/node/type.h"
+
+namespace YAML {
+struct Mark;
+
+NodeBuilder::NodeBuilder()
+ : m_pMemory(new detail::memory_holder), m_pRoot(0), m_mapDepth(0) {
+ m_anchors.push_back(0); // since the anchors start at 1
+}
+
+NodeBuilder::~NodeBuilder() {}
+
+Node NodeBuilder::Root() {
+ if (!m_pRoot)
+ return Node();
+
+ return Node(*m_pRoot, m_pMemory);
+}
+
+void NodeBuilder::OnDocumentStart(const Mark&) {}
+
+void NodeBuilder::OnDocumentEnd() {}
+
+void NodeBuilder::OnNull(const Mark& mark, anchor_t anchor) {
+ detail::node& node = Push(mark, anchor);
+ node.set_null();
+ Pop();
+}
+
+void NodeBuilder::OnAlias(const Mark& /* mark */, anchor_t anchor) {
+ detail::node& node = *m_anchors[anchor];
+ Push(node);
+ Pop();
+}
+
+void NodeBuilder::OnScalar(const Mark& mark, const std::string& tag,
+ anchor_t anchor, const std::string& value) {
+ detail::node& node = Push(mark, anchor);
+ node.set_scalar(value);
+ node.set_tag(tag);
+ Pop();
+}
+
void NodeBuilder::OnSequenceStart(const Mark& mark, const std::string& tag,
anchor_t anchor, EmitterStyle::value style) {
- detail::node& node = Push(mark, anchor);
- node.set_tag(tag);
- node.set_type(NodeType::Sequence);
- node.set_style(style);
-}
-
-void NodeBuilder::OnSequenceEnd() { Pop(); }
-
-void NodeBuilder::OnMapStart(const Mark& mark, const std::string& tag,
- anchor_t anchor, EmitterStyle::value style) {
- detail::node& node = Push(mark, anchor);
- node.set_type(NodeType::Map);
- node.set_tag(tag);
- node.set_style(style);
- m_mapDepth++;
-}
-
-void NodeBuilder::OnMapEnd() {
- assert(m_mapDepth > 0);
- m_mapDepth--;
- Pop();
-}
-
-detail::node& NodeBuilder::Push(const Mark& mark, anchor_t anchor) {
- detail::node& node = m_pMemory->create_node();
- node.set_mark(mark);
- RegisterAnchor(anchor, node);
- Push(node);
- return node;
-}
-
-void NodeBuilder::Push(detail::node& node) {
- const bool needsKey =
- (!m_stack.empty() && m_stack.back()->type() == NodeType::Map &&
- m_keys.size() < m_mapDepth);
-
- m_stack.push_back(&node);
- if (needsKey)
- m_keys.push_back(PushedKey(&node, false));
-}
-
-void NodeBuilder::Pop() {
- assert(!m_stack.empty());
- if (m_stack.size() == 1) {
- m_pRoot = m_stack[0];
- m_stack.pop_back();
- return;
- }
-
- detail::node& node = *m_stack.back();
- m_stack.pop_back();
-
- detail::node& collection = *m_stack.back();
-
- if (collection.type() == NodeType::Sequence) {
- collection.push_back(node, m_pMemory);
- } else if (collection.type() == NodeType::Map) {
- assert(!m_keys.empty());
- PushedKey& key = m_keys.back();
- if (key.second) {
- collection.insert(*key.first, node, m_pMemory);
- m_keys.pop_back();
- } else {
- key.second = true;
- }
- } else {
- assert(false);
- m_stack.clear();
- }
-}
-
-void NodeBuilder::RegisterAnchor(anchor_t anchor, detail::node& node) {
- if (anchor) {
- assert(anchor == m_anchors.size());
- m_anchors.push_back(&node);
- }
-}
-}
+ detail::node& node = Push(mark, anchor);
+ node.set_tag(tag);
+ node.set_type(NodeType::Sequence);
+ node.set_style(style);
+}
+
+void NodeBuilder::OnSequenceEnd() { Pop(); }
+
+void NodeBuilder::OnMapStart(const Mark& mark, const std::string& tag,
+ anchor_t anchor, EmitterStyle::value style) {
+ detail::node& node = Push(mark, anchor);
+ node.set_type(NodeType::Map);
+ node.set_tag(tag);
+ node.set_style(style);
+ m_mapDepth++;
+}
+
+void NodeBuilder::OnMapEnd() {
+ assert(m_mapDepth > 0);
+ m_mapDepth--;
+ Pop();
+}
+
+detail::node& NodeBuilder::Push(const Mark& mark, anchor_t anchor) {
+ detail::node& node = m_pMemory->create_node();
+ node.set_mark(mark);
+ RegisterAnchor(anchor, node);
+ Push(node);
+ return node;
+}
+
+void NodeBuilder::Push(detail::node& node) {
+ const bool needsKey =
+ (!m_stack.empty() && m_stack.back()->type() == NodeType::Map &&
+ m_keys.size() < m_mapDepth);
+
+ m_stack.push_back(&node);
+ if (needsKey)
+ m_keys.push_back(PushedKey(&node, false));
+}
+
+void NodeBuilder::Pop() {
+ assert(!m_stack.empty());
+ if (m_stack.size() == 1) {
+ m_pRoot = m_stack[0];
+ m_stack.pop_back();
+ return;
+ }
+
+ detail::node& node = *m_stack.back();
+ m_stack.pop_back();
+
+ detail::node& collection = *m_stack.back();
+
+ if (collection.type() == NodeType::Sequence) {
+ collection.push_back(node, m_pMemory);
+ } else if (collection.type() == NodeType::Map) {
+ assert(!m_keys.empty());
+ PushedKey& key = m_keys.back();
+ if (key.second) {
+ collection.insert(*key.first, node, m_pMemory);
+ m_keys.pop_back();
+ } else {
+ key.second = true;
+ }
+ } else {
+ assert(false);
+ m_stack.clear();
+ }
+}
+
+void NodeBuilder::RegisterAnchor(anchor_t anchor, detail::node& node) {
+ if (anchor) {
+ assert(anchor == m_anchors.size());
+ m_anchors.push_back(&node);
+ }
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/nodebuilder.h b/contrib/libs/yaml-cpp/src/nodebuilder.h
index 9d81488af5..a6a47f007b 100644
--- a/contrib/libs/yaml-cpp/src/nodebuilder.h
+++ b/contrib/libs/yaml-cpp/src/nodebuilder.h
@@ -1,70 +1,70 @@
-#ifndef NODE_NODEBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define NODE_NODEBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <vector>
-
-#include "yaml-cpp/anchor.h"
-#include "yaml-cpp/emitterstyle.h"
-#include "yaml-cpp/eventhandler.h"
-#include "yaml-cpp/node/ptr.h"
-
-namespace YAML {
-namespace detail {
-class node;
-} // namespace detail
-struct Mark;
-} // namespace YAML
-
-namespace YAML {
-class Node;
-
-class NodeBuilder : public EventHandler {
- public:
- NodeBuilder();
- virtual ~NodeBuilder();
-
- Node Root();
-
- virtual void OnDocumentStart(const Mark& mark);
- virtual void OnDocumentEnd();
-
- virtual void OnNull(const Mark& mark, anchor_t anchor);
- virtual void OnAlias(const Mark& mark, anchor_t anchor);
- virtual void OnScalar(const Mark& mark, const std::string& tag,
- anchor_t anchor, const std::string& value);
-
- virtual void OnSequenceStart(const Mark& mark, const std::string& tag,
- anchor_t anchor, EmitterStyle::value style);
- virtual void OnSequenceEnd();
-
- virtual void OnMapStart(const Mark& mark, const std::string& tag,
- anchor_t anchor, EmitterStyle::value style);
- virtual void OnMapEnd();
-
- private:
- detail::node& Push(const Mark& mark, anchor_t anchor);
- void Push(detail::node& node);
- void Pop();
- void RegisterAnchor(anchor_t anchor, detail::node& node);
-
- private:
- detail::shared_memory_holder m_pMemory;
- detail::node* m_pRoot;
-
- typedef std::vector<detail::node*> Nodes;
- Nodes m_stack;
- Nodes m_anchors;
-
- typedef std::pair<detail::node*, bool> PushedKey;
- std::vector<PushedKey> m_keys;
- std::size_t m_mapDepth;
-};
-}
-
-#endif // NODE_NODEBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef NODE_NODEBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_NODEBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <vector>
+
+#include "yaml-cpp/anchor.h"
+#include "yaml-cpp/emitterstyle.h"
+#include "yaml-cpp/eventhandler.h"
+#include "yaml-cpp/node/ptr.h"
+
+namespace YAML {
+namespace detail {
+class node;
+} // namespace detail
+struct Mark;
+} // namespace YAML
+
+namespace YAML {
+class Node;
+
+class NodeBuilder : public EventHandler {
+ public:
+ NodeBuilder();
+ virtual ~NodeBuilder();
+
+ Node Root();
+
+ virtual void OnDocumentStart(const Mark& mark);
+ virtual void OnDocumentEnd();
+
+ virtual void OnNull(const Mark& mark, anchor_t anchor);
+ virtual void OnAlias(const Mark& mark, anchor_t anchor);
+ virtual void OnScalar(const Mark& mark, const std::string& tag,
+ anchor_t anchor, const std::string& value);
+
+ virtual void OnSequenceStart(const Mark& mark, const std::string& tag,
+ anchor_t anchor, EmitterStyle::value style);
+ virtual void OnSequenceEnd();
+
+ virtual void OnMapStart(const Mark& mark, const std::string& tag,
+ anchor_t anchor, EmitterStyle::value style);
+ virtual void OnMapEnd();
+
+ private:
+ detail::node& Push(const Mark& mark, anchor_t anchor);
+ void Push(detail::node& node);
+ void Pop();
+ void RegisterAnchor(anchor_t anchor, detail::node& node);
+
+ private:
+ detail::shared_memory_holder m_pMemory;
+ detail::node* m_pRoot;
+
+ typedef std::vector<detail::node*> Nodes;
+ Nodes m_stack;
+ Nodes m_anchors;
+
+ typedef std::pair<detail::node*, bool> PushedKey;
+ std::vector<PushedKey> m_keys;
+ std::size_t m_mapDepth;
+};
+}
+
+#endif // NODE_NODEBUILDER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/nodeevents.cpp b/contrib/libs/yaml-cpp/src/nodeevents.cpp
index 726fc13ae6..82261feb05 100644
--- a/contrib/libs/yaml-cpp/src/nodeevents.cpp
+++ b/contrib/libs/yaml-cpp/src/nodeevents.cpp
@@ -1,101 +1,101 @@
-#include "nodeevents.h"
-#include "yaml-cpp/eventhandler.h"
-#include "yaml-cpp/mark.h"
-#include "yaml-cpp/node/detail/node.h"
-#include "yaml-cpp/node/detail/node_iterator.h"
-#include "yaml-cpp/node/node.h"
-#include "yaml-cpp/node/type.h"
-
-namespace YAML {
-void NodeEvents::AliasManager::RegisterReference(const detail::node& node) {
- m_anchorByIdentity.insert(std::make_pair(node.ref(), _CreateNewAnchor()));
-}
-
-anchor_t NodeEvents::AliasManager::LookupAnchor(
- const detail::node& node) const {
- AnchorByIdentity::const_iterator it = m_anchorByIdentity.find(node.ref());
- if (it == m_anchorByIdentity.end())
- return 0;
- return it->second;
-}
-
-NodeEvents::NodeEvents(const Node& node)
- : m_pMemory(node.m_pMemory), m_root(node.m_pNode) {
- if (m_root)
- Setup(*m_root);
-}
-
-void NodeEvents::Setup(const detail::node& node) {
- int& refCount = m_refCount[node.ref()];
- refCount++;
- if (refCount > 1)
- return;
-
- if (node.type() == NodeType::Sequence) {
- for (detail::const_node_iterator it = node.begin(); it != node.end(); ++it)
- Setup(**it);
- } else if (node.type() == NodeType::Map) {
- for (detail::const_node_iterator it = node.begin(); it != node.end();
- ++it) {
- Setup(*it->first);
- Setup(*it->second);
- }
- }
-}
-
-void NodeEvents::Emit(EventHandler& handler) {
- AliasManager am;
-
- handler.OnDocumentStart(Mark());
- if (m_root)
- Emit(*m_root, handler, am);
- handler.OnDocumentEnd();
-}
-
-void NodeEvents::Emit(const detail::node& node, EventHandler& handler,
- AliasManager& am) const {
- anchor_t anchor = NullAnchor;
- if (IsAliased(node)) {
- anchor = am.LookupAnchor(node);
- if (anchor) {
- handler.OnAlias(Mark(), anchor);
- return;
- }
-
- am.RegisterReference(node);
- anchor = am.LookupAnchor(node);
- }
-
- switch (node.type()) {
- case NodeType::Undefined:
- break;
- case NodeType::Null:
- handler.OnNull(Mark(), anchor);
- break;
- case NodeType::Scalar:
- handler.OnScalar(Mark(), node.tag(), anchor, node.scalar());
- break;
- case NodeType::Sequence:
- handler.OnSequenceStart(Mark(), node.tag(), anchor, node.style());
- for (detail::const_node_iterator it = node.begin(); it != node.end();
- ++it)
- Emit(**it, handler, am);
- handler.OnSequenceEnd();
- break;
- case NodeType::Map:
- handler.OnMapStart(Mark(), node.tag(), anchor, node.style());
- for (detail::const_node_iterator it = node.begin(); it != node.end();
- ++it) {
- Emit(*it->first, handler, am);
- Emit(*it->second, handler, am);
- }
- handler.OnMapEnd();
- break;
- }
-}
-
-bool NodeEvents::IsAliased(const detail::node& node) const {
- RefCount::const_iterator it = m_refCount.find(node.ref());
- return it != m_refCount.end() && it->second > 1;
-}
-}
+#include "nodeevents.h"
+#include "yaml-cpp/eventhandler.h"
+#include "yaml-cpp/mark.h"
+#include "yaml-cpp/node/detail/node.h"
+#include "yaml-cpp/node/detail/node_iterator.h"
+#include "yaml-cpp/node/node.h"
+#include "yaml-cpp/node/type.h"
+
+namespace YAML {
+void NodeEvents::AliasManager::RegisterReference(const detail::node& node) {
+ m_anchorByIdentity.insert(std::make_pair(node.ref(), _CreateNewAnchor()));
+}
+
+anchor_t NodeEvents::AliasManager::LookupAnchor(
+ const detail::node& node) const {
+ AnchorByIdentity::const_iterator it = m_anchorByIdentity.find(node.ref());
+ if (it == m_anchorByIdentity.end())
+ return 0;
+ return it->second;
+}
+
+NodeEvents::NodeEvents(const Node& node)
+ : m_pMemory(node.m_pMemory), m_root(node.m_pNode) {
+ if (m_root)
+ Setup(*m_root);
+}
+
+void NodeEvents::Setup(const detail::node& node) {
+ int& refCount = m_refCount[node.ref()];
+ refCount++;
+ if (refCount > 1)
+ return;
+
+ if (node.type() == NodeType::Sequence) {
+ for (detail::const_node_iterator it = node.begin(); it != node.end(); ++it)
+ Setup(**it);
+ } else if (node.type() == NodeType::Map) {
+ for (detail::const_node_iterator it = node.begin(); it != node.end();
+ ++it) {
+ Setup(*it->first);
+ Setup(*it->second);
+ }
+ }
+}
+
+void NodeEvents::Emit(EventHandler& handler) {
+ AliasManager am;
+
+ handler.OnDocumentStart(Mark());
+ if (m_root)
+ Emit(*m_root, handler, am);
+ handler.OnDocumentEnd();
+}
+
+void NodeEvents::Emit(const detail::node& node, EventHandler& handler,
+ AliasManager& am) const {
+ anchor_t anchor = NullAnchor;
+ if (IsAliased(node)) {
+ anchor = am.LookupAnchor(node);
+ if (anchor) {
+ handler.OnAlias(Mark(), anchor);
+ return;
+ }
+
+ am.RegisterReference(node);
+ anchor = am.LookupAnchor(node);
+ }
+
+ switch (node.type()) {
+ case NodeType::Undefined:
+ break;
+ case NodeType::Null:
+ handler.OnNull(Mark(), anchor);
+ break;
+ case NodeType::Scalar:
+ handler.OnScalar(Mark(), node.tag(), anchor, node.scalar());
+ break;
+ case NodeType::Sequence:
+ handler.OnSequenceStart(Mark(), node.tag(), anchor, node.style());
+ for (detail::const_node_iterator it = node.begin(); it != node.end();
+ ++it)
+ Emit(**it, handler, am);
+ handler.OnSequenceEnd();
+ break;
+ case NodeType::Map:
+ handler.OnMapStart(Mark(), node.tag(), anchor, node.style());
+ for (detail::const_node_iterator it = node.begin(); it != node.end();
+ ++it) {
+ Emit(*it->first, handler, am);
+ Emit(*it->second, handler, am);
+ }
+ handler.OnMapEnd();
+ break;
+ }
+}
+
+bool NodeEvents::IsAliased(const detail::node& node) const {
+ RefCount::const_iterator it = m_refCount.find(node.ref());
+ return it != m_refCount.end() && it->second > 1;
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/nodeevents.h b/contrib/libs/yaml-cpp/src/nodeevents.h
index ac52cd110f..49c18eb854 100644
--- a/contrib/libs/yaml-cpp/src/nodeevents.h
+++ b/contrib/libs/yaml-cpp/src/nodeevents.h
@@ -1,64 +1,64 @@
-#ifndef NODE_NODEEVENTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define NODE_NODEEVENTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <map>
-#include <vector>
-
-#include "yaml-cpp/anchor.h"
-#include "yaml-cpp/node/ptr.h"
-
-namespace YAML {
-namespace detail {
-class node;
-} // namespace detail
-} // namespace YAML
-
-namespace YAML {
-class EventHandler;
-class Node;
-
-class NodeEvents {
- public:
- explicit NodeEvents(const Node& node);
-
- void Emit(EventHandler& handler);
-
- private:
- class AliasManager {
- public:
- AliasManager() : m_curAnchor(0) {}
-
- void RegisterReference(const detail::node& node);
- anchor_t LookupAnchor(const detail::node& node) const;
-
- private:
- anchor_t _CreateNewAnchor() { return ++m_curAnchor; }
-
- private:
- typedef std::map<const detail::node_ref*, anchor_t> AnchorByIdentity;
- AnchorByIdentity m_anchorByIdentity;
-
- anchor_t m_curAnchor;
- };
-
- void Setup(const detail::node& node);
- void Emit(const detail::node& node, EventHandler& handler,
- AliasManager& am) const;
- bool IsAliased(const detail::node& node) const;
-
- private:
- detail::shared_memory_holder m_pMemory;
- detail::node* m_root;
-
- typedef std::map<const detail::node_ref*, int> RefCount;
- RefCount m_refCount;
-};
-}
-
-#endif // NODE_NODEEVENTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef NODE_NODEEVENTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define NODE_NODEEVENTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <map>
+#include <vector>
+
+#include "yaml-cpp/anchor.h"
+#include "yaml-cpp/node/ptr.h"
+
+namespace YAML {
+namespace detail {
+class node;
+} // namespace detail
+} // namespace YAML
+
+namespace YAML {
+class EventHandler;
+class Node;
+
+class NodeEvents {
+ public:
+ explicit NodeEvents(const Node& node);
+
+ void Emit(EventHandler& handler);
+
+ private:
+ class AliasManager {
+ public:
+ AliasManager() : m_curAnchor(0) {}
+
+ void RegisterReference(const detail::node& node);
+ anchor_t LookupAnchor(const detail::node& node) const;
+
+ private:
+ anchor_t _CreateNewAnchor() { return ++m_curAnchor; }
+
+ private:
+ typedef std::map<const detail::node_ref*, anchor_t> AnchorByIdentity;
+ AnchorByIdentity m_anchorByIdentity;
+
+ anchor_t m_curAnchor;
+ };
+
+ void Setup(const detail::node& node);
+ void Emit(const detail::node& node, EventHandler& handler,
+ AliasManager& am) const;
+ bool IsAliased(const detail::node& node) const;
+
+ private:
+ detail::shared_memory_holder m_pMemory;
+ detail::node* m_root;
+
+ typedef std::map<const detail::node_ref*, int> RefCount;
+ RefCount m_refCount;
+};
+}
+
+#endif // NODE_NODEEVENTS_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/null.cpp b/contrib/libs/yaml-cpp/src/null.cpp
index 3f29fd0f89..d12dd08ce4 100644
--- a/contrib/libs/yaml-cpp/src/null.cpp
+++ b/contrib/libs/yaml-cpp/src/null.cpp
@@ -1,10 +1,10 @@
-#include "yaml-cpp/null.h"
-
-namespace YAML {
-_Null Null;
+#include "yaml-cpp/null.h"
+
+namespace YAML {
+_Null Null;
bool IsNullString(const std::string& str) {
return str.empty() || str == "~" || str == "null" || str == "Null" ||
str == "NULL";
-}
+}
}
diff --git a/contrib/libs/yaml-cpp/src/ostream_wrapper.cpp b/contrib/libs/yaml-cpp/src/ostream_wrapper.cpp
index 4198a94371..357fc0094c 100644
--- a/contrib/libs/yaml-cpp/src/ostream_wrapper.cpp
+++ b/contrib/libs/yaml-cpp/src/ostream_wrapper.cpp
@@ -1,57 +1,57 @@
-#include "yaml-cpp/ostream_wrapper.h"
-
-#include <algorithm>
-#include <cstring>
-#include <iostream>
-
-namespace YAML {
-ostream_wrapper::ostream_wrapper()
- : m_buffer(1, '\0'),
- m_pStream(0),
- m_pos(0),
- m_row(0),
- m_col(0),
- m_comment(false) {}
-
-ostream_wrapper::ostream_wrapper(std::ostream& stream)
- : m_pStream(&stream), m_pos(0), m_row(0), m_col(0), m_comment(false) {}
-
-ostream_wrapper::~ostream_wrapper() {}
-
-void ostream_wrapper::write(const std::string& str) {
- if (m_pStream) {
- m_pStream->write(str.c_str(), str.size());
- } else {
- m_buffer.resize(std::max(m_buffer.size(), m_pos + str.size() + 1));
- std::copy(str.begin(), str.end(), m_buffer.begin() + m_pos);
- }
-
- for (std::size_t i = 0; i < str.size(); i++) {
- update_pos(str[i]);
- }
-}
-
-void ostream_wrapper::write(const char* str, std::size_t size) {
- if (m_pStream) {
- m_pStream->write(str, size);
- } else {
- m_buffer.resize(std::max(m_buffer.size(), m_pos + size + 1));
- std::copy(str, str + size, m_buffer.begin() + m_pos);
- }
-
- for (std::size_t i = 0; i < size; i++) {
- update_pos(str[i]);
- }
-}
-
-void ostream_wrapper::update_pos(char ch) {
- m_pos++;
- m_col++;
-
- if (ch == '\n') {
- m_row++;
- m_col = 0;
- m_comment = false;
- }
-}
-}
+#include "yaml-cpp/ostream_wrapper.h"
+
+#include <algorithm>
+#include <cstring>
+#include <iostream>
+
+namespace YAML {
+ostream_wrapper::ostream_wrapper()
+ : m_buffer(1, '\0'),
+ m_pStream(0),
+ m_pos(0),
+ m_row(0),
+ m_col(0),
+ m_comment(false) {}
+
+ostream_wrapper::ostream_wrapper(std::ostream& stream)
+ : m_pStream(&stream), m_pos(0), m_row(0), m_col(0), m_comment(false) {}
+
+ostream_wrapper::~ostream_wrapper() {}
+
+void ostream_wrapper::write(const std::string& str) {
+ if (m_pStream) {
+ m_pStream->write(str.c_str(), str.size());
+ } else {
+ m_buffer.resize(std::max(m_buffer.size(), m_pos + str.size() + 1));
+ std::copy(str.begin(), str.end(), m_buffer.begin() + m_pos);
+ }
+
+ for (std::size_t i = 0; i < str.size(); i++) {
+ update_pos(str[i]);
+ }
+}
+
+void ostream_wrapper::write(const char* str, std::size_t size) {
+ if (m_pStream) {
+ m_pStream->write(str, size);
+ } else {
+ m_buffer.resize(std::max(m_buffer.size(), m_pos + size + 1));
+ std::copy(str, str + size, m_buffer.begin() + m_pos);
+ }
+
+ for (std::size_t i = 0; i < size; i++) {
+ update_pos(str[i]);
+ }
+}
+
+void ostream_wrapper::update_pos(char ch) {
+ m_pos++;
+ m_col++;
+
+ if (ch == '\n') {
+ m_row++;
+ m_col = 0;
+ m_comment = false;
+ }
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/parse.cpp b/contrib/libs/yaml-cpp/src/parse.cpp
index 6499f85df0..0b2ae4a4f6 100644
--- a/contrib/libs/yaml-cpp/src/parse.cpp
+++ b/contrib/libs/yaml-cpp/src/parse.cpp
@@ -1,72 +1,72 @@
-#include "yaml-cpp/node/parse.h"
-
-#include <fstream>
-#include <sstream>
-
-#include "yaml-cpp/node/node.h"
-#include "yaml-cpp/node/impl.h"
-#include "yaml-cpp/parser.h"
-#include "nodebuilder.h"
-
-namespace YAML {
-Node Load(const std::string& input) {
- std::stringstream stream(input);
- return Load(stream);
-}
-
-Node Load(const char* input) {
- std::stringstream stream(input);
- return Load(stream);
-}
-
-Node Load(std::istream& input) {
- Parser parser(input);
- NodeBuilder builder;
+#include "yaml-cpp/node/parse.h"
+
+#include <fstream>
+#include <sstream>
+
+#include "yaml-cpp/node/node.h"
+#include "yaml-cpp/node/impl.h"
+#include "yaml-cpp/parser.h"
+#include "nodebuilder.h"
+
+namespace YAML {
+Node Load(const std::string& input) {
+ std::stringstream stream(input);
+ return Load(stream);
+}
+
+Node Load(const char* input) {
+ std::stringstream stream(input);
+ return Load(stream);
+}
+
+Node Load(std::istream& input) {
+ Parser parser(input);
+ NodeBuilder builder;
if (!parser.HandleNextDocument(builder)) {
- return Node();
+ return Node();
}
-
- return builder.Root();
-}
-
-Node LoadFile(const std::string& filename) {
- std::ifstream fin(filename.c_str());
+
+ return builder.Root();
+}
+
+Node LoadFile(const std::string& filename) {
+ std::ifstream fin(filename.c_str());
if (!fin) {
- throw BadFile();
+ throw BadFile();
}
- return Load(fin);
-}
-
-std::vector<Node> LoadAll(const std::string& input) {
- std::stringstream stream(input);
- return LoadAll(stream);
-}
-
-std::vector<Node> LoadAll(const char* input) {
- std::stringstream stream(input);
- return LoadAll(stream);
-}
-
-std::vector<Node> LoadAll(std::istream& input) {
- std::vector<Node> docs;
-
- Parser parser(input);
- while (1) {
- NodeBuilder builder;
+ return Load(fin);
+}
+
+std::vector<Node> LoadAll(const std::string& input) {
+ std::stringstream stream(input);
+ return LoadAll(stream);
+}
+
+std::vector<Node> LoadAll(const char* input) {
+ std::stringstream stream(input);
+ return LoadAll(stream);
+}
+
+std::vector<Node> LoadAll(std::istream& input) {
+ std::vector<Node> docs;
+
+ Parser parser(input);
+ while (1) {
+ NodeBuilder builder;
if (!parser.HandleNextDocument(builder)) {
- break;
+ break;
}
- docs.push_back(builder.Root());
- }
-
- return docs;
-}
-
-std::vector<Node> LoadAllFromFile(const std::string& filename) {
- std::ifstream fin(filename.c_str());
+ docs.push_back(builder.Root());
+ }
+
+ return docs;
+}
+
+std::vector<Node> LoadAllFromFile(const std::string& filename) {
+ std::ifstream fin(filename.c_str());
if (!fin) {
- throw BadFile();
+ throw BadFile();
}
- return LoadAll(fin);
-}
+ return LoadAll(fin);
+}
} // namespace YAML
diff --git a/contrib/libs/yaml-cpp/src/parser.cpp b/contrib/libs/yaml-cpp/src/parser.cpp
index 1c5ff53c89..cd69f39fce 100644
--- a/contrib/libs/yaml-cpp/src/parser.cpp
+++ b/contrib/libs/yaml-cpp/src/parser.cpp
@@ -1,129 +1,129 @@
-#include <cstdio>
-#include <sstream>
-
-#include "directives.h" // IWYU pragma: keep
-#include "scanner.h" // IWYU pragma: keep
-#include "singledocparser.h"
-#include "token.h"
-#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
-#include "yaml-cpp/parser.h"
-
-namespace YAML {
-class EventHandler;
-
-Parser::Parser() {}
-
-Parser::Parser(std::istream& in) { Load(in); }
-
-Parser::~Parser() {}
-
-Parser::operator bool() const {
- return m_pScanner.get() && !m_pScanner->empty();
-}
-
-void Parser::Load(std::istream& in) {
- m_pScanner.reset(new Scanner(in));
- m_pDirectives.reset(new Directives);
-}
-
-bool Parser::HandleNextDocument(EventHandler& eventHandler) {
- if (!m_pScanner.get())
- return false;
-
- ParseDirectives();
+#include <cstdio>
+#include <sstream>
+
+#include "directives.h" // IWYU pragma: keep
+#include "scanner.h" // IWYU pragma: keep
+#include "singledocparser.h"
+#include "token.h"
+#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
+#include "yaml-cpp/parser.h"
+
+namespace YAML {
+class EventHandler;
+
+Parser::Parser() {}
+
+Parser::Parser(std::istream& in) { Load(in); }
+
+Parser::~Parser() {}
+
+Parser::operator bool() const {
+ return m_pScanner.get() && !m_pScanner->empty();
+}
+
+void Parser::Load(std::istream& in) {
+ m_pScanner.reset(new Scanner(in));
+ m_pDirectives.reset(new Directives);
+}
+
+bool Parser::HandleNextDocument(EventHandler& eventHandler) {
+ if (!m_pScanner.get())
+ return false;
+
+ ParseDirectives();
if (m_pScanner->empty()) {
- return false;
+ return false;
}
-
- SingleDocParser sdp(*m_pScanner, *m_pDirectives);
- sdp.HandleDocument(eventHandler);
- return true;
-}
-
-void Parser::ParseDirectives() {
- bool readDirective = false;
-
- while (1) {
+
+ SingleDocParser sdp(*m_pScanner, *m_pDirectives);
+ sdp.HandleDocument(eventHandler);
+ return true;
+}
+
+void Parser::ParseDirectives() {
+ bool readDirective = false;
+
+ while (1) {
if (m_pScanner->empty()) {
- break;
+ break;
}
-
- Token& token = m_pScanner->peek();
+
+ Token& token = m_pScanner->peek();
if (token.type != Token::DIRECTIVE) {
- break;
+ break;
}
-
- // we keep the directives from the last document if none are specified;
- // but if any directives are specific, then we reset them
+
+ // we keep the directives from the last document if none are specified;
+ // but if any directives are specific, then we reset them
if (!readDirective) {
- m_pDirectives.reset(new Directives);
+ m_pDirectives.reset(new Directives);
}
-
- readDirective = true;
- HandleDirective(token);
- m_pScanner->pop();
- }
-}
-
-void Parser::HandleDirective(const Token& token) {
+
+ readDirective = true;
+ HandleDirective(token);
+ m_pScanner->pop();
+ }
+}
+
+void Parser::HandleDirective(const Token& token) {
if (token.value == "YAML") {
- HandleYamlDirective(token);
+ HandleYamlDirective(token);
} else if (token.value == "TAG") {
- HandleTagDirective(token);
+ HandleTagDirective(token);
}
-}
-
-void Parser::HandleYamlDirective(const Token& token) {
+}
+
+void Parser::HandleYamlDirective(const Token& token) {
if (token.params.size() != 1) {
- throw ParserException(token.mark, ErrorMsg::YAML_DIRECTIVE_ARGS);
+ throw ParserException(token.mark, ErrorMsg::YAML_DIRECTIVE_ARGS);
}
-
+
if (!m_pDirectives->version.isDefault) {
- throw ParserException(token.mark, ErrorMsg::REPEATED_YAML_DIRECTIVE);
+ throw ParserException(token.mark, ErrorMsg::REPEATED_YAML_DIRECTIVE);
}
-
- std::stringstream str(token.params[0]);
- str >> m_pDirectives->version.major;
- str.get();
- str >> m_pDirectives->version.minor;
+
+ std::stringstream str(token.params[0]);
+ str >> m_pDirectives->version.major;
+ str.get();
+ str >> m_pDirectives->version.minor;
if (!str || str.peek() != EOF) {
- throw ParserException(
- token.mark, std::string(ErrorMsg::YAML_VERSION) + token.params[0]);
+ throw ParserException(
+ token.mark, std::string(ErrorMsg::YAML_VERSION) + token.params[0]);
}
-
+
if (m_pDirectives->version.major > 1) {
- throw ParserException(token.mark, ErrorMsg::YAML_MAJOR_VERSION);
+ throw ParserException(token.mark, ErrorMsg::YAML_MAJOR_VERSION);
}
-
- m_pDirectives->version.isDefault = false;
- // TODO: warning on major == 1, minor > 2?
-}
-
-void Parser::HandleTagDirective(const Token& token) {
- if (token.params.size() != 2)
- throw ParserException(token.mark, ErrorMsg::TAG_DIRECTIVE_ARGS);
-
- const std::string& handle = token.params[0];
- const std::string& prefix = token.params[1];
+
+ m_pDirectives->version.isDefault = false;
+ // TODO: warning on major == 1, minor > 2?
+}
+
+void Parser::HandleTagDirective(const Token& token) {
+ if (token.params.size() != 2)
+ throw ParserException(token.mark, ErrorMsg::TAG_DIRECTIVE_ARGS);
+
+ const std::string& handle = token.params[0];
+ const std::string& prefix = token.params[1];
if (m_pDirectives->tags.find(handle) != m_pDirectives->tags.end()) {
- throw ParserException(token.mark, ErrorMsg::REPEATED_TAG_DIRECTIVE);
+ throw ParserException(token.mark, ErrorMsg::REPEATED_TAG_DIRECTIVE);
}
-
- m_pDirectives->tags[handle] = prefix;
-}
-
-void Parser::PrintTokens(std::ostream& out) {
+
+ m_pDirectives->tags[handle] = prefix;
+}
+
+void Parser::PrintTokens(std::ostream& out) {
if (!m_pScanner.get()) {
- return;
+ return;
}
-
- while (1) {
+
+ while (1) {
if (m_pScanner->empty()) {
- break;
+ break;
}
-
- out << m_pScanner->peek() << "\n";
- m_pScanner->pop();
- }
-}
-}
+
+ out << m_pScanner->peek() << "\n";
+ m_pScanner->pop();
+ }
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/ptr_vector.h b/contrib/libs/yaml-cpp/src/ptr_vector.h
index 051a48a1e7..955aebd8d5 100644
--- a/contrib/libs/yaml-cpp/src/ptr_vector.h
+++ b/contrib/libs/yaml-cpp/src/ptr_vector.h
@@ -1,43 +1,43 @@
-#ifndef PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <cstddef>
-#include <cstdlib>
-#include <memory>
-#include <vector>
-
-#include "yaml-cpp/noncopyable.h"
-
-namespace YAML {
-
+#ifndef PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <cstddef>
+#include <cstdlib>
+#include <memory>
+#include <vector>
+
+#include "yaml-cpp/noncopyable.h"
+
+namespace YAML {
+
// TODO: This class is no longer needed
-template <typename T>
-class ptr_vector : private YAML::noncopyable {
- public:
- ptr_vector() {}
-
+template <typename T>
+class ptr_vector : private YAML::noncopyable {
+ public:
+ ptr_vector() {}
+
void clear() { m_data.clear(); }
-
- std::size_t size() const { return m_data.size(); }
- bool empty() const { return m_data.empty(); }
-
+
+ std::size_t size() const { return m_data.size(); }
+ bool empty() const { return m_data.empty(); }
+
void push_back(std::unique_ptr<T>&& t) { m_data.push_back(std::move(t)); }
- T& operator[](std::size_t i) { return *m_data[i]; }
- const T& operator[](std::size_t i) const { return *m_data[i]; }
-
+ T& operator[](std::size_t i) { return *m_data[i]; }
+ const T& operator[](std::size_t i) const { return *m_data[i]; }
+
T& back() { return *(m_data.back().get()); }
-
+
const T& back() const { return *(m_data.back().get()); }
- private:
+ private:
std::vector<std::unique_ptr<T>> m_data;
-};
-}
-
-#endif // PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+};
+}
+
+#endif // PTR_VECTOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/regex_yaml.cpp b/contrib/libs/yaml-cpp/src/regex_yaml.cpp
index 2933a012dd..20b772051d 100644
--- a/contrib/libs/yaml-cpp/src/regex_yaml.cpp
+++ b/contrib/libs/yaml-cpp/src/regex_yaml.cpp
@@ -1,45 +1,45 @@
-#include "regex_yaml.h"
-
-namespace YAML {
-// constructors
-RegEx::RegEx() : m_op(REGEX_EMPTY) {}
-
-RegEx::RegEx(REGEX_OP op) : m_op(op) {}
-
-RegEx::RegEx(char ch) : m_op(REGEX_MATCH), m_a(ch) {}
-
-RegEx::RegEx(char a, char z) : m_op(REGEX_RANGE), m_a(a), m_z(z) {}
-
-RegEx::RegEx(const std::string& str, REGEX_OP op) : m_op(op) {
- for (std::size_t i = 0; i < str.size(); i++)
- m_params.push_back(RegEx(str[i]));
-}
-
-// combination constructors
-RegEx operator!(const RegEx& ex) {
- RegEx ret(REGEX_NOT);
- ret.m_params.push_back(ex);
- return ret;
-}
-
-RegEx operator||(const RegEx& ex1, const RegEx& ex2) {
- RegEx ret(REGEX_OR);
- ret.m_params.push_back(ex1);
- ret.m_params.push_back(ex2);
- return ret;
-}
-
-RegEx operator&&(const RegEx& ex1, const RegEx& ex2) {
- RegEx ret(REGEX_AND);
- ret.m_params.push_back(ex1);
- ret.m_params.push_back(ex2);
- return ret;
-}
-
-RegEx operator+(const RegEx& ex1, const RegEx& ex2) {
- RegEx ret(REGEX_SEQ);
- ret.m_params.push_back(ex1);
- ret.m_params.push_back(ex2);
- return ret;
-}
-}
+#include "regex_yaml.h"
+
+namespace YAML {
+// constructors
+RegEx::RegEx() : m_op(REGEX_EMPTY) {}
+
+RegEx::RegEx(REGEX_OP op) : m_op(op) {}
+
+RegEx::RegEx(char ch) : m_op(REGEX_MATCH), m_a(ch) {}
+
+RegEx::RegEx(char a, char z) : m_op(REGEX_RANGE), m_a(a), m_z(z) {}
+
+RegEx::RegEx(const std::string& str, REGEX_OP op) : m_op(op) {
+ for (std::size_t i = 0; i < str.size(); i++)
+ m_params.push_back(RegEx(str[i]));
+}
+
+// combination constructors
+RegEx operator!(const RegEx& ex) {
+ RegEx ret(REGEX_NOT);
+ ret.m_params.push_back(ex);
+ return ret;
+}
+
+RegEx operator||(const RegEx& ex1, const RegEx& ex2) {
+ RegEx ret(REGEX_OR);
+ ret.m_params.push_back(ex1);
+ ret.m_params.push_back(ex2);
+ return ret;
+}
+
+RegEx operator&&(const RegEx& ex1, const RegEx& ex2) {
+ RegEx ret(REGEX_AND);
+ ret.m_params.push_back(ex1);
+ ret.m_params.push_back(ex2);
+ return ret;
+}
+
+RegEx operator+(const RegEx& ex1, const RegEx& ex2) {
+ RegEx ret(REGEX_SEQ);
+ ret.m_params.push_back(ex1);
+ ret.m_params.push_back(ex2);
+ return ret;
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/regex_yaml.h b/contrib/libs/yaml-cpp/src/regex_yaml.h
index 35f7ef52b7..8f28b852a2 100644
--- a/contrib/libs/yaml-cpp/src/regex_yaml.h
+++ b/contrib/libs/yaml-cpp/src/regex_yaml.h
@@ -1,87 +1,87 @@
-#ifndef REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <string>
-#include <vector>
-
+#ifndef REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+#include <vector>
+
#include "yaml-cpp/dll.h"
-namespace YAML {
-class Stream;
-
-enum REGEX_OP {
- REGEX_EMPTY,
- REGEX_MATCH,
- REGEX_RANGE,
- REGEX_OR,
- REGEX_AND,
- REGEX_NOT,
- REGEX_SEQ
-};
-
-// simplified regular expressions
-// . Only straightforward matches (no repeated characters)
-// . Only matches from start of string
+namespace YAML {
+class Stream;
+
+enum REGEX_OP {
+ REGEX_EMPTY,
+ REGEX_MATCH,
+ REGEX_RANGE,
+ REGEX_OR,
+ REGEX_AND,
+ REGEX_NOT,
+ REGEX_SEQ
+};
+
+// simplified regular expressions
+// . Only straightforward matches (no repeated characters)
+// . Only matches from start of string
class YAML_CPP_API RegEx {
- public:
- RegEx();
- RegEx(char ch);
- RegEx(char a, char z);
- RegEx(const std::string& str, REGEX_OP op = REGEX_SEQ);
- ~RegEx() {}
-
+ public:
+ RegEx();
+ RegEx(char ch);
+ RegEx(char a, char z);
+ RegEx(const std::string& str, REGEX_OP op = REGEX_SEQ);
+ ~RegEx() {}
+
friend YAML_CPP_API RegEx operator!(const RegEx& ex);
friend YAML_CPP_API RegEx operator||(const RegEx& ex1, const RegEx& ex2);
friend YAML_CPP_API RegEx operator&&(const RegEx& ex1, const RegEx& ex2);
friend YAML_CPP_API RegEx operator+(const RegEx& ex1, const RegEx& ex2);
-
- bool Matches(char ch) const;
- bool Matches(const std::string& str) const;
- bool Matches(const Stream& in) const;
- template <typename Source>
- bool Matches(const Source& source) const;
-
- int Match(const std::string& str) const;
- int Match(const Stream& in) const;
- template <typename Source>
- int Match(const Source& source) const;
-
- private:
- RegEx(REGEX_OP op);
-
- template <typename Source>
- bool IsValidSource(const Source& source) const;
- template <typename Source>
- int MatchUnchecked(const Source& source) const;
-
- template <typename Source>
- int MatchOpEmpty(const Source& source) const;
- template <typename Source>
- int MatchOpMatch(const Source& source) const;
- template <typename Source>
- int MatchOpRange(const Source& source) const;
- template <typename Source>
- int MatchOpOr(const Source& source) const;
- template <typename Source>
- int MatchOpAnd(const Source& source) const;
- template <typename Source>
- int MatchOpNot(const Source& source) const;
- template <typename Source>
- int MatchOpSeq(const Source& source) const;
-
- private:
- REGEX_OP m_op;
- char m_a, m_z;
- std::vector<RegEx> m_params;
-};
-}
-
-#include "regeximpl.h"
-
-#endif // REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+ bool Matches(char ch) const;
+ bool Matches(const std::string& str) const;
+ bool Matches(const Stream& in) const;
+ template <typename Source>
+ bool Matches(const Source& source) const;
+
+ int Match(const std::string& str) const;
+ int Match(const Stream& in) const;
+ template <typename Source>
+ int Match(const Source& source) const;
+
+ private:
+ RegEx(REGEX_OP op);
+
+ template <typename Source>
+ bool IsValidSource(const Source& source) const;
+ template <typename Source>
+ int MatchUnchecked(const Source& source) const;
+
+ template <typename Source>
+ int MatchOpEmpty(const Source& source) const;
+ template <typename Source>
+ int MatchOpMatch(const Source& source) const;
+ template <typename Source>
+ int MatchOpRange(const Source& source) const;
+ template <typename Source>
+ int MatchOpOr(const Source& source) const;
+ template <typename Source>
+ int MatchOpAnd(const Source& source) const;
+ template <typename Source>
+ int MatchOpNot(const Source& source) const;
+ template <typename Source>
+ int MatchOpSeq(const Source& source) const;
+
+ private:
+ REGEX_OP m_op;
+ char m_a, m_z;
+ std::vector<RegEx> m_params;
+};
+}
+
+#include "regeximpl.h"
+
+#endif // REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/regeximpl.h b/contrib/libs/yaml-cpp/src/regeximpl.h
index 4a529e7a0d..709124f008 100644
--- a/contrib/libs/yaml-cpp/src/regeximpl.h
+++ b/contrib/libs/yaml-cpp/src/regeximpl.h
@@ -1,186 +1,186 @@
-#ifndef REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "stream.h"
-#include "stringsource.h"
-#include "streamcharsource.h"
-
-namespace YAML {
-// query matches
-inline bool RegEx::Matches(char ch) const {
- std::string str;
- str += ch;
- return Matches(str);
-}
-
-inline bool RegEx::Matches(const std::string& str) const {
- return Match(str) >= 0;
-}
-
-inline bool RegEx::Matches(const Stream& in) const { return Match(in) >= 0; }
-
-template <typename Source>
-inline bool RegEx::Matches(const Source& source) const {
- return Match(source) >= 0;
-}
-
-// Match
-// . Matches the given string against this regular expression.
-// . Returns the number of characters matched.
-// . Returns -1 if no characters were matched (the reason for
-// not returning zero is that we may have an empty regex
-// which is ALWAYS successful at matching zero characters).
-// . REMEMBER that we only match from the start of the buffer!
-inline int RegEx::Match(const std::string& str) const {
- StringCharSource source(str.c_str(), str.size());
- return Match(source);
-}
-
-inline int RegEx::Match(const Stream& in) const {
- StreamCharSource source(in);
- return Match(source);
-}
-
-template <typename Source>
-inline bool RegEx::IsValidSource(const Source& source) const {
- return source;
-}
-
-template <>
-inline bool RegEx::IsValidSource<StringCharSource>(
- const StringCharSource& source) const {
- switch (m_op) {
- case REGEX_MATCH:
- case REGEX_RANGE:
- return source;
- default:
- return true;
- }
-}
-
-template <typename Source>
-inline int RegEx::Match(const Source& source) const {
- return IsValidSource(source) ? MatchUnchecked(source) : -1;
-}
-
-template <typename Source>
-inline int RegEx::MatchUnchecked(const Source& source) const {
- switch (m_op) {
- case REGEX_EMPTY:
- return MatchOpEmpty(source);
- case REGEX_MATCH:
- return MatchOpMatch(source);
- case REGEX_RANGE:
- return MatchOpRange(source);
- case REGEX_OR:
- return MatchOpOr(source);
- case REGEX_AND:
- return MatchOpAnd(source);
- case REGEX_NOT:
- return MatchOpNot(source);
- case REGEX_SEQ:
- return MatchOpSeq(source);
- }
-
- return -1;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Operators
-// Note: the convention MatchOp*<Source> is that we can assume
-// IsSourceValid(source).
-// So we do all our checks *before* we call these functions
-
-// EmptyOperator
-template <typename Source>
-inline int RegEx::MatchOpEmpty(const Source& source) const {
- return source[0] == Stream::eof() ? 0 : -1;
-}
-
-template <>
-inline int RegEx::MatchOpEmpty<StringCharSource>(
- const StringCharSource& source) const {
- return !source
- ? 0
- : -1; // the empty regex only is successful on the empty string
-}
-
-// MatchOperator
-template <typename Source>
-inline int RegEx::MatchOpMatch(const Source& source) const {
- if (source[0] != m_a)
- return -1;
- return 1;
-}
-
-// RangeOperator
-template <typename Source>
-inline int RegEx::MatchOpRange(const Source& source) const {
- if (m_a > source[0] || m_z < source[0])
- return -1;
- return 1;
-}
-
-// OrOperator
-template <typename Source>
-inline int RegEx::MatchOpOr(const Source& source) const {
- for (std::size_t i = 0; i < m_params.size(); i++) {
- int n = m_params[i].MatchUnchecked(source);
- if (n >= 0)
- return n;
- }
- return -1;
-}
-
-// AndOperator
-// Note: 'AND' is a little funny, since we may be required to match things
-// of different lengths. If we find a match, we return the length of
-// the FIRST entry on the list.
-template <typename Source>
-inline int RegEx::MatchOpAnd(const Source& source) const {
- int first = -1;
- for (std::size_t i = 0; i < m_params.size(); i++) {
- int n = m_params[i].MatchUnchecked(source);
- if (n == -1)
- return -1;
- if (i == 0)
- first = n;
- }
- return first;
-}
-
-// NotOperator
-template <typename Source>
-inline int RegEx::MatchOpNot(const Source& source) const {
- if (m_params.empty())
- return -1;
- if (m_params[0].MatchUnchecked(source) >= 0)
- return -1;
- return 1;
-}
-
-// SeqOperator
-template <typename Source>
-inline int RegEx::MatchOpSeq(const Source& source) const {
- int offset = 0;
- for (std::size_t i = 0; i < m_params.size(); i++) {
- int n = m_params[i].Match(source + offset); // note Match, not
- // MatchUnchecked because we
- // need to check validity after
- // the offset
- if (n == -1)
- return -1;
- offset += n;
- }
-
- return offset;
-}
-}
-
-#endif // REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "stream.h"
+#include "stringsource.h"
+#include "streamcharsource.h"
+
+namespace YAML {
+// query matches
+inline bool RegEx::Matches(char ch) const {
+ std::string str;
+ str += ch;
+ return Matches(str);
+}
+
+inline bool RegEx::Matches(const std::string& str) const {
+ return Match(str) >= 0;
+}
+
+inline bool RegEx::Matches(const Stream& in) const { return Match(in) >= 0; }
+
+template <typename Source>
+inline bool RegEx::Matches(const Source& source) const {
+ return Match(source) >= 0;
+}
+
+// Match
+// . Matches the given string against this regular expression.
+// . Returns the number of characters matched.
+// . Returns -1 if no characters were matched (the reason for
+// not returning zero is that we may have an empty regex
+// which is ALWAYS successful at matching zero characters).
+// . REMEMBER that we only match from the start of the buffer!
+inline int RegEx::Match(const std::string& str) const {
+ StringCharSource source(str.c_str(), str.size());
+ return Match(source);
+}
+
+inline int RegEx::Match(const Stream& in) const {
+ StreamCharSource source(in);
+ return Match(source);
+}
+
+template <typename Source>
+inline bool RegEx::IsValidSource(const Source& source) const {
+ return source;
+}
+
+template <>
+inline bool RegEx::IsValidSource<StringCharSource>(
+ const StringCharSource& source) const {
+ switch (m_op) {
+ case REGEX_MATCH:
+ case REGEX_RANGE:
+ return source;
+ default:
+ return true;
+ }
+}
+
+template <typename Source>
+inline int RegEx::Match(const Source& source) const {
+ return IsValidSource(source) ? MatchUnchecked(source) : -1;
+}
+
+template <typename Source>
+inline int RegEx::MatchUnchecked(const Source& source) const {
+ switch (m_op) {
+ case REGEX_EMPTY:
+ return MatchOpEmpty(source);
+ case REGEX_MATCH:
+ return MatchOpMatch(source);
+ case REGEX_RANGE:
+ return MatchOpRange(source);
+ case REGEX_OR:
+ return MatchOpOr(source);
+ case REGEX_AND:
+ return MatchOpAnd(source);
+ case REGEX_NOT:
+ return MatchOpNot(source);
+ case REGEX_SEQ:
+ return MatchOpSeq(source);
+ }
+
+ return -1;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Operators
+// Note: the convention MatchOp*<Source> is that we can assume
+// IsSourceValid(source).
+// So we do all our checks *before* we call these functions
+
+// EmptyOperator
+template <typename Source>
+inline int RegEx::MatchOpEmpty(const Source& source) const {
+ return source[0] == Stream::eof() ? 0 : -1;
+}
+
+template <>
+inline int RegEx::MatchOpEmpty<StringCharSource>(
+ const StringCharSource& source) const {
+ return !source
+ ? 0
+ : -1; // the empty regex only is successful on the empty string
+}
+
+// MatchOperator
+template <typename Source>
+inline int RegEx::MatchOpMatch(const Source& source) const {
+ if (source[0] != m_a)
+ return -1;
+ return 1;
+}
+
+// RangeOperator
+template <typename Source>
+inline int RegEx::MatchOpRange(const Source& source) const {
+ if (m_a > source[0] || m_z < source[0])
+ return -1;
+ return 1;
+}
+
+// OrOperator
+template <typename Source>
+inline int RegEx::MatchOpOr(const Source& source) const {
+ for (std::size_t i = 0; i < m_params.size(); i++) {
+ int n = m_params[i].MatchUnchecked(source);
+ if (n >= 0)
+ return n;
+ }
+ return -1;
+}
+
+// AndOperator
+// Note: 'AND' is a little funny, since we may be required to match things
+// of different lengths. If we find a match, we return the length of
+// the FIRST entry on the list.
+template <typename Source>
+inline int RegEx::MatchOpAnd(const Source& source) const {
+ int first = -1;
+ for (std::size_t i = 0; i < m_params.size(); i++) {
+ int n = m_params[i].MatchUnchecked(source);
+ if (n == -1)
+ return -1;
+ if (i == 0)
+ first = n;
+ }
+ return first;
+}
+
+// NotOperator
+template <typename Source>
+inline int RegEx::MatchOpNot(const Source& source) const {
+ if (m_params.empty())
+ return -1;
+ if (m_params[0].MatchUnchecked(source) >= 0)
+ return -1;
+ return 1;
+}
+
+// SeqOperator
+template <typename Source>
+inline int RegEx::MatchOpSeq(const Source& source) const {
+ int offset = 0;
+ for (std::size_t i = 0; i < m_params.size(); i++) {
+ int n = m_params[i].Match(source + offset); // note Match, not
+ // MatchUnchecked because we
+ // need to check validity after
+ // the offset
+ if (n == -1)
+ return -1;
+ offset += n;
+ }
+
+ return offset;
+}
+}
+
+#endif // REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/scanner.cpp b/contrib/libs/yaml-cpp/src/scanner.cpp
index 29301f6bb0..b5cfcc12b2 100644
--- a/contrib/libs/yaml-cpp/src/scanner.cpp
+++ b/contrib/libs/yaml-cpp/src/scanner.cpp
@@ -1,386 +1,386 @@
-#include <cassert>
-#include <memory>
-
-#include "exp.h"
-#include "scanner.h"
-#include "token.h"
-#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
-
-namespace YAML {
-Scanner::Scanner(std::istream& in)
- : INPUT(in),
- m_startedStream(false),
- m_endedStream(false),
- m_simpleKeyAllowed(false),
- m_canBeJSONFlow(false) {}
-
-Scanner::~Scanner() {}
-
-bool Scanner::empty() {
- EnsureTokensInQueue();
- return m_tokens.empty();
-}
-
-void Scanner::pop() {
- EnsureTokensInQueue();
- if (!m_tokens.empty())
- m_tokens.pop();
-}
-
-Token& Scanner::peek() {
- EnsureTokensInQueue();
- assert(!m_tokens.empty()); // should we be asserting here? I mean, we really
- // just be checking
- // if it's empty before peeking.
-
-#if 0
- static Token *pLast = 0;
- if(pLast != &m_tokens.front())
- std::cerr << "peek: " << m_tokens.front() << "\n";
- pLast = &m_tokens.front();
-#endif
-
- return m_tokens.front();
-}
-
-Mark Scanner::mark() const { return INPUT.mark(); }
-
-void Scanner::EnsureTokensInQueue() {
- while (1) {
- if (!m_tokens.empty()) {
- Token& token = m_tokens.front();
-
- // if this guy's valid, then we're done
+#include <cassert>
+#include <memory>
+
+#include "exp.h"
+#include "scanner.h"
+#include "token.h"
+#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
+
+namespace YAML {
+Scanner::Scanner(std::istream& in)
+ : INPUT(in),
+ m_startedStream(false),
+ m_endedStream(false),
+ m_simpleKeyAllowed(false),
+ m_canBeJSONFlow(false) {}
+
+Scanner::~Scanner() {}
+
+bool Scanner::empty() {
+ EnsureTokensInQueue();
+ return m_tokens.empty();
+}
+
+void Scanner::pop() {
+ EnsureTokensInQueue();
+ if (!m_tokens.empty())
+ m_tokens.pop();
+}
+
+Token& Scanner::peek() {
+ EnsureTokensInQueue();
+ assert(!m_tokens.empty()); // should we be asserting here? I mean, we really
+ // just be checking
+ // if it's empty before peeking.
+
+#if 0
+ static Token *pLast = 0;
+ if(pLast != &m_tokens.front())
+ std::cerr << "peek: " << m_tokens.front() << "\n";
+ pLast = &m_tokens.front();
+#endif
+
+ return m_tokens.front();
+}
+
+Mark Scanner::mark() const { return INPUT.mark(); }
+
+void Scanner::EnsureTokensInQueue() {
+ while (1) {
+ if (!m_tokens.empty()) {
+ Token& token = m_tokens.front();
+
+ // if this guy's valid, then we're done
if (token.status == Token::VALID) {
- return;
+ return;
}
-
- // here's where we clean up the impossible tokens
- if (token.status == Token::INVALID) {
- m_tokens.pop();
- continue;
- }
-
- // note: what's left are the unverified tokens
- }
-
- // no token? maybe we've actually finished
+
+ // here's where we clean up the impossible tokens
+ if (token.status == Token::INVALID) {
+ m_tokens.pop();
+ continue;
+ }
+
+ // note: what's left are the unverified tokens
+ }
+
+ // no token? maybe we've actually finished
if (m_endedStream) {
- return;
+ return;
}
-
- // no? then scan...
- ScanNextToken();
- }
-}
-
-void Scanner::ScanNextToken() {
+
+ // no? then scan...
+ ScanNextToken();
+ }
+}
+
+void Scanner::ScanNextToken() {
if (m_endedStream) {
- return;
+ return;
}
-
+
if (!m_startedStream) {
- return StartStream();
- }
-
- // get rid of whitespace, etc. (in between tokens it should be irrelevent)
- ScanToNextToken();
-
- // maybe need to end some blocks
- PopIndentToHere();
-
- // *****
- // And now branch based on the next few characters!
- // *****
-
- // end of stream
+ return StartStream();
+ }
+
+ // get rid of whitespace, etc. (in between tokens it should be irrelevent)
+ ScanToNextToken();
+
+ // maybe need to end some blocks
+ PopIndentToHere();
+
+ // *****
+ // And now branch based on the next few characters!
+ // *****
+
+ // end of stream
if (!INPUT) {
- return EndStream();
+ return EndStream();
}
-
+
if (INPUT.column() == 0 && INPUT.peek() == Keys::Directive) {
- return ScanDirective();
+ return ScanDirective();
}
-
- // document token
+
+ // document token
if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) {
- return ScanDocStart();
+ return ScanDocStart();
}
-
+
if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) {
- return ScanDocEnd();
+ return ScanDocEnd();
}
-
- // flow start/end/entry
+
+ // flow start/end/entry
if (INPUT.peek() == Keys::FlowSeqStart ||
INPUT.peek() == Keys::FlowMapStart) {
- return ScanFlowStart();
+ return ScanFlowStart();
}
-
+
if (INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd) {
- return ScanFlowEnd();
+ return ScanFlowEnd();
}
-
+
if (INPUT.peek() == Keys::FlowEntry) {
- return ScanFlowEntry();
+ return ScanFlowEntry();
}
-
- // block/map stuff
+
+ // block/map stuff
if (Exp::BlockEntry().Matches(INPUT)) {
- return ScanBlockEntry();
+ return ScanBlockEntry();
}
-
+
if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) {
- return ScanKey();
+ return ScanKey();
}
-
+
if (GetValueRegex().Matches(INPUT)) {
- return ScanValue();
+ return ScanValue();
}
-
- // alias/anchor
+
+ // alias/anchor
if (INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor) {
- return ScanAnchorOrAlias();
+ return ScanAnchorOrAlias();
}
-
- // tag
+
+ // tag
if (INPUT.peek() == Keys::Tag) {
- return ScanTag();
+ return ScanTag();
}
-
- // special scalars
- if (InBlockContext() && (INPUT.peek() == Keys::LiteralScalar ||
+
+ // special scalars
+ if (InBlockContext() && (INPUT.peek() == Keys::LiteralScalar ||
INPUT.peek() == Keys::FoldedScalar)) {
- return ScanBlockScalar();
+ return ScanBlockScalar();
}
-
+
if (INPUT.peek() == '\'' || INPUT.peek() == '\"') {
- return ScanQuotedScalar();
+ return ScanQuotedScalar();
}
-
- // plain scalars
- if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow())
+
+ // plain scalars
+ if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow())
.Matches(INPUT)) {
- return ScanPlainScalar();
- }
-
- // don't know what it is!
- throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN);
-}
-
-void Scanner::ScanToNextToken() {
- while (1) {
- // first eat whitespace
- while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
+ return ScanPlainScalar();
+ }
+
+ // don't know what it is!
+ throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN);
+}
+
+void Scanner::ScanToNextToken() {
+ while (1) {
+ // first eat whitespace
+ while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
if (InBlockContext() && Exp::Tab().Matches(INPUT)) {
- m_simpleKeyAllowed = false;
+ m_simpleKeyAllowed = false;
}
- INPUT.eat(1);
- }
-
- // then eat a comment
- if (Exp::Comment().Matches(INPUT)) {
- // eat until line break
+ INPUT.eat(1);
+ }
+
+ // then eat a comment
+ if (Exp::Comment().Matches(INPUT)) {
+ // eat until line break
while (INPUT && !Exp::Break().Matches(INPUT)) {
- INPUT.eat(1);
+ INPUT.eat(1);
}
- }
-
- // if it's NOT a line break, then we're done!
+ }
+
+ // if it's NOT a line break, then we're done!
if (!Exp::Break().Matches(INPUT)) {
- break;
+ break;
}
-
- // otherwise, let's eat the line break and keep going
- int n = Exp::Break().Match(INPUT);
- INPUT.eat(n);
-
- // oh yeah, and let's get rid of that simple key
- InvalidateSimpleKey();
-
- // new line - we may be able to accept a simple key now
+
+ // otherwise, let's eat the line break and keep going
+ int n = Exp::Break().Match(INPUT);
+ INPUT.eat(n);
+
+ // oh yeah, and let's get rid of that simple key
+ InvalidateSimpleKey();
+
+ // new line - we may be able to accept a simple key now
if (InBlockContext()) {
- m_simpleKeyAllowed = true;
+ m_simpleKeyAllowed = true;
}
- }
-}
-
-///////////////////////////////////////////////////////////////////////
-// Misc. helpers
-
-// IsWhitespaceToBeEaten
-// . We can eat whitespace if it's a space or tab
-// . Note: originally tabs in block context couldn't be eaten
-// "where a simple key could be allowed
-// (i.e., not at the beginning of a line, or following '-', '?', or
-// ':')"
-// I think this is wrong, since tabs can be non-content whitespace; it's just
-// that they can't contribute to indentation, so once you've seen a tab in a
-// line, you can't start a simple key
-bool Scanner::IsWhitespaceToBeEaten(char ch) {
+ }
+}
+
+///////////////////////////////////////////////////////////////////////
+// Misc. helpers
+
+// IsWhitespaceToBeEaten
+// . We can eat whitespace if it's a space or tab
+// . Note: originally tabs in block context couldn't be eaten
+// "where a simple key could be allowed
+// (i.e., not at the beginning of a line, or following '-', '?', or
+// ':')"
+// I think this is wrong, since tabs can be non-content whitespace; it's just
+// that they can't contribute to indentation, so once you've seen a tab in a
+// line, you can't start a simple key
+bool Scanner::IsWhitespaceToBeEaten(char ch) {
if (ch == ' ') {
- return true;
+ return true;
}
-
+
if (ch == '\t') {
- return true;
+ return true;
}
-
- return false;
-}
-
-const RegEx& Scanner::GetValueRegex() const {
+
+ return false;
+}
+
+const RegEx& Scanner::GetValueRegex() const {
if (InBlockContext()) {
- return Exp::Value();
- }
-
- return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow();
-}
-
-void Scanner::StartStream() {
- m_startedStream = true;
- m_simpleKeyAllowed = true;
+ return Exp::Value();
+ }
+
+ return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow();
+}
+
+void Scanner::StartStream() {
+ m_startedStream = true;
+ m_simpleKeyAllowed = true;
std::unique_ptr<IndentMarker> pIndent(
new IndentMarker(-1, IndentMarker::NONE));
m_indentRefs.push_back(std::move(pIndent));
- m_indents.push(&m_indentRefs.back());
-}
-
-void Scanner::EndStream() {
- // force newline
+ m_indents.push(&m_indentRefs.back());
+}
+
+void Scanner::EndStream() {
+ // force newline
if (INPUT.column() > 0) {
- INPUT.ResetColumn();
- }
-
- PopAllIndents();
- PopAllSimpleKeys();
-
- m_simpleKeyAllowed = false;
- m_endedStream = true;
-}
-
-Token* Scanner::PushToken(Token::TYPE type) {
- m_tokens.push(Token(type, INPUT.mark()));
- return &m_tokens.back();
-}
-
-Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const {
- switch (type) {
- case IndentMarker::SEQ:
- return Token::BLOCK_SEQ_START;
- case IndentMarker::MAP:
- return Token::BLOCK_MAP_START;
- case IndentMarker::NONE:
- assert(false);
- break;
- }
- assert(false);
- throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
-}
-
-Scanner::IndentMarker* Scanner::PushIndentTo(int column,
- IndentMarker::INDENT_TYPE type) {
- // are we in flow?
+ INPUT.ResetColumn();
+ }
+
+ PopAllIndents();
+ PopAllSimpleKeys();
+
+ m_simpleKeyAllowed = false;
+ m_endedStream = true;
+}
+
+Token* Scanner::PushToken(Token::TYPE type) {
+ m_tokens.push(Token(type, INPUT.mark()));
+ return &m_tokens.back();
+}
+
+Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const {
+ switch (type) {
+ case IndentMarker::SEQ:
+ return Token::BLOCK_SEQ_START;
+ case IndentMarker::MAP:
+ return Token::BLOCK_MAP_START;
+ case IndentMarker::NONE:
+ assert(false);
+ break;
+ }
+ assert(false);
+ throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
+}
+
+Scanner::IndentMarker* Scanner::PushIndentTo(int column,
+ IndentMarker::INDENT_TYPE type) {
+ // are we in flow?
if (InFlowContext()) {
- return 0;
+ return 0;
}
-
+
std::unique_ptr<IndentMarker> pIndent(new IndentMarker(column, type));
- IndentMarker& indent = *pIndent;
- const IndentMarker& lastIndent = *m_indents.top();
-
- // is this actually an indentation?
+ IndentMarker& indent = *pIndent;
+ const IndentMarker& lastIndent = *m_indents.top();
+
+ // is this actually an indentation?
if (indent.column < lastIndent.column) {
- return 0;
+ return 0;
}
- if (indent.column == lastIndent.column &&
- !(indent.type == IndentMarker::SEQ &&
+ if (indent.column == lastIndent.column &&
+ !(indent.type == IndentMarker::SEQ &&
lastIndent.type == IndentMarker::MAP)) {
- return 0;
- }
-
- // push a start token
- indent.pStartToken = PushToken(GetStartTokenFor(type));
-
- // and then the indent
- m_indents.push(&indent);
+ return 0;
+ }
+
+ // push a start token
+ indent.pStartToken = PushToken(GetStartTokenFor(type));
+
+ // and then the indent
+ m_indents.push(&indent);
m_indentRefs.push_back(std::move(pIndent));
- return &m_indentRefs.back();
-}
-
-void Scanner::PopIndentToHere() {
- // are we in flow?
+ return &m_indentRefs.back();
+}
+
+void Scanner::PopIndentToHere() {
+ // are we in flow?
if (InFlowContext()) {
- return;
+ return;
}
-
- // now pop away
- while (!m_indents.empty()) {
- const IndentMarker& indent = *m_indents.top();
+
+ // now pop away
+ while (!m_indents.empty()) {
+ const IndentMarker& indent = *m_indents.top();
if (indent.column < INPUT.column()) {
- break;
+ break;
}
- if (indent.column == INPUT.column() &&
- !(indent.type == IndentMarker::SEQ &&
+ if (indent.column == INPUT.column() &&
+ !(indent.type == IndentMarker::SEQ &&
!Exp::BlockEntry().Matches(INPUT))) {
- break;
+ break;
}
-
- PopIndent();
- }
-
+
+ PopIndent();
+ }
+
while (!m_indents.empty() &&
m_indents.top()->status == IndentMarker::INVALID) {
- PopIndent();
+ PopIndent();
}
-}
-
-void Scanner::PopAllIndents() {
- // are we in flow?
+}
+
+void Scanner::PopAllIndents() {
+ // are we in flow?
if (InFlowContext()) {
- return;
+ return;
}
-
- // now pop away
- while (!m_indents.empty()) {
- const IndentMarker& indent = *m_indents.top();
+
+ // now pop away
+ while (!m_indents.empty()) {
+ const IndentMarker& indent = *m_indents.top();
if (indent.type == IndentMarker::NONE) {
- break;
+ break;
}
-
- PopIndent();
- }
-}
-
-void Scanner::PopIndent() {
- const IndentMarker& indent = *m_indents.top();
- m_indents.pop();
-
- if (indent.status != IndentMarker::VALID) {
- InvalidateSimpleKey();
- return;
- }
-
+
+ PopIndent();
+ }
+}
+
+void Scanner::PopIndent() {
+ const IndentMarker& indent = *m_indents.top();
+ m_indents.pop();
+
+ if (indent.status != IndentMarker::VALID) {
+ InvalidateSimpleKey();
+ return;
+ }
+
if (indent.type == IndentMarker::SEQ) {
- m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark()));
+ m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark()));
} else if (indent.type == IndentMarker::MAP) {
- m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark()));
+ m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark()));
}
-}
-
-int Scanner::GetTopIndent() const {
+}
+
+int Scanner::GetTopIndent() const {
if (m_indents.empty()) {
- return 0;
- }
- return m_indents.top()->column;
-}
-
-void Scanner::ThrowParserException(const std::string& msg) const {
- Mark mark = Mark::null_mark();
- if (!m_tokens.empty()) {
- const Token& token = m_tokens.front();
- mark = token.mark;
- }
- throw ParserException(mark, msg);
-}
+ return 0;
+ }
+ return m_indents.top()->column;
+}
+
+void Scanner::ThrowParserException(const std::string& msg) const {
+ Mark mark = Mark::null_mark();
+ if (!m_tokens.empty()) {
+ const Token& token = m_tokens.front();
+ mark = token.mark;
+ }
+ throw ParserException(mark, msg);
+}
} // namespace YAML
diff --git a/contrib/libs/yaml-cpp/src/scanner.h b/contrib/libs/yaml-cpp/src/scanner.h
index 62a385b3ea..7bb2ccc71a 100644
--- a/contrib/libs/yaml-cpp/src/scanner.h
+++ b/contrib/libs/yaml-cpp/src/scanner.h
@@ -1,96 +1,96 @@
-#ifndef SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <cstddef>
-#include <ios>
-#include <map>
-#include <queue>
-#include <set>
-#include <stack>
-#include <string>
-
-#include "ptr_vector.h"
-#include "stream.h"
-#include "token.h"
-#include "yaml-cpp/mark.h"
-
-namespace YAML {
-class Node;
-class RegEx;
-
+#ifndef SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <cstddef>
+#include <ios>
+#include <map>
+#include <queue>
+#include <set>
+#include <stack>
+#include <string>
+
+#include "ptr_vector.h"
+#include "stream.h"
+#include "token.h"
+#include "yaml-cpp/mark.h"
+
+namespace YAML {
+class Node;
+class RegEx;
+
/**
* A scanner transforms a stream of characters into a stream of tokens.
*/
-class Scanner {
- public:
+class Scanner {
+ public:
explicit Scanner(std::istream &in);
- ~Scanner();
-
+ ~Scanner();
+
/** Returns true if there are no more tokens to be read. */
- bool empty();
+ bool empty();
/** Removes the next token in the queue. */
- void pop();
+ void pop();
/** Returns, but does not remove, the next token in the queue. */
- Token &peek();
+ Token &peek();
/** Returns the current mark in the input stream. */
- Mark mark() const;
-
- private:
- struct IndentMarker {
- enum INDENT_TYPE { MAP, SEQ, NONE };
- enum STATUS { VALID, INVALID, UNKNOWN };
- IndentMarker(int column_, INDENT_TYPE type_)
- : column(column_), type(type_), status(VALID), pStartToken(0) {}
-
- int column;
- INDENT_TYPE type;
- STATUS status;
- Token *pStartToken;
- };
-
- enum FLOW_MARKER { FLOW_MAP, FLOW_SEQ };
-
- private:
- // scanning
+ Mark mark() const;
+
+ private:
+ struct IndentMarker {
+ enum INDENT_TYPE { MAP, SEQ, NONE };
+ enum STATUS { VALID, INVALID, UNKNOWN };
+ IndentMarker(int column_, INDENT_TYPE type_)
+ : column(column_), type(type_), status(VALID), pStartToken(0) {}
+
+ int column;
+ INDENT_TYPE type;
+ STATUS status;
+ Token *pStartToken;
+ };
+
+ enum FLOW_MARKER { FLOW_MAP, FLOW_SEQ };
+
+ private:
+ // scanning
/**
* Scans until there's a valid token at the front of the queue, or the queue
* is empty. The state can be checked by {@link #empty}, and the next token
* retrieved by {@link #peek}.
*/
- void EnsureTokensInQueue();
+ void EnsureTokensInQueue();
/**
* The main scanning function; this method branches out to scan whatever the
* next token should be.
*/
- void ScanNextToken();
+ void ScanNextToken();
/** Eats the input stream until it reaches the next token-like thing. */
- void ScanToNextToken();
+ void ScanToNextToken();
/** Sets the initial conditions for starting a stream. */
- void StartStream();
+ void StartStream();
/** Closes out the stream, finish up, etc. */
- void EndStream();
+ void EndStream();
+
+ Token *PushToken(Token::TYPE type);
+
+ bool InFlowContext() const { return !m_flows.empty(); }
+ bool InBlockContext() const { return m_flows.empty(); }
+ std::size_t GetFlowLevel() const { return m_flows.size(); }
- Token *PushToken(Token::TYPE type);
-
- bool InFlowContext() const { return !m_flows.empty(); }
- bool InBlockContext() const { return m_flows.empty(); }
- std::size_t GetFlowLevel() const { return m_flows.size(); }
-
- Token::TYPE GetStartTokenFor(IndentMarker::INDENT_TYPE type) const;
+ Token::TYPE GetStartTokenFor(IndentMarker::INDENT_TYPE type) const;
/**
* Pushes an indentation onto the stack, and enqueues the proper token
@@ -98,93 +98,93 @@ class Scanner {
*
* @return the indent marker it generates (if any).
*/
- IndentMarker *PushIndentTo(int column, IndentMarker::INDENT_TYPE type);
+ IndentMarker *PushIndentTo(int column, IndentMarker::INDENT_TYPE type);
/**
* Pops indentations off the stack until it reaches the current indentation
* level, and enqueues the proper token each time. Then pops all invalid
* indentations off.
*/
- void PopIndentToHere();
+ void PopIndentToHere();
/**
* Pops all indentations (except for the base empty one) off the stack, and
* enqueues the proper token each time.
*/
- void PopAllIndents();
+ void PopAllIndents();
/** Pops a single indent, pushing the proper token. */
- void PopIndent();
- int GetTopIndent() const;
-
- // checking input
- bool CanInsertPotentialSimpleKey() const;
- bool ExistsActiveSimpleKey() const;
- void InsertPotentialSimpleKey();
- void InvalidateSimpleKey();
- bool VerifySimpleKey();
- void PopAllSimpleKeys();
-
+ void PopIndent();
+ int GetTopIndent() const;
+
+ // checking input
+ bool CanInsertPotentialSimpleKey() const;
+ bool ExistsActiveSimpleKey() const;
+ void InsertPotentialSimpleKey();
+ void InvalidateSimpleKey();
+ bool VerifySimpleKey();
+ void PopAllSimpleKeys();
+
/**
* Throws a ParserException with the current token location (if available),
* and does not parse any more tokens.
*/
- void ThrowParserException(const std::string &msg) const;
-
- bool IsWhitespaceToBeEaten(char ch);
+ void ThrowParserException(const std::string &msg) const;
+
+ bool IsWhitespaceToBeEaten(char ch);
/**
* Returns the appropriate regex to check if the next token is a value token.
*/
- const RegEx &GetValueRegex() const;
-
- struct SimpleKey {
- SimpleKey(const Mark &mark_, std::size_t flowLevel_);
-
- void Validate();
- void Invalidate();
-
- Mark mark;
- std::size_t flowLevel;
- IndentMarker *pIndent;
- Token *pMapStart, *pKey;
- };
-
- // and the tokens
- void ScanDirective();
- void ScanDocStart();
- void ScanDocEnd();
- void ScanBlockSeqStart();
- void ScanBlockMapSTart();
- void ScanBlockEnd();
- void ScanBlockEntry();
- void ScanFlowStart();
- void ScanFlowEnd();
- void ScanFlowEntry();
- void ScanKey();
- void ScanValue();
- void ScanAnchorOrAlias();
- void ScanTag();
- void ScanPlainScalar();
- void ScanQuotedScalar();
- void ScanBlockScalar();
-
- private:
- // the stream
- Stream INPUT;
-
- // the output (tokens)
- std::queue<Token> m_tokens;
-
- // state info
- bool m_startedStream, m_endedStream;
- bool m_simpleKeyAllowed;
- bool m_canBeJSONFlow;
- std::stack<SimpleKey> m_simpleKeys;
- std::stack<IndentMarker *> m_indents;
- ptr_vector<IndentMarker> m_indentRefs; // for "garbage collection"
- std::stack<FLOW_MARKER> m_flows;
-};
-}
-
-#endif // SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+ const RegEx &GetValueRegex() const;
+
+ struct SimpleKey {
+ SimpleKey(const Mark &mark_, std::size_t flowLevel_);
+
+ void Validate();
+ void Invalidate();
+
+ Mark mark;
+ std::size_t flowLevel;
+ IndentMarker *pIndent;
+ Token *pMapStart, *pKey;
+ };
+
+ // and the tokens
+ void ScanDirective();
+ void ScanDocStart();
+ void ScanDocEnd();
+ void ScanBlockSeqStart();
+ void ScanBlockMapSTart();
+ void ScanBlockEnd();
+ void ScanBlockEntry();
+ void ScanFlowStart();
+ void ScanFlowEnd();
+ void ScanFlowEntry();
+ void ScanKey();
+ void ScanValue();
+ void ScanAnchorOrAlias();
+ void ScanTag();
+ void ScanPlainScalar();
+ void ScanQuotedScalar();
+ void ScanBlockScalar();
+
+ private:
+ // the stream
+ Stream INPUT;
+
+ // the output (tokens)
+ std::queue<Token> m_tokens;
+
+ // state info
+ bool m_startedStream, m_endedStream;
+ bool m_simpleKeyAllowed;
+ bool m_canBeJSONFlow;
+ std::stack<SimpleKey> m_simpleKeys;
+ std::stack<IndentMarker *> m_indents;
+ ptr_vector<IndentMarker> m_indentRefs; // for "garbage collection"
+ std::stack<FLOW_MARKER> m_flows;
+};
+}
+
+#endif // SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/scanscalar.cpp b/contrib/libs/yaml-cpp/src/scanscalar.cpp
index 02faea08fb..10e359d446 100644
--- a/contrib/libs/yaml-cpp/src/scanscalar.cpp
+++ b/contrib/libs/yaml-cpp/src/scanscalar.cpp
@@ -1,250 +1,250 @@
-#include "scanscalar.h"
-
-#include <algorithm>
-
-#include "exp.h"
-#include "regeximpl.h"
-#include "stream.h"
-#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
-
-namespace YAML {
-// ScanScalar
-// . This is where the scalar magic happens.
-//
-// . We do the scanning in three phases:
-// 1. Scan until newline
-// 2. Eat newline
-// 3. Scan leading blanks.
-//
-// . Depending on the parameters given, we store or stop
-// and different places in the above flow.
-std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
- bool foundNonEmptyLine = false;
- bool pastOpeningBreak = (params.fold == FOLD_FLOW);
- bool emptyLine = false, moreIndented = false;
- int foldedNewlineCount = 0;
- bool foldedNewlineStartedMoreIndented = false;
- std::size_t lastEscapedChar = std::string::npos;
- std::string scalar;
- params.leadingSpaces = false;
-
+#include "scanscalar.h"
+
+#include <algorithm>
+
+#include "exp.h"
+#include "regeximpl.h"
+#include "stream.h"
+#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
+
+namespace YAML {
+// ScanScalar
+// . This is where the scalar magic happens.
+//
+// . We do the scanning in three phases:
+// 1. Scan until newline
+// 2. Eat newline
+// 3. Scan leading blanks.
+//
+// . Depending on the parameters given, we store or stop
+// and different places in the above flow.
+std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
+ bool foundNonEmptyLine = false;
+ bool pastOpeningBreak = (params.fold == FOLD_FLOW);
+ bool emptyLine = false, moreIndented = false;
+ int foldedNewlineCount = 0;
+ bool foldedNewlineStartedMoreIndented = false;
+ std::size_t lastEscapedChar = std::string::npos;
+ std::string scalar;
+ params.leadingSpaces = false;
+
if (!params.end) {
params.end = &Exp::Empty();
}
- while (INPUT) {
- // ********************************
- // Phase #1: scan until line ending
-
- std::size_t lastNonWhitespaceChar = scalar.size();
- bool escapedNewline = false;
+ while (INPUT) {
+ // ********************************
+ // Phase #1: scan until line ending
+
+ std::size_t lastNonWhitespaceChar = scalar.size();
+ bool escapedNewline = false;
while (!params.end->Matches(INPUT) && !Exp::Break().Matches(INPUT)) {
if (!INPUT) {
- break;
+ break;
}
-
- // document indicator?
- if (INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) {
+
+ // document indicator?
+ if (INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) {
if (params.onDocIndicator == BREAK) {
- break;
+ break;
} else if (params.onDocIndicator == THROW) {
- throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR);
+ throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR);
}
- }
-
- foundNonEmptyLine = true;
- pastOpeningBreak = true;
-
- // escaped newline? (only if we're escaping on slash)
- if (params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) {
- // eat escape character and get out (but preserve trailing whitespace!)
- INPUT.get();
- lastNonWhitespaceChar = scalar.size();
- lastEscapedChar = scalar.size();
- escapedNewline = true;
- break;
- }
-
- // escape this?
- if (INPUT.peek() == params.escape) {
- scalar += Exp::Escape(INPUT);
- lastNonWhitespaceChar = scalar.size();
- lastEscapedChar = scalar.size();
- continue;
- }
-
- // otherwise, just add the damn character
- char ch = INPUT.get();
- scalar += ch;
+ }
+
+ foundNonEmptyLine = true;
+ pastOpeningBreak = true;
+
+ // escaped newline? (only if we're escaping on slash)
+ if (params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) {
+ // eat escape character and get out (but preserve trailing whitespace!)
+ INPUT.get();
+ lastNonWhitespaceChar = scalar.size();
+ lastEscapedChar = scalar.size();
+ escapedNewline = true;
+ break;
+ }
+
+ // escape this?
+ if (INPUT.peek() == params.escape) {
+ scalar += Exp::Escape(INPUT);
+ lastNonWhitespaceChar = scalar.size();
+ lastEscapedChar = scalar.size();
+ continue;
+ }
+
+ // otherwise, just add the damn character
+ char ch = INPUT.get();
+ scalar += ch;
if (ch != ' ' && ch != '\t') {
- lastNonWhitespaceChar = scalar.size();
+ lastNonWhitespaceChar = scalar.size();
}
- }
-
- // eof? if we're looking to eat something, then we throw
- if (!INPUT) {
+ }
+
+ // eof? if we're looking to eat something, then we throw
+ if (!INPUT) {
if (params.eatEnd) {
- throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR);
+ throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR);
}
- break;
- }
-
- // doc indicator?
- if (params.onDocIndicator == BREAK && INPUT.column() == 0 &&
+ break;
+ }
+
+ // doc indicator?
+ if (params.onDocIndicator == BREAK && INPUT.column() == 0 &&
Exp::DocIndicator().Matches(INPUT)) {
- break;
+ break;
}
-
- // are we done via character match?
+
+ // are we done via character match?
int n = params.end->Match(INPUT);
- if (n >= 0) {
+ if (n >= 0) {
if (params.eatEnd) {
- INPUT.eat(n);
- }
- break;
- }
-
- // do we remove trailing whitespace?
- if (params.fold == FOLD_FLOW)
- scalar.erase(lastNonWhitespaceChar);
-
- // ********************************
- // Phase #2: eat line ending
- n = Exp::Break().Match(INPUT);
- INPUT.eat(n);
-
- // ********************************
- // Phase #3: scan initial spaces
-
- // first the required indentation
+ INPUT.eat(n);
+ }
+ break;
+ }
+
+ // do we remove trailing whitespace?
+ if (params.fold == FOLD_FLOW)
+ scalar.erase(lastNonWhitespaceChar);
+
+ // ********************************
+ // Phase #2: eat line ending
+ n = Exp::Break().Match(INPUT);
+ INPUT.eat(n);
+
+ // ********************************
+ // Phase #3: scan initial spaces
+
+ // first the required indentation
while (INPUT.peek() == ' ' &&
(INPUT.column() < params.indent ||
(params.detectIndent && !foundNonEmptyLine)) &&
!params.end->Matches(INPUT)) {
- INPUT.eat(1);
+ INPUT.eat(1);
}
-
- // update indent if we're auto-detecting
+
+ // update indent if we're auto-detecting
if (params.detectIndent && !foundNonEmptyLine) {
- params.indent = std::max(params.indent, INPUT.column());
+ params.indent = std::max(params.indent, INPUT.column());
}
-
- // and then the rest of the whitespace
- while (Exp::Blank().Matches(INPUT)) {
- // we check for tabs that masquerade as indentation
- if (INPUT.peek() == '\t' && INPUT.column() < params.indent &&
+
+ // and then the rest of the whitespace
+ while (Exp::Blank().Matches(INPUT)) {
+ // we check for tabs that masquerade as indentation
+ if (INPUT.peek() == '\t' && INPUT.column() < params.indent &&
params.onTabInIndentation == THROW) {
- throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION);
+ throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION);
}
-
+
if (!params.eatLeadingWhitespace) {
- break;
+ break;
}
-
+
if (params.end->Matches(INPUT)) {
break;
}
- INPUT.eat(1);
- }
-
- // was this an empty line?
- bool nextEmptyLine = Exp::Break().Matches(INPUT);
- bool nextMoreIndented = Exp::Blank().Matches(INPUT);
- if (params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine)
- foldedNewlineStartedMoreIndented = moreIndented;
-
- // for block scalars, we always start with a newline, so we should ignore it
- // (not fold or keep)
- if (pastOpeningBreak) {
- switch (params.fold) {
- case DONT_FOLD:
- scalar += "\n";
- break;
- case FOLD_BLOCK:
- if (!emptyLine && !nextEmptyLine && !moreIndented &&
+ INPUT.eat(1);
+ }
+
+ // was this an empty line?
+ bool nextEmptyLine = Exp::Break().Matches(INPUT);
+ bool nextMoreIndented = Exp::Blank().Matches(INPUT);
+ if (params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine)
+ foldedNewlineStartedMoreIndented = moreIndented;
+
+ // for block scalars, we always start with a newline, so we should ignore it
+ // (not fold or keep)
+ if (pastOpeningBreak) {
+ switch (params.fold) {
+ case DONT_FOLD:
+ scalar += "\n";
+ break;
+ case FOLD_BLOCK:
+ if (!emptyLine && !nextEmptyLine && !moreIndented &&
!nextMoreIndented && INPUT.column() >= params.indent) {
- scalar += " ";
+ scalar += " ";
} else if (nextEmptyLine) {
- foldedNewlineCount++;
+ foldedNewlineCount++;
} else {
- scalar += "\n";
+ scalar += "\n";
}
-
- if (!nextEmptyLine && foldedNewlineCount > 0) {
- scalar += std::string(foldedNewlineCount - 1, '\n');
- if (foldedNewlineStartedMoreIndented ||
+
+ if (!nextEmptyLine && foldedNewlineCount > 0) {
+ scalar += std::string(foldedNewlineCount - 1, '\n');
+ if (foldedNewlineStartedMoreIndented ||
nextMoreIndented | !foundNonEmptyLine) {
- scalar += "\n";
+ scalar += "\n";
}
- foldedNewlineCount = 0;
- }
- break;
- case FOLD_FLOW:
+ foldedNewlineCount = 0;
+ }
+ break;
+ case FOLD_FLOW:
if (nextEmptyLine) {
- scalar += "\n";
+ scalar += "\n";
} else if (!emptyLine && !nextEmptyLine && !escapedNewline) {
- scalar += " ";
+ scalar += " ";
}
- break;
- }
- }
-
- emptyLine = nextEmptyLine;
- moreIndented = nextMoreIndented;
- pastOpeningBreak = true;
-
- // are we done via indentation?
- if (!emptyLine && INPUT.column() < params.indent) {
- params.leadingSpaces = true;
- break;
- }
- }
-
- // post-processing
- if (params.trimTrailingSpaces) {
- std::size_t pos = scalar.find_last_not_of(' ');
- if (lastEscapedChar != std::string::npos) {
+ break;
+ }
+ }
+
+ emptyLine = nextEmptyLine;
+ moreIndented = nextMoreIndented;
+ pastOpeningBreak = true;
+
+ // are we done via indentation?
+ if (!emptyLine && INPUT.column() < params.indent) {
+ params.leadingSpaces = true;
+ break;
+ }
+ }
+
+ // post-processing
+ if (params.trimTrailingSpaces) {
+ std::size_t pos = scalar.find_last_not_of(' ');
+ if (lastEscapedChar != std::string::npos) {
if (pos < lastEscapedChar || pos == std::string::npos) {
- pos = lastEscapedChar;
+ pos = lastEscapedChar;
}
- }
+ }
if (pos < scalar.size()) {
- scalar.erase(pos + 1);
+ scalar.erase(pos + 1);
}
- }
-
- switch (params.chomp) {
- case CLIP: {
- std::size_t pos = scalar.find_last_not_of('\n');
- if (lastEscapedChar != std::string::npos) {
+ }
+
+ switch (params.chomp) {
+ case CLIP: {
+ std::size_t pos = scalar.find_last_not_of('\n');
+ if (lastEscapedChar != std::string::npos) {
if (pos < lastEscapedChar || pos == std::string::npos) {
- pos = lastEscapedChar;
+ pos = lastEscapedChar;
}
- }
+ }
if (pos == std::string::npos) {
- scalar.erase();
+ scalar.erase();
} else if (pos + 1 < scalar.size()) {
- scalar.erase(pos + 2);
+ scalar.erase(pos + 2);
}
- } break;
- case STRIP: {
- std::size_t pos = scalar.find_last_not_of('\n');
- if (lastEscapedChar != std::string::npos) {
+ } break;
+ case STRIP: {
+ std::size_t pos = scalar.find_last_not_of('\n');
+ if (lastEscapedChar != std::string::npos) {
if (pos < lastEscapedChar || pos == std::string::npos) {
- pos = lastEscapedChar;
+ pos = lastEscapedChar;
}
- }
+ }
if (pos == std::string::npos) {
- scalar.erase();
+ scalar.erase();
} else if (pos < scalar.size()) {
- scalar.erase(pos + 1);
- }
- } break;
- default:
- break;
- }
-
- return scalar;
-}
-}
+ scalar.erase(pos + 1);
+ }
+ } break;
+ default:
+ break;
+ }
+
+ return scalar;
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/scanscalar.h b/contrib/libs/yaml-cpp/src/scanscalar.h
index f7fce060ae..c3a574ad9b 100644
--- a/contrib/libs/yaml-cpp/src/scanscalar.h
+++ b/contrib/libs/yaml-cpp/src/scanscalar.h
@@ -1,63 +1,63 @@
-#ifndef SCANSCALAR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define SCANSCALAR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <string>
-
-#include "regex_yaml.h"
-#include "stream.h"
-
-namespace YAML {
-enum CHOMP { STRIP = -1, CLIP, KEEP };
-enum ACTION { NONE, BREAK, THROW };
-enum FOLD { DONT_FOLD, FOLD_BLOCK, FOLD_FLOW };
-
-struct ScanScalarParams {
- ScanScalarParams()
+#ifndef SCANSCALAR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define SCANSCALAR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+
+#include "regex_yaml.h"
+#include "stream.h"
+
+namespace YAML {
+enum CHOMP { STRIP = -1, CLIP, KEEP };
+enum ACTION { NONE, BREAK, THROW };
+enum FOLD { DONT_FOLD, FOLD_BLOCK, FOLD_FLOW };
+
+struct ScanScalarParams {
+ ScanScalarParams()
: end(nullptr),
eatEnd(false),
- indent(0),
- detectIndent(false),
- eatLeadingWhitespace(0),
- escape(0),
- fold(DONT_FOLD),
- trimTrailingSpaces(0),
- chomp(CLIP),
- onDocIndicator(NONE),
- onTabInIndentation(NONE),
- leadingSpaces(false) {}
-
- // input:
+ indent(0),
+ detectIndent(false),
+ eatLeadingWhitespace(0),
+ escape(0),
+ fold(DONT_FOLD),
+ trimTrailingSpaces(0),
+ chomp(CLIP),
+ onDocIndicator(NONE),
+ onTabInIndentation(NONE),
+ leadingSpaces(false) {}
+
+ // input:
const RegEx* end; // what condition ends this scalar?
// unowned.
- bool eatEnd; // should we eat that condition when we see it?
- int indent; // what level of indentation should be eaten and ignored?
- bool detectIndent; // should we try to autodetect the indent?
- bool eatLeadingWhitespace; // should we continue eating this delicious
- // indentation after 'indent' spaces?
- char escape; // what character do we escape on (i.e., slash or single quote)
- // (0 for none)
- FOLD fold; // how do we fold line ends?
- bool trimTrailingSpaces; // do we remove all trailing spaces (at the very
- // end)
- CHOMP chomp; // do we strip, clip, or keep trailing newlines (at the very
- // end)
- // Note: strip means kill all, clip means keep at most one, keep means keep
- // all
- ACTION onDocIndicator; // what do we do if we see a document indicator?
- ACTION onTabInIndentation; // what do we do if we see a tab where we should
- // be seeing indentation spaces
-
- // output:
- bool leadingSpaces;
-};
-
-std::string ScanScalar(Stream& INPUT, ScanScalarParams& info);
-}
-
-#endif // SCANSCALAR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+ bool eatEnd; // should we eat that condition when we see it?
+ int indent; // what level of indentation should be eaten and ignored?
+ bool detectIndent; // should we try to autodetect the indent?
+ bool eatLeadingWhitespace; // should we continue eating this delicious
+ // indentation after 'indent' spaces?
+ char escape; // what character do we escape on (i.e., slash or single quote)
+ // (0 for none)
+ FOLD fold; // how do we fold line ends?
+ bool trimTrailingSpaces; // do we remove all trailing spaces (at the very
+ // end)
+ CHOMP chomp; // do we strip, clip, or keep trailing newlines (at the very
+ // end)
+ // Note: strip means kill all, clip means keep at most one, keep means keep
+ // all
+ ACTION onDocIndicator; // what do we do if we see a document indicator?
+ ACTION onTabInIndentation; // what do we do if we see a tab where we should
+ // be seeing indentation spaces
+
+ // output:
+ bool leadingSpaces;
+};
+
+std::string ScanScalar(Stream& INPUT, ScanScalarParams& info);
+}
+
+#endif // SCANSCALAR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/scantag.cpp b/contrib/libs/yaml-cpp/src/scantag.cpp
index f775ed5758..c5b39652ad 100644
--- a/contrib/libs/yaml-cpp/src/scantag.cpp
+++ b/contrib/libs/yaml-cpp/src/scantag.cpp
@@ -1,81 +1,81 @@
-#include "exp.h"
-#include "regex_yaml.h"
-#include "regeximpl.h"
-#include "stream.h"
-#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
-#include "yaml-cpp/mark.h"
-
-namespace YAML {
-const std::string ScanVerbatimTag(Stream& INPUT) {
- std::string tag;
-
- // eat the start character
- INPUT.get();
-
- while (INPUT) {
- if (INPUT.peek() == Keys::VerbatimTagEnd) {
- // eat the end character
- INPUT.get();
- return tag;
- }
-
- int n = Exp::URI().Match(INPUT);
- if (n <= 0)
- break;
-
- tag += INPUT.get(n);
- }
-
- throw ParserException(INPUT.mark(), ErrorMsg::END_OF_VERBATIM_TAG);
-}
-
-const std::string ScanTagHandle(Stream& INPUT, bool& canBeHandle) {
- std::string tag;
- canBeHandle = true;
- Mark firstNonWordChar;
-
- while (INPUT) {
- if (INPUT.peek() == Keys::Tag) {
- if (!canBeHandle)
- throw ParserException(firstNonWordChar, ErrorMsg::CHAR_IN_TAG_HANDLE);
- break;
- }
-
- int n = 0;
- if (canBeHandle) {
- n = Exp::Word().Match(INPUT);
- if (n <= 0) {
- canBeHandle = false;
- firstNonWordChar = INPUT.mark();
- }
- }
-
- if (!canBeHandle)
- n = Exp::Tag().Match(INPUT);
-
- if (n <= 0)
- break;
-
- tag += INPUT.get(n);
- }
-
- return tag;
-}
-
-const std::string ScanTagSuffix(Stream& INPUT) {
- std::string tag;
-
- while (INPUT) {
- int n = Exp::Tag().Match(INPUT);
- if (n <= 0)
- break;
-
- tag += INPUT.get(n);
- }
-
- if (tag.empty())
- throw ParserException(INPUT.mark(), ErrorMsg::TAG_WITH_NO_SUFFIX);
-
- return tag;
-}
-}
+#include "exp.h"
+#include "regex_yaml.h"
+#include "regeximpl.h"
+#include "stream.h"
+#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
+#include "yaml-cpp/mark.h"
+
+namespace YAML {
+const std::string ScanVerbatimTag(Stream& INPUT) {
+ std::string tag;
+
+ // eat the start character
+ INPUT.get();
+
+ while (INPUT) {
+ if (INPUT.peek() == Keys::VerbatimTagEnd) {
+ // eat the end character
+ INPUT.get();
+ return tag;
+ }
+
+ int n = Exp::URI().Match(INPUT);
+ if (n <= 0)
+ break;
+
+ tag += INPUT.get(n);
+ }
+
+ throw ParserException(INPUT.mark(), ErrorMsg::END_OF_VERBATIM_TAG);
+}
+
+const std::string ScanTagHandle(Stream& INPUT, bool& canBeHandle) {
+ std::string tag;
+ canBeHandle = true;
+ Mark firstNonWordChar;
+
+ while (INPUT) {
+ if (INPUT.peek() == Keys::Tag) {
+ if (!canBeHandle)
+ throw ParserException(firstNonWordChar, ErrorMsg::CHAR_IN_TAG_HANDLE);
+ break;
+ }
+
+ int n = 0;
+ if (canBeHandle) {
+ n = Exp::Word().Match(INPUT);
+ if (n <= 0) {
+ canBeHandle = false;
+ firstNonWordChar = INPUT.mark();
+ }
+ }
+
+ if (!canBeHandle)
+ n = Exp::Tag().Match(INPUT);
+
+ if (n <= 0)
+ break;
+
+ tag += INPUT.get(n);
+ }
+
+ return tag;
+}
+
+const std::string ScanTagSuffix(Stream& INPUT) {
+ std::string tag;
+
+ while (INPUT) {
+ int n = Exp::Tag().Match(INPUT);
+ if (n <= 0)
+ break;
+
+ tag += INPUT.get(n);
+ }
+
+ if (tag.empty())
+ throw ParserException(INPUT.mark(), ErrorMsg::TAG_WITH_NO_SUFFIX);
+
+ return tag;
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/scantag.h b/contrib/libs/yaml-cpp/src/scantag.h
index e904751b31..522ba5495e 100644
--- a/contrib/libs/yaml-cpp/src/scantag.h
+++ b/contrib/libs/yaml-cpp/src/scantag.h
@@ -1,19 +1,19 @@
-#ifndef SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <string>
-#include "stream.h"
-
-namespace YAML {
-const std::string ScanVerbatimTag(Stream& INPUT);
-const std::string ScanTagHandle(Stream& INPUT, bool& canBeHandle);
-const std::string ScanTagSuffix(Stream& INPUT);
-}
-
-#endif // SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+#include "stream.h"
+
+namespace YAML {
+const std::string ScanVerbatimTag(Stream& INPUT);
+const std::string ScanTagHandle(Stream& INPUT, bool& canBeHandle);
+const std::string ScanTagSuffix(Stream& INPUT);
+}
+
+#endif // SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/scantoken.cpp b/contrib/libs/yaml-cpp/src/scantoken.cpp
index 4b308a78d5..fd8758d781 100644
--- a/contrib/libs/yaml-cpp/src/scantoken.cpp
+++ b/contrib/libs/yaml-cpp/src/scantoken.cpp
@@ -1,437 +1,437 @@
-#include <sstream>
-
-#include "exp.h"
-#include "regex_yaml.h"
-#include "regeximpl.h"
-#include "scanner.h"
-#include "scanscalar.h"
-#include "scantag.h" // IWYU pragma: keep
-#include "tag.h" // IWYU pragma: keep
-#include "token.h"
-#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
-#include "yaml-cpp/mark.h"
-
-namespace YAML {
-///////////////////////////////////////////////////////////////////////
-// Specialization for scanning specific tokens
-
-// Directive
-// . Note: no semantic checking is done here (that's for the parser to do)
-void Scanner::ScanDirective() {
- std::string name;
- std::vector<std::string> params;
-
- // pop indents and simple keys
- PopAllIndents();
- PopAllSimpleKeys();
-
- m_simpleKeyAllowed = false;
- m_canBeJSONFlow = false;
-
- // store pos and eat indicator
- Token token(Token::DIRECTIVE, INPUT.mark());
- INPUT.eat(1);
-
- // read name
- while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
- token.value += INPUT.get();
-
- // read parameters
- while (1) {
- // first get rid of whitespace
- while (Exp::Blank().Matches(INPUT))
- INPUT.eat(1);
-
- // break on newline or comment
- if (!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT))
- break;
-
- // now read parameter
- std::string param;
- while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
- param += INPUT.get();
-
- token.params.push_back(param);
- }
-
- m_tokens.push(token);
-}
-
-// DocStart
-void Scanner::ScanDocStart() {
- PopAllIndents();
- PopAllSimpleKeys();
- m_simpleKeyAllowed = false;
- m_canBeJSONFlow = false;
-
- // eat
- Mark mark = INPUT.mark();
- INPUT.eat(3);
- m_tokens.push(Token(Token::DOC_START, mark));
-}
-
-// DocEnd
-void Scanner::ScanDocEnd() {
- PopAllIndents();
- PopAllSimpleKeys();
- m_simpleKeyAllowed = false;
- m_canBeJSONFlow = false;
-
- // eat
- Mark mark = INPUT.mark();
- INPUT.eat(3);
- m_tokens.push(Token(Token::DOC_END, mark));
-}
-
-// FlowStart
-void Scanner::ScanFlowStart() {
- // flows can be simple keys
- InsertPotentialSimpleKey();
- m_simpleKeyAllowed = true;
- m_canBeJSONFlow = false;
-
- // eat
- Mark mark = INPUT.mark();
- char ch = INPUT.get();
- FLOW_MARKER flowType = (ch == Keys::FlowSeqStart ? FLOW_SEQ : FLOW_MAP);
- m_flows.push(flowType);
- Token::TYPE type =
- (flowType == FLOW_SEQ ? Token::FLOW_SEQ_START : Token::FLOW_MAP_START);
- m_tokens.push(Token(type, mark));
-}
-
-// FlowEnd
-void Scanner::ScanFlowEnd() {
- if (InBlockContext())
- throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END);
-
- // we might have a solo entry in the flow context
- if (InFlowContext()) {
- if (m_flows.top() == FLOW_MAP && VerifySimpleKey())
- m_tokens.push(Token(Token::VALUE, INPUT.mark()));
- else if (m_flows.top() == FLOW_SEQ)
- InvalidateSimpleKey();
- }
-
- m_simpleKeyAllowed = false;
- m_canBeJSONFlow = true;
-
- // eat
- Mark mark = INPUT.mark();
- char ch = INPUT.get();
-
- // check that it matches the start
- FLOW_MARKER flowType = (ch == Keys::FlowSeqEnd ? FLOW_SEQ : FLOW_MAP);
- if (m_flows.top() != flowType)
- throw ParserException(mark, ErrorMsg::FLOW_END);
- m_flows.pop();
-
- Token::TYPE type = (flowType ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END);
- m_tokens.push(Token(type, mark));
-}
-
-// FlowEntry
-void Scanner::ScanFlowEntry() {
- // we might have a solo entry in the flow context
- if (InFlowContext()) {
- if (m_flows.top() == FLOW_MAP && VerifySimpleKey())
- m_tokens.push(Token(Token::VALUE, INPUT.mark()));
- else if (m_flows.top() == FLOW_SEQ)
- InvalidateSimpleKey();
- }
-
- m_simpleKeyAllowed = true;
- m_canBeJSONFlow = false;
-
- // eat
- Mark mark = INPUT.mark();
- INPUT.eat(1);
- m_tokens.push(Token(Token::FLOW_ENTRY, mark));
-}
-
-// BlockEntry
-void Scanner::ScanBlockEntry() {
- // we better be in the block context!
- if (InFlowContext())
- throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
-
- // can we put it here?
- if (!m_simpleKeyAllowed)
- throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
-
- PushIndentTo(INPUT.column(), IndentMarker::SEQ);
- m_simpleKeyAllowed = true;
- m_canBeJSONFlow = false;
-
- // eat
- Mark mark = INPUT.mark();
- INPUT.eat(1);
- m_tokens.push(Token(Token::BLOCK_ENTRY, mark));
-}
-
-// Key
-void Scanner::ScanKey() {
- // handle keys diffently in the block context (and manage indents)
- if (InBlockContext()) {
- if (!m_simpleKeyAllowed)
- throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY);
-
- PushIndentTo(INPUT.column(), IndentMarker::MAP);
- }
-
- // can only put a simple key here if we're in block context
- m_simpleKeyAllowed = InBlockContext();
-
- // eat
- Mark mark = INPUT.mark();
- INPUT.eat(1);
- m_tokens.push(Token(Token::KEY, mark));
-}
-
-// Value
-void Scanner::ScanValue() {
- // and check that simple key
- bool isSimpleKey = VerifySimpleKey();
- m_canBeJSONFlow = false;
-
- if (isSimpleKey) {
- // can't follow a simple key with another simple key (dunno why, though - it
- // seems fine)
- m_simpleKeyAllowed = false;
- } else {
- // handle values diffently in the block context (and manage indents)
- if (InBlockContext()) {
- if (!m_simpleKeyAllowed)
- throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE);
-
- PushIndentTo(INPUT.column(), IndentMarker::MAP);
- }
-
- // can only put a simple key here if we're in block context
- m_simpleKeyAllowed = InBlockContext();
- }
-
- // eat
- Mark mark = INPUT.mark();
- INPUT.eat(1);
- m_tokens.push(Token(Token::VALUE, mark));
-}
-
-// AnchorOrAlias
-void Scanner::ScanAnchorOrAlias() {
- bool alias;
- std::string name;
-
- // insert a potential simple key
- InsertPotentialSimpleKey();
- m_simpleKeyAllowed = false;
- m_canBeJSONFlow = false;
-
- // eat the indicator
- Mark mark = INPUT.mark();
- char indicator = INPUT.get();
- alias = (indicator == Keys::Alias);
-
- // now eat the content
- while (INPUT && Exp::Anchor().Matches(INPUT))
- name += INPUT.get();
-
- // we need to have read SOMETHING!
- if (name.empty())
- throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND
- : ErrorMsg::ANCHOR_NOT_FOUND);
-
- // and needs to end correctly
- if (INPUT && !Exp::AnchorEnd().Matches(INPUT))
- throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS
- : ErrorMsg::CHAR_IN_ANCHOR);
-
- // and we're done
- Token token(alias ? Token::ALIAS : Token::ANCHOR, mark);
- token.value = name;
- m_tokens.push(token);
-}
-
-// Tag
-void Scanner::ScanTag() {
- // insert a potential simple key
- InsertPotentialSimpleKey();
- m_simpleKeyAllowed = false;
- m_canBeJSONFlow = false;
-
- Token token(Token::TAG, INPUT.mark());
-
- // eat the indicator
- INPUT.get();
-
- if (INPUT && INPUT.peek() == Keys::VerbatimTagStart) {
- std::string tag = ScanVerbatimTag(INPUT);
-
- token.value = tag;
- token.data = Tag::VERBATIM;
- } else {
- bool canBeHandle;
- token.value = ScanTagHandle(INPUT, canBeHandle);
- if (!canBeHandle && token.value.empty())
- token.data = Tag::NON_SPECIFIC;
- else if (token.value.empty())
- token.data = Tag::SECONDARY_HANDLE;
- else
- token.data = Tag::PRIMARY_HANDLE;
-
- // is there a suffix?
- if (canBeHandle && INPUT.peek() == Keys::Tag) {
- // eat the indicator
- INPUT.get();
- token.params.push_back(ScanTagSuffix(INPUT));
- token.data = Tag::NAMED_HANDLE;
- }
- }
-
- m_tokens.push(token);
-}
-
-// PlainScalar
-void Scanner::ScanPlainScalar() {
- std::string scalar;
-
- // set up the scanning parameters
- ScanScalarParams params;
+#include <sstream>
+
+#include "exp.h"
+#include "regex_yaml.h"
+#include "regeximpl.h"
+#include "scanner.h"
+#include "scanscalar.h"
+#include "scantag.h" // IWYU pragma: keep
+#include "tag.h" // IWYU pragma: keep
+#include "token.h"
+#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
+#include "yaml-cpp/mark.h"
+
+namespace YAML {
+///////////////////////////////////////////////////////////////////////
+// Specialization for scanning specific tokens
+
+// Directive
+// . Note: no semantic checking is done here (that's for the parser to do)
+void Scanner::ScanDirective() {
+ std::string name;
+ std::vector<std::string> params;
+
+ // pop indents and simple keys
+ PopAllIndents();
+ PopAllSimpleKeys();
+
+ m_simpleKeyAllowed = false;
+ m_canBeJSONFlow = false;
+
+ // store pos and eat indicator
+ Token token(Token::DIRECTIVE, INPUT.mark());
+ INPUT.eat(1);
+
+ // read name
+ while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
+ token.value += INPUT.get();
+
+ // read parameters
+ while (1) {
+ // first get rid of whitespace
+ while (Exp::Blank().Matches(INPUT))
+ INPUT.eat(1);
+
+ // break on newline or comment
+ if (!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT))
+ break;
+
+ // now read parameter
+ std::string param;
+ while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
+ param += INPUT.get();
+
+ token.params.push_back(param);
+ }
+
+ m_tokens.push(token);
+}
+
+// DocStart
+void Scanner::ScanDocStart() {
+ PopAllIndents();
+ PopAllSimpleKeys();
+ m_simpleKeyAllowed = false;
+ m_canBeJSONFlow = false;
+
+ // eat
+ Mark mark = INPUT.mark();
+ INPUT.eat(3);
+ m_tokens.push(Token(Token::DOC_START, mark));
+}
+
+// DocEnd
+void Scanner::ScanDocEnd() {
+ PopAllIndents();
+ PopAllSimpleKeys();
+ m_simpleKeyAllowed = false;
+ m_canBeJSONFlow = false;
+
+ // eat
+ Mark mark = INPUT.mark();
+ INPUT.eat(3);
+ m_tokens.push(Token(Token::DOC_END, mark));
+}
+
+// FlowStart
+void Scanner::ScanFlowStart() {
+ // flows can be simple keys
+ InsertPotentialSimpleKey();
+ m_simpleKeyAllowed = true;
+ m_canBeJSONFlow = false;
+
+ // eat
+ Mark mark = INPUT.mark();
+ char ch = INPUT.get();
+ FLOW_MARKER flowType = (ch == Keys::FlowSeqStart ? FLOW_SEQ : FLOW_MAP);
+ m_flows.push(flowType);
+ Token::TYPE type =
+ (flowType == FLOW_SEQ ? Token::FLOW_SEQ_START : Token::FLOW_MAP_START);
+ m_tokens.push(Token(type, mark));
+}
+
+// FlowEnd
+void Scanner::ScanFlowEnd() {
+ if (InBlockContext())
+ throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END);
+
+ // we might have a solo entry in the flow context
+ if (InFlowContext()) {
+ if (m_flows.top() == FLOW_MAP && VerifySimpleKey())
+ m_tokens.push(Token(Token::VALUE, INPUT.mark()));
+ else if (m_flows.top() == FLOW_SEQ)
+ InvalidateSimpleKey();
+ }
+
+ m_simpleKeyAllowed = false;
+ m_canBeJSONFlow = true;
+
+ // eat
+ Mark mark = INPUT.mark();
+ char ch = INPUT.get();
+
+ // check that it matches the start
+ FLOW_MARKER flowType = (ch == Keys::FlowSeqEnd ? FLOW_SEQ : FLOW_MAP);
+ if (m_flows.top() != flowType)
+ throw ParserException(mark, ErrorMsg::FLOW_END);
+ m_flows.pop();
+
+ Token::TYPE type = (flowType ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END);
+ m_tokens.push(Token(type, mark));
+}
+
+// FlowEntry
+void Scanner::ScanFlowEntry() {
+ // we might have a solo entry in the flow context
+ if (InFlowContext()) {
+ if (m_flows.top() == FLOW_MAP && VerifySimpleKey())
+ m_tokens.push(Token(Token::VALUE, INPUT.mark()));
+ else if (m_flows.top() == FLOW_SEQ)
+ InvalidateSimpleKey();
+ }
+
+ m_simpleKeyAllowed = true;
+ m_canBeJSONFlow = false;
+
+ // eat
+ Mark mark = INPUT.mark();
+ INPUT.eat(1);
+ m_tokens.push(Token(Token::FLOW_ENTRY, mark));
+}
+
+// BlockEntry
+void Scanner::ScanBlockEntry() {
+ // we better be in the block context!
+ if (InFlowContext())
+ throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
+
+ // can we put it here?
+ if (!m_simpleKeyAllowed)
+ throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
+
+ PushIndentTo(INPUT.column(), IndentMarker::SEQ);
+ m_simpleKeyAllowed = true;
+ m_canBeJSONFlow = false;
+
+ // eat
+ Mark mark = INPUT.mark();
+ INPUT.eat(1);
+ m_tokens.push(Token(Token::BLOCK_ENTRY, mark));
+}
+
+// Key
+void Scanner::ScanKey() {
+ // handle keys diffently in the block context (and manage indents)
+ if (InBlockContext()) {
+ if (!m_simpleKeyAllowed)
+ throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY);
+
+ PushIndentTo(INPUT.column(), IndentMarker::MAP);
+ }
+
+ // can only put a simple key here if we're in block context
+ m_simpleKeyAllowed = InBlockContext();
+
+ // eat
+ Mark mark = INPUT.mark();
+ INPUT.eat(1);
+ m_tokens.push(Token(Token::KEY, mark));
+}
+
+// Value
+void Scanner::ScanValue() {
+ // and check that simple key
+ bool isSimpleKey = VerifySimpleKey();
+ m_canBeJSONFlow = false;
+
+ if (isSimpleKey) {
+ // can't follow a simple key with another simple key (dunno why, though - it
+ // seems fine)
+ m_simpleKeyAllowed = false;
+ } else {
+ // handle values diffently in the block context (and manage indents)
+ if (InBlockContext()) {
+ if (!m_simpleKeyAllowed)
+ throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE);
+
+ PushIndentTo(INPUT.column(), IndentMarker::MAP);
+ }
+
+ // can only put a simple key here if we're in block context
+ m_simpleKeyAllowed = InBlockContext();
+ }
+
+ // eat
+ Mark mark = INPUT.mark();
+ INPUT.eat(1);
+ m_tokens.push(Token(Token::VALUE, mark));
+}
+
+// AnchorOrAlias
+void Scanner::ScanAnchorOrAlias() {
+ bool alias;
+ std::string name;
+
+ // insert a potential simple key
+ InsertPotentialSimpleKey();
+ m_simpleKeyAllowed = false;
+ m_canBeJSONFlow = false;
+
+ // eat the indicator
+ Mark mark = INPUT.mark();
+ char indicator = INPUT.get();
+ alias = (indicator == Keys::Alias);
+
+ // now eat the content
+ while (INPUT && Exp::Anchor().Matches(INPUT))
+ name += INPUT.get();
+
+ // we need to have read SOMETHING!
+ if (name.empty())
+ throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND
+ : ErrorMsg::ANCHOR_NOT_FOUND);
+
+ // and needs to end correctly
+ if (INPUT && !Exp::AnchorEnd().Matches(INPUT))
+ throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS
+ : ErrorMsg::CHAR_IN_ANCHOR);
+
+ // and we're done
+ Token token(alias ? Token::ALIAS : Token::ANCHOR, mark);
+ token.value = name;
+ m_tokens.push(token);
+}
+
+// Tag
+void Scanner::ScanTag() {
+ // insert a potential simple key
+ InsertPotentialSimpleKey();
+ m_simpleKeyAllowed = false;
+ m_canBeJSONFlow = false;
+
+ Token token(Token::TAG, INPUT.mark());
+
+ // eat the indicator
+ INPUT.get();
+
+ if (INPUT && INPUT.peek() == Keys::VerbatimTagStart) {
+ std::string tag = ScanVerbatimTag(INPUT);
+
+ token.value = tag;
+ token.data = Tag::VERBATIM;
+ } else {
+ bool canBeHandle;
+ token.value = ScanTagHandle(INPUT, canBeHandle);
+ if (!canBeHandle && token.value.empty())
+ token.data = Tag::NON_SPECIFIC;
+ else if (token.value.empty())
+ token.data = Tag::SECONDARY_HANDLE;
+ else
+ token.data = Tag::PRIMARY_HANDLE;
+
+ // is there a suffix?
+ if (canBeHandle && INPUT.peek() == Keys::Tag) {
+ // eat the indicator
+ INPUT.get();
+ token.params.push_back(ScanTagSuffix(INPUT));
+ token.data = Tag::NAMED_HANDLE;
+ }
+ }
+
+ m_tokens.push(token);
+}
+
+// PlainScalar
+void Scanner::ScanPlainScalar() {
+ std::string scalar;
+
+ // set up the scanning parameters
+ ScanScalarParams params;
params.end =
(InFlowContext() ? &Exp::ScanScalarEndInFlow() : &Exp::ScanScalarEnd());
- params.eatEnd = false;
- params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1);
- params.fold = FOLD_FLOW;
- params.eatLeadingWhitespace = true;
- params.trimTrailingSpaces = true;
- params.chomp = STRIP;
- params.onDocIndicator = BREAK;
- params.onTabInIndentation = THROW;
-
- // insert a potential simple key
- InsertPotentialSimpleKey();
-
- Mark mark = INPUT.mark();
- scalar = ScanScalar(INPUT, params);
-
- // can have a simple key only if we ended the scalar by starting a new line
- m_simpleKeyAllowed = params.leadingSpaces;
- m_canBeJSONFlow = false;
-
- // finally, check and see if we ended on an illegal character
- // if(Exp::IllegalCharInScalar.Matches(INPUT))
- // throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR);
-
- Token token(Token::PLAIN_SCALAR, mark);
- token.value = scalar;
- m_tokens.push(token);
-}
-
-// QuotedScalar
-void Scanner::ScanQuotedScalar() {
- std::string scalar;
-
- // peek at single or double quote (don't eat because we need to preserve (for
- // the time being) the input position)
- char quote = INPUT.peek();
- bool single = (quote == '\'');
-
- // setup the scanning parameters
- ScanScalarParams params;
+ params.eatEnd = false;
+ params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1);
+ params.fold = FOLD_FLOW;
+ params.eatLeadingWhitespace = true;
+ params.trimTrailingSpaces = true;
+ params.chomp = STRIP;
+ params.onDocIndicator = BREAK;
+ params.onTabInIndentation = THROW;
+
+ // insert a potential simple key
+ InsertPotentialSimpleKey();
+
+ Mark mark = INPUT.mark();
+ scalar = ScanScalar(INPUT, params);
+
+ // can have a simple key only if we ended the scalar by starting a new line
+ m_simpleKeyAllowed = params.leadingSpaces;
+ m_canBeJSONFlow = false;
+
+ // finally, check and see if we ended on an illegal character
+ // if(Exp::IllegalCharInScalar.Matches(INPUT))
+ // throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR);
+
+ Token token(Token::PLAIN_SCALAR, mark);
+ token.value = scalar;
+ m_tokens.push(token);
+}
+
+// QuotedScalar
+void Scanner::ScanQuotedScalar() {
+ std::string scalar;
+
+ // peek at single or double quote (don't eat because we need to preserve (for
+ // the time being) the input position)
+ char quote = INPUT.peek();
+ bool single = (quote == '\'');
+
+ // setup the scanning parameters
+ ScanScalarParams params;
RegEx end = (single ? RegEx(quote) && !Exp::EscSingleQuote() : RegEx(quote));
params.end = &end;
- params.eatEnd = true;
- params.escape = (single ? '\'' : '\\');
- params.indent = 0;
- params.fold = FOLD_FLOW;
- params.eatLeadingWhitespace = true;
- params.trimTrailingSpaces = false;
- params.chomp = CLIP;
- params.onDocIndicator = THROW;
-
- // insert a potential simple key
- InsertPotentialSimpleKey();
-
- Mark mark = INPUT.mark();
-
- // now eat that opening quote
- INPUT.get();
-
- // and scan
- scalar = ScanScalar(INPUT, params);
- m_simpleKeyAllowed = false;
- m_canBeJSONFlow = true;
-
- Token token(Token::NON_PLAIN_SCALAR, mark);
- token.value = scalar;
- m_tokens.push(token);
-}
-
-// BlockScalarToken
-// . These need a little extra processing beforehand.
-// . We need to scan the line where the indicator is (this doesn't count as part
-// of the scalar),
-// and then we need to figure out what level of indentation we'll be using.
-void Scanner::ScanBlockScalar() {
- std::string scalar;
-
- ScanScalarParams params;
- params.indent = 1;
- params.detectIndent = true;
-
- // eat block indicator ('|' or '>')
- Mark mark = INPUT.mark();
- char indicator = INPUT.get();
- params.fold = (indicator == Keys::FoldedScalar ? FOLD_BLOCK : DONT_FOLD);
-
- // eat chomping/indentation indicators
- params.chomp = CLIP;
- int n = Exp::Chomp().Match(INPUT);
- for (int i = 0; i < n; i++) {
- char ch = INPUT.get();
- if (ch == '+')
- params.chomp = KEEP;
- else if (ch == '-')
- params.chomp = STRIP;
- else if (Exp::Digit().Matches(ch)) {
- if (ch == '0')
- throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK);
-
- params.indent = ch - '0';
- params.detectIndent = false;
- }
- }
-
- // now eat whitespace
- while (Exp::Blank().Matches(INPUT))
- INPUT.eat(1);
-
- // and comments to the end of the line
- if (Exp::Comment().Matches(INPUT))
- while (INPUT && !Exp::Break().Matches(INPUT))
- INPUT.eat(1);
-
- // if it's not a line break, then we ran into a bad character inline
- if (INPUT && !Exp::Break().Matches(INPUT))
- throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK);
-
- // set the initial indentation
- if (GetTopIndent() >= 0)
- params.indent += GetTopIndent();
-
- params.eatLeadingWhitespace = false;
- params.trimTrailingSpaces = false;
- params.onTabInIndentation = THROW;
-
- scalar = ScanScalar(INPUT, params);
-
- // simple keys always ok after block scalars (since we're gonna start a new
- // line anyways)
- m_simpleKeyAllowed = true;
- m_canBeJSONFlow = false;
-
- Token token(Token::NON_PLAIN_SCALAR, mark);
- token.value = scalar;
- m_tokens.push(token);
-}
-}
+ params.eatEnd = true;
+ params.escape = (single ? '\'' : '\\');
+ params.indent = 0;
+ params.fold = FOLD_FLOW;
+ params.eatLeadingWhitespace = true;
+ params.trimTrailingSpaces = false;
+ params.chomp = CLIP;
+ params.onDocIndicator = THROW;
+
+ // insert a potential simple key
+ InsertPotentialSimpleKey();
+
+ Mark mark = INPUT.mark();
+
+ // now eat that opening quote
+ INPUT.get();
+
+ // and scan
+ scalar = ScanScalar(INPUT, params);
+ m_simpleKeyAllowed = false;
+ m_canBeJSONFlow = true;
+
+ Token token(Token::NON_PLAIN_SCALAR, mark);
+ token.value = scalar;
+ m_tokens.push(token);
+}
+
+// BlockScalarToken
+// . These need a little extra processing beforehand.
+// . We need to scan the line where the indicator is (this doesn't count as part
+// of the scalar),
+// and then we need to figure out what level of indentation we'll be using.
+void Scanner::ScanBlockScalar() {
+ std::string scalar;
+
+ ScanScalarParams params;
+ params.indent = 1;
+ params.detectIndent = true;
+
+ // eat block indicator ('|' or '>')
+ Mark mark = INPUT.mark();
+ char indicator = INPUT.get();
+ params.fold = (indicator == Keys::FoldedScalar ? FOLD_BLOCK : DONT_FOLD);
+
+ // eat chomping/indentation indicators
+ params.chomp = CLIP;
+ int n = Exp::Chomp().Match(INPUT);
+ for (int i = 0; i < n; i++) {
+ char ch = INPUT.get();
+ if (ch == '+')
+ params.chomp = KEEP;
+ else if (ch == '-')
+ params.chomp = STRIP;
+ else if (Exp::Digit().Matches(ch)) {
+ if (ch == '0')
+ throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK);
+
+ params.indent = ch - '0';
+ params.detectIndent = false;
+ }
+ }
+
+ // now eat whitespace
+ while (Exp::Blank().Matches(INPUT))
+ INPUT.eat(1);
+
+ // and comments to the end of the line
+ if (Exp::Comment().Matches(INPUT))
+ while (INPUT && !Exp::Break().Matches(INPUT))
+ INPUT.eat(1);
+
+ // if it's not a line break, then we ran into a bad character inline
+ if (INPUT && !Exp::Break().Matches(INPUT))
+ throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK);
+
+ // set the initial indentation
+ if (GetTopIndent() >= 0)
+ params.indent += GetTopIndent();
+
+ params.eatLeadingWhitespace = false;
+ params.trimTrailingSpaces = false;
+ params.onTabInIndentation = THROW;
+
+ scalar = ScanScalar(INPUT, params);
+
+ // simple keys always ok after block scalars (since we're gonna start a new
+ // line anyways)
+ m_simpleKeyAllowed = true;
+ m_canBeJSONFlow = false;
+
+ Token token(Token::NON_PLAIN_SCALAR, mark);
+ token.value = scalar;
+ m_tokens.push(token);
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/setting.h b/contrib/libs/yaml-cpp/src/setting.h
index c6a81e6e85..b78d40e2e8 100644
--- a/contrib/libs/yaml-cpp/src/setting.h
+++ b/contrib/libs/yaml-cpp/src/setting.h
@@ -1,95 +1,95 @@
-#ifndef SETTING_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define SETTING_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <memory>
-#include <vector>
-#include "yaml-cpp/noncopyable.h"
-
-namespace YAML {
-class SettingChangeBase;
-
-template <typename T>
-class Setting {
- public:
- Setting() : m_value() {}
-
- const T get() const { return m_value; }
+#ifndef SETTING_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define SETTING_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <memory>
+#include <vector>
+#include "yaml-cpp/noncopyable.h"
+
+namespace YAML {
+class SettingChangeBase;
+
+template <typename T>
+class Setting {
+ public:
+ Setting() : m_value() {}
+
+ const T get() const { return m_value; }
std::unique_ptr<SettingChangeBase> set(const T& value);
- void restore(const Setting<T>& oldSetting) { m_value = oldSetting.get(); }
-
- private:
- T m_value;
-};
-
-class SettingChangeBase {
- public:
- virtual ~SettingChangeBase() {}
- virtual void pop() = 0;
-};
-
-template <typename T>
-class SettingChange : public SettingChangeBase {
- public:
- SettingChange(Setting<T>* pSetting) : m_pCurSetting(pSetting) {
- // copy old setting to save its state
- m_oldSetting = *pSetting;
- }
-
- virtual void pop() { m_pCurSetting->restore(m_oldSetting); }
-
- private:
- Setting<T>* m_pCurSetting;
- Setting<T> m_oldSetting;
-};
-
-template <typename T>
+ void restore(const Setting<T>& oldSetting) { m_value = oldSetting.get(); }
+
+ private:
+ T m_value;
+};
+
+class SettingChangeBase {
+ public:
+ virtual ~SettingChangeBase() {}
+ virtual void pop() = 0;
+};
+
+template <typename T>
+class SettingChange : public SettingChangeBase {
+ public:
+ SettingChange(Setting<T>* pSetting) : m_pCurSetting(pSetting) {
+ // copy old setting to save its state
+ m_oldSetting = *pSetting;
+ }
+
+ virtual void pop() { m_pCurSetting->restore(m_oldSetting); }
+
+ private:
+ Setting<T>* m_pCurSetting;
+ Setting<T> m_oldSetting;
+};
+
+template <typename T>
inline std::unique_ptr<SettingChangeBase> Setting<T>::set(const T& value) {
std::unique_ptr<SettingChangeBase> pChange(new SettingChange<T>(this));
- m_value = value;
- return pChange;
-}
-
-class SettingChanges : private noncopyable {
- public:
- SettingChanges() {}
- ~SettingChanges() { clear(); }
-
- void clear() {
- restore();
- m_settingChanges.clear();
- }
-
- void restore() {
- for (setting_changes::const_iterator it = m_settingChanges.begin();
- it != m_settingChanges.end(); ++it)
- (*it)->pop();
- }
-
+ m_value = value;
+ return pChange;
+}
+
+class SettingChanges : private noncopyable {
+ public:
+ SettingChanges() {}
+ ~SettingChanges() { clear(); }
+
+ void clear() {
+ restore();
+ m_settingChanges.clear();
+ }
+
+ void restore() {
+ for (setting_changes::const_iterator it = m_settingChanges.begin();
+ it != m_settingChanges.end(); ++it)
+ (*it)->pop();
+ }
+
void push(std::unique_ptr<SettingChangeBase> pSettingChange) {
m_settingChanges.push_back(std::move(pSettingChange));
- }
-
+ }
+
// like std::unique_ptr - assignment is transfer of ownership
SettingChanges& operator=(SettingChanges&& rhs) {
- if (this == &rhs)
- return *this;
-
- clear();
+ if (this == &rhs)
+ return *this;
+
+ clear();
std::swap(m_settingChanges, rhs.m_settingChanges);
- return *this;
- }
-
- private:
+ return *this;
+ }
+
+ private:
typedef std::vector<std::unique_ptr<SettingChangeBase>> setting_changes;
- setting_changes m_settingChanges;
-};
-}
-
-#endif // SETTING_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+ setting_changes m_settingChanges;
+};
+}
+
+#endif // SETTING_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/simplekey.cpp b/contrib/libs/yaml-cpp/src/simplekey.cpp
index 4d8835c6e6..70f56b6ae4 100644
--- a/contrib/libs/yaml-cpp/src/simplekey.cpp
+++ b/contrib/libs/yaml-cpp/src/simplekey.cpp
@@ -1,128 +1,128 @@
-#include "scanner.h"
-#include "token.h"
-
-namespace YAML {
-struct Mark;
-
-Scanner::SimpleKey::SimpleKey(const Mark& mark_, std::size_t flowLevel_)
- : mark(mark_), flowLevel(flowLevel_), pIndent(0), pMapStart(0), pKey(0) {}
-
-void Scanner::SimpleKey::Validate() {
- // Note: pIndent will *not* be garbage here;
- // we "garbage collect" them so we can
- // always refer to them
- if (pIndent)
- pIndent->status = IndentMarker::VALID;
- if (pMapStart)
- pMapStart->status = Token::VALID;
- if (pKey)
- pKey->status = Token::VALID;
-}
-
-void Scanner::SimpleKey::Invalidate() {
- if (pIndent)
- pIndent->status = IndentMarker::INVALID;
- if (pMapStart)
- pMapStart->status = Token::INVALID;
- if (pKey)
- pKey->status = Token::INVALID;
-}
-
-// CanInsertPotentialSimpleKey
-bool Scanner::CanInsertPotentialSimpleKey() const {
- if (!m_simpleKeyAllowed)
- return false;
-
- return !ExistsActiveSimpleKey();
-}
-
-// ExistsActiveSimpleKey
-// . Returns true if there's a potential simple key at our flow level
-// (there's allowed at most one per flow level, i.e., at the start of the flow
-// start token)
-bool Scanner::ExistsActiveSimpleKey() const {
- if (m_simpleKeys.empty())
- return false;
-
- const SimpleKey& key = m_simpleKeys.top();
- return key.flowLevel == GetFlowLevel();
-}
-
-// InsertPotentialSimpleKey
-// . If we can, add a potential simple key to the queue,
-// and save it on a stack.
-void Scanner::InsertPotentialSimpleKey() {
- if (!CanInsertPotentialSimpleKey())
- return;
-
- SimpleKey key(INPUT.mark(), GetFlowLevel());
-
- // first add a map start, if necessary
- if (InBlockContext()) {
- key.pIndent = PushIndentTo(INPUT.column(), IndentMarker::MAP);
- if (key.pIndent) {
- key.pIndent->status = IndentMarker::UNKNOWN;
- key.pMapStart = key.pIndent->pStartToken;
- key.pMapStart->status = Token::UNVERIFIED;
- }
- }
-
- // then add the (now unverified) key
- m_tokens.push(Token(Token::KEY, INPUT.mark()));
- key.pKey = &m_tokens.back();
- key.pKey->status = Token::UNVERIFIED;
-
- m_simpleKeys.push(key);
-}
-
-// InvalidateSimpleKey
-// . Automatically invalidate the simple key in our flow level
-void Scanner::InvalidateSimpleKey() {
- if (m_simpleKeys.empty())
- return;
-
- // grab top key
- SimpleKey& key = m_simpleKeys.top();
- if (key.flowLevel != GetFlowLevel())
- return;
-
- key.Invalidate();
- m_simpleKeys.pop();
-}
-
-// VerifySimpleKey
-// . Determines whether the latest simple key to be added is valid,
-// and if so, makes it valid.
-bool Scanner::VerifySimpleKey() {
- if (m_simpleKeys.empty())
- return false;
-
- // grab top key
- SimpleKey key = m_simpleKeys.top();
-
- // only validate if we're in the correct flow level
- if (key.flowLevel != GetFlowLevel())
- return false;
-
- m_simpleKeys.pop();
-
- bool isValid = true;
-
- // needs to be less than 1024 characters and inline
- if (INPUT.line() != key.mark.line || INPUT.pos() - key.mark.pos > 1024)
- isValid = false;
-
- // invalidate key
- if (isValid)
- key.Validate();
- else
- key.Invalidate();
-
- return isValid;
-}
-
-void Scanner::PopAllSimpleKeys() {
- while (!m_simpleKeys.empty())
- m_simpleKeys.pop();
-}
-}
+#include "scanner.h"
+#include "token.h"
+
+namespace YAML {
+struct Mark;
+
+Scanner::SimpleKey::SimpleKey(const Mark& mark_, std::size_t flowLevel_)
+ : mark(mark_), flowLevel(flowLevel_), pIndent(0), pMapStart(0), pKey(0) {}
+
+void Scanner::SimpleKey::Validate() {
+ // Note: pIndent will *not* be garbage here;
+ // we "garbage collect" them so we can
+ // always refer to them
+ if (pIndent)
+ pIndent->status = IndentMarker::VALID;
+ if (pMapStart)
+ pMapStart->status = Token::VALID;
+ if (pKey)
+ pKey->status = Token::VALID;
+}
+
+void Scanner::SimpleKey::Invalidate() {
+ if (pIndent)
+ pIndent->status = IndentMarker::INVALID;
+ if (pMapStart)
+ pMapStart->status = Token::INVALID;
+ if (pKey)
+ pKey->status = Token::INVALID;
+}
+
+// CanInsertPotentialSimpleKey
+bool Scanner::CanInsertPotentialSimpleKey() const {
+ if (!m_simpleKeyAllowed)
+ return false;
+
+ return !ExistsActiveSimpleKey();
+}
+
+// ExistsActiveSimpleKey
+// . Returns true if there's a potential simple key at our flow level
+// (there's allowed at most one per flow level, i.e., at the start of the flow
+// start token)
+bool Scanner::ExistsActiveSimpleKey() const {
+ if (m_simpleKeys.empty())
+ return false;
+
+ const SimpleKey& key = m_simpleKeys.top();
+ return key.flowLevel == GetFlowLevel();
+}
+
+// InsertPotentialSimpleKey
+// . If we can, add a potential simple key to the queue,
+// and save it on a stack.
+void Scanner::InsertPotentialSimpleKey() {
+ if (!CanInsertPotentialSimpleKey())
+ return;
+
+ SimpleKey key(INPUT.mark(), GetFlowLevel());
+
+ // first add a map start, if necessary
+ if (InBlockContext()) {
+ key.pIndent = PushIndentTo(INPUT.column(), IndentMarker::MAP);
+ if (key.pIndent) {
+ key.pIndent->status = IndentMarker::UNKNOWN;
+ key.pMapStart = key.pIndent->pStartToken;
+ key.pMapStart->status = Token::UNVERIFIED;
+ }
+ }
+
+ // then add the (now unverified) key
+ m_tokens.push(Token(Token::KEY, INPUT.mark()));
+ key.pKey = &m_tokens.back();
+ key.pKey->status = Token::UNVERIFIED;
+
+ m_simpleKeys.push(key);
+}
+
+// InvalidateSimpleKey
+// . Automatically invalidate the simple key in our flow level
+void Scanner::InvalidateSimpleKey() {
+ if (m_simpleKeys.empty())
+ return;
+
+ // grab top key
+ SimpleKey& key = m_simpleKeys.top();
+ if (key.flowLevel != GetFlowLevel())
+ return;
+
+ key.Invalidate();
+ m_simpleKeys.pop();
+}
+
+// VerifySimpleKey
+// . Determines whether the latest simple key to be added is valid,
+// and if so, makes it valid.
+bool Scanner::VerifySimpleKey() {
+ if (m_simpleKeys.empty())
+ return false;
+
+ // grab top key
+ SimpleKey key = m_simpleKeys.top();
+
+ // only validate if we're in the correct flow level
+ if (key.flowLevel != GetFlowLevel())
+ return false;
+
+ m_simpleKeys.pop();
+
+ bool isValid = true;
+
+ // needs to be less than 1024 characters and inline
+ if (INPUT.line() != key.mark.line || INPUT.pos() - key.mark.pos > 1024)
+ isValid = false;
+
+ // invalidate key
+ if (isValid)
+ key.Validate();
+ else
+ key.Invalidate();
+
+ return isValid;
+}
+
+void Scanner::PopAllSimpleKeys() {
+ while (!m_simpleKeys.empty())
+ m_simpleKeys.pop();
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/singledocparser.cpp b/contrib/libs/yaml-cpp/src/singledocparser.cpp
index bf13a4b433..a27c1c3b04 100644
--- a/contrib/libs/yaml-cpp/src/singledocparser.cpp
+++ b/contrib/libs/yaml-cpp/src/singledocparser.cpp
@@ -1,414 +1,414 @@
-#include <algorithm>
-#include <cstdio>
-#include <sstream>
-
-#include "collectionstack.h" // IWYU pragma: keep
-#include "scanner.h"
-#include "singledocparser.h"
-#include "tag.h"
-#include "token.h"
-#include "yaml-cpp/emitterstyle.h"
-#include "yaml-cpp/eventhandler.h"
-#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
-#include "yaml-cpp/mark.h"
+#include <algorithm>
+#include <cstdio>
+#include <sstream>
+
+#include "collectionstack.h" // IWYU pragma: keep
+#include "scanner.h"
+#include "singledocparser.h"
+#include "tag.h"
+#include "token.h"
+#include "yaml-cpp/emitterstyle.h"
+#include "yaml-cpp/eventhandler.h"
+#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
+#include "yaml-cpp/mark.h"
#include "yaml-cpp/null.h"
-
-namespace YAML {
-SingleDocParser::SingleDocParser(Scanner& scanner, const Directives& directives)
- : m_scanner(scanner),
- m_directives(directives),
- m_pCollectionStack(new CollectionStack),
- m_curAnchor(0) {}
-
-SingleDocParser::~SingleDocParser() {}
-
-// HandleDocument
-// . Handles the next document
-// . Throws a ParserException on error.
-void SingleDocParser::HandleDocument(EventHandler& eventHandler) {
- assert(!m_scanner.empty()); // guaranteed that there are tokens
- assert(!m_curAnchor);
-
- eventHandler.OnDocumentStart(m_scanner.peek().mark);
-
- // eat doc start
- if (m_scanner.peek().type == Token::DOC_START)
- m_scanner.pop();
-
- // recurse!
- HandleNode(eventHandler);
-
- eventHandler.OnDocumentEnd();
-
- // and finally eat any doc ends we see
- while (!m_scanner.empty() && m_scanner.peek().type == Token::DOC_END)
- m_scanner.pop();
-}
-
-void SingleDocParser::HandleNode(EventHandler& eventHandler) {
- // an empty node *is* a possibility
- if (m_scanner.empty()) {
- eventHandler.OnNull(m_scanner.mark(), NullAnchor);
- return;
- }
-
- // save location
- Mark mark = m_scanner.peek().mark;
-
- // special case: a value node by itself must be a map, with no header
- if (m_scanner.peek().type == Token::VALUE) {
- eventHandler.OnMapStart(mark, "?", NullAnchor, EmitterStyle::Default);
- HandleMap(eventHandler);
- eventHandler.OnMapEnd();
- return;
- }
-
- // special case: an alias node
- if (m_scanner.peek().type == Token::ALIAS) {
- eventHandler.OnAlias(mark, LookupAnchor(mark, m_scanner.peek().value));
- m_scanner.pop();
- return;
- }
-
- std::string tag;
- anchor_t anchor;
- ParseProperties(tag, anchor);
-
- const Token& token = m_scanner.peek();
-
+
+namespace YAML {
+SingleDocParser::SingleDocParser(Scanner& scanner, const Directives& directives)
+ : m_scanner(scanner),
+ m_directives(directives),
+ m_pCollectionStack(new CollectionStack),
+ m_curAnchor(0) {}
+
+SingleDocParser::~SingleDocParser() {}
+
+// HandleDocument
+// . Handles the next document
+// . Throws a ParserException on error.
+void SingleDocParser::HandleDocument(EventHandler& eventHandler) {
+ assert(!m_scanner.empty()); // guaranteed that there are tokens
+ assert(!m_curAnchor);
+
+ eventHandler.OnDocumentStart(m_scanner.peek().mark);
+
+ // eat doc start
+ if (m_scanner.peek().type == Token::DOC_START)
+ m_scanner.pop();
+
+ // recurse!
+ HandleNode(eventHandler);
+
+ eventHandler.OnDocumentEnd();
+
+ // and finally eat any doc ends we see
+ while (!m_scanner.empty() && m_scanner.peek().type == Token::DOC_END)
+ m_scanner.pop();
+}
+
+void SingleDocParser::HandleNode(EventHandler& eventHandler) {
+ // an empty node *is* a possibility
+ if (m_scanner.empty()) {
+ eventHandler.OnNull(m_scanner.mark(), NullAnchor);
+ return;
+ }
+
+ // save location
+ Mark mark = m_scanner.peek().mark;
+
+ // special case: a value node by itself must be a map, with no header
+ if (m_scanner.peek().type == Token::VALUE) {
+ eventHandler.OnMapStart(mark, "?", NullAnchor, EmitterStyle::Default);
+ HandleMap(eventHandler);
+ eventHandler.OnMapEnd();
+ return;
+ }
+
+ // special case: an alias node
+ if (m_scanner.peek().type == Token::ALIAS) {
+ eventHandler.OnAlias(mark, LookupAnchor(mark, m_scanner.peek().value));
+ m_scanner.pop();
+ return;
+ }
+
+ std::string tag;
+ anchor_t anchor;
+ ParseProperties(tag, anchor);
+
+ const Token& token = m_scanner.peek();
+
if (token.type == Token::PLAIN_SCALAR && IsNullString(token.value)) {
- eventHandler.OnNull(mark, anchor);
- m_scanner.pop();
- return;
- }
-
- // add non-specific tags
- if (tag.empty())
- tag = (token.type == Token::NON_PLAIN_SCALAR ? "!" : "?");
-
- // now split based on what kind of node we should be
- switch (token.type) {
- case Token::PLAIN_SCALAR:
- case Token::NON_PLAIN_SCALAR:
- eventHandler.OnScalar(mark, tag, anchor, token.value);
- m_scanner.pop();
- return;
- case Token::FLOW_SEQ_START:
- eventHandler.OnSequenceStart(mark, tag, anchor, EmitterStyle::Flow);
- HandleSequence(eventHandler);
- eventHandler.OnSequenceEnd();
- return;
- case Token::BLOCK_SEQ_START:
- eventHandler.OnSequenceStart(mark, tag, anchor, EmitterStyle::Block);
- HandleSequence(eventHandler);
- eventHandler.OnSequenceEnd();
- return;
- case Token::FLOW_MAP_START:
- eventHandler.OnMapStart(mark, tag, anchor, EmitterStyle::Flow);
- HandleMap(eventHandler);
- eventHandler.OnMapEnd();
- return;
- case Token::BLOCK_MAP_START:
- eventHandler.OnMapStart(mark, tag, anchor, EmitterStyle::Block);
- HandleMap(eventHandler);
- eventHandler.OnMapEnd();
- return;
- case Token::KEY:
- // compact maps can only go in a flow sequence
- if (m_pCollectionStack->GetCurCollectionType() ==
- CollectionType::FlowSeq) {
- eventHandler.OnMapStart(mark, tag, anchor, EmitterStyle::Flow);
- HandleMap(eventHandler);
- eventHandler.OnMapEnd();
- return;
- }
- break;
- default:
- break;
- }
-
- if (tag == "?")
- eventHandler.OnNull(mark, anchor);
- else
- eventHandler.OnScalar(mark, tag, anchor, "");
-}
-
-void SingleDocParser::HandleSequence(EventHandler& eventHandler) {
- // split based on start token
- switch (m_scanner.peek().type) {
- case Token::BLOCK_SEQ_START:
- HandleBlockSequence(eventHandler);
- break;
- case Token::FLOW_SEQ_START:
- HandleFlowSequence(eventHandler);
- break;
- default:
- break;
- }
-}
-
-void SingleDocParser::HandleBlockSequence(EventHandler& eventHandler) {
- // eat start token
- m_scanner.pop();
- m_pCollectionStack->PushCollectionType(CollectionType::BlockSeq);
-
- while (1) {
- if (m_scanner.empty())
- throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ);
-
- Token token = m_scanner.peek();
- if (token.type != Token::BLOCK_ENTRY && token.type != Token::BLOCK_SEQ_END)
- throw ParserException(token.mark, ErrorMsg::END_OF_SEQ);
-
- m_scanner.pop();
- if (token.type == Token::BLOCK_SEQ_END)
- break;
-
- // check for null
- if (!m_scanner.empty()) {
- const Token& token = m_scanner.peek();
- if (token.type == Token::BLOCK_ENTRY ||
- token.type == Token::BLOCK_SEQ_END) {
- eventHandler.OnNull(token.mark, NullAnchor);
- continue;
- }
- }
-
- HandleNode(eventHandler);
- }
-
- m_pCollectionStack->PopCollectionType(CollectionType::BlockSeq);
-}
-
-void SingleDocParser::HandleFlowSequence(EventHandler& eventHandler) {
- // eat start token
- m_scanner.pop();
- m_pCollectionStack->PushCollectionType(CollectionType::FlowSeq);
-
- while (1) {
- if (m_scanner.empty())
- throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ_FLOW);
-
- // first check for end
- if (m_scanner.peek().type == Token::FLOW_SEQ_END) {
- m_scanner.pop();
- break;
- }
-
- // then read the node
- HandleNode(eventHandler);
-
- if (m_scanner.empty())
- throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ_FLOW);
-
- // now eat the separator (or could be a sequence end, which we ignore - but
- // if it's neither, then it's a bad node)
- Token& token = m_scanner.peek();
- if (token.type == Token::FLOW_ENTRY)
- m_scanner.pop();
- else if (token.type != Token::FLOW_SEQ_END)
- throw ParserException(token.mark, ErrorMsg::END_OF_SEQ_FLOW);
- }
-
- m_pCollectionStack->PopCollectionType(CollectionType::FlowSeq);
-}
-
-void SingleDocParser::HandleMap(EventHandler& eventHandler) {
- // split based on start token
- switch (m_scanner.peek().type) {
- case Token::BLOCK_MAP_START:
- HandleBlockMap(eventHandler);
- break;
- case Token::FLOW_MAP_START:
- HandleFlowMap(eventHandler);
- break;
- case Token::KEY:
- HandleCompactMap(eventHandler);
- break;
- case Token::VALUE:
- HandleCompactMapWithNoKey(eventHandler);
- break;
- default:
- break;
- }
-}
-
-void SingleDocParser::HandleBlockMap(EventHandler& eventHandler) {
- // eat start token
- m_scanner.pop();
- m_pCollectionStack->PushCollectionType(CollectionType::BlockMap);
-
- while (1) {
- if (m_scanner.empty())
- throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP);
-
- Token token = m_scanner.peek();
- if (token.type != Token::KEY && token.type != Token::VALUE &&
- token.type != Token::BLOCK_MAP_END)
- throw ParserException(token.mark, ErrorMsg::END_OF_MAP);
-
- if (token.type == Token::BLOCK_MAP_END) {
- m_scanner.pop();
- break;
- }
-
- // grab key (if non-null)
- if (token.type == Token::KEY) {
- m_scanner.pop();
- HandleNode(eventHandler);
- } else {
- eventHandler.OnNull(token.mark, NullAnchor);
- }
-
- // now grab value (optional)
- if (!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) {
- m_scanner.pop();
- HandleNode(eventHandler);
- } else {
- eventHandler.OnNull(token.mark, NullAnchor);
- }
- }
-
- m_pCollectionStack->PopCollectionType(CollectionType::BlockMap);
-}
-
-void SingleDocParser::HandleFlowMap(EventHandler& eventHandler) {
- // eat start token
- m_scanner.pop();
- m_pCollectionStack->PushCollectionType(CollectionType::FlowMap);
-
- while (1) {
- if (m_scanner.empty())
- throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP_FLOW);
-
- Token& token = m_scanner.peek();
- const Mark mark = token.mark;
- // first check for end
- if (token.type == Token::FLOW_MAP_END) {
- m_scanner.pop();
- break;
- }
-
- // grab key (if non-null)
- if (token.type == Token::KEY) {
- m_scanner.pop();
- HandleNode(eventHandler);
- } else {
- eventHandler.OnNull(mark, NullAnchor);
- }
-
- // now grab value (optional)
- if (!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) {
- m_scanner.pop();
- HandleNode(eventHandler);
- } else {
- eventHandler.OnNull(mark, NullAnchor);
- }
-
- if (m_scanner.empty())
- throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP_FLOW);
-
- // now eat the separator (or could be a map end, which we ignore - but if
- // it's neither, then it's a bad node)
- Token& nextToken = m_scanner.peek();
- if (nextToken.type == Token::FLOW_ENTRY)
- m_scanner.pop();
- else if (nextToken.type != Token::FLOW_MAP_END)
- throw ParserException(nextToken.mark, ErrorMsg::END_OF_MAP_FLOW);
- }
-
- m_pCollectionStack->PopCollectionType(CollectionType::FlowMap);
-}
-
-// . Single "key: value" pair in a flow sequence
-void SingleDocParser::HandleCompactMap(EventHandler& eventHandler) {
- m_pCollectionStack->PushCollectionType(CollectionType::CompactMap);
-
- // grab key
- Mark mark = m_scanner.peek().mark;
- m_scanner.pop();
- HandleNode(eventHandler);
-
- // now grab value (optional)
- if (!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) {
- m_scanner.pop();
- HandleNode(eventHandler);
- } else {
- eventHandler.OnNull(mark, NullAnchor);
- }
-
- m_pCollectionStack->PopCollectionType(CollectionType::CompactMap);
-}
-
-// . Single ": value" pair in a flow sequence
-void SingleDocParser::HandleCompactMapWithNoKey(EventHandler& eventHandler) {
- m_pCollectionStack->PushCollectionType(CollectionType::CompactMap);
-
- // null key
- eventHandler.OnNull(m_scanner.peek().mark, NullAnchor);
-
- // grab value
- m_scanner.pop();
- HandleNode(eventHandler);
-
- m_pCollectionStack->PopCollectionType(CollectionType::CompactMap);
-}
-
-// ParseProperties
-// . Grabs any tag or anchor tokens and deals with them.
-void SingleDocParser::ParseProperties(std::string& tag, anchor_t& anchor) {
- tag.clear();
- anchor = NullAnchor;
-
- while (1) {
- if (m_scanner.empty())
- return;
-
- switch (m_scanner.peek().type) {
- case Token::TAG:
- ParseTag(tag);
- break;
- case Token::ANCHOR:
- ParseAnchor(anchor);
- break;
- default:
- return;
- }
- }
-}
-
-void SingleDocParser::ParseTag(std::string& tag) {
- Token& token = m_scanner.peek();
- if (!tag.empty())
- throw ParserException(token.mark, ErrorMsg::MULTIPLE_TAGS);
-
- Tag tagInfo(token);
- tag = tagInfo.Translate(m_directives);
- m_scanner.pop();
-}
-
-void SingleDocParser::ParseAnchor(anchor_t& anchor) {
- Token& token = m_scanner.peek();
- if (anchor)
- throw ParserException(token.mark, ErrorMsg::MULTIPLE_ANCHORS);
-
- anchor = RegisterAnchor(token.value);
- m_scanner.pop();
-}
-
-anchor_t SingleDocParser::RegisterAnchor(const std::string& name) {
- if (name.empty())
- return NullAnchor;
-
- return m_anchors[name] = ++m_curAnchor;
-}
-
-anchor_t SingleDocParser::LookupAnchor(const Mark& mark,
- const std::string& name) const {
- Anchors::const_iterator it = m_anchors.find(name);
- if (it == m_anchors.end())
- throw ParserException(mark, ErrorMsg::UNKNOWN_ANCHOR);
-
- return it->second;
-}
-}
+ eventHandler.OnNull(mark, anchor);
+ m_scanner.pop();
+ return;
+ }
+
+ // add non-specific tags
+ if (tag.empty())
+ tag = (token.type == Token::NON_PLAIN_SCALAR ? "!" : "?");
+
+ // now split based on what kind of node we should be
+ switch (token.type) {
+ case Token::PLAIN_SCALAR:
+ case Token::NON_PLAIN_SCALAR:
+ eventHandler.OnScalar(mark, tag, anchor, token.value);
+ m_scanner.pop();
+ return;
+ case Token::FLOW_SEQ_START:
+ eventHandler.OnSequenceStart(mark, tag, anchor, EmitterStyle::Flow);
+ HandleSequence(eventHandler);
+ eventHandler.OnSequenceEnd();
+ return;
+ case Token::BLOCK_SEQ_START:
+ eventHandler.OnSequenceStart(mark, tag, anchor, EmitterStyle::Block);
+ HandleSequence(eventHandler);
+ eventHandler.OnSequenceEnd();
+ return;
+ case Token::FLOW_MAP_START:
+ eventHandler.OnMapStart(mark, tag, anchor, EmitterStyle::Flow);
+ HandleMap(eventHandler);
+ eventHandler.OnMapEnd();
+ return;
+ case Token::BLOCK_MAP_START:
+ eventHandler.OnMapStart(mark, tag, anchor, EmitterStyle::Block);
+ HandleMap(eventHandler);
+ eventHandler.OnMapEnd();
+ return;
+ case Token::KEY:
+ // compact maps can only go in a flow sequence
+ if (m_pCollectionStack->GetCurCollectionType() ==
+ CollectionType::FlowSeq) {
+ eventHandler.OnMapStart(mark, tag, anchor, EmitterStyle::Flow);
+ HandleMap(eventHandler);
+ eventHandler.OnMapEnd();
+ return;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (tag == "?")
+ eventHandler.OnNull(mark, anchor);
+ else
+ eventHandler.OnScalar(mark, tag, anchor, "");
+}
+
+void SingleDocParser::HandleSequence(EventHandler& eventHandler) {
+ // split based on start token
+ switch (m_scanner.peek().type) {
+ case Token::BLOCK_SEQ_START:
+ HandleBlockSequence(eventHandler);
+ break;
+ case Token::FLOW_SEQ_START:
+ HandleFlowSequence(eventHandler);
+ break;
+ default:
+ break;
+ }
+}
+
+void SingleDocParser::HandleBlockSequence(EventHandler& eventHandler) {
+ // eat start token
+ m_scanner.pop();
+ m_pCollectionStack->PushCollectionType(CollectionType::BlockSeq);
+
+ while (1) {
+ if (m_scanner.empty())
+ throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ);
+
+ Token token = m_scanner.peek();
+ if (token.type != Token::BLOCK_ENTRY && token.type != Token::BLOCK_SEQ_END)
+ throw ParserException(token.mark, ErrorMsg::END_OF_SEQ);
+
+ m_scanner.pop();
+ if (token.type == Token::BLOCK_SEQ_END)
+ break;
+
+ // check for null
+ if (!m_scanner.empty()) {
+ const Token& token = m_scanner.peek();
+ if (token.type == Token::BLOCK_ENTRY ||
+ token.type == Token::BLOCK_SEQ_END) {
+ eventHandler.OnNull(token.mark, NullAnchor);
+ continue;
+ }
+ }
+
+ HandleNode(eventHandler);
+ }
+
+ m_pCollectionStack->PopCollectionType(CollectionType::BlockSeq);
+}
+
+void SingleDocParser::HandleFlowSequence(EventHandler& eventHandler) {
+ // eat start token
+ m_scanner.pop();
+ m_pCollectionStack->PushCollectionType(CollectionType::FlowSeq);
+
+ while (1) {
+ if (m_scanner.empty())
+ throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ_FLOW);
+
+ // first check for end
+ if (m_scanner.peek().type == Token::FLOW_SEQ_END) {
+ m_scanner.pop();
+ break;
+ }
+
+ // then read the node
+ HandleNode(eventHandler);
+
+ if (m_scanner.empty())
+ throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ_FLOW);
+
+ // now eat the separator (or could be a sequence end, which we ignore - but
+ // if it's neither, then it's a bad node)
+ Token& token = m_scanner.peek();
+ if (token.type == Token::FLOW_ENTRY)
+ m_scanner.pop();
+ else if (token.type != Token::FLOW_SEQ_END)
+ throw ParserException(token.mark, ErrorMsg::END_OF_SEQ_FLOW);
+ }
+
+ m_pCollectionStack->PopCollectionType(CollectionType::FlowSeq);
+}
+
+void SingleDocParser::HandleMap(EventHandler& eventHandler) {
+ // split based on start token
+ switch (m_scanner.peek().type) {
+ case Token::BLOCK_MAP_START:
+ HandleBlockMap(eventHandler);
+ break;
+ case Token::FLOW_MAP_START:
+ HandleFlowMap(eventHandler);
+ break;
+ case Token::KEY:
+ HandleCompactMap(eventHandler);
+ break;
+ case Token::VALUE:
+ HandleCompactMapWithNoKey(eventHandler);
+ break;
+ default:
+ break;
+ }
+}
+
+void SingleDocParser::HandleBlockMap(EventHandler& eventHandler) {
+ // eat start token
+ m_scanner.pop();
+ m_pCollectionStack->PushCollectionType(CollectionType::BlockMap);
+
+ while (1) {
+ if (m_scanner.empty())
+ throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP);
+
+ Token token = m_scanner.peek();
+ if (token.type != Token::KEY && token.type != Token::VALUE &&
+ token.type != Token::BLOCK_MAP_END)
+ throw ParserException(token.mark, ErrorMsg::END_OF_MAP);
+
+ if (token.type == Token::BLOCK_MAP_END) {
+ m_scanner.pop();
+ break;
+ }
+
+ // grab key (if non-null)
+ if (token.type == Token::KEY) {
+ m_scanner.pop();
+ HandleNode(eventHandler);
+ } else {
+ eventHandler.OnNull(token.mark, NullAnchor);
+ }
+
+ // now grab value (optional)
+ if (!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) {
+ m_scanner.pop();
+ HandleNode(eventHandler);
+ } else {
+ eventHandler.OnNull(token.mark, NullAnchor);
+ }
+ }
+
+ m_pCollectionStack->PopCollectionType(CollectionType::BlockMap);
+}
+
+void SingleDocParser::HandleFlowMap(EventHandler& eventHandler) {
+ // eat start token
+ m_scanner.pop();
+ m_pCollectionStack->PushCollectionType(CollectionType::FlowMap);
+
+ while (1) {
+ if (m_scanner.empty())
+ throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP_FLOW);
+
+ Token& token = m_scanner.peek();
+ const Mark mark = token.mark;
+ // first check for end
+ if (token.type == Token::FLOW_MAP_END) {
+ m_scanner.pop();
+ break;
+ }
+
+ // grab key (if non-null)
+ if (token.type == Token::KEY) {
+ m_scanner.pop();
+ HandleNode(eventHandler);
+ } else {
+ eventHandler.OnNull(mark, NullAnchor);
+ }
+
+ // now grab value (optional)
+ if (!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) {
+ m_scanner.pop();
+ HandleNode(eventHandler);
+ } else {
+ eventHandler.OnNull(mark, NullAnchor);
+ }
+
+ if (m_scanner.empty())
+ throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP_FLOW);
+
+ // now eat the separator (or could be a map end, which we ignore - but if
+ // it's neither, then it's a bad node)
+ Token& nextToken = m_scanner.peek();
+ if (nextToken.type == Token::FLOW_ENTRY)
+ m_scanner.pop();
+ else if (nextToken.type != Token::FLOW_MAP_END)
+ throw ParserException(nextToken.mark, ErrorMsg::END_OF_MAP_FLOW);
+ }
+
+ m_pCollectionStack->PopCollectionType(CollectionType::FlowMap);
+}
+
+// . Single "key: value" pair in a flow sequence
+void SingleDocParser::HandleCompactMap(EventHandler& eventHandler) {
+ m_pCollectionStack->PushCollectionType(CollectionType::CompactMap);
+
+ // grab key
+ Mark mark = m_scanner.peek().mark;
+ m_scanner.pop();
+ HandleNode(eventHandler);
+
+ // now grab value (optional)
+ if (!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) {
+ m_scanner.pop();
+ HandleNode(eventHandler);
+ } else {
+ eventHandler.OnNull(mark, NullAnchor);
+ }
+
+ m_pCollectionStack->PopCollectionType(CollectionType::CompactMap);
+}
+
+// . Single ": value" pair in a flow sequence
+void SingleDocParser::HandleCompactMapWithNoKey(EventHandler& eventHandler) {
+ m_pCollectionStack->PushCollectionType(CollectionType::CompactMap);
+
+ // null key
+ eventHandler.OnNull(m_scanner.peek().mark, NullAnchor);
+
+ // grab value
+ m_scanner.pop();
+ HandleNode(eventHandler);
+
+ m_pCollectionStack->PopCollectionType(CollectionType::CompactMap);
+}
+
+// ParseProperties
+// . Grabs any tag or anchor tokens and deals with them.
+void SingleDocParser::ParseProperties(std::string& tag, anchor_t& anchor) {
+ tag.clear();
+ anchor = NullAnchor;
+
+ while (1) {
+ if (m_scanner.empty())
+ return;
+
+ switch (m_scanner.peek().type) {
+ case Token::TAG:
+ ParseTag(tag);
+ break;
+ case Token::ANCHOR:
+ ParseAnchor(anchor);
+ break;
+ default:
+ return;
+ }
+ }
+}
+
+void SingleDocParser::ParseTag(std::string& tag) {
+ Token& token = m_scanner.peek();
+ if (!tag.empty())
+ throw ParserException(token.mark, ErrorMsg::MULTIPLE_TAGS);
+
+ Tag tagInfo(token);
+ tag = tagInfo.Translate(m_directives);
+ m_scanner.pop();
+}
+
+void SingleDocParser::ParseAnchor(anchor_t& anchor) {
+ Token& token = m_scanner.peek();
+ if (anchor)
+ throw ParserException(token.mark, ErrorMsg::MULTIPLE_ANCHORS);
+
+ anchor = RegisterAnchor(token.value);
+ m_scanner.pop();
+}
+
+anchor_t SingleDocParser::RegisterAnchor(const std::string& name) {
+ if (name.empty())
+ return NullAnchor;
+
+ return m_anchors[name] = ++m_curAnchor;
+}
+
+anchor_t SingleDocParser::LookupAnchor(const Mark& mark,
+ const std::string& name) const {
+ Anchors::const_iterator it = m_anchors.find(name);
+ if (it == m_anchors.end())
+ throw ParserException(mark, ErrorMsg::UNKNOWN_ANCHOR);
+
+ return it->second;
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/singledocparser.h b/contrib/libs/yaml-cpp/src/singledocparser.h
index 7f480d9b45..2b92067cdd 100644
--- a/contrib/libs/yaml-cpp/src/singledocparser.h
+++ b/contrib/libs/yaml-cpp/src/singledocparser.h
@@ -1,65 +1,65 @@
-#ifndef SINGLEDOCPARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define SINGLEDOCPARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <map>
-#include <memory>
-#include <string>
-
-#include "yaml-cpp/anchor.h"
-#include "yaml-cpp/noncopyable.h"
-
-namespace YAML {
-class CollectionStack;
-class EventHandler;
-class Node;
-class Scanner;
-struct Directives;
-struct Mark;
-struct Token;
-
-class SingleDocParser : private noncopyable {
- public:
- SingleDocParser(Scanner& scanner, const Directives& directives);
- ~SingleDocParser();
-
- void HandleDocument(EventHandler& eventHandler);
-
- private:
- void HandleNode(EventHandler& eventHandler);
-
- void HandleSequence(EventHandler& eventHandler);
- void HandleBlockSequence(EventHandler& eventHandler);
- void HandleFlowSequence(EventHandler& eventHandler);
-
- void HandleMap(EventHandler& eventHandler);
- void HandleBlockMap(EventHandler& eventHandler);
- void HandleFlowMap(EventHandler& eventHandler);
- void HandleCompactMap(EventHandler& eventHandler);
- void HandleCompactMapWithNoKey(EventHandler& eventHandler);
-
- void ParseProperties(std::string& tag, anchor_t& anchor);
- void ParseTag(std::string& tag);
- void ParseAnchor(anchor_t& anchor);
-
- anchor_t RegisterAnchor(const std::string& name);
- anchor_t LookupAnchor(const Mark& mark, const std::string& name) const;
-
- private:
- Scanner& m_scanner;
- const Directives& m_directives;
+#ifndef SINGLEDOCPARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define SINGLEDOCPARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <map>
+#include <memory>
+#include <string>
+
+#include "yaml-cpp/anchor.h"
+#include "yaml-cpp/noncopyable.h"
+
+namespace YAML {
+class CollectionStack;
+class EventHandler;
+class Node;
+class Scanner;
+struct Directives;
+struct Mark;
+struct Token;
+
+class SingleDocParser : private noncopyable {
+ public:
+ SingleDocParser(Scanner& scanner, const Directives& directives);
+ ~SingleDocParser();
+
+ void HandleDocument(EventHandler& eventHandler);
+
+ private:
+ void HandleNode(EventHandler& eventHandler);
+
+ void HandleSequence(EventHandler& eventHandler);
+ void HandleBlockSequence(EventHandler& eventHandler);
+ void HandleFlowSequence(EventHandler& eventHandler);
+
+ void HandleMap(EventHandler& eventHandler);
+ void HandleBlockMap(EventHandler& eventHandler);
+ void HandleFlowMap(EventHandler& eventHandler);
+ void HandleCompactMap(EventHandler& eventHandler);
+ void HandleCompactMapWithNoKey(EventHandler& eventHandler);
+
+ void ParseProperties(std::string& tag, anchor_t& anchor);
+ void ParseTag(std::string& tag);
+ void ParseAnchor(anchor_t& anchor);
+
+ anchor_t RegisterAnchor(const std::string& name);
+ anchor_t LookupAnchor(const Mark& mark, const std::string& name) const;
+
+ private:
+ Scanner& m_scanner;
+ const Directives& m_directives;
std::unique_ptr<CollectionStack> m_pCollectionStack;
-
- typedef std::map<std::string, anchor_t> Anchors;
- Anchors m_anchors;
-
- anchor_t m_curAnchor;
-};
-}
-
-#endif // SINGLEDOCPARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+ typedef std::map<std::string, anchor_t> Anchors;
+ Anchors m_anchors;
+
+ anchor_t m_curAnchor;
+};
+}
+
+#endif // SINGLEDOCPARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/stream.cpp b/contrib/libs/yaml-cpp/src/stream.cpp
index 070eda1ad9..3b013cfa7d 100644
--- a/contrib/libs/yaml-cpp/src/stream.cpp
+++ b/contrib/libs/yaml-cpp/src/stream.cpp
@@ -1,448 +1,448 @@
-#include <iostream>
-
-#include "stream.h"
-
-#ifndef YAML_PREFETCH_SIZE
-#define YAML_PREFETCH_SIZE 2048
-#endif
-
-#define S_ARRAY_SIZE(A) (sizeof(A) / sizeof(*(A)))
-#define S_ARRAY_END(A) ((A) + S_ARRAY_SIZE(A))
-
-#define CP_REPLACEMENT_CHARACTER (0xFFFD)
-
-namespace YAML {
-enum UtfIntroState {
- uis_start,
- uis_utfbe_b1,
- uis_utf32be_b2,
- uis_utf32be_bom3,
- uis_utf32be,
- uis_utf16be,
- uis_utf16be_bom1,
- uis_utfle_bom1,
- uis_utf16le_bom2,
- uis_utf32le_bom3,
- uis_utf16le,
- uis_utf32le,
- uis_utf8_imp,
- uis_utf16le_imp,
- uis_utf32le_imp3,
- uis_utf8_bom1,
- uis_utf8_bom2,
- uis_utf8,
- uis_error
-};
-
-enum UtfIntroCharType {
- uict00,
- uictBB,
- uictBF,
- uictEF,
- uictFE,
- uictFF,
- uictAscii,
- uictOther,
- uictMax
-};
-
-static bool s_introFinalState[] = {
- false, // uis_start
- false, // uis_utfbe_b1
- false, // uis_utf32be_b2
- false, // uis_utf32be_bom3
- true, // uis_utf32be
- true, // uis_utf16be
- false, // uis_utf16be_bom1
- false, // uis_utfle_bom1
- false, // uis_utf16le_bom2
- false, // uis_utf32le_bom3
- true, // uis_utf16le
- true, // uis_utf32le
- false, // uis_utf8_imp
- false, // uis_utf16le_imp
- false, // uis_utf32le_imp3
- false, // uis_utf8_bom1
- false, // uis_utf8_bom2
- true, // uis_utf8
- true, // uis_error
-};
-
-static UtfIntroState s_introTransitions[][uictMax] = {
- // uict00, uictBB, uictBF, uictEF,
- // uictFE, uictFF, uictAscii, uictOther
- {uis_utfbe_b1, uis_utf8, uis_utf8, uis_utf8_bom1, uis_utf16be_bom1,
- uis_utfle_bom1, uis_utf8_imp, uis_utf8},
- {uis_utf32be_b2, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8,
- uis_utf16be, uis_utf8},
- {uis_utf32be, uis_utf8, uis_utf8, uis_utf8, uis_utf32be_bom3, uis_utf8,
- uis_utf8, uis_utf8},
- {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf32be, uis_utf8,
- uis_utf8},
- {uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be,
- uis_utf32be, uis_utf32be, uis_utf32be},
- {uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be,
- uis_utf16be, uis_utf16be, uis_utf16be},
- {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16be, uis_utf8,
- uis_utf8},
- {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16le_bom2, uis_utf8,
- uis_utf8, uis_utf8},
- {uis_utf32le_bom3, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le,
- uis_utf16le, uis_utf16le, uis_utf16le},
- {uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le,
- uis_utf16le, uis_utf16le, uis_utf16le},
- {uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le,
- uis_utf16le, uis_utf16le, uis_utf16le},
- {uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le,
- uis_utf32le, uis_utf32le, uis_utf32le},
- {uis_utf16le_imp, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8,
- uis_utf8, uis_utf8},
- {uis_utf32le_imp3, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le,
- uis_utf16le, uis_utf16le, uis_utf16le},
- {uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le,
- uis_utf16le, uis_utf16le, uis_utf16le},
- {uis_utf8, uis_utf8_bom2, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8,
- uis_utf8},
- {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8,
- uis_utf8},
- {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8,
- uis_utf8},
-};
-
-static char s_introUngetCount[][uictMax] = {
- // uict00, uictBB, uictBF, uictEF, uictFE, uictFF, uictAscii, uictOther
- {0, 1, 1, 0, 0, 0, 0, 1},
- {0, 2, 2, 2, 2, 2, 2, 2},
- {3, 3, 3, 3, 0, 3, 3, 3},
- {4, 4, 4, 4, 4, 0, 4, 4},
- {1, 1, 1, 1, 1, 1, 1, 1},
- {1, 1, 1, 1, 1, 1, 1, 1},
- {2, 2, 2, 2, 2, 0, 2, 2},
- {2, 2, 2, 2, 0, 2, 2, 2},
- {0, 1, 1, 1, 1, 1, 1, 1},
- {0, 2, 2, 2, 2, 2, 2, 2},
- {1, 1, 1, 1, 1, 1, 1, 1},
- {1, 1, 1, 1, 1, 1, 1, 1},
- {0, 2, 2, 2, 2, 2, 2, 2},
- {0, 3, 3, 3, 3, 3, 3, 3},
- {4, 4, 4, 4, 4, 4, 4, 4},
- {2, 0, 2, 2, 2, 2, 2, 2},
- {3, 3, 0, 3, 3, 3, 3, 3},
- {1, 1, 1, 1, 1, 1, 1, 1},
-};
-
-inline UtfIntroCharType IntroCharTypeOf(std::istream::int_type ch) {
- if (std::istream::traits_type::eof() == ch) {
- return uictOther;
- }
-
- switch (ch) {
- case 0:
- return uict00;
- case 0xBB:
- return uictBB;
- case 0xBF:
- return uictBF;
- case 0xEF:
- return uictEF;
- case 0xFE:
- return uictFE;
- case 0xFF:
- return uictFF;
- }
-
- if ((ch > 0) && (ch < 0xFF)) {
- return uictAscii;
- }
-
- return uictOther;
-}
-
-inline char Utf8Adjust(unsigned long ch, unsigned char lead_bits,
- unsigned char rshift) {
- const unsigned char header = ((1 << lead_bits) - 1) << (8 - lead_bits);
- const unsigned char mask = (0xFF >> (lead_bits + 1));
- return static_cast<char>(
- static_cast<unsigned char>(header | ((ch >> rshift) & mask)));
-}
-
-inline void QueueUnicodeCodepoint(std::deque<char>& q, unsigned long ch) {
- // We are not allowed to queue the Stream::eof() codepoint, so
- // replace it with CP_REPLACEMENT_CHARACTER
- if (static_cast<unsigned long>(Stream::eof()) == ch) {
- ch = CP_REPLACEMENT_CHARACTER;
- }
-
- if (ch < 0x80) {
- q.push_back(Utf8Adjust(ch, 0, 0));
- } else if (ch < 0x800) {
- q.push_back(Utf8Adjust(ch, 2, 6));
- q.push_back(Utf8Adjust(ch, 1, 0));
- } else if (ch < 0x10000) {
- q.push_back(Utf8Adjust(ch, 3, 12));
- q.push_back(Utf8Adjust(ch, 1, 6));
- q.push_back(Utf8Adjust(ch, 1, 0));
- } else {
- q.push_back(Utf8Adjust(ch, 4, 18));
- q.push_back(Utf8Adjust(ch, 1, 12));
- q.push_back(Utf8Adjust(ch, 1, 6));
- q.push_back(Utf8Adjust(ch, 1, 0));
- }
-}
-
-Stream::Stream(std::istream& input)
- : m_input(input),
- m_pPrefetched(new unsigned char[YAML_PREFETCH_SIZE]),
- m_nPrefetchedAvailable(0),
- m_nPrefetchedUsed(0) {
- typedef std::istream::traits_type char_traits;
-
- if (!input)
- return;
-
- // Determine (or guess) the character-set by reading the BOM, if any. See
- // the YAML specification for the determination algorithm.
- char_traits::int_type intro[4];
- int nIntroUsed = 0;
- UtfIntroState state = uis_start;
- for (; !s_introFinalState[state];) {
- std::istream::int_type ch = input.get();
- intro[nIntroUsed++] = ch;
- UtfIntroCharType charType = IntroCharTypeOf(ch);
- UtfIntroState newState = s_introTransitions[state][charType];
- int nUngets = s_introUngetCount[state][charType];
- if (nUngets > 0) {
- input.clear();
- for (; nUngets > 0; --nUngets) {
- if (char_traits::eof() != intro[--nIntroUsed])
- input.putback(char_traits::to_char_type(intro[nIntroUsed]));
- }
- }
- state = newState;
- }
-
- switch (state) {
- case uis_utf8:
- m_charSet = utf8;
- break;
- case uis_utf16le:
- m_charSet = utf16le;
- break;
- case uis_utf16be:
- m_charSet = utf16be;
- break;
- case uis_utf32le:
- m_charSet = utf32le;
- break;
- case uis_utf32be:
- m_charSet = utf32be;
- break;
- default:
- m_charSet = utf8;
- break;
- }
-
- ReadAheadTo(0);
-}
-
-Stream::~Stream() { delete[] m_pPrefetched; }
-
-char Stream::peek() const {
- if (m_readahead.empty()) {
- return Stream::eof();
- }
-
- return m_readahead[0];
-}
-
-Stream::operator bool() const {
- return m_input.good() ||
- (!m_readahead.empty() && m_readahead[0] != Stream::eof());
-}
-
-// get
-// . Extracts a character from the stream and updates our position
-char Stream::get() {
- char ch = peek();
- AdvanceCurrent();
- m_mark.column++;
-
- if (ch == '\n') {
- m_mark.column = 0;
- m_mark.line++;
- }
-
- return ch;
-}
-
-// get
-// . Extracts 'n' characters from the stream and updates our position
-std::string Stream::get(int n) {
- std::string ret;
- ret.reserve(n);
- for (int i = 0; i < n; i++)
- ret += get();
- return ret;
-}
-
-// eat
-// . Eats 'n' characters and updates our position.
-void Stream::eat(int n) {
- for (int i = 0; i < n; i++)
- get();
-}
-
-void Stream::AdvanceCurrent() {
- if (!m_readahead.empty()) {
- m_readahead.pop_front();
- m_mark.pos++;
- }
-
- ReadAheadTo(0);
-}
-
-bool Stream::_ReadAheadTo(size_t i) const {
- while (m_input.good() && (m_readahead.size() <= i)) {
- switch (m_charSet) {
- case utf8:
- StreamInUtf8();
- break;
- case utf16le:
- StreamInUtf16();
- break;
- case utf16be:
- StreamInUtf16();
- break;
- case utf32le:
- StreamInUtf32();
- break;
- case utf32be:
- StreamInUtf32();
- break;
- }
- }
-
- // signal end of stream
- if (!m_input.good())
- m_readahead.push_back(Stream::eof());
-
- return m_readahead.size() > i;
-}
-
-void Stream::StreamInUtf8() const {
- unsigned char b = GetNextByte();
- if (m_input.good()) {
- m_readahead.push_back(b);
- }
-}
-
-void Stream::StreamInUtf16() const {
- unsigned long ch = 0;
- unsigned char bytes[2];
- int nBigEnd = (m_charSet == utf16be) ? 0 : 1;
-
- bytes[0] = GetNextByte();
- bytes[1] = GetNextByte();
- if (!m_input.good()) {
- return;
- }
- ch = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) |
- static_cast<unsigned long>(bytes[1 ^ nBigEnd]);
-
- if (ch >= 0xDC00 && ch < 0xE000) {
- // Trailing (low) surrogate...ugh, wrong order
- QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
- return;
- } else if (ch >= 0xD800 && ch < 0xDC00) {
- // ch is a leading (high) surrogate
-
- // Four byte UTF-8 code point
-
- // Read the trailing (low) surrogate
- for (;;) {
- bytes[0] = GetNextByte();
- bytes[1] = GetNextByte();
- if (!m_input.good()) {
- QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
- return;
- }
- unsigned long chLow = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) |
- static_cast<unsigned long>(bytes[1 ^ nBigEnd]);
- if (chLow < 0xDC00 || chLow >= 0xE000) {
- // Trouble...not a low surrogate. Dump a REPLACEMENT CHARACTER into the
- // stream.
- QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
-
- // Deal with the next UTF-16 unit
- if (chLow < 0xD800 || chLow >= 0xE000) {
- // Easiest case: queue the codepoint and return
- QueueUnicodeCodepoint(m_readahead, ch);
- return;
- } else {
- // Start the loop over with the new high surrogate
- ch = chLow;
- continue;
- }
- }
-
- // Select the payload bits from the high surrogate
- ch &= 0x3FF;
- ch <<= 10;
-
- // Include bits from low surrogate
- ch |= (chLow & 0x3FF);
-
- // Add the surrogacy offset
- ch += 0x10000;
- break;
- }
- }
-
- QueueUnicodeCodepoint(m_readahead, ch);
-}
-
-inline char* ReadBuffer(unsigned char* pBuffer) {
- return reinterpret_cast<char*>(pBuffer);
-}
-
-unsigned char Stream::GetNextByte() const {
- if (m_nPrefetchedUsed >= m_nPrefetchedAvailable) {
- std::streambuf* pBuf = m_input.rdbuf();
- m_nPrefetchedAvailable = static_cast<std::size_t>(
- pBuf->sgetn(ReadBuffer(m_pPrefetched), YAML_PREFETCH_SIZE));
- m_nPrefetchedUsed = 0;
- if (!m_nPrefetchedAvailable) {
- m_input.setstate(std::ios_base::eofbit);
- }
-
- if (0 == m_nPrefetchedAvailable) {
- return 0;
- }
- }
-
- return m_pPrefetched[m_nPrefetchedUsed++];
-}
-
-void Stream::StreamInUtf32() const {
- static int indexes[2][4] = {{3, 2, 1, 0}, {0, 1, 2, 3}};
-
- unsigned long ch = 0;
- unsigned char bytes[4];
- int* pIndexes = (m_charSet == utf32be) ? indexes[1] : indexes[0];
-
- bytes[0] = GetNextByte();
- bytes[1] = GetNextByte();
- bytes[2] = GetNextByte();
- bytes[3] = GetNextByte();
- if (!m_input.good()) {
- return;
- }
-
- for (int i = 0; i < 4; ++i) {
- ch <<= 8;
- ch |= bytes[pIndexes[i]];
- }
-
- QueueUnicodeCodepoint(m_readahead, ch);
-}
-}
+#include <iostream>
+
+#include "stream.h"
+
+#ifndef YAML_PREFETCH_SIZE
+#define YAML_PREFETCH_SIZE 2048
+#endif
+
+#define S_ARRAY_SIZE(A) (sizeof(A) / sizeof(*(A)))
+#define S_ARRAY_END(A) ((A) + S_ARRAY_SIZE(A))
+
+#define CP_REPLACEMENT_CHARACTER (0xFFFD)
+
+namespace YAML {
+enum UtfIntroState {
+ uis_start,
+ uis_utfbe_b1,
+ uis_utf32be_b2,
+ uis_utf32be_bom3,
+ uis_utf32be,
+ uis_utf16be,
+ uis_utf16be_bom1,
+ uis_utfle_bom1,
+ uis_utf16le_bom2,
+ uis_utf32le_bom3,
+ uis_utf16le,
+ uis_utf32le,
+ uis_utf8_imp,
+ uis_utf16le_imp,
+ uis_utf32le_imp3,
+ uis_utf8_bom1,
+ uis_utf8_bom2,
+ uis_utf8,
+ uis_error
+};
+
+enum UtfIntroCharType {
+ uict00,
+ uictBB,
+ uictBF,
+ uictEF,
+ uictFE,
+ uictFF,
+ uictAscii,
+ uictOther,
+ uictMax
+};
+
+static bool s_introFinalState[] = {
+ false, // uis_start
+ false, // uis_utfbe_b1
+ false, // uis_utf32be_b2
+ false, // uis_utf32be_bom3
+ true, // uis_utf32be
+ true, // uis_utf16be
+ false, // uis_utf16be_bom1
+ false, // uis_utfle_bom1
+ false, // uis_utf16le_bom2
+ false, // uis_utf32le_bom3
+ true, // uis_utf16le
+ true, // uis_utf32le
+ false, // uis_utf8_imp
+ false, // uis_utf16le_imp
+ false, // uis_utf32le_imp3
+ false, // uis_utf8_bom1
+ false, // uis_utf8_bom2
+ true, // uis_utf8
+ true, // uis_error
+};
+
+static UtfIntroState s_introTransitions[][uictMax] = {
+ // uict00, uictBB, uictBF, uictEF,
+ // uictFE, uictFF, uictAscii, uictOther
+ {uis_utfbe_b1, uis_utf8, uis_utf8, uis_utf8_bom1, uis_utf16be_bom1,
+ uis_utfle_bom1, uis_utf8_imp, uis_utf8},
+ {uis_utf32be_b2, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8,
+ uis_utf16be, uis_utf8},
+ {uis_utf32be, uis_utf8, uis_utf8, uis_utf8, uis_utf32be_bom3, uis_utf8,
+ uis_utf8, uis_utf8},
+ {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf32be, uis_utf8,
+ uis_utf8},
+ {uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be,
+ uis_utf32be, uis_utf32be, uis_utf32be},
+ {uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be,
+ uis_utf16be, uis_utf16be, uis_utf16be},
+ {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16be, uis_utf8,
+ uis_utf8},
+ {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16le_bom2, uis_utf8,
+ uis_utf8, uis_utf8},
+ {uis_utf32le_bom3, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le,
+ uis_utf16le, uis_utf16le, uis_utf16le},
+ {uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le,
+ uis_utf16le, uis_utf16le, uis_utf16le},
+ {uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le,
+ uis_utf16le, uis_utf16le, uis_utf16le},
+ {uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le,
+ uis_utf32le, uis_utf32le, uis_utf32le},
+ {uis_utf16le_imp, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8,
+ uis_utf8, uis_utf8},
+ {uis_utf32le_imp3, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le,
+ uis_utf16le, uis_utf16le, uis_utf16le},
+ {uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le,
+ uis_utf16le, uis_utf16le, uis_utf16le},
+ {uis_utf8, uis_utf8_bom2, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8,
+ uis_utf8},
+ {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8,
+ uis_utf8},
+ {uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8,
+ uis_utf8},
+};
+
+static char s_introUngetCount[][uictMax] = {
+ // uict00, uictBB, uictBF, uictEF, uictFE, uictFF, uictAscii, uictOther
+ {0, 1, 1, 0, 0, 0, 0, 1},
+ {0, 2, 2, 2, 2, 2, 2, 2},
+ {3, 3, 3, 3, 0, 3, 3, 3},
+ {4, 4, 4, 4, 4, 0, 4, 4},
+ {1, 1, 1, 1, 1, 1, 1, 1},
+ {1, 1, 1, 1, 1, 1, 1, 1},
+ {2, 2, 2, 2, 2, 0, 2, 2},
+ {2, 2, 2, 2, 0, 2, 2, 2},
+ {0, 1, 1, 1, 1, 1, 1, 1},
+ {0, 2, 2, 2, 2, 2, 2, 2},
+ {1, 1, 1, 1, 1, 1, 1, 1},
+ {1, 1, 1, 1, 1, 1, 1, 1},
+ {0, 2, 2, 2, 2, 2, 2, 2},
+ {0, 3, 3, 3, 3, 3, 3, 3},
+ {4, 4, 4, 4, 4, 4, 4, 4},
+ {2, 0, 2, 2, 2, 2, 2, 2},
+ {3, 3, 0, 3, 3, 3, 3, 3},
+ {1, 1, 1, 1, 1, 1, 1, 1},
+};
+
+inline UtfIntroCharType IntroCharTypeOf(std::istream::int_type ch) {
+ if (std::istream::traits_type::eof() == ch) {
+ return uictOther;
+ }
+
+ switch (ch) {
+ case 0:
+ return uict00;
+ case 0xBB:
+ return uictBB;
+ case 0xBF:
+ return uictBF;
+ case 0xEF:
+ return uictEF;
+ case 0xFE:
+ return uictFE;
+ case 0xFF:
+ return uictFF;
+ }
+
+ if ((ch > 0) && (ch < 0xFF)) {
+ return uictAscii;
+ }
+
+ return uictOther;
+}
+
+inline char Utf8Adjust(unsigned long ch, unsigned char lead_bits,
+ unsigned char rshift) {
+ const unsigned char header = ((1 << lead_bits) - 1) << (8 - lead_bits);
+ const unsigned char mask = (0xFF >> (lead_bits + 1));
+ return static_cast<char>(
+ static_cast<unsigned char>(header | ((ch >> rshift) & mask)));
+}
+
+inline void QueueUnicodeCodepoint(std::deque<char>& q, unsigned long ch) {
+ // We are not allowed to queue the Stream::eof() codepoint, so
+ // replace it with CP_REPLACEMENT_CHARACTER
+ if (static_cast<unsigned long>(Stream::eof()) == ch) {
+ ch = CP_REPLACEMENT_CHARACTER;
+ }
+
+ if (ch < 0x80) {
+ q.push_back(Utf8Adjust(ch, 0, 0));
+ } else if (ch < 0x800) {
+ q.push_back(Utf8Adjust(ch, 2, 6));
+ q.push_back(Utf8Adjust(ch, 1, 0));
+ } else if (ch < 0x10000) {
+ q.push_back(Utf8Adjust(ch, 3, 12));
+ q.push_back(Utf8Adjust(ch, 1, 6));
+ q.push_back(Utf8Adjust(ch, 1, 0));
+ } else {
+ q.push_back(Utf8Adjust(ch, 4, 18));
+ q.push_back(Utf8Adjust(ch, 1, 12));
+ q.push_back(Utf8Adjust(ch, 1, 6));
+ q.push_back(Utf8Adjust(ch, 1, 0));
+ }
+}
+
+Stream::Stream(std::istream& input)
+ : m_input(input),
+ m_pPrefetched(new unsigned char[YAML_PREFETCH_SIZE]),
+ m_nPrefetchedAvailable(0),
+ m_nPrefetchedUsed(0) {
+ typedef std::istream::traits_type char_traits;
+
+ if (!input)
+ return;
+
+ // Determine (or guess) the character-set by reading the BOM, if any. See
+ // the YAML specification for the determination algorithm.
+ char_traits::int_type intro[4];
+ int nIntroUsed = 0;
+ UtfIntroState state = uis_start;
+ for (; !s_introFinalState[state];) {
+ std::istream::int_type ch = input.get();
+ intro[nIntroUsed++] = ch;
+ UtfIntroCharType charType = IntroCharTypeOf(ch);
+ UtfIntroState newState = s_introTransitions[state][charType];
+ int nUngets = s_introUngetCount[state][charType];
+ if (nUngets > 0) {
+ input.clear();
+ for (; nUngets > 0; --nUngets) {
+ if (char_traits::eof() != intro[--nIntroUsed])
+ input.putback(char_traits::to_char_type(intro[nIntroUsed]));
+ }
+ }
+ state = newState;
+ }
+
+ switch (state) {
+ case uis_utf8:
+ m_charSet = utf8;
+ break;
+ case uis_utf16le:
+ m_charSet = utf16le;
+ break;
+ case uis_utf16be:
+ m_charSet = utf16be;
+ break;
+ case uis_utf32le:
+ m_charSet = utf32le;
+ break;
+ case uis_utf32be:
+ m_charSet = utf32be;
+ break;
+ default:
+ m_charSet = utf8;
+ break;
+ }
+
+ ReadAheadTo(0);
+}
+
+Stream::~Stream() { delete[] m_pPrefetched; }
+
+char Stream::peek() const {
+ if (m_readahead.empty()) {
+ return Stream::eof();
+ }
+
+ return m_readahead[0];
+}
+
+Stream::operator bool() const {
+ return m_input.good() ||
+ (!m_readahead.empty() && m_readahead[0] != Stream::eof());
+}
+
+// get
+// . Extracts a character from the stream and updates our position
+char Stream::get() {
+ char ch = peek();
+ AdvanceCurrent();
+ m_mark.column++;
+
+ if (ch == '\n') {
+ m_mark.column = 0;
+ m_mark.line++;
+ }
+
+ return ch;
+}
+
+// get
+// . Extracts 'n' characters from the stream and updates our position
+std::string Stream::get(int n) {
+ std::string ret;
+ ret.reserve(n);
+ for (int i = 0; i < n; i++)
+ ret += get();
+ return ret;
+}
+
+// eat
+// . Eats 'n' characters and updates our position.
+void Stream::eat(int n) {
+ for (int i = 0; i < n; i++)
+ get();
+}
+
+void Stream::AdvanceCurrent() {
+ if (!m_readahead.empty()) {
+ m_readahead.pop_front();
+ m_mark.pos++;
+ }
+
+ ReadAheadTo(0);
+}
+
+bool Stream::_ReadAheadTo(size_t i) const {
+ while (m_input.good() && (m_readahead.size() <= i)) {
+ switch (m_charSet) {
+ case utf8:
+ StreamInUtf8();
+ break;
+ case utf16le:
+ StreamInUtf16();
+ break;
+ case utf16be:
+ StreamInUtf16();
+ break;
+ case utf32le:
+ StreamInUtf32();
+ break;
+ case utf32be:
+ StreamInUtf32();
+ break;
+ }
+ }
+
+ // signal end of stream
+ if (!m_input.good())
+ m_readahead.push_back(Stream::eof());
+
+ return m_readahead.size() > i;
+}
+
+void Stream::StreamInUtf8() const {
+ unsigned char b = GetNextByte();
+ if (m_input.good()) {
+ m_readahead.push_back(b);
+ }
+}
+
+void Stream::StreamInUtf16() const {
+ unsigned long ch = 0;
+ unsigned char bytes[2];
+ int nBigEnd = (m_charSet == utf16be) ? 0 : 1;
+
+ bytes[0] = GetNextByte();
+ bytes[1] = GetNextByte();
+ if (!m_input.good()) {
+ return;
+ }
+ ch = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) |
+ static_cast<unsigned long>(bytes[1 ^ nBigEnd]);
+
+ if (ch >= 0xDC00 && ch < 0xE000) {
+ // Trailing (low) surrogate...ugh, wrong order
+ QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
+ return;
+ } else if (ch >= 0xD800 && ch < 0xDC00) {
+ // ch is a leading (high) surrogate
+
+ // Four byte UTF-8 code point
+
+ // Read the trailing (low) surrogate
+ for (;;) {
+ bytes[0] = GetNextByte();
+ bytes[1] = GetNextByte();
+ if (!m_input.good()) {
+ QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
+ return;
+ }
+ unsigned long chLow = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) |
+ static_cast<unsigned long>(bytes[1 ^ nBigEnd]);
+ if (chLow < 0xDC00 || chLow >= 0xE000) {
+ // Trouble...not a low surrogate. Dump a REPLACEMENT CHARACTER into the
+ // stream.
+ QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
+
+ // Deal with the next UTF-16 unit
+ if (chLow < 0xD800 || chLow >= 0xE000) {
+ // Easiest case: queue the codepoint and return
+ QueueUnicodeCodepoint(m_readahead, ch);
+ return;
+ } else {
+ // Start the loop over with the new high surrogate
+ ch = chLow;
+ continue;
+ }
+ }
+
+ // Select the payload bits from the high surrogate
+ ch &= 0x3FF;
+ ch <<= 10;
+
+ // Include bits from low surrogate
+ ch |= (chLow & 0x3FF);
+
+ // Add the surrogacy offset
+ ch += 0x10000;
+ break;
+ }
+ }
+
+ QueueUnicodeCodepoint(m_readahead, ch);
+}
+
+inline char* ReadBuffer(unsigned char* pBuffer) {
+ return reinterpret_cast<char*>(pBuffer);
+}
+
+unsigned char Stream::GetNextByte() const {
+ if (m_nPrefetchedUsed >= m_nPrefetchedAvailable) {
+ std::streambuf* pBuf = m_input.rdbuf();
+ m_nPrefetchedAvailable = static_cast<std::size_t>(
+ pBuf->sgetn(ReadBuffer(m_pPrefetched), YAML_PREFETCH_SIZE));
+ m_nPrefetchedUsed = 0;
+ if (!m_nPrefetchedAvailable) {
+ m_input.setstate(std::ios_base::eofbit);
+ }
+
+ if (0 == m_nPrefetchedAvailable) {
+ return 0;
+ }
+ }
+
+ return m_pPrefetched[m_nPrefetchedUsed++];
+}
+
+void Stream::StreamInUtf32() const {
+ static int indexes[2][4] = {{3, 2, 1, 0}, {0, 1, 2, 3}};
+
+ unsigned long ch = 0;
+ unsigned char bytes[4];
+ int* pIndexes = (m_charSet == utf32be) ? indexes[1] : indexes[0];
+
+ bytes[0] = GetNextByte();
+ bytes[1] = GetNextByte();
+ bytes[2] = GetNextByte();
+ bytes[3] = GetNextByte();
+ if (!m_input.good()) {
+ return;
+ }
+
+ for (int i = 0; i < 4; ++i) {
+ ch <<= 8;
+ ch |= bytes[pIndexes[i]];
+ }
+
+ QueueUnicodeCodepoint(m_readahead, ch);
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/stream.h b/contrib/libs/yaml-cpp/src/stream.h
index 410051b833..42d542d5b1 100644
--- a/contrib/libs/yaml-cpp/src/stream.h
+++ b/contrib/libs/yaml-cpp/src/stream.h
@@ -1,76 +1,76 @@
-#ifndef STREAM_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define STREAM_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "yaml-cpp/noncopyable.h"
-#include "yaml-cpp/mark.h"
-#include <cstddef>
-#include <deque>
-#include <ios>
-#include <iostream>
-#include <set>
-#include <string>
-
-namespace YAML {
-class Stream : private noncopyable {
- public:
- friend class StreamCharSource;
-
- Stream(std::istream& input);
- ~Stream();
-
- operator bool() const;
- bool operator!() const { return !static_cast<bool>(*this); }
-
- char peek() const;
- char get();
- std::string get(int n);
- void eat(int n = 1);
-
- static char eof() { return 0x04; }
-
- const Mark mark() const { return m_mark; }
- int pos() const { return m_mark.pos; }
- int line() const { return m_mark.line; }
- int column() const { return m_mark.column; }
- void ResetColumn() { m_mark.column = 0; }
-
- private:
- enum CharacterSet { utf8, utf16le, utf16be, utf32le, utf32be };
-
- std::istream& m_input;
- Mark m_mark;
-
- CharacterSet m_charSet;
- mutable std::deque<char> m_readahead;
- unsigned char* const m_pPrefetched;
- mutable size_t m_nPrefetchedAvailable;
- mutable size_t m_nPrefetchedUsed;
-
- void AdvanceCurrent();
- char CharAt(size_t i) const;
- bool ReadAheadTo(size_t i) const;
- bool _ReadAheadTo(size_t i) const;
- void StreamInUtf8() const;
- void StreamInUtf16() const;
- void StreamInUtf32() const;
- unsigned char GetNextByte() const;
-};
-
-// CharAt
-// . Unchecked access
-inline char Stream::CharAt(size_t i) const { return m_readahead[i]; }
-
-inline bool Stream::ReadAheadTo(size_t i) const {
- if (m_readahead.size() > i)
- return true;
- return _ReadAheadTo(i);
-}
-}
-
-#endif // STREAM_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef STREAM_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define STREAM_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/noncopyable.h"
+#include "yaml-cpp/mark.h"
+#include <cstddef>
+#include <deque>
+#include <ios>
+#include <iostream>
+#include <set>
+#include <string>
+
+namespace YAML {
+class Stream : private noncopyable {
+ public:
+ friend class StreamCharSource;
+
+ Stream(std::istream& input);
+ ~Stream();
+
+ operator bool() const;
+ bool operator!() const { return !static_cast<bool>(*this); }
+
+ char peek() const;
+ char get();
+ std::string get(int n);
+ void eat(int n = 1);
+
+ static char eof() { return 0x04; }
+
+ const Mark mark() const { return m_mark; }
+ int pos() const { return m_mark.pos; }
+ int line() const { return m_mark.line; }
+ int column() const { return m_mark.column; }
+ void ResetColumn() { m_mark.column = 0; }
+
+ private:
+ enum CharacterSet { utf8, utf16le, utf16be, utf32le, utf32be };
+
+ std::istream& m_input;
+ Mark m_mark;
+
+ CharacterSet m_charSet;
+ mutable std::deque<char> m_readahead;
+ unsigned char* const m_pPrefetched;
+ mutable size_t m_nPrefetchedAvailable;
+ mutable size_t m_nPrefetchedUsed;
+
+ void AdvanceCurrent();
+ char CharAt(size_t i) const;
+ bool ReadAheadTo(size_t i) const;
+ bool _ReadAheadTo(size_t i) const;
+ void StreamInUtf8() const;
+ void StreamInUtf16() const;
+ void StreamInUtf32() const;
+ unsigned char GetNextByte() const;
+};
+
+// CharAt
+// . Unchecked access
+inline char Stream::CharAt(size_t i) const { return m_readahead[i]; }
+
+inline bool Stream::ReadAheadTo(size_t i) const {
+ if (m_readahead.size() > i)
+ return true;
+ return _ReadAheadTo(i);
+}
+}
+
+#endif // STREAM_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/streamcharsource.h b/contrib/libs/yaml-cpp/src/streamcharsource.h
index 0918413f11..624599e65d 100644
--- a/contrib/libs/yaml-cpp/src/streamcharsource.h
+++ b/contrib/libs/yaml-cpp/src/streamcharsource.h
@@ -1,48 +1,48 @@
-#ifndef STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "yaml-cpp/noncopyable.h"
-#include <cstddef>
-
-namespace YAML {
-class StreamCharSource {
- public:
- StreamCharSource(const Stream& stream) : m_offset(0), m_stream(stream) {}
- StreamCharSource(const StreamCharSource& source)
- : m_offset(source.m_offset), m_stream(source.m_stream) {}
- ~StreamCharSource() {}
-
- operator bool() const;
- char operator[](std::size_t i) const { return m_stream.CharAt(m_offset + i); }
- bool operator!() const { return !static_cast<bool>(*this); }
-
- const StreamCharSource operator+(int i) const;
-
- private:
- std::size_t m_offset;
- const Stream& m_stream;
-
- StreamCharSource& operator=(const StreamCharSource&); // non-assignable
-};
-
-inline StreamCharSource::operator bool() const {
- return m_stream.ReadAheadTo(m_offset);
-}
-
-inline const StreamCharSource StreamCharSource::operator+(int i) const {
- StreamCharSource source(*this);
- if (static_cast<int>(source.m_offset) + i >= 0)
- source.m_offset += i;
- else
- source.m_offset = 0;
- return source;
-}
-}
-
-#endif // STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/noncopyable.h"
+#include <cstddef>
+
+namespace YAML {
+class StreamCharSource {
+ public:
+ StreamCharSource(const Stream& stream) : m_offset(0), m_stream(stream) {}
+ StreamCharSource(const StreamCharSource& source)
+ : m_offset(source.m_offset), m_stream(source.m_stream) {}
+ ~StreamCharSource() {}
+
+ operator bool() const;
+ char operator[](std::size_t i) const { return m_stream.CharAt(m_offset + i); }
+ bool operator!() const { return !static_cast<bool>(*this); }
+
+ const StreamCharSource operator+(int i) const;
+
+ private:
+ std::size_t m_offset;
+ const Stream& m_stream;
+
+ StreamCharSource& operator=(const StreamCharSource&); // non-assignable
+};
+
+inline StreamCharSource::operator bool() const {
+ return m_stream.ReadAheadTo(m_offset);
+}
+
+inline const StreamCharSource StreamCharSource::operator+(int i) const {
+ StreamCharSource source(*this);
+ if (static_cast<int>(source.m_offset) + i >= 0)
+ source.m_offset += i;
+ else
+ source.m_offset = 0;
+ return source;
+}
+}
+
+#endif // STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/stringsource.h b/contrib/libs/yaml-cpp/src/stringsource.h
index 1691040db7..6fee44bb28 100644
--- a/contrib/libs/yaml-cpp/src/stringsource.h
+++ b/contrib/libs/yaml-cpp/src/stringsource.h
@@ -1,48 +1,48 @@
-#ifndef STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <cstddef>
-
-namespace YAML {
-class StringCharSource {
- public:
- StringCharSource(const char* str, std::size_t size)
- : m_str(str), m_size(size), m_offset(0) {}
-
- operator bool() const { return m_offset < m_size; }
- char operator[](std::size_t i) const { return m_str[m_offset + i]; }
- bool operator!() const { return !static_cast<bool>(*this); }
-
- const StringCharSource operator+(int i) const {
- StringCharSource source(*this);
- if (static_cast<int>(source.m_offset) + i >= 0)
- source.m_offset += i;
- else
- source.m_offset = 0;
- return source;
- }
-
- StringCharSource& operator++() {
- ++m_offset;
- return *this;
- }
-
- StringCharSource& operator+=(std::size_t offset) {
- m_offset += offset;
- return *this;
- }
-
- private:
- const char* m_str;
- std::size_t m_size;
- std::size_t m_offset;
-};
-}
-
-#endif // STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <cstddef>
+
+namespace YAML {
+class StringCharSource {
+ public:
+ StringCharSource(const char* str, std::size_t size)
+ : m_str(str), m_size(size), m_offset(0) {}
+
+ operator bool() const { return m_offset < m_size; }
+ char operator[](std::size_t i) const { return m_str[m_offset + i]; }
+ bool operator!() const { return !static_cast<bool>(*this); }
+
+ const StringCharSource operator+(int i) const {
+ StringCharSource source(*this);
+ if (static_cast<int>(source.m_offset) + i >= 0)
+ source.m_offset += i;
+ else
+ source.m_offset = 0;
+ return source;
+ }
+
+ StringCharSource& operator++() {
+ ++m_offset;
+ return *this;
+ }
+
+ StringCharSource& operator+=(std::size_t offset) {
+ m_offset += offset;
+ return *this;
+ }
+
+ private:
+ const char* m_str;
+ std::size_t m_size;
+ std::size_t m_offset;
+};
+}
+
+#endif // STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/tag.cpp b/contrib/libs/yaml-cpp/src/tag.cpp
index 16b4b6ae5c..51435520e4 100644
--- a/contrib/libs/yaml-cpp/src/tag.cpp
+++ b/contrib/libs/yaml-cpp/src/tag.cpp
@@ -1,49 +1,49 @@
-#include <cassert>
-#include <stdexcept>
-
-#include "directives.h" // IWYU pragma: keep
-#include "tag.h"
-#include "token.h"
-
-namespace YAML {
-Tag::Tag(const Token& token) : type(static_cast<TYPE>(token.data)) {
- switch (type) {
- case VERBATIM:
- value = token.value;
- break;
- case PRIMARY_HANDLE:
- value = token.value;
- break;
- case SECONDARY_HANDLE:
- value = token.value;
- break;
- case NAMED_HANDLE:
- handle = token.value;
- value = token.params[0];
- break;
- case NON_SPECIFIC:
- break;
- default:
- assert(false);
- }
-}
-
-const std::string Tag::Translate(const Directives& directives) {
- switch (type) {
- case VERBATIM:
- return value;
- case PRIMARY_HANDLE:
- return directives.TranslateTagHandle("!") + value;
- case SECONDARY_HANDLE:
- return directives.TranslateTagHandle("!!") + value;
- case NAMED_HANDLE:
- return directives.TranslateTagHandle("!" + handle + "!") + value;
- case NON_SPECIFIC:
- // TODO:
- return "!";
- default:
- assert(false);
- }
- throw std::runtime_error("yaml-cpp: internal error, bad tag type");
-}
-}
+#include <cassert>
+#include <stdexcept>
+
+#include "directives.h" // IWYU pragma: keep
+#include "tag.h"
+#include "token.h"
+
+namespace YAML {
+Tag::Tag(const Token& token) : type(static_cast<TYPE>(token.data)) {
+ switch (type) {
+ case VERBATIM:
+ value = token.value;
+ break;
+ case PRIMARY_HANDLE:
+ value = token.value;
+ break;
+ case SECONDARY_HANDLE:
+ value = token.value;
+ break;
+ case NAMED_HANDLE:
+ handle = token.value;
+ value = token.params[0];
+ break;
+ case NON_SPECIFIC:
+ break;
+ default:
+ assert(false);
+ }
+}
+
+const std::string Tag::Translate(const Directives& directives) {
+ switch (type) {
+ case VERBATIM:
+ return value;
+ case PRIMARY_HANDLE:
+ return directives.TranslateTagHandle("!") + value;
+ case SECONDARY_HANDLE:
+ return directives.TranslateTagHandle("!!") + value;
+ case NAMED_HANDLE:
+ return directives.TranslateTagHandle("!" + handle + "!") + value;
+ case NON_SPECIFIC:
+ // TODO:
+ return "!";
+ default:
+ assert(false);
+ }
+ throw std::runtime_error("yaml-cpp: internal error, bad tag type");
+}
+}
diff --git a/contrib/libs/yaml-cpp/src/tag.h b/contrib/libs/yaml-cpp/src/tag.h
index a51645cf1f..ac30673b9e 100644
--- a/contrib/libs/yaml-cpp/src/tag.h
+++ b/contrib/libs/yaml-cpp/src/tag.h
@@ -1,33 +1,33 @@
-#ifndef TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include <string>
-
-namespace YAML {
-struct Directives;
-struct Token;
-
-struct Tag {
- enum TYPE {
- VERBATIM,
- PRIMARY_HANDLE,
- SECONDARY_HANDLE,
- NAMED_HANDLE,
- NON_SPECIFIC
- };
-
- Tag(const Token& token);
- const std::string Translate(const Directives& directives);
-
- TYPE type;
- std::string handle, value;
-};
-}
-
-#endif // TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include <string>
+
+namespace YAML {
+struct Directives;
+struct Token;
+
+struct Tag {
+ enum TYPE {
+ VERBATIM,
+ PRIMARY_HANDLE,
+ SECONDARY_HANDLE,
+ NAMED_HANDLE,
+ NON_SPECIFIC
+ };
+
+ Tag(const Token& token);
+ const std::string Translate(const Directives& directives);
+
+ TYPE type;
+ std::string handle, value;
+};
+}
+
+#endif // TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/src/token.h b/contrib/libs/yaml-cpp/src/token.h
index f7486b0c6d..ad0b7d0a00 100644
--- a/contrib/libs/yaml-cpp/src/token.h
+++ b/contrib/libs/yaml-cpp/src/token.h
@@ -1,69 +1,69 @@
-#ifndef TOKEN_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-#define TOKEN_H_62B23520_7C8E_11DE_8A39_0800200C9A66
-
-#if defined(_MSC_VER) || \
- (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
- (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
-#pragma once
-#endif
-
-#include "yaml-cpp/mark.h"
-#include <iostream>
-#include <string>
-#include <vector>
-
-namespace YAML {
-const std::string TokenNames[] = {
- "DIRECTIVE", "DOC_START", "DOC_END", "BLOCK_SEQ_START", "BLOCK_MAP_START",
- "BLOCK_SEQ_END", "BLOCK_MAP_END", "BLOCK_ENTRY", "FLOW_SEQ_START",
- "FLOW_MAP_START", "FLOW_SEQ_END", "FLOW_MAP_END", "FLOW_MAP_COMPACT",
- "FLOW_ENTRY", "KEY", "VALUE", "ANCHOR", "ALIAS", "TAG", "SCALAR"};
-
-struct Token {
- // enums
- enum STATUS { VALID, INVALID, UNVERIFIED };
- enum TYPE {
- DIRECTIVE,
- DOC_START,
- DOC_END,
- BLOCK_SEQ_START,
- BLOCK_MAP_START,
- BLOCK_SEQ_END,
- BLOCK_MAP_END,
- BLOCK_ENTRY,
- FLOW_SEQ_START,
- FLOW_MAP_START,
- FLOW_SEQ_END,
- FLOW_MAP_END,
- FLOW_MAP_COMPACT,
- FLOW_ENTRY,
- KEY,
- VALUE,
- ANCHOR,
- ALIAS,
- TAG,
- PLAIN_SCALAR,
- NON_PLAIN_SCALAR
- };
-
- // data
- Token(TYPE type_, const Mark& mark_)
- : status(VALID), type(type_), mark(mark_), data(0) {}
-
- friend std::ostream& operator<<(std::ostream& out, const Token& token) {
- out << TokenNames[token.type] << std::string(": ") << token.value;
- for (std::size_t i = 0; i < token.params.size(); i++)
- out << std::string(" ") << token.params[i];
- return out;
- }
-
- STATUS status;
- TYPE type;
- Mark mark;
- std::string value;
- std::vector<std::string> params;
- int data;
-};
-}
-
-#endif // TOKEN_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#ifndef TOKEN_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+#define TOKEN_H_62B23520_7C8E_11DE_8A39_0800200C9A66
+
+#if defined(_MSC_VER) || \
+ (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
+#pragma once
+#endif
+
+#include "yaml-cpp/mark.h"
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace YAML {
+const std::string TokenNames[] = {
+ "DIRECTIVE", "DOC_START", "DOC_END", "BLOCK_SEQ_START", "BLOCK_MAP_START",
+ "BLOCK_SEQ_END", "BLOCK_MAP_END", "BLOCK_ENTRY", "FLOW_SEQ_START",
+ "FLOW_MAP_START", "FLOW_SEQ_END", "FLOW_MAP_END", "FLOW_MAP_COMPACT",
+ "FLOW_ENTRY", "KEY", "VALUE", "ANCHOR", "ALIAS", "TAG", "SCALAR"};
+
+struct Token {
+ // enums
+ enum STATUS { VALID, INVALID, UNVERIFIED };
+ enum TYPE {
+ DIRECTIVE,
+ DOC_START,
+ DOC_END,
+ BLOCK_SEQ_START,
+ BLOCK_MAP_START,
+ BLOCK_SEQ_END,
+ BLOCK_MAP_END,
+ BLOCK_ENTRY,
+ FLOW_SEQ_START,
+ FLOW_MAP_START,
+ FLOW_SEQ_END,
+ FLOW_MAP_END,
+ FLOW_MAP_COMPACT,
+ FLOW_ENTRY,
+ KEY,
+ VALUE,
+ ANCHOR,
+ ALIAS,
+ TAG,
+ PLAIN_SCALAR,
+ NON_PLAIN_SCALAR
+ };
+
+ // data
+ Token(TYPE type_, const Mark& mark_)
+ : status(VALID), type(type_), mark(mark_), data(0) {}
+
+ friend std::ostream& operator<<(std::ostream& out, const Token& token) {
+ out << TokenNames[token.type] << std::string(": ") << token.value;
+ for (std::size_t i = 0; i < token.params.size(); i++)
+ out << std::string(" ") << token.params[i];
+ return out;
+ }
+
+ STATUS status;
+ TYPE type;
+ Mark mark;
+ std::string value;
+ std::vector<std::string> params;
+ int data;
+};
+}
+
+#endif // TOKEN_H_62B23520_7C8E_11DE_8A39_0800200C9A66
diff --git a/contrib/libs/yaml-cpp/ya.make b/contrib/libs/yaml-cpp/ya.make
index 6313048de1..058caf92fa 100644
--- a/contrib/libs/yaml-cpp/ya.make
+++ b/contrib/libs/yaml-cpp/ya.make
@@ -1,52 +1,52 @@
-LIBRARY()
-
+LIBRARY()
+
LICENSE(MIT)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
VERSION(0.5.3)
-OWNER(
+OWNER(
g:antiinfra
g:contrib
g:cpp-contrib
-)
-
-NO_UTIL()
+)
+
+NO_UTIL()
NO_WSHADOW()
-
+
ADDINCL(GLOBAL contrib/libs/yaml-cpp/include)
-
-SRCS(
- src/binary.cpp
- src/convert.cpp
- src/directives.cpp
- src/emit.cpp
- src/emitfromevents.cpp
- src/emitter.cpp
- src/emitterstate.cpp
- src/emitterutils.cpp
+
+SRCS(
+ src/binary.cpp
+ src/convert.cpp
+ src/directives.cpp
+ src/emit.cpp
+ src/emitfromevents.cpp
+ src/emitter.cpp
+ src/emitterstate.cpp
+ src/emitterutils.cpp
src/exceptions.cpp
- src/exp.cpp
- src/memory.cpp
- src/nodebuilder.cpp
- src/node.cpp
- src/node_data.cpp
- src/nodeevents.cpp
- src/null.cpp
- src/ostream_wrapper.cpp
- src/parse.cpp
- src/parser.cpp
- src/regex_yaml.cpp
- src/scanner.cpp
- src/scanscalar.cpp
- src/scantag.cpp
- src/scantoken.cpp
- src/simplekey.cpp
- src/singledocparser.cpp
- src/stream.cpp
- src/tag.cpp
-)
-
-END()
+ src/exp.cpp
+ src/memory.cpp
+ src/nodebuilder.cpp
+ src/node.cpp
+ src/node_data.cpp
+ src/nodeevents.cpp
+ src/null.cpp
+ src/ostream_wrapper.cpp
+ src/parse.cpp
+ src/parser.cpp
+ src/regex_yaml.cpp
+ src/scanner.cpp
+ src/scanscalar.cpp
+ src/scantag.cpp
+ src/scantoken.cpp
+ src/simplekey.cpp
+ src/singledocparser.cpp
+ src/stream.cpp
+ src/tag.cpp
+)
+
+END()